1 : /* -*- mode: c; c-file-style: "k&r" -*-
2 :
3 : Modified for PHP by Andrei Zmievski <andrei@php.net>
4 :
5 : strnatcmp.c -- Perform 'natural order' comparisons of strings in C.
6 : Copyright (C) 2000, 2004 by Martin Pool <mbp sourcefrog net>
7 :
8 : This software is provided 'as-is', without any express or implied
9 : warranty. In no event will the authors be held liable for any damages
10 : arising from the use of this software.
11 :
12 : Permission is granted to anyone to use this software for any purpose,
13 : including commercial applications, and to alter it and redistribute it
14 : freely, subject to the following restrictions:
15 :
16 : 1. The origin of this software must not be misrepresented; you must not
17 : claim that you wrote the original software. If you use this software
18 : in a product, an acknowledgment in the product documentation would be
19 : appreciated but is not required.
20 : 2. Altered source versions must be plainly marked as such, and must not be
21 : misrepresented as being the original software.
22 : 3. This notice may not be removed or altered from any source distribution.
23 : */
24 :
25 : #include <ctype.h>
26 : #include <string.h>
27 : #include <assert.h>
28 : #include <stdio.h>
29 :
30 : #include "php.h"
31 : #include "php_string.h"
32 :
33 : #if defined(__GNUC__)
34 : # define UNUSED __attribute__((__unused__))
35 : #else
36 : # define UNUSED
37 : #endif
38 :
39 : #if 0
40 : static char const *version UNUSED =
41 : "$Id: strnatcmp.c 288896 2009-09-28 13:29:53Z rasmus $";
42 : #endif
43 : /* {{{ compare_right
44 : */
45 : static int
46 : compare_right(char const *a, char const *b)
47 5 : {
48 5 : int bias = 0;
49 :
50 : /* The longest run of digits wins. That aside, the greatest
51 : value wins, but we can't know that it will until we've scanned
52 : both numbers to know that they have the same magnitude, so we
53 : remember it in BIAS. */
54 6 : for(;; a++, b++) {
55 11 : if (!isdigit((int)(unsigned char)*a) &&
56 : !isdigit((int)(unsigned char)*b))
57 1 : return bias;
58 10 : else if (!isdigit((int)(unsigned char)*a))
59 0 : return -1;
60 10 : else if (!isdigit((int)(unsigned char)*b))
61 4 : return +1;
62 6 : else if (*a < *b) {
63 3 : if (!bias)
64 3 : bias = -1;
65 3 : } else if (*a > *b) {
66 1 : if (!bias)
67 1 : bias = +1;
68 2 : } else if (!*a && !*b) {
69 0 : return bias;
70 : }
71 6 : }
72 :
73 : return 0;
74 : }
75 : /* }}} */
76 :
77 : /* {{{ compare_left
78 : */
79 : static int
80 : compare_left(char const *a, char const *b)
81 33 : {
82 : /* Compare two left-aligned numbers: the first to have a
83 : different value wins. */
84 8 : for(;; a++, b++) {
85 33 : if (!isdigit((int)(unsigned char)*a) &&
86 : !isdigit((int)(unsigned char)*b))
87 4 : return 0;
88 29 : else if (!isdigit((int)(unsigned char)*a))
89 4 : return -1;
90 25 : else if (!isdigit((int)(unsigned char)*b))
91 0 : return +1;
92 25 : else if (*a < *b)
93 0 : return -1;
94 25 : else if (*a > *b)
95 17 : return +1;
96 8 : }
97 :
98 : return 0;
99 : }
100 : /* }}} */
101 :
102 : /* {{{ strnatcmp_ex
103 : */
104 : PHPAPI int strnatcmp_ex(char const *a, size_t a_len, char const *b, size_t b_len, int fold_case)
105 39 : {
106 : unsigned char ca, cb;
107 : unsigned int ai, bi;
108 : int fractional, result;
109 39 : short leading = 1;
110 :
111 39 : ai = bi = 0;
112 : while (1) {
113 44 : ca = a[ai]; cb = b[bi];
114 :
115 : /* skip over leading zeros */
116 160 : while (leading && ca == '0' && (ai+1 < a_len) && isdigit(a[ai+1])) {
117 72 : ca = a[++ai];
118 : }
119 :
120 106 : while (leading && cb == '0' && (bi+1 < b_len) && isdigit(a[ai+1])) {
121 18 : cb = b[++bi];
122 : }
123 :
124 44 : leading = 0;
125 :
126 : /* Strip consecutive whitespace */
127 88 : while (isspace((int)(unsigned char)ca)) {
128 0 : ca = a[++ai];
129 : }
130 :
131 88 : while (isspace((int)(unsigned char)cb)) {
132 0 : cb = b[++bi];
133 : }
134 :
135 : /* process run of digits */
136 44 : if (isdigit((int)(unsigned char)ca) && isdigit((int)(unsigned char)cb)) {
137 30 : fractional = (ca == '0' || cb == '0');
138 :
139 30 : if (fractional) {
140 25 : if ((result = compare_left(a+ai, b+bi)) != 0) {
141 21 : return result;
142 : }
143 : } else {
144 5 : if ((result = compare_right(a+ai, b+bi)) != 0)
145 5 : return result;
146 : }
147 : }
148 :
149 18 : if (!ca && !cb) {
150 : /* The strings compare the same. Perhaps the caller
151 : will want to call strcmp to break the tie. */
152 1 : return 0;
153 : }
154 :
155 17 : if (fold_case) {
156 0 : ca = toupper((int)(unsigned char)ca);
157 0 : cb = toupper((int)(unsigned char)cb);
158 : }
159 :
160 17 : if (ca < cb)
161 4 : return -1;
162 13 : else if (ca > cb)
163 8 : return +1;
164 :
165 5 : ++ai; ++bi;
166 5 : }
167 : }
168 : /* }}} */
169 :
170 : /* {{{ u_compare_right
171 : */
172 : static int
173 : u_compare_right(UChar const *a, int a_len, int *a_curr, UChar const *b, int b_len, int *b_curr)
174 132 : {
175 : UChar32 ca, cb;
176 : int a_off, b_off;
177 132 : int bias = 0;
178 :
179 : /* The longest run of digits wins. That aside, the greatest
180 : value wins, but we can't know that it will until we've scanned
181 : both numbers to know that they have the same magnitude, so we
182 : remember it in BIAS. */
183 :
184 : for (;;) {
185 383 : a_off = *a_curr;
186 383 : b_off = *b_curr;
187 383 : U16_NEXT(a, a_off, a_len, ca);
188 383 : U16_NEXT(b, b_off, b_len, cb);
189 :
190 383 : if (!u_isdigit(ca) && !u_isdigit(cb)) {
191 73 : return bias;
192 310 : } else if (!u_isdigit(ca)) {
193 26 : return -1;
194 284 : } else if (!u_isdigit(cb)) {
195 33 : return +1;
196 251 : } else if (ca < cb) {
197 71 : if (!bias)
198 42 : bias = -1;
199 180 : } else if (ca > cb) {
200 90 : if (!bias)
201 62 : bias = +1;
202 90 : } else if (ca == 0 && cb == 0) {
203 0 : return bias;
204 : }
205 251 : *a_curr = a_off;
206 251 : *b_curr = b_off;
207 251 : }
208 :
209 : return 0;
210 : }
211 : /* }}} */
212 :
213 : /* {{{ u_compare_left
214 : */
215 : static int
216 : u_compare_left(UChar const *a, int a_len, int *a_curr, UChar const *b, int b_len, int *b_curr)
217 73 : {
218 : int a_off, b_off;
219 : UChar32 ca, cb;
220 :
221 : /* Compare two left-aligned numbers: the first to have a
222 : different value wins. */
223 : for (;;) {
224 73 : a_off = *a_curr;
225 73 : b_off = *b_curr;
226 73 : U16_NEXT(a, a_off, a_len, ca);
227 73 : U16_NEXT(b, b_off, b_len, cb);
228 :
229 73 : if (!u_isdigit(ca) && !u_isdigit(cb)) {
230 15 : return 0;
231 58 : } else if (!u_isdigit(ca)) {
232 0 : return -1;
233 58 : } else if (!u_isdigit(cb)) {
234 0 : return +1;
235 58 : } else if (ca < cb) {
236 23 : return -1;
237 35 : } else if (ca > cb) {
238 14 : return +1;
239 : }
240 21 : *a_curr = a_off;
241 21 : *b_curr = b_off;
242 21 : }
243 :
244 : return 0;
245 : }
246 : /* }}} */
247 :
248 : /* {{{ u_strnatcmp_ex
249 : */
250 : PHPAPI int u_strnatcmp_ex(UChar const *a, size_t a_len, UChar const *b, size_t b_len, int fold_case)
251 465 : {
252 : UChar ca, cb;
253 : UChar const *ap, *bp;
254 : int fractional, result;
255 : int a_off, b_off;
256 : unsigned int a_curr, b_curr;
257 :
258 465 : if (a_len == 0 || b_len == 0)
259 23 : return a_len - b_len;
260 :
261 442 : ap = a;
262 442 : bp = b;
263 442 : a_curr = b_curr = 0;
264 :
265 : while (1) {
266 825 : a_off = a_curr;
267 825 : b_off = b_curr;
268 825 : U16_NEXT(a, a_curr, a_len, ca);
269 825 : U16_NEXT(b, b_curr, b_len, cb);
270 :
271 : /* skip over leading spaces */
272 1654 : for ( ; u_isspace(ca) && a_curr < a_len; ) {
273 4 : a_off = a_curr;
274 4 : U16_NEXT(a, a_curr, a_len, ca);
275 : }
276 :
277 1656 : for ( ; u_isspace(cb) && b_curr < b_len; ) {
278 6 : b_off = b_curr;
279 6 : U16_NEXT(b, b_curr, b_len, cb);
280 : }
281 :
282 : /* process run of digits */
283 825 : if (u_isdigit(ca) && u_isdigit(cb)) {
284 184 : fractional = (ca == 0x30 /*'0'*/ || cb == 0x30 /*'0'*/);
285 :
286 184 : if (fractional) {
287 52 : if ((result = u_compare_left(a, a_len, &a_off, b, b_len, &b_off)) != 0) {
288 37 : return result;
289 : }
290 : } else {
291 132 : if ((result = u_compare_right(a, a_len, &a_off, b, b_len, &b_off)) != 0) {
292 122 : return result;
293 : }
294 : }
295 :
296 25 : a_curr = a_off;
297 25 : b_curr = b_off;
298 : }
299 :
300 666 : if (ca == 0 && cb == 0) {
301 : /* The strings compare the same. Perhaps the caller
302 : will want to call strcmp to break the tie. */
303 31 : return 0;
304 : }
305 :
306 635 : if (fold_case) {
307 521 : ca = u_toupper(ca);
308 521 : cb = u_toupper(cb);
309 : }
310 :
311 635 : if (ca < cb)
312 96 : return -1;
313 539 : else if (ca > cb)
314 156 : return +1;
315 383 : }
316 : }
317 : /* }}} */
318 : /*
319 : * Local variables:
320 : * tab-width: 4
321 : * c-basic-offset: 4
322 : * End:
323 : * vim600: sw=4 ts=4 fdm=marker
324 : * vim<600: sw=4 ts=4
325 : */
|