1 : /*
2 : +----------------------------------------------------------------------+
3 : | PHP Version 5 |
4 : +----------------------------------------------------------------------+
5 : | This source file is subject to version 3.01 of the PHP license, |
6 : | that is bundled with this package in the file LICENSE, and is |
7 : | available through the world-wide-web at the following url: |
8 : | http://www.php.net/license/3_01.txt |
9 : | If you did not receive a copy of the PHP license and are unable to |
10 : | obtain it through the world-wide-web, please send a note to |
11 : | license@php.net so we can mail you a copy immediately. |
12 : +----------------------------------------------------------------------+
13 : | Authors: Vadim Savchuk <vsavchuk@productengine.com> |
14 : | Dmitry Lakhtyuk <dlakhtyuk@productengine.com> |
15 : +----------------------------------------------------------------------+
16 : */
17 :
18 : #ifdef HAVE_CONFIG_H
19 : #include "config.h"
20 : #endif
21 :
22 : #include "php_intl.h"
23 : #include "collator_class.h"
24 : #include "collator_is_numeric.h"
25 : #include "collator_convert.h"
26 : #include "intl_convert.h"
27 :
28 : #include <unicode/ustring.h>
29 : #include <php.h>
30 :
/*
 * CAST_OBJECT_SHOULD_FREE is spliced into the argument list of the
 * cast_object handler call. It expands to ",0" when building against
 * PHP 5.0/5.1 and to nothing for every other version.
 * NOTE(review): presumably the extra argument is the handler's old
 * "should_free" parameter - confirm against the Zend headers.
 */
#if !(PHP_MAJOR_VERSION == 5 && PHP_MINOR_VERSION <= 1)
# define CAST_OBJECT_SHOULD_FREE
#else
# define CAST_OBJECT_SHOULD_FREE ,0
#endif
36 :
/*
 * Add a reference to `retval` and return it from the enclosing function.
 * Used on the "nothing converted" paths, where the caller gets its own
 * argument back.
 *
 * Wrapped in do { } while( 0 ) so that the macro followed by ';' is a
 * single statement and stays correct inside an unbraced if/else.
 * `retval` must be an lvalue because its address is taken.
 */
#define COLLATOR_CONVERT_RETURN_FAILED(retval) do {	\
		zval_add_ref( &(retval) );					\
		return (retval);							\
	} while( 0 )
41 :
42 : /* {{{ collator_convert_hash_item_from_utf8_to_utf16 */
43 : static void collator_convert_hash_item_from_utf8_to_utf16(
44 : HashTable* hash, int hashKeyType, char* hashKey, ulong hashIndex,
45 : UErrorCode* status )
46 228 : {
47 : const char* old_val;
48 : int old_val_len;
49 228 : UChar* new_val = NULL;
50 228 : int new_val_len = 0;
51 228 : zval** hashData = NULL;
52 228 : zval* znew_val = NULL;
53 :
54 : /* Get current hash item. */
55 228 : zend_hash_get_current_data( hash, (void**) &hashData );
56 :
57 : /* Process string values only. */
58 228 : if( Z_TYPE_P( *hashData ) != IS_STRING )
59 20 : return;
60 :
61 208 : old_val = Z_STRVAL_P( *hashData );
62 208 : old_val_len = Z_STRLEN_P( *hashData );
63 :
64 : /* Convert it from UTF-8 to UTF-16LE and save the result to new_val[_len]. */
65 208 : intl_convert_utf8_to_utf16( &new_val, &new_val_len, old_val, old_val_len, status );
66 208 : if( U_FAILURE( *status ) )
67 0 : return;
68 :
69 : /* Update current hash item with the converted value. */
70 208 : MAKE_STD_ZVAL( znew_val );
71 208 : ZVAL_STRINGL( znew_val, (char*)new_val, UBYTES(new_val_len), FALSE );
72 :
73 208 : if( hashKeyType == HASH_KEY_IS_STRING )
74 : {
75 30 : zend_hash_update( hash, hashKey, strlen( hashKey ) + 1,
76 : (void*) &znew_val, sizeof(zval*), NULL );
77 : }
78 : else /* hashKeyType == HASH_KEY_IS_LONG */
79 : {
80 178 : zend_hash_index_update( hash, hashIndex,
81 : (void*) &znew_val, sizeof(zval*), NULL );
82 : }
83 : }
84 : /* }}} */
85 :
86 : /* {{{ collator_convert_hash_item_from_utf16_to_utf8 */
87 : static void collator_convert_hash_item_from_utf16_to_utf8(
88 : HashTable* hash, int hashKeyType, char* hashKey, ulong hashIndex,
89 : UErrorCode* status )
90 228 : {
91 : const char* old_val;
92 : int old_val_len;
93 228 : char* new_val = NULL;
94 228 : int new_val_len = 0;
95 228 : zval** hashData = NULL;
96 228 : zval* znew_val = NULL;
97 :
98 : /* Get current hash item. */
99 228 : zend_hash_get_current_data( hash, (void**) &hashData );
100 :
101 : /* Process string values only. */
102 228 : if( Z_TYPE_P( *hashData ) != IS_STRING )
103 20 : return;
104 :
105 208 : old_val = Z_STRVAL_P( *hashData );
106 208 : old_val_len = Z_STRLEN_P( *hashData );
107 :
108 : /* Convert it from UTF-16LE to UTF-8 and save the result to new_val[_len]. */
109 208 : intl_convert_utf16_to_utf8( &new_val, &new_val_len,
110 : (UChar*)old_val, UCHARS(old_val_len), status );
111 208 : if( U_FAILURE( *status ) )
112 0 : return;
113 :
114 : /* Update current hash item with the converted value. */
115 208 : MAKE_STD_ZVAL( znew_val );
116 208 : ZVAL_STRINGL( znew_val, (char*)new_val, new_val_len, FALSE );
117 :
118 208 : if( hashKeyType == HASH_KEY_IS_STRING )
119 : {
120 30 : zend_hash_update( hash, hashKey, strlen( hashKey ) + 1,
121 : (void*) &znew_val, sizeof(zval*), NULL );
122 : }
123 : else /* hashKeyType == HASH_KEY_IS_LONG */
124 : {
125 178 : zend_hash_index_update( hash, hashIndex,
126 : (void*) &znew_val, sizeof(zval*), NULL );
127 : }
128 : }
129 : /* }}} */
130 :
131 : /* {{{ collator_convert_hash_from_utf8_to_utf16
132 : * Convert values of the given hash from UTF-8 encoding to UTF-16LE.
133 : */
134 : void collator_convert_hash_from_utf8_to_utf16( HashTable* hash, UErrorCode* status )
135 74 : {
136 74 : ulong hashIndex = 0;
137 74 : char* hashKey = NULL;
138 74 : int hashKeyType = 0;
139 :
140 74 : zend_hash_internal_pointer_reset( hash );
141 376 : while( ( hashKeyType = zend_hash_get_current_key( hash, &hashKey, &hashIndex, 0 ) )
142 : != HASH_KEY_NON_EXISTANT )
143 : {
144 : /* Convert current hash item from UTF-8 to UTF-16LE. */
145 228 : collator_convert_hash_item_from_utf8_to_utf16(
146 : hash, hashKeyType, hashKey, hashIndex, status );
147 228 : if( U_FAILURE( *status ) )
148 0 : return;
149 :
150 : /* Proceed to the next item. */
151 228 : zend_hash_move_forward( hash );
152 : }
153 : }
154 : /* }}} */
155 :
156 : /* {{{ collator_convert_hash_from_utf16_to_utf8
157 : * Convert values of the given hash from UTF-16LE encoding to UTF-8.
158 : */
159 : void collator_convert_hash_from_utf16_to_utf8( HashTable* hash, UErrorCode* status )
160 74 : {
161 74 : ulong hashIndex = 0;
162 74 : char* hashKey = NULL;
163 74 : int hashKeyType = 0;
164 :
165 74 : zend_hash_internal_pointer_reset( hash );
166 376 : while( ( hashKeyType = zend_hash_get_current_key( hash, &hashKey, &hashIndex, 0 ) )
167 : != HASH_KEY_NON_EXISTANT )
168 : {
169 : /* Convert current hash item from UTF-16LE to UTF-8. */
170 228 : collator_convert_hash_item_from_utf16_to_utf8(
171 : hash, hashKeyType, hashKey, hashIndex, status );
172 228 : if( U_FAILURE( *status ) ) {
173 0 : return;
174 : }
175 :
176 : /* Proceed to the next item. */
177 228 : zend_hash_move_forward( hash );
178 : }
179 : }
180 : /* }}} */
181 :
182 : /* {{{ collator_convert_zstr_utf16_to_utf8
183 : *
184 : * Convert string from utf16 to utf8.
185 : *
186 : * @param zval* utf16_zval String to convert.
187 : *
188 : * @return zval* Converted string.
189 : */
190 : zval* collator_convert_zstr_utf16_to_utf8( zval* utf16_zval )
191 8 : {
192 8 : zval* utf8_zval = NULL;
193 8 : char* str = NULL;
194 8 : int str_len = 0;
195 8 : UErrorCode status = U_ZERO_ERROR;
196 :
197 : /* Convert to utf8 then. */
198 8 : intl_convert_utf16_to_utf8( &str, &str_len,
199 : (UChar*) Z_STRVAL_P(utf16_zval), UCHARS( Z_STRLEN_P(utf16_zval) ), &status );
200 8 : if( U_FAILURE( status ) )
201 0 : php_error( E_WARNING, "Error converting utf16 to utf8 in collator_convert_zval_utf16_to_utf8()" );
202 :
203 8 : ALLOC_INIT_ZVAL( utf8_zval );
204 8 : ZVAL_STRINGL( utf8_zval, str, str_len, FALSE );
205 :
206 8 : return utf8_zval;
207 : }
208 : /* }}} */
209 :
210 : /* {{{ collator_convert_zstr_utf8_to_utf16
211 : *
212 : * Convert string from utf8 to utf16.
213 : *
214 : * @param zval* utf8_zval String to convert.
215 : *
216 : * @return zval* Converted string.
217 : */
218 : zval* collator_convert_zstr_utf8_to_utf16( zval* utf8_zval )
219 8 : {
220 8 : zval* zstr = NULL;
221 8 : UChar* ustr = NULL;
222 8 : int ustr_len = 0;
223 8 : UErrorCode status = U_ZERO_ERROR;
224 :
225 : /* Convert the string to UTF-16. */
226 8 : intl_convert_utf8_to_utf16(
227 : &ustr, &ustr_len,
228 : Z_STRVAL_P( utf8_zval ), Z_STRLEN_P( utf8_zval ),
229 : &status );
230 8 : if( U_FAILURE( status ) )
231 0 : php_error( E_WARNING, "Error casting object to string in collator_convert_zstr_utf8_to_utf16()" );
232 :
233 : /* Set string. */
234 8 : ALLOC_INIT_ZVAL( zstr );
235 8 : ZVAL_STRINGL( zstr, (char*)ustr, UBYTES(ustr_len), FALSE );
236 :
237 8 : return zstr;
238 : }
239 : /* }}} */
240 :
241 : /* {{{ collator_convert_object_to_string
242 : * Convert object to UTF16-encoded string.
243 : */
244 : zval* collator_convert_object_to_string( zval* obj TSRMLS_DC )
245 392 : {
246 392 : zval* zstr = NULL;
247 392 : UErrorCode status = U_ZERO_ERROR;
248 392 : UChar* ustr = NULL;
249 392 : int ustr_len = 0;
250 :
251 : /* Bail out if it's not an object. */
252 392 : if( Z_TYPE_P( obj ) != IS_OBJECT )
253 : {
254 392 : COLLATOR_CONVERT_RETURN_FAILED( obj );
255 : }
256 :
257 : /* Try object's handlers. */
258 0 : if( Z_OBJ_HT_P(obj)->get )
259 : {
260 0 : zstr = Z_OBJ_HT_P(obj)->get( obj TSRMLS_CC );
261 :
262 0 : switch( Z_TYPE_P( zstr ) )
263 : {
264 : case IS_OBJECT:
265 : {
266 : /* Bail out. */
267 0 : zval_ptr_dtor( &zstr );
268 0 : COLLATOR_CONVERT_RETURN_FAILED( obj );
269 : } break;
270 :
271 : case IS_STRING:
272 0 : break;
273 :
274 : default:
275 : {
276 0 : convert_to_string( zstr );
277 : } break;
278 : }
279 : }
280 0 : else if( Z_OBJ_HT_P(obj)->cast_object )
281 : {
282 0 : ALLOC_INIT_ZVAL( zstr );
283 :
284 0 : if( Z_OBJ_HT_P(obj)->cast_object( obj, zstr, IS_STRING CAST_OBJECT_SHOULD_FREE TSRMLS_CC ) == FAILURE )
285 : {
286 : /* cast_object failed => bail out. */
287 0 : zval_ptr_dtor( &zstr );
288 0 : COLLATOR_CONVERT_RETURN_FAILED( obj );
289 : }
290 : }
291 :
292 : /* Object wasn't successfuly converted => bail out. */
293 0 : if( zstr == NULL )
294 : {
295 0 : COLLATOR_CONVERT_RETURN_FAILED( obj );
296 : }
297 :
298 : /* Convert the string to UTF-16. */
299 0 : intl_convert_utf8_to_utf16(
300 : &ustr, &ustr_len,
301 : Z_STRVAL_P( zstr ), Z_STRLEN_P( zstr ),
302 : &status );
303 0 : if( U_FAILURE( status ) )
304 0 : php_error( E_WARNING, "Error casting object to string in collator_convert_object_to_string()" );
305 :
306 : /* Cleanup zstr to hold utf16 string. */
307 0 : zval_dtor( zstr );
308 :
309 : /* Set string. */
310 0 : ZVAL_STRINGL( zstr, (char*)ustr, UBYTES(ustr_len), FALSE );
311 :
312 : /* Don't free ustr cause it's set in zstr without copy.
313 : * efree( ustr );
314 : */
315 :
316 0 : return zstr;
317 : }
318 : /* }}} */
319 :
320 : /* {{{ collator_convert_string_to_number
321 : *
322 : * Convert string to number.
323 : *
324 : * @param zval* str String to convert.
325 : *
326 : * @return zval* Number. If str is not numeric string return number zero.
327 : */
328 : zval* collator_convert_string_to_number( zval* str )
329 0 : {
330 0 : zval* num = collator_convert_string_to_number_if_possible( str );
331 0 : if( num == str )
332 : {
333 : /* String wasn't converted => return zero. */
334 0 : zval_ptr_dtor( &num );
335 :
336 0 : ALLOC_INIT_ZVAL( num );
337 0 : ZVAL_LONG( num, 0 );
338 : }
339 :
340 0 : return num;
341 : }
342 : /* }}} */
343 :
344 : /* {{{ collator_convert_string_to_double
345 : *
346 : * Convert string to double.
347 : *
348 : * @param zval* str String to convert.
349 : *
350 : * @return zval* Number. If str is not numeric string return number zero.
351 : */
352 : zval* collator_convert_string_to_double( zval* str )
353 0 : {
354 0 : zval* num = collator_convert_string_to_number( str );
355 0 : if( Z_TYPE_P(num) == IS_LONG )
356 : {
357 0 : ZVAL_DOUBLE( num, Z_LVAL_P( num ) );
358 : }
359 :
360 0 : return num;
361 : }
362 : /* }}} */
363 :
364 : /* {{{ collator_convert_string_to_number_if_possible
365 : *
366 : * Convert string to numer.
367 : *
368 : * @param zval* str String to convert.
369 : *
370 : * @return zval* Number if str is numeric string. Otherwise
371 : * original str param.
372 : */
373 : zval* collator_convert_string_to_number_if_possible( zval* str )
374 212 : {
375 212 : zval* num = NULL;
376 212 : int is_numeric = 0;
377 212 : long lval = 0;
378 212 : double dval = 0;
379 :
380 212 : if( Z_TYPE_P( str ) != IS_STRING )
381 : {
382 0 : COLLATOR_CONVERT_RETURN_FAILED( str );
383 : }
384 :
385 212 : if( ( is_numeric = collator_is_numeric( (UChar*) Z_STRVAL_P(str), UCHARS( Z_STRLEN_P(str) ), &lval, &dval, 1 ) ) )
386 : {
387 46 : ALLOC_INIT_ZVAL( num );
388 :
389 46 : if( is_numeric == IS_LONG )
390 46 : Z_LVAL_P(num) = lval;
391 46 : if( is_numeric == IS_DOUBLE )
392 0 : Z_DVAL_P(num) = dval;
393 :
394 46 : Z_TYPE_P(num) = is_numeric;
395 : }
396 : else
397 : {
398 166 : COLLATOR_CONVERT_RETURN_FAILED( str );
399 : }
400 :
401 46 : return num;
402 : }
403 : /* }}} */
404 :
405 : /* {{{ collator_make_printable_zval
406 : *
407 : * Returns string from input zval.
408 : *
409 : * @param zval* arg zval to get string from
410 : *
411 : * @return zval* UTF16 string.
412 : */
413 : zval* collator_make_printable_zval( zval* arg )
414 80 : {
415 : zval arg_copy;
416 80 : int use_copy = 0;
417 80 : zval* str = NULL;
418 :
419 80 : if( Z_TYPE_P(arg) != IS_STRING )
420 : {
421 8 : zend_make_printable_zval(arg, &arg_copy, &use_copy);
422 :
423 8 : if( use_copy )
424 : {
425 8 : str = collator_convert_zstr_utf8_to_utf16( &arg_copy );
426 8 : zval_dtor( &arg_copy );
427 : }
428 : else
429 : {
430 0 : str = collator_convert_zstr_utf8_to_utf16( arg );
431 : }
432 : }
433 : else
434 : {
435 72 : COLLATOR_CONVERT_RETURN_FAILED( arg );
436 : }
437 :
438 8 : return str;
439 : }
440 : /* }}} */
441 :
442 : /* {{{ collator_normalize_sort_argument
443 : *
444 : * Normalize argument to use in sort's compare function.
445 : *
446 : * @param zval* arg Sort's argument to normalize.
447 : *
448 : * @return zval* Normalized copy of arg or unmodified arg
449 : * if normalization is not needed.
450 : */
451 : zval* collator_normalize_sort_argument( zval* arg )
452 44 : {
453 44 : zval* n_arg = NULL;
454 :
455 44 : if( Z_TYPE_P( arg ) != IS_STRING )
456 : {
457 : /* If its not a string then nothing to do.
458 : * Return original arg.
459 : */
460 26 : COLLATOR_CONVERT_RETURN_FAILED( arg );
461 : }
462 :
463 : /* Try convert to number. */
464 18 : n_arg = collator_convert_string_to_number_if_possible( arg );
465 :
466 18 : if( n_arg == arg )
467 : {
468 : /* Conversion to number failed. */
469 8 : zval_ptr_dtor( &n_arg );
470 :
471 : /* Convert string to utf8. */
472 8 : n_arg = collator_convert_zstr_utf16_to_utf8( arg );
473 : }
474 :
475 18 : return n_arg;
476 : }
477 : /* }}} */
478 : /*
479 : * Local variables:
480 : * tab-width: 4
481 : * c-basic-offset: 4
482 : * End:
483 : * vim600: noet sw=4 ts=4 fdm=marker
484 : * vim<600: noet sw=4 ts=4
485 : */
|