PHP  
 PHP: Test and Code Coverage Analysis
downloads | QA | documentation | faq | getting help | mailing lists | reporting bugs | php.net sites | links | my php.net 
 

LTP GCOV extension - code coverage report
Current view: directory - var/php_gcov/PHP_HEAD/Zend - zend_unicode.c
Test: PHP Code Coverage
Date: 2009-11-23 Instrumented lines: 398
Code covered: 72.4 % Executed lines: 288
Legend: not executed executed

       1                 : /*
       2                 :    +----------------------------------------------------------------------+
       3                 :    | Zend Engine                                                          |
       4                 :    +----------------------------------------------------------------------+
       5                 :    | Copyright (c) 1998-2009 Zend Technologies Ltd. (http://www.zend.com) |
       6                 :    +----------------------------------------------------------------------+
       7                 :    | This source file is subject to version 2.00 of the Zend license,     |
       8                 :    | that is bundled with this package in the file LICENSE, and is        |
       9                 :    | available through the world-wide-web at                              |
      10                 :    | http://www.zend.com/license/2_00.txt.                                |
      11                 :    | If you did not receive a copy of the Zend license and are unable to  |
      12                 :    | obtain it through the world-wide-web, please send a note to          |
      13                 :    | license@zend.com so we can mail you a copy immediately.              |
      14                 :    +----------------------------------------------------------------------+
      15                 :    | Authors: Andrei Zmievski <andrei@php.net>                            |
      16                 :    +----------------------------------------------------------------------+
      17                 : */
      18                 : 
      19                 : #include "zend.h"
      20                 : #include "zend_globals.h"
      21                 : #include "zend_operators.h"
      22                 : #include "zend_exceptions.h"
      23                 : #include "zend_API.h"
      24                 : #include "zend_unicode.h"
      25                 : #include <unicode/unorm.h>
      26                 : 
      27                 : #ifdef ZTS
      28                 : ZEND_API ts_rsrc_id unicode_globals_id; /* ZTS builds define only an id here (presumably resolved to per-thread globals elsewhere -- confirm) */
      29                 : #else
      30                 : ZEND_API zend_unicode_globals unicode_globals; /* non-ZTS builds define the globals structure directly */
      31                 : #endif
      32                 : 
      33                 : ZEND_API zend_class_entry *unicodeConversionException; /* class entry pointer; not assigned in the portion of the file shown here */
      34                 : 
      35                 : /* {{{ zend_set_converter_error_mode
                         :  * Installs an ICU error callback on conv according to error_mode.
                         :  *
                         :  * conv       - converter to configure
                         :  * direction  - ZEND_FROM_UNICODE configures the from-Unicode callback
                         :  *              (ucnv_setFromUCallBack); any other value configures the
                         :  *              to-Unicode callback (ucnv_setToUCallBack)
                         :  * error_mode - only the low 8 bits are examined; they select one of the
                         :  *              ZEND_CONV_ERROR_* cases handled below
                         :  *
                         :  * Returns nothing. The ICU status written by the setter is not inspected.
                         :  */
      36                 : void zend_set_converter_error_mode(UConverter *conv, zend_conv_direction direction, uint16_t error_mode)
      37          189662 : {
      38          189662 :         UErrorCode status = U_ZERO_ERROR;
      39                 : 
      40          189662 :         switch (error_mode & 0xff) { /* bits above the low byte are masked off here */
      41                 :                 case ZEND_CONV_ERROR_STOP:
      42          114516 :                         if (direction == ZEND_FROM_UNICODE)
      43           17007 :                                 ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
      44                 :                         else
      45           97509 :                                 ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
      46          114516 :                         break;
      47                 : 
      48                 :                 case ZEND_CONV_ERROR_SKIP:
      49               1 :                         if (direction == ZEND_FROM_UNICODE)
      50               0 :                                 ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, NULL, &status);
      51                 :                         else
      52               1 :                                 ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, NULL, &status);
      53               1 :                         break;
      54                 : 
      55                 :                 case ZEND_CONV_ERROR_SUBST:
      56           73703 :                         if (direction == ZEND_FROM_UNICODE)
      57           73703 :                                 ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_SUBSTITUTE, NULL, NULL, NULL, &status);
      58                 :                         else
      59               0 :                                 ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_SUBSTITUTE, NULL, NULL, NULL, &status);
      60           73703 :                         break;
      61                 : 
      62                 :                 case ZEND_CONV_ERROR_ESCAPE_UNICODE:
      63            1396 :                         if (direction == ZEND_FROM_UNICODE)
      64               0 :                                 ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE, NULL, NULL, &status);
      65                 :                         else
      66            1396 :                                 ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE, NULL, NULL, &status);
      67            1396 :                         break;
      68                 : 
      69                 :                 case ZEND_CONV_ERROR_ESCAPE_ICU:
      70               0 :                         if (direction == ZEND_FROM_UNICODE)
      71               0 :                                 ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU, NULL, NULL, &status);
      72                 :                         else
      73               0 :                                 ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU, NULL, NULL, &status);
      74               0 :                         break;
      75                 : 
      76                 :                 case ZEND_CONV_ERROR_ESCAPE_JAVA:
      77              46 :                         if (direction == ZEND_FROM_UNICODE)
      78              46 :                                 ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA, NULL, NULL, &status);
      79                 :                         else
      80                 :                                 /*
      81                 :                                  * use C escape, even though JAVA is requested, so that we don't
      82                 :                                  * have to expose another constant
      83                 :                                  */
      84               0 :                                 ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C, NULL, NULL, &status);
      85              46 :                         break;
      86                 : 
      87                 :                 case ZEND_CONV_ERROR_ESCAPE_XML_DEC:
      88               0 :                         if (direction == ZEND_FROM_UNICODE)
      89               0 :                                 ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, NULL, NULL, &status);
      90                 :                         else
      91               0 :                                 ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, NULL, NULL, &status);
      92               0 :                         break;
      93                 : 
      94                 :                 case ZEND_CONV_ERROR_ESCAPE_XML_HEX:
      95               0 :                         if (direction == ZEND_FROM_UNICODE)
      96               0 :                                 ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX, NULL, NULL, &status);
      97                 :                         else
      98               0 :                                 ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX, NULL, NULL, &status);
      99               0 :                         break;
     100                 : 
     101                 :                 default:
     102               0 :                         assert(0); /* a mode value outside the cases above is treated as a programming error */
     103                 :                         break;
     104                 :         }
     105          189662 : }
     106                 : /* }}} */
     107                 : 
     108                 : /* {{{ zend_set_converter_subst_char
                         :  * Sets the substitution byte sequence of conv from a UChar string.
                         :  *
                         :  * conv       - converter whose substitution characters are replaced
                         :  * subst_char - UChar string measured with u_strlen(); an empty string
                         :  *              makes the function return without changing anything
                         :  *
                         :  * The string is first encoded with conv itself into an 8-byte scratch
                         :  * buffer, then handed to ucnv_setSubstChars(). Failures are reported
                         :  * through zend_error(E_WARNING, ...); nothing is returned.
                         :  */
     109                 : void zend_set_converter_subst_char(UConverter *conv, UChar *subst_char)
     110           73657 : {
     111                 :         char dest[8], *dest_ptr;
     112           73657 :         int8_t dest_len = 8;
     113           73657 :         UErrorCode status = U_ZERO_ERROR;
     114           73657 :         UErrorCode temp = U_ZERO_ERROR; /* separate status for the callback swaps so they do not disturb 'status' */
     115                 :         const void *old_context;
     116                 :         UConverterFromUCallback old_cb;
     117           73657 :         int32_t subst_char_len = u_strlen(subst_char);
     118                 : 
     119           73657 :         if (!subst_char_len)
     120               0 :                 return;
     121                 : 
     122           73657 :         ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_STOP, NULL, &old_cb, &old_context, &temp); /* switch to STOP for the encoding step, saving the current callback */
     123           73657 :         dest_len = ucnv_fromUChars(conv, dest, dest_len, subst_char, subst_char_len, &status);
     124           73657 :         ucnv_setFromUCallBack(conv, old_cb, old_context, NULL, NULL, &temp); /* put the saved callback back */
     125           73657 :         if (U_FAILURE(status)) {
     126               0 :                 zend_error(E_WARNING, "Could not set substitution character for the converter");
     127               0 :                 return;
     128                 :         }
     129                 : 
     130                 :         /* skip BOM for UTF-16/32 converters */
     131           73657 :         switch (ucnv_getType(conv)) {
     132                 :                 case UCNV_UTF16:
     133               7 :                         dest_ptr = dest + 2;
     134               7 :                         dest_len -= 2;
     135               7 :                         break;
     136                 : 
     137                 :                 case UCNV_UTF32:
     138               0 :                         dest_ptr = dest + 4;
     139               0 :                         dest_len -= 4;
     140               0 :                         break;
     141                 : 
     142                 :                 default:
     143           73650 :                         dest_ptr = dest;
     144                 :                         break;
     145                 :         }
     146                 : 
     147           73657 :         ucnv_setSubstChars(conv, dest_ptr, dest_len, &status);
     148           73657 :         if (status == U_ILLEGAL_ARGUMENT_ERROR) { /* NOTE(review): only this one status is reported; any other failure from ucnv_setSubstChars passes silently */
     149               0 :                 zend_error(E_WARNING, "Substitution character byte sequence is too short or long for this converter");
     150               0 :                 return;
     151                 :         }
     152                 : }
     153                 : /* }}} */
     154                 : 
     155                 : /* {{{ zend_set_converter_encoding
                         :  * Makes *converter refer to a converter for the given encoding.
                         :  *
                         :  * converter - address of the converter pointer; NULL yields FAILURE.
                         :  *             *converter may itself be NULL (nothing to close then).
                         :  * encoding  - encoding name passed to ucnv_open(); may be NULL
                         :  *
                         :  * Returns SUCCESS when the existing converter already matches the
                         :  * requested name or when a new converter was opened and stored;
                         :  * returns FAILURE when ucnv_open() reports an error, in which case
                         :  * *converter is left untouched.
                         :  */
     156                 : int zend_set_converter_encoding(UConverter **converter, const char *encoding)
     157          161106 : {
     158          161106 :         UErrorCode status = U_ZERO_ERROR;
     159          161106 :         UConverter *new_converter = NULL;
     160                 : 
     161          161106 :         if (!converter) {
     162               0 :                 return FAILURE;
     163                 :         }
     164                 : 
     165                 :         /*
     166                 :          * The specified encoding might be the same as converter's existing one,
     167                 :          * which results in a no-op.
     168                 :          */
     169          161106 :         if (*converter && encoding && encoding[0]) { /* the name comparison is skipped for a NULL or empty encoding */
     170              60 :                 const char *current = ucnv_getName(*converter, &status);
     171              60 :                 status = U_ZERO_ERROR; /* reset error */
     172              60 :                 if (!ucnv_compareNames(current, encoding)) {
     173               2 :                         return SUCCESS;
     174                 :                 }
     175                 :         }
     176                 : 
     177                 :         /*
     178                 :          * If encoding is NULL, ucnv_open() will return a converter based on
     179                 :          * the default platform encoding as determined by ucnv_getDefaultName().
     180                 :          */
     181          161104 :         new_converter = ucnv_open(encoding, &status);
     182          161104 :         if (U_FAILURE(status)) {
     183               7 :                 return FAILURE;
     184                 :         }
     185                 : 
     186          161097 :         if (*converter) { /* the old converter is closed only after the new one opened successfully */
     187              58 :                 ucnv_close(*converter);
     188                 :         }
     189          161097 :         *converter = new_converter;
     190                 : 
     191          161097 :         return SUCCESS;
     192                 : }
     193                 : /* }}} */
     194                 : 
     195                 : /* {{{ zend_copy_converter
                         :  * Points *target at a converter with the same name as source.
                         :  *
                         :  * Only the name obtained from ucnv_getName() is carried over: the work
                         :  * is delegated to zend_set_converter_encoding(), so no other state of
                         :  * source is passed along by this function.
                         :  *
                         :  * source must not be NULL (asserted). Returns FAILURE if the name
                         :  * lookup fails, otherwise the result of zend_set_converter_encoding().
                         :  */
     196                 : int zend_copy_converter(UConverter **target, UConverter *source)
     197               0 : {
     198               0 :         UErrorCode status = U_ZERO_ERROR;
     199                 :         const char *encoding;
     200                 : 
     201               0 :         assert(source != NULL);
     202                 : 
     203               0 :         encoding = ucnv_getName(source, &status);
     204               0 :         if (U_FAILURE(status)) {
     205               0 :                 return FAILURE;
     206                 :         }
     207                 : 
     208               0 :         return zend_set_converter_encoding(target, encoding);
     209                 : }
     210                 : /* }}} */
     211                 : 
     212                 : /* {{{ zend_string_to_unicode_ex
                         :  * Converts source_len bytes of source to UChars using conv.
                         :  *
                         :  * conv       - converter; its to-Unicode state is reset first
                         :  * target     - receives the output buffer (grown with eurealloc),
                         :  *              terminated with a 0 UChar
                         :  * target_len - receives the number of UChars written, terminator excluded
                         :  * status     - in/out ICU status. If it already signals failure on entry
                         :  *              the function returns 0 and leaves target/target_len alone.
                         :  *
                         :  * Returns the number of source bytes consumed (input - source). The
                         :  * buffer is handed back even when conversion fails part-way.
                         :  */
     213                 : ZEND_API int zend_string_to_unicode_ex(UConverter *conv, UChar **target, int *target_len, const char *source, int source_len, UErrorCode *status)
     214         3792131 : {
     215         3792131 :         UChar *buffer = NULL;
     216                 :         UChar *output;
     217         3792131 :         int32_t buffer_len = 0;
     218         3792131 :         int32_t converted = 0;
     219         3792131 :         const char *input = source;
     220                 :         UConverterType conv_type;
     221                 : 
     222         3792131 :         if (U_FAILURE(*status)) {
     223               0 :                 return 0;
     224                 :         }
     225                 : 
     226         3792131 :         ucnv_resetToUnicode(conv);
     227         3792131 :         conv_type = ucnv_getType(conv);
     228                 : 
     229         3792131 :         switch (conv_type) {
     230                 :                 case UCNV_SBCS:
     231                 :                 case UCNV_LATIN_1:
     232                 :                 case UCNV_US_ASCII:
     233                 :                         /*
     234                 :                          * For single-byte charsets, 1 input byte = 1 output UChar
     235                 :                          */
     236         1694125 :                         buffer_len = source_len;
     237         1694125 :                         break;
     238                 : 
     239                 :                 default:
     240                 :                         /*
     241                 :                          * Initial estimate: 1.25 UChar's for every 2 source bytes + 2 (past a
     242                 :                          * certain limit (2)). The rationale behind this is that (at least
     243                 :                          * in the case of GB2312) it is possible that there are single byte
     244                 :                          * characters in the input string. By using a GB2312 text as
     245                 :                          * example it seemed that a value of 1.25 allowed for as little
     246                 :                          * re-allocations as possible without over estimating the buffer
     247                 :                          * too much. In case there is a lot of single-byte characters
     248                 :                          * around a single multi-byte character this estimation is too low,
     249                 :                          * and then the re-allocation routines in the loop below kick in.
     250                 :                          * There we multiply by 1.33 and add 1 so that it's quite efficient
     251                 :                          * for smaller input strings without causing too many iterations of
     252                 :                          * this loop.
     253                 :                          */
     254         2098006 :                         buffer_len = (source_len > 2) ? ((source_len >> 1) + (source_len >> 3) + 2) : source_len; /* len/2 + len/8 + 2 */
     255                 :                         break;
     256                 :         }
     257                 : 
     258                 :         while (1) {
     259         5303210 :                 buffer = eurealloc(buffer, buffer_len + 1); /* +1 leaves room for the terminator written after the loop */
     260         5303210 :                 output = buffer + converted; /* resume writing right after what earlier passes produced */
     261         5303210 :                 ucnv_toUnicode(conv, &output, buffer + buffer_len, &input, source + source_len, NULL, TRUE, status);
     262         5303210 :                 converted = (int32_t) (output - buffer);
     263         5303210 :                 if (*status == U_BUFFER_OVERFLOW_ERROR) {
     264         1511079 :                         buffer_len = (buffer_len * 1.33) + 1; /* grow and retry; the overflow status is cleared on the next line */
     265         1511079 :                         *status = U_ZERO_ERROR;
     266                 :                 } else {
     267         3792131 :                         break;
     268                 :                 }
     269         1511079 :         }
     270                 : 
     271                 :         /*
     272                 :          * We return the buffer in case of failure anyway. The caller may want to
     273                 :          * use partially converted string for something.
     274                 :          */
     275                 : 
     276         3792131 :         buffer[converted] = 0;
     277         3792131 :         *target = buffer;
     278         3792131 :         *target_len = converted;
     279                 : 
     280         3792131 :         return input - source;
     281                 : }
     282                 : /* }}} */
     283                 : 
     284                 : /* {{{ zend_unicode_to_string_ex
                         :  * Converts source_len UChars of source to bytes using conv.
                         :  *
                         :  * conv       - converter; its from-Unicode state is reset first
                         :  * target     - receives the output buffer (grown with erealloc),
                         :  *              terminated with a single 0 byte
                         :  * target_len - receives the number of bytes written, terminator excluded
                         :  * status     - in/out ICU status. If it already signals failure on entry
                         :  *              the function returns 0 and leaves target/target_len alone.
                         :  *
                         :  * Returns the number of UChars consumed (input - source). The buffer
                         :  * is handed back even when conversion fails part-way.
                         :  */
     285                 : ZEND_API int zend_unicode_to_string_ex(UConverter *conv, char **target, int *target_len, const UChar *source, int source_len, UErrorCode *status)
     286        12635333 : {
     287        12635333 :         char *buffer = NULL;
     288                 :         char *output;
     289        12635333 :         int32_t buffer_len = 0;
     290        12635333 :         int32_t converted = 0;
     291        12635333 :         const UChar *input = source;
     292                 : 
     293        12635333 :         if (U_FAILURE(*status)) {
     294               0 :                 return 0;
     295                 :         }
     296                 : 
     297        12635333 :         ucnv_resetFromUnicode(conv);
     298                 : 
     299        12635333 :         buffer_len = ucnv_getMaxCharSize(conv) * source_len; /* initial size: the converter's maximum bytes per character times the input length */
     300                 : 
     301                 :         while (1) {
     302        12635333 :                 buffer = erealloc(buffer, buffer_len + 1); /* +1 leaves room for the terminator written after the loop */
     303        12635333 :                 output = buffer + converted; /* resume writing right after what earlier passes produced */
     304        12635333 :                 ucnv_fromUnicode(conv, &output, buffer + buffer_len, &input, source + source_len, NULL, TRUE, status);
     305        12635333 :                 converted = (int32_t) (output - buffer);
     306        12635333 :                 if (*status == U_BUFFER_OVERFLOW_ERROR) {
     307               0 :                         buffer_len += 64; /* fixed-size growth step, then retry */
     308               0 :                         *status = U_ZERO_ERROR;
     309                 :                 } else {
     310        12635333 :                         break;
     311                 :                 }
     312               0 :         }
     313                 : 
     314                 :         /*
     315                 :          * We return the buffer in case of failure anyway. The caller may want to
     316                 :          * use partially converted string for something.
     317                 :          */
     318                 : 
     319        12635333 :         buffer[converted] = 0; /* NULL-terminate the output string */
     320        12635333 :         *target = buffer;
     321        12635333 :         *target_len = converted;
     322                 : 
     323        12635333 :         return input - source;
     324                 : }
     325                 : /* }}} */
     326                 : 
     327                 : /* {{{ zend_convert_encodings
                         :  * Re-encodes source_len bytes of source from source_conv's charset
                         :  * into target_conv's charset with ucnv_convertEx(), using a local
                         :  * pivot buffer of 1024 UChars.
                         :  *
                         :  * target     - receives the output buffer (grown with erealloc); the
                         :  *              terminator is ucnv_getMinCharSize(target_conv) zero bytes
                         :  * target_len - receives the number of bytes written, terminator excluded
                         :  * status     - in/out ICU status. If it already signals failure on entry
                         :  *              the function returns without touching target/target_len.
                         :  */
     328                 : ZEND_API void zend_convert_encodings(UConverter *target_conv, UConverter *source_conv,
     329                 :                 char **target, int *target_len,
     330                 :                 const char *source, int source_len, UErrorCode *status)
     331          121527 : {
     332          121527 :         char *buffer = NULL;
     333                 :         char *output;
     334          121527 :         const char *input = source;
     335          121527 :         int32_t allocated = 0;
     336          121527 :         int32_t converted = 0;
     337                 :         int8_t null_size;
     338                 :         UChar pivot_buf[1024], *pivot, *pivot2;
     339                 : 
     340          121527 :         if (U_FAILURE(*status)) {
     341               0 :                 return;
     342                 :         }
     343                 : 
     344          121527 :         null_size = ucnv_getMinCharSize(target_conv); /* size in bytes of the terminator written after the loop */
     345          121527 :         allocated = source_len + null_size; /* first guess: same byte count as the input plus the terminator */
     346                 : 
     347          121527 :         ucnv_resetToUnicode(source_conv);
     348          121527 :         ucnv_resetFromUnicode(target_conv);
     349          121527 :         pivot = pivot2 = pivot_buf;
     350                 : 
     351                 :         while (1) {
     352          121527 :                 buffer = (char *) erealloc(buffer, allocated);
     353          121527 :                 output = buffer + converted; /* resume writing right after what earlier passes produced */
     354          121527 :                 ucnv_convertEx(target_conv, source_conv, &output, buffer + allocated - null_size, /* the output limit stops short of the bytes reserved for the terminator */
     355                 :                                 &input, source + source_len, pivot_buf, &pivot, &pivot2, pivot_buf + 1024, FALSE, TRUE, status);
     356          121527 :                 converted = (int32_t) (output - buffer);
     357          121527 :                 if (*status == U_BUFFER_OVERFLOW_ERROR) {
     358               0 :                         allocated += 1024; /* fixed-size growth step, then retry */
     359               0 :                         *status = U_ZERO_ERROR;
     360                 :                 } else {
     361          121527 :                         break;
     362                 :                 }
     363               0 :         }
     364                 : 
     365          121527 :         memset(buffer + converted, 0, null_size); /* NULL-terminate the output string */
     366          121527 :         *target = buffer;
     367          121527 :         *target_len = converted;
     368                 : }
     369                 : /* }}} */
     371                 : /* {{{ zend_unicode_to_ascii
                         :  * Converts us_len UChars of us to a byte string with the converter
                         :  * stored in UG(ascii_conv).
                         :  *
                         :  * Returns the buffer produced by zend_unicode_to_string_ex() on
                         :  * success. On conversion failure the buffer is released with efree()
                         :  * and NULL is returned. The converted length is not reported to the
                         :  * caller.
                         :  */
     372                 : ZEND_API char* zend_unicode_to_ascii(const UChar *us, int us_len TSRMLS_DC)
     373          307522 : {
     374                 :         char *cs;
     375                 :         int   cs_len;
     376          307522 :         UErrorCode status = U_ZERO_ERROR;
     377                 : 
     378          307522 :         zend_unicode_to_string_ex(UG(ascii_conv), &cs, &cs_len, us, us_len, &status);
     379          307522 :         if (U_FAILURE(status)) {
     380               7 :                 efree(cs); /* the helper hands back a buffer even on failure, so it must be released here */
     381               7 :                 return NULL;
     382                 :         }
     383          307515 :         return cs;
     384                 : }
     385                 : /* }}} */
     386                 : 
     387                 : /* {{{ zend_default_conversion_error_handler
                         :  * Emits an E_WARNING describing a conversion failure.
                         :  *
                         :  * message           - text placed at the start of the warning; when NULL
                         :  *                     the function returns without emitting anything
                         :  * conv              - converter that failed; its name is mapped to the
                         :  *                     "MIME" standard name, and an empty string is
                         :  *                     printed when that lookup yields NULL
                         :  * dir               - ZEND_FROM_UNICODE reports the offending code point;
                         :  *                     any other value reports the offending bytes in hex
                         :  * error_char_offset - offset from which the reported position is derived
                         :  *                     (minus 1, or minus the number of invalid bytes)
                         :  */
     388                 : static void zend_default_conversion_error_handler(char *message, UConverter *conv, zend_conv_direction dir, int error_char_offset TSRMLS_DC)
     389              18 : {
     390                 :         const char *conv_name;
     391              18 :         UErrorCode status = U_ZERO_ERROR;
     392                 : 
     393              18 :         if (!message)
     394               0 :                 return;
     395                 : 
     396              18 :         conv_name = ucnv_getName(conv, &status);
     397                 :         /*
     398                 :          * UTODO
     399                 :          * use some other standard than MIME? or fallback onto IANA? or use
     400                 :          * internal converter name? ponder
     401                 :          */
     402              18 :         conv_name = ucnv_getStandardName(conv_name, "MIME", &status);
     403              18 :         status = U_ZERO_ERROR; /* any error from the two name lookups is discarded */
     404                 : 
     405              18 :         if (dir == ZEND_FROM_UNICODE) {
     406                 :                 UChar err_char[U16_MAX_LENGTH];
     407               5 :                 int8_t err_char_len = sizeof(err_char); /* NOTE(review): sizeof yields a byte count, not a UChar count -- confirm which unit ucnv_getInvalidUChars expects as capacity */
     408                 :                 UChar32 codepoint;
     409               5 :                 char *message_fmt = "%s (converter %s failed on character {U+%04X} at offset %d)";
     410                 : 
     411               5 :                 memset(&err_char, '\0', U16_MAX_LENGTH); /* NOTE(review): clears U16_MAX_LENGTH bytes rather than sizeof(err_char) -- confirm the whole array was meant */
     412               5 :                 ucnv_getInvalidUChars(conv, err_char, &err_char_len, &status);
     413               5 :                 codepoint = (err_char_len < 2) ? err_char[0] : U16_GET_SUPPLEMENTARY(err_char[0], err_char[1]); /* two units are combined as a surrogate pair */
     414                 : 
     415               5 :                 zend_error(E_WARNING, message_fmt, message, conv_name?conv_name:"", codepoint, error_char_offset-1);
     416                 :         } else {
     417                 :                 char err_char[8]; /* holds the offending bytes; its size (8) is the capacity passed to ICU below */
     418                 :                 char buf[40];     /* 4x number of error bytes + 7 separators + 1 for safety */
     419              13 :                 int8_t err_char_len = sizeof(err_char);
     420              13 :                 char *message_fmt = "%s (converter %s failed on bytes (%s) at offset %d)";
     421                 :                 char *p;
     422                 :                 int i;
     423                 : 
     424              13 :                 memset(&err_char, '\0', 8);
     425              13 :                 ucnv_getInvalidChars(conv, err_char, &err_char_len, &status);
     426              13 :                 p = buf;
     427              26 :                 for (i = 0; i < err_char_len; i++) {
     428              13 :                         sprintf(p, "0x%02X%s", (unsigned char)err_char[i], (i+1<err_char_len)?",":""); /* "0xNN", followed by a comma unless this is the last byte */
     429              13 :                         p += 4 + (i+1<err_char_len?1:0); /* advance past the 4 hex characters plus the comma, if one was written */
     430                 :                 }
     431              13 :                 *p = 0;
     432                 : 
     433              13 :                 zend_error(E_WARNING, message_fmt, message, conv_name?conv_name:"", buf, error_char_offset-err_char_len);
     434                 :         }
     435                 : }
     436                 : /* }}} */
     437                 : 
     438                 : /* {{{ zend_call_conversion_error_handler */
     439                 : static void zend_call_conversion_error_handler(char *message, UConverter *conv, zend_conv_direction dir, int error_char_offset TSRMLS_DC)
     440               0 : {
     441                 :         zval *z_message, *z_dir, *z_encoding, *z_char, *z_offset;
     442                 :         zval ***params;
     443                 :         zval *retval;
     444                 :         zval *orig_user_error_handler;
     445                 :         const char *conv_name;
     446               0 :         UErrorCode status = U_ZERO_ERROR;
     447                 : 
     448               0 :         ALLOC_INIT_ZVAL(z_message);
     449               0 :         ALLOC_INIT_ZVAL(z_dir);
     450               0 :         ALLOC_INIT_ZVAL(z_encoding);
     451               0 :         ALLOC_INIT_ZVAL(z_char);
     452               0 :         ALLOC_INIT_ZVAL(z_offset);
     453                 : 
     454               0 :         if (message) {
     455               0 :                 ZVAL_STRING(z_message, message, 1);
     456                 :         } else {
     457               0 :                 ZVAL_NULL(z_message);
     458                 :         }
     459                 : 
     460               0 :         ZVAL_LONG(z_dir, dir);
     461                 : 
     462               0 :         conv_name = ucnv_getName(conv, &status);
     463                 :         /*
     464                 :          * UTODO
     465                 :          * use some other standard than MIME? or fallback onto IANA? or use
     466                 :          * internal converter name? ponder
     467                 :          * maybe pass Converter object, when it's implemented?
     468                 :          */
     469               0 :         conv_name = ucnv_getStandardName(conv_name, "MIME", &status);
     470               0 :         ZVAL_STRING(z_encoding, (char *) conv_name, 1);
     471                 : 
     472               0 :         if (dir == ZEND_FROM_UNICODE) {
     473                 :                 UChar err_char[U16_MAX_LENGTH];
     474               0 :                 int8_t err_char_len = sizeof(err_char);
     475                 : 
     476               0 :                 memset(&err_char, '\0', U16_MAX_LENGTH);
     477               0 :                 ucnv_getInvalidUChars(conv, err_char, &err_char_len, &status);
     478               0 :                 ZVAL_UNICODEL(z_char, err_char, err_char_len, 1);
     479               0 :                 ZVAL_LONG(z_offset, error_char_offset-1);
     480                 :         } else {
     481                 :                 char err_char[8]; /* UTF-8 uses up to 8 bytes */
     482               0 :                 int8_t err_char_len = sizeof(err_char);
     483                 : 
     484               0 :                 memset(&err_char, '\0', 8);
     485               0 :                 ucnv_getInvalidChars(conv, err_char, &err_char_len, &status);
     486               0 :                 ZVAL_STRINGL(z_char, err_char, err_char_len, 1);
     487               0 :                 ZVAL_LONG(z_offset, error_char_offset-err_char_len);
     488                 :         }
     489                 : 
     490               0 :         params = (zval ***) emalloc(sizeof(zval **) * 6);
     491               0 :         params[0] = &z_dir;
     492               0 :         params[1] = &z_encoding;
     493               0 :         params[2] = &z_char;
     494               0 :         params[3] = &z_offset;
     495               0 :         params[4] = &z_message;
     496                 : 
     497               0 :         orig_user_error_handler = UG(conv_error_handler);
     498               0 :         UG(conv_error_handler) = NULL;
     499                 : 
     500               0 :         if (call_user_function_ex(EG(function_table), NULL, orig_user_error_handler, &retval, 5, params, 1, NULL TSRMLS_CC)==SUCCESS) {
     501               0 :                 if (retval) {
     502                 :                         /* user error handler returned 'false', use built-in error handler */
     503               0 :                         if (Z_TYPE_P(retval) == IS_BOOL && Z_LVAL_P(retval) == 0) {
     504               0 :                                 zend_default_conversion_error_handler(message, conv, dir, error_char_offset TSRMLS_CC);
     505                 :                         }
     506               0 :                         zval_ptr_dtor(&retval);
     507                 :                 }
     508               0 :         } else if (!EG(exception)) {
     509                 :                 /* The user error handler failed, use built-in error handler */
     510               0 :                 zend_default_conversion_error_handler(message, conv, dir, error_char_offset TSRMLS_CC);
     511                 :         }
     512                 : 
     513               0 :         if (!UG(conv_error_handler)) {
     514               0 :                 UG(conv_error_handler) = orig_user_error_handler;
     515                 :         } else {
     516               0 :                 zval_ptr_dtor(&orig_user_error_handler);
     517                 :         }
     518                 : 
     519               0 :         efree(params);
     520               0 :         zval_ptr_dtor(&z_dir);
     521               0 :         zval_ptr_dtor(&z_encoding);
     522               0 :         zval_ptr_dtor(&z_char);
     523               0 :         zval_ptr_dtor(&z_offset);
     524               0 :         zval_ptr_dtor(&z_message);
     525               0 : }
     526                 : /* }}} */
     527                 : 
     528                 : /* {{{ zend_raise_conversion_error_ex */
     529                 : ZEND_API void zend_raise_conversion_error_ex(char *message, UConverter *conv, zend_conv_direction dir, int error_char_offset TSRMLS_DC)
     530              18 : {
     531              18 :         if (UG(conv_error_handler)) {
     532               0 :                 zend_call_conversion_error_handler(message, conv, dir, error_char_offset TSRMLS_CC);
     533                 :         } else {
     534              18 :                 zend_default_conversion_error_handler(message, conv, dir, error_char_offset TSRMLS_CC);
     535                 :         }
     536              18 : }
     537                 : /* }}} */
     538                 : 
     539                 : /* {{{ zend_unicode_to_string */
     540                 : ZEND_API int zend_unicode_to_string(UConverter *conv, char **s, int *s_len, const UChar *u, int u_len TSRMLS_DC)
     541         3588269 : {
     542         3588269 :         UErrorCode status = U_ZERO_ERROR;
     543                 :         int num_conv;
     544                 : 
     545         3588269 :         if (conv == NULL) {
     546               0 :                 conv = UG(runtime_encoding_conv);
     547                 :         }
     548                 : 
     549         3588269 :         num_conv = zend_unicode_to_string_ex(conv, s, s_len, u, u_len, &status);
     550                 : 
     551         3588269 :         if (U_FAILURE(status)) {
     552               5 :                 int32_t offset = u_countChar32(u, num_conv);
     553                 : 
     554               5 :                 zend_raise_conversion_error_ex("Could not convert Unicode string to binary string", conv, ZEND_FROM_UNICODE, offset TSRMLS_CC);
     555               5 :                 if (*s) {
     556               5 :                         efree(*s);
     557                 :                 }
     558               5 :                 *s = NULL;
     559               5 :                 *s_len = 0;
     560               5 :                 return FAILURE;
     561                 :         }
     562         3588264 :         return SUCCESS;
     563                 : }
     564                 : /* }}} */
     565                 : 
     566                 : /* {{{ zval_unicode_to_string_ex */
     567                 : ZEND_API int zval_unicode_to_string_ex(zval *string, UConverter *conv TSRMLS_DC)
     568         3573825 : {
     569         3573825 :         char *s = NULL;
     570                 :         int s_len;
     571                 : 
     572         3573825 :         UChar *u = Z_USTRVAL_P(string);
     573         3573825 :         int u_len = Z_USTRLEN_P(string);
     574                 : 
     575         3573825 :         if (zend_unicode_to_string(conv, &s, &s_len, u, u_len TSRMLS_CC) == SUCCESS) {
     576         3573820 :                 ZVAL_STRINGL(string, s, s_len, 0);
     577         3573820 :                 efree((UChar*)u);
     578         3573820 :                 return SUCCESS;
     579                 :         } else {
     580               5 :                 ZVAL_EMPTY_STRING(string);
     581               5 :                 efree((UChar*)u);
     582               5 :                 return FAILURE;
     583                 :         }
     584                 : }
     585                 : /* }}} */
     586                 : 
     587                 : /* {{{ zval_unicode_to_string */
     588                 : ZEND_API int zval_unicode_to_string(zval *string TSRMLS_DC)
     589              92 : {
     590              92 :         return zval_unicode_to_string_ex(string, ZEND_U_CONVERTER(UG(runtime_encoding_conv)) TSRMLS_CC);
     591                 : }
     592                 : /* }}} */
     593                 : 
     594                 : /* {{{ zend_string_to_unicode */
     595                 : ZEND_API int zend_string_to_unicode(UConverter *conv, UChar **u, int *u_len, char *s, int s_len TSRMLS_DC)
     596         1013172 : {
     597         1013172 :         UErrorCode status = U_ZERO_ERROR;
     598                 :         int num_conv;
     599                 : 
     600         1013172 :         if (conv == NULL) {
     601            4176 :                 conv = ZEND_U_CONVERTER(UG(runtime_encoding_conv));
     602                 :         }
     603                 : 
     604         1013172 :         num_conv = zend_string_to_unicode_ex(conv, u, u_len, s, s_len, &status);
     605                 : 
     606         1013172 :         if (U_FAILURE(status)) {
     607              13 :                 zend_raise_conversion_error_ex("Could not convert binary string to Unicode string", conv, ZEND_TO_UNICODE, num_conv TSRMLS_CC);
     608              13 :                 if (*u) {
     609              13 :                         efree(*u);
     610                 :                 }
     611              13 :                 *u = NULL;
     612              13 :                 *u_len = 0;
     613              13 :                 return FAILURE;
     614                 :         }
     615         1013159 :         return SUCCESS;
     616                 : }
     617                 : /* }}} */
     618                 : 
     619                 : /* {{{ zval_string_to_unicode_ex */
     620                 : ZEND_API int zval_string_to_unicode_ex(zval *string, UConverter *conv TSRMLS_DC)
     621          304917 : {
     622          304917 :         UChar *u = NULL;
     623                 :         int u_len;
     624                 : 
     625          304917 :         char *s = Z_STRVAL_P(string);
     626          304917 :         int s_len = Z_STRLEN_P(string);
     627                 : 
     628          304917 :         if (zend_string_to_unicode(conv, &u, &u_len, s, s_len TSRMLS_CC) == SUCCESS) {
     629          304914 :                 ZVAL_UNICODEL(string, u, u_len, 0);
     630          304914 :                 efree(s);
     631          304914 :                 return SUCCESS;
     632                 :         } else {
     633               3 :                 ZVAL_EMPTY_UNICODE(string);
     634               3 :                 efree(s);
     635               3 :                 return FAILURE;
     636                 :         }
     637                 : }
     638                 : /* }}} */
     639                 : 
     640                 : /* {{{ zval_string_to_unicode */
     641                 : ZEND_API int zval_string_to_unicode(zval *string TSRMLS_DC)
     642               6 : {
     643               6 :         return zval_string_to_unicode_ex(string, ZEND_U_CONVERTER(UG(runtime_encoding_conv)) TSRMLS_CC);
     644                 : }
     645                 : /* }}} */
     646                 : 
     647                 : /* {{{ zend_cmp_unicode_and_string */
     648                 : ZEND_API int zend_cmp_unicode_and_string(UChar *ustr, char* str, uint len)
     649            4176 : {
     650            4176 :         UChar *u = NULL;
     651                 :         int u_len;
     652            4176 :         int retval = TRUE;
     653                 :         TSRMLS_FETCH();
     654                 : 
     655            4176 :         if (zend_string_to_unicode(NULL, &u, &u_len, str, len TSRMLS_CC) == FAILURE) {
     656               0 :                 return FAILURE;
     657                 :         }
     658            4176 :         retval = u_memcmp(ustr, u, u_len);
     659            4176 :         efree(u);
     660            4176 :         return retval;
     661                 : }
     662                 : /* }}} */
     663                 : 
     664                 : /* {{{ zend_cmp_unicode_and_literal */
     665                 : /*
     666                 :  * Compare a Unicode string and an ASCII literal. Because ASCII maps nicely onto Unicode
     667                 :  * range U+0000 .. U+007F, we can simply cast ASCII chars to Unicode values and avoid
     668                 :  * memory allocation.
     669                 :  */
     670                 : ZEND_API int zend_cmp_unicode_and_literal(UChar *ustr, int ulen, char *str, int slen)
     671         1159203 : {
     672                 :         int result;
     673         1159203 :         uint len = MIN(ulen, slen);
     674                 : 
     675                 :         /* UTODO: make sure we're only comparing against ASCII values here (< 0x80) */
     676         7336559 :         while (len--) {
     677         5732516 :                 result = (int)(uint16_t)*ustr - (int)(uint16_t)*str;
     678         5732516 :                 if (result != 0)
     679          714363 :                         return result;
     680         5018153 :                 ustr++;
     681         5018153 :                 str++;
     682                 :         }
     683                 : 
     684          444840 :         return ulen - slen;
     685                 : }
     686                 : /* }}} */
     687                 : 
     688                 : /* {{{ zend_is_valid_identifier */
     689                 : ZEND_API int zend_is_valid_identifier(UChar *ident, int len)
     690          938434 : {
     691                 :         UChar32 codepoint;
     692                 :         int32_t i;
     693          938434 :         int32_t ident_len = len;
     694          938434 :         UProperty id_prop = UCHAR_XID_START;
     695                 : 
     696        18894636 :         for (i = 0; i < ident_len; ) {
     697        17017789 :                 U16_NEXT(ident, i, ident_len, codepoint);
     698        17017789 :                 if (!u_hasBinaryProperty(codepoint, id_prop) &&
     699                 :                                 codepoint != 0x5f) { /* special case for starting '_' */
     700              21 :                         return 0;
     701                 :                 }
     702        17017768 :                 id_prop = UCHAR_XID_CONTINUE;
     703                 :         }
     704                 : 
     705          938413 :         return 1;
     706                 : }
     707                 : /* }}} */
     708                 : 
     709                 : /* {{{ zend_normalize_string */
     710                 : static inline void zend_normalize_string(UChar **dest, int32_t *dest_len, UChar *src, int src_len, UErrorCode *status)
     711               5 : {
     712               5 :         UChar *buffer = NULL;
     713                 :         int32_t buffer_len;
     714                 : 
     715               5 :         buffer_len = src_len;
     716                 :         while (1) {
     717               5 :                 *status = U_ZERO_ERROR;
     718               5 :                 buffer = eurealloc(buffer, buffer_len+1);
     719               5 :                 buffer_len = unorm_normalize(src, src_len, UNORM_NFKC, 0, buffer, buffer_len, status);
     720               5 :                 if (*status != U_BUFFER_OVERFLOW_ERROR) {
     721               5 :                         break;
     722                 :                 }
     723               0 :         }
     724               5 :         if (U_SUCCESS(*status)) {
     725               5 :                 buffer[buffer_len] = 0;
     726               5 :                 *dest = buffer;
     727               5 :                 *dest_len = buffer_len;
     728                 :         } else {
     729               0 :                 efree(buffer);
     730                 :         }
     731               5 : }
     732                 : /* }}} */
     733                 : 
     734                 : /* {{{ zend_case_fold_string */
     735                 : ZEND_API void zend_case_fold_string(UChar **dest, int *dest_len, UChar *src, int src_len, uint32_t options, UErrorCode *status)
     736        29748695 : {
     737        29748695 :         UChar *buffer = NULL;
     738                 :         int32_t buffer_len;
     739                 : 
     740        29748695 :         buffer_len = src_len;
     741                 :         while (1) {
     742        29748695 :                 *status = U_ZERO_ERROR;
     743        29748695 :                 buffer = eurealloc(buffer, buffer_len+1);
     744        29748695 :                 buffer_len = u_strFoldCase(buffer, buffer_len, src, src_len, options, status);
     745        29748695 :                 if (*status != U_BUFFER_OVERFLOW_ERROR) {
     746        29748695 :                         break;
     747                 :                 }
     748               0 :         }
     749        29748695 :         if (U_SUCCESS(*status)) {
     750        29748695 :                 buffer[buffer_len] = 0;
     751        29748695 :                 *dest = buffer;
     752        29748695 :                 *dest_len = buffer_len;
     753                 :         } else {
     754               0 :                 efree(buffer);
     755                 :         }
     756        29748695 : }
     757                 : /* }}} */
     758                 : 
     759                 : /* {{{ zend_normalize_identifier */
     760                 : ZEND_API int zend_normalize_identifier(UChar **dest, int *dest_len, UChar *ident, int ident_len, zend_bool fold_case)
     761         2425061 : {
     762         2425061 :         UChar *buffer = NULL;
     763         2425061 :         UChar *orig_ident = ident;
     764                 :         int32_t buffer_len;
     765         2425061 :         UErrorCode status = U_ZERO_ERROR;
     766                 : 
     767         2425061 :         if (unorm_quickCheck(ident, ident_len, UNORM_NFKC, &status) != UNORM_YES) {
     768               5 :                 zend_normalize_string(&buffer, &buffer_len, ident, ident_len, &status);
     769               5 :                 if (U_FAILURE(status)) {
     770               0 :                         return FAILURE;
     771                 :                 }
     772               5 :                 ident = buffer;
     773               5 :                 ident_len = buffer_len;
     774                 :         }
     775                 : 
     776         2425061 :         if (fold_case) {
     777         1470375 :                 zend_case_fold_string(&buffer, &buffer_len, ident, ident_len, U_FOLD_CASE_DEFAULT, &status);
     778         1470375 :                 if (ident != orig_ident) {
     779               0 :                         efree(ident);
     780                 :                 }
     781         1470375 :                 if (U_FAILURE(status)) {
     782               0 :                         return FAILURE;
     783                 :                 }
     784         1470375 :                 ident = buffer;
     785         1470375 :                 ident_len = buffer_len;
     786                 : 
     787         1470375 :                 if (unorm_quickCheck(ident, ident_len, UNORM_NFKC, &status) != UNORM_YES) {
     788               0 :                         zend_normalize_string(&buffer, &buffer_len, ident, ident_len, &status);
     789               0 :                         if (ident != orig_ident) {
     790               0 :                                 efree(ident);
     791                 :                         }
     792               0 :                         if (U_FAILURE(status)) {
     793               0 :                                 return FAILURE;
     794                 :                         }
     795               0 :                         ident = buffer;
     796               0 :                         ident_len = buffer_len;
     797                 :                 }
     798                 :         }
     799                 : 
     800         2425061 :         *dest = ident;
     801         2425061 :         *dest_len = ident_len;
     802         2425061 :         return SUCCESS;
     803                 : }
     804                 : /* }}} */
     805                 : 
     806                 : /* {{{ zend_register_unicode_exceptions */
     807                 : void zend_register_unicode_exceptions(TSRMLS_D)
     808           17007 : {
     809                 :         zend_class_entry ce;
     810                 : 
     811           17007 :         INIT_CLASS_ENTRY(ce, "UnicodeConversionException", NULL);
     812           17007 :         unicodeConversionException = zend_register_internal_class_ex(&ce, zend_exception_get_default(TSRMLS_C), NULL TSRMLS_CC);
     813           17007 : }
     814                 : /* }}} */
     815                 : 
     816                 : zend_collator* zend_collator_create(UCollator *coll) /* {{{ */
     817           17017 : {
     818           17017 :         zend_collator *zcoll = NULL;
     819                 : 
     820           17017 :         zcoll = emalloc(sizeof(zend_collator));
     821           17017 :         zcoll->coll = coll;
     822           17017 :         zcoll->refcount = 1;
     823                 : 
     824           17017 :         return zcoll;
     825                 : }
     826                 : /* }}} */
     827                 : 
     828                 : void zend_collator_destroy(zend_collator *zcoll) /* {{{ */
     829           17049 : {
     830           17049 :         zcoll->refcount--;
     831           17049 :         if (zcoll->refcount == 0) {
     832           17049 :                 ucol_close(zcoll->coll);
     833           17049 :                 efree(zcoll);
     834                 :         }
     835           17049 : }
     836                 : /* }}} */
     837                 : 
     838                 : /*
     839                 :  * Local variables:
     840                 :  * tab-width: 4
     841                 :  * c-basic-offset: 4
     842                 :  * indent-tabs-mode: t
     843                 :  * End:
     844                 :  * vim: noet sw=4 ts=4 fdm=marker
     845                 :  */

Generated by: LTP GCOV extension version 1.5

Generated at Mon, 23 Nov 2009 17:39:26 +0000 (33 hours ago)

Copyright © 2005-2009 The PHP Group
All rights reserved.