PHP  
 PHP: Test and Code Coverage Analysis
downloads | QA | documentation | faq | getting help | mailing lists | reporting bugs | php.net sites | links | my php.net 
 

LTP GCOV extension - code coverage report
Current view: directory - mbstring - php_unicode.c
Test: PHP Code Coverage
Date: 2009-11-21 Instrumented lines: 109
Code covered: 83.5 % Executed lines: 91
Legend: not executed executed

       1                 : /*
       2                 :    +----------------------------------------------------------------------+
       3                 :    | PHP Version 5                                                        |
       4                 :    +----------------------------------------------------------------------+
       5                 :    | Copyright (c) 1997-2009 The PHP Group                                |
       6                 :    +----------------------------------------------------------------------+
       7                 :    | This source file is subject to version 3.01 of the PHP license,      |
       8                 :    | that is bundled with this package in the file LICENSE, and is        |
       9                 :    | available through the world-wide-web at the following url:           |
      10                 :    | http://www.php.net/license/3_01.txt                                  |
      11                 :    | If you did not receive a copy of the PHP license and are unable to   |
      12                 :    | obtain it through the world-wide-web, please send a note to          |
      13                 :    | license@php.net so we can mail you a copy immediately.               |
      14                 :    +----------------------------------------------------------------------+
      15                 :    | Author: Wez Furlong (wez@thebrainroom.com)                           |
      16                 :    +----------------------------------------------------------------------+
      17                 : 
      18                 :         Based on code from ucdata-2.5, which has the following Copyright:
      19                 :    
      20                 :         Copyright 2001 Computing Research Labs, New Mexico State University
      21                 :  
      22                 :         Permission is hereby granted, free of charge, to any person obtaining a
      23                 :         copy of this software and associated documentation files (the "Software"),
      24                 :         to deal in the Software without restriction, including without limitation
      25                 :         the rights to use, copy, modify, merge, publish, distribute, sublicense,
      26                 :         and/or sell copies of the Software, and to permit persons to whom the
      27                 :         Software is furnished to do so, subject to the following conditions:
      28                 :  
      29                 :         The above copyright notice and this permission notice shall be included in
      30                 :         all copies or substantial portions of the Software.
      31                 : */
      32                 : 
      33                 : #ifdef HAVE_CONFIG_H
      34                 : #include "config.h"
      35                 : #endif
      36                 : 
      37                 : #include "php.h"
      38                 : #include "php_ini.h"
      39                 : 
      40                 : #if HAVE_MBSTRING
      41                 : 
      42                 : /* include case folding data generated from the official UnicodeData.txt file */
      43                 : #include "mbstring.h"
      44                 : #include "php_unicode.h"
      45                 : #include "unicode_data.h"
      46                 : 
      47                 : ZEND_EXTERN_MODULE_GLOBALS(mbstring)
      48                 : 
      /*
       * Single-bit masks used to test one bit of a 32-bit property word:
       * masks32[i] has only bit i set (i = 0..31).  The table is read-only,
       * so it is declared const.
       */
      static const unsigned long masks32[32] = {
          0x00000001UL, 0x00000002UL, 0x00000004UL, 0x00000008UL,
          0x00000010UL, 0x00000020UL, 0x00000040UL, 0x00000080UL,
          0x00000100UL, 0x00000200UL, 0x00000400UL, 0x00000800UL,
          0x00001000UL, 0x00002000UL, 0x00004000UL, 0x00008000UL,
          0x00010000UL, 0x00020000UL, 0x00040000UL, 0x00080000UL,
          0x00100000UL, 0x00200000UL, 0x00400000UL, 0x00800000UL,
          0x01000000UL, 0x02000000UL, 0x04000000UL, 0x08000000UL,
          0x10000000UL, 0x20000000UL, 0x40000000UL, 0x80000000UL
      };
      60                 : 
      61                 : 
      62                 : static int prop_lookup(unsigned long code, unsigned long n)
      63           19133 : {
      64                 :         long l, r, m;
      65                 : 
      66                 :         /*
      67                 :          * There is an extra node on the end of the offsets to allow this routine
      68                 :          * to work right.  If the index is 0xffff, then there are no nodes for the
      69                 :          * property.
      70                 :          */
      71           19133 :         if ((l = _ucprop_offsets[n]) == 0xffff)
      72               0 :                 return 0;
      73                 : 
      74                 :         /*
      75                 :          * Locate the next offset that is not 0xffff.  The sentinel at the end of
      76                 :          * the array is the max index value.
      77                 :          */
      78           19133 :         for (m = 1; n + m < _ucprop_size && _ucprop_offsets[n + m] == 0xffff; m++)
      79                 :                 ;
      80                 : 
      81           19133 :         r = _ucprop_offsets[n + m] - 1;
      82                 : 
      83          194705 :         while (l <= r) {
      84                 :                 /*
      85                 :                  * Determine a "mid" point and adjust to make sure the mid point is at
      86                 :                  * the beginning of a range pair.
      87                 :                  */
      88          163779 :                 m = (l + r) >> 1;
      89          163779 :                 m -= (m & 1);
      90          163779 :                 if (code > _ucprop_ranges[m + 1])
      91           41168 :                         l = m + 2;
      92          122611 :                 else if (code < _ucprop_ranges[m])
      93          115271 :                         r = m - 2;
      94            7340 :                 else if (code >= _ucprop_ranges[m] && code <= _ucprop_ranges[m + 1])
      95            7340 :                         return 1;
      96                 :         }
      97           11793 :         return 0;
      98                 : 
      99                 : }
     100                 : 
     101                 : MBSTRING_API int php_unicode_is_prop(unsigned long code, unsigned long mask1,
     102                 :                 unsigned long mask2)
     103           19109 : {
     104                 :         unsigned long i;
     105                 : 
     106           19109 :         if (mask1 == 0 && mask2 == 0)
     107               0 :                 return 0;
     108                 : 
     109          503429 :         for (i = 0; mask1 && i < 32; i++) {
     110          491660 :                 if ((mask1 & masks32[i]) && prop_lookup(code, i))
     111            7340 :                         return 1;
     112                 :         }
     113                 : 
     114           11769 :         for (i = 32; mask2 && i < _ucprop_size; i++) {
     115               0 :                 if ((mask2 & masks32[i & 31]) && prop_lookup(code, i))
     116               0 :                         return 1;
     117                 :         }
     118                 : 
     119           11769 :         return 0;
     120                 : }
     121                 : 
     122                 : static unsigned long case_lookup(unsigned long code, long l, long r, int field)
     123            8416 : {
     124                 :         long m;
     125                 : 
     126                 :         /*
     127                 :          * Do the binary search.
     128                 :          */
     129           56592 :         while (l <= r) {
     130                 :                 /*
     131                 :                  * Determine a "mid" point and adjust to make sure the mid point is at
     132                 :                  * the beginning of a case mapping triple.
     133                 :                  */
     134           44825 :                 m = (l + r) >> 1;
     135           44825 :                 m -= (m % 3);
     136           44825 :                 if (code > _uccase_map[m])
     137           11101 :                         l = m + 3;
     138           33724 :                 else if (code < _uccase_map[m])
     139           28659 :                         r = m - 3;
     140            5065 :                 else if (code == _uccase_map[m])
     141            5065 :                         return _uccase_map[m + field];
     142                 :         }
     143                 : 
     144            3351 :         return code;
     145                 : }
     146                 : 
     147                 : MBSTRING_API unsigned long php_turkish_toupper(unsigned long code, long l, long r, int field)
     148               0 : {
     149               0 :         if (code == 0x0069L) {
     150               0 :                 return 0x0130L;
     151                 :         }
     152               0 :         return case_lookup(code, l, r, field);
     153                 : }
     154                 : 
     155                 : MBSTRING_API unsigned long php_turkish_tolower(unsigned long code, long l, long r, int field)
     156               0 : {
     157               0 :         if (code == 0x0049L) {
     158               0 :                 return 0x0131L;
     159                 :         }       
     160               0 :         return case_lookup(code, l, r, field);
     161                 : }
     162                 : 
     163                 : MBSTRING_API unsigned long php_unicode_toupper(unsigned long code, enum mbfl_no_encoding enc TSRMLS_DC)
     164           10353 : {
     165                 :         int field;
     166                 :         long l, r;
     167                 : 
     168           10353 :         if (php_unicode_is_upper(code))
     169            2211 :                 return code;
     170                 : 
     171            8142 :         if (php_unicode_is_lower(code)) {
     172                 :                 /*
     173                 :                  * The character is lower case.
     174                 :                  */
     175            4887 :                 field = 2;
     176            4887 :                 l = _uccase_len[0];
     177            4887 :                 r = (l + _uccase_len[1]) - 3;
     178                 : 
     179            4887 :                 if (enc == mbfl_no_encoding_8859_9) {
     180               0 :                         return php_turkish_toupper(code, l, r, field);
     181                 :                 }
     182                 : 
     183                 :         } else {
     184                 :                 /*
     185                 :                  * The character is title case.
     186                 :                  */
     187            3255 :                 field = 1;
     188            3255 :                 l = _uccase_len[0] + _uccase_len[1];
     189            3255 :                 r = _uccase_size - 3;
     190                 :         }
     191            8142 :         return case_lookup(code, l, r, field);
     192                 : }
     193                 : 
     194                 : MBSTRING_API unsigned long php_unicode_tolower(unsigned long code, enum mbfl_no_encoding enc TSRMLS_DC)
     195             335 : {
     196                 :         int field;
     197                 :         long l, r;
     198                 : 
     199             335 :         if (php_unicode_is_lower(code))
     200              62 :                 return code;
     201                 : 
     202             273 :         if (php_unicode_is_upper(code)) {
     203                 :                 /*
     204                 :                  * The character is upper case.
     205                 :                  */
     206             177 :                 field = 1;
     207             177 :                 l = 0;
     208             177 :                 r = _uccase_len[0] - 3;
     209                 : 
     210             177 :                 if (enc == mbfl_no_encoding_8859_9) {
     211               0 :                         return php_turkish_tolower(code, l, r, field);
     212                 :                 }
     213                 : 
     214                 :         } else {
     215                 :                 /*
     216                 :                  * The character is title case.
     217                 :                  */
     218              96 :                 field = 2;
     219              96 :                 l = _uccase_len[0] + _uccase_len[1];
     220              96 :                 r = _uccase_size - 3;
     221                 :         }
     222             273 :         return case_lookup(code, l, r, field);
     223                 : }
     224                 : 
     225                 : MBSTRING_API unsigned long php_unicode_totitle(unsigned long code, enum mbfl_no_encoding enc TSRMLS_DC)
     226               1 : {
     227                 :         int field;
     228                 :         long l, r;
     229                 : 
     230               1 :         if (php_unicode_is_title(code))
     231               0 :                 return code;
     232                 : 
     233                 :         /*
     234                 :          * The offset will always be the same for converting to title case.
     235                 :          */
     236               1 :         field = 2;
     237                 : 
     238               1 :         if (php_unicode_is_upper(code)) {
     239                 :                 /*
     240                 :                  * The character is upper case.
     241                 :                  */
     242               0 :                 l = 0;
     243               0 :                 r = _uccase_len[0] - 3;
     244                 :         } else {
     245                 :                 /*
     246                 :                  * The character is lower case.
     247                 :                  */
     248               1 :                 l = _uccase_len[0];
     249               1 :                 r = (l + _uccase_len[1]) - 3;
     250                 :         }
     251               1 :         return case_lookup(code, l, r, field);
     252                 : 
     253                 : }
     254                 : 
     255                 : 
     /*
      * Assemble the four bytes at `ptr` (most significant byte first) into one
      * unsigned value.
      *
      * Each byte is widened to unsigned long before it is shifted: shifting an
      * unsigned char promotes it to int, and moving a byte >= 0x80 into the top
      * byte of an int is undefined and would sign-extend when the result is
      * passed on as an unsigned long.  `ptr` is evaluated more than once, so it
      * must not have side effects.
      */
     #define BE_ARY_TO_UINT32(ptr) (\
         ((unsigned long)((const unsigned char *)(ptr))[0] << 24) |\
         ((unsigned long)((const unsigned char *)(ptr))[1] << 16) |\
         ((unsigned long)((const unsigned char *)(ptr))[2] <<  8) |\
         ((unsigned long)((const unsigned char *)(ptr))[3]      ) )
     261                 : 
     /*
      * Store the low 32 bits of `val` into the four bytes at `ptr`, most
      * significant byte first.
      *
      * Wrapped in do { } while (0) so the macro behaves as a single statement
      * (safe before an `else`); `val` is evaluated exactly once, `ptr` four
      * times.  The temporary has a macro-specific name so that it cannot
      * capture a variable used in `val`.
      */
     #define UINT32_TO_BE_ARY(ptr,val) do { \
         unsigned long be_ary_val_ = (val); \
         ((unsigned char*)(ptr))[0] = (unsigned char)((be_ary_val_ >> 24) & 0xff); \
         ((unsigned char*)(ptr))[1] = (unsigned char)((be_ary_val_ >> 16) & 0xff); \
         ((unsigned char*)(ptr))[2] = (unsigned char)((be_ary_val_ >>  8) & 0xff); \
         ((unsigned char*)(ptr))[3] = (unsigned char)((be_ary_val_      ) & 0xff); \
     } while (0)
     269                 : 
     270                 : MBSTRING_API char *php_unicode_convert_case(int case_mode, const char *srcstr, size_t srclen, size_t *ret_len,
     271                 :                 const char *src_encoding TSRMLS_DC)
     272            1251 : {
     273                 :         char *unicode, *newstr;
     274                 :         size_t unicode_len;
     275                 :         unsigned char *unicode_ptr;
     276                 :         size_t i;
     277            1251 :         enum mbfl_no_encoding _src_encoding = mbfl_name2no_encoding(src_encoding);
     278                 : 
     279            1251 :         if (_src_encoding == mbfl_no_encoding_invalid) {
     280              90 :                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", src_encoding);
     281              90 :                 return NULL;
     282                 :         }       
     283                 : 
     284            1161 :         unicode = php_mb_convert_encoding(srcstr, srclen, "UCS-4BE", src_encoding, &unicode_len TSRMLS_CC);
     285            1161 :         if (unicode == NULL)
     286               0 :                 return NULL;
     287                 :         
     288            1161 :         unicode_ptr = (unsigned char *)unicode;
     289                 : 
     290            1161 :         switch(case_mode) {
     291                 :                 case PHP_UNICODE_CASE_UPPER:
     292           11477 :                         for (i = 0; i < unicode_len; i+=4) {
     293           10353 :                                 UINT32_TO_BE_ARY(&unicode_ptr[i],
     294                 :                                         php_unicode_toupper(BE_ARY_TO_UINT32(&unicode_ptr[i]), _src_encoding TSRMLS_CC));
     295                 :                         }
     296            1124 :                         break;
     297                 : 
     298                 :                 case PHP_UNICODE_CASE_LOWER:
     299             369 :                         for (i = 0; i < unicode_len; i+=4) {
     300             333 :                                 UINT32_TO_BE_ARY(&unicode_ptr[i],
     301                 :                                         php_unicode_tolower(BE_ARY_TO_UINT32(&unicode_ptr[i]), _src_encoding TSRMLS_CC));
     302                 :                         }
     303              36 :                         break;
     304                 : 
     305                 :                 case PHP_UNICODE_CASE_TITLE: {
     306               1 :                         int mode = 0; 
     307                 : 
     308               5 :                         for (i = 0; i < unicode_len; i+=4) {
     309                 :                                 int res = php_unicode_is_prop(
     310                 :                                         BE_ARY_TO_UINT32(&unicode_ptr[i]),
     311               4 :                                         UC_MN|UC_ME|UC_CF|UC_LM|UC_SK|UC_LU|UC_LL|UC_LT|UC_PO|UC_OS, 0);
     312               4 :                                 if (mode) {
     313               3 :                                         if (res) {
     314               2 :                                                 UINT32_TO_BE_ARY(&unicode_ptr[i],
     315                 :                                                         php_unicode_tolower(BE_ARY_TO_UINT32(&unicode_ptr[i]), _src_encoding TSRMLS_CC));
     316                 :                                         } else {
     317               1 :                                                 mode = 0;
     318                 :                                         }       
     319                 :                                 } else {
     320               1 :                                         if (res) {
     321               1 :                                                 mode = 1;
     322               1 :                                                 UINT32_TO_BE_ARY(&unicode_ptr[i],
     323                 :                                                         php_unicode_totitle(BE_ARY_TO_UINT32(&unicode_ptr[i]), _src_encoding TSRMLS_CC));
     324                 :                                         }
     325                 :                                 }
     326                 :                         }
     327                 :                 } break;
     328                 : 
     329                 :         }
     330                 :         
     331            1161 :         newstr = php_mb_convert_encoding(unicode, unicode_len, src_encoding, "UCS-4BE", ret_len TSRMLS_CC);
     332            1161 :         efree(unicode);
     333                 : 
     334            1161 :         return newstr;
     335                 : }
     336                 : 
     337                 : 
     338                 : #endif /* HAVE_MBSTRING */
     339                 : 
     340                 : /*
     341                 :  * Local variables:
     342                 :  * tab-width: 4
     343                 :  * c-basic-offset: 4
     344                 :  * End:
     345                 :  * vim600: sw=4 ts=4 fdm=marker
     346                 :  * vim<600: sw=4 ts=4
     347                 :  */

Generated by: LTP GCOV extension version 1.5

Generated at Sat, 21 Nov 2009 12:27:02 +0000 (3 days ago)

Copyright © 2005-2009 The PHP Group
All rights reserved.