PHP  
 PHP: Test and Code Coverage Analysis
downloads | QA | documentation | faq | getting help | mailing lists | reporting bugs | php.net sites | links | my php.net 
 

LCOV - code coverage report
Current view: top level - ext/mbstring - php_unicode.c (source / functions) Hit Total Coverage
Test: PHP Code Coverage Lines: 93 111 83.8 %
Date: 2014-11-22 Functions: 7 9 77.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :    +----------------------------------------------------------------------+
       3             :    | PHP Version 7                                                        |
       4             :    +----------------------------------------------------------------------+
       5             :    | Copyright (c) 1997-2014 The PHP Group                                |
       6             :    +----------------------------------------------------------------------+
       7             :    | This source file is subject to version 3.01 of the PHP license,      |
       8             :    | that is bundled with this package in the file LICENSE, and is        |
       9             :    | available through the world-wide-web at the following url:           |
      10             :    | http://www.php.net/license/3_01.txt                                  |
      11             :    | If you did not receive a copy of the PHP license and are unable to   |
      12             :    | obtain it through the world-wide-web, please send a note to          |
      13             :    | license@php.net so we can mail you a copy immediately.               |
      14             :    +----------------------------------------------------------------------+
      15             :    | Author: Wez Furlong (wez@thebrainroom.com)                           |
      16             :    +----------------------------------------------------------------------+
      17             : 
      18             :         Based on code from ucdata-2.5, which has the following Copyright:
      19             :    
      20             :         Copyright 2001 Computing Research Labs, New Mexico State University
      21             :  
      22             :         Permission is hereby granted, free of charge, to any person obtaining a
      23             :         copy of this software and associated documentation files (the "Software"),
      24             :         to deal in the Software without restriction, including without limitation
      25             :         the rights to use, copy, modify, merge, publish, distribute, sublicense,
      26             :         and/or sell copies of the Software, and to permit persons to whom the
      27             :         Software is furnished to do so, subject to the following conditions:
      28             :  
      29             :         The above copyright notice and this permission notice shall be included in
      30             :         all copies or substantial portions of the Software.
      31             : */
      32             : 
      33             : #ifdef HAVE_CONFIG_H
      34             : #include "config.h"
      35             : #endif
      36             : 
      37             : #include "php.h"
      38             : #include "php_ini.h"
      39             : 
      40             : #if HAVE_MBSTRING
      41             : 
      42             : /* include case folding data generated from the official UnicodeData.txt file */
      43             : #include "mbstring.h"
      44             : #include "php_unicode.h"
      45             : #include "unicode_data.h"
      46             : 
      47             : ZEND_EXTERN_MODULE_GLOBALS(mbstring)
      48             : 
      49             : /*
      50             :  * A simple array of 32-bit masks for lookup: masks32[i] has only bit i set.
      51             :  */
      52             : static unsigned long masks32[32] = {
      53             :     0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010, 0x00000020,
      54             :     0x00000040, 0x00000080, 0x00000100, 0x00000200, 0x00000400, 0x00000800,
      55             :     0x00001000, 0x00002000, 0x00004000, 0x00008000, 0x00010000, 0x00020000,
      56             :     0x00040000, 0x00080000, 0x00100000, 0x00200000, 0x00400000, 0x00800000,
      57             :     0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000,
      58             :     0x40000000, 0x80000000
      59             : };
      60             : 
      61             : /* Return 1 if `code` falls inside any inclusive [low, high] range pair stored for property index `n`, otherwise 0. */
      62       19327 : static int prop_lookup(unsigned long code, unsigned long n)
      63             : {
      64             :         long l, r, m;
      65             : 
      66             :         /*
      67             :          * There is an extra node on the end of the offsets to allow this routine
      68             :          * to work right.  If the index is 0xffff, then there are no nodes for the
      69             :          * property.
      70             :          */
      71       19327 :         if ((l = _ucprop_offsets[n]) == 0xffff)
      72           0 :                 return 0;
      73             : 
      74             :         /*
      75             :          * Locate the next offset that is not 0xffff.  The sentinel at the end of
      76             :          * the array is the max index value.
      77             :          */
      78       19327 :         for (m = 1; n + m < _ucprop_size && _ucprop_offsets[n + m] == 0xffff; m++)
      79             :                 ;
      80             : 
      81       19327 :         r = _ucprop_offsets[n + m] - 1; /* upper search bound: one before the next populated offset */
      82             : 
      83      203419 :         while (l <= r) {
      84             :                 /*
      85             :                  * Determine a "mid" point and adjust to make sure the mid point is at
      86             :                  * the beginning of a range pair.
      87             :                  */
      88      172188 :                 m = (l + r) >> 1;
      89      172188 :                 m -= (m & 1); /* clear the low bit so m is even */
      90      172188 :                 if (code > _ucprop_ranges[m + 1]) /* above this pair's upper bound: continue to the right */
      91       25550 :                         l = m + 2;
      92      146638 :                 else if (code < _ucprop_ranges[m]) /* below this pair's lower bound: continue to the left */
      93      139215 :                         r = m - 2;
      94        7423 :                 else if (code >= _ucprop_ranges[m] && code <= _ucprop_ranges[m + 1]) /* always true when reached: both earlier tests failed */
      95        7423 :                         return 1;
      96             :         }
      97       11904 :         return 0;
      98             : 
      99             : }
     100             : /* Return 1 if `code` has any property selected by mask1 (bit i selects property i, 0..31) or mask2 (bit (i & 31) selects property i, 32 and up); otherwise 0. */
     101       19303 : MBSTRING_API int php_unicode_is_prop(unsigned long code, unsigned long mask1,
     102             :                 unsigned long mask2)
     103             : {
     104             :         unsigned long i;
     105             : 
     106       19303 :         if (mask1 == 0 && mask2 == 0) /* no property requested */
     107           0 :                 return 0;
     108             : 
     109      508412 :         for (i = 0; mask1 && i < 32; i++) { /* loop is skipped entirely when mask1 is 0 */
     110      496532 :                 if ((mask1 & masks32[i]) && prop_lookup(code, i)) /* prop_lookup runs only for bits set in mask1 */
     111        7423 :                         return 1;
     112             :         }
     113             : 
     114       11880 :         for (i = 32; mask2 && i < _ucprop_size; i++) { /* NOTE(review): i & 31 below reuses mask2 bits if _ucprop_size exceeds 64 - confirm table size */
     115           0 :                 if ((mask2 & masks32[i & 31]) && prop_lookup(code, i))
     116           0 :                         return 1;
     117             :         }
     118             : 
     119       11880 :         return 0;
     120             : }
     121             : /* Binary-search _uccase_map between indexes l and r, read as triples keyed by their first element, for `code`; return the element `field` slots after the key, or `code` itself when no key matches. */
     122        8509 : static unsigned long case_lookup(unsigned long code, long l, long r, int field)
     123             : {
     124             :         long m;
     125             : 
     126             :         /*
     127             :          * Do the binary search.
     128             :          */
     129       59388 :         while (l <= r) {
     130             :                 /*
     131             :                  * Determine a "mid" point and adjust to make sure the mid point is at
     132             :                  * the beginning of a case mapping triple.
     133             :                  */
     134       47510 :                 m = (l + r) >> 1;
     135       47510 :                 m -= (m % 3); /* round down to a multiple of 3; assumes triples start on such indexes - TODO confirm against unicode_data.h */
     136       47510 :                 if (code > _uccase_map[m])
     137       11737 :                         l = m + 3;
     138       35773 :                 else if (code < _uccase_map[m])
     139       30633 :                         r = m - 3;
     140        5140 :                 else if (code == _uccase_map[m]) /* always true when reached: neither greater nor smaller */
     141        5140 :                         return _uccase_map[m + field];
     142             :         }
     143             : 
     144        3369 :         return code; /* no mapping found: hand the code point back unchanged */
     145             : }
     146             : /* Upper-case lookup that first maps U+0069 to U+0130; every other code goes to case_lookup() with the caller's bounds and field. */
     147           0 : MBSTRING_API unsigned long php_turkish_toupper(unsigned long code, long l, long r, int field)
     148             : {
     149           0 :         if (code == 0x0069L) { /* small letter i */
     150           0 :                 return 0x0130L; /* capital letter I with dot above */
     151             :         }
     152           0 :         return case_lookup(code, l, r, field);
     153             : }
     154             : /* Lower-case lookup that first maps U+0049 to U+0131; every other code goes to case_lookup() with the caller's bounds and field. */
     155           0 : MBSTRING_API unsigned long php_turkish_tolower(unsigned long code, long l, long r, int field)
     156             : {
     157           0 :         if (code == 0x0049L) { /* capital letter I */
     158           0 :                 return 0x0131L; /* small dotless i */
     159             :         }       
     160           0 :         return case_lookup(code, l, r, field);
     161             : }
     162             : /* Return the upper-case mapping of `code`: upper-case input is returned as is, anything else is looked up in the matching section of _uccase_map (unchanged when no entry exists). */
     163       10454 : MBSTRING_API unsigned long php_unicode_toupper(unsigned long code, enum mbfl_no_encoding enc TSRMLS_DC)
     164             : {
     165             :         int field; /* which of the two values after a triple's key case_lookup() returns */
     166             :         long l, r; /* first and last triple start index of the section to search */
     167             : 
     168       10454 :         if (php_unicode_is_upper(code))
     169        2219 :                 return code;
     170             : 
     171        8235 :         if (php_unicode_is_lower(code)) {
     172             :                 /*
     173             :                  * The character is lower case.
     174             :                  */
     175        4962 :                 field = 2;
     176        4962 :                 l = _uccase_len[0];
     177        4962 :                 r = (l + _uccase_len[1]) - 3;
     178             : 
     179        4962 :                 if (enc == mbfl_no_encoding_8859_9) { /* this encoding takes the U+0069 special case in php_turkish_toupper() */
     180           0 :                         return php_turkish_toupper(code, l, r, field);
     181             :                 }
     182             : 
     183             :         } else {
     184             :                 /*
     185             :                  * Neither upper nor lower: treated as title case.  Codes with no entry in that section come back unchanged.
     186             :                  */
     187        3273 :                 field = 1;
     188        3273 :                 l = _uccase_len[0] + _uccase_len[1];
     189        3273 :                 r = _uccase_size - 3;
     190             :         }
     191        8235 :         return case_lookup(code, l, r, field);
     192             : }
     193             : /* Return the lower-case mapping of `code`: lower-case input is returned as is, anything else is looked up in the matching section of _uccase_map (unchanged when no entry exists). */
     194         335 : MBSTRING_API unsigned long php_unicode_tolower(unsigned long code, enum mbfl_no_encoding enc TSRMLS_DC)
     195             : {
     196             :         int field; /* which of the two values after a triple's key case_lookup() returns */
     197             :         long l, r; /* first and last triple start index of the section to search */
     198             : 
     199         335 :         if (php_unicode_is_lower(code))
     200          62 :                 return code;
     201             : 
     202         273 :         if (php_unicode_is_upper(code)) {
     203             :                 /*
     204             :                  * The character is upper case.
     205             :                  */
     206         177 :                 field = 1;
     207         177 :                 l = 0;
     208         177 :                 r = _uccase_len[0] - 3;
     209             : 
     210         177 :                 if (enc == mbfl_no_encoding_8859_9) { /* this encoding takes the U+0049 special case in php_turkish_tolower() */
     211           0 :                         return php_turkish_tolower(code, l, r, field);
     212             :                 }
     213             : 
     214             :         } else {
     215             :                 /*
     216             :                  * Neither lower nor upper: treated as title case.  Codes with no entry in that section come back unchanged.
     217             :                  */
     218          96 :                 field = 2;
     219          96 :                 l = _uccase_len[0] + _uccase_len[1];
     220          96 :                 r = _uccase_size - 3;
     221             :         }
     222         273 :         return case_lookup(code, l, r, field);
     223             : }
     224             : /* Return the title-case mapping of `code`: title-case input is returned as is, anything else is looked up in the upper or lower section of _uccase_map.  `enc` is not read here. */
     225           1 : MBSTRING_API unsigned long php_unicode_totitle(unsigned long code, enum mbfl_no_encoding enc TSRMLS_DC)
     226             : {
     227             :         int field;
     228             :         long l, r; /* first and last triple start index of the section to search */
     229             : 
     230           1 :         if (php_unicode_is_title(code))
     231           0 :                 return code;
     232             : 
     233             :         /*
     234             :          * The offset will always be the same for converting to title case.
     235             :          */
     236           1 :         field = 2;
     237             : 
     238           1 :         if (php_unicode_is_upper(code)) {
     239             :                 /*
     240             :                  * The character is upper case.
     241             :                  */
     242           0 :                 l = 0;
     243           0 :                 r = _uccase_len[0] - 3;
     244             :         } else {
     245             :                 /*
     246             :                  * Not upper case: treated as lower case.  Codes with no entry in that section come back unchanged.
     247             :                  */
     248           1 :                 l = _uccase_len[0];
     249           1 :                 r = (l + _uccase_len[1]) - 3;
     250             :         }
     251           1 :         return case_lookup(code, l, r, field);
     252             : 
     253             : }
     254             : /* BE_ARY_TO_UINT32(ptr): combine the four bytes at ptr, most significant first, into one integer value. */
     255             : /* NOTE(review): each byte is promoted to int before shifting, so with a 32-bit int a first byte >= 0x80 shifts into the sign bit; an unsigned cast before << 24 would avoid that. */
     256             : #define BE_ARY_TO_UINT32(ptr) (\
     257             :         ((unsigned char*)(ptr))[0]<<24 |\
     258             :         ((unsigned char*)(ptr))[1]<<16 |\
     259             :         ((unsigned char*)(ptr))[2]<< 8 |\
     260             :         ((unsigned char*)(ptr))[3] )
     261             : /* UINT32_TO_BE_ARY(ptr,val): evaluate val once into a local and store its four bytes at ptr, most significant first.  NOTE(review): expands to a bare { } block rather than do { } while (0), so it is only safe inside braces. */
     262             : #define UINT32_TO_BE_ARY(ptr,val) { \
     263             :         unsigned int v = val; \
     264             :         ((unsigned char*)(ptr))[0] = (v>>24) & 0xff,\
     265             :         ((unsigned char*)(ptr))[1] = (v>>16) & 0xff,\
     266             :         ((unsigned char*)(ptr))[2] = (v>> 8) & 0xff,\
     267             :         ((unsigned char*)(ptr))[3] = (v    ) & 0xff;\
     268             : }
     269             : /* Convert srcstr from src_encoding to UCS-4BE, apply the mapping chosen by case_mode to each 4-byte unit, convert back and return the result; returns NULL for an unknown encoding or when the first conversion yields NULL. */
     270        1266 : MBSTRING_API char *php_unicode_convert_case(int case_mode, const char *srcstr, size_t srclen, size_t *ret_len,
     271             :                 const char *src_encoding TSRMLS_DC)
     272             : {
     273             :         char *unicode, *newstr;
     274             :         size_t unicode_len;
     275             :         unsigned char *unicode_ptr;
     276             :         size_t i;
     277        1266 :         enum mbfl_no_encoding _src_encoding = mbfl_name2no_encoding(src_encoding);
     278             : 
     279        1266 :         if (_src_encoding == mbfl_no_encoding_invalid) {
     280          90 :                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", src_encoding);
     281          90 :                 return NULL;
     282             :         }       
     283             : 
     284        1176 :         unicode = php_mb_convert_encoding(srcstr, srclen, "UCS-4BE", src_encoding, &unicode_len TSRMLS_CC); /* intermediate buffer, walked 4 bytes at a time below */
     285        1176 :         if (unicode == NULL)
     286           0 :                 return NULL;
     287             :         
     288        1176 :         unicode_ptr = (unsigned char *)unicode; /* byte view of the same buffer for the BE macros */
     289             : 
     290        1176 :         switch(case_mode) {
     291             :                 case PHP_UNICODE_CASE_UPPER:
     292       11593 :                         for (i = 0; i < unicode_len; i+=4) {
     293       10454 :                                 UINT32_TO_BE_ARY(&unicode_ptr[i],
     294             :                                         php_unicode_toupper(BE_ARY_TO_UINT32(&unicode_ptr[i]), _src_encoding TSRMLS_CC));
     295             :                         }
     296        1139 :                         break;
     297             : 
     298             :                 case PHP_UNICODE_CASE_LOWER:
     299         369 :                         for (i = 0; i < unicode_len; i+=4) {
     300         333 :                                 UINT32_TO_BE_ARY(&unicode_ptr[i],
     301             :                                         php_unicode_tolower(BE_ARY_TO_UINT32(&unicode_ptr[i]), _src_encoding TSRMLS_CC));
     302             :                         }
     303          36 :                         break;
     304             : 
     305             :                 case PHP_UNICODE_CASE_TITLE: {
     306           1 :                         int mode = 0; /* 1 while inside a run of characters matching the property test below, 0 otherwise */
     307             : 
     308           5 :                         for (i = 0; i < unicode_len; i+=4) {
     309           4 :                                 int res = php_unicode_is_prop(
     310           4 :                                         BE_ARY_TO_UINT32(&unicode_ptr[i]),
     311           4 :                                         UC_MN|UC_ME|UC_CF|UC_LM|UC_SK|UC_LU|UC_LL|UC_LT|UC_PO|UC_OS, 0);
     312           4 :                                 if (mode) {
     313           3 :                                         if (res) { /* still inside the run: lower-case the character */
     314           2 :                                                 UINT32_TO_BE_ARY(&unicode_ptr[i],
     315             :                                                         php_unicode_tolower(BE_ARY_TO_UINT32(&unicode_ptr[i]), _src_encoding TSRMLS_CC));
     316             :                                         } else {
     317           1 :                                                 mode = 0; /* run ended; this character is left untouched */
     318             :                                         }       
     319             :                                 } else {
     320           1 :                                         if (res) { /* first character of a new run: title-case it */
     321           1 :                                                 mode = 1;
     322           1 :                                                 UINT32_TO_BE_ARY(&unicode_ptr[i],
     323             :                                                         php_unicode_totitle(BE_ARY_TO_UINT32(&unicode_ptr[i]), _src_encoding TSRMLS_CC));
     324             :                                         }
     325             :                                 }
     326             :                         }
     327             :                 } break;
     328             : 
     329             :         } /* no default case: any other case_mode leaves the buffer unmodified */
     330             :         
     331        1176 :         newstr = php_mb_convert_encoding(unicode, unicode_len, src_encoding, "UCS-4BE", ret_len TSRMLS_CC); /* NOTE(review): ret_len is presumably filled with the output length by this call - confirm */
     332        1176 :         efree(unicode); /* the intermediate buffer is released here on this path */
     333             : 
     334        1176 :         return newstr;
     335             : }
     336             : 
     337             : 
     338             : #endif /* HAVE_MBSTRING */
     339             : 
     340             : /*
     341             :  * Local variables:
     342             :  * tab-width: 4
     343             :  * c-basic-offset: 4
     344             :  * End:
     345             :  * vim600: sw=4 ts=4 fdm=marker
     346             :  * vim<600: sw=4 ts=4
     347             :  */

Generated by: LCOV version 1.10

Generated at Sat, 22 Nov 2014 23:01:17 +0000 (5 hours ago)

Copyright © 2005-2014 The PHP Group
All rights reserved.