PHP  
 PHP: Test and Code Coverage Analysis
downloads | QA | documentation | faq | getting help | mailing lists | reporting bugs | php.net sites | links | my php.net 
 

LCOV - code coverage report
Current view: top level - ext/intl/grapheme - grapheme_string.c (source / functions) Hit Total Coverage
Test: PHP Code Coverage Lines: 316 361 87.5 %
Date: 2014-04-18 Functions: 14 14 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :    +----------------------------------------------------------------------+
       3             :    | PHP Version 5                                                                                                                |
       4             :    +----------------------------------------------------------------------+
       5             :    | This source file is subject to version 3.01 of the PHP license,      |
       6             :    | that is bundled with this package in the file LICENSE, and is                |
       7             :    | available through the world-wide-web at the following url:                   |
       8             :    | http://www.php.net/license/3_01.txt                                                                  |
       9             :    | If you did not receive a copy of the PHP license and are unable to   |
      10             :    | obtain it through the world-wide-web, please send a note to                  |
      11             :    | license@php.net so we can mail you a copy immediately.                               |
      12             :    +----------------------------------------------------------------------+
      13             :    | Author: Ed Batutis <ed@batutis.com>                                                            |
      14             :    +----------------------------------------------------------------------+
      15             :  */
      16             : 
      17             : /* {{{ includes */
      18             : #ifdef HAVE_CONFIG_H
      19             : #include "config.h"
      20             : #endif
      21             : 
      22             : #include <php.h>
      23             : #include "grapheme.h"
      24             : #include "grapheme_util.h"
      25             : 
      26             : #include <unicode/utypes.h>
      27             : #include <unicode/ucol.h>
      28             : #include <unicode/ustring.h>
      29             : #include <unicode/ubrk.h>
      30             : 
      31             : #include "ext/standard/php_string.h"
      32             : 
      33             : /* }}} */
      34             : 
      35             : #define GRAPHEME_EXTRACT_TYPE_COUNT             0
      36             : #define GRAPHEME_EXTRACT_TYPE_MAXBYTES  1
      37             : #define GRAPHEME_EXTRACT_TYPE_MAXCHARS  2
      38             : #define GRAPHEME_EXTRACT_TYPE_MIN       GRAPHEME_EXTRACT_TYPE_COUNT
      39             : #define GRAPHEME_EXTRACT_TYPE_MAX       GRAPHEME_EXTRACT_TYPE_MAXCHARS
      40             : 
      41             : 
      42             : /* {{{ grapheme_register_constants
      43             :  * Register API constants
      44             :  */
      45       19341 : void grapheme_register_constants( INIT_FUNC_ARGS )
      46             : {
      47       19341 :         REGISTER_LONG_CONSTANT("GRAPHEME_EXTR_COUNT", GRAPHEME_EXTRACT_TYPE_COUNT, CONST_CS | CONST_PERSISTENT);
      48       19341 :         REGISTER_LONG_CONSTANT("GRAPHEME_EXTR_MAXBYTES", GRAPHEME_EXTRACT_TYPE_MAXBYTES, CONST_CS | CONST_PERSISTENT);
      49       19341 :         REGISTER_LONG_CONSTANT("GRAPHEME_EXTR_MAXCHARS", GRAPHEME_EXTRACT_TYPE_MAXCHARS, CONST_CS | CONST_PERSISTENT);
      50       19341 : }
      51             : /* }}} */
      52             : 
      53             : /* {{{ proto int grapheme_strlen(string str)
      54             :    Get number of graphemes in a string */
      55           7 : PHP_FUNCTION(grapheme_strlen)
      56             : {
      57             :         unsigned char* string;
      58             :         int string_len;
      59           7 :         UChar* ustring = NULL;
      60           7 :         int ustring_len = 0;
      61             :         int ret_len;
      62             :         UErrorCode status;
      63             : 
      64           7 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", (char **)&string, &string_len) == FAILURE) {
      65             : 
      66           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
      67             :                          "grapheme_strlen: unable to parse input param", 0 TSRMLS_CC );
      68             : 
      69           1 :                 RETURN_FALSE;
      70             :         }
      71             : 
      72           6 :         ret_len = grapheme_ascii_check(string, string_len);
      73             : 
      74           6 :         if ( ret_len >= 0 )
      75           2 :                 RETURN_LONG(ret_len);
      76             : 
      77             :         /* convert the string to UTF-16. */
      78           4 :         status = U_ZERO_ERROR;
      79           4 :         intl_convert_utf8_to_utf16(&ustring, &ustring_len, (char*) string, string_len, &status );
      80             : 
      81           4 :         if ( U_FAILURE( status ) ) {
      82             :                 /* Set global error code. */
      83           0 :                 intl_error_set_code( NULL, status TSRMLS_CC );
      84             : 
      85             :                 /* Set error messages. */
      86           0 :                 intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
      87           0 :                 if (ustring) {
      88           0 :                         efree( ustring );
      89             :                 }
      90           0 :                 RETURN_NULL();
      91             :         }
      92             : 
      93           4 :         ret_len = grapheme_split_string(ustring, ustring_len, NULL, 0 TSRMLS_CC );
      94             : 
      95           4 :         if (ustring) {
      96           4 :                 efree( ustring );
      97             :         }
      98             : 
      99           4 :         if (ret_len >= 0) {
     100           4 :                 RETVAL_LONG(ret_len);
     101             :         } else {
     102           0 :                 RETVAL_FALSE;
     103             :         }
     104             : }
     105             : /* }}} */
     106             : 
     107             : /* {{{ proto int grapheme_strpos(string haystack, string needle [, int offset ])
     108             :    Find position of first occurrence of a string within another */
     109          38 : PHP_FUNCTION(grapheme_strpos)
     110             : {
     111             :         unsigned char *haystack, *needle;
     112             :         int haystack_len, needle_len;
     113             :         unsigned char *found;
     114          38 :         long loffset = 0;
     115          38 :         int32_t offset = 0;
     116             :         int ret_pos, uchar_pos;
     117             : 
     118          38 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", (char **)&haystack, &haystack_len, (char **)&needle, &needle_len, &loffset) == FAILURE) {
     119             : 
     120           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     121             :                          "grapheme_strpos: unable to parse input param", 0 TSRMLS_CC );
     122             : 
     123           1 :                 RETURN_FALSE;
     124             :         }
     125             : 
     126          37 :         if ( OUTSIDE_STRING(loffset, haystack_len) ) {
     127             : 
     128           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Offset not contained in string", 1 TSRMLS_CC );
     129             : 
     130           1 :                 RETURN_FALSE;
     131             :         }
     132             : 
     133             :         /* we checked that it will fit: */
     134          36 :         offset = (int32_t) loffset;
     135             : 
     136             :         /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
     137             : 
     138          36 :         if (needle_len == 0) {
     139             : 
     140           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Empty delimiter", 1 TSRMLS_CC );
     141             : 
     142           0 :                 RETURN_FALSE;
     143             :         }
     144             : 
     145             : 
     146             :         /* quick check to see if the string might be there
     147             :          * I realize that 'offset' is 'grapheme count offset' but will work in spite of that
     148             :         */
     149          36 :         found = (unsigned char *)php_memnstr((char *)haystack + offset, (char *)needle, needle_len, (char *)haystack + haystack_len);
     150             : 
     151             :         /* if it isn't there the we are done */
     152          36 :         if (!found) {
     153           9 :                 RETURN_FALSE;
     154             :         }
     155             : 
     156             :         /* if it is there, and if the haystack is ascii, we are all done */
     157          27 :         if ( grapheme_ascii_check(haystack, haystack_len) >= 0 ) {
     158             : 
     159          12 :                 RETURN_LONG(found - haystack);
     160             :         }
     161             : 
     162             :         /* do utf16 part of the strpos */
     163          15 :         ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, &uchar_pos, 0 /* fIgnoreCase */ TSRMLS_CC );
     164             : 
     165          15 :         if ( ret_pos >= 0 ) {
     166          13 :                 RETURN_LONG(ret_pos + offset);
     167             :         } else {
     168           2 :                 RETURN_FALSE;
     169             :         }
     170             : 
     171             : }
     172             : /* }}} */
     173             : 
     174             : /* {{{ proto int grapheme_stripos(string haystack, string needle [, int offset ])
     175             :    Find position of first occurrence of a string within another, ignoring case differences */
     176          38 : PHP_FUNCTION(grapheme_stripos)
     177             : {
     178             :         unsigned char *haystack, *needle, *haystack_dup, *needle_dup;
     179             :         int haystack_len, needle_len;
     180             :         unsigned char *found;
     181          38 :         long loffset = 0;
     182          38 :         int32_t offset = 0;
     183             :         int ret_pos, uchar_pos;
     184             :         int is_ascii;
     185             : 
     186          38 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", (char **)&haystack, &haystack_len, (char **)&needle, &needle_len, &loffset) == FAILURE) {
     187             : 
     188           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     189             :                          "grapheme_stripos: unable to parse input param", 0 TSRMLS_CC );
     190             : 
     191           1 :                 RETURN_FALSE;
     192             :         }
     193             : 
     194          37 :         if ( OUTSIDE_STRING(loffset, haystack_len) ) {
     195             : 
     196           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_stripos: Offset not contained in string", 1 TSRMLS_CC );
     197             : 
     198           1 :                 RETURN_FALSE;
     199             :         }
     200             : 
     201             :         /* we checked that it will fit: */
     202          36 :         offset = (int32_t) loffset;
     203             : 
     204             :         /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
     205             : 
     206          36 :         if (needle_len == 0) {
     207             : 
     208           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_stripos: Empty delimiter", 1 TSRMLS_CC );
     209             : 
     210           0 :                 RETURN_FALSE;
     211             :         }
     212             : 
     213             : 
     214          36 :         is_ascii = ( grapheme_ascii_check(haystack, haystack_len) >= 0 );
     215             : 
     216          36 :         if ( is_ascii ) {
     217          19 :                 needle_dup = (unsigned char *)estrndup((char *)needle, needle_len);
     218          19 :                 php_strtolower((char *)needle_dup, needle_len);
     219          19 :                 haystack_dup = (unsigned char *)estrndup((char *)haystack, haystack_len);
     220          19 :                 php_strtolower((char *)haystack_dup, haystack_len);
     221             : 
     222          19 :                 found = (unsigned char*) php_memnstr((char *)haystack_dup + offset, (char *)needle_dup, needle_len, (char *)haystack_dup + haystack_len);
     223             : 
     224          19 :                 efree(haystack_dup);
     225          19 :                 efree(needle_dup);
     226             : 
     227          19 :                 if (found) {
     228          12 :                         RETURN_LONG(found - haystack_dup);
     229             :                 }
     230             : 
     231             :                 /* if needle was ascii too, we are all done, otherwise we need to try using Unicode to see what we get */
     232           7 :                 if ( grapheme_ascii_check(needle, needle_len) >= 0 ) {
     233           5 :                         RETURN_FALSE;
     234             :                 }
     235             :         }
     236             : 
     237             :         /* do utf16 part of the strpos */
     238          19 :         ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, &uchar_pos, 1 /* fIgnoreCase */ TSRMLS_CC );
     239             : 
     240          19 :         if ( ret_pos >= 0 ) {
     241          13 :                 RETURN_LONG(ret_pos + offset);
     242             :         } else {
     243           6 :                 RETURN_FALSE;
     244             :         }
     245             : 
     246             : }
     247             : /* }}} */
     248             : 
     249             : /* {{{ proto int grapheme_strrpos(string haystack, string needle [, int offset])
     250             :    Find position of last occurrence of a string within another */
     251          37 : PHP_FUNCTION(grapheme_strrpos)
     252             : {
     253             :         unsigned char *haystack, *needle;
     254             :         int haystack_len, needle_len;
     255          37 :         long loffset = 0;
     256          37 :         int32_t offset = 0;
     257             :         int32_t ret_pos;
     258             :         int is_ascii;
     259             : 
     260          37 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", (char **)&haystack, &haystack_len, (char **)&needle, &needle_len, &loffset) == FAILURE) {
     261             : 
     262           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     263             :                          "grapheme_strrpos: unable to parse input param", 0 TSRMLS_CC );
     264             : 
     265           1 :                 RETURN_FALSE;
     266             :         }
     267             : 
     268          36 :         if ( OUTSIDE_STRING(loffset, haystack_len) ) {
     269             : 
     270           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Offset not contained in string", 1 TSRMLS_CC );
     271             : 
     272           0 :                 RETURN_FALSE;
     273             :         }
     274             : 
     275             :         /* we checked that it will fit: */
     276          36 :         offset = (int32_t) loffset;
     277             : 
     278             :         /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
     279             : 
     280          36 :         if (needle_len == 0) {
     281             : 
     282           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Empty delimiter", 1 TSRMLS_CC );
     283             : 
     284           0 :                 RETURN_FALSE;
     285             :         }
     286             : 
     287          36 :         is_ascii = grapheme_ascii_check(haystack, haystack_len) >= 0;
     288             : 
     289          36 :         if ( is_ascii ) {
     290             : 
     291          19 :                 ret_pos = grapheme_strrpos_ascii(haystack, haystack_len, needle, needle_len, offset);
     292             : 
     293             : 
     294          19 :                 if ( ret_pos >= 0 ) {
     295          12 :                         RETURN_LONG(ret_pos);
     296             :                 }
     297             : 
     298             :                 /* if the needle was ascii too, we are done */
     299             : 
     300           7 :                 if (  grapheme_ascii_check(needle, needle_len) >= 0 ) {
     301           5 :                         RETURN_FALSE;
     302             :                 }
     303             : 
     304             :                 /* else we need to continue via utf16 */
     305             :         }
     306             : 
     307          19 :         ret_pos = grapheme_strrpos_utf16(haystack, haystack_len, needle, needle_len, offset, 0 /* f_ignore_case */ TSRMLS_CC);
     308             : 
     309          19 :         if ( ret_pos >= 0 ) {
     310          13 :                 RETURN_LONG(ret_pos);
     311             :         } else {
     312           6 :                 RETURN_FALSE;
     313             :         }
     314             : 
     315             : 
     316             : }
     317             : /* }}} */
     318             : 
     319             : /* {{{ proto int grapheme_strripos(string haystack, string needle [, int offset])
     320             :    Find position of last occurrence of a string within another, ignoring case */
     321          37 : PHP_FUNCTION(grapheme_strripos)
     322             : {
     323             :         unsigned char *haystack, *needle;
     324             :         int haystack_len, needle_len;
     325          37 :         long loffset = 0;
     326          37 :         int32_t offset = 0;
     327             :         int32_t ret_pos;
     328             :         int is_ascii;
     329             : 
     330          37 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", (char **)&haystack, &haystack_len, (char **)&needle, &needle_len, &loffset) == FAILURE) {
     331             : 
     332           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     333             :                          "grapheme_strrpos: unable to parse input param", 0 TSRMLS_CC );
     334             : 
     335           1 :                 RETURN_FALSE;
     336             :         }
     337             : 
     338          36 :         if ( OUTSIDE_STRING(loffset, haystack_len) ) {
     339             : 
     340           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Offset not contained in string", 1 TSRMLS_CC );
     341             : 
     342           0 :                 RETURN_FALSE;
     343             :         }
     344             : 
     345             :         /* we checked that it will fit: */
     346          36 :         offset = (int32_t) loffset;
     347             : 
     348             :         /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
     349             : 
     350          36 :         if (needle_len == 0) {
     351             : 
     352           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Empty delimiter", 1 TSRMLS_CC );
     353             : 
     354           0 :                 RETURN_FALSE;
     355             :         }
     356             : 
     357          36 :         is_ascii = grapheme_ascii_check(haystack, haystack_len) >= 0;
     358             : 
     359          36 :         if ( is_ascii ) {
     360             :                 unsigned char *needle_dup, *haystack_dup;
     361             : 
     362          19 :                 needle_dup = (unsigned char *)estrndup((char *)needle, needle_len);
     363          19 :                 php_strtolower((char *)needle_dup, needle_len);
     364          19 :                 haystack_dup = (unsigned char *)estrndup((char *)haystack, haystack_len);
     365          19 :                 php_strtolower((char *)haystack_dup, haystack_len);
     366             : 
     367          19 :                 ret_pos = grapheme_strrpos_ascii(haystack_dup, haystack_len, needle_dup, needle_len, offset);
     368             : 
     369          19 :                 efree(haystack_dup);
     370          19 :                 efree(needle_dup);
     371             : 
     372          19 :                 if ( ret_pos >= 0 ) {
     373          12 :                         RETURN_LONG(ret_pos);
     374             :                 }
     375             : 
     376             :                 /* if the needle was ascii too, we are done */
     377             : 
     378           7 :                 if (  grapheme_ascii_check(needle, needle_len) >= 0 ) {
     379           5 :                         RETURN_FALSE;
     380             :                 }
     381             : 
     382             :                 /* else we need to continue via utf16 */
     383             :         }
     384             : 
     385          19 :         ret_pos = grapheme_strrpos_utf16(haystack, haystack_len, needle, needle_len, offset, 1 /* f_ignore_case */ TSRMLS_CC);
     386             : 
     387          19 :         if ( ret_pos >= 0 ) {
     388          13 :                 RETURN_LONG(ret_pos);
     389             :         } else {
     390           6 :                 RETURN_FALSE;
     391             :         }
     392             : 
     393             : 
     394             : }
     395             : /* }}} */
     396             : 
     397             : /* {{{ proto string grapheme_substr(string str, int start [, int length])
     398             :    Returns part of a string */
     399          64 : PHP_FUNCTION(grapheme_substr)
     400             : {
     401             :         unsigned char *str, *sub_str;
     402             :         UChar *ustr;
     403             :         int str_len, sub_str_len, ustr_len;
     404          64 :         long lstart = 0, length = 0;
     405          64 :         int32_t start = 0;
     406             :         int iter_val;
     407             :         UErrorCode status;
     408             :         unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
     409          64 :         UBreakIterator* bi = NULL;
     410             :         int sub_str_start_pos, sub_str_end_pos;
     411             :         int32_t (*iter_func)(UBreakIterator *);
     412             : 
     413          64 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|l", (char **)&str, &str_len, &lstart, &length) == FAILURE) {
     414             : 
     415           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     416             :                          "grapheme_substr: unable to parse input param", 0 TSRMLS_CC );
     417             : 
     418           1 :                 RETURN_FALSE;
     419             :         }
     420             : 
     421          63 :         if ( OUTSIDE_STRING(lstart, str_len) ) {
     422             : 
     423           5 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: start not contained in string", 1 TSRMLS_CC );
     424             : 
     425           5 :                 RETURN_FALSE;
     426             :         }
     427             : 
     428             :         /* we checked that it will fit: */
     429          58 :         start = (int32_t) lstart;
     430             : 
     431             :         /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
     432             : 
     433          58 :         if ( grapheme_ascii_check(str, str_len) >= 0 ) {
     434           6 :                 grapheme_substr_ascii((char *)str, str_len, start, length, ZEND_NUM_ARGS(), (char **) &sub_str, &sub_str_len);
     435             : 
     436           6 :                 if ( NULL == sub_str ) {
     437           0 :                         RETURN_FALSE;
     438             :                 }
     439             : 
     440           6 :                 RETURN_STRINGL(((char *)sub_str), sub_str_len, 1);
     441             :         }
     442             : 
     443          52 :         ustr = NULL;
     444          52 :         ustr_len = 0;
     445          52 :         status = U_ZERO_ERROR;
     446          52 :         intl_convert_utf8_to_utf16(&ustr, &ustr_len, (char *)str, str_len, &status);
     447             : 
     448          52 :         if ( U_FAILURE( status ) ) {
     449             :                 /* Set global error code. */
     450           0 :                 intl_error_set_code( NULL, status TSRMLS_CC );
     451             : 
     452             :                 /* Set error messages. */
     453           0 :                 intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
     454           0 :                 if (ustr) {
     455           0 :                         efree( ustr );
     456             :                 }
     457           0 :                 RETURN_FALSE;
     458             :         }
     459             : 
     460          52 :         bi = grapheme_get_break_iterator((void*)u_break_iterator_buffer, &status TSRMLS_CC );
     461             : 
     462          52 :         if( U_FAILURE(status) ) {
     463           0 :                 RETURN_FALSE;
     464             :         }
     465             : 
     466          52 :         ubrk_setText(bi, ustr, ustr_len,        &status);
     467             : 
     468          52 :         if ( start < 0 ) {
     469          27 :                 iter_func = ubrk_previous;
     470          27 :                 ubrk_last(bi);
     471          27 :                 iter_val = 1;
     472             :         }
     473             :         else {
     474          25 :                 iter_func = ubrk_next;
     475          25 :                 iter_val = -1;
     476             :         }
     477             : 
     478          52 :         sub_str_start_pos = 0;
     479             : 
     480         346 :         while ( start ) {
     481         242 :                 sub_str_start_pos = iter_func(bi);
     482             : 
     483         242 :                 if ( UBRK_DONE == sub_str_start_pos ) {
     484           0 :                         break;
     485             :                 }
     486             : 
     487         242 :                 start += iter_val;
     488             :         }
     489             : 
     490          52 :         if ( 0 != start || sub_str_start_pos >= ustr_len ) {
     491             : 
     492           3 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: start not contained in string", 1 TSRMLS_CC );
     493             : 
     494           3 :                 if (ustr) {
     495           3 :                         efree(ustr);
     496             :                 }
     497           3 :                 ubrk_close(bi);
     498           3 :                 RETURN_FALSE;
     499             :         }
     500             : 
     501          49 :         if (ZEND_NUM_ARGS() <= 2) {
     502             : 
     503             :                 /* no length supplied, return the rest of the string */
     504             : 
     505          16 :                 sub_str = NULL;
     506          16 :                 sub_str_len = 0;
     507          16 :                 status = U_ZERO_ERROR;
     508          16 :                 intl_convert_utf16_to_utf8((char **)&sub_str, &sub_str_len, ustr + sub_str_start_pos, ustr_len - sub_str_start_pos, &status);
     509             : 
     510          16 :                 if (ustr) {
     511          16 :                         efree( ustr );
     512             :                 }
     513          16 :                 ubrk_close( bi );
     514             : 
     515          16 :                 if ( U_FAILURE( status ) ) {
     516             :                         /* Set global error code. */
     517           0 :                         intl_error_set_code( NULL, status TSRMLS_CC );
     518             : 
     519             :                         /* Set error messages. */
     520           0 :                         intl_error_set_custom_msg( NULL, "Error converting output string to UTF-8", 0 TSRMLS_CC );
     521             : 
     522           0 :                         if (sub_str) {
     523           0 :                                 efree( sub_str );
     524             :                         }
     525             : 
     526           0 :                         RETURN_FALSE;
     527             :                 }
     528             : 
     529             :                 /* return the allocated string, not a duplicate */
     530          16 :                 RETURN_STRINGL(((char *)sub_str), sub_str_len, 0);
     531             :         }
     532             : 
     533             :         /* find the end point of the string to return */
     534             : 
     535          33 :         if ( length < 0 ) {
     536          23 :                 iter_func = ubrk_previous;
     537          23 :                 ubrk_last(bi);
     538          23 :                 iter_val = 1;
     539             :         }
     540             :         else {
     541          10 :                 iter_func = ubrk_next;
     542          10 :                 iter_val = -1;
     543             :         }
     544             : 
     545          33 :         sub_str_end_pos = 0;
     546             : 
     547         216 :         while ( length ) {
     548         154 :                 sub_str_end_pos = iter_func(bi);
     549             : 
     550         154 :                 if ( UBRK_DONE == sub_str_end_pos ) {
     551           4 :                         break;
     552             :                 }
     553             : 
     554         150 :                 length += iter_val;
     555             :         }
     556             : 
     557          33 :         if ( UBRK_DONE == sub_str_end_pos && length < 0) {
     558             : 
     559           3 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: length not contained in string", 1 TSRMLS_CC );
     560             : 
     561           3 :                 efree(ustr);
     562           3 :                 ubrk_close(bi);
     563           3 :                 RETURN_FALSE;
     564             :         }
     565             : 
     566          30 :         sub_str = NULL;
     567          30 :         status = U_ZERO_ERROR;
     568          30 :         intl_convert_utf16_to_utf8((char **)&sub_str, &sub_str_len, ustr + sub_str_start_pos, ( sub_str_end_pos - sub_str_start_pos ), &status);
     569             : 
     570          30 :         efree( ustr );
     571          30 :         ubrk_close( bi );
     572             : 
     573          30 :         if ( U_FAILURE( status ) ) {
     574             :                 /* Set global error code. */
     575           1 :                 intl_error_set_code( NULL, status TSRMLS_CC );
     576             : 
     577             :                 /* Set error messages. */
     578           1 :                 intl_error_set_custom_msg( NULL, "Error converting output string to UTF-8", 0 TSRMLS_CC );
     579             : 
     580           1 :                 if ( NULL != sub_str )
     581           0 :                         efree( sub_str );
     582             : 
     583           1 :                 RETURN_FALSE;
     584             :         }
     585             : 
     586             :          /* return the allocated string, not a duplicate */
     587          29 :         RETURN_STRINGL(((char *)sub_str), sub_str_len, 0);
     588             : 
     589             : }
     590             : /* }}} */
     591             : 
     592             : /* {{{  strstr_common_handler */
     593          72 : static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_case)
     594             : {
     595             :         unsigned char *haystack, *needle, *found;
     596             :         int haystack_len, needle_len;
     597             :         int ret_pos, uchar_pos;
     598          72 :         zend_bool part = 0;
     599             : 
     600          72 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|b", (char **)&haystack, &haystack_len, (char **)&needle, &needle_len, &part) == FAILURE) {
     601             : 
     602           2 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     603             :                          "grapheme_strstr: unable to parse input param", 0 TSRMLS_CC );
     604             : 
     605           2 :                 RETURN_FALSE;
     606             :         }
     607             : 
     608          70 :         if (needle_len == 0) {
     609             : 
     610           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Empty delimiter", 1 TSRMLS_CC );
     611             : 
     612           0 :                 RETURN_FALSE;
     613             :         }
     614             : 
     615             : 
     616          70 :         if ( !f_ignore_case ) {
     617             : 
     618             :                 /* ASCII optimization: quick check to see if the string might be there
     619             :                  * I realize that 'offset' is 'grapheme count offset' but will work in spite of that
     620             :                 */
     621          35 :                 found = (unsigned char *)php_memnstr((char *)haystack, (char *)needle, needle_len, (char *)haystack + haystack_len);
     622             : 
     623             :                 /* if it isn't there the we are done */
     624          35 :                 if ( !found ) {
     625           3 :                         RETURN_FALSE;
     626             :                 }
     627             : 
     628             :                 /* if it is there, and if the haystack is ascii, we are all done */
     629          32 :                 if ( grapheme_ascii_check(haystack, haystack_len) >= 0 ) {
     630          13 :                         size_t found_offset = found - haystack;
     631             : 
     632          13 :                         if (part) {
     633           5 :                                 RETURN_STRINGL(((char *)haystack) , found_offset, 1);
     634             :                         } else {
     635           8 :                                 RETURN_STRINGL(((char *)found), haystack_len - found_offset, 1);
     636             :                         }
     637             :                 }
     638             : 
     639             :         }
     640             : 
     641             :         /* need to work in utf16 */
     642          54 :         ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, 0, &uchar_pos, f_ignore_case TSRMLS_CC );
     643             : 
     644          54 :         if ( ret_pos < 0 ) {
     645           9 :                 RETURN_FALSE;
     646             :         }
     647             : 
     648             :         /* uchar_pos is the 'nth' Unicode character position of the needle */
     649             : 
     650          45 :         ret_pos = 0;
     651          45 :         U8_FWD_N(haystack, ret_pos, haystack_len, uchar_pos);
     652             : 
     653          45 :         if (part) {
     654          15 :                 RETURN_STRINGL(((char *)haystack), ret_pos, 1);
     655             :         }
     656             :         else {
     657          30 :                 RETURN_STRINGL(((char *)haystack) + ret_pos, haystack_len - ret_pos, 1);
     658             :         }
     659             : 
     660             : }
     661             : /* }}} */
     662             : 
     663             : /* {{{ proto string grapheme_strstr(string haystack, string needle[, bool part])
     664             :    Finds first occurrence of a string within another */
     665          36 : PHP_FUNCTION(grapheme_strstr)
     666             : {
     667          36 :         strstr_common_handler(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0 /* f_ignore_case */);
     668          36 : }
     669             : /* }}} */
     670             : 
     671             : /* {{{ proto string grapheme_stristr(string haystack, string needle[, bool part])
     672             :    Finds first occurrence of a string within another */
     673          36 : PHP_FUNCTION(grapheme_stristr)
     674             : {
     675          36 :         strstr_common_handler(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1 /* f_ignore_case */);
     676          36 : }
     677             : /* }}} */
     678             : 
     679             : /* {{{ grapheme_extract_charcount_iter - grapheme iterator for grapheme_extract MAXCHARS */
     680             : static inline int32_t
     681          18 : grapheme_extract_charcount_iter(UBreakIterator *bi, int32_t csize, unsigned char *pstr, int32_t str_len)
     682             : {
     683          18 :         int pos = 0, prev_pos = 0;
     684          18 :         int ret_pos = 0, prev_ret_pos = 0;
     685             : 
     686             :         while ( 1 ) {
     687          96 :                 pos = ubrk_next(bi);
     688             : 
     689          96 :                 if ( UBRK_DONE == pos ) {
     690           7 :                         break;
     691             :                 }
     692             : 
     693             :                 /* if we are beyond our limit, then the loop is done */
     694          89 :                 if ( pos > csize ) {
     695          11 :                         break;
     696             :                 }
     697             : 
     698             :                 /* update our pointer in the original UTF-8 buffer by as many characters
     699             :                    as ubrk_next iterated over */
     700             : 
     701          78 :                 prev_ret_pos = ret_pos;
     702          78 :                 U8_FWD_N(pstr, ret_pos, str_len, pos - prev_pos);
     703             : 
     704          78 :                 if ( prev_ret_pos == ret_pos ) {
     705             :                         /* something wrong - malformed utf8? */
     706           0 :                         break;
     707             :                 }
     708             : 
     709          78 :                 prev_pos = pos;
     710          78 :         }
     711             : 
     712          18 :         return ret_pos;
     713             : }
     714             : /* }}} */
     715             : 
     716             : /* {{{ grapheme_extract_bytecount_iter - grapheme iterator for grapheme_extract MAXBYTES */
     717             : static inline int32_t
     718          23 : grapheme_extract_bytecount_iter(UBreakIterator *bi, int32_t bsize, unsigned char *pstr, int32_t str_len)
     719             : {
     720          23 :         int pos = 0, prev_pos = 0;
     721          23 :         int ret_pos = 0, prev_ret_pos = 0;
     722             : 
     723             :         while ( 1 ) {
     724          63 :                 pos = ubrk_next(bi);
     725             : 
     726          63 :                 if ( UBRK_DONE == pos ) {
     727           8 :                         break;
     728             :                 }
     729             : 
     730          55 :                 prev_ret_pos = ret_pos;
     731          55 :                 U8_FWD_N(pstr, ret_pos, str_len, pos - prev_pos);
     732             : 
     733          55 :                 if ( ret_pos > bsize ) {
     734          15 :                         ret_pos = prev_ret_pos;
     735          15 :                         break;
     736             :                 }
     737             : 
     738          40 :                 if ( prev_ret_pos == ret_pos ) {
     739             :                         /* something wrong - malformed utf8? */
     740           0 :                         break;
     741             :                 }
     742             : 
     743          40 :                 prev_pos = pos;
     744          40 :         }
     745             : 
     746          23 :         return ret_pos;
     747             : }
     748             : /* }}} */
     749             : 
     750             : /* {{{ grapheme_extract_count_iter - grapheme iterator for grapheme_extract COUNT */
     751             : static inline int32_t
     752          21 : grapheme_extract_count_iter(UBreakIterator *bi, int32_t size, unsigned char *pstr, int32_t str_len)
     753             : {
     754          21 :         int pos = 0, next_pos = 0;
     755          21 :         int ret_pos = 0;
     756             : 
     757          80 :         while ( size ) {
     758          40 :                 next_pos = ubrk_next(bi);
     759             : 
     760          40 :                 if ( UBRK_DONE == next_pos ) {
     761           2 :                         break;
     762             :                 }
     763          38 :                 pos = next_pos;
     764          38 :                 size--;
     765             :         }
     766             : 
     767             :         /* pos is one past the last UChar - and represent the number of code units to
     768             :                 advance in the utf-8 buffer
     769             :         */
     770             : 
     771          21 :         U8_FWD_N(pstr, ret_pos, str_len, pos);
     772             : 
     773          21 :         return ret_pos;
     774             : }
     775             : /* }}} */
     776             : 
     777             : /* {{{ grapheme extract iter function pointer array */
     778             : typedef int32_t (*grapheme_extract_iter)(UBreakIterator * /*bi*/, int32_t /*size*/, unsigned char * /*pstr*/, int32_t /*str_len*/);
     779             : 
     780             : static grapheme_extract_iter grapheme_extract_iters[] = {
     781             :         &grapheme_extract_count_iter,
     782             :         &grapheme_extract_bytecount_iter,
     783             :         &grapheme_extract_charcount_iter,
     784             : };
     785             : /* }}} */
     786             : 
     787             : /* {{{ proto string grapheme_extract(string str, int size[, int extract_type[, int start[, int next]]])
     788             :         Function to extract a sequence of default grapheme clusters */
     789         106 : PHP_FUNCTION(grapheme_extract)
     790             : {
     791             :         unsigned char *str, *pstr;
     792             :         UChar *ustr;
     793             :         int str_len, ustr_len;
     794             :         long size; /* maximum number of grapheme clusters, bytes, or characters (based on extract_type) to return */
     795         106 :         long lstart = 0; /* starting position in str in bytes */
     796         106 :         int32_t start = 0;
     797         106 :         long extract_type = GRAPHEME_EXTRACT_TYPE_COUNT;
     798             :         UErrorCode status;
     799             :         unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
     800         106 :         UBreakIterator* bi = NULL;
     801             :         int ret_pos;
     802         106 :         zval *next = NULL; /* return offset of next part of the string */
     803             : 
     804         106 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|llz", (char **)&str, &str_len, &size, &extract_type, &lstart, &next) == FAILURE) {
     805             : 
     806           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     807             :                          "grapheme_extract: unable to parse input param", 0 TSRMLS_CC );
     808             : 
     809           1 :                 RETURN_FALSE;
     810             :         }
     811             : 
     812         105 :         if ( NULL != next ) {
     813          32 :                 if ( !PZVAL_IS_REF(next) ) {
     814           0 :                         intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     815             :                                  "grapheme_extract: 'next' was not passed by reference", 0 TSRMLS_CC );
     816             : 
     817           0 :                         RETURN_FALSE;
     818             :                 }
     819             :                 else {
     820             :                         /* initialize next */
     821          16 :                         zval_dtor(next);
     822          16 :             ZVAL_LONG(next, lstart);
     823             :                 }
     824             :         }
     825             : 
     826         105 :         if ( extract_type < GRAPHEME_EXTRACT_TYPE_MIN || extract_type > GRAPHEME_EXTRACT_TYPE_MAX ) {
     827             : 
     828           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     829             :                          "grapheme_extract: unknown extract type param", 0 TSRMLS_CC );
     830             : 
     831           1 :                 RETURN_FALSE;
     832             :         }
     833             : 
     834         104 :         if ( lstart > INT32_MAX || lstart < 0 || lstart >= str_len ) {
     835           5 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_extract: start not contained in string", 0 TSRMLS_CC );
     836           5 :                 RETURN_FALSE;
     837             :         }
     838             : 
     839          99 :         if ( size > INT32_MAX || size < 0) {
     840           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_extract: size is invalid", 0 TSRMLS_CC );
     841           0 :                 RETURN_FALSE;
     842             :         }
     843          99 :         if (size == 0) {
     844           8 :                 RETURN_EMPTY_STRING();
     845             :         }
     846             : 
     847             :         /* we checked that it will fit: */
     848          91 :         start = (int32_t) lstart;
     849             : 
     850          91 :         pstr = str + start;
     851             : 
     852             :         /* just in case pstr points in the middle of a character, move forward to the start of the next char */
     853          91 :         if ( !UTF8_IS_SINGLE(*pstr) && !U8_IS_LEAD(*pstr) ) {
     854           9 :                 unsigned char *str_end = str + str_len;
     855             : 
     856          25 :                 while ( !UTF8_IS_SINGLE(*pstr) && !U8_IS_LEAD(*pstr) ) {
     857           9 :                         pstr++;
     858           9 :                         if ( pstr >= str_end ) {
     859           2 :                                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     860             :                                                                 "grapheme_extract: invalid input string", 0 TSRMLS_CC );
     861             : 
     862           2 :                                 RETURN_FALSE;
     863             :                         }
     864             :                 }
     865             :         }
     866             : 
     867          89 :         str_len -= (pstr - str);
     868             : 
     869             :         /* if the string is all ASCII up to size+1 - or str_len whichever is first - then we are done.
     870             :                 (size + 1 because the size-th character might be the beginning of a grapheme cluster)
     871             :          */
     872             : 
     873          89 :         if ( -1 != grapheme_ascii_check(pstr, size + 1 < str_len ? size + 1 : str_len ) ) {
     874          27 :         long nsize = ( size < str_len ? size : str_len );
     875          27 :                 if ( NULL != next ) {
     876           9 :                         ZVAL_LONG(next, start+nsize);
     877             :                 }
     878          27 :                 RETURN_STRINGL(((char *)pstr), nsize, 1);
     879             :         }
     880             : 
     881             :         /* convert the strings to UTF-16. */
     882          62 :         ustr = NULL;
     883          62 :         ustr_len = 0;
     884          62 :         status = U_ZERO_ERROR;
     885          62 :         intl_convert_utf8_to_utf16(&ustr, &ustr_len, (char *)pstr, str_len, &status );
     886             : 
     887          62 :         if ( U_FAILURE( status ) ) {
     888             :                 /* Set global error code. */
     889           0 :                 intl_error_set_code( NULL, status TSRMLS_CC );
     890             : 
     891             :                 /* Set error messages. */
     892           0 :                 intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
     893             : 
     894           0 :                 if ( NULL != ustr )
     895           0 :                         efree( ustr );
     896             : 
     897           0 :                 RETURN_FALSE;
     898             :         }
     899             : 
     900          62 :         bi = NULL;
     901          62 :         status = U_ZERO_ERROR;
     902          62 :         bi = grapheme_get_break_iterator(u_break_iterator_buffer, &status TSRMLS_CC );
     903             : 
     904          62 :         ubrk_setText(bi, ustr, ustr_len, &status);
     905             : 
     906             :         /* if the caller put us in the middle of a grapheme, we can't detect it in all cases since we
     907             :                 can't back up. So, we will not do anything. */
     908             : 
     909             :         /* now we need to find the end of the chunk the user wants us to return */
     910             : 
     911          62 :         ret_pos = (*grapheme_extract_iters[extract_type])(bi, size, pstr, str_len);
     912             : 
     913          62 :         if (ustr) {
     914          62 :                 efree(ustr);
     915             :         }
     916          62 :         ubrk_close(bi);
     917             : 
     918          62 :         if ( NULL != next ) {
     919           4 :                 ZVAL_LONG(next, start+ret_pos);
     920             :         }
     921             : 
     922          62 :         RETURN_STRINGL(((char *)pstr), ret_pos, 1);
     923             : }
     924             : 
     925             : /* }}} */
     926             : 
     927             : /*
     928             :  * Local variables:
     929             :  * tab-width: 4
     930             :  * c-basic-offset: 4
     931             :  * End:
     932             :  * vim600: fdm=marker
     933             :  * vim: noet sw=4 ts=4
     934             :  */
     935             : 

Generated by: LCOV version 1.10

Generated at Fri, 18 Apr 2014 07:01:28 +0000 (2 days ago)

Copyright © 2005-2014 The PHP Group
All rights reserved.