PHP  
 PHP: Test and Code Coverage Analysis
downloads | QA | documentation | faq | getting help | mailing lists | reporting bugs | php.net sites | links | my php.net 
 

LCOV - code coverage report
Current view: top level - ext/intl/grapheme - grapheme_string.c (source / functions) Hit Total Coverage
Test: PHP Code Coverage Lines: 334 380 87.9 %
Date: 2016-07-19 Functions: 14 14 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :    +----------------------------------------------------------------------+
       3             :    | PHP Version 7                                                                                                                |
       4             :    +----------------------------------------------------------------------+
       5             :    | This source file is subject to version 3.01 of the PHP license,      |
       6             :    | that is bundled with this package in the file LICENSE, and is                |
       7             :    | available through the world-wide-web at the following url:                   |
       8             :    | http://www.php.net/license/3_01.txt                                                                  |
       9             :    | If you did not receive a copy of the PHP license and are unable to   |
      10             :    | obtain it through the world-wide-web, please send a note to                  |
      11             :    | license@php.net so we can mail you a copy immediately.                               |
      12             :    +----------------------------------------------------------------------+
      13             :    | Author: Ed Batutis <ed@batutis.com>                                                            |
      14             :    +----------------------------------------------------------------------+
      15             :  */
      16             : 
      17             : /* {{{ includes */
      18             : #ifdef HAVE_CONFIG_H
      19             : #include "config.h"
      20             : #endif
      21             : 
      22             : #include <php.h>
      23             : #include "grapheme.h"
      24             : #include "grapheme_util.h"
      25             : 
      26             : #include <unicode/utypes.h>
      27             : #include <unicode/ucol.h>
      28             : #include <unicode/ustring.h>
      29             : #include <unicode/ubrk.h>
      30             : 
      31             : #include "ext/standard/php_string.h"
      32             : 
      33             : /* }}} */
      34             : 
      35             : #define GRAPHEME_EXTRACT_TYPE_COUNT             0
      36             : #define GRAPHEME_EXTRACT_TYPE_MAXBYTES  1
      37             : #define GRAPHEME_EXTRACT_TYPE_MAXCHARS  2
      38             : #define GRAPHEME_EXTRACT_TYPE_MIN       GRAPHEME_EXTRACT_TYPE_COUNT
      39             : #define GRAPHEME_EXTRACT_TYPE_MAX       GRAPHEME_EXTRACT_TYPE_MAXCHARS
      40             : 
      41             : 
      42             : /* {{{ grapheme_register_constants
      43             :  * Register API constants
      44             :  */
      45       23409 : void grapheme_register_constants( INIT_FUNC_ARGS )
      46             : {
      47       23409 :         REGISTER_LONG_CONSTANT("GRAPHEME_EXTR_COUNT", GRAPHEME_EXTRACT_TYPE_COUNT, CONST_CS | CONST_PERSISTENT);
      48       23409 :         REGISTER_LONG_CONSTANT("GRAPHEME_EXTR_MAXBYTES", GRAPHEME_EXTRACT_TYPE_MAXBYTES, CONST_CS | CONST_PERSISTENT);
      49       23409 :         REGISTER_LONG_CONSTANT("GRAPHEME_EXTR_MAXCHARS", GRAPHEME_EXTRACT_TYPE_MAXCHARS, CONST_CS | CONST_PERSISTENT);
      50       23409 : }
      51             : /* }}} */
      52             : 
      53             : /* {{{ proto size_t grapheme_strlen(string str)
      54             :    Get number of graphemes in a string */
      55           7 : PHP_FUNCTION(grapheme_strlen)
      56             : {
      57             :         char* string;
      58             :         size_t string_len;
      59           7 :         UChar* ustring = NULL;
      60           7 :         int ustring_len = 0;
      61             :         zend_long ret_len;
      62             :         UErrorCode status;
      63             : 
      64           7 :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &string, &string_len) == FAILURE) {
      65           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
      66             :                          "grapheme_strlen: unable to parse input param", 0 );
      67           1 :                 RETURN_FALSE;
      68             :         }
      69             : 
      70           6 :         ret_len = grapheme_ascii_check((unsigned char *)string, string_len);
      71             : 
      72           6 :         if ( ret_len >= 0 )
      73           2 :                 RETURN_LONG(string_len);
      74             : 
      75             :         /* convert the string to UTF-16. */
      76           4 :         status = U_ZERO_ERROR;
      77           4 :         intl_convert_utf8_to_utf16(&ustring, &ustring_len, string, string_len, &status );
      78             : 
      79           4 :         if ( U_FAILURE( status ) ) {
      80             :                 /* Set global error code. */
      81           0 :                 intl_error_set_code( NULL, status );
      82             : 
      83             :                 /* Set error messages. */
      84           0 :                 intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 );
      85           0 :                 if (ustring) {
      86           0 :                         efree( ustring );
      87             :                 }
      88           0 :                 RETURN_NULL();
      89             :         }
      90             : 
      91           4 :         ret_len = grapheme_split_string(ustring, ustring_len, NULL, 0 );
      92             : 
      93           4 :         if (ustring) {
      94           4 :                 efree( ustring );
      95             :         }
      96             : 
      97           4 :         if (ret_len >= 0) {
      98           4 :                 RETVAL_LONG(ret_len);
      99             :         } else {
     100           0 :                 RETVAL_FALSE;
     101             :         }
     102             : }
     103             : /* }}} */
     104             : 
     105             : /* {{{ proto int grapheme_strpos(string haystack, string needle [, int offset ])
     106             :    Find position of first occurrence of a string within another */
     107          44 : PHP_FUNCTION(grapheme_strpos)
     108             : {
     109             :         char *haystack, *needle;
     110             :         size_t haystack_len, needle_len;
     111             :         const char *found;
     112          44 :         zend_long loffset = 0;
     113          44 :         int32_t offset = 0, noffset = 0;
     114             :         zend_long ret_pos;
     115             : 
     116          44 :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|l", &haystack, &haystack_len, &needle, &needle_len, &loffset) == FAILURE) {
     117           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     118             :                          "grapheme_strpos: unable to parse input param", 0 );
     119           1 :                 RETURN_FALSE;
     120             :         }
     121             : 
     122          43 :         if ( OUTSIDE_STRING(loffset, haystack_len) ) {
     123           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Offset not contained in string", 1 );
     124           1 :                 RETURN_FALSE;
     125             :         }
     126             : 
     127             :         /* we checked that it will fit: */
     128          42 :         offset = (int32_t) loffset;
     129          42 :         noffset = offset >= 0 ? offset : (int32_t)haystack_len + offset;
     130             : 
     131             :         /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
     132             : 
     133          42 :         if (needle_len == 0) {
     134           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Empty delimiter", 1 );
     135           0 :                 RETURN_FALSE;
     136             :         }
     137             : 
     138          42 :         if (offset >= 0) {
     139             :                 /* quick check to see if the string might be there
     140             :                  * I realize that 'offset' is 'grapheme count offset' but will work in spite of that
     141             :                 */
     142          72 :                 found = php_memnstr(haystack + noffset, needle, needle_len, haystack + haystack_len);
     143             : 
     144             :                 /* if it isn't there the we are done */
     145          36 :                 if (!found) {
     146           9 :                         RETURN_FALSE;
     147             :                 }
     148             : 
     149             :                 /* if it is there, and if the haystack is ascii, we are all done */
     150          27 :                 if ( grapheme_ascii_check((unsigned char *)haystack, haystack_len) >= 0 ) {
     151          12 :                         RETURN_LONG(found - haystack);
     152             :                 }
     153             :         }
     154             : 
     155             :         /* do utf16 part of the strpos */
     156          21 :         ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* fIgnoreCase */, 0 /* last */ );
     157             : 
     158          21 :         if ( ret_pos >= 0 ) {
     159          18 :                 RETURN_LONG(ret_pos);
     160             :         } else {
     161           3 :                 RETURN_FALSE;
     162             :         }
     163             : 
     164             : }
     165             : /* }}} */
     166             : 
     167             : /* {{{ proto int grapheme_stripos(string haystack, string needle [, int offset ])
     168             :    Find position of first occurrence of a string within another, ignoring case differences */
     169          44 : PHP_FUNCTION(grapheme_stripos)
     170             : {
     171             :         char *haystack, *needle, *haystack_dup, *needle_dup;
     172             :         size_t haystack_len, needle_len;
     173             :         const char *found;
     174          44 :         zend_long loffset = 0;
     175          44 :         int32_t offset = 0;
     176             :         zend_long ret_pos;
     177             :         int is_ascii;
     178             : 
     179          44 :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|l", &haystack, &haystack_len, &needle, &needle_len, &loffset) == FAILURE) {
     180           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     181             :                          "grapheme_stripos: unable to parse input param", 0 );
     182           1 :                 RETURN_FALSE;
     183             :         }
     184             : 
     185          43 :         if ( OUTSIDE_STRING(loffset, haystack_len) ) {
     186           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_stripos: Offset not contained in string", 1 );
     187           1 :                 RETURN_FALSE;
     188             :         }
     189             : 
     190             :         /* we checked that it will fit: */
     191          42 :         offset = (int32_t) loffset;
     192             : 
     193             :         /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
     194             : 
     195          42 :         if (needle_len == 0) {
     196           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_stripos: Empty delimiter", 1 );
     197           0 :                 RETURN_FALSE;
     198             :         }
     199             : 
     200          42 :         is_ascii = ( grapheme_ascii_check((unsigned char*)haystack, haystack_len) >= 0 );
     201             : 
     202          42 :         if ( is_ascii ) {
     203          21 :                 int32_t noffset = offset >= 0 ? offset : (int32_t)haystack_len + offset;
     204          21 :                 needle_dup = estrndup(needle, needle_len);
     205          21 :                 php_strtolower(needle_dup, needle_len);
     206          21 :                 haystack_dup = estrndup(haystack, haystack_len);
     207          21 :                 php_strtolower(haystack_dup, haystack_len);
     208             : 
     209          42 :                 found = php_memnstr(haystack_dup + noffset, needle_dup, needle_len, haystack_dup + haystack_len);
     210             : 
     211          21 :                 efree(haystack_dup);
     212          21 :                 efree(needle_dup);
     213             : 
     214          21 :                 if (found) {
     215          14 :                         RETURN_LONG(found - haystack_dup);
     216             :                 }
     217             : 
     218             :                 /* if needle was ascii too, we are all done, otherwise we need to try using Unicode to see what we get */
     219           7 :                 if ( grapheme_ascii_check((unsigned char *)needle, needle_len) >= 0 ) {
     220           5 :                         RETURN_FALSE;
     221             :                 }
     222             :         }
     223             : 
     224             :         /* do utf16 part of the strpos */
     225          23 :         ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 1 /* fIgnoreCase */, 0 /*last */ );
     226             : 
     227          23 :         if ( ret_pos >= 0 ) {
     228          17 :                 RETURN_LONG(ret_pos);
     229             :         } else {
     230           6 :                 RETURN_FALSE;
     231             :         }
     232             : 
     233             : }
     234             : /* }}} */
     235             : 
     236             : /* {{{ proto int grapheme_strrpos(string haystack, string needle [, int offset])
     237             :    Find position of last occurrence of a string within another */
     238          37 : PHP_FUNCTION(grapheme_strrpos)
     239             : {
     240             :         char *haystack, *needle;
     241             :         size_t haystack_len, needle_len;
     242          37 :         zend_long loffset = 0;
     243          37 :         int32_t offset = 0;
     244             :         zend_long ret_pos;
     245             :         int is_ascii;
     246             : 
     247          37 :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|l", &haystack, &haystack_len, &needle, &needle_len, &loffset) == FAILURE) {
     248           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     249             :                          "grapheme_strrpos: unable to parse input param", 0 );
     250           1 :                 RETURN_FALSE;
     251             :         }
     252             : 
     253          36 :         if ( OUTSIDE_STRING(loffset, haystack_len) ) {
     254           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Offset not contained in string", 1 );
     255           0 :                 RETURN_FALSE;
     256             :         }
     257             : 
     258             :         /* we checked that it will fit: */
     259          36 :         offset = (int32_t) loffset;
     260             : 
     261             :         /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
     262             : 
     263          36 :         if (needle_len == 0) {
     264           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Empty delimiter", 1 );
     265           0 :                 RETURN_FALSE;
     266             :         }
     267             : 
     268          36 :         is_ascii = grapheme_ascii_check((unsigned char *)haystack, haystack_len) >= 0;
     269             : 
     270          36 :         if ( is_ascii ) {
     271             : 
     272          19 :                 ret_pos = grapheme_strrpos_ascii(haystack, haystack_len, needle, needle_len, offset);
     273             : 
     274          19 :                 if ( ret_pos >= 0 ) {
     275          12 :                         RETURN_LONG(ret_pos);
     276             :                 }
     277             : 
     278             :                 /* if the needle was ascii too, we are done */
     279             : 
     280           7 :                 if (  grapheme_ascii_check((unsigned char *)needle, needle_len) >= 0 ) {
     281           5 :                         RETURN_FALSE;
     282             :                 }
     283             : 
     284             :                 /* else we need to continue via utf16 */
     285             :         }
     286             : 
     287          19 :         ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* f_ignore_case */, 1/* last */);
     288             : 
     289          19 :         if ( ret_pos >= 0 ) {
     290          13 :                 RETURN_LONG(ret_pos);
     291             :         } else {
     292           6 :                 RETURN_FALSE;
     293             :         }
     294             : 
     295             : 
     296             : }
     297             : /* }}} */
     298             : 
     299             : /* {{{ proto int grapheme_strripos(string haystack, string needle [, int offset])
     300             :    Find position of last occurrence of a string within another, ignoring case */
     301          37 : PHP_FUNCTION(grapheme_strripos)
     302             : {
     303             :         char *haystack, *needle;
     304             :         size_t haystack_len, needle_len;
     305          37 :         zend_long loffset = 0;
     306          37 :         int32_t offset = 0;
     307             :         zend_long ret_pos;
     308             :         int is_ascii;
     309             : 
     310          37 :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|l", &haystack, &haystack_len, &needle, &needle_len, &loffset) == FAILURE) {
     311           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     312             :                          "grapheme_strrpos: unable to parse input param", 0 );
     313           1 :                 RETURN_FALSE;
     314             :         }
     315             : 
     316          36 :         if ( OUTSIDE_STRING(loffset, haystack_len) ) {
     317           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Offset not contained in string", 1 );
     318           0 :                 RETURN_FALSE;
     319             :         }
     320             : 
     321             :         /* we checked that it will fit: */
     322          36 :         offset = (int32_t) loffset;
     323             : 
     324             :         /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
     325             : 
     326          36 :         if (needle_len == 0) {
     327           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Empty delimiter", 1 );
     328           0 :                 RETURN_FALSE;
     329             :         }
     330             : 
     331          36 :         is_ascii = grapheme_ascii_check((unsigned char *)haystack, haystack_len) >= 0;
     332             : 
     333          36 :         if ( is_ascii ) {
     334             :                 char *needle_dup, *haystack_dup;
     335             : 
     336          19 :                 needle_dup = estrndup(needle, needle_len);
     337          19 :                 php_strtolower(needle_dup, needle_len);
     338          19 :                 haystack_dup = estrndup(haystack, haystack_len);
     339          19 :                 php_strtolower(haystack_dup, haystack_len);
     340             : 
     341          19 :                 ret_pos = grapheme_strrpos_ascii(haystack_dup, haystack_len, needle_dup, needle_len, offset);
     342             : 
     343          19 :                 efree(haystack_dup);
     344          19 :                 efree(needle_dup);
     345             : 
     346          19 :                 if ( ret_pos >= 0 ) {
     347          12 :                         RETURN_LONG(ret_pos);
     348             :                 }
     349             : 
     350             :                 /* if the needle was ascii too, we are done */
     351             : 
     352           7 :                 if (  grapheme_ascii_check((unsigned char *)needle, needle_len) >= 0 ) {
     353           5 :                         RETURN_FALSE;
     354             :                 }
     355             : 
     356             :                 /* else we need to continue via utf16 */
     357             :         }
     358             : 
     359          19 :         ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL,  1 /* f_ignore_case */, 1 /*last */);
     360             : 
     361          19 :         if ( ret_pos >= 0 ) {
     362          13 :                 RETURN_LONG(ret_pos);
     363             :         } else {
     364           6 :                 RETURN_FALSE;
     365             :         }
     366             : 
     367             : 
     368             : }
     369             : /* }}} */
     370             : 
     371             : /* {{{ proto string grapheme_substr(string str, int start [, int length])
     372             :    Returns part of a string */
     373          71 : PHP_FUNCTION(grapheme_substr)
     374             : {
     375             :         char *str;
     376             :         zend_string *u8_sub_str;
     377             :         UChar *ustr;
     378             :         size_t str_len;
     379             :         int32_t ustr_len;
     380          71 :         zend_long lstart = 0, length = 0;
     381          71 :         int32_t start = 0;
     382             :         int iter_val;
     383             :         UErrorCode status;
     384             :         unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
     385          71 :         UBreakIterator* bi = NULL;
     386             :         int sub_str_start_pos, sub_str_end_pos;
     387             :         int32_t (*iter_func)(UBreakIterator *);
     388          71 :         zend_bool no_length = 1;
     389             : 
     390          71 :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!", &str, &str_len, &lstart, &length, &no_length) == FAILURE) {
     391           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     392             :                          "grapheme_substr: unable to parse input param", 0 );
     393           1 :                 RETURN_FALSE;
     394             :         }
     395             : 
     396          70 :         if ( OUTSIDE_STRING(lstart, str_len)) {
     397           5 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: start not contained in string", 1 );
     398           5 :                 RETURN_FALSE;
     399             :         }
     400             : 
     401             :         /* we checked that it will fit: */
     402          65 :         start = (int32_t) lstart;
     403             : 
     404          65 :         if(no_length) {
     405          22 :                 length = str_len;
     406             :         }
     407             : 
     408          65 :         if(length < INT32_MIN) {
     409           0 :                 length = INT32_MIN;
     410          65 :         } else if(length > INT32_MAX) {
     411           0 :                 length = INT32_MAX;
     412             :         }
     413             : 
     414             :         /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
     415             : 
     416          65 :         if ( grapheme_ascii_check((unsigned char *)str, str_len) >= 0 ) {
     417             :                 int32_t asub_str_len;
     418             :                 char *sub_str;
     419           9 :                 grapheme_substr_ascii(str, str_len, start, (int32_t)length, &sub_str, &asub_str_len);
     420             : 
     421           9 :                 if ( NULL == sub_str ) {
     422           1 :                         intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: invalid parameters", 1 );
     423           1 :                         RETURN_FALSE;
     424             :                 }
     425             : 
     426          16 :                 RETURN_STRINGL(sub_str, asub_str_len);
     427             :         }
     428             : 
     429          56 :         ustr = NULL;
     430          56 :         ustr_len = 0;
     431          56 :         status = U_ZERO_ERROR;
     432          56 :         intl_convert_utf8_to_utf16(&ustr, &ustr_len, str, str_len, &status);
     433             : 
     434          56 :         if ( U_FAILURE( status ) ) {
     435             :                 /* Set global error code. */
     436           0 :                 intl_error_set_code( NULL, status );
     437             : 
     438             :                 /* Set error messages. */
     439           0 :                 intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 );
     440           0 :                 if (ustr) {
     441           0 :                         efree( ustr );
     442             :                 }
     443           0 :                 RETURN_FALSE;
     444             :         }
     445             : 
     446          56 :         bi = grapheme_get_break_iterator((void*)u_break_iterator_buffer, &status );
     447             : 
     448          56 :         if( U_FAILURE(status) ) {
     449           0 :                 RETURN_FALSE;
     450             :         }
     451             : 
     452          56 :         ubrk_setText(bi, ustr, ustr_len,        &status);
     453             : 
     454          56 :         if ( start < 0 ) {
     455          28 :                 iter_func = ubrk_previous;
     456          28 :                 ubrk_last(bi);
     457          28 :                 iter_val = 1;
     458             :         }
     459             :         else {
     460          28 :                 iter_func = ubrk_next;
     461          28 :                 iter_val = -1;
     462             :         }
     463             : 
     464          56 :         sub_str_start_pos = 0;
     465             : 
     466         377 :         while ( start ) {
     467         265 :                 sub_str_start_pos = iter_func(bi);
     468             : 
     469         265 :                 if ( UBRK_DONE == sub_str_start_pos ) {
     470           0 :                         break;
     471             :                 }
     472             : 
     473         265 :                 start += iter_val;
     474             :         }
     475             : 
     476          56 :         if ( 0 != start || sub_str_start_pos >= ustr_len ) {
     477             : 
     478           3 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: start not contained in string", 1 );
     479             : 
     480           3 :                 if (ustr) {
     481           3 :                         efree(ustr);
     482             :                 }
     483           3 :                 ubrk_close(bi);
     484           3 :                 RETURN_FALSE;
     485             :         }
     486             : 
     487             :         /* OK to convert here since if str_len were big, convert above would fail */
     488          53 :         if (length >= (int32_t)str_len) {
     489             : 
     490             :                 /* no length supplied or length is too big, return the rest of the string */
     491             : 
     492          18 :                 status = U_ZERO_ERROR;
     493          18 :                 u8_sub_str = intl_convert_utf16_to_utf8(ustr + sub_str_start_pos, ustr_len - sub_str_start_pos, &status);
     494             : 
     495          18 :                 if (ustr) {
     496          18 :                         efree( ustr );
     497             :                 }
     498          18 :                 ubrk_close( bi );
     499             : 
     500          18 :                 if ( !u8_sub_str ) {
     501             :                         /* Set global error code. */
     502           0 :                         intl_error_set_code( NULL, status );
     503             : 
     504             :                         /* Set error messages. */
     505           0 :                         intl_error_set_custom_msg( NULL, "Error converting output string to UTF-8", 0 );
     506             : 
     507           0 :                         RETURN_FALSE;
     508             :                 }
     509             : 
     510             :                 /* return the allocated string, not a duplicate */
     511          18 :                 RETVAL_NEW_STR(u8_sub_str);
     512          18 :                 return;
     513             :         }
     514             : 
     515          35 :         if(length == 0) {
     516             :                 /* empty length - we've validated start, we can return "" now */
     517           2 :                 if (ustr) {
     518           2 :                         efree(ustr);
     519             :                 }
     520           2 :                 ubrk_close(bi);
     521           2 :                 RETURN_EMPTY_STRING();
     522             :         }
     523             : 
     524             :         /* find the end point of the string to return */
     525             : 
     526          33 :         if ( length < 0 ) {
     527          24 :                 iter_func = ubrk_previous;
     528          24 :                 ubrk_last(bi);
     529          24 :                 iter_val = 1;
     530             :         }
     531             :         else {
     532           9 :                 iter_func = ubrk_next;
     533           9 :                 iter_val = -1;
     534             :         }
     535             : 
     536          33 :         sub_str_end_pos = 0;
     537             : 
     538         220 :         while ( length ) {
     539         158 :                 sub_str_end_pos = iter_func(bi);
     540             : 
     541         158 :                 if ( UBRK_DONE == sub_str_end_pos ) {
     542           4 :                         break;
     543             :                 }
     544             : 
     545         154 :                 length += iter_val;
     546             :         }
     547             : 
     548          33 :         ubrk_close(bi);
     549             : 
     550          33 :         if ( UBRK_DONE == sub_str_end_pos) {
     551           4 :                 if(length < 0) {
     552           3 :                         intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: length not contained in string", 1 );
     553             : 
     554           3 :                         efree(ustr);
     555           3 :                         RETURN_FALSE;
     556             :                 } else {
     557           1 :                         sub_str_end_pos = ustr_len;
     558             :                 }
     559             :         }
     560             : 
     561          30 :         if(sub_str_start_pos > sub_str_end_pos) {
     562           2 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: length is beyond start", 1 );
     563             : 
     564           2 :                 efree(ustr);
     565           2 :                 RETURN_FALSE;
     566             :         }
     567             : 
     568          28 :         status = U_ZERO_ERROR;
     569          28 :         u8_sub_str = intl_convert_utf16_to_utf8(ustr + sub_str_start_pos, ( sub_str_end_pos - sub_str_start_pos ), &status);
     570             : 
     571          28 :         efree( ustr );
     572             : 
     573          28 :         if ( !u8_sub_str ) {
     574             :                 /* Set global error code. */
     575           0 :                 intl_error_set_code( NULL, status );
     576             : 
     577             :                 /* Set error messages. */
     578           0 :                 intl_error_set_custom_msg( NULL, "Error converting output string to UTF-8", 0 );
     579             : 
     580           0 :                 RETURN_FALSE;
     581             :         }
     582             : 
     583             :          /* return the allocated string, not a duplicate */
     584          28 :         RETVAL_NEW_STR(u8_sub_str);
     585             : }
     586             : /* }}} */
     587             : 
     588             : /* {{{  strstr_common_handler */
     589          73 : static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_case)
     590             : {
     591             :         char *haystack, *needle;
     592             :         const char *found;
     593             :         size_t haystack_len, needle_len;
     594             :         int32_t ret_pos, uchar_pos;
     595          73 :         zend_bool part = 0;
     596             : 
     597          73 :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|b", &haystack, &haystack_len, &needle, &needle_len, &part) == FAILURE) {
     598             : 
     599           2 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     600             :                          "grapheme_strstr: unable to parse input param", 0 );
     601             : 
     602           2 :                 RETURN_FALSE;
     603             :         }
     604             : 
     605          71 :         if (needle_len == 0) {
     606             : 
     607           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Empty delimiter", 1 );
     608             : 
     609           0 :                 RETURN_FALSE;
     610             :         }
     611             : 
     612             : 
     613          71 :         if ( !f_ignore_case ) {
     614             : 
     615             :                 /* ASCII optimization: quick check to see if the string might be there
     616             :                  * I realize that 'offset' is 'grapheme count offset' but will work in spite of that
     617             :                 */
     618          70 :                 found = php_memnstr(haystack, needle, needle_len, haystack + haystack_len);
     619             : 
     620             :                 /* if it isn't there then we are done */
     621          35 :                 if ( !found ) {
     622           3 :                         RETURN_FALSE;
     623             :                 }
     624             : 
     625             :                 /* if it is there, and if the haystack is ascii, we are all done */
     626          32 :                 if ( grapheme_ascii_check((unsigned char *)haystack, haystack_len) >= 0 ) {
     627          13 :                         size_t found_offset = found - haystack;
     628             : 
     629          13 :                         if (part) {
     630          10 :                                 RETURN_STRINGL(haystack, found_offset);
     631             :                         } else {
     632          16 :                                 RETURN_STRINGL(found, haystack_len - found_offset);
     633             :                         }
     634             :                 }
     635             : 
     636             :         }
     637             : 
     638             :         /* need to work in utf16 */
     639          55 :         ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, 0, &uchar_pos, f_ignore_case, 0 /*last */ );
     640             : 
     641          55 :         if ( ret_pos < 0 ) {
     642           9 :                 RETURN_FALSE;
     643             :         }
     644             : 
     645             :         /* uchar_pos is the 'nth' Unicode character position of the needle */
     646             : 
     647          46 :         ret_pos = 0;
     648          46 :         U8_FWD_N(haystack, ret_pos, haystack_len, uchar_pos);
     649             : 
     650          46 :         if (part) {
     651          30 :                 RETURN_STRINGL(haystack, ret_pos);
     652             :         } else {
     653          62 :                 RETURN_STRINGL(haystack + ret_pos, haystack_len - ret_pos);
     654             :         }
     655             : 
     656             : }
     657             : /* }}} */
     658             : 
     659             : /* {{{ proto string grapheme_strstr(string haystack, string needle[, bool part])
     660             :    Finds first occurrence of a string within another */
     661          36 : PHP_FUNCTION(grapheme_strstr)
     662             : {
     663          36 :         strstr_common_handler(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0 /* f_ignore_case */);
     664          36 : }
     665             : /* }}} */
     666             : 
     667             : /* {{{ proto string grapheme_stristr(string haystack, string needle[, bool part])
     668             :    Finds first occurrence of a string within another, ignoring case */
     669          37 : PHP_FUNCTION(grapheme_stristr)
     670             : {
     671          37 :         strstr_common_handler(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1 /* f_ignore_case */);
     672          37 : }
     673             : /* }}} */
     674             : 
     675             : /* {{{ grapheme_extract_charcount_iter - grapheme iterator for grapheme_extract MAXCHARS */
     676             : static inline int32_t
     677          18 : grapheme_extract_charcount_iter(UBreakIterator *bi, int32_t csize, unsigned char *pstr, int32_t str_len)
     678             : {
     679          18 :         int pos = 0, prev_pos = 0;
     680          18 :         int ret_pos = 0, prev_ret_pos = 0;
     681             : 
     682             :         while ( 1 ) {
     683          96 :                 pos = ubrk_next(bi);
     684             : 
     685          96 :                 if ( UBRK_DONE == pos ) {
     686           7 :                         break;
     687             :                 }
     688             : 
     689             :                 /* if we are beyond our limit, then the loop is done */
     690          89 :                 if ( pos > csize ) {
     691          11 :                         break;
     692             :                 }
     693             : 
     694             :                 /* update our pointer in the original UTF-8 buffer by as many characters
     695             :                    as ubrk_next iterated over */
     696             : 
     697          78 :                 prev_ret_pos = ret_pos;
     698          78 :                 U8_FWD_N(pstr, ret_pos, str_len, pos - prev_pos);
     699             : 
     700          78 :                 if ( prev_ret_pos == ret_pos ) {
     701             :                         /* something wrong - malformed utf8? */
     702           0 :                         break;
     703             :                 }
     704             : 
     705          78 :                 prev_pos = pos;
     706          78 :         }
     707             : 
     708          18 :         return ret_pos;
     709             : }
     710             : /* }}} */
     711             : 
     712             : /* {{{ grapheme_extract_bytecount_iter - grapheme iterator for grapheme_extract MAXBYTES */
     713             : static inline int32_t
     714          23 : grapheme_extract_bytecount_iter(UBreakIterator *bi, int32_t bsize, unsigned char *pstr, int32_t str_len)
     715             : {
     716          23 :         int pos = 0, prev_pos = 0;
     717          23 :         int ret_pos = 0, prev_ret_pos = 0;
     718             : 
     719             :         while ( 1 ) {
     720          63 :                 pos = ubrk_next(bi);
     721             : 
     722          63 :                 if ( UBRK_DONE == pos ) {
     723           8 :                         break;
     724             :                 }
     725             : 
     726          55 :                 prev_ret_pos = ret_pos;
     727          55 :                 U8_FWD_N(pstr, ret_pos, str_len, pos - prev_pos);
     728             : 
     729          55 :                 if ( ret_pos > bsize ) {
     730          15 :                         ret_pos = prev_ret_pos;
     731          15 :                         break;
     732             :                 }
     733             : 
     734          40 :                 if ( prev_ret_pos == ret_pos ) {
     735             :                         /* something wrong - malformed utf8? */
     736           0 :                         break;
     737             :                 }
     738             : 
     739          40 :                 prev_pos = pos;
     740          40 :         }
     741             : 
     742          23 :         return ret_pos;
     743             : }
     744             : /* }}} */
     745             : 
     746             : /* {{{ grapheme_extract_count_iter - grapheme iterator for grapheme_extract COUNT */
     747             : static inline int32_t
     748          23 : grapheme_extract_count_iter(UBreakIterator *bi, int32_t size, unsigned char *pstr, int32_t str_len)
     749             : {
     750          23 :         int pos = 0, next_pos = 0;
     751          23 :         int ret_pos = 0;
     752             : 
     753          90 :         while ( size ) {
     754          46 :                 next_pos = ubrk_next(bi);
     755             : 
     756          46 :                 if ( UBRK_DONE == next_pos ) {
     757           2 :                         break;
     758             :                 }
     759          44 :                 pos = next_pos;
     760          44 :                 size--;
     761             :         }
     762             : 
     763             :         /* pos is one past the last UChar - and represents the number of code units to
     764             :                 advance in the utf-8 buffer
     765             :         */
     766             : 
     767          23 :         U8_FWD_N(pstr, ret_pos, str_len, pos);
     768             : 
     769          23 :         return ret_pos;
     770             : }
     771             : /* }}} */
     772             : 
     773             : /* {{{ grapheme extract iter function pointer array */
     774             : typedef int32_t (*grapheme_extract_iter)(UBreakIterator * /*bi*/, int32_t /*size*/, unsigned char * /*pstr*/, int32_t /*str_len*/);
     775             : 
     776             : static grapheme_extract_iter grapheme_extract_iters[] = {
     777             :         &grapheme_extract_count_iter,
     778             :         &grapheme_extract_bytecount_iter,
     779             :         &grapheme_extract_charcount_iter,
     780             : };
     781             : /* }}} */
     782             : 
     783             : /* {{{ proto string grapheme_extract(string str, int size[, int extract_type[, int start[, int next]]])
     784             :         Function to extract a sequence of default grapheme clusters */
     785         110 : PHP_FUNCTION(grapheme_extract)
     786             : {
     787             :         char *str, *pstr;
     788             :         UChar *ustr;
     789             :         size_t str_len;
     790             :         int32_t ustr_len;
     791             :         zend_long size; /* maximum number of grapheme clusters, bytes, or characters (based on extract_type) to return */
     792         110 :         zend_long lstart = 0; /* starting position in str in bytes */
     793         110 :         int32_t start = 0;
     794         110 :         zend_long extract_type = GRAPHEME_EXTRACT_TYPE_COUNT;
     795             :         UErrorCode status;
     796             :         unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
     797         110 :         UBreakIterator* bi = NULL;
     798             :         int ret_pos;
     799         110 :         zval *next = NULL; /* return offset of next part of the string */
     800             : 
     801         110 :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|llz", &str, &str_len, &size, &extract_type, &lstart, &next) == FAILURE) {
     802           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     803             :                          "grapheme_extract: unable to parse input param", 0 );
     804           1 :                 RETURN_FALSE;
     805             :         }
     806             : 
     807         109 :         if (lstart < 0) {
     808           5 :                 lstart += str_len;
     809             :         }
     810             : 
     811         109 :         if ( NULL != next ) {
     812          40 :                 if ( !Z_ISREF_P(next) ) {
     813           0 :                         intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     814             :                                  "grapheme_extract: 'next' was not passed by reference", 0 );
     815           0 :                         RETURN_FALSE;
     816             :                 } else {
     817          40 :                         ZVAL_DEREF(next);
     818             :                         /* initialize next */
     819          21 :                         SEPARATE_ZVAL_NOREF(next);
     820          20 :                         zval_dtor(next);
     821          20 :             ZVAL_LONG(next, lstart);
     822             :                 }
     823             :         }
     824             : 
     825         109 :         if ( extract_type < GRAPHEME_EXTRACT_TYPE_MIN || extract_type > GRAPHEME_EXTRACT_TYPE_MAX ) {
     826           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     827             :                          "grapheme_extract: unknown extract type param", 0 );
     828           1 :                 RETURN_FALSE;
     829             :         }
     830             : 
     831         108 :         if ( lstart > INT32_MAX || lstart < 0 || (size_t)lstart >= str_len ) {
     832           5 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_extract: start not contained in string", 0 );
     833           5 :                 RETURN_FALSE;
     834             :         }
     835             : 
     836         103 :         if ( size > INT32_MAX || size < 0) {
     837           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_extract: size is invalid", 0 );
     838           0 :                 RETURN_FALSE;
     839             :         }
     840         103 :         if (size == 0) {
     841           8 :                 RETURN_EMPTY_STRING();
     842             :         }
     843             : 
     844             :         /* we checked that it will fit: */
     845          95 :         start = (int32_t) lstart;
     846             : 
     847          95 :         pstr = str + start;
     848             : 
     849             :         /* just in case pstr points in the middle of a character, move forward to the start of the next char */
     850          95 :         if ( !UTF8_IS_SINGLE(*pstr) && !U8_IS_LEAD(*pstr) ) {
     851           9 :                 char *str_end = str + str_len;
     852             : 
     853          25 :                 while ( !UTF8_IS_SINGLE(*pstr) && !U8_IS_LEAD(*pstr) ) {
     854           9 :                         pstr++;
     855           9 :                         if ( pstr >= str_end ) {
     856           2 :                                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     857             :                                                                 "grapheme_extract: invalid input string", 0 );
     858             : 
     859           2 :                                 RETURN_FALSE;
     860             :                         }
     861             :                 }
     862             :         }
     863             : 
     864          93 :         str_len -= (pstr - str);
     865             : 
     866             :         /* if the string is all ASCII up to size+1 - or str_len whichever is first - then we are done.
     867             :                 (size + 1 because the size-th character might be the beginning of a grapheme cluster)
     868             :          */
     869             : 
     870          93 :         if ( -1 != grapheme_ascii_check((unsigned char *)pstr, MIN(size + 1, str_len)) ) {
     871          29 :         size_t nsize = MIN(size, str_len);
     872          29 :                 if ( NULL != next ) {
     873          11 :                         ZVAL_LONG(next, start+nsize);
     874             :                 }
     875          58 :                 RETURN_STRINGL(pstr, nsize);
     876             :         }
     877             : 
     878             :         /* convert the strings to UTF-16. */
     879          64 :         ustr = NULL;
     880          64 :         ustr_len = 0;
     881          64 :         status = U_ZERO_ERROR;
     882          64 :         intl_convert_utf8_to_utf16(&ustr, &ustr_len, pstr, str_len, &status );
     883             : 
     884          64 :         if ( U_FAILURE( status ) ) {
     885             :                 /* Set global error code. */
     886           0 :                 intl_error_set_code( NULL, status );
     887             : 
     888             :                 /* Set error messages. */
     889           0 :                 intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 );
     890             : 
     891           0 :                 if ( NULL != ustr )
     892           0 :                         efree( ustr );
     893             : 
     894           0 :                 RETURN_FALSE;
     895             :         }
     896             : 
     897          64 :         bi = NULL;
     898          64 :         status = U_ZERO_ERROR;
     899          64 :         bi = grapheme_get_break_iterator(u_break_iterator_buffer, &status );
     900             : 
     901          64 :         ubrk_setText(bi, ustr, ustr_len, &status);
     902             : 
     903             :         /* if the caller put us in the middle of a grapheme, we can't detect it in all cases since we
     904             :                 can't back up. So, we will not do anything. */
     905             : 
     906             :         /* now we need to find the end of the chunk the user wants us to return */
     907             :         /* it's ok to convert str_len to int32_t since if it were too big intl_convert_utf8_to_utf16 above would fail */
     908          64 :         ret_pos = (*grapheme_extract_iters[extract_type])(bi, size, (unsigned char *)pstr, (int32_t)str_len);
     909             : 
     910          64 :         if (ustr) {
     911          64 :                 efree(ustr);
     912             :         }
     913          64 :         ubrk_close(bi);
     914             : 
     915          64 :         if ( NULL != next ) {
     916           6 :                 ZVAL_LONG(next, start+ret_pos);
     917             :         }
     918             : 
     919         128 :         RETURN_STRINGL(((char *)pstr), ret_pos);
     920             : }
     921             : 
     922             : /* }}} */
     923             : 
     924             : /*
     925             :  * Local variables:
     926             :  * tab-width: 4
     927             :  * c-basic-offset: 4
     928             :  * End:
     929             :  * vim600: fdm=marker
     930             :  * vim: noet sw=4 ts=4
     931             :  */
     932             : 

Generated by: LCOV version 1.10

Generated at Wed, 20 Jul 2016 02:56:19 +0000 (3 days ago)

Copyright © 2005-2016 The PHP Group
All rights reserved.