PHP  
 PHP: Test and Code Coverage Analysis
downloads | QA | documentation | faq | getting help | mailing lists | reporting bugs | php.net sites | links | my php.net 
 

LCOV - code coverage report
Current view: top level - ext/intl/grapheme - grapheme_string.c (source / functions) Hit Total Coverage
Test: PHP Code Coverage Lines: 329 375 87.7 %
Date: 2015-08-29 Functions: 14 14 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :    +----------------------------------------------------------------------+
       3             :    | PHP Version 7                                                                                                                |
       4             :    +----------------------------------------------------------------------+
       5             :    | This source file is subject to version 3.01 of the PHP license,      |
       6             :    | that is bundled with this package in the file LICENSE, and is                |
       7             :    | available through the world-wide-web at the following url:                   |
       8             :    | http://www.php.net/license/3_01.txt                                                                  |
       9             :    | If you did not receive a copy of the PHP license and are unable to   |
      10             :    | obtain it through the world-wide-web, please send a note to                  |
      11             :    | license@php.net so we can mail you a copy immediately.                               |
      12             :    +----------------------------------------------------------------------+
      13             :    | Author: Ed Batutis <ed@batutis.com>                                                            |
      14             :    +----------------------------------------------------------------------+
      15             :  */
      16             : 
      17             : /* {{{ includes */
      18             : #ifdef HAVE_CONFIG_H
      19             : #include "config.h"
      20             : #endif
      21             : 
      22             : #include <php.h>
      23             : #include "grapheme.h"
      24             : #include "grapheme_util.h"
      25             : 
      26             : #include <unicode/utypes.h>
      27             : #include <unicode/ucol.h>
      28             : #include <unicode/ustring.h>
      29             : #include <unicode/ubrk.h>
      30             : 
      31             : #include "ext/standard/php_string.h"
      32             : 
      33             : /* }}} */
      34             : 
      35             : #define GRAPHEME_EXTRACT_TYPE_COUNT             0
      36             : #define GRAPHEME_EXTRACT_TYPE_MAXBYTES  1
      37             : #define GRAPHEME_EXTRACT_TYPE_MAXCHARS  2
      38             : #define GRAPHEME_EXTRACT_TYPE_MIN       GRAPHEME_EXTRACT_TYPE_COUNT
      39             : #define GRAPHEME_EXTRACT_TYPE_MAX       GRAPHEME_EXTRACT_TYPE_MAXCHARS
      40             : 
      41             : 
      42             : /* {{{ grapheme_register_constants
      43             :  * Register API constants
      44             :  */
      45       21291 : void grapheme_register_constants( INIT_FUNC_ARGS )
      46             : {
      47       21291 :         REGISTER_LONG_CONSTANT("GRAPHEME_EXTR_COUNT", GRAPHEME_EXTRACT_TYPE_COUNT, CONST_CS | CONST_PERSISTENT);
      48       21291 :         REGISTER_LONG_CONSTANT("GRAPHEME_EXTR_MAXBYTES", GRAPHEME_EXTRACT_TYPE_MAXBYTES, CONST_CS | CONST_PERSISTENT);
      49       21291 :         REGISTER_LONG_CONSTANT("GRAPHEME_EXTR_MAXCHARS", GRAPHEME_EXTRACT_TYPE_MAXCHARS, CONST_CS | CONST_PERSISTENT);
      50       21291 : }
      51             : /* }}} */
      52             : 
      53             : /* {{{ proto size_t grapheme_strlen(string str)
      54             :    Get number of graphemes in a string */
      55           7 : PHP_FUNCTION(grapheme_strlen)
      56             : {
      57             :         char* string;
      58             :         size_t string_len;
      59           7 :         UChar* ustring = NULL;
      60           7 :         int ustring_len = 0;
      61             :         zend_long ret_len;
      62             :         UErrorCode status;
      63             : 
      64           7 :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &string, &string_len) == FAILURE) {
      65           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
      66             :                          "grapheme_strlen: unable to parse input param", 0 );
      67           1 :                 RETURN_FALSE;
      68             :         }
      69             : 
      70           6 :         ret_len = grapheme_ascii_check((unsigned char *)string, string_len);
      71             : 
      72           6 :         if ( ret_len >= 0 )
      73           2 :                 RETURN_LONG(string_len);
      74             : 
      75             :         /* convert the string to UTF-16. */
      76           4 :         status = U_ZERO_ERROR;
      77           4 :         intl_convert_utf8_to_utf16(&ustring, &ustring_len, string, string_len, &status );
      78             : 
      79           4 :         if ( U_FAILURE( status ) ) {
      80             :                 /* Set global error code. */
      81           0 :                 intl_error_set_code( NULL, status );
      82             : 
      83             :                 /* Set error messages. */
      84           0 :                 intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 );
      85           0 :                 if (ustring) {
      86           0 :                         efree( ustring );
      87             :                 }
      88           0 :                 RETURN_NULL();
      89             :         }
      90             : 
      91           4 :         ret_len = grapheme_split_string(ustring, ustring_len, NULL, 0 );
      92             : 
      93           4 :         if (ustring) {
      94           4 :                 efree( ustring );
      95             :         }
      96             : 
      97           4 :         if (ret_len >= 0) {
      98           4 :                 RETVAL_LONG(ret_len);
      99             :         } else {
     100           0 :                 RETVAL_FALSE;
     101             :         }
     102             : }
     103             : /* }}} */
     104             : 
     105             : /* {{{ proto int grapheme_strpos(string haystack, string needle [, int offset ])
     106             :    Find position of first occurrence of a string within another */
     107          38 : PHP_FUNCTION(grapheme_strpos)
     108             : {
     109             :         char *haystack, *needle;
     110             :         size_t haystack_len, needle_len;
     111             :         const char *found;
     112          38 :         zend_long loffset = 0;
     113          38 :         int32_t offset = 0;
     114             :         zend_long ret_pos;
     115             : 
     116          38 :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|l", &haystack, &haystack_len, &needle, &needle_len, &loffset) == FAILURE) {
     117           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     118             :                          "grapheme_strpos: unable to parse input param", 0 );
     119           1 :                 RETURN_FALSE;
     120             :         }
     121             : 
     122          37 :         if ( OUTSIDE_STRING(loffset, haystack_len) ) {
     123           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Offset not contained in string", 1 );
     124           1 :                 RETURN_FALSE;
     125             :         }
     126             : 
     127             :         /* we checked that it will fit: */
     128          36 :         offset = (int32_t) loffset;
     129             : 
     130             :         /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
     131             : 
     132          36 :         if (needle_len == 0) {
     133           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Empty delimiter", 1 );
     134           0 :                 RETURN_FALSE;
     135             :         }
     136             : 
     137             : 
     138             :         /* quick check to see if the string might be there
     139             :          * I realize that 'offset' is 'grapheme count offset' but will work in spite of that
     140             :         */
     141          72 :         found = php_memnstr(haystack + offset, needle, needle_len, haystack + haystack_len);
     142             : 
     143             :         /* if it isn't there the we are done */
     144          36 :         if (!found) {
     145           9 :                 RETURN_FALSE;
     146             :         }
     147             : 
     148             :         /* if it is there, and if the haystack is ascii, we are all done */
     149          27 :         if ( grapheme_ascii_check((unsigned char *)haystack, haystack_len) >= 0 ) {
     150          12 :                 RETURN_LONG(found - haystack);
     151             :         }
     152             : 
     153             :         /* do utf16 part of the strpos */
     154          15 :         ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* fIgnoreCase */, 0 /* last */ );
     155             : 
     156          15 :         if ( ret_pos >= 0 ) {
     157          13 :                 RETURN_LONG(ret_pos);
     158             :         } else {
     159           2 :                 RETURN_FALSE;
     160             :         }
     161             : 
     162             : }
     163             : /* }}} */
     164             : 
     165             : /* {{{ proto int grapheme_stripos(string haystack, string needle [, int offset ])
     166             :    Find position of first occurrence of a string within another, ignoring case differences */
     167          40 : PHP_FUNCTION(grapheme_stripos)
     168             : {
     169             :         char *haystack, *needle, *haystack_dup, *needle_dup;
     170             :         size_t haystack_len, needle_len;
     171             :         const char *found;
     172          40 :         zend_long loffset = 0;
     173          40 :         int32_t offset = 0;
     174             :         zend_long ret_pos;
     175             :         int is_ascii;
     176             : 
     177          40 :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|l", &haystack, &haystack_len, &needle, &needle_len, &loffset) == FAILURE) {
     178           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     179             :                          "grapheme_stripos: unable to parse input param", 0 );
     180           1 :                 RETURN_FALSE;
     181             :         }
     182             : 
     183          39 :         if ( OUTSIDE_STRING(loffset, haystack_len) ) {
     184           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_stripos: Offset not contained in string", 1 );
     185           1 :                 RETURN_FALSE;
     186             :         }
     187             : 
     188             :         /* we checked that it will fit: */
     189          38 :         offset = (int32_t) loffset;
     190             : 
     191             :         /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
     192             : 
     193          38 :         if (needle_len == 0) {
     194           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_stripos: Empty delimiter", 1 );
     195           0 :                 RETURN_FALSE;
     196             :         }
     197             : 
     198             : 
     199          38 :         is_ascii = ( grapheme_ascii_check((unsigned char*)haystack, haystack_len) >= 0 );
     200             : 
     201          38 :         if ( is_ascii ) {
     202          19 :                 needle_dup = estrndup(needle, needle_len);
     203          19 :                 php_strtolower(needle_dup, needle_len);
     204          19 :                 haystack_dup = estrndup(haystack, haystack_len);
     205          19 :                 php_strtolower(haystack_dup, haystack_len);
     206             : 
     207          38 :                 found = php_memnstr(haystack_dup + offset, needle_dup, needle_len, haystack_dup + haystack_len);
     208             : 
     209          19 :                 efree(haystack_dup);
     210          19 :                 efree(needle_dup);
     211             : 
     212          19 :                 if (found) {
     213          12 :                         RETURN_LONG(found - haystack_dup);
     214             :                 }
     215             : 
     216             :                 /* if needle was ascii too, we are all done, otherwise we need to try using Unicode to see what we get */
     217           7 :                 if ( grapheme_ascii_check((unsigned char *)needle, needle_len) >= 0 ) {
     218           5 :                         RETURN_FALSE;
     219             :                 }
     220             :         }
     221             : 
     222             :         /* do utf16 part of the strpos */
     223          21 :         ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 1 /* fIgnoreCase */, 0 /*last */ );
     224             : 
     225          21 :         if ( ret_pos >= 0 ) {
     226          15 :                 RETURN_LONG(ret_pos);
     227             :         } else {
     228           6 :                 RETURN_FALSE;
     229             :         }
     230             : 
     231             : }
     232             : /* }}} */
     233             : 
     234             : /* {{{ proto int grapheme_strrpos(string haystack, string needle [, int offset])
     235             :    Find position of last occurrence of a string within another */
     236          37 : PHP_FUNCTION(grapheme_strrpos)
     237             : {
     238             :         char *haystack, *needle;
     239             :         size_t haystack_len, needle_len;
     240          37 :         zend_long loffset = 0;
     241          37 :         int32_t offset = 0;
     242             :         zend_long ret_pos;
     243             :         int is_ascii;
     244             : 
     245          37 :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|l", &haystack, &haystack_len, &needle, &needle_len, &loffset) == FAILURE) {
     246           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     247             :                          "grapheme_strrpos: unable to parse input param", 0 );
     248           1 :                 RETURN_FALSE;
     249             :         }
     250             : 
     251          36 :         if ( OUTSIDE_STRING(loffset, haystack_len) ) {
     252           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Offset not contained in string", 1 );
     253           0 :                 RETURN_FALSE;
     254             :         }
     255             : 
     256             :         /* we checked that it will fit: */
     257          36 :         offset = (int32_t) loffset;
     258             : 
     259             :         /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
     260             : 
     261          36 :         if (needle_len == 0) {
     262           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Empty delimiter", 1 );
     263           0 :                 RETURN_FALSE;
     264             :         }
     265             : 
     266          36 :         is_ascii = grapheme_ascii_check((unsigned char *)haystack, haystack_len) >= 0;
     267             : 
     268          36 :         if ( is_ascii ) {
     269             : 
     270          19 :                 ret_pos = grapheme_strrpos_ascii(haystack, haystack_len, needle, needle_len, offset);
     271             : 
     272          19 :                 if ( ret_pos >= 0 ) {
     273          12 :                         RETURN_LONG(ret_pos);
     274             :                 }
     275             : 
     276             :                 /* if the needle was ascii too, we are done */
     277             : 
     278           7 :                 if (  grapheme_ascii_check((unsigned char *)needle, needle_len) >= 0 ) {
     279           5 :                         RETURN_FALSE;
     280             :                 }
     281             : 
     282             :                 /* else we need to continue via utf16 */
     283             :         }
     284             : 
     285          19 :         ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* f_ignore_case */, 1/* last */);
     286             : 
     287          19 :         if ( ret_pos >= 0 ) {
     288          13 :                 RETURN_LONG(ret_pos);
     289             :         } else {
     290           6 :                 RETURN_FALSE;
     291             :         }
     292             : 
     293             : 
     294             : }
     295             : /* }}} */
     296             : 
     297             : /* {{{ proto int grapheme_strripos(string haystack, string needle [, int offset])
     298             :    Find position of last occurrence of a string within another, ignoring case */
     299          37 : PHP_FUNCTION(grapheme_strripos)
     300             : {
     301             :         char *haystack, *needle;
     302             :         size_t haystack_len, needle_len;
     303          37 :         zend_long loffset = 0;
     304          37 :         int32_t offset = 0;
     305             :         zend_long ret_pos;
     306             :         int is_ascii;
     307             : 
     308          37 :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|l", &haystack, &haystack_len, &needle, &needle_len, &loffset) == FAILURE) {
     309           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     310             :                          "grapheme_strrpos: unable to parse input param", 0 );
     311           1 :                 RETURN_FALSE;
     312             :         }
     313             : 
     314          36 :         if ( OUTSIDE_STRING(loffset, haystack_len) ) {
     315           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Offset not contained in string", 1 );
     316           0 :                 RETURN_FALSE;
     317             :         }
     318             : 
     319             :         /* we checked that it will fit: */
     320          36 :         offset = (int32_t) loffset;
     321             : 
     322             :         /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
     323             : 
     324          36 :         if (needle_len == 0) {
     325           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Empty delimiter", 1 );
     326           0 :                 RETURN_FALSE;
     327             :         }
     328             : 
     329          36 :         is_ascii = grapheme_ascii_check((unsigned char *)haystack, haystack_len) >= 0;
     330             : 
     331          36 :         if ( is_ascii ) {
     332             :                 char *needle_dup, *haystack_dup;
     333             : 
     334          19 :                 needle_dup = estrndup(needle, needle_len);
     335          19 :                 php_strtolower(needle_dup, needle_len);
     336          19 :                 haystack_dup = estrndup(haystack, haystack_len);
     337          19 :                 php_strtolower(haystack_dup, haystack_len);
     338             : 
     339          19 :                 ret_pos = grapheme_strrpos_ascii(haystack_dup, haystack_len, needle_dup, needle_len, offset);
     340             : 
     341          19 :                 efree(haystack_dup);
     342          19 :                 efree(needle_dup);
     343             : 
     344          19 :                 if ( ret_pos >= 0 ) {
     345          12 :                         RETURN_LONG(ret_pos);
     346             :                 }
     347             : 
     348             :                 /* if the needle was ascii too, we are done */
     349             : 
     350           7 :                 if (  grapheme_ascii_check((unsigned char *)needle, needle_len) >= 0 ) {
     351           5 :                         RETURN_FALSE;
     352             :                 }
     353             : 
     354             :                 /* else we need to continue via utf16 */
     355             :         }
     356             : 
     357          19 :         ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL,  1 /* f_ignore_case */, 1 /*last */);
     358             : 
     359          19 :         if ( ret_pos >= 0 ) {
     360          13 :                 RETURN_LONG(ret_pos);
     361             :         } else {
     362           6 :                 RETURN_FALSE;
     363             :         }
     364             : 
     365             : 
     366             : }
     367             : /* }}} */
     368             : 
     369             : /* {{{ proto string grapheme_substr(string str, int start [, int length])
     370             :    Returns part of a string */
     371          71 : PHP_FUNCTION(grapheme_substr)
     372             : {
     373             :         char *str;
     374             :         zend_string *u8_sub_str;
     375             :         UChar *ustr;
     376             :         size_t str_len;
     377             :         int32_t ustr_len;
     378          71 :         zend_long lstart = 0, length = 0;
     379          71 :         int32_t start = 0;
     380             :         int iter_val;
     381             :         UErrorCode status;
     382             :         unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
     383          71 :         UBreakIterator* bi = NULL;
     384             :         int sub_str_start_pos, sub_str_end_pos;
     385             :         int32_t (*iter_func)(UBreakIterator *);
     386          71 :         int no_length = 1;
     387             : 
     388          71 :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!", (char **)&str, &str_len, &lstart, &length, &no_length) == FAILURE) {
     389           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     390             :                          "grapheme_substr: unable to parse input param", 0 );
     391           1 :                 RETURN_FALSE;
     392             :         }
     393             : 
     394          70 :         if ( OUTSIDE_STRING(lstart, str_len)) {
     395           5 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: start not contained in string", 1 );
     396           5 :                 RETURN_FALSE;
     397             :         }
     398             : 
     399             :         /* we checked that it will fit: */
     400          65 :         start = (int32_t) lstart;
     401             : 
     402          65 :         if(no_length) {
     403          22 :                 length = str_len;
     404             :         }
     405             : 
     406          65 :         if(length < INT32_MIN) {
     407           0 :                 length = INT32_MIN;
     408          65 :         } else if(length > INT32_MAX) {
     409           0 :                 length = INT32_MAX;
     410             :         }
     411             : 
     412             :         /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
     413             : 
     414          65 :         if ( grapheme_ascii_check((unsigned char *)str, str_len) >= 0 ) {
     415             :                 int32_t asub_str_len;
     416             :                 char *sub_str;
     417           9 :                 grapheme_substr_ascii(str, str_len, start, (int32_t)length, &sub_str, &asub_str_len);
     418             : 
     419           9 :                 if ( NULL == sub_str ) {
     420           1 :                         intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: invalid parameters", 1 );
     421           1 :                         RETURN_FALSE;
     422             :                 }
     423             : 
     424          16 :                 RETURN_STRINGL(sub_str, asub_str_len);
     425             :         }
     426             : 
     427          56 :         ustr = NULL;
     428          56 :         ustr_len = 0;
     429          56 :         status = U_ZERO_ERROR;
     430          56 :         intl_convert_utf8_to_utf16(&ustr, &ustr_len, str, str_len, &status);
     431             : 
     432          56 :         if ( U_FAILURE( status ) ) {
     433             :                 /* Set global error code. */
     434           0 :                 intl_error_set_code( NULL, status );
     435             : 
     436             :                 /* Set error messages. */
     437           0 :                 intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 );
     438           0 :                 if (ustr) {
     439           0 :                         efree( ustr );
     440             :                 }
     441           0 :                 RETURN_FALSE;
     442             :         }
     443             : 
     444          56 :         bi = grapheme_get_break_iterator((void*)u_break_iterator_buffer, &status );
     445             : 
     446          56 :         if( U_FAILURE(status) ) {
     447           0 :                 RETURN_FALSE;
     448             :         }
     449             : 
     450          56 :         ubrk_setText(bi, ustr, ustr_len,        &status);
     451             : 
     452          56 :         if ( start < 0 ) {
     453          28 :                 iter_func = ubrk_previous;
     454          28 :                 ubrk_last(bi);
     455          28 :                 iter_val = 1;
     456             :         }
     457             :         else {
     458          28 :                 iter_func = ubrk_next;
     459          28 :                 iter_val = -1;
     460             :         }
     461             : 
     462          56 :         sub_str_start_pos = 0;
     463             : 
     464         377 :         while ( start ) {
     465         265 :                 sub_str_start_pos = iter_func(bi);
     466             : 
     467         265 :                 if ( UBRK_DONE == sub_str_start_pos ) {
     468           0 :                         break;
     469             :                 }
     470             : 
     471         265 :                 start += iter_val;
     472             :         }
     473             : 
     474          56 :         if ( 0 != start || sub_str_start_pos >= ustr_len ) {
     475             : 
     476           3 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: start not contained in string", 1 );
     477             : 
     478           3 :                 if (ustr) {
     479           3 :                         efree(ustr);
     480             :                 }
     481           3 :                 ubrk_close(bi);
     482           3 :                 RETURN_FALSE;
     483             :         }
     484             : 
     485             :         /* OK to convert here since if str_len were big, convert above would fail */
     486          53 :         if (length >= (int32_t)str_len) {
     487             : 
     488             :                 /* no length supplied or length is too big, return the rest of the string */
     489             : 
     490          18 :                 status = U_ZERO_ERROR;
     491          18 :                 u8_sub_str = intl_convert_utf16_to_utf8(ustr + sub_str_start_pos, ustr_len - sub_str_start_pos, &status);
     492             : 
     493          18 :                 if (ustr) {
     494          18 :                         efree( ustr );
     495             :                 }
     496          18 :                 ubrk_close( bi );
     497             : 
     498          18 :                 if ( !u8_sub_str ) {
     499             :                         /* Set global error code. */
     500           0 :                         intl_error_set_code( NULL, status );
     501             : 
     502             :                         /* Set error messages. */
     503           0 :                         intl_error_set_custom_msg( NULL, "Error converting output string to UTF-8", 0 );
     504             : 
     505           0 :                         RETURN_FALSE;
     506             :                 }
     507             : 
     508             :                 /* return the allocated string, not a duplicate */
     509          18 :                 RETVAL_NEW_STR(u8_sub_str);
     510          18 :                 return;
     511             :         }
     512             : 
     513          35 :         if(length == 0) {
     514             :                 /* empty length - we've validated start, we can return "" now */
     515           2 :                 if (ustr) {
     516           2 :                         efree(ustr);
     517             :                 }
     518           2 :                 ubrk_close(bi);
     519           2 :                 RETURN_EMPTY_STRING();
     520             :         }
     521             : 
     522             :         /* find the end point of the string to return */
     523             : 
     524          33 :         if ( length < 0 ) {
     525          24 :                 iter_func = ubrk_previous;
     526          24 :                 ubrk_last(bi);
     527          24 :                 iter_val = 1;
     528             :         }
     529             :         else {
     530           9 :                 iter_func = ubrk_next;
     531           9 :                 iter_val = -1;
     532             :         }
     533             : 
     534          33 :         sub_str_end_pos = 0;
     535             : 
     536         220 :         while ( length ) {
     537         158 :                 sub_str_end_pos = iter_func(bi);
     538             : 
     539         158 :                 if ( UBRK_DONE == sub_str_end_pos ) {
     540           4 :                         break;
     541             :                 }
     542             : 
     543         154 :                 length += iter_val;
     544             :         }
     545             : 
     546          33 :         ubrk_close(bi);
     547             : 
     548          33 :         if ( UBRK_DONE == sub_str_end_pos) {
     549           4 :                 if(length < 0) {
     550           3 :                         intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: length not contained in string", 1 );
     551             : 
     552           3 :                         efree(ustr);
     553           3 :                         RETURN_FALSE;
     554             :                 } else {
     555           1 :                         sub_str_end_pos = ustr_len;
     556             :                 }
     557             :         }
     558             : 
     559          30 :         if(sub_str_start_pos > sub_str_end_pos) {
     560           2 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: length is beyond start", 1 );
     561             : 
     562           2 :                 efree(ustr);
     563           2 :                 RETURN_FALSE;
     564             :         }
     565             : 
     566          28 :         status = U_ZERO_ERROR;
     567          28 :         u8_sub_str = intl_convert_utf16_to_utf8(ustr + sub_str_start_pos, ( sub_str_end_pos - sub_str_start_pos ), &status);
     568             : 
     569          28 :         efree( ustr );
     570             : 
     571          28 :         if ( !u8_sub_str ) {
     572             :                 /* Set global error code. */
     573           0 :                 intl_error_set_code( NULL, status );
     574             : 
     575             :                 /* Set error messages. */
     576           0 :                 intl_error_set_custom_msg( NULL, "Error converting output string to UTF-8", 0 );
     577             : 
     578           0 :                 RETURN_FALSE;
     579             :         }
     580             : 
     581             :          /* return the allocated string, not a duplicate */
     582          28 :         RETVAL_NEW_STR(u8_sub_str);
     583             : }
     584             : /* }}} */
     585             : 
     586             : /* {{{  strstr_common_handler */
     587          73 : static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_case)
     588             : {
     589             :         char *haystack, *needle;
     590             :         const char *found;
     591             :         size_t haystack_len, needle_len;
     592             :         int32_t ret_pos, uchar_pos;
     593          73 :         zend_bool part = 0;
     594             : 
     595          73 :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|b", &haystack, &haystack_len, &needle, &needle_len, &part) == FAILURE) {
     596             : 
     597           2 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     598             :                          "grapheme_strstr: unable to parse input param", 0 );
     599             : 
     600           2 :                 RETURN_FALSE;
     601             :         }
     602             : 
     603          71 :         if (needle_len == 0) {
     604             : 
     605           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Empty delimiter", 1 );
     606             : 
     607           0 :                 RETURN_FALSE;
     608             :         }
     609             : 
     610             : 
     611          71 :         if ( !f_ignore_case ) {
     612             : 
     613             :                 /* ASCII optimization: quick check to see if the string might be there
     614             :                  * I realize that 'offset' is 'grapheme count offset' but will work in spite of that
     615             :                 */
     616          70 :                 found = php_memnstr(haystack, needle, needle_len, haystack + haystack_len);
     617             : 
     618             :                 /* if it isn't there the we are done */
     619          35 :                 if ( !found ) {
     620           3 :                         RETURN_FALSE;
     621             :                 }
     622             : 
     623             :                 /* if it is there, and if the haystack is ascii, we are all done */
     624          32 :                 if ( grapheme_ascii_check((unsigned char *)haystack, haystack_len) >= 0 ) {
     625          13 :                         size_t found_offset = found - haystack;
     626             : 
     627          13 :                         if (part) {
     628          10 :                                 RETURN_STRINGL(haystack, found_offset);
     629             :                         } else {
     630          16 :                                 RETURN_STRINGL(found, haystack_len - found_offset);
     631             :                         }
     632             :                 }
     633             : 
     634             :         }
     635             : 
     636             :         /* need to work in utf16 */
     637          55 :         ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, 0, &uchar_pos, f_ignore_case, 0 /*last */ );
     638             : 
     639          55 :         if ( ret_pos < 0 ) {
     640           9 :                 RETURN_FALSE;
     641             :         }
     642             : 
     643             :         /* uchar_pos is the 'nth' Unicode character position of the needle */
     644             : 
     645          46 :         ret_pos = 0;
     646          46 :         U8_FWD_N(haystack, ret_pos, haystack_len, uchar_pos);
     647             : 
     648          46 :         if (part) {
     649          30 :                 RETURN_STRINGL(haystack, ret_pos);
     650             :         } else {
     651          62 :                 RETURN_STRINGL(haystack + ret_pos, haystack_len - ret_pos);
     652             :         }
     653             : 
     654             : }
     655             : /* }}} */
     656             : 
     657             : /* {{{ proto string grapheme_strstr(string haystack, string needle[, bool part])
     658             :    Finds first occurrence of a string within another */
     659          36 : PHP_FUNCTION(grapheme_strstr)
     660             : {
     661          36 :         strstr_common_handler(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0 /* f_ignore_case */);
     662          36 : }
     663             : /* }}} */
     664             : 
     665             : /* {{{ proto string grapheme_stristr(string haystack, string needle[, bool part])
     666             :    Finds first occurrence of a string within another */
     667          37 : PHP_FUNCTION(grapheme_stristr)
     668             : {
     669          37 :         strstr_common_handler(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1 /* f_ignore_case */);
     670          37 : }
     671             : /* }}} */
     672             : 
     673             : /* {{{ grapheme_extract_charcount_iter - grapheme iterator for grapheme_extract MAXCHARS */
     674             : static inline int32_t
     675          18 : grapheme_extract_charcount_iter(UBreakIterator *bi, int32_t csize, unsigned char *pstr, int32_t str_len)
     676             : {
     677          18 :         int pos = 0, prev_pos = 0;
     678          18 :         int ret_pos = 0, prev_ret_pos = 0;
     679             : 
     680             :         while ( 1 ) {
     681          96 :                 pos = ubrk_next(bi);
     682             : 
     683          96 :                 if ( UBRK_DONE == pos ) {
     684           7 :                         break;
     685             :                 }
     686             : 
     687             :                 /* if we are beyond our limit, then the loop is done */
     688          89 :                 if ( pos > csize ) {
     689          11 :                         break;
     690             :                 }
     691             : 
     692             :                 /* update our pointer in the original UTF-8 buffer by as many characters
     693             :                    as ubrk_next iterated over */
     694             : 
     695          78 :                 prev_ret_pos = ret_pos;
     696          78 :                 U8_FWD_N(pstr, ret_pos, str_len, pos - prev_pos);
     697             : 
     698          78 :                 if ( prev_ret_pos == ret_pos ) {
     699             :                         /* something wrong - malformed utf8? */
     700           0 :                         break;
     701             :                 }
     702             : 
     703          78 :                 prev_pos = pos;
     704          78 :         }
     705             : 
     706          18 :         return ret_pos;
     707             : }
     708             : /* }}} */
     709             : 
     710             : /* {{{ grapheme_extract_bytecount_iter - grapheme iterator for grapheme_extract MAXBYTES */
     711             : static inline int32_t
     712          23 : grapheme_extract_bytecount_iter(UBreakIterator *bi, int32_t bsize, unsigned char *pstr, int32_t str_len)
     713             : {
     714          23 :         int pos = 0, prev_pos = 0;
     715          23 :         int ret_pos = 0, prev_ret_pos = 0;
     716             : 
     717             :         while ( 1 ) {
     718          63 :                 pos = ubrk_next(bi);
     719             : 
     720          63 :                 if ( UBRK_DONE == pos ) {
     721           8 :                         break;
     722             :                 }
     723             : 
     724          55 :                 prev_ret_pos = ret_pos;
     725          55 :                 U8_FWD_N(pstr, ret_pos, str_len, pos - prev_pos);
     726             : 
     727          55 :                 if ( ret_pos > bsize ) {
     728          15 :                         ret_pos = prev_ret_pos;
     729          15 :                         break;
     730             :                 }
     731             : 
     732          40 :                 if ( prev_ret_pos == ret_pos ) {
     733             :                         /* something wrong - malformed utf8? */
     734           0 :                         break;
     735             :                 }
     736             : 
     737          40 :                 prev_pos = pos;
     738          40 :         }
     739             : 
     740          23 :         return ret_pos;
     741             : }
     742             : /* }}} */
     743             : 
     744             : /* {{{ grapheme_extract_count_iter - grapheme iterator for grapheme_extract COUNT */
     745             : static inline int32_t
     746          21 : grapheme_extract_count_iter(UBreakIterator *bi, int32_t size, unsigned char *pstr, int32_t str_len)
     747             : {
     748          21 :         int pos = 0, next_pos = 0;
     749          21 :         int ret_pos = 0;
     750             : 
     751          80 :         while ( size ) {
     752          40 :                 next_pos = ubrk_next(bi);
     753             : 
     754          40 :                 if ( UBRK_DONE == next_pos ) {
     755           2 :                         break;
     756             :                 }
     757          38 :                 pos = next_pos;
     758          38 :                 size--;
     759             :         }
     760             : 
     761             :         /* pos is one past the last UChar - and represent the number of code units to
     762             :                 advance in the utf-8 buffer
     763             :         */
     764             : 
     765          21 :         U8_FWD_N(pstr, ret_pos, str_len, pos);
     766             : 
     767          21 :         return ret_pos;
     768             : }
     769             : /* }}} */
     770             : 
     771             : /* {{{ grapheme extract iter function pointer array */
     772             : typedef int32_t (*grapheme_extract_iter)(UBreakIterator * /*bi*/, int32_t /*size*/, unsigned char * /*pstr*/, int32_t /*str_len*/);
     773             : 
     774             : static grapheme_extract_iter grapheme_extract_iters[] = {
     775             :         &grapheme_extract_count_iter,
     776             :         &grapheme_extract_bytecount_iter,
     777             :         &grapheme_extract_charcount_iter,
     778             : };
     779             : /* }}} */
     780             : 
     781             : /* {{{ proto string grapheme_extract(string str, int size[, int extract_type[, int start[, int next]]])
     782             :         Function to extract a sequence of default grapheme clusters */
     783         106 : PHP_FUNCTION(grapheme_extract)
     784             : {
     785             :         char *str, *pstr;
     786             :         UChar *ustr;
     787             :         size_t str_len;
     788             :         int32_t ustr_len;
     789             :         zend_long size; /* maximum number of grapheme clusters, bytes, or characters (based on extract_type) to return */
     790         106 :         zend_long lstart = 0; /* starting position in str in bytes */
     791         106 :         int32_t start = 0;
     792         106 :         zend_long extract_type = GRAPHEME_EXTRACT_TYPE_COUNT;
     793             :         UErrorCode status;
     794             :         unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
     795         106 :         UBreakIterator* bi = NULL;
     796             :         int ret_pos;
     797         106 :         zval *next = NULL; /* return offset of next part of the string */
     798             : 
     799         106 :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|llz", &str, &str_len, &size, &extract_type, &lstart, &next) == FAILURE) {
     800           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     801             :                          "grapheme_extract: unable to parse input param", 0 );
     802           1 :                 RETURN_FALSE;
     803             :         }
     804             : 
     805         105 :         if ( NULL != next ) {
     806          32 :                 if ( !Z_ISREF_P(next) ) {
     807           0 :                         intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     808             :                                  "grapheme_extract: 'next' was not passed by reference", 0 );
     809           0 :                         RETURN_FALSE;
     810             :                 } else {
     811          32 :                         ZVAL_DEREF(next);
     812             :                         /* initialize next */
     813          17 :                         SEPARATE_ZVAL(next);
     814          16 :                         zval_dtor(next);
     815          16 :             ZVAL_LONG(next, lstart);
     816             :                 }
     817             :         }
     818             : 
     819         105 :         if ( extract_type < GRAPHEME_EXTRACT_TYPE_MIN || extract_type > GRAPHEME_EXTRACT_TYPE_MAX ) {
     820           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     821             :                          "grapheme_extract: unknown extract type param", 0 );
     822           1 :                 RETURN_FALSE;
     823             :         }
     824             : 
     825         104 :         if ( lstart > INT32_MAX || lstart < 0 || lstart >= str_len ) {
     826           5 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_extract: start not contained in string", 0 );
     827           5 :                 RETURN_FALSE;
     828             :         }
     829             : 
     830          99 :         if ( size > INT32_MAX || size < 0) {
     831           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_extract: size is invalid", 0 );
     832           0 :                 RETURN_FALSE;
     833             :         }
     834          99 :         if (size == 0) {
     835           8 :                 RETURN_EMPTY_STRING();
     836             :         }
     837             : 
     838             :         /* we checked that it will fit: */
     839          91 :         start = (int32_t) lstart;
     840             : 
     841          91 :         pstr = str + start;
     842             : 
     843             :         /* just in case pstr points in the middle of a character, move forward to the start of the next char */
     844          91 :         if ( !UTF8_IS_SINGLE(*pstr) && !U8_IS_LEAD(*pstr) ) {
     845           9 :                 char *str_end = str + str_len;
     846             : 
     847          25 :                 while ( !UTF8_IS_SINGLE(*pstr) && !U8_IS_LEAD(*pstr) ) {
     848           9 :                         pstr++;
     849           9 :                         if ( pstr >= str_end ) {
     850           2 :                                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     851             :                                                                 "grapheme_extract: invalid input string", 0 );
     852             : 
     853           2 :                                 RETURN_FALSE;
     854             :                         }
     855             :                 }
     856             :         }
     857             : 
     858          89 :         str_len -= (pstr - str);
     859             : 
     860             :         /* if the string is all ASCII up to size+1 - or str_len whichever is first - then we are done.
     861             :                 (size + 1 because the size-th character might be the beginning of a grapheme cluster)
     862             :          */
     863             : 
     864          89 :         if ( -1 != grapheme_ascii_check((unsigned char *)pstr, MIN(size + 1, str_len)) ) {
     865          27 :         size_t nsize = MIN(size, str_len);
     866          27 :                 if ( NULL != next ) {
     867           9 :                         ZVAL_LONG(next, start+nsize);
     868             :                 }
     869          54 :                 RETURN_STRINGL(pstr, nsize);
     870             :         }
     871             : 
     872             :         /* convert the strings to UTF-16. */
     873          62 :         ustr = NULL;
     874          62 :         ustr_len = 0;
     875          62 :         status = U_ZERO_ERROR;
     876          62 :         intl_convert_utf8_to_utf16(&ustr, &ustr_len, pstr, str_len, &status );
     877             : 
     878          62 :         if ( U_FAILURE( status ) ) {
     879             :                 /* Set global error code. */
     880           0 :                 intl_error_set_code( NULL, status );
     881             : 
     882             :                 /* Set error messages. */
     883           0 :                 intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 );
     884             : 
     885           0 :                 if ( NULL != ustr )
     886           0 :                         efree( ustr );
     887             : 
     888           0 :                 RETURN_FALSE;
     889             :         }
     890             : 
     891          62 :         bi = NULL;
     892          62 :         status = U_ZERO_ERROR;
     893          62 :         bi = grapheme_get_break_iterator(u_break_iterator_buffer, &status );
     894             : 
     895          62 :         ubrk_setText(bi, ustr, ustr_len, &status);
     896             : 
     897             :         /* if the caller put us in the middle of a grapheme, we can't detect it in all cases since we
     898             :                 can't back up. So, we will not do anything. */
     899             : 
     900             :         /* now we need to find the end of the chunk the user wants us to return */
     901             :         /* it's ok to convert str_len to in32_t since if it were too big intl_convert_utf8_to_utf16 above would fail */
     902          62 :         ret_pos = (*grapheme_extract_iters[extract_type])(bi, size, (unsigned char *)pstr, (int32_t)str_len);
     903             : 
     904          62 :         if (ustr) {
     905          62 :                 efree(ustr);
     906             :         }
     907          62 :         ubrk_close(bi);
     908             : 
     909          62 :         if ( NULL != next ) {
     910           4 :                 ZVAL_LONG(next, start+ret_pos);
     911             :         }
     912             : 
     913         124 :         RETURN_STRINGL(((char *)pstr), ret_pos);
     914             : }
     915             : 
     916             : /* }}} */
     917             : 
     918             : /*
     919             :  * Local variables:
     920             :  * tab-width: 4
     921             :  * c-basic-offset: 4
     922             :  * End:
     923             :  * vim600: fdm=marker
     924             :  * vim: noet sw=4 ts=4
     925             :  */
     926             : 

Generated by: LCOV version 1.10

Generated at Sat, 29 Aug 2015 10:22:16 +0000 (2 days ago)

Copyright © 2005-2015 The PHP Group
All rights reserved.