PHP  
 PHP: Test and Code Coverage Analysis
downloads | QA | documentation | faq | getting help | mailing lists | reporting bugs | php.net sites | links | my php.net 
 

LCOV - code coverage report
Current view: top level - ext/intl/grapheme - grapheme_string.c (source / functions) Hit Total Coverage
Test: PHP Code Coverage Lines: 334 384 87.0 %
Date: 2015-02-21 Functions: 14 14 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :    +----------------------------------------------------------------------+
       3             :    | PHP Version 5                                                                                                                |
       4             :    +----------------------------------------------------------------------+
       5             :    | This source file is subject to version 3.01 of the PHP license,      |
       6             :    | that is bundled with this package in the file LICENSE, and is                |
       7             :    | available through the world-wide-web at the following url:                   |
       8             :    | http://www.php.net/license/3_01.txt                                                                  |
       9             :    | If you did not receive a copy of the PHP license and are unable to   |
      10             :    | obtain it through the world-wide-web, please send a note to                  |
      11             :    | license@php.net so we can mail you a copy immediately.                               |
      12             :    +----------------------------------------------------------------------+
      13             :    | Author: Ed Batutis <ed@batutis.com>                                                            |
      14             :    +----------------------------------------------------------------------+
      15             :  */
      16             : 
      17             : /* {{{ includes */
      18             : #ifdef HAVE_CONFIG_H
      19             : #include "config.h"
      20             : #endif
      21             : 
      22             : #include <php.h>
      23             : #include "grapheme.h"
      24             : #include "grapheme_util.h"
      25             : 
      26             : #include <unicode/utypes.h>
      27             : #include <unicode/ucol.h>
      28             : #include <unicode/ustring.h>
      29             : #include <unicode/ubrk.h>
      30             : 
      31             : #include "ext/standard/php_string.h"
      32             : 
      33             : /* }}} */
      34             : 
      35             : #define GRAPHEME_EXTRACT_TYPE_COUNT             0 /* registered as GRAPHEME_EXTR_COUNT */
      36             : #define GRAPHEME_EXTRACT_TYPE_MAXBYTES  1 /* registered as GRAPHEME_EXTR_MAXBYTES */
      37             : #define GRAPHEME_EXTRACT_TYPE_MAXCHARS  2 /* registered as GRAPHEME_EXTR_MAXCHARS */
      38             : #define GRAPHEME_EXTRACT_TYPE_MIN       GRAPHEME_EXTRACT_TYPE_COUNT /* smallest of the three values above */
      39             : #define GRAPHEME_EXTRACT_TYPE_MAX       GRAPHEME_EXTRACT_TYPE_MAXCHARS /* largest of the three values above */
      40             : 
      41             : 
      42             : /* {{{ grapheme_register_constants
      43             :  * Register the API constants GRAPHEME_EXTR_COUNT, GRAPHEME_EXTR_MAXBYTES and GRAPHEME_EXTR_MAXCHARS as long constants, each with the CONST_CS | CONST_PERSISTENT flags.
      44             :  */
      45       20871 : void grapheme_register_constants( INIT_FUNC_ARGS )
      46             : {
      47       20871 :         REGISTER_LONG_CONSTANT("GRAPHEME_EXTR_COUNT", GRAPHEME_EXTRACT_TYPE_COUNT, CONST_CS | CONST_PERSISTENT);
      48       20871 :         REGISTER_LONG_CONSTANT("GRAPHEME_EXTR_MAXBYTES", GRAPHEME_EXTRACT_TYPE_MAXBYTES, CONST_CS | CONST_PERSISTENT);
      49       20871 :         REGISTER_LONG_CONSTANT("GRAPHEME_EXTR_MAXCHARS", GRAPHEME_EXTRACT_TYPE_MAXCHARS, CONST_CS | CONST_PERSISTENT);
      50       20871 : }
      51             : /* }}} */
      52             : 
      53             : /* {{{ proto size_t grapheme_strlen(string str)
      54             :    Get number of graphemes in a string. Yields FALSE when the argument cannot be parsed or the grapheme split reports a negative count, and NULL when the UTF-8 to UTF-16 conversion fails */
      55           7 : PHP_FUNCTION(grapheme_strlen)
      56             : {
      57             :         char* string;
      58             :         size_t string_len;
      59           7 :         UChar* ustring = NULL;
      60           7 :         int ustring_len = 0;
      61             :         zend_long ret_len;
      62             :         UErrorCode status;
      63             : 
      64           7 :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &string, &string_len) == FAILURE) {
      65           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
      66             :                          "grapheme_strlen: unable to parse input param", 0 );
      67           1 :                 RETURN_FALSE;
      68             :         }
      69             : 
      70           6 :         ret_len = grapheme_ascii_check((unsigned char *)string, string_len);
      71             : 
      72           6 :         if ( ret_len >= 0 )
      73           2 :                 RETURN_LONG(string_len); /* check passed (presumably a pure ASCII string - confirm in grapheme_util): the byte length is returned as the count */
      74             : 
      75             :         /* convert the string to UTF-16. */
      76           4 :         status = U_ZERO_ERROR;
      77           4 :         intl_convert_utf8_to_utf16(&ustring, &ustring_len, string, string_len, &status );
      78             : 
      79           4 :         if ( U_FAILURE( status ) ) {
      80             :                 /* Set global error code. */
      81           0 :                 intl_error_set_code( NULL, status );
      82             : 
      83             :                 /* Set error messages. */
      84           0 :                 intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 );
      85           0 :                 if (ustring) {
      86           0 :                         efree( ustring );
      87             :                 }
      88           0 :                 RETURN_NULL(); /* NOTE(review): this failure path yields NULL while the other failure paths in this function yield FALSE */
      89             :         }
      90             : 
      91           4 :         ret_len = grapheme_split_string(ustring, ustring_len, NULL, 0 ); /* NULL/0 is passed for the output arguments; only the returned count is used */
      92             : 
      93           4 :         if (ustring) {
      94           4 :                 efree( ustring );
      95             :         }
      96             : 
      97           4 :         if (ret_len >= 0) {
      98           4 :                 RETVAL_LONG(ret_len);
      99             :         } else {
     100           0 :                 RETVAL_FALSE;
     101             :         }
     102             : }
     103             : /* }}} */
     104             : 
     105             : /* {{{ proto int grapheme_strpos(string haystack, string needle [, int offset ])
     106             :    Find position of first occurrence of a string within another. Yields FALSE on unparsable arguments, an offset rejected by OUTSIDE_STRING, an empty needle, or when the needle is not found */
     107          38 : PHP_FUNCTION(grapheme_strpos)
     108             : {
     109             :         char *haystack, *needle;
     110             :         size_t haystack_len, needle_len;
     111             :         const char *found;
     112          38 :         zend_long loffset = 0;
     113          38 :         int32_t offset = 0;
     114             :         zend_long ret_pos;
     115             : 
     116          38 :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|l", &haystack, &haystack_len, &needle, &needle_len, &loffset) == FAILURE) {
     117           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     118             :                          "grapheme_strpos: unable to parse input param", 0 );
     119           1 :                 RETURN_FALSE;
     120             :         }
     121             : 
     122          37 :         if ( OUTSIDE_STRING(loffset, haystack_len) ) {
     123           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Offset not contained in string", 1 );
     124           1 :                 RETURN_FALSE;
     125             :         }
     126             : 
     127             :         /* we checked that it will fit: */
     128          36 :         offset = (int32_t) loffset;
     129             : 
     130             :         /* the offset is a grapheme count offset, so it still might be invalid - it is checked later */
     131             : 
     132          36 :         if (needle_len == 0) {
     133           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Empty delimiter", 1 );
     134           0 :                 RETURN_FALSE;
     135             :         }
     136             : 
     137             : 
     138             :         /* quick byte-level check to see if the needle might be there at all.
     139             :          * offset is a grapheme count, not a byte count, but it is used as a byte offset here; the original author notes that the check still works in spite of that
     140             :         */
     141          72 :         found = php_memnstr(haystack + offset, needle, needle_len, haystack + haystack_len);
     142             : 
     143             :         /* if it isn't there then we are done */
     144          36 :         if (!found) {
     145           9 :                 RETURN_FALSE;
     146             :         }
     147             : 
     148             :         /* if it is there, and if the haystack is ascii, we are all done: the byte position is the answer */
     149          27 :         if ( grapheme_ascii_check((unsigned char *)haystack, haystack_len) >= 0 ) {
     150          12 :                 RETURN_LONG(found - haystack);
     151             :         }
     152             : 
     153             :         /* do utf16 part of the strpos */
     154          15 :         ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* fIgnoreCase */, 0 /* last */ );
     155             : 
     156          15 :         if ( ret_pos >= 0 ) {
     157          13 :                 RETURN_LONG(ret_pos);
     158             :         } else {
     159           2 :                 RETURN_FALSE;
     160             :         }
     161             : 
     162             : }
     163             : /* }}} */
     164             : 
     165             : /* {{{ proto int grapheme_stripos(string haystack, string needle [, int offset ])
     166             :    Find position of first occurrence of a string within another, ignoring case differences. Yields FALSE on unparsable arguments, an offset rejected by OUTSIDE_STRING, an empty needle, or when the needle is not found */
     167          40 : PHP_FUNCTION(grapheme_stripos)
     168             : {
     169             :         char *haystack, *needle, *haystack_dup, *needle_dup;
     170             :         size_t haystack_len, needle_len;
     171             :         const char *found;
     172          40 :         zend_long loffset = 0;
     173          40 :         int32_t offset = 0;
     174             :         zend_long ret_pos;
     175             :         int is_ascii;
     176             : 
     177          40 :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|l", &haystack, &haystack_len, &needle, &needle_len, &loffset) == FAILURE) {
     178           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     179             :                          "grapheme_stripos: unable to parse input param", 0 );
     180           1 :                 RETURN_FALSE;
     181             :         }
     182             : 
     183          39 :         if ( OUTSIDE_STRING(loffset, haystack_len) ) {
     184           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_stripos: Offset not contained in string", 1 );
     185           1 :                 RETURN_FALSE;
     186             :         }
     187             : 
     188             :         /* we checked that it will fit: */
     189          38 :         offset = (int32_t) loffset;
     190             : 
     191             :         /* the offset is a grapheme count offset, so it still might be invalid - it is checked later */
     192             : 
     193          38 :         if (needle_len == 0) {
     194           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_stripos: Empty delimiter", 1 );
     195           0 :                 RETURN_FALSE;
     196             :         }
     197             : 
     198             : 
     199          38 :         is_ascii = ( grapheme_ascii_check((unsigned char*)haystack, haystack_len) >= 0 );
     200             : 
     201          38 :         if ( is_ascii ) {
     202          19 :                 needle_dup = estrndup(needle, needle_len); /* search lowercased copies so the byte comparison ignores case */
     203          19 :                 php_strtolower(needle_dup, needle_len);
     204          19 :                 haystack_dup = estrndup(haystack, haystack_len);
     205          19 :                 php_strtolower(haystack_dup, haystack_len);
     206             : 
     207          38 :                 found = php_memnstr(haystack_dup + offset, needle_dup, needle_len, haystack_dup + haystack_len);
     208             : 
     209          19 :                 efree(haystack_dup);
     210          19 :                 efree(needle_dup);
     211             : 
     212          19 :                 if (found) {
     213          12 :                         RETURN_LONG(found - haystack_dup); /* NOTE(review): haystack_dup was already passed to efree() above; only the pointer difference is taken here, nothing is dereferenced */
     214             :                 }
     215             : 
     216             :                 /* if needle was ascii too, we are all done, otherwise we need to try using Unicode to see what we get */
     217           7 :                 if ( grapheme_ascii_check((unsigned char *)needle, needle_len) >= 0 ) {
     218           5 :                         RETURN_FALSE;
     219             :                 }
     220             :         }
     221             : 
     222             :         /* do utf16 part of the strpos, this time with the ignore-case flag set */
     223          21 :         ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 1 /* fIgnoreCase */, 0 /*last */ );
     224             : 
     225          21 :         if ( ret_pos >= 0 ) {
     226          15 :                 RETURN_LONG(ret_pos);
     227             :         } else {
     228           6 :                 RETURN_FALSE;
     229             :         }
     230             : 
     231             : }
     232             : /* }}} */
     233             : 
     234             : /* {{{ proto int grapheme_strrpos(string haystack, string needle [, int offset])
     235             :    Find position of last occurrence of a string within another. Yields FALSE on unparsable arguments, an offset rejected by OUTSIDE_STRING, an empty needle, or when the needle is not found */
     236          37 : PHP_FUNCTION(grapheme_strrpos)
     237             : {
     238             :         char *haystack, *needle;
     239             :         size_t haystack_len, needle_len;
     240          37 :         zend_long loffset = 0;
     241          37 :         int32_t offset = 0;
     242             :         zend_long ret_pos;
     243             :         int is_ascii;
     244             : 
     245          37 :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|l", &haystack, &haystack_len, &needle, &needle_len, &loffset) == FAILURE) {
     246           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     247             :                          "grapheme_strrpos: unable to parse input param", 0 );
     248           1 :                 RETURN_FALSE;
     249             :         }
     250             : 
     251          36 :         if ( OUTSIDE_STRING(loffset, haystack_len) ) {
     252           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Offset not contained in string", 1 ); /* NOTE(review): the message names grapheme_strpos although this is grapheme_strrpos */
     253           0 :                 RETURN_FALSE;
     254             :         }
     255             : 
     256             :         /* we checked that it will fit: */
     257          36 :         offset = (int32_t) loffset;
     258             : 
     259             :         /* the offset is a grapheme count offset, so it still might be invalid - it is checked later */
     260             : 
     261          36 :         if (needle_len == 0) {
     262           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Empty delimiter", 1 ); /* NOTE(review): the message names grapheme_strpos although this is grapheme_strrpos */
     263           0 :                 RETURN_FALSE;
     264             :         }
     265             : 
     266          36 :         is_ascii = grapheme_ascii_check((unsigned char *)haystack, haystack_len) >= 0;
     267             : 
     268          36 :         if ( is_ascii ) {
     269             : 
     270          19 :                 ret_pos = grapheme_strrpos_ascii(haystack, haystack_len, needle, needle_len, offset);
     271             : 
     272          19 :                 if ( ret_pos >= 0 ) {
     273          12 :                         RETURN_LONG(ret_pos);
     274             :                 }
     275             : 
     276             :                 /* no byte-level match: if the needle was ascii too, we are done */
     277             : 
     278           7 :                 if (  grapheme_ascii_check((unsigned char *)needle, needle_len) >= 0 ) {
     279           5 :                         RETURN_FALSE;
     280             :                 }
     281             : 
     282             :                 /* else we need to continue via utf16 */
     283             :         }
     284             : 
     285          19 :         ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* f_ignore_case */, 1/* last */);
     286             : 
     287          19 :         if ( ret_pos >= 0 ) {
     288          13 :                 RETURN_LONG(ret_pos);
     289             :         } else {
     290           6 :                 RETURN_FALSE;
     291             :         }
     292             : 
     293             : 
     294             : }
     295             : /* }}} */
     296             : 
     297             : /* {{{ proto int grapheme_strripos(string haystack, string needle [, int offset])
     298             :    Find position of last occurrence of a string within another, ignoring case. Yields FALSE on unparsable arguments, an offset rejected by OUTSIDE_STRING, an empty needle, or when the needle is not found */
     299          37 : PHP_FUNCTION(grapheme_strripos)
     300             : {
     301             :         char *haystack, *needle;
     302             :         size_t haystack_len, needle_len;
     303          37 :         zend_long loffset = 0;
     304          37 :         int32_t offset = 0;
     305             :         zend_long ret_pos;
     306             :         int is_ascii;
     307             : 
     308          37 :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|l", &haystack, &haystack_len, &needle, &needle_len, &loffset) == FAILURE) {
     309           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     310             :                          "grapheme_strrpos: unable to parse input param", 0 ); /* NOTE(review): the message names grapheme_strrpos although this is grapheme_strripos */
     311           1 :                 RETURN_FALSE;
     312             :         }
     313             : 
     314          36 :         if ( OUTSIDE_STRING(loffset, haystack_len) ) {
     315           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Offset not contained in string", 1 ); /* NOTE(review): the message names grapheme_strpos although this is grapheme_strripos */
     316           0 :                 RETURN_FALSE;
     317             :         }
     318             : 
     319             :         /* we checked that it will fit: */
     320          36 :         offset = (int32_t) loffset;
     321             : 
     322             :         /* the offset is a grapheme count offset, so it still might be invalid - it is checked later */
     323             : 
     324          36 :         if (needle_len == 0) {
     325           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Empty delimiter", 1 ); /* NOTE(review): the message names grapheme_strpos although this is grapheme_strripos */
     326           0 :                 RETURN_FALSE;
     327             :         }
     328             : 
     329          36 :         is_ascii = grapheme_ascii_check((unsigned char *)haystack, haystack_len) >= 0;
     330             : 
     331          36 :         if ( is_ascii ) {
     332             :                 char *needle_dup, *haystack_dup;
     333             : 
     334          19 :                 needle_dup = estrndup(needle, needle_len); /* search lowercased copies so the byte comparison ignores case */
     335          19 :                 php_strtolower(needle_dup, needle_len);
     336          19 :                 haystack_dup = estrndup(haystack, haystack_len);
     337          19 :                 php_strtolower(haystack_dup, haystack_len);
     338             : 
     339          19 :                 ret_pos = grapheme_strrpos_ascii(haystack_dup, haystack_len, needle_dup, needle_len, offset);
     340             : 
     341          19 :                 efree(haystack_dup);
     342          19 :                 efree(needle_dup);
     343             : 
     344          19 :                 if ( ret_pos >= 0 ) {
     345          12 :                         RETURN_LONG(ret_pos);
     346             :                 }
     347             : 
     348             :                 /* no byte-level match: if the needle was ascii too, we are done */
     349             : 
     350           7 :                 if (  grapheme_ascii_check((unsigned char *)needle, needle_len) >= 0 ) {
     351           5 :                         RETURN_FALSE;
     352             :                 }
     353             : 
     354             :                 /* else we need to continue via utf16 */
     355             :         }
     356             : 
     357          19 :         ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL,  1 /* f_ignore_case */, 1 /*last */);
     358             : 
     359          19 :         if ( ret_pos >= 0 ) {
     360          13 :                 RETURN_LONG(ret_pos);
     361             :         } else {
     362           6 :                 RETURN_FALSE;
     363             :         }
     364             : 
     365             : 
     366             : }
     367             : /* }}} */
     368             : 
     369             : /* {{{ proto string grapheme_substr(string str, int start [, int length])
     370             :    Returns part of a string */
     371          71 : PHP_FUNCTION(grapheme_substr)
     372             : {
     373             :         char *str, *sub_str;
     374             :         UChar *ustr;
     375             :         size_t str_len;
     376             :         int32_t ustr_len;
     377             :         size_t sub_str_len;
     378          71 :         zend_long lstart = 0, length = 0;
     379          71 :         int32_t start = 0;
     380             :         int iter_val;
     381             :         UErrorCode status;
     382             :         unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
     383          71 :         UBreakIterator* bi = NULL;
     384             :         int sub_str_start_pos, sub_str_end_pos;
     385             :         int32_t (*iter_func)(UBreakIterator *);
     386          71 :         int no_length = 1;
     387             : 
     388          71 :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!", (char **)&str, &str_len, &lstart, &length, &no_length) == FAILURE) {
     389           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     390             :                          "grapheme_substr: unable to parse input param", 0 );
     391           1 :                 RETURN_FALSE;
     392             :         }
     393             : 
     394          70 :         if ( OUTSIDE_STRING(lstart, str_len)) {
     395           5 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: start not contained in string", 1 );
     396           5 :                 RETURN_FALSE;
     397             :         }
     398             : 
     399             :         /* we checked that it will fit: */
     400          65 :         start = (int32_t) lstart;
     401             : 
     402          65 :         if(no_length) {
     403          22 :                 length = str_len;
     404             :         }
     405             : 
     406          65 :         if(length < INT32_MIN) {
     407           0 :                 length = INT32_MIN;
     408          65 :         } else if(length > INT32_MAX) {
     409           0 :                 length = INT32_MAX;
     410             :         }
     411             : 
     412             :         /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
     413             : 
     414          65 :         if ( grapheme_ascii_check((unsigned char *)str, str_len) >= 0 ) {
     415             :                 int32_t asub_str_len;
     416           9 :                 grapheme_substr_ascii(str, str_len, start, (int32_t)length, &sub_str, &asub_str_len);
     417             : 
     418           9 :                 if ( NULL == sub_str ) {
     419           1 :                         intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: invalid parameters", 1 );
     420           1 :                         RETURN_FALSE;
     421             :                 }
     422             : 
     423          16 :                 RETURN_STRINGL(sub_str, asub_str_len);
     424             :         }
     425             : 
     426          56 :         ustr = NULL;
     427          56 :         ustr_len = 0;
     428          56 :         status = U_ZERO_ERROR;
     429          56 :         intl_convert_utf8_to_utf16(&ustr, &ustr_len, str, str_len, &status);
     430             : 
     431          56 :         if ( U_FAILURE( status ) ) {
     432             :                 /* Set global error code. */
     433           0 :                 intl_error_set_code( NULL, status );
     434             : 
     435             :                 /* Set error messages. */
     436           0 :                 intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 );
     437           0 :                 if (ustr) {
     438           0 :                         efree( ustr );
     439             :                 }
     440           0 :                 RETURN_FALSE;
     441             :         }
     442             : 
     443          56 :         bi = grapheme_get_break_iterator((void*)u_break_iterator_buffer, &status );
     444             : 
     445          56 :         if( U_FAILURE(status) ) {
     446           0 :                 RETURN_FALSE;
     447             :         }
     448             : 
     449          56 :         ubrk_setText(bi, ustr, ustr_len,        &status);
     450             : 
     451          56 :         if ( start < 0 ) {
     452          28 :                 iter_func = ubrk_previous;
     453          28 :                 ubrk_last(bi);
     454          28 :                 iter_val = 1;
     455             :         }
     456             :         else {
     457          28 :                 iter_func = ubrk_next;
     458          28 :                 iter_val = -1;
     459             :         }
     460             : 
     461          56 :         sub_str_start_pos = 0;
     462             : 
     463         377 :         while ( start ) {
     464         265 :                 sub_str_start_pos = iter_func(bi);
     465             : 
     466         265 :                 if ( UBRK_DONE == sub_str_start_pos ) {
     467           0 :                         break;
     468             :                 }
     469             : 
     470         265 :                 start += iter_val;
     471             :         }
     472             : 
     473          56 :         if ( 0 != start || sub_str_start_pos >= ustr_len ) {
     474             : 
     475           3 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: start not contained in string", 1 );
     476             : 
     477           3 :                 if (ustr) {
     478           3 :                         efree(ustr);
     479             :                 }
     480           3 :                 ubrk_close(bi);
     481           3 :                 RETURN_FALSE;
     482             :         }
     483             : 
     484             :         /* OK to convert here since if str_len were big, convert above would fail */
     485          53 :         if (length >= (int32_t)str_len) {
     486             : 
     487             :                 /* no length supplied or length is too big, return the rest of the string */
     488             : 
     489          18 :                 sub_str = NULL;
     490          18 :                 sub_str_len = 0;
     491          18 :                 status = U_ZERO_ERROR;
     492          18 :                 intl_convert_utf16_to_utf8(&sub_str, &sub_str_len, ustr + sub_str_start_pos, ustr_len - sub_str_start_pos, &status);
     493             : 
     494          18 :                 if (ustr) {
     495          18 :                         efree( ustr );
     496             :                 }
     497          18 :                 ubrk_close( bi );
     498             : 
     499          18 :                 if ( U_FAILURE( status ) ) {
     500             :                         /* Set global error code. */
     501           0 :                         intl_error_set_code( NULL, status );
     502             : 
     503             :                         /* Set error messages. */
     504           0 :                         intl_error_set_custom_msg( NULL, "Error converting output string to UTF-8", 0 );
     505             : 
     506           0 :                         if (sub_str) {
     507           0 :                                 efree( sub_str );
     508             :                         }
     509             : 
     510           0 :                         RETURN_FALSE;
     511             :                 }
     512             : 
     513             :                 /* return the allocated string, not a duplicate */
     514          36 :                 RETVAL_STRINGL(sub_str, sub_str_len);
     515             :                 //???
     516          18 :                 efree(sub_str);
     517          18 :                 return;
     518             :         }
     519             : 
     520          35 :         if(length == 0) {
     521             :                 /* empty length - we've validated start, we can return "" now */
     522           2 :                 if (ustr) {
     523           2 :                         efree(ustr);
     524             :                 }
     525           2 :                 ubrk_close(bi);
     526           2 :                 RETURN_EMPTY_STRING();
     527             :         }
     528             : 
     529             :         /* find the end point of the string to return */
     530             : 
     531          33 :         if ( length < 0 ) {
     532          24 :                 iter_func = ubrk_previous;
     533          24 :                 ubrk_last(bi);
     534          24 :                 iter_val = 1;
     535             :         }
     536             :         else {
     537           9 :                 iter_func = ubrk_next;
     538           9 :                 iter_val = -1;
     539             :         }
     540             : 
     541          33 :         sub_str_end_pos = 0;
     542             : 
     543         220 :         while ( length ) {
     544         158 :                 sub_str_end_pos = iter_func(bi);
     545             : 
     546         158 :                 if ( UBRK_DONE == sub_str_end_pos ) {
     547           4 :                         break;
     548             :                 }
     549             : 
     550         154 :                 length += iter_val;
     551             :         }
     552             : 
     553          33 :         ubrk_close(bi);
     554             : 
     555          33 :         if ( UBRK_DONE == sub_str_end_pos) {
     556           4 :                 if(length < 0) {
     557           3 :                         intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: length not contained in string", 1 );
     558             : 
     559           3 :                         efree(ustr);
     560           3 :                         RETURN_FALSE;
     561             :                 } else {
     562           1 :                         sub_str_end_pos = ustr_len;
     563             :                 }
     564             :         }
     565             : 
     566          30 :         if(sub_str_start_pos > sub_str_end_pos) {
     567           2 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: length is beyond start", 1 );
     568             : 
     569           2 :                 efree(ustr);
     570           2 :                 RETURN_FALSE;
     571             :         }
     572             : 
     573          28 :         sub_str = NULL;
     574          28 :         status = U_ZERO_ERROR;
     575          28 :         intl_convert_utf16_to_utf8(&sub_str, &sub_str_len, ustr + sub_str_start_pos, ( sub_str_end_pos - sub_str_start_pos ), &status);
     576             : 
     577          28 :         efree( ustr );
     578             : 
     579          28 :         if ( U_FAILURE( status ) ) {
     580             :                 /* Set global error code. */
     581           0 :                 intl_error_set_code( NULL, status );
     582             : 
     583             :                 /* Set error messages. */
     584           0 :                 intl_error_set_custom_msg( NULL, "Error converting output string to UTF-8", 0 );
     585             : 
     586           0 :                 if ( NULL != sub_str )
     587           0 :                         efree( sub_str );
     588             : 
     589           0 :                 RETURN_FALSE;
     590             :         }
     591             : 
     592             :          /* return the allocated string, not a duplicate */
     593          56 :         RETVAL_STRINGL(sub_str, sub_str_len);
     594             :         //????
     595          28 :         efree(sub_str);
     596             : 
     597             : }
     598             : /* }}} */
     599             : 
     600             : /* {{{  strstr_common_handler - shared implementation behind grapheme_strstr (f_ignore_case = 0) and grapheme_stristr (f_ignore_case = 1). Returns FALSE on unparsable arguments, an empty needle or no match; otherwise returns the haystack slice before the match (part true) or from the match to the end (part false). */
     601          73 : static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_case)
     602             : {
     603             :         char *haystack, *needle;
     604             :         const char *found;
     605             :         size_t haystack_len, needle_len;
     606             :         int32_t ret_pos, uchar_pos;
     607          73 :         zend_bool part = 0; /* optional third argument; default selects the slice from the match to the end */
     608             : 
     609          73 :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|b", &haystack, &haystack_len, &needle, &needle_len, &part) == FAILURE) {
     610             : 
     611           2 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     612             :                          "grapheme_strstr: unable to parse input param", 0 );
     613             : 
     614           2 :                 RETURN_FALSE;
     615             :         }
     616             : 
     617          71 :         if (needle_len == 0) {
     618             : 
     619           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Empty delimiter", 1 ); /* NOTE(review): message is prefixed grapheme_strpos although this handler serves grapheme_strstr and grapheme_stristr - confirm whether that is intended */
     620             : 
     621           0 :                 RETURN_FALSE;
     622             :         }
     623             : 
     624             : 
     625          71 :         if ( !f_ignore_case ) {
     626             : 
     627             :                 /* ASCII optimization: a plain byte search (case-sensitive path only) quickly rules out a missing needle.
     628             :                  * The byte offset it finds is used directly only when the whole haystack passes the ASCII check below.
     629             :                 */
     630          70 :                 found = php_memnstr(haystack, needle, needle_len, haystack + haystack_len);
     631             : 
     632             :                 /* if it isn't there then we are done */
     633          35 :                 if ( !found ) {
     634           3 :                         RETURN_FALSE;
     635             :                 }
     636             : 
     637             :                 /* if it is there, and if the haystack is ascii, we are all done */
     638          32 :                 if ( grapheme_ascii_check((unsigned char *)haystack, haystack_len) >= 0 ) {
     639          13 :                         size_t found_offset = found - haystack; /* byte offset of the match inside haystack */
     640             : 
     641          13 :                         if (part) {
     642          10 :                                 RETURN_STRINGL(haystack, found_offset);
     643             :                         } else {
     644          16 :                                 RETURN_STRINGL(found, haystack_len - found_offset);
     645             :                         }
     646             :                 }
     647             : 
     648             :         }
     649             : 
     650             :         /* case-insensitive search or non-ASCII haystack: need to work in utf16 */
     651          55 :         ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, 0, &uchar_pos, f_ignore_case, 0 /*last */ );
     652             : 
     653          55 :         if ( ret_pos < 0 ) {
     654           9 :                 RETURN_FALSE;
     655             :         }
     656             : 
     657             :         /* uchar_pos is the 'nth' Unicode character position of the needle */
     658             : 
     659          46 :         ret_pos = 0; /* ret_pos is reused from here on as an offset into the UTF-8 haystack */
     660          46 :         U8_FWD_N(haystack, ret_pos, haystack_len, uchar_pos); /* advance ret_pos over uchar_pos characters of haystack */
     661             : 
     662          46 :         if (part) {
     663          30 :                 RETURN_STRINGL(haystack, ret_pos);
     664             :         } else {
     665          62 :                 RETURN_STRINGL(haystack + ret_pos, haystack_len - ret_pos);
     666             :         }
     667             : 
     668             : }
     669             : /* }}} */
     670             : 
     671             : /* {{{ proto string grapheme_strstr(string haystack, string needle[, bool part])
     672             :    Finds first occurrence of a string within another (case-sensitive); thin wrapper that calls strstr_common_handler with f_ignore_case = 0 */
     673          36 : PHP_FUNCTION(grapheme_strstr)
     674             : {
     675          36 :         strstr_common_handler(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0 /* f_ignore_case */);
     676          36 : }
     677             : /* }}} */
     678             : 
     679             : /* {{{ proto string grapheme_stristr(string haystack, string needle[, bool part])
     680             :    Finds first occurrence of a string within another, ignoring case; thin wrapper that calls strstr_common_handler with f_ignore_case = 1 */
     681          37 : PHP_FUNCTION(grapheme_stristr)
     682             : {
     683          37 :         strstr_common_handler(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1 /* f_ignore_case */);
     684          37 : }
     685             : /* }}} */
     686             : 
     687             : /* {{{ grapheme_extract_charcount_iter - grapheme iterator for grapheme_extract MAXCHARS: accepts whole graphemes while the break position reported by bi stays <= csize and returns the matching offset into the UTF-8 buffer pstr (0 if no grapheme fits) */
     688             : static inline int32_t
     689          18 : grapheme_extract_charcount_iter(UBreakIterator *bi, int32_t csize, unsigned char *pstr, int32_t str_len)
     690             : {
     691          18 :         int pos = 0, prev_pos = 0; /* current and previously accepted break positions from bi */
     692          18 :         int ret_pos = 0, prev_ret_pos = 0; /* current and previous offsets into pstr */
     693             : 
     694             :         while ( 1 ) {
     695          96 :                 pos = ubrk_next(bi);
     696             : 
     697          96 :                 if ( UBRK_DONE == pos ) {
     698           7 :                         break;
     699             :                 }
     700             : 
     701             :                 /* if we are beyond our limit, then the loop is done */
     702          89 :                 if ( pos > csize ) {
     703          11 :                         break;
     704             :                 }
     705             : 
     706             :                 /* update our pointer in the original UTF-8 buffer by as many characters
     707             :                    as ubrk_next iterated over */
     708             : 
     709          78 :                 prev_ret_pos = ret_pos;
     710          78 :                 U8_FWD_N(pstr, ret_pos, str_len, pos - prev_pos); /* NOTE(review): pos - prev_pos is a distance in the text bi iterates (UTF-16 per the caller) while U8_FWD_N counts code points; presumably these differ for supplementary characters - confirm */
     711             : 
     712          78 :                 if ( prev_ret_pos == ret_pos ) {
     713             :                         /* something wrong - malformed utf8? */
     714           0 :                         break;
     715             :                 }
     716             : 
     717          78 :                 prev_pos = pos;
     718          78 :         }
     719             : 
     720          18 :         return ret_pos;
     721             : }
     722             : /* }}} */
     723             : 
     724             : /* {{{ grapheme_extract_bytecount_iter - grapheme iterator for grapheme_extract MAXBYTES: accepts whole graphemes while the offset into the UTF-8 buffer pstr stays <= bsize and returns that offset; a grapheme that would cross bsize is not included */
     725             : static inline int32_t
     726          23 : grapheme_extract_bytecount_iter(UBreakIterator *bi, int32_t bsize, unsigned char *pstr, int32_t str_len)
     727             : {
     728          23 :         int pos = 0, prev_pos = 0; /* current and previously accepted break positions from bi */
     729          23 :         int ret_pos = 0, prev_ret_pos = 0; /* current and previous offsets into pstr */
     730             : 
     731             :         while ( 1 ) {
     732          63 :                 pos = ubrk_next(bi);
     733             : 
     734          63 :                 if ( UBRK_DONE == pos ) {
     735           8 :                         break;
     736             :                 }
     737             : 
     738          55 :                 prev_ret_pos = ret_pos;
     739          55 :                 U8_FWD_N(pstr, ret_pos, str_len, pos - prev_pos); /* NOTE(review): same unit question as in the MAXCHARS iterator - break positions versus code points - confirm for supplementary characters */
     740             : 
     741          55 :                 if ( ret_pos > bsize ) {
     742          15 :                         ret_pos = prev_ret_pos; /* roll back: this grapheme does not fit within bsize */
     743          15 :                         break;
     744             :                 }
     745             : 
     746          40 :                 if ( prev_ret_pos == ret_pos ) {
     747             :                         /* something wrong - malformed utf8? */
     748           0 :                         break;
     749             :                 }
     750             : 
     751          40 :                 prev_pos = pos;
     752          40 :         }
     753             : 
     754          23 :         return ret_pos;
     755             : }
     756             : /* }}} */
     757             : 
     758             : /* {{{ grapheme_extract_count_iter - grapheme iterator for grapheme_extract COUNT: steps over at most size grapheme boundaries (fewer if the text ends first) and returns the matching offset into the UTF-8 buffer pstr */
     759             : static inline int32_t
     760          21 : grapheme_extract_count_iter(UBreakIterator *bi, int32_t size, unsigned char *pstr, int32_t str_len)
     761             : {
     762          21 :         int pos = 0, next_pos = 0; /* last accepted break position and the candidate just returned by bi */
     763          21 :         int ret_pos = 0;
     764             : 
     765          80 :         while ( size ) {
     766          40 :                 next_pos = ubrk_next(bi);
     767             : 
     768          40 :                 if ( UBRK_DONE == next_pos ) {
     769           2 :                         break;
     770             :                 }
     771          38 :                 pos = next_pos; /* only real boundaries are kept, so UBRK_DONE never reaches pos */
     772          38 :                 size--;
     773             :         }
     774             : 
     775             :         /* pos is one past the last UChar - and represents the number of code units to
     776             :                 advance in the utf-8 buffer. NOTE(review): U8_FWD_N advances by code points, so this presumably over-advances for supplementary characters - confirm
     777             :         */
     778             : 
     779          21 :         U8_FWD_N(pstr, ret_pos, str_len, pos);
     780             : 
     781          21 :         return ret_pos;
     782             : }
     783             : /* }}} */
     784             : 
     785             : /* {{{ grapheme extract iter function pointer array - grapheme_extract indexes this table with its extract_type argument; the entry order presumably mirrors the GRAPHEME_EXTRACT_TYPE_* constants (defined elsewhere - confirm before reordering) */
     786             : typedef int32_t (*grapheme_extract_iter)(UBreakIterator * /*bi*/, int32_t /*size*/, unsigned char * /*pstr*/, int32_t /*str_len*/);
     787             : 
     788             : static grapheme_extract_iter grapheme_extract_iters[] = {
     789             :         &grapheme_extract_count_iter, /* size is a number of graphemes */
     790             :         &grapheme_extract_bytecount_iter, /* size is a byte limit */
     791             :         &grapheme_extract_charcount_iter, /* size is a character limit */
     792             : };
     793             : /* }}} */
     794             : 
     795             : /* {{{ proto string grapheme_extract(string str, int size[, int extract_type[, int start[, int next]]])
     796             :         Function to extract a sequence of default grapheme clusters. Returns FALSE on invalid arguments, an empty string when size is 0, otherwise the extracted chunk of str; when next is supplied it receives start plus the length of the returned chunk */
     797         106 : PHP_FUNCTION(grapheme_extract)
     798             : {
     799             :         char *str, *pstr;
     800             :         UChar *ustr;
     801             :         size_t str_len;
     802             :         int32_t ustr_len;
     803             :         zend_long size; /* maximum number of grapheme clusters, bytes, or characters (based on extract_type) to return */
     804         106 :         zend_long lstart = 0; /* starting position in str in bytes */
     805         106 :         int32_t start = 0;
     806         106 :         zend_long extract_type = GRAPHEME_EXTRACT_TYPE_COUNT;
     807             :         UErrorCode status;
     808             :         unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
     809         106 :         UBreakIterator* bi = NULL;
     810             :         int ret_pos;
     811         106 :         zval *next = NULL; /* return offset of next part of the string */
     812             : 
     813         106 :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|llz", &str, &str_len, &size, &extract_type, &lstart, &next) == FAILURE) {
     814           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     815             :                          "grapheme_extract: unable to parse input param", 0 );
     816           1 :                 RETURN_FALSE;
     817             :         }
     818             : 
     819         105 :         if ( NULL != next ) {
     820          32 :                 if ( !Z_ISREF_P(next) ) {
     821           0 :                         intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     822             :                                  "grapheme_extract: 'next' was not passed by reference", 0 );
     823           0 :                         RETURN_FALSE;
     824             :                 } else {
     825          32 :                         ZVAL_DEREF(next);
     826             :                         /* initialize next */
     827          17 :                         SEPARATE_ZVAL(next);
     828          16 :                         zval_dtor(next);
     829          16 :             ZVAL_LONG(next, lstart); /* next keeps this value on every early return below */
     830             :                 }
     831             :         }
     832             : 
     833         105 :         if ( extract_type < GRAPHEME_EXTRACT_TYPE_MIN || extract_type > GRAPHEME_EXTRACT_TYPE_MAX ) {
     834           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     835             :                          "grapheme_extract: unknown extract type param", 0 );
     836           1 :                 RETURN_FALSE;
     837             :         }
     838             : 
     839         104 :         if ( lstart > INT32_MAX || lstart < 0 || lstart >= str_len ) { /* the lstart < 0 test runs before the mixed signed/unsigned comparison with str_len */
     840           5 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_extract: start not contained in string", 0 );
     841           5 :                 RETURN_FALSE;
     842             :         }
     843             : 
     844          99 :         if ( size > INT32_MAX || size < 0) {
     845           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_extract: size is invalid", 0 );
     846           0 :                 RETURN_FALSE;
     847             :         }
     848          99 :         if (size == 0) {
     849           8 :                 RETURN_EMPTY_STRING();
     850             :         }
     851             : 
     852             :         /* we checked that it will fit: */
     853          91 :         start = (int32_t) lstart;
     854             : 
     855          91 :         pstr = str + start;
     856             : 
     857             :         /* just in case pstr points in the middle of a character, move forward to the start of the next char */
     858          91 :         if ( !UTF8_IS_SINGLE(*pstr) && !U8_IS_LEAD(*pstr) ) {
     859           9 :                 char *str_end = str + str_len;
     860             : 
     861          25 :                 while ( !UTF8_IS_SINGLE(*pstr) && !U8_IS_LEAD(*pstr) ) {
     862           9 :                         pstr++; /* NOTE(review): start is not advanced together with pstr here, see the notes where next is assigned */
     863           9 :                         if ( pstr >= str_end ) {
     864           2 :                                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     865             :                                                                 "grapheme_extract: invalid input string", 0 );
     866             : 
     867           2 :                                 RETURN_FALSE;
     868             :                         }
     869             :                 }
     870             :         }
     871             : 
     872          89 :         str_len -= (pstr - str); /* from here on str_len is the number of bytes remaining at pstr */
     873             : 
     874             :         /* if the string is all ASCII up to size+1 - or str_len whichever is first - then we are done.
     875             :                 (size + 1 because the size-th character might be the beginning of a grapheme cluster)
     876             :          */
     877             : 
     878          89 :         if ( -1 != grapheme_ascii_check((unsigned char *)pstr, MIN(size + 1, str_len)) ) {
     879          27 :         size_t nsize = MIN(size, str_len);
     880          27 :                 if ( NULL != next ) {
     881           9 :                         ZVAL_LONG(next, start+nsize); /* NOTE(review): pstr may have moved past str + start above while start stayed put, so this offset presumably falls short by the skipped bytes - confirm */
     882             :                 }
     883          54 :                 RETURN_STRINGL(pstr, nsize);
     884             :         }
     885             : 
     886             :         /* convert the strings to UTF-16. */
     887          62 :         ustr = NULL;
     888          62 :         ustr_len = 0;
     889          62 :         status = U_ZERO_ERROR;
     890          62 :         intl_convert_utf8_to_utf16(&ustr, &ustr_len, pstr, str_len, &status );
     891             : 
     892          62 :         if ( U_FAILURE( status ) ) {
     893             :                 /* Set global error code. */
     894           0 :                 intl_error_set_code( NULL, status );
     895             : 
     896             :                 /* Set error messages. */
     897           0 :                 intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 );
     898             : 
     899           0 :                 if ( NULL != ustr )
     900           0 :                         efree( ustr );
     901             : 
     902           0 :                 RETURN_FALSE;
     903             :         }
     904             : 
     905          62 :         bi = NULL;
     906          62 :         status = U_ZERO_ERROR;
     907          62 :         bi = grapheme_get_break_iterator(u_break_iterator_buffer, &status );
     908             : 
     909          62 :         ubrk_setText(bi, ustr, ustr_len, &status); /* NOTE(review): status is not inspected after either of these two calls - confirm that a failure here cannot leave bi unusable */
     910             : 
     911             :         /* if the caller put us in the middle of a grapheme, we can't detect it in all cases since we
     912             :                 can't back up. So, we will not do anything. */
     913             : 
     914             :         /* now we need to find the end of the chunk the user wants us to return */
     915             :         /* it's ok to convert str_len to int32_t since if it were too big intl_convert_utf8_to_utf16 above would fail */
     916          62 :         ret_pos = (*grapheme_extract_iters[extract_type])(bi, size, (unsigned char *)pstr, (int32_t)str_len); /* extract_type was range-checked above; presumably that range matches the table size - confirm */
     917             : 
     918          62 :         if (ustr) {
     919          62 :                 efree(ustr);
     920             :         }
     921          62 :         ubrk_close(bi);
     922             : 
     923          62 :         if ( NULL != next ) {
     924           4 :                 ZVAL_LONG(next, start+ret_pos); /* NOTE(review): ret_pos is relative to pstr, which may lie beyond str + start; presumably the skipped bytes are missing from this sum - confirm */
     925             :         }
     926             : 
     927         124 :         RETURN_STRINGL(((char *)pstr), ret_pos);
     928             : }
     929             : 
     930             : /* }}} */
     931             : 
     932             : /*
     933             :  * Local variables:
     934             :  * tab-width: 4
     935             :  * c-basic-offset: 4
     936             :  * End:
     937             :  * vim600: fdm=marker
     938             :  * vim: noet sw=4 ts=4
     939             :  */
     940             : 

Generated by: LCOV version 1.10

Generated at Sat, 21 Feb 2015 16:01:50 +0000 (5 days ago)

Copyright © 2005-2015 The PHP Group
All rights reserved.