PHP  
 PHP: Test and Code Coverage Analysis
downloads | QA | documentation | faq | getting help | mailing lists | reporting bugs | php.net sites | links | my php.net 
 

LCOV - code coverage report
Current view: top level - ext/intl/grapheme - grapheme_string.c (source / functions) Hit Total Coverage
Test: PHP Code Coverage Lines: 324 372 87.1 %
Date: 2014-08-23 Functions: 14 14 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :    +----------------------------------------------------------------------+
       3             :    | PHP Version 5                                                                                                                |
       4             :    +----------------------------------------------------------------------+
       5             :    | This source file is subject to version 3.01 of the PHP license,      |
       6             :    | that is bundled with this package in the file LICENSE, and is                |
       7             :    | available through the world-wide-web at the following url:                   |
       8             :    | http://www.php.net/license/3_01.txt                                                                  |
       9             :    | If you did not receive a copy of the PHP license and are unable to   |
      10             :    | obtain it through the world-wide-web, please send a note to                  |
      11             :    | license@php.net so we can mail you a copy immediately.                               |
      12             :    +----------------------------------------------------------------------+
      13             :    | Author: Ed Batutis <ed@batutis.com>                                                            |
      14             :    +----------------------------------------------------------------------+
      15             :  */
      16             : 
      17             : /* {{{ includes */
      18             : #ifdef HAVE_CONFIG_H
      19             : #include "config.h"
      20             : #endif
      21             : 
      22             : #include <php.h>
      23             : #include "grapheme.h"
      24             : #include "grapheme_util.h"
      25             : 
      26             : #include <unicode/utypes.h>
      27             : #include <unicode/ucol.h>
      28             : #include <unicode/ustring.h>
      29             : #include <unicode/ubrk.h>
      30             : 
      31             : #include "ext/standard/php_string.h"
      32             : 
      33             : /* }}} */
      34             : 
      /* Extract-type selectors. The first three values are exported to PHP
       * userland as GRAPHEME_EXTR_COUNT, GRAPHEME_EXTR_MAXBYTES and
       * GRAPHEME_EXTR_MAXCHARS by grapheme_register_constants(). */
      #define GRAPHEME_EXTRACT_TYPE_COUNT    0
      #define GRAPHEME_EXTRACT_TYPE_MAXBYTES 1
      #define GRAPHEME_EXTRACT_TYPE_MAXCHARS 2

      /* Lowest and highest selector values, expressed as aliases so they
       * track the list above. */
      #define GRAPHEME_EXTRACT_TYPE_MIN GRAPHEME_EXTRACT_TYPE_COUNT
      #define GRAPHEME_EXTRACT_TYPE_MAX GRAPHEME_EXTRACT_TYPE_MAXCHARS
      40             : 
      41             : 
      42             : /* {{{ grapheme_register_constants
      43             :  * Register API constants
      44             :  */
      45       20208 : void grapheme_register_constants( INIT_FUNC_ARGS )
      46             : {
      47       20208 :         REGISTER_LONG_CONSTANT("GRAPHEME_EXTR_COUNT", GRAPHEME_EXTRACT_TYPE_COUNT, CONST_CS | CONST_PERSISTENT);
      48       20208 :         REGISTER_LONG_CONSTANT("GRAPHEME_EXTR_MAXBYTES", GRAPHEME_EXTRACT_TYPE_MAXBYTES, CONST_CS | CONST_PERSISTENT);
      49       20208 :         REGISTER_LONG_CONSTANT("GRAPHEME_EXTR_MAXCHARS", GRAPHEME_EXTRACT_TYPE_MAXCHARS, CONST_CS | CONST_PERSISTENT);
      50       20208 : }
      51             : /* }}} */
      52             : 
      53             : /* {{{ proto int grapheme_strlen(string str)
      54             :    Get number of graphemes in a string */
      55           7 : PHP_FUNCTION(grapheme_strlen)
      56             : {
      57             :         unsigned char* string;
      58             :         int string_len;
      59           7 :         UChar* ustring = NULL;
      60           7 :         int ustring_len = 0;
      61             :         int ret_len;
      62             :         UErrorCode status;
      63             : 
      64           7 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", (char **)&string, &string_len) == FAILURE) {
      65             : 
      66           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
      67             :                          "grapheme_strlen: unable to parse input param", 0 TSRMLS_CC );
      68             : 
      69           1 :                 RETURN_FALSE;
      70             :         }
      71             : 
      72           6 :         ret_len = grapheme_ascii_check(string, string_len);
      73             : 
      74           6 :         if ( ret_len >= 0 )
      75           2 :                 RETURN_LONG(ret_len);
      76             : 
      77             :         /* convert the string to UTF-16. */
      78           4 :         status = U_ZERO_ERROR;
      79           4 :         intl_convert_utf8_to_utf16(&ustring, &ustring_len, (char*) string, string_len, &status );
      80             : 
      81           4 :         if ( U_FAILURE( status ) ) {
      82             :                 /* Set global error code. */
      83           0 :                 intl_error_set_code( NULL, status TSRMLS_CC );
      84             : 
      85             :                 /* Set error messages. */
      86           0 :                 intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
      87           0 :                 if (ustring) {
      88           0 :                         efree( ustring );
      89             :                 }
      90           0 :                 RETURN_NULL();
      91             :         }
      92             : 
      93           4 :         ret_len = grapheme_split_string(ustring, ustring_len, NULL, 0 TSRMLS_CC );
      94             : 
      95           4 :         if (ustring) {
      96           4 :                 efree( ustring );
      97             :         }
      98             : 
      99           4 :         if (ret_len >= 0) {
     100           4 :                 RETVAL_LONG(ret_len);
     101             :         } else {
     102           0 :                 RETVAL_FALSE;
     103             :         }
     104             : }
     105             : /* }}} */
     106             : 
     107             : /* {{{ proto int grapheme_strpos(string haystack, string needle [, int offset ])
     108             :    Find position of first occurrence of a string within another */
     109          38 : PHP_FUNCTION(grapheme_strpos)
     110             : {
     111             :         unsigned char *haystack, *needle;
     112             :         int haystack_len, needle_len;
     113             :         unsigned char *found;
     114          38 :         long loffset = 0;
     115          38 :         int32_t offset = 0;
     116             :         int ret_pos;
     117             : 
     118          38 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", (char **)&haystack, &haystack_len, (char **)&needle, &needle_len, &loffset) == FAILURE) {
     119             : 
     120           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     121             :                          "grapheme_strpos: unable to parse input param", 0 TSRMLS_CC );
     122             : 
     123           1 :                 RETURN_FALSE;
     124             :         }
     125             : 
     126          37 :         if ( OUTSIDE_STRING(loffset, haystack_len) ) {
     127             : 
     128           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Offset not contained in string", 1 TSRMLS_CC );
     129             : 
     130           1 :                 RETURN_FALSE;
     131             :         }
     132             : 
     133             :         /* we checked that it will fit: */
     134          36 :         offset = (int32_t) loffset;
     135             : 
     136             :         /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
     137             : 
     138          36 :         if (needle_len == 0) {
     139             : 
     140           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Empty delimiter", 1 TSRMLS_CC );
     141             : 
     142           0 :                 RETURN_FALSE;
     143             :         }
     144             : 
     145             : 
     146             :         /* quick check to see if the string might be there
     147             :          * I realize that 'offset' is 'grapheme count offset' but will work in spite of that
     148             :         */
     149          36 :         found = (unsigned char *)php_memnstr((char *)haystack + offset, (char *)needle, needle_len, (char *)haystack + haystack_len);
     150             : 
     151             :         /* if it isn't there the we are done */
     152          36 :         if (!found) {
     153           9 :                 RETURN_FALSE;
     154             :         }
     155             : 
     156             :         /* if it is there, and if the haystack is ascii, we are all done */
     157          27 :         if ( grapheme_ascii_check(haystack, haystack_len) >= 0 ) {
     158             : 
     159          12 :                 RETURN_LONG(found - haystack);
     160             :         }
     161             : 
     162             :         /* do utf16 part of the strpos */
     163          15 :         ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* fIgnoreCase */, 0 /* last */ TSRMLS_CC );
     164             : 
     165          15 :         if ( ret_pos >= 0 ) {
     166          13 :                 RETURN_LONG(ret_pos);
     167             :         } else {
     168           2 :                 RETURN_FALSE;
     169             :         }
     170             : 
     171             : }
     172             : /* }}} */
     173             : 
     174             : /* {{{ proto int grapheme_stripos(string haystack, string needle [, int offset ])
     175             :    Find position of first occurrence of a string within another, ignoring case differences */
     176          40 : PHP_FUNCTION(grapheme_stripos)
     177             : {
     178             :         unsigned char *haystack, *needle, *haystack_dup, *needle_dup;
     179             :         int haystack_len, needle_len;
     180             :         unsigned char *found;
     181          40 :         long loffset = 0;
     182          40 :         int32_t offset = 0;
     183             :         int ret_pos;
     184             :         int is_ascii;
     185             : 
     186          40 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", (char **)&haystack, &haystack_len, (char **)&needle, &needle_len, &loffset) == FAILURE) {
     187             : 
     188           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     189             :                          "grapheme_stripos: unable to parse input param", 0 TSRMLS_CC );
     190             : 
     191           1 :                 RETURN_FALSE;
     192             :         }
     193             : 
     194          39 :         if ( OUTSIDE_STRING(loffset, haystack_len) ) {
     195             : 
     196           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_stripos: Offset not contained in string", 1 TSRMLS_CC );
     197             : 
     198           1 :                 RETURN_FALSE;
     199             :         }
     200             : 
     201             :         /* we checked that it will fit: */
     202          38 :         offset = (int32_t) loffset;
     203             : 
     204             :         /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
     205             : 
     206          38 :         if (needle_len == 0) {
     207             : 
     208           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_stripos: Empty delimiter", 1 TSRMLS_CC );
     209             : 
     210           0 :                 RETURN_FALSE;
     211             :         }
     212             : 
     213             : 
     214          38 :         is_ascii = ( grapheme_ascii_check(haystack, haystack_len) >= 0 );
     215             : 
     216          38 :         if ( is_ascii ) {
     217          19 :                 needle_dup = (unsigned char *)estrndup((char *)needle, needle_len);
     218          19 :                 php_strtolower((char *)needle_dup, needle_len);
     219          19 :                 haystack_dup = (unsigned char *)estrndup((char *)haystack, haystack_len);
     220          19 :                 php_strtolower((char *)haystack_dup, haystack_len);
     221             : 
     222          19 :                 found = (unsigned char*) php_memnstr((char *)haystack_dup + offset, (char *)needle_dup, needle_len, (char *)haystack_dup + haystack_len);
     223             : 
     224          19 :                 efree(haystack_dup);
     225          19 :                 efree(needle_dup);
     226             : 
     227          19 :                 if (found) {
     228          12 :                         RETURN_LONG(found - haystack_dup);
     229             :                 }
     230             : 
     231             :                 /* if needle was ascii too, we are all done, otherwise we need to try using Unicode to see what we get */
     232           7 :                 if ( grapheme_ascii_check(needle, needle_len) >= 0 ) {
     233           5 :                         RETURN_FALSE;
     234             :                 }
     235             :         }
     236             : 
     237             :         /* do utf16 part of the strpos */
     238          21 :         ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 1 /* fIgnoreCase */, 0 /*last */ TSRMLS_CC );
     239             : 
     240          21 :         if ( ret_pos >= 0 ) {
     241          15 :                 RETURN_LONG(ret_pos);
     242             :         } else {
     243           6 :                 RETURN_FALSE;
     244             :         }
     245             : 
     246             : }
     247             : /* }}} */
     248             : 
     249             : /* {{{ proto int grapheme_strrpos(string haystack, string needle [, int offset])
     250             :    Find position of last occurrence of a string within another */
     251          37 : PHP_FUNCTION(grapheme_strrpos)
     252             : {
     253             :         unsigned char *haystack, *needle;
     254             :         int haystack_len, needle_len;
     255          37 :         long loffset = 0;
     256          37 :         int32_t offset = 0;
     257             :         int32_t ret_pos;
     258             :         int is_ascii;
     259             : 
     260          37 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", (char **)&haystack, &haystack_len, (char **)&needle, &needle_len, &loffset) == FAILURE) {
     261             : 
     262           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     263             :                          "grapheme_strrpos: unable to parse input param", 0 TSRMLS_CC );
     264             : 
     265           1 :                 RETURN_FALSE;
     266             :         }
     267             : 
     268          36 :         if ( OUTSIDE_STRING(loffset, haystack_len) ) {
     269             : 
     270           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Offset not contained in string", 1 TSRMLS_CC );
     271             : 
     272           0 :                 RETURN_FALSE;
     273             :         }
     274             : 
     275             :         /* we checked that it will fit: */
     276          36 :         offset = (int32_t) loffset;
     277             : 
     278             :         /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
     279             : 
     280          36 :         if (needle_len == 0) {
     281             : 
     282           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Empty delimiter", 1 TSRMLS_CC );
     283             : 
     284           0 :                 RETURN_FALSE;
     285             :         }
     286             : 
     287          36 :         is_ascii = grapheme_ascii_check(haystack, haystack_len) >= 0;
     288             : 
     289          36 :         if ( is_ascii ) {
     290             : 
     291          19 :                 ret_pos = grapheme_strrpos_ascii(haystack, haystack_len, needle, needle_len, offset);
     292             : 
     293             : 
     294          19 :                 if ( ret_pos >= 0 ) {
     295          12 :                         RETURN_LONG(ret_pos);
     296             :                 }
     297             : 
     298             :                 /* if the needle was ascii too, we are done */
     299             : 
     300           7 :                 if (  grapheme_ascii_check(needle, needle_len) >= 0 ) {
     301           5 :                         RETURN_FALSE;
     302             :                 }
     303             : 
     304             :                 /* else we need to continue via utf16 */
     305             :         }
     306             : 
     307          19 :         ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* f_ignore_case */, 1/* last */ TSRMLS_CC);
     308             : 
     309          19 :         if ( ret_pos >= 0 ) {
     310          13 :                 RETURN_LONG(ret_pos);
     311             :         } else {
     312           6 :                 RETURN_FALSE;
     313             :         }
     314             : 
     315             : 
     316             : }
     317             : /* }}} */
     318             : 
     319             : /* {{{ proto int grapheme_strripos(string haystack, string needle [, int offset])
     320             :    Find position of last occurrence of a string within another, ignoring case */
     321          37 : PHP_FUNCTION(grapheme_strripos)
     322             : {
     323             :         unsigned char *haystack, *needle;
     324             :         int haystack_len, needle_len;
     325          37 :         long loffset = 0;
     326          37 :         int32_t offset = 0;
     327             :         int32_t ret_pos;
     328             :         int is_ascii;
     329             : 
     330          37 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", (char **)&haystack, &haystack_len, (char **)&needle, &needle_len, &loffset) == FAILURE) {
     331             : 
     332           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     333             :                          "grapheme_strrpos: unable to parse input param", 0 TSRMLS_CC );
     334             : 
     335           1 :                 RETURN_FALSE;
     336             :         }
     337             : 
     338          36 :         if ( OUTSIDE_STRING(loffset, haystack_len) ) {
     339             : 
     340           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Offset not contained in string", 1 TSRMLS_CC );
     341             : 
     342           0 :                 RETURN_FALSE;
     343             :         }
     344             : 
     345             :         /* we checked that it will fit: */
     346          36 :         offset = (int32_t) loffset;
     347             : 
     348             :         /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
     349             : 
     350          36 :         if (needle_len == 0) {
     351             : 
     352           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Empty delimiter", 1 TSRMLS_CC );
     353             : 
     354           0 :                 RETURN_FALSE;
     355             :         }
     356             : 
     357          36 :         is_ascii = grapheme_ascii_check(haystack, haystack_len) >= 0;
     358             : 
     359          36 :         if ( is_ascii ) {
     360             :                 unsigned char *needle_dup, *haystack_dup;
     361             : 
     362          19 :                 needle_dup = (unsigned char *)estrndup((char *)needle, needle_len);
     363          19 :                 php_strtolower((char *)needle_dup, needle_len);
     364          19 :                 haystack_dup = (unsigned char *)estrndup((char *)haystack, haystack_len);
     365          19 :                 php_strtolower((char *)haystack_dup, haystack_len);
     366             : 
     367          19 :                 ret_pos = grapheme_strrpos_ascii(haystack_dup, haystack_len, needle_dup, needle_len, offset);
     368             : 
     369          19 :                 efree(haystack_dup);
     370          19 :                 efree(needle_dup);
     371             : 
     372          19 :                 if ( ret_pos >= 0 ) {
     373          12 :                         RETURN_LONG(ret_pos);
     374             :                 }
     375             : 
     376             :                 /* if the needle was ascii too, we are done */
     377             : 
     378           7 :                 if (  grapheme_ascii_check(needle, needle_len) >= 0 ) {
     379           5 :                         RETURN_FALSE;
     380             :                 }
     381             : 
     382             :                 /* else we need to continue via utf16 */
     383             :         }
     384             : 
     385          19 :         ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL,  1 /* f_ignore_case */, 1 /*last */ TSRMLS_CC);
     386             : 
     387          19 :         if ( ret_pos >= 0 ) {
     388          13 :                 RETURN_LONG(ret_pos);
     389             :         } else {
     390           6 :                 RETURN_FALSE;
     391             :         }
     392             : 
     393             : 
     394             : }
     395             : /* }}} */
     396             : 
     397             : /* {{{ proto string grapheme_substr(string str, int start [, int length])
     398             :    Returns part of a string */
     399          71 : PHP_FUNCTION(grapheme_substr)
     400             : {
     401             :         unsigned char *str, *sub_str;
     402             :         UChar *ustr;
     403             :         int str_len, sub_str_len, ustr_len;
     404          71 :         long lstart = 0, length = 0;
     405          71 :         int32_t start = 0;
     406             :         int iter_val;
     407             :         UErrorCode status;
     408             :         unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
     409          71 :         UBreakIterator* bi = NULL;
     410             :         int sub_str_start_pos, sub_str_end_pos;
     411             :         int32_t (*iter_func)(UBreakIterator *);
     412             : 
     413          71 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|l", (char **)&str, &str_len, &lstart, &length) == FAILURE) {
     414             : 
     415           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     416             :                          "grapheme_substr: unable to parse input param", 0 TSRMLS_CC );
     417             : 
     418           1 :                 RETURN_FALSE;
     419             :         }
     420             : 
     421          70 :         if ( OUTSIDE_STRING(lstart, str_len) ) {
     422             : 
     423           5 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: start not contained in string", 1 TSRMLS_CC );
     424             : 
     425           5 :                 RETURN_FALSE;
     426             :         }
     427             : 
     428             :         /* we checked that it will fit: */
     429          65 :         start = (int32_t) lstart;
     430             : 
     431             :         /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
     432             : 
     433          65 :         if ( grapheme_ascii_check(str, str_len) >= 0 ) {
     434           9 :                 grapheme_substr_ascii((char *)str, str_len, start, length, ZEND_NUM_ARGS(), (char **) &sub_str, &sub_str_len);
     435             : 
     436           9 :                 if ( NULL == sub_str ) {
     437           1 :                         intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: invalid parameters", 1 TSRMLS_CC );
     438           1 :                         RETURN_FALSE;
     439             :                 }
     440             : 
     441           8 :                 RETURN_STRINGL(((char *)sub_str), sub_str_len, 1);
     442             :         }
     443             : 
     444          56 :         ustr = NULL;
     445          56 :         ustr_len = 0;
     446          56 :         status = U_ZERO_ERROR;
     447          56 :         intl_convert_utf8_to_utf16(&ustr, &ustr_len, (char *)str, str_len, &status);
     448             : 
     449          56 :         if ( U_FAILURE( status ) ) {
     450             :                 /* Set global error code. */
     451           0 :                 intl_error_set_code( NULL, status TSRMLS_CC );
     452             : 
     453             :                 /* Set error messages. */
     454           0 :                 intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
     455           0 :                 if (ustr) {
     456           0 :                         efree( ustr );
     457             :                 }
     458           0 :                 RETURN_FALSE;
     459             :         }
     460             : 
     461          56 :         bi = grapheme_get_break_iterator((void*)u_break_iterator_buffer, &status TSRMLS_CC );
     462             : 
     463          56 :         if( U_FAILURE(status) ) {
     464           0 :                 RETURN_FALSE;
     465             :         }
     466             : 
     467          56 :         ubrk_setText(bi, ustr, ustr_len,        &status);
     468             : 
     469          56 :         if ( start < 0 ) {
     470          28 :                 iter_func = ubrk_previous;
     471          28 :                 ubrk_last(bi);
     472          28 :                 iter_val = 1;
     473             :         }
     474             :         else {
     475          28 :                 iter_func = ubrk_next;
     476          28 :                 iter_val = -1;
     477             :         }
     478             : 
     479          56 :         sub_str_start_pos = 0;
     480             : 
     481         377 :         while ( start ) {
     482         265 :                 sub_str_start_pos = iter_func(bi);
     483             : 
     484         265 :                 if ( UBRK_DONE == sub_str_start_pos ) {
     485           0 :                         break;
     486             :                 }
     487             : 
     488         265 :                 start += iter_val;
     489             :         }
     490             : 
     491          56 :         if ( 0 != start || sub_str_start_pos >= ustr_len ) {
     492             : 
     493           3 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: start not contained in string", 1 TSRMLS_CC );
     494             : 
     495           3 :                 if (ustr) {
     496           3 :                         efree(ustr);
     497             :                 }
     498           3 :                 ubrk_close(bi);
     499           3 :                 RETURN_FALSE;
     500             :         }
     501             : 
     502          53 :         if (ZEND_NUM_ARGS() <= 2) {
     503             : 
     504             :                 /* no length supplied, return the rest of the string */
     505             : 
     506          17 :                 sub_str = NULL;
     507          17 :                 sub_str_len = 0;
     508          17 :                 status = U_ZERO_ERROR;
     509          17 :                 intl_convert_utf16_to_utf8((char **)&sub_str, &sub_str_len, ustr + sub_str_start_pos, ustr_len - sub_str_start_pos, &status);
     510             : 
     511          17 :                 if (ustr) {
     512          17 :                         efree( ustr );
     513             :                 }
     514          17 :                 ubrk_close( bi );
     515             : 
     516          17 :                 if ( U_FAILURE( status ) ) {
     517             :                         /* Set global error code. */
     518           0 :                         intl_error_set_code( NULL, status TSRMLS_CC );
     519             : 
     520             :                         /* Set error messages. */
     521           0 :                         intl_error_set_custom_msg( NULL, "Error converting output string to UTF-8", 0 TSRMLS_CC );
     522             : 
     523           0 :                         if (sub_str) {
     524           0 :                                 efree( sub_str );
     525             :                         }
     526             : 
     527           0 :                         RETURN_FALSE;
     528             :                 }
     529             : 
     530             :                 /* return the allocated string, not a duplicate */
     531          17 :                 RETURN_STRINGL(((char *)sub_str), sub_str_len, 0);
     532             :         }
     533             : 
     534          36 :         if(length == 0) {
     535             :                 /* empty length - we've validated start, we can return "" now */
     536           2 :                 if (ustr) {
     537           2 :                         efree(ustr);
     538             :                 }
     539           2 :                 ubrk_close(bi);
     540           2 :                 RETURN_EMPTY_STRING();          
     541             :         }
     542             : 
     543             :         /* find the end point of the string to return */
     544             : 
     545          34 :         if ( length < 0 ) {
     546          24 :                 iter_func = ubrk_previous;
     547          24 :                 ubrk_last(bi);
     548          24 :                 iter_val = 1;
     549             :         }
     550             :         else {
     551          10 :                 iter_func = ubrk_next;
     552          10 :                 iter_val = -1;
     553             :         }
     554             : 
     555          34 :         sub_str_end_pos = 0;
     556             : 
     557         224 :         while ( length ) {
     558         161 :                 sub_str_end_pos = iter_func(bi);
     559             : 
     560         161 :                 if ( UBRK_DONE == sub_str_end_pos ) {
     561           5 :                         break;
     562             :                 }
     563             : 
     564         156 :                 length += iter_val;
     565             :         }
     566             : 
     567          34 :         ubrk_close(bi);
     568             : 
     569          34 :         if ( UBRK_DONE == sub_str_end_pos) {
     570           5 :                 if(length < 0) {
     571           3 :                         intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: length not contained in string", 1 TSRMLS_CC );
     572             : 
     573           3 :                         efree(ustr);
     574           3 :                         RETURN_FALSE;
     575             :                 } else {
     576           2 :                         sub_str_end_pos = ustr_len;
     577             :                 }
     578             :         }
     579             :         
     580          31 :         if(sub_str_start_pos > sub_str_end_pos) {
     581           2 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: length is beyond start", 1 TSRMLS_CC );
     582             : 
     583           2 :                 efree(ustr);
     584           2 :                 RETURN_FALSE;
     585             :         }
     586             : 
     587          29 :         sub_str = NULL;
     588          29 :         status = U_ZERO_ERROR;
     589          29 :         intl_convert_utf16_to_utf8((char **)&sub_str, &sub_str_len, ustr + sub_str_start_pos, ( sub_str_end_pos - sub_str_start_pos ), &status);
     590             : 
     591          29 :         efree( ustr );
     592             : 
     593          29 :         if ( U_FAILURE( status ) ) {
     594             :                 /* Set global error code. */
     595           0 :                 intl_error_set_code( NULL, status TSRMLS_CC );
     596             : 
     597             :                 /* Set error messages. */
     598           0 :                 intl_error_set_custom_msg( NULL, "Error converting output string to UTF-8", 0 TSRMLS_CC );
     599             : 
     600           0 :                 if ( NULL != sub_str )
     601           0 :                         efree( sub_str );
     602             : 
     603           0 :                 RETURN_FALSE;
     604             :         }
     605             : 
     606             :          /* return the allocated string, not a duplicate */
     607          29 :         RETURN_STRINGL(((char *)sub_str), sub_str_len, 0);
     608             : 
     609             : }
     610             : /* }}} */
     611             : 
     612             : /* {{{  strstr_common_handler - shared body of grapheme_strstr() (f_ignore_case = 0) and grapheme_stristr() (f_ignore_case = 1). Returns the bytes of haystack before the first match when 'part' is true, otherwise the bytes from the match to the end; returns FALSE on parse failure, empty needle or no match. */
     613          73 : static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_case)
     614             : {
     615             :         unsigned char *haystack, *needle, *found;
     616             :         int haystack_len, needle_len;
     617             :         int ret_pos, uchar_pos;
     618          73 :         zend_bool part = 0;
     619             :         /* "ss|b": haystack and needle strings plus the optional boolean 'part' (left at 0 when omitted) */
     620          73 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|b", (char **)&haystack, &haystack_len, (char **)&needle, &needle_len, &part) == FAILURE) {
     621             : 
     622           2 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     623             :                          "grapheme_strstr: unable to parse input param", 0 TSRMLS_CC );
     624             : 
     625           2 :                 RETURN_FALSE;
     626             :         }
     627             : 
     628          71 :         if (needle_len == 0) {
     629             :                 /* an empty needle is rejected; the message carries this handler's own prefix, matching the parse-failure message above */
     630           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strstr: Empty delimiter", 1 TSRMLS_CC );
     631             : 
     632           0 :                 RETURN_FALSE;
     633             :         }
     634             : 
     635             : 
     636          71 :         if ( !f_ignore_case ) {
     637             : 
     638             :                 /* ASCII optimization: a plain byte search tells us quickly whether the needle can be there at all.
     639             :                  * This fast path is only taken for the case-sensitive variant.
     640             :                 */
     641          35 :                 found = (unsigned char *)php_memnstr((char *)haystack, (char *)needle, needle_len, (char *)haystack + haystack_len);
     642             : 
     643             :                 /* if it isn't there then we are done */
     644          35 :                 if ( !found ) {
     645           3 :                         RETURN_FALSE;
     646             :                 }
     647             : 
     648             :                 /* if it is there, and if the haystack is ascii, we are all done: the byte offset of 'found' is the split point */
     649          32 :                 if ( grapheme_ascii_check(haystack, haystack_len) >= 0 ) {
     650          13 :                         size_t found_offset = found - haystack;
     651             : 
     652          13 :                         if (part) {
     653           5 :                                 RETURN_STRINGL(((char *)haystack) , found_offset, 1);
     654             :                         } else {
     655           8 :                                 RETURN_STRINGL(((char *)found), haystack_len - found_offset, 1);
     656             :                         }
     657             :                 }
     658             : 
     659             :         }
     660             : 
     661             :         /* need to work in utf16; a negative result from grapheme_strpos_utf16 is treated as "no match" */
     662          55 :         ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, 0, &uchar_pos, f_ignore_case, 0 /*last */ TSRMLS_CC );
     663             : 
     664          55 :         if ( ret_pos < 0 ) {
     665           9 :                 RETURN_FALSE;
     666             :         }
     667             : 
     668             :         /* uchar_pos is the 'nth' Unicode character position of the needle; U8_FWD_N below turns it into a byte offset in haystack */
     669             : 
     670          46 :         ret_pos = 0;
     671          46 :         U8_FWD_N(haystack, ret_pos, haystack_len, uchar_pos);
     672             : 
     673          46 :         if (part) {
     674          15 :                 RETURN_STRINGL(((char *)haystack), ret_pos, 1);
     675             :         }
     676             :         else {
     677          31 :                 RETURN_STRINGL(((char *)haystack) + ret_pos, haystack_len - ret_pos, 1);
     678             :         }
     679             : 
     680             : }
     681             : /* }}} */
     682             : 
     683             : /* {{{ proto string grapheme_strstr(string haystack, string needle[, bool part])
     684             :    Finds first occurrence of a string within another; thin wrapper that calls strstr_common_handler with f_ignore_case = 0 */
     685          36 : PHP_FUNCTION(grapheme_strstr)
     686             : {
     687          36 :         strstr_common_handler(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0 /* f_ignore_case */);
     688          36 : }
     689             : /* }}} */
     690             : 
     691             : /* {{{ proto string grapheme_stristr(string haystack, string needle[, bool part])
     692             :    Finds first occurrence of a string within another; counterpart of grapheme_strstr that calls strstr_common_handler with f_ignore_case = 1 */
     693          37 : PHP_FUNCTION(grapheme_stristr)
     694             : {
     695          37 :         strstr_common_handler(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1 /* f_ignore_case */);
     696          37 : }
     697             : /* }}} */
     698             : 
     699             : /* {{{ grapheme_extract_charcount_iter - grapheme iterator for grapheme_extract MAXCHARS: walks break positions from bi while they do not exceed csize and returns the matching byte offset in pstr */
     700             : static inline int32_t
     701          18 : grapheme_extract_charcount_iter(UBreakIterator *bi, int32_t csize, unsigned char *pstr, int32_t str_len)
     702             : {
     703          18 :         int pos = 0, prev_pos = 0;
     704          18 :         int ret_pos = 0, prev_ret_pos = 0;
     705             :         /* pos/prev_pos follow the positions reported by ubrk_next; ret_pos/prev_ret_pos follow the offset into pstr */
     706             :         while ( 1 ) {
     707          96 :                 pos = ubrk_next(bi);
     708             : 
     709          96 :                 if ( UBRK_DONE == pos ) {
     710           7 :                         break;
     711             :                 }
     712             : 
     713             :                 /* if this boundary is beyond our limit, the cluster does not fit and the loop is done */
     714          89 :                 if ( pos > csize ) {
     715          11 :                         break;
     716             :                 }
     717             : 
     718             :                 /* update our pointer in the original UTF-8 buffer by as many characters
     719             :                    as ubrk_next iterated over */
     720             :                 /* NOTE(review): confirm that break-iterator positions and the count U8_FWD_N expects use the same unit for non-BMP text */
     721          78 :                 prev_ret_pos = ret_pos;
     722          78 :                 U8_FWD_N(pstr, ret_pos, str_len, pos - prev_pos);
     723             : 
     724          78 :                 if ( prev_ret_pos == ret_pos ) {
     725             :                         /* no forward progress in the UTF-8 buffer - something wrong, malformed utf8? */
     726           0 :                         break;
     727             :                 }
     728             : 
     729          78 :                 prev_pos = pos;
     730          78 :         }
     731             : 
     732          18 :         return ret_pos;
     733             : }
     734             : /* }}} */
     735             : 
     736             : /* {{{ grapheme_extract_bytecount_iter - grapheme iterator for grapheme_extract MAXBYTES: returns the largest boundary offset in pstr that does not exceed bsize bytes */
     737             : static inline int32_t
     738          23 : grapheme_extract_bytecount_iter(UBreakIterator *bi, int32_t bsize, unsigned char *pstr, int32_t str_len)
     739             : {
     740          23 :         int pos = 0, prev_pos = 0;
     741          23 :         int ret_pos = 0, prev_ret_pos = 0;
     742             :         /* pos/prev_pos follow the positions reported by ubrk_next; ret_pos/prev_ret_pos follow the offset into pstr */
     743             :         while ( 1 ) {
     744          63 :                 pos = ubrk_next(bi);
     745             : 
     746          63 :                 if ( UBRK_DONE == pos ) {
     747           8 :                         break;
     748             :                 }
     749             :                 /* remember the previous offset, then advance by the distance the break iterator moved */
     750          55 :                 prev_ret_pos = ret_pos;
     751          55 :                 U8_FWD_N(pstr, ret_pos, str_len, pos - prev_pos);
     752             :                 /* the cluster just consumed overshoots bsize: fall back to the previous boundary and stop */
     753          55 :                 if ( ret_pos > bsize ) {
     754          15 :                         ret_pos = prev_ret_pos;
     755          15 :                         break;
     756             :                 }
     757             : 
     758          40 :                 if ( prev_ret_pos == ret_pos ) {
     759             :                         /* no forward progress in the UTF-8 buffer - something wrong, malformed utf8? */
     760           0 :                         break;
     761             :                 }
     762             : 
     763          40 :                 prev_pos = pos;
     764          40 :         }
     765             : 
     766          23 :         return ret_pos;
     767             : }
     768             : /* }}} */
     769             : 
     770             : /* {{{ grapheme_extract_count_iter - grapheme iterator for grapheme_extract COUNT: steps over at most 'size' break positions and returns the matching byte offset in pstr */
     771             : static inline int32_t
     772          21 : grapheme_extract_count_iter(UBreakIterator *bi, int32_t size, unsigned char *pstr, int32_t str_len)
     773             : {
     774          21 :         int pos = 0, next_pos = 0;
     775          21 :         int ret_pos = 0;
     776             :         /* pos only takes values other than UBRK_DONE, so it keeps the last real boundary if the text ends early */
     777          80 :         while ( size ) {
     778          40 :                 next_pos = ubrk_next(bi);
     779             : 
     780          40 :                 if ( UBRK_DONE == next_pos ) {
     781           2 :                         break;
     782             :                 }
     783          38 :                 pos = next_pos;
     784          38 :                 size--;
     785             :         }
     786             : 
     787             :         /* pos is one past the last UChar to return - and represents the number of code units to
     788             :                 advance in the utf-8 buffer (NOTE(review): confirm the unit U8_FWD_N expects for non-BMP text)
     789             :         */
     790             : 
     791          21 :         U8_FWD_N(pstr, ret_pos, str_len, pos);
     792             : 
     793          21 :         return ret_pos;
     794             : }
     795             : /* }}} */
     796             : 
     797             : /* {{{ grapheme extract iter function pointer array - common signature of the three iterators plus the dispatch table built from them */
     798             : typedef int32_t (*grapheme_extract_iter)(UBreakIterator * /*bi*/, int32_t /*size*/, unsigned char * /*pstr*/, int32_t /*str_len*/);
     799             : /* indexed by extract_type in grapheme_extract; the order presumably mirrors the GRAPHEME_EXTRACT_TYPE_* values (COUNT, MAXBYTES, MAXCHARS) - verify against the header */
     800             : static grapheme_extract_iter grapheme_extract_iters[] = {
     801             :         &grapheme_extract_count_iter,
     802             :         &grapheme_extract_bytecount_iter,
     803             :         &grapheme_extract_charcount_iter,
     804             : };
     805             : /* }}} */
     806             : 
     807             : /* {{{ proto string grapheme_extract(string str, int size[, int extract_type[, int start[, int &next]]])
     808             :         Function to extract a sequence of default grapheme clusters beginning at byte offset 'start'; 'size' limits clusters, bytes or characters depending on extract_type. Returns FALSE on invalid arguments. When 'next' is passed (by reference) it receives the byte offset in str that follows the returned chunk */
     809         106 : PHP_FUNCTION(grapheme_extract)
     810             : {
     811             :         unsigned char *str, *pstr;
     812             :         UChar *ustr;
     813             :         int str_len, ustr_len;
     814             :         long size; /* maximum number of grapheme clusters, bytes, or characters (based on extract_type) to return */
     815         106 :         long lstart = 0; /* starting position in str in bytes */
     816         106 :         int32_t start = 0;
     817         106 :         long extract_type = GRAPHEME_EXTRACT_TYPE_COUNT;
     818             :         UErrorCode status;
     819             :         unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
     820         106 :         UBreakIterator* bi = NULL;
     821             :         int ret_pos;
     822         106 :         zval *next = NULL; /* return offset of next part of the string */
     823             :         /* "sl|llz": str and size, then the optional extract_type, start and the 'next' zval */
     824         106 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|llz", (char **)&str, &str_len, &size, &extract_type, &lstart, &next) == FAILURE) {
     825             : 
     826           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     827             :                          "grapheme_extract: unable to parse input param", 0 TSRMLS_CC );
     828             : 
     829           1 :                 RETURN_FALSE;
     830             :         }
     831             : 
     832         105 :         if ( NULL != next ) {
     833          32 :                 if ( !PZVAL_IS_REF(next) ) {
     834           0 :                         intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     835             :                                  "grapheme_extract: 'next' was not passed by reference", 0 TSRMLS_CC );
     836             : 
     837           0 :                         RETURN_FALSE;
     838             :                 }
     839             :                 else {
     840             :                         /* initialize next: drop its old value and preset it to the requested start offset */
     841          16 :                         zval_dtor(next);
     842          16 :             ZVAL_LONG(next, lstart);
     843             :                 }
     844             :         }
     845             : 
     846         105 :         if ( extract_type < GRAPHEME_EXTRACT_TYPE_MIN || extract_type > GRAPHEME_EXTRACT_TYPE_MAX ) {
     847             : 
     848           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     849             :                          "grapheme_extract: unknown extract type param", 0 TSRMLS_CC );
     850             : 
     851           1 :                 RETURN_FALSE;
     852             :         }
     853             :         /* start is a byte offset and must address an existing byte of str */
     854         104 :         if ( lstart > INT32_MAX || lstart < 0 || lstart >= str_len ) {
     855           5 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_extract: start not contained in string", 0 TSRMLS_CC );
     856           5 :                 RETURN_FALSE;
     857             :         }
     858             : 
     859          99 :         if ( size > INT32_MAX || size < 0) {
     860           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_extract: size is invalid", 0 TSRMLS_CC );
     861           0 :                 RETURN_FALSE;
     862             :         }
     863          99 :         if (size == 0) {
     864           8 :                 RETURN_EMPTY_STRING();
     865             :         }
     866             : 
     867             :         /* we checked that it will fit: */
     868          91 :         start = (int32_t) lstart;
     869             : 
     870          91 :         pstr = str + start;
     871             : 
     872             :         /* just in case pstr points in the middle of a character, move forward to the start of the next char */
     873          91 :         if ( !UTF8_IS_SINGLE(*pstr) && !U8_IS_LEAD(*pstr) ) {
     874           9 :                 unsigned char *str_end = str + str_len;
     875             : 
     876          25 :                 while ( !UTF8_IS_SINGLE(*pstr) && !U8_IS_LEAD(*pstr) ) {
     877           9 :                         pstr++;
     878           9 :                         if ( pstr >= str_end ) {
     879           2 :                                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     880             :                                                                 "grapheme_extract: invalid input string", 0 TSRMLS_CC );
     881             : 
     882           2 :                                 RETURN_FALSE;
     883             :                         }
     884             :                 }
     885             :         }
     886             :         /* from here on str_len is the number of bytes remaining at pstr; 'start' is NOT advanced, so offsets for 'next' are derived from pstr - str */
     887          89 :         str_len -= (pstr - str);
     888             : 
     889             :         /* if the string is all ASCII up to size+1 - or str_len whichever is first - then we are done.
     890             :                 (size + 1 because the size-th character might be the beginning of a grapheme cluster)
     891             :          */
     892             :         /* NOTE(review): size + 1 is computed in 'long'; check for overflow on platforms where long is 32 bits and size == INT32_MAX */
     893          89 :         if ( -1 != grapheme_ascii_check(pstr, size + 1 < str_len ? size + 1 : str_len ) ) {
     894          27 :         long nsize = ( size < str_len ? size : str_len );
     895          27 :                 if ( NULL != next ) {
     896           9 :                         ZVAL_LONG(next, (long)(pstr - str) + nsize);
     897             :                 }
     898          27 :                 RETURN_STRINGL(((char *)pstr), nsize, 1);
     899             :         }
     900             : 
     901             :         /* convert the string to UTF-16. */
     902          62 :         ustr = NULL;
     903          62 :         ustr_len = 0;
     904          62 :         status = U_ZERO_ERROR;
     905          62 :         intl_convert_utf8_to_utf16(&ustr, &ustr_len, (char *)pstr, str_len, &status );
     906             : 
     907          62 :         if ( U_FAILURE( status ) ) {
     908             :                 /* Set global error code. */
     909           0 :                 intl_error_set_code( NULL, status TSRMLS_CC );
     910             : 
     911             :                 /* Set error messages. */
     912           0 :                 intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
     913             : 
     914           0 :                 if ( NULL != ustr )
     915           0 :                         efree( ustr );
     916             : 
     917           0 :                 RETURN_FALSE;
     918             :         }
     919             : 
     920          62 :         bi = NULL;
     921          62 :         status = U_ZERO_ERROR;
     922          62 :         bi = grapheme_get_break_iterator(u_break_iterator_buffer, &status TSRMLS_CC );
     923             : 
     924          62 :         ubrk_setText(bi, ustr, ustr_len, &status);
     925             :         /* NOTE(review): status is not inspected after grapheme_get_break_iterator / ubrk_setText */
     926             :         /* if the caller put us in the middle of a grapheme, we can't detect it in all cases since we
     927             :                 can't back up. So, we will not do anything. */
     928             : 
     929             :         /* now we need to find the end of the chunk the user wants us to return */
     930             :         /* extract_type was range-checked above; the iterator table is assumed to cover exactly that range */
     931          62 :         ret_pos = (*grapheme_extract_iters[extract_type])(bi, size, pstr, str_len);
     932             : 
     933          62 :         if (ustr) {
     934          62 :                 efree(ustr);
     935             :         }
     936          62 :         ubrk_close(bi);
     937             :         /* next = offset of pstr within the original string plus the bytes returned (includes any continuation bytes skipped above) */
     938          62 :         if ( NULL != next ) {
     939           4 :                 ZVAL_LONG(next, (long)(pstr - str) + ret_pos);
     940             :         }
     941             : 
     942          62 :         RETURN_STRINGL(((char *)pstr), ret_pos, 1);
     943             : }
     944             : 
     945             : /* }}} */
     946             : 
     947             : /*
     948             :  * Local variables:
     949             :  * tab-width: 4
     950             :  * c-basic-offset: 4
     951             :  * End:
     952             :  * vim600: fdm=marker
     953             :  * vim: noet sw=4 ts=4
     954             :  */
     955             : 

Generated by: LCOV version 1.10

Generated at Sat, 23 Aug 2014 22:39:33 +0000 (6 days ago)

Copyright © 2005-2014 The PHP Group
All rights reserved.