PHP  
 PHP: Test and Code Coverage Analysis
downloads | QA | documentation | faq | getting help | mailing lists | reporting bugs | php.net sites | links | my php.net 
 

LCOV - code coverage report
Current view: top level - ext/intl/grapheme - grapheme_string.c (source / functions) Hit Total Coverage
Test: PHP Code Coverage Lines: 329 377 87.3 %
Date: 2014-09-19 Functions: 14 14 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :    +----------------------------------------------------------------------+
       3             :    | PHP Version 5                                                                                                                |
       4             :    +----------------------------------------------------------------------+
       5             :    | This source file is subject to version 3.01 of the PHP license,      |
       6             :    | that is bundled with this package in the file LICENSE, and is                |
       7             :    | available through the world-wide-web at the following url:                   |
       8             :    | http://www.php.net/license/3_01.txt                                                                  |
       9             :    | If you did not receive a copy of the PHP license and are unable to   |
      10             :    | obtain it through the world-wide-web, please send a note to                  |
      11             :    | license@php.net so we can mail you a copy immediately.                               |
      12             :    +----------------------------------------------------------------------+
      13             :    | Author: Ed Batutis <ed@batutis.com>                                                            |
      14             :    +----------------------------------------------------------------------+
      15             :  */
      16             : 
      17             : /* {{{ includes */
      18             : #ifdef HAVE_CONFIG_H
      19             : #include "config.h"
      20             : #endif
      21             : 
      22             : #include <php.h>
      23             : #include "grapheme.h"
      24             : #include "grapheme_util.h"
      25             : 
      26             : #include <unicode/utypes.h>
      27             : #include <unicode/ucol.h>
      28             : #include <unicode/ustring.h>
      29             : #include <unicode/ubrk.h>
      30             : 
      31             : #include "ext/standard/php_string.h"
      32             : 
      33             : /* }}} */
      34             : 
      /* Extract-type selectors. COUNT, MAXBYTES and MAXCHARS are the values registered below as the GRAPHEME_EXTR_* constants; MIN and MAX alias the lowest and highest selector (presumably for range validation elsewhere -- confirm against callers). */
      35             : #define GRAPHEME_EXTRACT_TYPE_COUNT             0
      36             : #define GRAPHEME_EXTRACT_TYPE_MAXBYTES  1
      37             : #define GRAPHEME_EXTRACT_TYPE_MAXCHARS  2
      38             : #define GRAPHEME_EXTRACT_TYPE_MIN       GRAPHEME_EXTRACT_TYPE_COUNT
      39             : #define GRAPHEME_EXTRACT_TYPE_MAX       GRAPHEME_EXTRACT_TYPE_MAXCHARS
      40             : 
      41             : 
      42             : /* {{{ grapheme_register_constants
      43             :  * Register API constants
      44             :  */
      45       20335 : void grapheme_register_constants( INIT_FUNC_ARGS )
      46             : {
      47       20335 :         REGISTER_LONG_CONSTANT("GRAPHEME_EXTR_COUNT", GRAPHEME_EXTRACT_TYPE_COUNT, CONST_CS | CONST_PERSISTENT);
      48       20335 :         REGISTER_LONG_CONSTANT("GRAPHEME_EXTR_MAXBYTES", GRAPHEME_EXTRACT_TYPE_MAXBYTES, CONST_CS | CONST_PERSISTENT);
      49       20335 :         REGISTER_LONG_CONSTANT("GRAPHEME_EXTR_MAXCHARS", GRAPHEME_EXTRACT_TYPE_MAXCHARS, CONST_CS | CONST_PERSISTENT);
      50       20335 : }
      51             : /* }}} */
      52             : 
      53             : /* {{{ proto size_t grapheme_strlen(string str)
      54             :    Get number of graphemes in a string */
      55           7 : PHP_FUNCTION(grapheme_strlen)
      56             : {
      57             :         unsigned char* string;
      58             :         size_t string_len;
      59           7 :         UChar* ustring = NULL;
      60           7 :         int ustring_len = 0;
      61             :         int ret_len;
      62             :         UErrorCode status;
      63             : 
      64           7 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", (char **)&string, &string_len) == FAILURE) {
      65             : 
      66           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
      67             :                          "grapheme_strlen: unable to parse input param", 0 TSRMLS_CC );
      68             : 
      69           1 :                 RETURN_FALSE;
      70             :         }
      71             : 
      72           6 :         ret_len = grapheme_ascii_check(string, string_len);
      73             : 
      74           6 :         if ( ret_len >= 0 )
      75           2 :                 RETURN_LONG(ret_len);
      76             : 
      77             :         /* convert the string to UTF-16. */
      78           4 :         status = U_ZERO_ERROR;
      79           4 :         intl_convert_utf8_to_utf16(&ustring, &ustring_len, (char*) string, string_len, &status );
      80             : 
      81           4 :         if ( U_FAILURE( status ) ) {
      82             :                 /* Set global error code. */
      83           0 :                 intl_error_set_code( NULL, status TSRMLS_CC );
      84             : 
      85             :                 /* Set error messages. */
      86           0 :                 intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
      87           0 :                 if (ustring) {
      88           0 :                         efree( ustring );
      89             :                 }
      90           0 :                 RETURN_NULL();
      91             :         }
      92             : 
      93           4 :         ret_len = grapheme_split_string(ustring, ustring_len, NULL, 0 TSRMLS_CC );
      94             : 
      95           4 :         if (ustring) {
      96           4 :                 efree( ustring );
      97             :         }
      98             : 
      99           4 :         if (ret_len >= 0) {
     100           4 :                 RETVAL_LONG(ret_len);
     101             :         } else {
     102           0 :                 RETVAL_FALSE;
     103             :         }
     104             : }
     105             : /* }}} */
     106             : 
     107             : /* {{{ proto int grapheme_strpos(string haystack, string needle [, int offset ])
     108             :    Find position of first occurrence of a string within another */
     109          38 : PHP_FUNCTION(grapheme_strpos)
     110             : {
     111             :         unsigned char *haystack, *needle;
     112             :         size_t haystack_len, needle_len;
     113             :         unsigned char *found;
     114          38 :         zend_long loffset = 0;
     115          38 :         int32_t offset = 0;
     116             :         int ret_pos;
     117             : 
     118          38 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", (char **)&haystack, &haystack_len, (char **)&needle, &needle_len, &loffset) == FAILURE) {
     119             : 
     120           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     121             :                          "grapheme_strpos: unable to parse input param", 0 TSRMLS_CC );
     122             : 
     123           1 :                 RETURN_FALSE;
     124             :         }
     125             : 
     126          37 :         if ( OUTSIDE_STRING(loffset, haystack_len) ) {
     127             : 
     128           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Offset not contained in string", 1 TSRMLS_CC );
     129             : 
     130           1 :                 RETURN_FALSE;
     131             :         }
     132             : 
     133             :         /* we checked that it will fit: */
     134          36 :         offset = (int32_t) loffset;
     135             : 
     136             :         /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
     137             : 
     138          36 :         if (needle_len == 0) {
     139             : 
     140           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Empty delimiter", 1 TSRMLS_CC );
     141             : 
     142           0 :                 RETURN_FALSE;
     143             :         }
     144             : 
     145             : 
     146             :         /* quick check to see if the string might be there
     147             :          * I realize that 'offset' is 'grapheme count offset' but will work in spite of that
     148             :         */
     149          36 :         found = (unsigned char *)php_memnstr((char *)haystack + offset, (char *)needle, needle_len, (char *)haystack + haystack_len);
     150             : 
     151             :         /* if it isn't there the we are done */
     152          36 :         if (!found) {
     153           9 :                 RETURN_FALSE;
     154             :         }
     155             : 
     156             :         /* if it is there, and if the haystack is ascii, we are all done */
     157          27 :         if ( grapheme_ascii_check(haystack, haystack_len) >= 0 ) {
     158             : 
     159          12 :                 RETURN_LONG(found - haystack);
     160             :         }
     161             : 
     162             :         /* do utf16 part of the strpos */
     163          15 :         ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* fIgnoreCase */, 0 /* last */ TSRMLS_CC );
     164             : 
     165          15 :         if ( ret_pos >= 0 ) {
     166          13 :                 RETURN_LONG(ret_pos);
     167             :         } else {
     168           2 :                 RETURN_FALSE;
     169             :         }
     170             : 
     171             : }
     172             : /* }}} */
     173             : 
     174             : /* {{{ proto int grapheme_stripos(string haystack, string needle [, int offset ])
     175             :    Find position of first occurrence of a string within another, ignoring case differences */
     176          40 : PHP_FUNCTION(grapheme_stripos)
     177             : {
     178             :         unsigned char *haystack, *needle, *haystack_dup, *needle_dup;
     179             :         size_t haystack_len, needle_len;
     180             :         unsigned char *found;
     181          40 :         zend_long loffset = 0;
     182          40 :         int32_t offset = 0;
     183             :         int ret_pos;
     184             :         int is_ascii;
     185             : 
     186          40 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", (char **)&haystack, &haystack_len, (char **)&needle, &needle_len, &loffset) == FAILURE) {
     187             : 
     188           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     189             :                          "grapheme_stripos: unable to parse input param", 0 TSRMLS_CC );
     190             : 
     191           1 :                 RETURN_FALSE;
     192             :         }
     193             : 
     194          39 :         if ( OUTSIDE_STRING(loffset, haystack_len) ) {
     195             : 
     196           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_stripos: Offset not contained in string", 1 TSRMLS_CC );
     197             : 
     198           1 :                 RETURN_FALSE;
     199             :         }
     200             : 
     201             :         /* we checked that it will fit: */
     202          38 :         offset = (int32_t) loffset;
     203             : 
     204             :         /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
     205             : 
     206          38 :         if (needle_len == 0) {
     207             : 
     208           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_stripos: Empty delimiter", 1 TSRMLS_CC );
     209             : 
     210           0 :                 RETURN_FALSE;
     211             :         }
     212             : 
     213             : 
     214          38 :         is_ascii = ( grapheme_ascii_check(haystack, haystack_len) >= 0 );
     215             : 
     216          38 :         if ( is_ascii ) {
     217          19 :                 needle_dup = (unsigned char *)estrndup((char *)needle, needle_len);
     218          19 :                 php_strtolower((char *)needle_dup, needle_len);
     219          19 :                 haystack_dup = (unsigned char *)estrndup((char *)haystack, haystack_len);
     220          19 :                 php_strtolower((char *)haystack_dup, haystack_len);
     221             : 
     222          19 :                 found = (unsigned char*) php_memnstr((char *)haystack_dup + offset, (char *)needle_dup, needle_len, (char *)haystack_dup + haystack_len);
     223             : 
     224          19 :                 efree(haystack_dup);
     225          19 :                 efree(needle_dup);
     226             : 
     227          19 :                 if (found) {
     228          12 :                         RETURN_LONG(found - haystack_dup);
     229             :                 }
     230             : 
     231             :                 /* if needle was ascii too, we are all done, otherwise we need to try using Unicode to see what we get */
     232           7 :                 if ( grapheme_ascii_check(needle, needle_len) >= 0 ) {
     233           5 :                         RETURN_FALSE;
     234             :                 }
     235             :         }
     236             : 
     237             :         /* do utf16 part of the strpos */
     238          21 :         ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 1 /* fIgnoreCase */, 0 /*last */ TSRMLS_CC );
     239             : 
     240          21 :         if ( ret_pos >= 0 ) {
     241          15 :                 RETURN_LONG(ret_pos);
     242             :         } else {
     243           6 :                 RETURN_FALSE;
     244             :         }
     245             : 
     246             : }
     247             : /* }}} */
     248             : 
     249             : /* {{{ proto int grapheme_strrpos(string haystack, string needle [, int offset])
     250             :    Find position of last occurrence of a string within another */
     251          37 : PHP_FUNCTION(grapheme_strrpos)
     252             : {
     253             :         unsigned char *haystack, *needle;
     254             :         size_t haystack_len, needle_len;
     255          37 :         zend_long loffset = 0;
     256          37 :         int32_t offset = 0;
     257             :         int32_t ret_pos;
     258             :         int is_ascii;
     259             : 
     260          37 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", (char **)&haystack, &haystack_len, (char **)&needle, &needle_len, &loffset) == FAILURE) {
     261             : 
     262           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     263             :                          "grapheme_strrpos: unable to parse input param", 0 TSRMLS_CC );
     264             : 
     265           1 :                 RETURN_FALSE;
     266             :         }
     267             : 
     268          36 :         if ( OUTSIDE_STRING(loffset, haystack_len) ) {
     269             : 
     270           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Offset not contained in string", 1 TSRMLS_CC );
     271             : 
     272           0 :                 RETURN_FALSE;
     273             :         }
     274             : 
     275             :         /* we checked that it will fit: */
     276          36 :         offset = (int32_t) loffset;
     277             : 
     278             :         /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
     279             : 
     280          36 :         if (needle_len == 0) {
     281             : 
     282           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Empty delimiter", 1 TSRMLS_CC );
     283             : 
     284           0 :                 RETURN_FALSE;
     285             :         }
     286             : 
     287          36 :         is_ascii = grapheme_ascii_check(haystack, haystack_len) >= 0;
     288             : 
     289          36 :         if ( is_ascii ) {
     290             : 
     291          19 :                 ret_pos = grapheme_strrpos_ascii(haystack, haystack_len, needle, needle_len, offset);
     292             : 
     293             : 
     294          19 :                 if ( ret_pos >= 0 ) {
     295          12 :                         RETURN_LONG(ret_pos);
     296             :                 }
     297             : 
     298             :                 /* if the needle was ascii too, we are done */
     299             : 
     300           7 :                 if (  grapheme_ascii_check(needle, needle_len) >= 0 ) {
     301           5 :                         RETURN_FALSE;
     302             :                 }
     303             : 
     304             :                 /* else we need to continue via utf16 */
     305             :         }
     306             : 
     307          19 :         ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* f_ignore_case */, 1/* last */ TSRMLS_CC);
     308             : 
     309          19 :         if ( ret_pos >= 0 ) {
     310          13 :                 RETURN_LONG(ret_pos);
     311             :         } else {
     312           6 :                 RETURN_FALSE;
     313             :         }
     314             : 
     315             : 
     316             : }
     317             : /* }}} */
     318             : 
     319             : /* {{{ proto int grapheme_strripos(string haystack, string needle [, int offset])
     320             :    Find position of last occurrence of a string within another, ignoring case */
     321          37 : PHP_FUNCTION(grapheme_strripos)
     322             : {
     323             :         unsigned char *haystack, *needle;
     324             :         size_t haystack_len, needle_len;
     325          37 :         zend_long loffset = 0;
     326          37 :         int32_t offset = 0;
     327             :         int32_t ret_pos;
     328             :         int is_ascii;
     329             : 
     330          37 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", (char **)&haystack, &haystack_len, (char **)&needle, &needle_len, &loffset) == FAILURE) {
     331             : 
     332           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     333             :                          "grapheme_strrpos: unable to parse input param", 0 TSRMLS_CC );
     334             : 
     335           1 :                 RETURN_FALSE;
     336             :         }
     337             : 
     338          36 :         if ( OUTSIDE_STRING(loffset, haystack_len) ) {
     339             : 
     340           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Offset not contained in string", 1 TSRMLS_CC );
     341             : 
     342           0 :                 RETURN_FALSE;
     343             :         }
     344             : 
     345             :         /* we checked that it will fit: */
     346          36 :         offset = (int32_t) loffset;
     347             : 
     348             :         /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
     349             : 
     350          36 :         if (needle_len == 0) {
     351             : 
     352           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Empty delimiter", 1 TSRMLS_CC );
     353             : 
     354           0 :                 RETURN_FALSE;
     355             :         }
     356             : 
     357          36 :         is_ascii = grapheme_ascii_check(haystack, haystack_len) >= 0;
     358             : 
     359          36 :         if ( is_ascii ) {
     360             :                 unsigned char *needle_dup, *haystack_dup;
     361             : 
     362          19 :                 needle_dup = (unsigned char *)estrndup((char *)needle, needle_len);
     363          19 :                 php_strtolower((char *)needle_dup, needle_len);
     364          19 :                 haystack_dup = (unsigned char *)estrndup((char *)haystack, haystack_len);
     365          19 :                 php_strtolower((char *)haystack_dup, haystack_len);
     366             : 
     367          19 :                 ret_pos = grapheme_strrpos_ascii(haystack_dup, haystack_len, needle_dup, needle_len, offset);
     368             : 
     369          19 :                 efree(haystack_dup);
     370          19 :                 efree(needle_dup);
     371             : 
     372          19 :                 if ( ret_pos >= 0 ) {
     373          12 :                         RETURN_LONG(ret_pos);
     374             :                 }
     375             : 
     376             :                 /* if the needle was ascii too, we are done */
     377             : 
     378           7 :                 if (  grapheme_ascii_check(needle, needle_len) >= 0 ) {
     379           5 :                         RETURN_FALSE;
     380             :                 }
     381             : 
     382             :                 /* else we need to continue via utf16 */
     383             :         }
     384             : 
     385          19 :         ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL,  1 /* f_ignore_case */, 1 /*last */ TSRMLS_CC);
     386             : 
     387          19 :         if ( ret_pos >= 0 ) {
     388          13 :                 RETURN_LONG(ret_pos);
     389             :         } else {
     390           6 :                 RETURN_FALSE;
     391             :         }
     392             : 
     393             : 
     394             : }
     395             : /* }}} */
     396             : 
     397             : /* {{{ proto string grapheme_substr(string str, int start [, int length])
     398             :    Returns part of a string */
     399          71 : PHP_FUNCTION(grapheme_substr)
     400             : {
     401             :         unsigned char *str, *sub_str;
     402             :         UChar *ustr;
     403             :         size_t str_len;
     404             :         int32_t ustr_len;
     405             :         int32_t sub_str_len;
     406          71 :         zend_long lstart = 0, length = 0;
     407          71 :         int32_t start = 0;
     408             :         int iter_val;
     409             :         UErrorCode status;
     410             :         unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
     411          71 :         UBreakIterator* bi = NULL;
     412             :         int sub_str_start_pos, sub_str_end_pos;
     413             :         int32_t (*iter_func)(UBreakIterator *);
     414             : 
     415          71 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|l", (char **)&str, &str_len, &lstart, &length) == FAILURE) {
     416             : 
     417           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     418             :                          "grapheme_substr: unable to parse input param", 0 TSRMLS_CC );
     419             : 
     420           1 :                 RETURN_FALSE;
     421             :         }
     422             : 
     423          70 :         if ( OUTSIDE_STRING(lstart, str_len) ) {
     424             : 
     425           5 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: start not contained in string", 1 TSRMLS_CC );
     426             : 
     427           5 :                 RETURN_FALSE;
     428             :         }
     429             : 
     430             :         /* we checked that it will fit: */
     431          65 :         start = (int32_t) lstart;
     432             : 
     433             :         /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
     434             : 
     435          65 :         if ( grapheme_ascii_check(str, str_len) >= 0 ) {
     436           9 :                 grapheme_substr_ascii((char *)str, str_len, start, length, ZEND_NUM_ARGS(), (char **) &sub_str, &sub_str_len);
     437             : 
     438           9 :                 if ( NULL == sub_str ) {
     439           1 :                         intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: invalid parameters", 1 TSRMLS_CC );
     440           1 :                         RETURN_FALSE;
     441             :                 }
     442             : 
     443          16 :                 RETURN_STRINGL(((char *)sub_str), sub_str_len);
     444             :         }
     445             : 
     446          56 :         ustr = NULL;
     447          56 :         ustr_len = 0;
     448          56 :         status = U_ZERO_ERROR;
     449          56 :         intl_convert_utf8_to_utf16(&ustr, &ustr_len, (char *)str, str_len, &status);
     450             : 
     451          56 :         if ( U_FAILURE( status ) ) {
     452             :                 /* Set global error code. */
     453           0 :                 intl_error_set_code( NULL, status TSRMLS_CC );
     454             : 
     455             :                 /* Set error messages. */
     456           0 :                 intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
     457           0 :                 if (ustr) {
     458           0 :                         efree( ustr );
     459             :                 }
     460           0 :                 RETURN_FALSE;
     461             :         }
     462             : 
     463          56 :         bi = grapheme_get_break_iterator((void*)u_break_iterator_buffer, &status TSRMLS_CC );
     464             : 
     465          56 :         if( U_FAILURE(status) ) {
     466           0 :                 RETURN_FALSE;
     467             :         }
     468             : 
     469          56 :         ubrk_setText(bi, ustr, ustr_len,        &status);
     470             : 
     471          56 :         if ( start < 0 ) {
     472          28 :                 iter_func = ubrk_previous;
     473          28 :                 ubrk_last(bi);
     474          28 :                 iter_val = 1;
     475             :         }
     476             :         else {
     477          28 :                 iter_func = ubrk_next;
     478          28 :                 iter_val = -1;
     479             :         }
     480             : 
     481          56 :         sub_str_start_pos = 0;
     482             : 
     483         377 :         while ( start ) {
     484         265 :                 sub_str_start_pos = iter_func(bi);
     485             : 
     486         265 :                 if ( UBRK_DONE == sub_str_start_pos ) {
     487           0 :                         break;
     488             :                 }
     489             : 
     490         265 :                 start += iter_val;
     491             :         }
     492             : 
     493          56 :         if ( 0 != start || sub_str_start_pos >= ustr_len ) {
     494             : 
     495           3 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: start not contained in string", 1 TSRMLS_CC );
     496             : 
     497           3 :                 if (ustr) {
     498           3 :                         efree(ustr);
     499             :                 }
     500           3 :                 ubrk_close(bi);
     501           3 :                 RETURN_FALSE;
     502             :         }
     503             : 
     504          53 :         if (ZEND_NUM_ARGS() <= 2) {
     505             : 
     506             :                 /* no length supplied, return the rest of the string */
     507             : 
     508          17 :                 sub_str = NULL;
     509          17 :                 sub_str_len = 0;
     510          17 :                 status = U_ZERO_ERROR;
     511          17 :                 intl_convert_utf16_to_utf8((char **)&sub_str, &sub_str_len, ustr + sub_str_start_pos, ustr_len - sub_str_start_pos, &status);
     512             : 
     513          17 :                 if (ustr) {
     514          17 :                         efree( ustr );
     515             :                 }
     516          17 :                 ubrk_close( bi );
     517             : 
     518          17 :                 if ( U_FAILURE( status ) ) {
     519             :                         /* Set global error code. */
     520           0 :                         intl_error_set_code( NULL, status TSRMLS_CC );
     521             : 
     522             :                         /* Set error messages. */
     523           0 :                         intl_error_set_custom_msg( NULL, "Error converting output string to UTF-8", 0 TSRMLS_CC );
     524             : 
     525           0 :                         if (sub_str) {
     526           0 :                                 efree( sub_str );
     527             :                         }
     528             : 
     529           0 :                         RETURN_FALSE;
     530             :                 }
     531             : 
     532             :                 /* return the allocated string, not a duplicate */
     533          34 :                 RETVAL_STRINGL(((char *)sub_str), sub_str_len);
     534             :                 //???
     535          17 :                 efree(sub_str);
     536          17 :                 return;
     537             :         }
     538             : 
     539          36 :         if(length == 0) {
     540             :                 /* empty length - we've validated start, we can return "" now */
     541           2 :                 if (ustr) {
     542           2 :                         efree(ustr);
     543             :                 }
     544           2 :                 ubrk_close(bi);
     545           2 :                 RETURN_EMPTY_STRING();          
     546             :         }
     547             : 
     548             :         /* find the end point of the string to return */
     549             : 
     550          34 :         if ( length < 0 ) {
     551          24 :                 iter_func = ubrk_previous;
     552          24 :                 ubrk_last(bi);
     553          24 :                 iter_val = 1;
     554             :         }
     555             :         else {
     556          10 :                 iter_func = ubrk_next;
     557          10 :                 iter_val = -1;
     558             :         }
     559             : 
     560          34 :         sub_str_end_pos = 0;
     561             : 
     562         224 :         while ( length ) {
     563         161 :                 sub_str_end_pos = iter_func(bi);
     564             : 
     565         161 :                 if ( UBRK_DONE == sub_str_end_pos ) {
     566           5 :                         break;
     567             :                 }
     568             : 
     569         156 :                 length += iter_val;
     570             :         }
     571             : 
     572          34 :         ubrk_close(bi);
     573             : 
     574          34 :         if ( UBRK_DONE == sub_str_end_pos) {
     575           5 :                 if(length < 0) {
     576           3 :                         intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: length not contained in string", 1 TSRMLS_CC );
     577             : 
     578           3 :                         efree(ustr);
     579           3 :                         RETURN_FALSE;
     580             :                 } else {
     581           2 :                         sub_str_end_pos = ustr_len;
     582             :                 }
     583             :         }
     584             :         
     585          31 :         if(sub_str_start_pos > sub_str_end_pos) {
     586           2 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: length is beyond start", 1 TSRMLS_CC );
     587             : 
     588           2 :                 efree(ustr);
     589           2 :                 RETURN_FALSE;
     590             :         }
     591             : 
     592          29 :         sub_str = NULL;
     593          29 :         status = U_ZERO_ERROR;
     594          29 :         intl_convert_utf16_to_utf8((char **)&sub_str, &sub_str_len, ustr + sub_str_start_pos, ( sub_str_end_pos - sub_str_start_pos ), &status);
     595             : 
     596          29 :         efree( ustr );
     597             : 
     598          29 :         if ( U_FAILURE( status ) ) {
     599             :                 /* Set global error code. */
     600           0 :                 intl_error_set_code( NULL, status TSRMLS_CC );
     601             : 
     602             :                 /* Set error messages. */
     603           0 :                 intl_error_set_custom_msg( NULL, "Error converting output string to UTF-8", 0 TSRMLS_CC );
     604             : 
     605           0 :                 if ( NULL != sub_str )
     606           0 :                         efree( sub_str );
     607             : 
     608           0 :                 RETURN_FALSE;
     609             :         }
     610             : 
     611             :          /* return the allocated string, not a duplicate */
     612          58 :         RETVAL_STRINGL(((char *)sub_str), sub_str_len);
     613             :         //????
     614          29 :         efree(sub_str);
     615             : 
     616             : }
     617             : /* }}} */
     618             : 
     619             : /* {{{  strstr_common_handler - shared body of grapheme_strstr() and grapheme_stristr(); f_ignore_case selects case-insensitive matching. Returns the part of haystack from the first match to its end (or the part before the match when 'part' is true), FALSE when nothing matches or the arguments are invalid */
     620          73 : static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_case)
     621             : {
     622             :         unsigned char *haystack, *needle, *found;
     623             :         size_t haystack_len, needle_len;
     624             :         int ret_pos, uchar_pos;
     625          73 :         zend_bool part = 0; /* optional third argument: when true, return what precedes the match */
     626             : 
     627          73 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|b", (char **)&haystack, &haystack_len, (char **)&needle, &needle_len, &part) == FAILURE) {
     628             : 
     629           2 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     630             :                          "grapheme_strstr: unable to parse input param", 0 TSRMLS_CC );
     631             : 
     632           2 :                 RETURN_FALSE;
     633             :         }
     634             : 
     635          71 :         if (needle_len == 0) {
     636             : 
     637           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strstr: Empty delimiter", 1 TSRMLS_CC );
     638             : 
     639           0 :                 RETURN_FALSE;
     640             :         }
     641             : 
     642             : 
     643          71 :         if ( !f_ignore_case ) {
     644             : 
     645             :                 /* ASCII optimization: quick byte-wise check to see if the needle might be there.
     646             :                  * Only done for the case-sensitive variant; a miss here ends the search immediately.
     647             :                 */
     648          35 :                 found = (unsigned char *)php_memnstr((char *)haystack, (char *)needle, needle_len, (char *)haystack + haystack_len);
     649             : 
     650             :                 /* if it isn't there then we are done */
     651          35 :                 if ( !found ) {
     652           3 :                         RETURN_FALSE;
     653             :                 }
     654             : 
     655             :                 /* if it is there, and if the haystack is ascii, we are all done */
     656          32 :                 if ( grapheme_ascii_check(haystack, haystack_len) >= 0 ) {
     657          13 :                         size_t found_offset = found - haystack;
     658             : 
     659          13 :                         if (part) {
     660          10 :                                 RETURN_STRINGL(((char *)haystack) , found_offset);
     661             :                         } else {
     662          16 :                                 RETURN_STRINGL(((char *)found), haystack_len - found_offset);
     663             :                         }
     664             :                 }
     665             : 
     666             :         }
     667             : 
     668             :         /* need to work in utf16 */
     669          55 :         ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, 0, &uchar_pos, f_ignore_case, 0 /*last */ TSRMLS_CC );
     670             : 
     671          55 :         if ( ret_pos < 0 ) {
     672           9 :                 RETURN_FALSE;
     673             :         }
     674             : 
     675             :         /* uchar_pos is the 'nth' Unicode character position of the needle */
     676             : 
     677          46 :         ret_pos = 0; /* reused from here on as a byte offset into haystack */
     678          46 :         U8_FWD_N(haystack, ret_pos, haystack_len, uchar_pos); /* step ret_pos forward over uchar_pos characters of the UTF-8 haystack */
     679             : 
     680          46 :         if (part) {
     681          30 :                 RETURN_STRINGL(((char *)haystack), ret_pos);
     682             :         }
     683             :         else {
     684          62 :                 RETURN_STRINGL(((char *)haystack) + ret_pos, haystack_len - ret_pos);
     685             :         }
     686             : 
     687             : }
     688             : /* }}} */
     689             : 
     690             : /* {{{ proto string grapheme_strstr(string haystack, string needle[, bool part])
     691             :    Finds first occurrence of a string within another (case-sensitive); all work is delegated to strstr_common_handler */
     692          36 : PHP_FUNCTION(grapheme_strstr)
     693             : {
     694          36 :         strstr_common_handler(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0 /* f_ignore_case */);
     695          36 : }
     696             : /* }}} */
     697             : 
     698             : /* {{{ proto string grapheme_stristr(string haystack, string needle[, bool part])
     699             :    Finds first occurrence of a string within another, ignoring case; all work is delegated to strstr_common_handler */
     700          37 : PHP_FUNCTION(grapheme_stristr)
     701             : {
     702          37 :         strstr_common_handler(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1 /* f_ignore_case */);
     703          37 : }
     704             : /* }}} */
     705             : 
     706             : /* {{{ grapheme_extract_charcount_iter - grapheme iterator for grapheme_extract MAXCHARS: walks the break iterator while its position stays <= csize and returns the matching offset into the UTF-8 buffer pstr (0 if not even the first boundary fits) */
     707             : static inline int32_t
     708          18 : grapheme_extract_charcount_iter(UBreakIterator *bi, int32_t csize, unsigned char *pstr, int32_t str_len)
     709             : {
     710          18 :         int pos = 0, prev_pos = 0; /* current and previous break iterator positions */
     711          18 :         int ret_pos = 0, prev_ret_pos = 0; /* current and previous offsets into pstr */
     712             : 
     713             :         while ( 1 ) {
     714          96 :                 pos = ubrk_next(bi);
     715             : 
     716          96 :                 if ( UBRK_DONE == pos ) {
     717           7 :                         break;
     718             :                 }
     719             : 
     720             :                 /* if we are beyond our limit, then the loop is done */
     721          89 :                 if ( pos > csize ) {
     722          11 :                         break;
     723             :                 }
     724             : 
     725             :                 /* update our pointer in the original UTF-8 buffer by as many characters
     726             :                    as ubrk_next iterated over */
     727             : 
     728          78 :                 prev_ret_pos = ret_pos;
     729          78 :                 U8_FWD_N(pstr, ret_pos, str_len, pos - prev_pos); /* NOTE(review): pos - prev_pos is a break iterator index delta; if bi iterates UTF-16 text a surrogate pair would count twice here while U8_FWD_N steps by code points - confirm */
     730             : 
     731          78 :                 if ( prev_ret_pos == ret_pos ) {
     732             :                         /* something wrong - malformed utf8? */
     733           0 :                         break;
     734             :                 }
     735             : 
     736          78 :                 prev_pos = pos;
     737          78 :         }
     738             : 
     739          18 :         return ret_pos;
     740             : }
     741             : /* }}} */
     742             : 
     743             : /* {{{ grapheme_extract_bytecount_iter - grapheme iterator for grapheme_extract MAXBYTES: returns the largest grapheme boundary offset into the UTF-8 buffer pstr that does not exceed bsize (0 if the first cluster is already too long) */
     744             : static inline int32_t
     745          23 : grapheme_extract_bytecount_iter(UBreakIterator *bi, int32_t bsize, unsigned char *pstr, int32_t str_len)
     746             : {
     747          23 :         int pos = 0, prev_pos = 0; /* current and previous break iterator positions */
     748          23 :         int ret_pos = 0, prev_ret_pos = 0; /* current and previous offsets into pstr */
     749             : 
     750             :         while ( 1 ) {
     751          63 :                 pos = ubrk_next(bi);
     752             : 
     753          63 :                 if ( UBRK_DONE == pos ) {
     754           8 :                         break;
     755             :                 }
     756             : 
     757          55 :                 prev_ret_pos = ret_pos;
     758          55 :                 U8_FWD_N(pstr, ret_pos, str_len, pos - prev_pos); /* NOTE(review): same index-delta vs. code point caveat as in the MAXCHARS iterator - confirm for text outside the BMP */
     759             : 
     760          55 :                 if ( ret_pos > bsize ) {
     761          15 :                         ret_pos = prev_ret_pos; /* this cluster overshoots the byte budget: fall back to the previous boundary */
     762          15 :                         break;
     763             :                 }
     764             : 
     765          40 :                 if ( prev_ret_pos == ret_pos ) {
     766             :                         /* something wrong - malformed utf8? */
     767           0 :                         break;
     768             :                 }
     769             : 
     770          40 :                 prev_pos = pos;
     771          40 :         }
     772             : 
     773          23 :         return ret_pos;
     774             : }
     775             : /* }}} */
     776             : 
     777             : /* {{{ grapheme_extract_count_iter - grapheme iterator for grapheme_extract COUNT: steps over at most 'size' grapheme boundaries and returns the matching offset into the UTF-8 buffer pstr */
     778             : static inline int32_t
     779          21 : grapheme_extract_count_iter(UBreakIterator *bi, int32_t size, unsigned char *pstr, int32_t str_len)
     780             : {
     781          21 :         int pos = 0, next_pos = 0;
     782          21 :         int ret_pos = 0;
     783             : 
     784          80 :         while ( size ) {
     785          40 :                 next_pos = ubrk_next(bi);
     786             : 
     787          40 :                 if ( UBRK_DONE == next_pos ) {
     788           2 :                         break; /* ran out of text: keep the last valid boundary in pos */
     789             :                 }
     790          38 :                 pos = next_pos;
     791          38 :                 size--;
     792             :         }
     793             : 
     794             :         /* pos is one past the last UChar of the clusters stepped over - it is used below as the
     795             :                 count handed to U8_FWD_N to advance in the utf-8 buffer (NOTE(review): U8_FWD_N steps by code points - confirm for text outside the BMP)
     796             :         */
     797             : 
     798          21 :         U8_FWD_N(pstr, ret_pos, str_len, pos);
     799             : 
     800          21 :         return ret_pos;
     801             : }
     802             : /* }}} */
     803             : 
     804             : /* {{{ grapheme extract iter function pointer array - the common iterator signature plus a dispatch table that grapheme_extract indexes with extract_type, so the entry order has to stay in step with the extract type values */
     805             : typedef int32_t (*grapheme_extract_iter)(UBreakIterator * /*bi*/, int32_t /*size*/, unsigned char * /*pstr*/, int32_t /*str_len*/);
     806             : 
     807             : static grapheme_extract_iter grapheme_extract_iters[] = {
     808             :         &grapheme_extract_count_iter, /* COUNT: size limits the number of grapheme clusters */
     809             :         &grapheme_extract_bytecount_iter, /* MAXBYTES: size limits the number of bytes */
     810             :         &grapheme_extract_charcount_iter, /* MAXCHARS: size limits the number of characters */
     811             : };
     812             : /* }}} */
     813             : 
     814             : /* {{{ proto string grapheme_extract(string str, int size[, int extract_type[, int start[, int next]]])
     815             :         Function to extract a sequence of default grapheme clusters, beginning at byte offset start of str. size is a limit whose unit is selected by extract_type. When next is supplied it receives the byte offset in str just past the returned chunk. Returns FALSE for invalid arguments or a failed UTF-16 conversion */
     816         106 : PHP_FUNCTION(grapheme_extract)
     817             : {
     818             :         unsigned char *str, *pstr;
     819             :         UChar *ustr;
     820             :         size_t str_len;
     821             :         int32_t ustr_len;
     822             :         zend_long size; /* maximum number of grapheme clusters, bytes, or characters (based on extract_type) to return */
     823         106 :         zend_long lstart = 0; /* starting position in str in bytes */
     824         106 :         int32_t start = 0;
     825         106 :         zend_long extract_type = GRAPHEME_EXTRACT_TYPE_COUNT;
     826             :         UErrorCode status;
     827             :         unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
     828         106 :         UBreakIterator* bi = NULL;
     829             :         int ret_pos;
     830         106 :         zval *next = NULL; /* return offset of next part of the string */
     831             : 
     832         106 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|llz", (char **)&str, &str_len, &size, &extract_type, &lstart, &next) == FAILURE) {
     833             : 
     834           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     835             :                          "grapheme_extract: unable to parse input param", 0 TSRMLS_CC );
     836             : 
     837           1 :                 RETURN_FALSE;
     838             :         }
     839             : 
     840         105 :         if ( NULL != next ) {
     841          16 :                 if ( !Z_ISREF_P(next) ) {
     842           0 :                         intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     843             :                                  "grapheme_extract: 'next' was not passed by reference", 0 TSRMLS_CC );
     844             : 
     845           0 :                         RETURN_FALSE;
     846             :                 }
     847             :                 else {
     848          16 :                         ZVAL_DEREF(next);
     849             :                         /* initialize next with the caller-supplied start offset */
     850          17 :                         SEPARATE_ZVAL(next);
     851          16 :                         zval_dtor(next);
     852          16 :             ZVAL_LONG(next, lstart);
     853             :                 }
     854             :         }
     855             : 
     856         105 :         if ( extract_type < GRAPHEME_EXTRACT_TYPE_MIN || extract_type > GRAPHEME_EXTRACT_TYPE_MAX ) {
     857             : 
     858           1 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     859             :                          "grapheme_extract: unknown extract type param", 0 TSRMLS_CC );
     860             : 
     861           1 :                 RETURN_FALSE;
     862             :         }
     863             : 
     864         104 :         if ( lstart > INT32_MAX || lstart < 0 || lstart >= str_len ) {
     865           5 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_extract: start not contained in string", 0 TSRMLS_CC );
     866           5 :                 RETURN_FALSE;
     867             :         }
     868             : 
     869          99 :         if ( size > INT32_MAX || size < 0) {
     870           0 :                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_extract: size is invalid", 0 TSRMLS_CC );
     871           0 :                 RETURN_FALSE;
     872             :         }
     873          99 :         if (size == 0) {
     874           8 :                 RETURN_EMPTY_STRING();
     875             :         }
     876             : 
     877             :         /* we checked that it will fit: */
     878          91 :         start = (int32_t) lstart;
     879             : 
     880          91 :         pstr = str + start;
     881             : 
     882             :         /* just in case pstr points in the middle of a character, move forward to the start of the next char */
     883          91 :         if ( !UTF8_IS_SINGLE(*pstr) && !U8_IS_LEAD(*pstr) ) {
     884           9 :                 unsigned char *str_end = str + str_len;
     885             : 
     886          25 :                 while ( !UTF8_IS_SINGLE(*pstr) && !U8_IS_LEAD(*pstr) ) {
     887           9 :                         pstr++;
     888           9 :                         if ( pstr >= str_end ) {
     889           2 :                                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
     890             :                                                                 "grapheme_extract: invalid input string", 0 TSRMLS_CC );
     891             : 
     892           2 :                                 RETURN_FALSE;
     893             :                         }
     894             :                 }
     895             :         }
     896             : 
     897          89 :         str_len -= (pstr - str); /* str_len now counts the bytes from pstr to the end of the input */
     898             : 
     899             :         /* if the string is all ASCII up to size+1 - or str_len whichever is first - then we are done.
     900             :                 (size + 1 because the size-th character might be the beginning of a grapheme cluster)
     901             :          */
     902             : 
     903          89 :         if ( -1 != grapheme_ascii_check(pstr, size + 1 < str_len ? size + 1 : str_len ) ) {
     904          27 :         zend_long nsize = ( size < str_len ? size : str_len );
     905          27 :                 if ( NULL != next ) {
     906           9 :                         ZVAL_LONG(next, (zend_long)(pstr - str) + nsize); /* measured from str via pstr, so bytes skipped to reach a character boundary are included */
     907             :                 }
     908          54 :                 RETURN_STRINGL(((char *)pstr), nsize);
     909             :         }
     910             : 
     911             :         /* convert the strings to UTF-16. */
     912          62 :         ustr = NULL;
     913          62 :         ustr_len = 0;
     914          62 :         status = U_ZERO_ERROR;
     915          62 :         intl_convert_utf8_to_utf16(&ustr, &ustr_len, (char *)pstr, str_len, &status );
     916             : 
     917          62 :         if ( U_FAILURE( status ) ) {
     918             :                 /* Set global error code. */
     919           0 :                 intl_error_set_code( NULL, status TSRMLS_CC );
     920             : 
     921             :                 /* Set error messages. */
     922           0 :                 intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
     923             : 
     924           0 :                 if ( NULL != ustr )
     925           0 :                         efree( ustr );
     926             : 
     927           0 :                 RETURN_FALSE;
     928             :         }
     929             : 
     930          62 :         bi = NULL;
     931          62 :         status = U_ZERO_ERROR;
     932          62 :         bi = grapheme_get_break_iterator(u_break_iterator_buffer, &status TSRMLS_CC );
     933             : 
     934          62 :         ubrk_setText(bi, ustr, ustr_len, &status); /* NOTE(review): status is not inspected after either of these two calls - confirm a failure here cannot leave bi unusable */
     935             : 
     936             :         /* if the caller put us in the middle of a grapheme, we can't detect it in all cases since we
     937             :                 can't back up. So, we will not do anything. */
     938             : 
     939             :         /* now we need to find the end of the chunk the user wants us to return */
     940             : 
     941          62 :         ret_pos = (*grapheme_extract_iters[extract_type])(bi, size, pstr, str_len); /* extract_type was range-checked above, so the table lookup is in bounds */
     942             : 
     943          62 :         if (ustr) {
     944          62 :                 efree(ustr);
     945             :         }
     946          62 :         ubrk_close(bi);
     947             : 
     948          62 :         if ( NULL != next ) {
     949           4 :                 ZVAL_LONG(next, (zend_long)(pstr - str) + ret_pos); /* measured from str via pstr, so bytes skipped to reach a character boundary are included */
     950             :         }
     951             : 
     952         124 :         RETURN_STRINGL(((char *)pstr), ret_pos);
     953             : }
     954             : 
     955             : /* }}} */
     956             : 
     957             : /*
     958             :  * Local variables:
     959             :  * tab-width: 4
     960             :  * c-basic-offset: 4
     961             :  * End:
     962             :  * vim600: fdm=marker
     963             :  * vim: noet sw=4 ts=4
     964             :  */
     965             : 

Generated by: LCOV version 1.10

Generated at Fri, 19 Sep 2014 17:11:09 +0000 (3 days ago)

Copyright © 2005-2014 The PHP Group
All rights reserved.