PHP  
 PHP: Test and Code Coverage Analysis
downloads | QA | documentation | faq | getting help | mailing lists | reporting bugs | php.net sites | links | my php.net 
 

LTP GCOV extension - code coverage report
Current view: directory - pcre - php_pcre.c
Test: PHP Code Coverage
Date: 2009-11-23 Instrumented lines: 917
Code covered: 90.7 % Executed lines: 832
Legend: not executed executed

       1                 : /*
       2                 :    +----------------------------------------------------------------------+
       3                 :    | PHP Version 6                                                        |
       4                 :    +----------------------------------------------------------------------+
       5                 :    | Copyright (c) 1997-2009 The PHP Group                                |
       6                 :    +----------------------------------------------------------------------+
       7                 :    | This source file is subject to version 3.01 of the PHP license,      |
       8                 :    | that is bundled with this package in the file LICENSE, and is        |
       9                 :    | available through the world-wide-web at the following url:           |
      10                 :    | http://www.php.net/license/3_01.txt                                  |
      11                 :    | If you did not receive a copy of the PHP license and are unable to   |
      12                 :    | obtain it through the world-wide-web, please send a note to          |
      13                 :    | license@php.net so we can mail you a copy immediately.               |
      14                 :    +----------------------------------------------------------------------+
      15                 :    | Author: Andrei Zmievski <andrei@php.net>                             |
      16                 :    +----------------------------------------------------------------------+
      17                 :  */
      18                 : 
      19                 : /* $Id: php_pcre.c 288111 2009-09-06 17:41:34Z felipe $ */
      20                 : 
      21                 : /*  TODO
      22                 :  *  php_pcre_replace_impl():
      23                 :  *   - should use fcall info cache (enhancement)
      24                 :  */
      25                 : 
      26                 : #include "php.h"
      27                 : #include "php_ini.h"
      28                 : #include "php_globals.h"
      29                 : #include "php_pcre.h"
      30                 : #include "ext/standard/info.h"
      31                 : #include "ext/standard/php_smart_str.h"
      32                 : 
      33                 : #if HAVE_PCRE || HAVE_BUNDLED_PCRE
      34                 : 
      35                 : #include "ext/standard/php_string.h"
      36                 : 
      37                 : #define PREG_PATTERN_ORDER                      1
      38                 : #define PREG_SET_ORDER                          2
      39                 : #define PREG_OFFSET_CAPTURE                     (1<<8)
      40                 : 
      41                 : #define PREG_SPLIT_NO_EMPTY                     (1<<0)
      42                 : #define PREG_SPLIT_DELIM_CAPTURE        (1<<1)
      43                 : #define PREG_SPLIT_OFFSET_CAPTURE       (1<<2)
      44                 : 
      45                 : #define PREG_REPLACE_EVAL                       (1<<0) /* stored in a cache entry's preg_options when the pattern carries the 'e' modifier */
      46                 : 
      47                 : #define PREG_GREP_INVERT                        (1<<0)
      48                 : 
      49                 : #define PCRE_CACHE_SIZE 4096 /* element count at which pcre_get_compiled_regex_cache() runs pcre_clean_cache over the cache with a budget of PCRE_CACHE_SIZE / 8 entries */
      50                 : 
      51                 : enum { /* values stored in PCRE_G(error_code); PHP_MINIT registers each one as the matching PREG_* constant */
      52                 :         PHP_PCRE_NO_ERROR = 0,
      53                 :         PHP_PCRE_INTERNAL_ERROR, /* fallback for any PCRE code pcre_handle_exec_error() does not map explicitly */
      54                 :         PHP_PCRE_BACKTRACK_LIMIT_ERROR, /* mapped from PCRE_ERROR_MATCHLIMIT */
      55                 :         PHP_PCRE_RECURSION_LIMIT_ERROR, /* mapped from PCRE_ERROR_RECURSIONLIMIT */
      56                 :         PHP_PCRE_BAD_UTF8_ERROR, /* mapped from PCRE_ERROR_BADUTF8 */
      57                 :         PHP_PCRE_BAD_UTF8_OFFSET_ERROR /* mapped from PCRE_ERROR_BADUTF8_OFFSET */
      58                 : };
      59                 : 
      60                 : typedef struct { /* running byte-offset to code-point-offset position, advanced incrementally by add_offset_pair() */
      61                 :         char *str; /* the string that add_offset_pair() walks with U8_FWD_1 */
      62                 :         int byte_offset; /* byte position in str reached so far */
      63                 :         int cp_offset; /* incremented once per U8_FWD_1 step, i.e. code points counted up to byte_offset; set equal to the byte offset for non-unicode input */
      64                 : } offset_map_t;
      65                 : 
      66                 : ZEND_DECLARE_MODULE_GLOBALS(pcre)
      67                 : 
      68                 : 
      69                 : static void pcre_handle_exec_error(int pcre_code TSRMLS_DC) /* {{{ translate a PCRE_ERROR_* code into the matching PHP_PCRE_* code and store it in PCRE_G(error_code) */
      70               8 : {
      71               8 :         int preg_code = 0;
      72                 : 
      73               8 :         switch (pcre_code) {
      74                 :                 case PCRE_ERROR_MATCHLIMIT:
      75               3 :                         preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
      76               3 :                         break;
      77                 : 
      78                 :                 case PCRE_ERROR_RECURSIONLIMIT:
      79               3 :                         preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
      80               3 :                         break;
      81                 : 
      82                 :                 case PCRE_ERROR_BADUTF8:
      83               1 :                         preg_code = PHP_PCRE_BAD_UTF8_ERROR;
      84               1 :                         break;
      85                 : 
      86                 :                 case PCRE_ERROR_BADUTF8_OFFSET:
      87               1 :                         preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
      88               1 :                         break;
      89                 : 
      90                 :                 default: /* every other code is reported as a generic internal error */
      91               0 :                         preg_code = PHP_PCRE_INTERNAL_ERROR;
      92                 :                         break;
      93                 :         }
      94                 : 
      95               8 :         PCRE_G(error_code) = preg_code; /* the only output: this function returns nothing */
      96               8 : }
      97                 : /* }}} */
      98                 : 
      99                 : 
     100                 : static void php_free_pcre_cache(void *data) /* {{{ release the resources owned by one pcre_cache_entry; passed to zend_hash_init() for the pcre_cache table in PHP_GINIT */
     101           23837 : {
     102           23837 :         pcre_cache_entry *pce = (pcre_cache_entry *) data;
     103           23837 :         if (!pce) return;
     104           23837 :         pefree(pce->re, 1); /* compiled pattern; freed unconditionally */
     105           23837 :         if (pce->extra) pefree(pce->extra, 1); /* study data is NULL when the pattern was not studied */
     106                 : #if HAVE_SETLOCALE
     107           23837 :         if ((void*)pce->tables) pefree((void*)pce->tables, 1); /* tables is only set when the pattern was compiled under a non-"C" locale */
     108           23837 :         pefree(pce->locale, 1); /* copy made with pestrdup() when the entry was created */
     109                 : #endif
     110                 : } /* note: the pcre_cache_entry struct itself (pce) is not freed here */
     111                 : /* }}} */
     112                 : 
     113                 : 
     114                 : static PHP_GINIT_FUNCTION(pcre) /* {{{ initialise the module globals: the compiled-regex cache, both limits and the last error code */
     115           17007 : {
     116           17007 :         zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1); /* php_free_pcre_cache is installed as the per-entry destructor */
     117           17007 :         pcre_globals->backtrack_limit = 0; /* NOTE(review): presumably overwritten when the pcre.backtrack_limit INI entry is registered - confirm */
     118           17007 :         pcre_globals->recursion_limit = 0; /* NOTE(review): presumably overwritten when the pcre.recursion_limit INI entry is registered - confirm */
     119           17007 :         pcre_globals->error_code      = PHP_PCRE_NO_ERROR;
     120           17007 : }
     121                 : /* }}} */
     122                 : 
     123                 : static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ tear down the module globals by destroying the compiled-regex cache set up in PHP_GINIT */
     124           17039 : {
     125           17039 :         zend_hash_destroy(&pcre_globals->pcre_cache);
     126           17039 : }
     127                 : /* }}} */
     128                 : 
     129                 : PHP_INI_BEGIN() /* two PHP_INI_ALL settings, each defaulting to "100000" and bound via OnUpdateLong to the same-named field of zend_pcre_globals */
     130                 :         STD_PHP_INI_ENTRY("pcre.backtrack_limit", "100000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
     131                 :         STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
     132                 : PHP_INI_END()
     133                 : 
     134                 : 
     135                 : /* {{{ PHP_MINFO_FUNCTION(pcre) - print the module info table (support status and the pcre_version() string) followed by the module's INI entries */
     136                 : static PHP_MINFO_FUNCTION(pcre)
     137              43 : {
     138              43 :         php_info_print_table_start();
     139              43 :         php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
     140              43 :         php_info_print_table_row(2, "PCRE Library Version", pcre_version() );
     141              43 :         php_info_print_table_end();
     142                 : 
     143              43 :         DISPLAY_INI_ENTRIES();
     144              43 : }
     145                 : /* }}} */
     146                 : 
     147                 : /* {{{ PHP_MINIT_FUNCTION(pcre) - register the pcre.* INI entries and the PREG_* and PCRE_VERSION constants; always returns SUCCESS */
     148                 : static PHP_MINIT_FUNCTION(pcre)
     149           17007 : {
     150           17007 :         REGISTER_INI_ENTRIES();
     151                 :         
     152           17007 :         REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
     153           17007 :         REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
     154           17007 :         REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
     155           17007 :         REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
     156           17007 :         REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
     157           17007 :         REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
     158           17007 :         REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
     159                 :         /* PREG_REPLACE_EVAL is not registered as a constant; in the visible code it is only set from the 'e' pattern modifier */
     160           17007 :         REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
     161           17007 :         REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
     162           17007 :         REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
     163           17007 :         REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
     164           17007 :         REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
     165           17007 :         REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
     166           17007 :         REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT); /* version string reported by the linked PCRE library */
     167                 : 
     168           17007 :         return SUCCESS;
     169                 : }
     170                 : /* }}} */
     171                 : 
     172                 : /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) - unregister the INI entries registered in PHP_MINIT; always returns SUCCESS */
     173                 : static PHP_MSHUTDOWN_FUNCTION(pcre)
     174           17039 : {
     175           17039 :         UNREGISTER_INI_ENTRIES();
     176                 : 
     177           17039 :         return SUCCESS;
     178                 : }
     179                 : /* }}} */
     180                 : 
     181                 : /* {{{ static pcre_clean_cache - callback given to zend_hash_apply_with_argument() when the cache is full; arg points to an int budget of entries still to be dropped */
     182                 : static int pcre_clean_cache(void *data, void *arg TSRMLS_DC)
     183           40960 : {
     184           40960 :         int *num_clean = (int *)arg;
     185                 : 
     186           40960 :         if (*num_clean > 0) {
     187            5120 :                 (*num_clean)--;
     188            5120 :                 return 1; /* NOTE(review): presumably tells the hash apply to remove this entry - confirm against the Zend hash API */
     189                 :         } else {
     190           35840 :                 return 0; /* budget used up; data itself is never inspected */
     191                 :         }
     192                 : }
     193                 : /* }}} */
     194                 : 
     195                 : /* {{{ static make_subpats_table - build an ecalloc()ed array of num_subpats slots, indexed by subpattern number, holding the name of each named subpattern (NULL for unnamed ones); the names point into PCRE's own name table and are not copied; returns NULL after an E_WARNING when pcre_fullinfo() fails or a name is numeric */
     196                 : static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce TSRMLS_DC)
     197         3560667 : {
     198         3560667 :         pcre_extra *extra = pce->extra;
     199         3560667 :         int name_cnt = 0, name_size, ni = 0;
     200                 :         int rc;
     201                 :         char *name_table;
     202                 :         unsigned short name_idx;
     203         3560667 :         char **subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
     204                 : 
     205         3560667 :         rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMECOUNT, &name_cnt);
     206         3560667 :         if (rc < 0) {
     207               0 :                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
     208               0 :                 efree(subpat_names);
     209               0 :                 return NULL;
     210                 :         }
     211         3560667 :         if (name_cnt > 0) {
     212                 :                 int rc1, rc2;
     213              12 :                 rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
     214              12 :                 rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
     215              12 :                 rc = rc2 ? rc2 : rc1; /* report whichever of the two calls failed, preferring the second */
     216              12 :                 if (rc < 0) {
     217               0 :                         php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
     218               0 :                         efree(subpat_names);
     219               0 :                         return NULL;
     220                 :                 }
     221                 : 
     222            1076 :                 while (ni++ < name_cnt) {
     223            1054 :                         name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1]; /* subpattern number is a 16-bit value, most significant byte first, so the high byte weighs 256 (was 0xff, wrong for numbers >= 256) */
     224            1054 :                         subpat_names[name_idx] = name_table + 2; /* the NUL-terminated name follows the two number bytes */
     225            1054 :                         if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
     226               2 :                                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Numeric named subpatterns are not allowed");
     227               2 :                                 efree(subpat_names);
     228               2 :                                 return NULL;
     229                 :                         }
     230            1052 :                         name_table += name_size; /* entries are fixed-size records of name_size bytes */
     231                 :                 }
     232                 :         }
     233                 : 
     234         3560665 :         return subpat_names;
     235                 : }
     236                 : /* }}} */
     237                 : 
     238                 : /* {{{ pcre_get_compiled_regex_cache - return the cache entry for the delimited pattern string regex (key: regex, length regex_len+1), parsing, compiling and caching it on a miss; utype == IS_UNICODE forces PCRE_UTF8; returns NULL after an E_WARNING when the pattern is empty, badly delimited, carries an unknown modifier or fails to compile
     239                 :  */
     240                 : PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_uchar utype, char *regex, int regex_len TSRMLS_DC)
     241         3690455 : {
     242         3690455 :         pcre                            *re = NULL;
     243                 :         pcre_extra                      *extra;
     244         3690455 :         int                                      coptions = 0; /* PCRE compile flags gathered from the modifiers */
     245         3690455 :         int                                      soptions = 0; /* options handed to pcre_study(); never changed from 0 here */
     246                 :         const char                      *error;
     247                 :         int                                      erroffset;
     248                 :         char                             delimiter;
     249                 :         char                             start_delimiter;
     250                 :         char                             end_delimiter;
     251                 :         char                            *p, *pp;
     252                 :         char                            *pattern;
     253         3690455 :         int                                      do_study = 0; /* set by the 'S' modifier */
     254         3690455 :         int                                      poptions = 0; /* PHP-level flags (PREG_REPLACE_EVAL) */
     255         3690455 :         unsigned const char *tables = NULL;
     256                 : #if HAVE_SETLOCALE
     257         3690455 :         char                            *locale = setlocale(LC_CTYPE, NULL); /* query only: NULL asks for the current locale name */
     258                 : #endif
     259                 :         pcre_cache_entry        *pce;
     260                 :         pcre_cache_entry         new_entry;
     261                 : 
     262                 :         /* Try to lookup the cached regex entry, and if successful, just pass
     263                 :            back the compiled pattern, otherwise go on and compile it. */
     264         3690455 :         if (zend_hash_find(&PCRE_G(pcre_cache), regex, regex_len+1, (void **)&pce) == SUCCESS) {
     265                 :                 /*
     266                 :                  * We use a quick pcre_info() check to see whether cache is corrupted, and if it
     267                 :                  * is, we flush it and compile the pattern from scratch.
     268                 :                  */
     269         3666543 :                 if (pcre_info(pce->re, NULL, NULL) == PCRE_ERROR_BADMAGIC) {
     270               0 :                         zend_hash_clean(&PCRE_G(pcre_cache));
     271                 :                 } else {
     272                 : #if HAVE_SETLOCALE
     273         3666543 :                         if (!strcmp(pce->locale, locale)) { /* reuse only when compiled under the current LC_CTYPE locale; otherwise fall through and recompile */
     274         3666543 :                                 return pce;
     275                 :                         }
     276                 : #else
     277                 :                         return pce;
     278                 : #endif
     279                 :                 }
     280                 :         }
     281                 :         
     282           23912 :         p = regex;
     283                 :         
     284                 :         /* Parse through the leading whitespace, and display a warning if we
     285                 :            get to the end without encountering a delimiter. */
     286           23912 :         while (isspace((int)*(unsigned char *)p)) p++;
     287           23912 :         if (*p == 0) {
     288               3 :                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
     289               3 :                 return NULL;
     290                 :         }
     291                 :         
     292                 :         /* Get the delimiter and display a warning if it is alphanumeric
     293                 :            or a backslash. */
     294           23909 :         delimiter = *p++;
     295           23909 :         if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
     296               7 :                 php_error_docref(NULL TSRMLS_CC,E_WARNING, "Delimiter must not be alphanumeric or backslash");
     297               7 :                 return NULL;
     298                 :         }
     299                 : 
     300           23902 :         start_delimiter = delimiter;
     301           23902 :         if ((pp = strchr("([{< )]}> )]}>", delimiter))) /* lookup table: an opening bracket maps to its closing bracket 5 characters further on; a closing bracket maps to itself */
     302              20 :                 delimiter = pp[5];
     303           23902 :         end_delimiter = delimiter;
     304                 : 
     305           23902 :         if (start_delimiter == end_delimiter) {
     306                 :                 /* We need to iterate through the pattern, searching for the ending delimiter,
     307                 :                    but skipping the backslashed delimiters.  If the ending delimiter is not
     308                 :                    found, display a warning. */
     309           23882 :                 pp = p;
     310        14844533 :                 while (*pp != 0) {
     311        14820644 :                         if (*pp == '\\' && pp[1] != 0) pp++; /* step over the backslash so the escaped character is skipped by the pp++ below */
     312        13889661 :                         else if (*pp == delimiter)
     313           23875 :                                 break;
     314        14796769 :                         pp++;
     315                 :                 }
     316           23882 :                 if (*pp == 0) {
     317               7 :                         php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending delimiter '%c' found", delimiter);
     318               7 :                         return NULL;
     319                 :                 }
     320                 :         } else {
     321                 :                 /* We iterate through the pattern, searching for the matching ending
     322                 :                  * delimiter. For each matching starting delimiter, we increment nesting
     323                 :                  * level, and decrement it for each matching ending delimiter. If we
     324                 :                  * reach the end of the pattern without matching, display a warning.
     325                 :                  */
     326              20 :                 int brackets = 1;       /* brackets nesting level */
     327              20 :                 pp = p;
     328             180 :                 while (*pp != 0) {
     329             159 :                         if (*pp == '\\' && pp[1] != 0) pp++;
     330             147 :                         else if (*pp == end_delimiter && --brackets <= 0) /* stop only at the closer that balances the opening delimiter */
     331                 :                                 break;
     332             128 :                         else if (*pp == start_delimiter)
     333               1 :                                 brackets++;
     334             140 :                         pp++;
     335                 :                 }
     336              20 :                 if (*pp == 0) {
     337               1 :                         php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending matching delimiter '%c' found", end_delimiter);
     338               1 :                         return NULL;
     339                 :                 }
     340                 :         }
     341                 :         
     342                 :         /* Make a copy of the actual pattern. */
     343           23894 :         pattern = estrndup(p, pp-p); /* text between the delimiters; efree()d on every path below */
     344                 : 
     345                 :         /* Move on to the options */
     346           23894 :         pp++;
     347                 : 
     348                 :         /* Parse through the options, setting appropriate flags.  Display
     349                 :            a warning if we encounter an unknown modifier. */    
     350           59048 :         while (*pp != 0) {
     351           11273 :                 switch (*pp++) {
     352                 :                         /* Perl compatible options */
     353             913 :                         case 'i':       coptions |= PCRE_CASELESS;              break;
     354             853 :                         case 'm':       coptions |= PCRE_MULTILINE;             break;
     355            9389 :                         case 's':       coptions |= PCRE_DOTALL;                break;
     356               4 :                         case 'x':       coptions |= PCRE_EXTENDED;              break;
     357                 :                         
     358                 :                         /* PCRE specific options */
     359               2 :                         case 'A':       coptions |= PCRE_ANCHORED;              break;
     360               7 :                         case 'D':       coptions |= PCRE_DOLLAR_ENDONLY;break;
     361              28 :                         case 'S':       do_study  = 1;                                  break;
     362              27 :                         case 'U':       coptions |= PCRE_UNGREEDY;              break;
     363               1 :                         case 'X':       coptions |= PCRE_EXTRA;                 break;
     364              29 :                         case 'u':       coptions |= PCRE_UTF8;                  break;
     365                 : 
     366                 :                         /* Custom preg options */
     367               5 :                         case 'e':       poptions |= PREG_REPLACE_EVAL;  break;
     368                 :                         
     369                 :                         case ' ': /* a space or newline among the modifiers is ignored */
     370                 :                         case '\n':
     371               2 :                                 break;
     372                 : 
     373                 :                         default:
     374              13 :                                 php_error_docref(NULL TSRMLS_CC,E_WARNING, "Unknown modifier '%c'", pp[-1]); /* pp was already advanced by the switch, hence pp[-1] */
     375              13 :                                 efree(pattern);
     376              13 :                                 return NULL;
     377                 :                 }
     378                 :         }
     379                 : 
     380           23881 :         if (utype == IS_UNICODE) { /* unicode callers get UTF-8 mode even without the 'u' modifier */
     381           15293 :                 coptions |= PCRE_UTF8;
     382                 :         }
     383                 : 
     384                 : #if HAVE_SETLOCALE
     385           23881 :         if (strcmp(locale, "C")) /* any locale other than "C": build character tables for it */
     386               2 :                 tables = pcre_maketables();
     387                 : #endif
     388                 : 
     389                 :         /* Compile pattern and display a warning if compilation failed. */
     390           23881 :         re = pcre_compile(pattern,
     391                 :                                           coptions,
     392                 :                                           &error,
     393                 :                                           &erroffset,
     394                 :                                           tables);
     395                 : 
     396           23881 :         if (re == NULL) {
     397              44 :                 php_error_docref(NULL TSRMLS_CC,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
     398              44 :                 efree(pattern);
     399              44 :                 if (tables) {
     400               0 :                         pefree((void*)tables, 1);
     401                 :                 }
     402              44 :                 return NULL;
     403                 :         }
     404                 : 
     405                 :         /* If study option was specified, study the pattern and
     406                 :            store the result in extra for passing to pcre_exec. */
     407           23837 :         if (do_study) {
     408              28 :                 extra = pcre_study(re, soptions, &error);
     409              28 :                 if (extra) {
     410              28 :                         extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
     411                 :                 }
     412              28 :                 if (error != NULL) { /* a study failure only warns; the compiled pattern is still cached and returned */
     413               0 :                         php_error_docref(NULL TSRMLS_CC, E_WARNING, "Error while studying pattern");
     414                 :                 }
     415                 :         } else {
     416           23809 :                 extra = NULL;
     417                 :         }
     418                 : 
     419           23837 :         efree(pattern);
     420                 : 
     421                 :         /*
     422                 :          * If we reached cache limit, clean out the items from the head of the list;
     423                 :          * these are supposedly the oldest ones (but not necessarily the least used
     424                 :          * ones).
     425                 :          */
     426           23837 :         if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
     427              10 :                 int num_clean = PCRE_CACHE_SIZE / 8; /* budget consumed by pcre_clean_cache, one per entry it returns 1 for */
     428              10 :                 zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean TSRMLS_CC);
     429                 :         }
     430                 : 
     431                 :         /* Store the compiled pattern and extra info in the cache. */
     432           23837 :         new_entry.re = re;
     433           23837 :         new_entry.extra = extra;
     434           23837 :         new_entry.preg_options = poptions;
     435           23837 :         new_entry.compile_options = coptions;
     436                 : #if HAVE_SETLOCALE
     437           23837 :         new_entry.locale = pestrdup(locale, 1); /* released in php_free_pcre_cache() */
     438           23837 :         new_entry.tables = tables;
     439                 : #endif
     440           23837 :         zend_hash_update(&PCRE_G(pcre_cache), regex, regex_len+1, (void *)&new_entry,
     441                 :                                                 sizeof(pcre_cache_entry), (void**)&pce); /* NOTE(review): pce is presumably set to the table's own copy of new_entry - confirm against the Zend hash API */
     442                 : 
     443           23837 :         return pce;
     444                 : }
     445                 : /* }}} */
     446                 : 
     447                 : /* {{{ pcre_get_compiled_regex - look regex up through pcre_get_compiled_regex_cache() (as IS_UNICODE, length taken with strlen) and return its compiled pattern, or NULL on failure; when non-NULL, *extra and *preg_options receive the entry's values, or NULL and 0 on failure
     448                 :  */
     449                 : PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *preg_options TSRMLS_DC)
     450          127831 : {
     451          127831 :         pcre_cache_entry * pce = pcre_get_compiled_regex_cache(IS_UNICODE, regex, strlen(regex) TSRMLS_CC);
     452                 : 
     453          127831 :         if (extra) {
     454          127831 :                 *extra = pce ? pce->extra : NULL;
     455                 :         }
     456          127831 :         if (preg_options) {
     457          127831 :                 *preg_options = pce ? pce->preg_options : 0;
     458                 :         }
     459                 :         
     460          127831 :         return pce ? pce->re : NULL;
     461                 : }
     462                 : /* }}} */
     463                 : 
     464                 : /* {{{ pcre_get_compiled_regex_ex - same lookup as pcre_get_compiled_regex(), additionally reporting the entry's PCRE compile flags through *compile_options (0 on failure) when that pointer is non-NULL
     465                 :  */
     466                 : PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int *preg_options, int *compile_options TSRMLS_DC)
     467               0 : {
     468               0 :         pcre_cache_entry * pce = pcre_get_compiled_regex_cache(IS_UNICODE, regex, strlen(regex) TSRMLS_CC);
     469                 :         
     470               0 :         if (extra) {
     471               0 :                 *extra = pce ? pce->extra : NULL;
     472                 :         }
     473               0 :         if (preg_options) {
     474               0 :                 *preg_options = pce ? pce->preg_options : 0;
     475                 :         }
     476               0 :         if (compile_options) {
     477               0 :                 *compile_options = pce ? pce->compile_options : 0;
     478                 :         }
     479                 :         
     480               0 :         return pce ? pce->re : NULL; /* every output pointer is optional; only the return value is unconditional */
     481                 : }
     482                 : /* }}} */
     483                 : 
     484                 : /* {{{ add_offset_pair
                         :    Builds a two-element array (matched text, offset) and appends it to
                         :    `result`; when `name` is non-NULL the same array is also stored under
                         :    that name.
                         :
                         :    result - array zval that receives the pair.
                         :    utype  - IS_UNICODE stores the text with add_next_index_utf8_stringl()
                         :             and reports the offset in code points; any other value stores
                         :             the raw bytes and reports `offset` unchanged.
                         :    str,
                         :    len    - matched text and its length in bytes.
                         :    offset - byte offset of the match within prev->str.
                         :    name   - optional NUL-terminated subpattern name (may be NULL).
                         :    prev   - running byte_offset/cp_offset position, updated in place so a
                         :             later call can continue counting from where this one stopped.
                         :  */
     485                 : static inline void add_offset_pair(zval *result, zend_uchar utype, char *str, int len, int offset, char *name, offset_map_t *prev TSRMLS_DC)
     486              64 : {
     487                 :         zval *match_pair;
     488                 :         int tmp;
     489                 : 
     490              64 :         ALLOC_ZVAL(match_pair);
     491              64 :         array_init(match_pair);
     492              64 :         INIT_PZVAL(match_pair);
     493                 : 
     494                 :         /* Add (match, offset) to the return value: first the matched text... */
     495              64 :         if (utype == IS_UNICODE) {
     496              64 :                 add_next_index_utf8_stringl(match_pair, str, len, 1);
     497                 :         } else {
     498               0 :                 add_next_index_stringl(match_pair, str, len, 1);
     499                 :         }
     500                 : 
     501                 :         /* Calculate codepoint offset from the previous chunk. A zero offset
                         :            leaves `prev` untouched.
                         :            NOTE(review): if offset is smaller than prev->byte_offset the loop
                         :            below does not run and the previous cp_offset is reported; confirm
                         :            callers only pass non-decreasing offsets for a given `prev`. */
     502              64 :         if (offset) {
     503              47 :                 if (utype == IS_UNICODE) {
     504              47 :                         tmp = prev->byte_offset;
     505             282 :                         while (tmp < offset) {  /* one iteration per code point */
     506             188 :                                 U8_FWD_1(prev->str, tmp, offset);
     507             188 :                                 prev->cp_offset++;
     508                 :                         }
     509              47 :                         prev->byte_offset = tmp;
     510                 :                 } else {
     511               0 :                         prev->cp_offset = offset;
     512               0 :                         prev->byte_offset = offset;
     513                 :                 }               
     514                 :         }
     515              64 :         add_next_index_long(match_pair, prev->cp_offset);  /* ...then the offset */
     516                 :         
     517              64 :         if (name) {
     518               2 :                 UErrorCode status = U_ZERO_ERROR;
     519               2 :                 UChar *u = NULL;
     520                 :                 int u_len;
     521               2 :                 zval_add_ref(&match_pair);  /* the pair is inserted twice: by name here and by index below */
     522               2 :                 zend_string_to_unicode_ex(UG(utf8_conv), &u, &u_len, name, strlen(name), &status);  /* NOTE(review): status is never checked */
     523               2 :                 zend_u_hash_update(Z_ARRVAL_P(result), IS_UNICODE, ZSTR(u), u_len+1, &match_pair, sizeof(zval *), NULL);
     524               2 :                 efree(u);
     525                 :         }
     526              64 :         zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), NULL);
     527              64 : }
     528                 : /* }}} */
     529                 : 
     530                 : /* {{{ php_do_pcre_match
                         :    Shared body of preg_match() (global == 0) and preg_match_all()
                         :    (global != 0): parses the PHP arguments, obtains the compiled pattern and
                         :    delegates the matching to php_pcre_match_impl().
                         :
                         :    When the arguments arrive as IS_UNICODE, pattern and subject are first
                         :    converted with UG(utf8_conv) into temporary buffers that are released
                         :    before returning. Returns FALSE to the script when argument parsing or
                         :    the pattern lookup fails.
                         :  */
     531                 : static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) 
     532         3539703 : {
     533                 :         /* parameters */
     534                 :         zstr                      regex;                        /* Regular expression */
     535                 :         zstr                      subject;                      /* String to match against */
     536                 :         int                               regex_len;
     537                 :         int                               subject_len;
     538                 :         pcre_cache_entry *pce;                          /* Compiled regular expression */
     539         3539703 :         zval                     *subpats = NULL;       /* Array for subpatterns */
     540         3539703 :         long                      flags = 0;            /* Match control flags */
     541         3539703 :         long                      start_offset = 0;     /* Where the new search starts */
     542                 :         zend_uchar                str_type;
     543         3539703 :         char                     *regex_utf8 = NULL, *subject_utf8 = NULL;
     544                 :         int                       regex_utf8_len, subject_utf8_len;
     545         3539703 :         UErrorCode                status = U_ZERO_ERROR;
     546                 : 
                           /* The subpatterns argument is mandatory in global mode ("TTz|ll") and
                              optional otherwise ("TT|zll"). */
     547         3539703 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, ((global) ? "TTz|ll" : "TT|zll"), &regex, &regex_len, &str_type,
     548                 :                                                           &subject, &subject_len, &str_type, &subpats, &flags, &start_offset) == FAILURE) {
     549              16 :                 RETURN_FALSE;
     550                 :         }
     551                 : 
     552         3539687 :         if (str_type == IS_UNICODE) {
                                   /* Convert both strings and let regex/subject point at the converted
                                      buffers so the code below only deals with char data.
                                      NOTE(review): status is not checked after either conversion. */
     553         1366322 :                 zend_unicode_to_string_ex(UG(utf8_conv), &regex_utf8, &regex_utf8_len, regex.u, regex_len, &status);
     554         1366322 :                 zend_unicode_to_string_ex(UG(utf8_conv), &subject_utf8, &subject_utf8_len, subject.u, subject_len, &status);
     555         1366322 :                 regex.s = regex_utf8;
     556         1366322 :                 regex_len = regex_utf8_len;
     557         1366322 :                 subject.s = subject_utf8;
     558         1366322 :                 subject_len = subject_utf8_len;
     559                 :         }
     560                 :         
     561                 :         /* Compile regex or get it from cache. */
     562         3539687 :         if ((pce = pcre_get_compiled_regex_cache(str_type, regex.s, regex_len TSRMLS_CC)) == NULL) {
     563              16 :                 if (str_type == IS_UNICODE) {  /* release the temporary buffers on the error path too */
     564              16 :                         efree(regex_utf8);
     565              16 :                         efree(subject_utf8);
     566                 :                 }
     567              16 :                 RETURN_FALSE;
     568                 :         }
     569                 : 
                           /* use_flags (ZEND_NUM_ARGS() >= 4) is true only when the script passed a
                              flags argument. */
     570         3539671 :         php_pcre_match_impl(pce, str_type, subject.s, subject_len, return_value, subpats, 
     571                 :                 global, ZEND_NUM_ARGS() >= 4, flags, start_offset TSRMLS_CC);
     572                 : 
     573         3539671 :         if (str_type == IS_UNICODE) {
     574         1366306 :                 efree(regex_utf8);
     575         1366306 :                 efree(subject_utf8);
     576                 :         }
     577                 : }
     578                 : /* }}} */
     579                 : 
     580                 : /* {{{ php_pcre_match_impl
                         :    Runs the compiled pattern in `pce` against `subject` and reports the
                         :    number of matches through `return_value`.
                         :
                         :    pce          - cache entry supplying re, extra and compile_options.
                         :    utype        - IS_UNICODE selects the UTF-8 aware paths (offset
                         :                   conversion and the *_utf8_* array helpers).
                         :    subject,
                         :    subject_len  - bytes handed to pcre_exec().
                         :    return_value - set to the match count (RETVAL_LONG), or to FALSE on the
                         :                   RETURN_FALSE error exits.
                         :    subpats      - optional; when non-NULL it is destroyed, re-initialised as
                         :                   an array and filled with the captured substrings.
                         :    global       - non-zero keeps matching until no further match is found.
                         :    use_flags    - when zero, `flags` is ignored.
                         :    flags        - PREG_OFFSET_CAPTURE bit plus, in the low byte, the
                         :                   subpattern ordering.
                         :    start_offset - where matching starts; negative values count from the end.
                         :                   For IS_UNICODE it is a code point offset that is converted
                         :                   to a byte offset first.
                         :  */
     581                 : PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_uchar utype, char *subject, int subject_len, zval *return_value,
     582                 :         zval *subpats, int global, int use_flags, long flags, long start_offset TSRMLS_DC)
     583         3539934 : {
     584                 :         zval                    *result_set,            /* Holds a set of subpatterns after
     585                 :                                                                                    a global match */
     586         3539934 :                                    **match_sets = NULL; /* An array of sets of matches for each
     587                 :                                                                                    subpattern after a global match */
     588         3539934 :         pcre_extra              *extra = pce->extra;/* Holds results of studying */
     589                 :         pcre_extra               extra_data;            /* Used locally for exec options */
     590         3539934 :         int                              exoptions = 0;         /* Execution options */
     591         3539934 :         int                              count = 0;                     /* Count of matched subpatterns */
     592                 :         int                             *offsets;                       /* Array of subpattern offsets */
     593                 :         int                              num_subpats;           /* Number of captured subpatterns */
     594                 :         int                              size_offsets;          /* Size of the offsets array */
     595                 :         int                              matched;                       /* Has anything matched */
     596         3539934 :         int                              g_notempty = 0;        /* If the match should not be empty */
     597                 :         const char         **stringlist;                /* Holds list of subpatterns */
     598                 :         char               **subpat_names;              /* Array for named subpatterns */
     599                 :         int                              i, rc;
     600                 :         int                              subpats_order;         /* Order of subpattern matches */
     601                 :         int                              offset_capture;    /* Capture match offsets: yes/no */
     602                 : 
     603                 :         /* Overwrite the passed-in value for subpatterns with an empty array. */
     604         3539934 :         if (subpats != NULL) {
     605          995592 :                 zval_dtor(subpats);
     606          995592 :                 array_init(subpats);
     607                 :         }
     608                 : 
     609         3539934 :         subpats_order = global ? PREG_PATTERN_ORDER : 0;
     610                 : 
     611         3539934 :         if (use_flags) {
     612             221 :                 offset_capture = flags & PREG_OFFSET_CAPTURE;
     613                 : 
     614                 :                 /*
     615                 :                  * subpats_order is pre-set to pattern mode so we change it only if
     616                 :                  * necessary.
     617                 :                  */
     618             221 :                 if (flags & 0xff) {
     619              14 :                         subpats_order = flags & 0xff;
     620                 :                 }
                                   /* Global mode accepts only PREG_PATTERN_ORDER..PREG_SET_ORDER;
                                      single-match mode accepts no ordering flag at all. */
     621             221 :                 if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
     622                 :                         (!global && subpats_order != 0)) {
     623               1 :                         php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid flags specified");
     624               1 :                         return;  /* NOTE(review): return_value is not set here, unlike the RETURN_FALSE error exits below */
     625                 :                 }
     626                 :         } else {
     627         3539713 :                 offset_capture = 0;
     628                 :         }
     629                 : 
     630         3539933 :         if (utype == IS_UNICODE) {
     631         1366373 :                 int k = 0;
     632                 :                 /* Calculate byte offset from codepoint offset */
     633         1366373 :                 if (start_offset < 0) {
     634               4 :                         k = subject_len;  /* negative: walk back from the end of the subject */
     635               4 :                         U8_BACK_N((unsigned char*)subject, 0, k, -start_offset);
     636                 :                 } else {
     637         1366369 :                         U8_FWD_N(subject, k, subject_len, start_offset);
     638                 :                 }
     639         1366373 :                 start_offset = k;
     640                 :         } else {
     641                 :                 /* Negative offset counts from the end of the string. */
     642         2173560 :                 if (start_offset < 0) {
     643               0 :                         start_offset = subject_len + start_offset;
     644               0 :                         if (start_offset < 0) {
     645               0 :                                 start_offset = 0;
     646                 :                         }
     647                 :                 }
     648         2173560 :                 if (!(pce->compile_options & PCRE_UTF8)) {
     649         2173557 :                         exoptions |= PCRE_NO_UTF8_CHECK;
     650                 :                 }
     651                 :         }
     652                 : 
                           /* Without study data a local pcre_extra is used to carry the limits.
                              NOTE(review): when pce->extra is non-NULL the limits below are written
                              into that cached structure and its flags are not touched here; confirm
                              the cache code sets the matching flag bits. */
     653         3539933 :         if (extra == NULL) {
     654         3539925 :                 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
     655         3539925 :                 extra = &extra_data;
     656                 :         }
     657         3539933 :         extra->match_limit = PCRE_G(backtrack_limit);
     658         3539933 :         extra->match_limit_recursion = PCRE_G(recursion_limit);
     659                 : 
     660                 :         /* Calculate the size of the offsets array, and allocate memory for it. */
     661         3539933 :         rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
     662         3539933 :         if (rc < 0) {
     663               0 :                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
     664               0 :                 RETURN_FALSE;
     665                 :         }
     666         3539933 :         num_subpats++;  /* one extra slot for the whole match */
     667         3539933 :         size_offsets = num_subpats * 3;
     668                 : 
     669                 :         /*
     670                 :          * Build a mapping from subpattern numbers to their names. We will always
     671                 :          * allocate the table, even though there may be no named subpatterns. This
     672                 :          * avoids somewhat more complicated logic in the inner loops.
     673                 :          */
     674         3539933 :         subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
     675         3539933 :         if (!subpat_names) {
     676               1 :                 RETURN_FALSE;
     677                 :         }
     678                 : 
     679         3539932 :         offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
     680                 : 
     681                 :         /* Allocate match sets array and initialize the values: one result
                         :            array per subpattern. */
     682         3539932 :         if (global && subpats_order == PREG_PATTERN_ORDER) {
     683             305 :                 match_sets = (zval **)safe_emalloc(num_subpats, sizeof(zval *), 0);
     684             742 :                 for (i=0; i<num_subpats; i++) {
     685             437 :                         ALLOC_ZVAL(match_sets[i]);
     686             437 :                         array_init(match_sets[i]);
     687             437 :                         INIT_PZVAL(match_sets[i]);
     688                 :                 }
     689                 :         }
     690                 : 
     691         3539932 :         matched = 0;
     692         3539932 :         PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
     693                 :         
     694                 :         do {
     695                 :                 /* Execute the regular expression. */
     696         3540097 :                 count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
     697                 :                                                   exoptions|g_notempty, offsets, size_offsets);
     698                 : 
     699                 :                 /* the string was already proved to be valid UTF-8 */
     700         3540097 :                 exoptions |= PCRE_NO_UTF8_CHECK;
     701                 : 
     702                 :                 /* Check for too many substrings condition. */
     703         3540097 :                 if (count == 0) {
     704               0 :                         php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
     705               0 :                         count = size_offsets/3;
     706                 :                 }
     707                 : 
     708                 :                 /* If something has matched */
     709         3540097 :                 if (count > 0) {
     710          419920 :                         matched++;
     711                 : 
     712                 :                         /* If subpatterns array has been passed, fill it in with values. */
     713          419920 :                         if (subpats != NULL) {
     714           33834 :                                 offset_map_t map = { subject, 0, 0 };  /* running byte/code point position for add_offset_pair(), reset for every match */
     715                 : 
     716                 :                                 /* Try to get the list of substrings and display a warning if failed.
                         :                                    NOTE(review): match_sets (allocated above when global and
                         :                                    PREG_PATTERN_ORDER) and its arrays are not released on
                         :                                    this exit. */
     717           33834 :                                 if (pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) {
     718               0 :                                         efree(subpat_names);
     719               0 :                                         efree(offsets);
     720               0 :                                         php_error_docref(NULL TSRMLS_CC, E_WARNING, "Get subpatterns list failed");
     721               0 :                                         RETURN_FALSE;
     722                 :                                 }
     723                 : 
     724           33834 :                                 if (global) {   /* global pattern matching */
     725             163 :                                         if (subpats_order == PREG_PATTERN_ORDER) {
     726                 :                                                 /* For each subpattern, insert it into the appropriate array. */
     727             316 :                                                 for (i = 0; i < count; i++) {
     728             191 :                                                         if (offset_capture) {
     729               7 :                                                                 add_offset_pair(match_sets[i], utype, (char *)stringlist[i],
     730                 :                                                                                                 offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, &map TSRMLS_CC);
     731             184 :                                                         } else if (utype == IS_UNICODE) {
     732             174 :                                                                 add_next_index_utf8_stringl(match_sets[i], (char *)stringlist[i],
     733                 :                                                                                                                         offsets[(i<<1)+1] - offsets[i<<1], 1);
     734                 :                                                         } else {
     735              10 :                                                                 add_next_index_stringl(match_sets[i], (char *)stringlist[i],
     736                 :                                                                                                                 offsets[(i<<1)+1] - offsets[i<<1], 1);
     737                 :                                                         }
     738                 :                                                 }
     739                 :                                                 /*
     740                 :                                                  * If the number of captured subpatterns on this run is
     741                 :                                                  * less than the total possible number, pad the result
     742                 :                                                  * arrays with empty strings.
                         :                                                  * NOTE(review): the padding is a Unicode empty string
                         :                                                  * regardless of utype.
     743                 :                                                  */
     744             125 :                                                 if (count < num_subpats) {
     745               2 :                                                         for (; i < num_subpats; i++) {
     746               1 :                                                                 add_next_index_unicode(match_sets[i], EMPTY_STR, 1);
     747                 :                                                         }
     748                 :                                                 }
     749                 :                                         } else {
     750                 :                                                 /* Allocate the result set array */
     751              38 :                                                 ALLOC_ZVAL(result_set);
     752              38 :                                                 array_init(result_set);
     753              38 :                                                 INIT_PZVAL(result_set);
     754                 :                                                 
     755                 :                                                 /* Add all the subpatterns to it */
     756             343 :                                                 for (i = 0; i < count; i++) {
     757             305 :                                                         if (offset_capture) {
     758               7 :                                                                 add_offset_pair(result_set, utype, (char *)stringlist[i],
     759                 :                                                                                                 offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1],
     760                 :                                                                                                 subpat_names[i], &map TSRMLS_CC);
     761             298 :                                                         } else if (utype == IS_UNICODE) {
     762             298 :                                                                 if (subpat_names[i]) {  /* named groups are stored by name and by index */
     763               8 :                                                                         add_utf8_assoc_utf8_stringl(result_set, subpat_names[i], (char *)stringlist[i],
     764                 :                                                                                                                    offsets[(i<<1)+1] - offsets[i<<1], 1);
     765                 :                                                                 }
     766             298 :                                                                 add_next_index_utf8_stringl(result_set, (char *)stringlist[i],
     767                 :                                                                                                                         offsets[(i<<1)+1] - offsets[i<<1], 1);
     768                 :                                                         } else {
     769               0 :                                                                 if (subpat_names[i]) {
     770               0 :                                                                         add_rt_assoc_stringl(result_set, subpat_names[i], (char *)stringlist[i],
     771                 :                                                                                                            offsets[(i<<1)+1] - offsets[i<<1], 1);
     772                 :                                                                 }
     773               0 :                                                                 add_next_index_stringl(result_set, (char *)stringlist[i],
     774                 :                                                                                                                 offsets[(i<<1)+1] - offsets[i<<1], 1);
     775                 :                                                         }
     776                 :                                                 }
     777                 :                                                 /* And add it to the output array */
     778              38 :                                                 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set, sizeof(zval *), NULL);
     779                 :                                         }
     780                 :                                 } else {                        /* single pattern matching */
     781                 :                                         /* For each subpattern, insert it into the subpatterns array. */
     782          101185 :                                         for (i = 0; i < count; i++) {
     783           67514 :                                                 if (offset_capture) {
     784               9 :                                                         add_offset_pair(subpats, utype, (char *)stringlist[i],
     785                 :                                                                                         offsets[(i<<1)+1] - offsets[i<<1],
     786                 :                                                                                         offsets[i<<1], subpat_names[i], &map TSRMLS_CC);
     787           67505 :                                                 } else if (utype == IS_UNICODE) {
     788           67003 :                                                         if (subpat_names[i]) {
     789              13 :                                                                 add_utf8_assoc_utf8_stringl(subpats, subpat_names[i], (char *)stringlist[i],
     790                 :                                                                                                            offsets[(i<<1)+1] - offsets[i<<1], 1);
     791                 :                                                         }
     792           67003 :                                                         add_next_index_utf8_stringl(subpats, (char *)stringlist[i],
     793                 :                                                                                                                 offsets[(i<<1)+1] - offsets[i<<1], 1);
     794                 :                                                 } else {
     795             502 :                                                         if (subpat_names[i]) {
     796               0 :                                                                 add_rt_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
     797                 :                                                                                                    offsets[(i<<1)+1] - offsets[i<<1], 1);
     798                 :                                                         }
     799             502 :                                                         add_next_index_stringl(subpats, (char *)stringlist[i],
     800                 :                                                                                                         offsets[(i<<1)+1] - offsets[i<<1], 1);
     801                 :                                                 }
     802                 :                                         }
     803                 :                                 }
     804                 : 
     805           33834 :                                 pcre_free((void *) stringlist);
     806                 :                         }
     807         3120177 :                 } else if (count == PCRE_ERROR_NOMATCH) {
     808                 :                         /* If we previously set PCRE_NOTEMPTY after a null match,
     809                 :                            this is not necessarily the end. We need to advance
     810                 :                            the start offset, and continue. Fudge the offset values
     811                 :                            to achieve this, unless we're already at the end of the string. */
     812         3120174 :                         if (g_notempty != 0 && start_offset < subject_len) {
     813               2 :                                 offsets[0] = start_offset;
     814               4 :                                 if (utype == IS_UNICODE || pce->compile_options & PCRE_UTF8) {
     815               2 :                                         offsets[1] = start_offset;
     816               2 :                                         U8_FWD_1(subject, offsets[1], subject_len);  /* step over one whole UTF-8 sequence */
     817                 :                                 } else {
     818               0 :                                         offsets[1] = start_offset + 1;
     819                 :                                 }
     820                 :                         } else
     821                 :                                 break;
     822                 :                 } else {
     823               3 :                         pcre_handle_exec_error(count TSRMLS_CC);  /* any other negative count ends the loop; matches found so far are still reported */
     824               3 :                         break;
     825                 :                 }
     826                 :                 
     827                 :                 /* If we have matched an empty string, mimic what Perl's /g options does.
     828                 :                    This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
     829                 :                    the match again at the same point. If this fails (picked up above) we
     830                 :                    advance to the next character. */
     831          419922 :                 g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
     832                 :                 
     833                 :                 /* Advance to the position right after the last full match */
     834          419922 :                 start_offset = offsets[1];
     835          419922 :         } while (global);
     836                 : 
     837                 :         /* Add the match sets to the output array and clean up.
                         :            NOTE(review): subpats is dereferenced here without a NULL check,
                         :            although the code above tolerates subpats == NULL; confirm no caller
                         :            combines global matching with a NULL subpats. */
     838         3539932 :         if (global && subpats_order == PREG_PATTERN_ORDER) {
     839             305 :                 UErrorCode status = U_ZERO_ERROR;
     840             305 :                 UChar *u = NULL;
     841                 :                 int u_len;
     842             742 :                 for (i = 0; i < num_subpats; i++) {
     843             437 :                         if (subpat_names[i]) {
     844               5 :                                 zend_string_to_unicode_ex(UG(utf8_conv), &u, &u_len, subpat_names[i], strlen(subpat_names[i]), &status);
     845               5 :                                 zend_u_hash_update(Z_ARRVAL_P(subpats), IS_UNICODE, ZSTR(u),
     846                 :                                                                    u_len+1, &match_sets[i], sizeof(zval *), NULL);
     847               5 :                                 Z_ADDREF_P(match_sets[i]);  /* the same array is also inserted by index below */
     848               5 :                                 efree(u);
     849               5 :                                 status = U_ZERO_ERROR;
     850                 :                         }
     851             437 :                         zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i], sizeof(zval *), NULL);
     852                 :                 }
     853             305 :                 efree(match_sets);
     854                 :         }
     855                 :         
     856         3539932 :         efree(offsets);
     857         3539932 :         efree(subpat_names);
     858                 : 
     859         3539932 :         RETVAL_LONG(matched);
     860                 : }
     861                 : /* }}} */
     862                 : 
     863                 : /* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]]) U
     864                 :    Perform a Perl-style regular expression match.
                         :    Thin wrapper: forwards its arguments unchanged to php_do_pcre_match()
                         :    with a trailing argument of 0 (preg_match_all passes 1). That argument
                         :    is presumably the "global" switch -- confirm against the signature of
                         :    php_do_pcre_match(). */
     865                 : static PHP_FUNCTION(preg_match)
     866         3539607 : {
     867         3539607 :         php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
     868         3539607 : }
     869                 : /* }}} */
     870                 : 
     871                 : /* {{{ proto int preg_match_all(string pattern, string subject, array &subpatterns [, int flags [, int offset]]) U
     872                 :    Perform a Perl-style global regular expression match.
                         :    Thin wrapper: forwards its arguments unchanged to php_do_pcre_match()
                         :    with a trailing argument of 1 (preg_match passes 0). That argument
                         :    is presumably the "global" switch -- confirm against the signature of
                         :    php_do_pcre_match(). */
     873                 : static PHP_FUNCTION(preg_match_all)
     874              96 : {
     875              96 :         php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
     876              96 : }
     877                 : /* }}} */
     878                 : 
     879                 : /* {{{ preg_get_backref
                         :  * Try to parse a backreference that starts at *str.
                         :  *
                         :  * The first character of *str is skipped without being inspected (the
                         :  * callers in this file only call in when it is '\' or '$' ). Accepted
                         :  * forms are <lead>N, <lead>NN and, for a '$' lead only, ${N} / ${NN},
                         :  * where N is a decimal digit, so the parsed number is in 0..99.
                         :  *
                         :  * str      in/out: on success it is advanced past the whole reference;
                         :  *          on failure it is left untouched.
                         :  * backref  out: receives the parsed group number. It may already have
                         :  *          been written when 0 is returned for an unterminated ${N form.
                         :  *
                         :  * Returns 1 if a reference was parsed, 0 otherwise.
     880                 :  */
     881                 : static int preg_get_backref(char **str, int *backref)
     882             119 : {
     883             119 :         register char in_brace = 0;
     884             119 :         register char *walk = *str;
     885                 : 
     886             119 :         if (walk[1] == 0) /* nothing follows the lead character: cannot be a reference */
     887               9 :                 return 0;
     888                 : 
     889             110 :         if (*walk == '$' && walk[1] == '{') { /* braced form, only valid after '$' */
     890              14 :                 in_brace = 1;
     891              14 :                 walk++;
     892                 :         }
     893             110 :         walk++; /* step over the lead character, or over '{' in the braced form */
     894                 : 
     895             110 :         if (*walk >= '0' && *walk <= '9') { /* first digit is mandatory */
     896              86 :                 *backref = *walk - '0';
     897              86 :                 walk++;
     898                 :         } else
     899              24 :                 return 0;
     900                 :         
     901             120 :         if (*walk && *walk >= '0' && *walk <= '9') { /* optional second digit */
     902               2 :                 *backref = *backref * 10 + *walk - '0';
     903               2 :                 walk++;
     904                 :         }
     905                 : 
     906              86 :         if (in_brace) {
     907              14 :                 if (*walk == 0 || *walk != '}') /* braced form must be closed right after the digits */
     908               6 :                         return 0;
     909                 :                 else
     910               8 :                         walk++;
     911                 :         }
     912                 :         
     913              80 :         *str = walk; /* commit the new position only on success */
     914              80 :         return 1;       
     915                 : }
     916                 : /* }}} */
     917                 : 
     918                 : /* {{{ preg_do_repl_func
                         :  * Obtain the replacement text for one match by calling a user callback.
                         :  *
                         :  * An array of the captured substrings is built and passed as the single
                         :  * argument: every group i in 0..count-1 is appended by index, and a
                         :  * group that has an entry in subpat_names is additionally stored under
                         :  * that name (the named entry is inserted before the indexed one).
                         :  *
                         :  * function      callable to invoke
                         :  * subject       string the offsets index into
                         :  * offsets       start/end offset pairs; group i uses [2i] and [2i+1]
                         :  * subpat_names  per-group name, or NULL for unnamed groups
                         :  * count         number of groups to pass to the callback
                         :  * result        out: estrndup()'d replacement; the caller releases it
                         :  *
                         :  * Returns the length of *result. If the call fails or yields no return
                         :  * value, *result is a copy of the whole match (offsets[0]..offsets[1])
                         :  * and a warning is raised unless an exception is already pending.
     919                 :  */
     920                 : static int preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, char **result TSRMLS_DC)
     921              42 : {
     922                 :         zval            *retval_ptr;            /* Function return value */
     923                 :         zval       **args[1];                   /* Argument to pass to function */
     924                 :         zval            *subpats;                       /* Captured subpatterns */ 
     925                 :         int                      result_len;            /* Return value length */
     926                 :         int                      i;
     927                 : 
     928              42 :         MAKE_STD_ZVAL(subpats);
     929              42 :         array_init(subpats);
     930              94 :         for (i = 0; i < count; i++) { /* i<<1 and (i<<1)+1 select the start/end offsets of group i */
     931              52 :                 if (subpat_names[i]) {
     932               1 :                         add_utf8_assoc_utf8_stringl(subpats, subpat_names[i], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1);
     933                 :                 }
     934              52 :                 add_next_index_utf8_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1);
     935                 :         }
     936              42 :         args[0] = &subpats;
     937                 : 
     938              83 :         if (call_user_function_ex(EG(function_table), NULL, function, &retval_ptr, 1, args, 0, NULL TSRMLS_CC) == SUCCESS && retval_ptr) {
     939              41 :                 convert_to_string_with_converter_ex(&retval_ptr, UG(utf8_conv)); /* callback may return any type; force a string */
     940              41 :                 *result = estrndup(Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr));
     941              41 :                 result_len = Z_STRLEN_P(retval_ptr);
     942              41 :                 zval_ptr_dtor(&retval_ptr);
     943                 :         } else {
     944               1 :                 if (!EG(exception)) { /* stay quiet when an exception is already pending */
     945               0 :                         php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call custom replacement function");
     946                 :                 }
     947               1 :                 result_len = offsets[1] - offsets[0]; /* fall back to the matched text itself */
     948               1 :                 *result = estrndup(&subject[offsets[0]], result_len);
     949                 :         }
     950                 : 
     951              42 :         zval_ptr_dtor(&subpats);
     952                 : 
     953              42 :         return result_len;
     954                 : }
     955                 : /* }}} */
     956                 : 
     957                 : /* {{{ preg_do_eval
                         :  * Build a code string from eval_str by substituting backreferences with
                         :  * the corresponding pieces of subject, evaluate it, and hand the result
                         :  * back as a string.
                         :  *
                         :  * eval_str, eval_str_len  code template scanned for '\' and '$' references
                         :  * subject                 string the offsets index into
                         :  * offsets                 start/end offset pairs; group n uses [2n], [2n+1]
                         :  * count                   number of groups available; a reference to a
                         :  *                         group >= count is replaced by the empty string
                         :  * result                  out: estrndup()'d copy of the evaluated value
                         :  *                         after conversion to a string
                         :  *
                         :  * Returns the length of *result. A failed evaluation raises E_ERROR.
                         :  * UG(runtime_encoding_conv) is switched to UG(utf8_conv) for the
                         :  * duration of the evaluation and restored afterwards.
                         :  *
                         :  * NOTE(review): the evaluated code contains text taken from subject;
                         :  * the only escaping applied here is php_addslashes_ex() -- confirm that
                         :  * this is sufficient for every way the template can quote a reference.
     958                 :  */
     959                 : static int preg_do_eval(char *eval_str, int eval_str_len, char *subject,
     960                 :                                                 int *offsets, int count, char **result TSRMLS_DC)
     961               5 : {
     962                 :         zval             retval;                        /* Return value from evaluation */
     963                 :         char            *eval_str_end,          /* End of eval string */
     964                 :                                 *match,                         /* Current match for a backref */
     965                 :                                 *esc_match,                     /* Quote-escaped match */
     966                 :                                 *walk,                          /* Used to walk the code string */
     967                 :                                 *segment,                       /* Start of segment to append while walking */
     968                 :                                  walk_last;                     /* Last walked character */
     969                 :         int                      match_len;                     /* Length of the match */
     970                 :         int                      esc_match_len;         /* Length of the quote-escaped match */
     971                 :         int                      result_len;            /* Length of the result of the evaluation */
     972                 :         int                      backref;                       /* Current backref */
     973                 :         char        *compiled_string_description;
     974                 :         UConverter  *orig_runtime_conv;
     975               5 :         smart_str    code = {0};
     976                 :         
     977               5 :         eval_str_end = eval_str + eval_str_len;
     978               5 :         walk = segment = eval_str;
     979               5 :         walk_last = 0;
     980                 :         
     981             195 :         while (walk < eval_str_end) {
     982                 :                 /* If found a backreference.. */
     983             185 :                 if ('\\' == *walk || '$' == *walk) {
     984              33 :                         smart_str_appendl(&code, segment, walk - segment); /* flush the literal text seen so far */
     985              33 :                         if (walk_last == '\\') { /* escaped lead character: replace the backslash just appended with it */
     986               0 :                                 code.c[code.len-1] = *walk++;
     987               0 :                                 segment = walk;
     988               0 :                                 walk_last = 0;
     989               0 :                                 continue;
     990                 :                         }
     991              33 :                         segment = walk;
     992              33 :                         if (preg_get_backref(&walk, &backref)) { /* on success walk is already past the reference */
     993               8 :                                 if (backref < count) {
     994                 :                                         /* Find the corresponding string match and substitute it
     995                 :                                            in instead of the backref */
     996               8 :                                         match = subject + offsets[backref<<1];
     997               8 :                                         match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
     998               8 :                                         if (match_len) {
     999               8 :                                                 esc_match = php_addslashes_ex(match, match_len, &esc_match_len, 0 TSRMLS_CC);
    1000                 :                                         } else {
    1001               0 :                                                 esc_match = match;
    1002               0 :                                                 esc_match_len = 0;
    1003                 :                                         }
    1004                 :                                 } else {
    1005               0 :                                         esc_match = "";
    1006               0 :                                         esc_match_len = 0;
    1007                 :                                 }
    1008               8 :                                 smart_str_appendl(&code, esc_match, esc_match_len);
    1009                 : 
    1010               8 :                                 segment = walk;
    1011                 : 
    1012                 :                                 /* Clean up and reassign */
    1013               8 :                                 if (esc_match_len) /* zero length means esc_match was not assigned from php_addslashes_ex() */
    1014               8 :                                         efree(esc_match);
    1015               8 :                                 continue;
    1016                 :                         }
    1017                 :                 }
    1018             177 :                 walk++;
    1019             177 :                 walk_last = walk[-1];
    1020                 :         }
    1021               5 :         smart_str_appendl(&code, segment, walk - segment); /* trailing literal text */
    1022               5 :         smart_str_0(&code);
    1023                 : 
    1024               5 :         orig_runtime_conv = UG(runtime_encoding_conv); /* saved so it can be restored on both paths below */
    1025               5 :         UG(runtime_encoding_conv) = UG(utf8_conv);
    1026               5 :         compiled_string_description = zend_make_compiled_string_description("regexp code" TSRMLS_CC);
    1027                 :         /* Run the code */
    1028               5 :         if (zend_eval_stringl(code.c, code.len, &retval, compiled_string_description TSRMLS_CC) == FAILURE) {
    1029               1 :                 efree(compiled_string_description);
    1030               1 :                 UG(runtime_encoding_conv) = orig_runtime_conv;
    1031               1 :                 php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, code.c);
    1032                 :                 /* zend_error() does not return in this case */
    1033                 :         }
    1034               4 :         efree(compiled_string_description);
    1035               4 :         UG(runtime_encoding_conv) = orig_runtime_conv;
    1036               4 :         convert_to_string_with_converter(&retval, UG(utf8_conv));
    1037                 :         
    1038                 :         /* Save the return value and its length */
    1039               4 :         *result = estrndup(Z_STRVAL(retval), Z_STRLEN(retval));
    1040               4 :         result_len = Z_STRLEN(retval);
    1041                 :         
    1042                 :         /* Clean up */
    1043               4 :         zval_dtor(&retval);
    1044               4 :         smart_str_free(&code);
    1045                 :         
    1046               4 :         return result_len;
    1047                 : }
    1048                 : /* }}} */
    1049                 : 
    1050                 : /* {{{ php_pcre_replace
                         :  * Convenience entry point that takes the regex as text: looks it up via
                         :  * pcre_get_compiled_regex_cache() and, if that succeeds, forwards every
                         :  * remaining argument unchanged to php_pcre_replace_impl().
                         :  *
                         :  * Returns whatever php_pcre_replace_impl() returns, or NULL when no
                         :  * compiled pattern could be obtained.
    1051                 :  */
    1052                 : PHPAPI char *php_pcre_replace(zend_uchar utype,
    1053                 :                                                           char *regex,   int regex_len,
    1054                 :                                                           char *subject, int subject_len,
    1055                 :                                                           zval *replace_val, int is_callable_replace,
    1056                 :                                                           int *result_len, int limit, int *replace_count TSRMLS_DC)
    1057           20744 : {
    1058                 :         pcre_cache_entry        *pce;                       /* Compiled regular expression */
    1059                 : 
    1060                 :         /* Compile regex or get it from cache. */
    1061           20744 :         if ((pce = pcre_get_compiled_regex_cache(utype, regex, regex_len TSRMLS_CC)) == NULL) {
    1062               9 :                 return NULL;
    1063                 :         }
    1064                 : 
    1065           20735 :         return php_pcre_replace_impl(pce, utype, subject, subject_len, replace_val, 
    1066                 :                 is_callable_replace, result_len, limit, replace_count TSRMLS_CC);
    1067                 : }
    1068                 : /* }}} */
    1069                 : 
    1070                 : /* {{{ php_pcre_replace_impl() */
    1071                 : PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, zend_uchar utype, char *subject, int subject_len, zval *replace_val, 
    1072                 :         int is_callable_replace, int *result_len, int limit, int *replace_count TSRMLS_DC)
    1073           20735 : {
    1074           20735 :         pcre_extra              *extra = pce->extra;/* Holds results of studying */
    1075                 :         pcre_extra               extra_data;            /* Used locally for exec options */
    1076           20735 :         int                              exoptions = 0;         /* Execution options */
    1077           20735 :         int                              count = 0;                     /* Count of matched subpatterns */
    1078                 :         int                             *offsets;                       /* Array of subpattern offsets */
    1079                 :         char                    **subpat_names;         /* Array for named subpatterns */
    1080                 :         int                              num_subpats;           /* Number of captured subpatterns */
    1081                 :         int                              size_offsets;          /* Size of the offsets array */
    1082                 :         int                              new_len;                       /* Length of needed storage */
    1083                 :         int                              alloc_len;                     /* Actual allocated length */
    1084           20735 :         int                              eval_result_len=0;     /* Length of the eval'ed or
    1085                 :                                                                                    function-returned string */
    1086                 :         int                              match_len;                     /* Length of the current match */
    1087                 :         int                              backref;                       /* Backreference number */
    1088                 :         int                              eval;                          /* If the replacement string should be eval'ed */
    1089                 :         int                              start_offset;          /* Where the new search starts */
    1090           20735 :         int                              g_notempty=0;          /* If the match should not be empty */
    1091           20735 :         int                              replace_len=0;         /* Length of replacement string */
    1092                 :         char                    *result,                        /* Result of replacement */
    1093           20735 :                                         *replace=NULL,          /* Replacement string */
    1094                 :                                         *new_buf,                       /* Temporary buffer for re-allocation */
    1095                 :                                         *walkbuf,                       /* Location of current replacement in the result */
    1096                 :                                         *walk,                          /* Used to walk the replacement string */
    1097                 :                                         *match,                         /* The current match */
    1098                 :                                         *piece,                         /* The current piece of subject */
    1099           20735 :                                         *replace_end=NULL,      /* End of replacement string */
    1100                 :                                         *eval_result,           /* Result of eval or custom function */
    1101                 :                                          walk_last;                     /* Last walked character */
    1102                 :         int                              rc;
    1103                 : 
    1104           20735 :         if (extra == NULL) {
    1105           20716 :                 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
    1106           20716 :                 extra = &extra_data;
    1107                 :         }
    1108           20735 :         extra->match_limit = PCRE_G(backtrack_limit);
    1109           20735 :         extra->match_limit_recursion = PCRE_G(recursion_limit);
    1110                 : 
    1111           20735 :         eval = pce->preg_options & PREG_REPLACE_EVAL;
    1112           20735 :         if (is_callable_replace) {
    1113              28 :                 if (eval) {
    1114               1 :                         php_error_docref(NULL TSRMLS_CC, E_WARNING, "Modifier /e cannot be used with replacement callback");
    1115               1 :                         return NULL;
    1116                 :                 }
    1117                 :         } else {
    1118           20707 :                 replace = Z_STRVAL_P(replace_val);
    1119           20707 :                 replace_len = Z_STRLEN_P(replace_val);
    1120           20707 :                 replace_end = replace + replace_len;
    1121                 :         }
    1122                 : 
    1123                 :         /* Calculate the size of the offsets array, and allocate memory for it. */
    1124           20734 :         rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
    1125           20734 :         if (rc < 0) {
    1126               0 :                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
    1127               0 :                 return NULL;
    1128                 :         }
    1129           20734 :         num_subpats++;
    1130           20734 :         size_offsets = num_subpats * 3;
    1131                 : 
    1132                 :         /*
    1133                 :          * Build a mapping from subpattern numbers to their names. We will always
    1134                 :          * allocate the table, even though there may be no named subpatterns. This
    1135                 :          * avoids somewhat more complicated logic in the inner loops.
    1136                 :          */
    1137           20734 :         subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
    1138           20734 :         if (!subpat_names) {
    1139               1 :                 return NULL;
    1140                 :         }
    1141                 : 
    1142           20733 :         offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
    1143                 :         
    1144           20733 :         alloc_len = 2 * subject_len + 1;
    1145           20733 :         result = safe_emalloc(alloc_len, sizeof(char), 0);
    1146                 : 
    1147                 :         /* Initialize */
    1148           20733 :         match = NULL;
    1149           20733 :         *result_len = 0;
    1150           20733 :         start_offset = 0;
    1151           20733 :         PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
    1152                 : 
    1153           20733 :         if (utype != IS_UNICODE && !(pce->compile_options & PCRE_UTF8)) {
    1154           14319 :                 exoptions |= PCRE_NO_UTF8_CHECK;
    1155                 :         }
    1156                 :         
    1157                 :         while (1) {
    1158                 :                 /* Execute the regular expression. */
    1159           25603 :                 count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
    1160                 :                                                   exoptions|g_notempty, offsets, size_offsets);
    1161                 : 
    1162                 :                 /* the string was already proved to be valid UTF-8 */
    1163           25603 :                 exoptions |= PCRE_NO_UTF8_CHECK;
    1164                 : 
    1165                 :                 /* Check for too many substrings condition. */
    1166           25603 :                 if (count == 0) {
    1167               0 :                         php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
    1168               0 :                         count = size_offsets/3;
    1169                 :                 }
    1170                 : 
    1171           25603 :                 piece = subject + start_offset;
    1172                 : 
    1173           30473 :                 if (count > 0 && (limit == -1 || limit > 0)) {
    1174            4871 :                         if (replace_count) {
    1175            4871 :                                 ++*replace_count;
    1176                 :                         }
    1177                 :                         /* Set the match location in subject */
    1178            4871 :                         match = subject + offsets[0];
    1179                 : 
    1180            4871 :                         new_len = *result_len + offsets[0] - start_offset; /* part before the match */
    1181                 :                         
    1182                 :                         /* If evaluating, do it and add the return string's length */
    1183            4871 :                         if (eval) {
    1184               5 :                                 eval_result_len = preg_do_eval(replace, replace_len, subject,
    1185                 :                                                                                            offsets, count, &eval_result TSRMLS_CC);
    1186               4 :                                 new_len += eval_result_len;
    1187            4866 :                         } else if (is_callable_replace) {
    1188                 :                                 /* Use custom function to get replacement string and its length. */
    1189              42 :                                 eval_result_len = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, &eval_result TSRMLS_CC);
    1190              42 :                                 new_len += eval_result_len;
    1191                 :                         } else { /* do regular substitution */
    1192            4824 :                                 walk = replace;
    1193            4824 :                                 walk_last = 0;
    1194           14580 :                                 while (walk < replace_end) {
    1195            4932 :                                         if ('\\' == *walk || '$' == *walk) {
    1196              43 :                                                 if (walk_last == '\\') {
    1197               0 :                                                         walk++;
    1198               0 :                                                         walk_last = 0;
    1199               0 :                                                         continue;
    1200                 :                                                 }
    1201              43 :                                                 if (preg_get_backref(&walk, &backref)) {
    1202              36 :                                                         if (backref < count)
    1203              35 :                                                                 new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
    1204              36 :                                                         continue;
    1205                 :                                                 }
    1206                 :                                         }
    1207            4896 :                                         new_len++;
    1208            4896 :                                         walk++;
    1209            4896 :                                         walk_last = walk[-1];
    1210                 :                                 }
    1211                 :                         }
    1212                 : 
    1213            4870 :                         if (new_len + 1 > alloc_len) {
    1214              12 :                                 alloc_len = 1 + alloc_len + 2 * new_len;
    1215              12 :                                 new_buf = emalloc(alloc_len);
    1216              12 :                                 memcpy(new_buf, result, *result_len);
    1217              12 :                                 efree(result);
    1218              12 :                                 result = new_buf;
    1219                 :                         }
    1220                 :                         /* copy the part of the string before the match */
    1221            4870 :                         memcpy(&result[*result_len], piece, match-piece);
    1222            4870 :                         *result_len += match-piece;
    1223                 : 
    1224                 :                         /* copy replacement and backrefs */
    1225            4870 :                         walkbuf = result + *result_len;
    1226                 :                         
    1227                 :                         /* If evaluating or using custom function, copy result to the buffer
    1228                 :                          * and clean up. */
    1229            4916 :                         if (eval || is_callable_replace) {
    1230              46 :                                 memcpy(walkbuf, eval_result, eval_result_len);
    1231              46 :                                 *result_len += eval_result_len;
    1232              46 :                                 STR_FREE(eval_result);
    1233                 :                         } else { /* do regular backreference copying */
    1234            4824 :                                 walk = replace;
    1235            4824 :                                 walk_last = 0;
    1236           14580 :                                 while (walk < replace_end) {
    1237            4932 :                                         if ('\\' == *walk || '$' == *walk) {
    1238              43 :                                                 if (walk_last == '\\') {
    1239               0 :                                                         *(walkbuf-1) = *walk++;
    1240               0 :                                                         walk_last = 0;
    1241               0 :                                                         continue;
    1242                 :                                                 }
    1243              43 :                                                 if (preg_get_backref(&walk, &backref)) {
    1244              36 :                                                         if (backref < count) {
    1245              35 :                                                                 match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
    1246              35 :                                                                 memcpy(walkbuf, subject + offsets[backref<<1], match_len);
    1247              35 :                                                                 walkbuf += match_len;
    1248                 :                                                         }
    1249              36 :                                                         continue;
    1250                 :                                                 }
    1251                 :                                         }
    1252            4896 :                                         *walkbuf++ = *walk++;
    1253            4896 :                                         walk_last = walk[-1];
    1254                 :                                 }
    1255            4824 :                                 *walkbuf = '\0';
    1256                 :                                 /* increment the result length by how much we've added to the string */
    1257            4824 :                                 *result_len += walkbuf - (result + *result_len);
    1258                 :                         }
    1259                 : 
    1260            4870 :                         if (limit != -1)
    1261              15 :                                 limit--;
    1262                 : 
    1263           20732 :                 } else if (count == PCRE_ERROR_NOMATCH || limit == 0) {
    1264                 :                         /* If we previously set PCRE_NOTEMPTY after a null match,
    1265                 :                            this is not necessarily the end. We need to advance
    1266                 :                            the start offset, and continue. Fudge the offset values
    1267                 :                            to achieve this, unless we're already at the end of the string. */
    1268           20729 :                         if (g_notempty != 0 && start_offset < subject_len) {
    1269               0 :                                 offsets[0] = start_offset;
    1270               0 :                                 if (utype == IS_UNICODE || pce->compile_options & PCRE_UTF8) {
    1271               0 :                                         offsets[1] = start_offset;
    1272               0 :                                         U8_FWD_1(subject, offsets[1], subject_len);
    1273                 :                                 } else {
    1274               0 :                                         offsets[1] = start_offset + 1;
    1275                 :                                 }
    1276               0 :                                 memcpy(&result[*result_len], piece, 1);
    1277               0 :                                 (*result_len)++;
    1278                 :                         } else {
    1279           20729 :                                 new_len = *result_len + subject_len - start_offset;
    1280           20729 :                                 if (new_len + 1 > alloc_len) {
    1281               1 :                                         alloc_len = new_len + 1; /* now we know exactly how long it is */
    1282               1 :                                         new_buf = safe_emalloc(alloc_len, sizeof(char), 0);
    1283               1 :                                         memcpy(new_buf, result, *result_len);
    1284               1 :                                         efree(result);
    1285               1 :                                         result = new_buf;
    1286                 :                                 }
    1287                 :                                 /* stick that last bit of string on our output */
    1288           20729 :                                 memcpy(&result[*result_len], piece, subject_len - start_offset);
    1289           20729 :                                 *result_len += subject_len - start_offset;
    1290           20729 :                                 result[*result_len] = '\0';
    1291           20729 :                                 break;
    1292                 :                         }
    1293                 :                 } else {
    1294               3 :                         pcre_handle_exec_error(count TSRMLS_CC);
    1295               3 :                         efree(result);
    1296               3 :                         result = NULL;
    1297               3 :                         break;
    1298                 :                 }
    1299                 :                         
    1300                 :                 /* If we have matched an empty string, mimic what Perl's /g option does.
    1301                 :                    This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
    1302                 :                    the match again at the same point. If this fails (picked up above) we
    1303                 :                    advance to the next character. */
    1304            4870 :                 g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
    1305                 :                 
    1306                 :                 /* Advance to the next piece. */
    1307            4870 :                 start_offset = offsets[1];
    1308            4870 :         }
    1309                 : 
    1310           20732 :         efree(offsets);
    1311           20732 :         efree(subpat_names);
    1312                 : 
    1313           20732 :         return result;
    1314                 : }
    1315                 : /* }}} */
    1316                 : 
    1317                 : /* {{{ php_replace_in_subject
                         :  * Applies regex/replace to a single subject and returns the resulting
                         :  * string (its length is stored in *result_len), or NULL when
                         :  * php_pcre_replace() returns NULL.
                         :  *
                         :  * The subject is converted to a string in place; its type before that
                         :  * conversion is passed to php_pcre_replace() as utype.
                         :  *
                         :  * If regex is an array, a private copy of the subject is run through each
                         :  * pattern in turn, each result becoming the input of the next call. When
                         :  * replace is a (non-callable) array, patterns are paired with its entries
                         :  * in iteration order, and an empty string is used once the entries run
                         :  * out. Otherwise regex is used as a single pattern.
                         :  *
                         :  * limit, is_callable_replace and replace_count are passed through
                         :  * unchanged to php_pcre_replace().
    1318                 :  */
    1319                 : static char *php_replace_in_subject(zval *regex, zval *replace, zval **subject, int *result_len, int limit, int is_callable_replace, int *replace_count TSRMLS_DC)
    1320           20701 : {
    1321                 :         zval            **regex_entry,
    1322           20701 :                                 **replace_entry = NULL,
    1323                 :                                  *replace_value,
    1324                 :                                   empty_replace;
    1325                 :         char            *subject_value,
    1326                 :                                 *result;
    1327                 :         int                      subject_len;
    1328                 :         zend_uchar   utype;
    1329                 : 
    1330                 :         /* Make sure we're dealing with strings. */     
    1331           20701 :         utype = Z_TYPE_PP(subject); /* remember the type before the conversion below */
    1332           20701 :         convert_to_string_with_converter_ex(subject, UG(utf8_conv));
    1333                 : 
    1334           20701 :         ZVAL_STRINGL(&empty_replace, "", 0, 0); /* fallback replacement when the replace array is exhausted */
    1335                 :         
    1336                 :         /* If regex is an array */
    1337           20701 :         if (Z_TYPE_P(regex) == IS_ARRAY) {
    1338                 :                 /* Duplicate subject string for repeated replacement */
    1339              22 :                 subject_value = estrndup(Z_STRVAL_PP(subject), Z_STRLEN_PP(subject));
    1340              22 :                 subject_len = Z_STRLEN_PP(subject);
    1341              22 :                 *result_len = subject_len; /* stays valid if the regex array has no entries */
    1342                 :                 
    1343              22 :                 zend_hash_internal_pointer_reset(Z_ARRVAL_P(regex));
    1344                 : 
    1345              22 :                 replace_value = replace;
    1346              22 :                 if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace)
    1347              15 :                         zend_hash_internal_pointer_reset(Z_ARRVAL_P(replace));
    1348                 : 
    1349                 :                 /* For each entry in the regex array, get the entry */
    1350             108 :                 while (zend_hash_get_current_data(Z_ARRVAL_P(regex), (void **)&regex_entry) == SUCCESS) {
    1351                 :                         /* Make sure we're dealing with strings. */     
    1352              65 :                         convert_to_string_with_converter_ex(regex_entry, UG(utf8_conv));
    1353                 :                 
    1354                 :                         /* If replace is an array and not a callable construct */
    1355              65 :                         if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) {
    1356                 :                                 /* Get current entry */
    1357              50 :                                 if (zend_hash_get_current_data(Z_ARRVAL_P(replace), (void **)&replace_entry) == SUCCESS) {
    1358              48 :                                         if (!is_callable_replace) { /* redundant: the enclosing branch already requires !is_callable_replace */
    1359              48 :                                                 convert_to_string_with_converter_ex(replace_entry, UG(utf8_conv));
    1360                 :                                         }
    1361              48 :                                         replace_value = *replace_entry;
    1362              48 :                                         zend_hash_move_forward(Z_ARRVAL_P(replace));
    1363                 :                                 } else {
    1364                 :                                         /* We've run out of replacement strings, so use an empty one */
    1365               2 :                                         replace_value = &empty_replace;
    1366                 :                                 }
    1367                 :                         }
    1368                 :                         
    1369                 :                         /* Do the actual replacement and put the result back into subject_value
    1370                 :                            for further replacements. */
    1371              65 :                         if ((result = php_pcre_replace(utype,
    1372                 :                                                                                    Z_STRVAL_PP(regex_entry),
    1373                 :                                                                                    Z_STRLEN_PP(regex_entry),
    1374                 :                                                                                    subject_value,
    1375                 :                                                                                    subject_len,
    1376                 :                                                                                    replace_value,
    1377                 :                                                                                    is_callable_replace,
    1378                 :                                                                                    result_len,
    1379                 :                                                                                    limit,
    1380                 :                                                                                    replace_count TSRMLS_CC)) != NULL) {
    1381              64 :                                 efree(subject_value); /* the previous intermediate string is no longer needed */
    1382              64 :                                 subject_value = result;
    1383              64 :                                 subject_len = *result_len;
    1384                 :                         } else {
    1385               1 :                                 efree(subject_value); /* release the working copy before reporting failure */
    1386               1 :                                 return NULL;
    1387                 :                         }
    1388                 : 
    1389              64 :                         zend_hash_move_forward(Z_ARRVAL_P(regex));
    1390                 :                 }
    1391                 : 
    1392              21 :                 return subject_value;
    1393                 :         } else {
    1394           20679 :                 result = php_pcre_replace(utype,
    1395                 :                                                                   Z_STRVAL_P(regex),
    1396                 :                                                                   Z_STRLEN_P(regex),
    1397                 :                                                                   Z_STRVAL_PP(subject),
    1398                 :                                                                   Z_STRLEN_PP(subject),
    1399                 :                                                                   replace,
    1400                 :                                                                   is_callable_replace,
    1401                 :                                                                   result_len,
    1402                 :                                                                   limit,
    1403                 :                                                                   replace_count TSRMLS_CC);
    1404           20678 :                 return result;
    1405                 :         }
    1406                 : }
    1407                 : /* }}} */
    1408                 : 
    1409                 : /* {{{ preg_replace_impl
                         :  * Shared body of preg_replace(), preg_replace_callback() and preg_filter().
                         :  *
                         :  * Parses (regex, replace, subject [, limit [, count]]); limit defaults to -1.
                         :  * is_callable_replace: replace is validated with zend_is_callable(); if it
                         :  *   is not callable, a warning is raised and a copy of subject is returned.
                         :  * is_filter: a result is only reported when the replacement count grew for
                         :  *   that subject; otherwise the result string is freed.
                         :  *
                         :  * A non-callable array replace combined with a non-array regex raises a
                         :  * warning and returns FALSE.
                         :  *
                         :  * For an array subject, return_value becomes an array holding the result
                         :  * for each entry under the entry's original key. For any other subject the
                         :  * result string is returned directly; when it is dropped because of
                         :  * is_filter, return_value is not assigned by this function.
                         :  *
                         :  * When more than four arguments were passed, the total replacement count
                         :  * is written to the count argument.
    1410                 :  */
    1411                 : static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, int is_callable_replace, int is_filter)
    1412           20708 : {
    1413                 :         zval                *regex,
    1414                 :                                     *replace,
    1415                 :                                     *subject,
    1416                 :                                    **subject_entry,
    1417           20708 :                                     *zcount = NULL;
    1418                 :         char                    *result;
    1419                 :         int                              result_len;
    1420           20708 :         long                     limit = -1;
    1421                 :         zstr                     string_key;
    1422                 :         uint                     string_key_len;
    1423                 :         ulong                    num_key;
    1424                 :         zval                     callback_name;
    1425           20708 :         int                              replace_count=0, old_replace_count;
    1426                 :         zend_uchar       utype;
    1427                 :         
    1428           20708 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z/z/z/|lz", &regex,
    1429                 :                                                           &replace, &subject, &limit, &zcount) == FAILURE) {
    1430              11 :                 return;
    1431                 :         }
    1432                 : 
    1433           20697 :         if (!is_callable_replace && Z_TYPE_P(replace) == IS_ARRAY && Z_TYPE_P(regex) != IS_ARRAY) {
    1434               3 :                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
    1435               3 :                 RETURN_FALSE;
    1436                 :         }
    1437                 : 
    1438           20694 :         if (is_callable_replace) {
    1439              32 :                 if (Z_TYPE_P(replace) != IS_ARRAY && Z_TYPE_P(replace) != IS_OBJECT) {
    1440              27 :                         convert_to_unicode(replace);
    1441                 :                 }
    1442              32 :                 if (!zend_is_callable(replace, 0, &callback_name TSRMLS_CC)) {
    1443               4 :                         php_error_docref(NULL TSRMLS_CC, E_WARNING, "Requires argument 2, '%R', to be a valid callback", Z_TYPE(callback_name), Z_UNIVAL(callback_name));
    1444               4 :                         zval_dtor(&callback_name);
    1445               4 :                         *return_value = *subject; /* hand back a copy of the subject */
    1446               4 :                         zval_copy_ctor(return_value);
    1447               4 :                         INIT_PZVAL(return_value);
    1448               4 :                         return;
    1449                 :                 }
    1450              28 :                 zval_dtor(&callback_name);
    1451           20662 :         } else if (Z_TYPE_P(replace) != IS_ARRAY) {
    1452           20656 :                 convert_to_string_with_converter(replace, UG(utf8_conv));
    1453                 :         }
    1454                 : 
    1455           20689 :         if (Z_TYPE_P(regex) != IS_ARRAY) {
    1456           20678 :                 convert_to_string_with_converter(regex, UG(utf8_conv));
    1457                 :         }
    1458                 :         
    1459                 :         /* if subject is an array */
    1460           20688 :         if (Z_TYPE_P(subject) == IS_ARRAY) {
    1461               6 :                 array_init(return_value);
    1462               6 :                 zend_hash_internal_pointer_reset(Z_ARRVAL_P(subject));
    1463                 : 
    1464                 :                 /* For each subject entry, convert it to string, then perform replacement
    1465                 :                    and add the result to the return_value array. */
    1466              31 :                 while (zend_hash_get_current_data(Z_ARRVAL_P(subject), (void **)&subject_entry) == SUCCESS) {
    1467              19 :                         SEPARATE_ZVAL(subject_entry);
    1468              19 :                         utype = Z_TYPE_PP(subject_entry); /* entry type before php_replace_in_subject() converts it */
    1469              19 :                         old_replace_count = replace_count; /* baseline for the is_filter check below */
    1470              19 :                         if ((result = php_replace_in_subject(regex, replace, subject_entry, &result_len, limit, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) { /* note: limit is a long here, the callee's parameter is an int */
    1471              36 :                                 if (!is_filter || replace_count > old_replace_count) {
    1472                 :                                         /* Add to return array */
    1473              17 :                                         switch (zend_hash_get_current_key_ex(Z_ARRVAL_P(subject), &string_key, &string_key_len, &num_key, 0, NULL))
    1474                 :                                         {
    1475                 :                                         case HASH_KEY_IS_UNICODE:
    1476               2 :                                                 if (utype == IS_UNICODE || utype != IS_STRING) { /* same as utype != IS_STRING, assuming the two type constants differ */
    1477               1 :                                                         add_u_assoc_utf8_stringl_ex(return_value, IS_UNICODE, string_key, string_key_len, result, result_len, ZSTR_AUTOFREE);
    1478                 :                                                 } else {
    1479               0 :                                                         add_u_assoc_stringl_ex(return_value, IS_UNICODE, string_key, string_key_len, result, result_len, 0);
    1480                 :                                                 }
    1481               1 :                                                 break;
    1482                 : 
    1483                 :                                         case HASH_KEY_IS_STRING:
    1484               0 :                                                 if (utype == IS_UNICODE || utype != IS_STRING) {
    1485               0 :                                                         add_u_assoc_utf8_stringl_ex(return_value, IS_STRING, string_key, string_key_len, result, result_len, ZSTR_AUTOFREE);
    1486                 :                                                 } else {
    1487               0 :                                                         add_u_assoc_stringl_ex(return_value, IS_STRING, string_key, string_key_len, result, result_len, 0);
    1488                 :                                                 }
    1489               0 :                                                 break;
    1490                 : 
    1491                 :                                         case HASH_KEY_IS_LONG:
    1492              32 :                                                 if (utype == IS_UNICODE || utype != IS_STRING) {
    1493              16 :                                                         add_index_utf8_stringl(return_value, num_key, result, result_len, ZSTR_AUTOFREE);
    1494                 :                                                 } else {
    1495               0 :                                                         add_index_stringl(return_value, num_key, result, result_len, 0);
    1496                 :                                                 }
    1497                 :                                                 break;
    1498                 :                                         }
    1499                 :                                 } else {
    1500               2 :                                         efree(result); /* filter mode and nothing was replaced: drop this entry */
    1501                 :                                 }
    1502                 :                         }
    1503                 :                 
    1504              19 :                         zend_hash_move_forward(Z_ARRVAL_P(subject));
    1505                 :                 }
    1506                 :         } else {        /* if subject is not an array */
    1507           20682 :                 utype = Z_TYPE_P(subject);
    1508           20682 :                 old_replace_count = replace_count;
    1509           20682 :                 if ((result = php_replace_in_subject(regex, replace, &subject, &result_len, limit, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) {
    1510           41334 :                         if (!is_filter || replace_count > old_replace_count) {
    1511           26870 :                                 if (utype == IS_UNICODE || utype != IS_STRING) {
    1512            6203 :                                         RETVAL_UTF8_STRINGL(result, result_len, ZSTR_AUTOFREE);
    1513                 :                                 } else {
    1514           14464 :                                         RETVAL_STRINGL(result, result_len, 0);
    1515                 :                                 }
    1516                 :                         } else {
    1517               0 :                                 efree(result); /* filter mode and nothing was replaced: return_value is left unassigned here */
    1518                 :                         }
    1519                 :                 }
    1520                 :         }
    1521           20687 :         if (ZEND_NUM_ARGS() > 4) { /* the optional count argument was supplied */
    1522               7 :                 zval_dtor(zcount);
    1523               7 :                 ZVAL_LONG(zcount, replace_count);
    1524                 :         }
    1525                 :         
    1526                 : }
    1527                 : /* }}} */
    1528                 : 
    1529                 : /* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]]) U
    1530                 :    Perform Perl-style regular expression replacement.
                         :    Delegates to preg_replace_impl() with is_callable_replace = 0 and
                         :    is_filter = 0. */
    1531                 : static PHP_FUNCTION(preg_replace)
    1532           20667 : {
    1533           20667 :         preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
    1534           20664 : }
    1535                 : /* }}} */
    1536                 : 
    1537                 : /* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]]) U
    1538                 :    Perform Perl-style regular expression replacement using replacement callback.
                         :    Delegates to preg_replace_impl() with is_callable_replace = 1 and
                         :    is_filter = 0. */
    1539                 : static PHP_FUNCTION(preg_replace_callback)
    1540              40 : {
    1541              40 :         preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1, 0);
    1542              40 : }
    1543                 : /* }}} */
    1544                 : 
    1545                 : /* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]]) U
    1546                 :    Perform Perl-style regular expression replacement and only return matches.
                         :    Delegates to preg_replace_impl() with is_callable_replace = 0 and
                         :    is_filter = 1. */
    1547                 : static PHP_FUNCTION(preg_filter)
    1548               1 : {
    1549               1 :         preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
    1550               1 : }
    1551                 : /* }}} */
    1552                 : 
    1553                 : /* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]]) U
    1554                 :    Split string into an array using a perl-style regular expression as a delimiter
                         :
                         :    Returns FALSE when the arguments cannot be parsed or when
                         :    pcre_get_compiled_regex_cache() yields no compiled pattern. For
                         :    IS_UNICODE input, pattern and subject are first converted through
                         :    UG(utf8_conv); those temporary buffers are freed before returning.
                         :    The splitting itself is done by php_pcre_split_impl(), which fills
                         :    return_value. */
    1555                 : static PHP_FUNCTION(preg_split)
    1556            1957 : {
    1557                 :         zstr                             regex;                 /* Regular expression */
    1558                 :         zstr                             subject;               /* String to match against */
    1559                 :         int                                      regex_len;
    1560                 :         int                                      subject_len;
    1561            1957 :         long                             limit_val = -1;/* Integer value of limit */
    1562            1957 :         long                             flags = 0;             /* Match control flags */
    1563                 :         pcre_cache_entry        *pce;                   /* Compiled regular expression */
    1564                 :         zend_uchar                       str_type;
    1565            1957 :         char                            *regex_utf8 = NULL, *subject_utf8 = NULL;
    1566                 :         int                                      regex_utf8_len, subject_utf8_len;
    1567            1957 :         UErrorCode                       status = U_ZERO_ERROR;
    1568                 : 
    1569                 :         /* Get function parameters and do error checking */     
    1570            1957 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "TT|ll", &regex, &regex_len, &str_type,
    1571                 :                                                           &subject, &subject_len, &str_type, &limit_val, &flags) == FAILURE) {
    1572               8 :                 RETURN_FALSE;
    1573                 :         }
    1574                 :         
    1575            1949 :         if (str_type == IS_UNICODE) {
    1576            1687 :                 zend_unicode_to_string_ex(UG(utf8_conv), &regex_utf8, &regex_utf8_len, regex.u, regex_len, &status);
    1577            1687 :                 zend_unicode_to_string_ex(UG(utf8_conv), &subject_utf8, &subject_utf8_len, subject.u, subject_len, &status); /* NOTE(review): status is never inspected in this function */
    1578            1687 :                 regex.s = regex_utf8; /* from here on regex/subject refer to the converted buffers */
    1579            1687 :                 regex_len = regex_utf8_len;
    1580            1687 :                 subject.s = subject_utf8;
    1581            1687 :                 subject_len = subject_utf8_len;
    1582                 :         }
    1583                 : 
    1584                 :         /* Compile regex or get it from cache. */
    1585            1949 :         if ((pce = pcre_get_compiled_regex_cache(str_type, regex.s, regex_len TSRMLS_CC)) == NULL) {
    1586               5 :                 if (str_type == IS_UNICODE) {
    1587               5 :                         efree(regex_utf8); /* release the converted copies on the failure path too */
    1588               5 :                         efree(subject_utf8);
    1589                 :                 }
    1590               5 :                 RETURN_FALSE;
    1591                 :         }
    1592                 : 
    1593            1944 :         php_pcre_split_impl(pce, str_type, subject.s, subject_len, return_value, limit_val, flags TSRMLS_CC);
    1594                 : 
    1595            1944 :         if (str_type == IS_UNICODE) {
    1596            1682 :                 efree(regex_utf8);
    1597            1682 :                 efree(subject_utf8);
    1598                 :         }
    1599                 : }
    1600                 : /* }}} */
    1601                 : 
    1602                 : /* {{{ php_pcre_split_impl
    1603                 :  */
    1604                 : PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_uchar utype, char *subject, int subject_len, zval *return_value,
    1605                 :         long limit_val, long flags TSRMLS_DC)
    1606            1960 : {
    1607            1960 :         pcre_extra              *extra = NULL;          /* Holds results of studying */
    1608                 :         pcre_extra               extra_data;            /* Used locally for exec options */
    1609                 :         int                             *offsets;                       /* Array of subpattern offsets */
    1610                 :         int                              size_offsets;          /* Size of the offsets array */
    1611            1960 :         int                              exoptions = 0;         /* Execution options */
    1612            1960 :         int                              count = 0;                     /* Count of matched subpatterns */
    1613                 :         int                              start_offset;          /* Where the new search starts */
    1614                 :         int                              next_offset;           /* End of the last delimiter match + 1 */
    1615            1960 :         int                              g_notempty = 0;        /* If the match should not be empty */
    1616                 :         char                    *last_match;            /* Location of last match */
    1617                 :         int                              rc;
    1618                 :         int                              no_empty;                      /* If NO_EMPTY flag is set */
    1619                 :         int                              delim_capture;         /* If delimiters should be captured */
    1620                 :         int                              offset_capture;        /* If offsets should be captured */
    1621            1960 :         offset_map_t     map = { subject, 0, 0 };
    1622                 : 
    1623            1960 :         no_empty = flags & PREG_SPLIT_NO_EMPTY;
    1624            1960 :         delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
    1625            1960 :         offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
    1626                 :         
    1627            1960 :         if (limit_val == 0) {
    1628               1 :                 limit_val = -1;
    1629                 :         }
    1630                 : 
    1631            1960 :         if (extra == NULL) {
    1632            1960 :                 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
    1633            1960 :                 extra = &extra_data;
    1634                 :         }
    1635            1960 :         extra->match_limit = PCRE_G(backtrack_limit);
    1636            1960 :         extra->match_limit_recursion = PCRE_G(recursion_limit);
    1637                 :         
    1638                 :         /* Initialize return value */
    1639            1960 :         array_init(return_value);
    1640                 : 
    1641                 :         /* Calculate the size of the offsets array, and allocate memory for it. */
    1642            1960 :         rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
    1643            1960 :         if (rc < 0) {
    1644               0 :                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
    1645               0 :                 RETURN_FALSE;
    1646                 :         }
    1647            1960 :         size_offsets = (size_offsets + 1) * 3;
    1648            1960 :         offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
    1649                 :         
    1650                 :         /* Start at the beginning of the string */
    1651            1960 :         start_offset = 0;
    1652            1960 :         next_offset = 0;
    1653            1960 :         last_match = subject;
    1654            1960 :         PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
    1655                 : 
    1656            1960 :         if (utype != IS_UNICODE && !(pce->compile_options & PCRE_UTF8)) {
    1657               0 :                 exoptions |= PCRE_NO_UTF8_CHECK;
    1658                 :         }
    1659                 :         
    1660                 :         /* Get next piece if no limit or limit not yet reached and something matched*/
    1661            7129 :         while ((limit_val == -1 || limit_val > 1)) {
    1662            5167 :                 count = pcre_exec(pce->re, extra, subject,
    1663                 :                                                   subject_len, start_offset,
    1664                 :                                                   exoptions|g_notempty, offsets, size_offsets);
    1665                 : 
    1666                 :                 /* the string was already proved to be valid UTF-8 */
    1667            5167 :                 exoptions |= PCRE_NO_UTF8_CHECK;
    1668                 : 
    1669                 :                 /* Check for too many substrings condition. */
    1670            5167 :                 if (count == 0) {
    1671               0 :                         php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
    1672               0 :                         count = size_offsets/3;
    1673                 :                 }
    1674                 :                                 
    1675                 :                 /* If something matched */
    1676            5167 :                 if (count > 0) {
    1677            3151 :                         if (!no_empty || &subject[offsets[0]] != last_match) {
    1678                 : 
    1679            3092 :                                 if (offset_capture) {
    1680                 :                                         /* Add (match, offset) pair to the return value */
    1681              26 :                                         add_offset_pair(return_value, utype, last_match, &subject[offsets[0]]-last_match, next_offset, NULL, &map TSRMLS_CC);
    1682            3066 :                                 } else if (utype == IS_UNICODE) {
    1683                 :                                         /* Add the piece to the return value */
    1684            2675 :                                         add_next_index_utf8_stringl(return_value, last_match,
    1685                 :                                                                                                 &subject[offsets[0]]-last_match, 1);
    1686                 :                                 } else {
    1687                 :                                         /* Add the piece to the return value */
    1688             391 :                                         add_next_index_stringl(return_value, last_match,
    1689                 :                                                                                         &subject[offsets[0]]-last_match, 1);
    1690                 :                                 }
    1691                 : 
    1692                 :                                 /* One less left to do */
    1693            3092 :                                 if (limit_val != -1)
    1694               1 :                                         limit_val--;
    1695                 :                         }
    1696                 :                         
    1697            3151 :                         last_match = &subject[offsets[1]];
    1698            3151 :                         next_offset = offsets[1];
    1699                 : 
    1700            3151 :                         if (delim_capture) {
    1701                 :                                 int i, match_len;
    1702              62 :                                 for (i = 1; i < count; i++) {
    1703              31 :                                         match_len = offsets[(i<<1)+1] - offsets[i<<1];
    1704                 :                                         /* If we have matched a delimiter */
    1705              31 :                                         if (!no_empty || match_len > 0) {
    1706              21 :                                                 if (offset_capture) {
    1707              10 :                                                         add_offset_pair(return_value, utype, &subject[offsets[i<<1]], match_len,
    1708                 :                                                                                         offsets[i<<1], NULL, &map TSRMLS_CC);
    1709              11 :                                                 } else if (utype == IS_UNICODE) {
    1710              11 :                                                         add_next_index_utf8_stringl(return_value, &subject[offsets[i<<1]],
    1711                 :                                                                                                                 match_len, 1);
    1712                 :                                                 } else {
    1713               0 :                                                         add_next_index_stringl(return_value, &subject[offsets[i<<1]],
    1714                 :                                                                                                         match_len, 1);
    1715                 :                                                 }
    1716                 :                                         }
    1717                 :                                 }
    1718                 :                         }
    1719            2016 :                 } else if (count == PCRE_ERROR_NOMATCH) {
    1720                 :                         /* If we previously set PCRE_NOTEMPTY after a null match,
    1721                 :                            this is not necessarily the end. We need to advance
    1722                 :                            the start offset, and continue. Fudge the offset values
    1723                 :                            to achieve this, unless we're already at the end of the string. */
    1724            2015 :                         if (g_notempty != 0 && start_offset < subject_len) {
    1725              58 :                                 offsets[0] = start_offset;
    1726             116 :                                 if (utype == IS_UNICODE || pce->compile_options & PCRE_UTF8) {
    1727              58 :                                         offsets[1] = start_offset;
    1728              58 :                                         U8_FWD_1(subject, offsets[1], subject_len);
    1729                 :                                 } else {
    1730               0 :                                         offsets[1] = start_offset + 1;
    1731                 :                                 }
    1732                 :                         } else
    1733                 :                                 break;
    1734                 :                 } else {
    1735               1 :                         pcre_handle_exec_error(count TSRMLS_CC);
    1736               1 :                         break;
    1737                 :                 }
    1738                 : 
    1739                 :                 /* If we have matched an empty string, mimic what Perl's /g option does.
    1740                 :                    This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
    1741                 :                    the match again at the same point. If this fails (picked up above) we
    1742                 :                    advance to the next character. */
    1743            3209 :                 g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
    1744                 :                 
    1745                 :                 /* Advance to the position right after the last full match */
    1746            3209 :                 start_offset = offsets[1];
    1747                 :         }
    1748                 : 
    1749                 : 
    1750            1960 :         start_offset = last_match - subject; /* the offset might have been incremented, but without further successful matches */
    1751                 : 
    1752            1960 :         if (!no_empty || start_offset < subject_len)
    1753                 :         {
    1754            1952 :                 if (offset_capture) {
    1755                 :                         /* Add the last (match, offset) pair to the return value */
    1756               5 :                         add_offset_pair(return_value, utype, &subject[start_offset],
    1757                 :                                                         subject_len - start_offset, start_offset, NULL, &map TSRMLS_CC);
    1758            1947 :                 } else if (utype == IS_UNICODE) {
    1759                 :                         /* Add the last piece to the return value */
    1760            1686 :                         add_next_index_utf8_stringl(return_value, last_match, subject + subject_len - last_match, 1);
    1761                 :                 } else {
    1762                 :                         /* Add the last piece to the return value */
    1763             261 :                         add_next_index_stringl(return_value, last_match, subject + subject_len - last_match, 1);
    1764                 :                 }
    1765                 :         }
    1766                 :         
    1767                 :         /* Clean up */
    1768            1960 :         efree(offsets);
    1769                 : }
    1770                 : /* }}} */
    1771                 : 
    1772                 : /* {{{ proto string preg_quote(string str [, string delim_char]) U
    1773                 :    Quote regular expression characters plus an optional character
                         :
                         :    Both arguments are fetched with the "s&" specifier and UG(utf8_conv).
                         :    The function returns without setting a value if argument parsing
                         :    fails, and returns an empty Unicode string for empty input.
                         :
                         :    Every byte named in the case list below is emitted with a leading
                         :    backslash, a NUL byte is emitted as the four characters \000, and a
                         :    byte above 0x7f causes the sequence consumed by U8_NEXT to be copied
                         :    through unchanged. When delim_char is non-empty, its first code point
                         :    (read with U8_GET) is backslash-escaped as well, in both the ASCII and
                         :    the non-ASCII branch. The result is handed back through
                         :    RETVAL_UTF8_STRINGL with ZSTR_AUTOFREE. */
    1774                 : static PHP_FUNCTION(preg_quote)
    1775            6145 : {
    1776                 :         int              in_str_len;
    1777                 :         char    *in_str;                /* Input string argument */
    1778                 :         char    *in_str_end;    /* End of the input string */
    1779            6145 :         int              delim_len = 0;
    1780            6145 :         char    *delim = NULL;  /* Additional delimiter argument */
    1781                 :         char    *out_str,               /* Output string with quoted characters */
    1782                 :                         *p,                             /* Iterator for input string */
    1783                 :                         *q,                             /* Iterator for output string */
    1784                 :                          c;                             /* Current character */
    1785            6145 :         UChar32  delim_char=0;  /* Delimiter character to be quoted */
    1786            6145 :         zend_bool quote_delim = 0; /* Whether to quote additional delim char */
    1787                 :         
    1788                 :         /* Get the arguments and check for errors */
    1789            6145 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s&|s&", &in_str, &in_str_len, UG(utf8_conv),
    1790                 :                                                           &delim, &delim_len, UG(utf8_conv)) == FAILURE) {
    1791               5 :                 return;
    1792                 :         }
    1793                 :         
    1794            6140 :         in_str_end = in_str + in_str_len;
    1795                 : 
    1796                 :         /* Nothing to do if we got an empty string */
    1797            6140 :         if (in_str == in_str_end) {
    1798               2 :                 RETURN_EMPTY_UNICODE();
    1799                 :         }
    1800                 : 
    1801            6138 :         if (delim && *delim) { /* quote an extra character only when a non-empty delimiter was passed */
    1802            6135 :                 U8_GET((unsigned char*)delim, 0, 0, delim_len, delim_char);
    1803            6135 :                 quote_delim = 1;
    1804                 :         }
    1805                 :         
    1806                 :         /* Allocate enough memory so that even if each character
    1807                 :            is quoted, we won't run out of room. In Unicode mode, the longest UTF-8
    1808                 :            sequence is 4 bytes, so the multiplier is (4+1). In non-Unicode mode, we
    1809                 :            have to assume that any character can be '\0', which needs 4 chars to
    1810                 :            be escaped. */
    1811            6138 :         out_str = safe_emalloc(5, in_str_len, 1);
    1812                 :         
    1813                 :         /* Go through the string and quote necessary characters */
    1814         5945129 :         for(p = in_str, q = out_str; p != in_str_end; p++) {
    1815         5938991 :                 c = *p;
    1816         5938991 :                 switch(c) {
    1817                 :                         case '.':
    1818                 :                         case '\\':
    1819                 :                         case '+':
    1820                 :                         case '*':
    1821                 :                         case '?':
    1822                 :                         case '[':
    1823                 :                         case '^':
    1824                 :                         case ']':
    1825                 :                         case '$':
    1826                 :                         case '(':
    1827                 :                         case ')':
    1828                 :                         case '{':
    1829                 :                         case '}':
    1830                 :                         case '=':
    1831                 :                         case '!':
    1832                 :                         case '>':
    1833                 :                         case '<':
    1834                 :                         case '|':
    1835                 :                         case ':':
    1836                 :                         case '-':
    1837          823380 :                                 *q++ = '\\';
    1838          823380 :                                 *q++ = c;
    1839          823380 :                                 break;
    1840                 : 
    1841                 :                         case '\0':
    1842             808 :                                 *q++ = '\\';
    1843             808 :                                 *q++ = '0';
    1844             808 :                                 *q++ = '0';
    1845             808 :                                 *q++ = '0';
    1846             808 :                                 break;
    1847                 : 
    1848                 :                         default:
    1849         5114803 :                                 if ((UChar32)(unsigned char)c > 0x7f) { /* non-ASCII char */
    1850            1413 :                                         int tmp = 0;
    1851            1413 :                                         UChar32 cp = 0;
    1852            1413 :                                         U8_NEXT(p, tmp, in_str_end-p, cp); /* tmp is used below as the number of bytes to copy and skip */
    1853            1413 :                                         if (quote_delim && cp == delim_char) {
    1854               0 :                                                 *q++ = '\\';
    1855                 :                                         }
    1856            1413 :                                         memcpy(q, p, tmp);
    1857            1413 :                                         q += tmp;
    1858            1413 :                                         p += tmp-1; /* going to be incremented by the loop */
    1859                 :                                 } else {
    1860         5113390 :                                         if (quote_delim && c == delim_char)
    1861           14361 :                                                         *q++ = '\\';
    1862         5113390 :                                         *q++ = c;
    1863                 :                                 }
    1864                 :                                 break;
    1865                 :                 }
    1866                 :         }
    1867            6138 :         *q = '\0';
    1868                 :         
    1869                 :         /* Reallocate string and return it */
    1870            6138 :         RETVAL_UTF8_STRINGL(erealloc(out_str, q - out_str + 1), q - out_str, ZSTR_AUTOFREE);
    1871                 : }
    1872                 : /* }}} */
    1873                 : 
    1874                 : /* {{{ proto array preg_grep(string regex, array input [, int flags]) U
    1875                 :    Searches array and returns entries which match regex
                         :
                         :    The pattern's string type is reported in regex_type. A pattern of
                         :    type IS_UNICODE is converted with zend_unicode_to_string_ex() into a
                         :    temporary UTF-8 buffer (the conversion status is not inspected in
                         :    this function); that buffer is released with efree() on both the
                         :    failure and the success path. Returns FALSE when
                         :    pcre_get_compiled_regex_cache() yields NULL; otherwise
                         :    php_pcre_grep_impl() fills return_value. */
    1876                 : static PHP_FUNCTION(preg_grep)
    1877              29 : {
    1878                 :         zstr                             regex;                 /* Regular expression */
    1879                 :         int                                      regex_len;
    1880                 :         char*                            regex_utf8;
    1881                 :         int                                      regex_utf8_len;
    1882                 :         zend_uchar           regex_type;
    1883                 :         zval                            *input;                 /* Input array */
    1884              29 :         long                             flags = 0;             /* Match control flags */
    1885                 :         pcre_cache_entry        *pce;                   /* Compiled regular expression */
    1886              29 :         UErrorCode                       status = U_ZERO_ERROR;
    1887                 : 
    1888                 :         /* Get arguments and do error checking */
    1889              29 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ta|l", &regex,
    1890                 :                                                           &regex_len, &regex_type, &input, &flags) == FAILURE) {
    1891               9 :                 return;
    1892                 :         }
    1893                 :         
    1894              20 :         if (regex_type == IS_UNICODE) {
    1895              20 :                 zend_unicode_to_string_ex(UG(utf8_conv), &regex_utf8, &regex_utf8_len, regex.u, regex_len, &status);
    1896              20 :                 regex.s = regex_utf8;
    1897              20 :                 regex_len = regex_utf8_len;
    1898                 :         }
    1899                 : 
    1900                 :         /* Compile regex or get it from cache. */
    1901              20 :         if ((pce = pcre_get_compiled_regex_cache(regex_type, regex.s, regex_len TSRMLS_CC)) == NULL) {
    1902               5 :                 if (regex_type == IS_UNICODE) {
    1903               5 :                         efree(regex_utf8);
    1904                 :                 }
    1905               5 :                 RETURN_FALSE;
    1906                 :         }
    1907                 :         
    1908              15 :         php_pcre_grep_impl(pce, input, return_value, flags TSRMLS_CC);
    1909                 : 
    1910              15 :         if (regex_type == IS_UNICODE) {
    1911              15 :                 efree(regex_utf8);
    1912                 :         }
    1913                 : }
    1914                 : /* }}} */
    1915                 : 
    1916                 : /* {{{ php_pcre_grep_impl
                         :    Runs the compiled pattern in pce against every element of the input
                         :    array and collects the selected elements in return_value.
                         :
                         :    pce          - compiled pattern; pce->re, pce->extra and
                         :                   pce->compile_options are read
                         :    input        - array zval to scan; its internal pointer is reset
                         :                   before and after the scan
                         :    return_value - initialised as an array here; receives each selected
                         :                   element under its original key, sharing the zval
                         :                   (its refcount is incremented, no copy is made)
                         :    flags        - PREG_GREP_INVERT selects the elements that do NOT match
                         :
                         :    If pcre_fullinfo() fails, a warning is raised and FALSE is returned
                         :    before return_value is initialised. If pcre_exec() reports an error
                         :    other than PCRE_ERROR_NOMATCH, pcre_handle_exec_error() is called and
                         :    the scan stops; elements collected so far stay in return_value.
                         :
                         :    NOTE(review): when pce->extra is non-NULL the match limits are written
                         :    into the cache entry's structure and its flags are not touched here -
                         :    confirm the limit flags are set where pce->extra is created. */
    1917                 : PHPAPI void  php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value,
    1918                 :         long flags TSRMLS_DC)
    1919              15 : {
    1920                 :         zval               **entry;                             /* An entry in the input array */
    1921              15 :         pcre_extra              *extra = pce->extra;/* Holds results of studying */
    1922                 :         pcre_extra               extra_data;            /* Used locally for exec options */
    1923                 :         int                             *offsets;                       /* Array of subpattern offsets */
    1924                 :         int                              size_offsets;          /* Size of the offsets array */
    1925              15 :         int                              count = 0;                     /* Count of matched subpatterns */
    1926                 :         zstr                     string_key;
    1927                 :         uint                     string_key_len;
    1928                 :         ulong                    num_key;
    1929                 :         zend_bool                invert;                        /* Whether to return non-matching
    1930                 :                                                                                    entries */
    1931                 :         int                              rc;
    1932              15 :         int                              exoptions = 0;         /* Execution options */
    1933                 :         
    1934                 :         
    1935              15 :         invert = flags & PREG_GREP_INVERT ? 1 : 0;
    1936                 :         
    1937              15 :         if (extra == NULL) {
    1938              15 :                 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
    1939              15 :                 extra = &extra_data;
    1940                 :         }
    1941              15 :         extra->match_limit = PCRE_G(backtrack_limit);
    1942              15 :         extra->match_limit_recursion = PCRE_G(recursion_limit);
    1943                 : 
    1944                 :         /* Calculate the size of the offsets array, and allocate memory for it. */
    1945              15 :         rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
    1946              15 :         if (rc < 0) {
    1947               0 :                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
    1948               0 :                 RETURN_FALSE;
    1949                 :         }
    1950              15 :         size_offsets = (size_offsets + 1) * 3;
    1951              15 :         offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
    1952                 :         
    1953                 :         /* Initialize return array */
    1954              15 :         array_init(return_value);
    1955                 : 
    1956              15 :         PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
    1957                 : 
    1958                 :         /* Go through the input array */
    1959              15 :         zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));
    1960             104 :         while(zend_hash_get_current_data(Z_ARRVAL_P(input), (void **)&entry) == SUCCESS) {
    1961              75 :                 zval subject = **entry; /* struct copy of the element's zval */
    1962                 : 
    1963              75 :                 if (Z_TYPE_PP(entry) != IS_STRING) {
    1964              75 :                         zval_copy_ctor(&subject);
    1965              75 :                         convert_to_string_with_converter(&subject, UG(utf8_conv));
    1966                 :                 }
    1967                 : 
    1968                 :                 /* Perform the match */
    1969              75 :                 count = pcre_exec(pce->re, extra, Z_STRVAL(subject), Z_STRLEN(subject),
    1970                 :                                                   0, exoptions | ((Z_TYPE_PP(entry) != IS_UNICODE && !(pce->compile_options & PCRE_UTF8))?PCRE_NO_UTF8_CHECK:0), offsets, size_offsets);
    1971                 : 
    1972                 :                 /* Check for too many substrings condition. */
    1973              75 :                 if (count == 0) {
    1974               0 :                         php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
    1975               0 :                         count = size_offsets/3; /* positive, so the entry is handled as a match below */
    1976              75 :                 } else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
    1977               1 :                         if (Z_TYPE_PP(entry) != IS_STRING) {
    1978               1 :                                 zval_dtor(&subject);
    1979                 :                         }
    1980               1 :                         pcre_handle_exec_error(count TSRMLS_CC);
    1981               1 :                         break;
    1982                 :                 }
    1983                 : 
    1984                 :                 /* If the entry fits our requirements */
    1985              74 :                 if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
    1986                 : 
    1987              30 :                         Z_ADDREF_PP(entry); /* the result array will hold another reference to this element */
    1988                 : 
    1989                 :                         /* Add to return array */
    1990              30 :                         switch (zend_hash_get_current_key_ex(Z_ARRVAL_P(input), &string_key, &string_key_len, &num_key, 0, NULL))
    1991                 :                         {
    1992                 :                                 case HASH_KEY_IS_UNICODE:
    1993               2 :                                         add_u_assoc_zval_ex(return_value, IS_UNICODE, string_key, string_key_len, *entry);
    1994               2 :                                         break;
    1995                 : 
    1996                 :                                 case HASH_KEY_IS_STRING:
    1997               0 :                                         add_u_assoc_zval_ex(return_value, IS_STRING, string_key, string_key_len, *entry);
    1998               0 :                                         break;
    1999                 : 
    2000                 :                                 case HASH_KEY_IS_LONG:
    2001              28 :                                         add_index_zval(return_value, num_key, *entry);
    2002                 :                                         break;
    2003                 :                         }
    2004                 :                 }
    2005                 : 
    2006              74 :                 if (Z_TYPE_PP(entry) != IS_STRING) {
    2007              74 :                         zval_dtor(&subject);
    2008                 :                 }
    2009                 : 
    2010              74 :                 zend_hash_move_forward(Z_ARRVAL_P(input));
    2011                 :         }
    2012              15 :         zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));
    2013                 :         /* Clean up */
    2014              15 :         efree(offsets);
    2015                 : }
    2016                 : /* }}} */
    2017                 : 
    2018                 : /* {{{ proto int preg_last_error()
    2019                 :    Returns the error code of the last regexp execution.
                         :
                         :    Returns without setting a value if zend_parse_parameters_none()
                         :    fails; otherwise returns the current PCRE_G(error_code) as an
                         :    integer. */
    2020                 : static PHP_FUNCTION(preg_last_error)
    2021              17 : {
    2022              17 :         if (zend_parse_parameters_none() == FAILURE) {
    2023               2 :                 return;
    2024                 :         }
    2025                 : 
    2026              15 :         RETURN_LONG(PCRE_G(error_code));
    2027                 : }
    2028                 : /* }}} */
    2029                 : 
    2030                 : /* {{{ module definition structures */
    2031                 : 
    2032                 : /* {{{ arginfo
                         :    Argument descriptions for the functions listed in pcre_functions.
                         :    The arguments declared with a leading 1 (subpatterns, count) are
                         :    presumably the pass-by-reference ones, and the last number on each
                         :    ZEND_BEGIN_ARG_INFO_EX line is presumably the required-argument
                         :    count - confirm both against the Zend API macro definitions. */
    2033                 : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
    2034                 :     ZEND_ARG_INFO(0, pattern)
    2035                 :     ZEND_ARG_INFO(0, subject)
    2036                 :     ZEND_ARG_INFO(1, subpatterns) /* array */
    2037                 :     ZEND_ARG_INFO(0, flags)
    2038                 :     ZEND_ARG_INFO(0, offset)
    2039                 : ZEND_END_ARG_INFO()
    2040                 : 
    2041                 : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 3)
    2042                 :     ZEND_ARG_INFO(0, pattern)
    2043                 :     ZEND_ARG_INFO(0, subject)
    2044                 :     ZEND_ARG_INFO(1, subpatterns) /* array */
    2045                 :     ZEND_ARG_INFO(0, flags)
    2046                 :     ZEND_ARG_INFO(0, offset)
    2047                 : ZEND_END_ARG_INFO()
    2048                 : 
    2049                 : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3)
    2050                 :     ZEND_ARG_INFO(0, regex)
    2051                 :     ZEND_ARG_INFO(0, replace)
    2052                 :     ZEND_ARG_INFO(0, subject)
    2053                 :     ZEND_ARG_INFO(0, limit)
    2054                 :     ZEND_ARG_INFO(1, count)
    2055                 : ZEND_END_ARG_INFO()
    2056                 : 
    2057                 : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
    2058                 :     ZEND_ARG_INFO(0, regex)
    2059                 :     ZEND_ARG_INFO(0, callback)
    2060                 :     ZEND_ARG_INFO(0, subject)
    2061                 :     ZEND_ARG_INFO(0, limit)
    2062                 :     ZEND_ARG_INFO(1, count)
    2063                 : ZEND_END_ARG_INFO()
    2064                 : 
    2065                 : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
    2066                 :     ZEND_ARG_INFO(0, pattern)
    2067                 :     ZEND_ARG_INFO(0, subject)
    2068                 :     ZEND_ARG_INFO(0, limit)
    2069                 :     ZEND_ARG_INFO(0, flags) 
    2070                 : ZEND_END_ARG_INFO()
    2071                 : 
    2072                 : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
    2073                 :     ZEND_ARG_INFO(0, str)
    2074                 :     ZEND_ARG_INFO(0, delim_char)
    2075                 : ZEND_END_ARG_INFO()
    2076                 : 
    2077                 : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2)
    2078                 :     ZEND_ARG_INFO(0, regex)
    2079                 :     ZEND_ARG_INFO(0, input) /* array */
    2080                 :     ZEND_ARG_INFO(0, flags)
    2081                 : ZEND_END_ARG_INFO()
    2082                 : 
    2083                 : ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0)
    2084                 : ZEND_END_ARG_INFO()
    2085                 : /* }}} */
    2086                 : 
    2087                 : static const zend_function_entry pcre_functions[] = { /* function table referenced by pcre_module_entry; each entry pairs a function with its arginfo */
    2088                 :         PHP_FE(preg_match,                              arginfo_preg_match)
    2089                 :         PHP_FE(preg_match_all,                  arginfo_preg_match_all)
    2090                 :         PHP_FE(preg_replace,                    arginfo_preg_replace)
    2091                 :         PHP_FE(preg_replace_callback,   arginfo_preg_replace_callback)
    2092                 :         PHP_FE(preg_filter,                             arginfo_preg_replace) /* reuses preg_replace's arginfo */
    2093                 :         PHP_FE(preg_split,                              arginfo_preg_split)
    2094                 :         PHP_FE(preg_quote,                              arginfo_preg_quote)
    2095                 :         PHP_FE(preg_grep,                               arginfo_preg_grep)
    2096                 :         PHP_FE(preg_last_error,                 arginfo_preg_last_error)
    2097                 :         {NULL,          NULL,                           NULL} /* all-NULL entry closing the table */
    2098                 : };
    2099                 : 
    2100                 : zend_module_entry pcre_module_entry = { /* module descriptor for the "pcre" extension */
    2101                 :         STANDARD_MODULE_HEADER,
    2102                 :    "pcre",
    2103                 :         pcre_functions,
    2104                 :         PHP_MINIT(pcre),
    2105                 :         PHP_MSHUTDOWN(pcre),
    2106                 :         NULL, /* presumably the request-startup slot, left unused - confirm against zend_module_entry */
    2107                 :         NULL, /* presumably the request-shutdown slot, left unused - confirm against zend_module_entry */
    2108                 :         PHP_MINFO(pcre),
    2109                 :         NO_VERSION_YET,
    2110                 :         PHP_MODULE_GLOBALS(pcre),
    2111                 :         PHP_GINIT(pcre),
    2112                 :         PHP_GSHUTDOWN(pcre),
    2113                 :         NULL, /* presumably the post-deactivate slot, left unused - confirm against zend_module_entry */
    2114                 :         STANDARD_MODULE_PROPERTIES_EX
    2115                 : };
    2116                 : 
    2117                 : #ifdef COMPILE_DL_PCRE
    2118                 : ZEND_GET_MODULE(pcre) /* emitted only when COMPILE_DL_PCRE is defined */
    2119                 : #endif
    2120                 : 
    2121                 : /* }}} */
    2122                 : 
    2123                 : #endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
    2124                 : 
    2125                 : /*
    2126                 :  * Local variables:
    2127                 :  * tab-width: 4
    2128                 :  * c-basic-offset: 4
    2129                 :  * End:
    2130                 :  * vim600: sw=4 ts=4 fdm=marker
    2131                 :  * vim<600: sw=4 ts=4
    2132                 :  */

Generated by: LTP GCOV extension version 1.5

Generated at Mon, 23 Nov 2009 17:39:34 +0000 (33 hours ago)

Copyright © 2005-2009 The PHP Group
All rights reserved.