PHP  
 PHP: Test and Code Coverage Analysis
downloads | QA | documentation | faq | getting help | mailing lists | reporting bugs | php.net sites | links | my php.net 
 

LCOV - code coverage report
Current view: top level - ext/pcre - php_pcre.c (source / functions) Hit Total Coverage
Test: PHP Code Coverage Lines: 795 866 91.8 %
Date: 2014-10-30 Functions: 32 33 97.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :    +----------------------------------------------------------------------+
       3             :    | PHP Version 7                                                        |
       4             :    +----------------------------------------------------------------------+
       5             :    | Copyright (c) 1997-2014 The PHP Group                                |
       6             :    +----------------------------------------------------------------------+
       7             :    | This source file is subject to version 3.01 of the PHP license,      |
       8             :    | that is bundled with this package in the file LICENSE, and is        |
       9             :    | available through the world-wide-web at the following url:           |
      10             :    | http://www.php.net/license/3_01.txt                                  |
      11             :    | If you did not receive a copy of the PHP license and are unable to   |
      12             :    | obtain it through the world-wide-web, please send a note to          |
      13             :    | license@php.net so we can mail you a copy immediately.               |
      14             :    +----------------------------------------------------------------------+
      15             :    | Author: Andrei Zmievski <andrei@php.net>                             |
      16             :    +----------------------------------------------------------------------+
      17             :  */
      18             : 
      19             : /* $Id$ */
      20             : 
      21             : #include "php.h"
      22             : #include "php_ini.h"
      23             : #include "php_globals.h"
      24             : #include "php_pcre.h"
      25             : #include "ext/standard/info.h"
      26             : #include "zend_smart_str.h"
      27             : 
      28             : #if HAVE_PCRE || HAVE_BUNDLED_PCRE
      29             : 
      30             : #include "ext/standard/php_string.h"
      31             : 
      32             : #define PREG_PATTERN_ORDER                      1 /* these three are registered as PHP constants of the same name in PHP_MINIT_FUNCTION(pcre) */
      33             : #define PREG_SET_ORDER                          2
      34             : #define PREG_OFFSET_CAPTURE                     (1<<8)
      35             : /* Bit flags registered as the PREG_SPLIT_* PHP constants */
      36             : #define PREG_SPLIT_NO_EMPTY                     (1<<0)
      37             : #define PREG_SPLIT_DELIM_CAPTURE        (1<<1)
      38             : #define PREG_SPLIT_OFFSET_CAPTURE       (1<<2)
      39             : /* Set in a cache entry's preg_options when the regex carries the 'e' modifier; not registered as a PHP constant in MINIT */
      40             : #define PREG_REPLACE_EVAL                       (1<<0)
      41             : /* Registered as the PREG_GREP_INVERT PHP constant */
      42             : #define PREG_GREP_INVERT                        (1<<0)
      43             : /* Entry count at which pcre_get_compiled_regex_cache() evicts PCRE_CACHE_SIZE / 8 entries before inserting */
      44             : #define PCRE_CACHE_SIZE 4096
      45             : 
      46             : enum { /* Values kept in PCRE_G(error_code); registered in MINIT as PREG_NO_ERROR and the PREG_*_ERROR constants */
      47             :         PHP_PCRE_NO_ERROR = 0,
      48             :         PHP_PCRE_INTERNAL_ERROR,
      49             :         PHP_PCRE_BACKTRACK_LIMIT_ERROR,
      50             :         PHP_PCRE_RECURSION_LIMIT_ERROR,
      51             :         PHP_PCRE_BAD_UTF8_ERROR,
      52             :         PHP_PCRE_BAD_UTF8_OFFSET_ERROR
      53             : };
      54             : 
      55             : 
      56             : ZEND_DECLARE_MODULE_GLOBALS(pcre)
      57             : 
      58             : 
      59           9 : static void pcre_handle_exec_error(int pcre_code TSRMLS_DC) /* {{{
                     :  * Translate a PCRE_ERROR_* code into the corresponding PHP_PCRE_*_ERROR
                     :  * value and store it in PCRE_G(error_code). Any code without its own
                     :  * case below is recorded as PHP_PCRE_INTERNAL_ERROR.
                     :  */
      60             : {
      61           9 :         int preg_code = 0;
      62             : 
      63           9 :         switch (pcre_code) {
      64             :                 case PCRE_ERROR_MATCHLIMIT:
      65           4 :                         preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
      66           4 :                         break;
      67             : 
      68             :                 case PCRE_ERROR_RECURSIONLIMIT:
      69           2 :                         preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
      70           2 :                         break;
      71             : 
      72             :                 case PCRE_ERROR_BADUTF8:
      73           2 :                         preg_code = PHP_PCRE_BAD_UTF8_ERROR;
      74           2 :                         break;
      75             : 
      76             :                 case PCRE_ERROR_BADUTF8_OFFSET:
      77           1 :                         preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
      78           1 :                         break;
      79             : 
      80             :                 default:
      81           0 :                         preg_code = PHP_PCRE_INTERNAL_ERROR;
      82             :                         break;
      83             :         }
      84             : 
      85           9 :         PCRE_G(error_code) = preg_code;
      86           9 : }
      87             : /* }}} */
      88             : 
      89       24272 : static void php_free_pcre_cache(zval *data) /* {{{
                     :  * Element destructor of the compiled-regex cache (passed to
                     :  * zend_hash_init() in PHP_GINIT_FUNCTION(pcre)). Releases everything a
                     :  * pcre_cache_entry holds: the compiled pattern, the study data if any,
                     :  * and - when HAVE_SETLOCALE is set - the character tables and the locale
                     :  * string, followed by the entry itself. A NULL entry pointer is ignored.
                     :  */
      90             : {
      91       24272 :         pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
      92       24272 :         if (!pce) return;
      93       24272 :         pefree(pce->re, 1);
      94       24272 :         if (pce->extra) {
      95       24268 :                 pcre_free_study(pce->extra);
      96             :         }
      97             : #if HAVE_SETLOCALE
      98       24272 :         if ((void*)pce->tables) pefree((void*)pce->tables, 1);
      99       24272 :         pefree(pce->locale, 1);
     100             : #endif
     101       24272 :         pefree(pce, 1);
     102             : }
     103             : /* }}} */
     104             : 
     105       20423 : static PHP_GINIT_FUNCTION(pcre) /* {{{
                     :  * Initialise the module globals: create the compiled-regex cache with
                     :  * php_free_pcre_cache as its element destructor, zero both limits and
                     :  * reset the stored error code to PHP_PCRE_NO_ERROR.
                     :  */
     106             : {
     107       20423 :         zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
     108       20423 :         pcre_globals->backtrack_limit = 0;
     109       20423 :         pcre_globals->recursion_limit = 0;
     110       20423 :         pcre_globals->error_code      = PHP_PCRE_NO_ERROR;
     111       20423 : }
     112             : /* }}} */
     113             : 
     114       20457 : static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{
                     :  * Tear down the module globals by destroying the compiled-regex cache;
                     :  * the table was created with php_free_pcre_cache as element destructor.
                     :  */
     115             : {
     116       20457 :         zend_hash_destroy(&pcre_globals->pcre_cache);
     117       20457 : }
     118             : /* }}} */
     119             : 
     120             : PHP_INI_BEGIN() /* INI settings bound to the backtrack_limit, recursion_limit and jit fields of
                     :    zend_pcre_globals; pcre.jit is only declared when PCRE_STUDY_JIT_COMPILE is defined */
     121             :         STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
     122             :         STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000",  PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
     123             : #ifdef PCRE_STUDY_JIT_COMPILE
     124             :         STD_PHP_INI_ENTRY("pcre.jit",             "1",       PHP_INI_ALL, OnUpdateBool, jit,             zend_pcre_globals, pcre_globals)
     125             : #endif
     126             : PHP_INI_END()
     127             : 
     128             : 
     129             : /* {{{ PHP_MINFO_FUNCTION(pcre)
                     :  * Print the module's info table: one row stating that PCRE support is
                     :  * enabled, one row with the library version reported by pcre_version(),
                     :  * followed by the module's INI entries.
                     :  */
     130         143 : static PHP_MINFO_FUNCTION(pcre)
     131             : {
     132         143 :         php_info_print_table_start();
     133         143 :         php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
     134         143 :         php_info_print_table_row(2, "PCRE Library Version", pcre_version() );
     135         143 :         php_info_print_table_end();
     136             : 
     137         143 :         DISPLAY_INI_ENTRIES();
     138         143 : }
     139             : /* }}} */
     140             : 
     141             : /* {{{ PHP_MINIT_FUNCTION(pcre)
                     :  * Module startup: register the INI entries, then the PREG_* flag
                     :  * constants, the PREG_*_ERROR constants (backed by the PHP_PCRE_* enum)
                     :  * and PCRE_VERSION (the string returned by pcre_version()). All constants
                     :  * are registered with CONST_CS | CONST_PERSISTENT. Always returns SUCCESS.
                     :  */
     142       20423 : static PHP_MINIT_FUNCTION(pcre)
     143             : {
     144       20423 :         REGISTER_INI_ENTRIES();
     145             :         
     146       20423 :         REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
     147       20423 :         REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
     148       20423 :         REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
     149       20423 :         REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
     150       20423 :         REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
     151       20423 :         REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
     152       20423 :         REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
     153             : 
     154       20423 :         REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
     155       20423 :         REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
     156       20423 :         REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
     157       20423 :         REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
     158       20423 :         REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
     159       20423 :         REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
     160       20423 :         REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT);
     161             : 
     162       20423 :         return SUCCESS;
     163             : }
     164             : /* }}} */
     165             : 
     166             : /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre)
                     :  * Module shutdown: unregister the INI entries that MINIT registered.
                     :  * Always returns SUCCESS.
                     :  */
     167       20457 : static PHP_MSHUTDOWN_FUNCTION(pcre)
     168             : {
     169       20457 :         UNREGISTER_INI_ENTRIES();
     170             : 
     171       20457 :         return SUCCESS;
     172             : }
     173             : /* }}} */
     174             : 
     175             : /* {{{ static pcre_clean_cache
                     :  * Callback handed to zend_hash_apply_with_argument() when the regex cache
                     :  * is full. `arg` points at an int counter of entries still to be cleaned:
                     :  * while it is positive the counter is decremented and 1 is returned,
                     :  * afterwards 0 is returned. `data` (the visited entry) is not inspected.
                     :  * NOTE(review): 1 / 0 presumably correspond to ZEND_HASH_APPLY_REMOVE /
                     :  * ZEND_HASH_APPLY_KEEP - confirm against zend_hash.h.
                     :  */
     176       36864 : static int pcre_clean_cache(zval *data, void *arg TSRMLS_DC)
     177             : {
     178       36864 :         int *num_clean = (int *)arg;
     179             : 
     180       36864 :         if (*num_clean > 0) {
     181        4608 :                 (*num_clean)--;
     182        4608 :                 return 1;
     183             :         } else {
     184       32256 :                 return 0;
     185             :         }
     186             : }
     187             : /* }}} */
     188             : 
     189             : /* {{{ static make_subpats_table
                     :  * Build an array of num_subpats slots, indexed by capture group number,
                     :  * in which the slot of every named group points at that group's name
                     :  * inside PCRE's name table (the names are not copied); all other slots
                     :  * stay NULL from ecalloc().
                     :  *
                     :  * Returns the ecalloc()ed array, or NULL after raising an E_WARNING when
                     :  * pcre_fullinfo() fails or when a group name is a numeric string (in the
                     :  * latter case the array is freed here before returning).
                     :  *
                     :  * Each name table entry starts with the group number stored in two
                     :  * bytes, most significant byte first, so the high byte is weighted by
                     :  * 0x100. The previous 0xff multiplier produced a wrong slot for every
                     :  * group numbered 256 or above.
                     :  */
     190          10 : static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce TSRMLS_DC)
     191             : {
     192          10 :         pcre_extra *extra = pce->extra;
     193          10 :         int name_cnt = pce->name_count, name_size, ni = 0;
     194             :         int rc;
     195             :         char *name_table;
     196             :         unsigned short name_idx;
     197             :         char **subpat_names;
     198             :         int rc1, rc2;
     199             : 
     200          10 :         rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
     201          10 :         rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
     202          10 :         rc = rc2 ? rc2 : rc1; /* report the entry-size failure first, else the name-table one */
     203          10 :         if (rc < 0) {
     204           0 :                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
     205           0 :                 return NULL;
     206             :         }
     207             : 
     208          10 :         subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
     209         174 :         while (ni++ < name_cnt) {
     210         154 :                 name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1]; /* big-endian 16-bit group number */
     211         154 :                 subpat_names[name_idx] = name_table + 2; /* the name follows the two number bytes */
     212         308 :                 if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
     213           0 :                         php_error_docref(NULL TSRMLS_CC, E_WARNING, "Numeric named subpatterns are not allowed");
     214           0 :                         efree(subpat_names);
     215           0 :                         return NULL;
     216             :                 }
     217         154 :                 name_table += name_size; /* entries are fixed-size: step to the next one */
     218             :         }
     219          10 :         return subpat_names;
     220             : }
     221             : /* }}} */
     222             : 
     223             : /* {{{ pcre_get_compiled_regex_cache
                     :  * Return the cache entry holding the compiled form of `regex`, compiling
                     :  * and caching it first when necessary.
                     :  *
                     :  * A cached entry is reused directly; with HAVE_SETLOCALE it is reused
                     :  * only if its stored locale equals the current LC_CTYPE locale, otherwise
                     :  * the regex is compiled again and the entry under that key is updated.
                     :  *
                     :  * Compilation steps: skip leading whitespace, read the delimiter (must not
                     :  * be alphanumeric or a backslash; an opening bracket is closed by its
                     :  * counterpart and may nest), copy the pattern up to the closing
                     :  * delimiter, translate the trailing modifier letters into PCRE compile
                     :  * options / preg options, call pcre_compile(), optionally pcre_study()
                     :  * (modifier 'S', or always when pcre.jit is on), evict
                     :  * PCRE_CACHE_SIZE / 8 entries if the cache is full, and store the new
                     :  * entry keyed by the raw regex bytes.
                     :  *
                     :  * Returns NULL after raising an E_WARNING for an empty regex, an embedded
                     :  * null byte, a bad or missing delimiter, an unknown modifier, a compile
                     :  * failure or a pcre_fullinfo() failure.
                     :  *
                     :  * NOTE(review): on the two pcre_fullinfo() failure returns near the end,
                     :  * re, extra, new_entry.locale and tables are neither stored nor released
                     :  * - this looks like a leak; confirm.
     224             :  */
     225     1986776 : PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex TSRMLS_DC)
     226             : {
     227     1986776 :         pcre                            *re = NULL;
     228             :         pcre_extra                      *extra;
     229     1986776 :         int                                      coptions = 0;
     230     1986776 :         int                                      soptions = 0;
     231             :         const char                      *error;
     232             :         int                                      erroffset;
     233             :         char                             delimiter;
     234             :         char                             start_delimiter;
     235             :         char                             end_delimiter;
     236             :         char                            *p, *pp;
     237             :         char                            *pattern;
     238     1986776 :         int                                      do_study = 0;
     239     1986776 :         int                                      poptions = 0;
     240     1986776 :         unsigned const char *tables = NULL;
     241             : #if HAVE_SETLOCALE
     242             :         char                            *locale;
     243             : #endif
     244             :         pcre_cache_entry        *pce;
     245             :         pcre_cache_entry         new_entry;
     246             :         int                                      rc;
     247             : 
     248             : #if HAVE_SETLOCALE
     249             : # if defined(PHP_WIN32) && defined(ZTS)
     250             :         _configthreadlocale(_ENABLE_PER_THREAD_LOCALE);
     251             : # endif
     252     1986776 :         locale = setlocale(LC_CTYPE, NULL);
     253             : #endif
     254             : 
     255             :         /* Try to lookup the cached regex entry, and if successful, just pass
     256             :            back the compiled pattern, otherwise go on and compile it. */
     257     1986776 :         pce = zend_hash_find_ptr(&PCRE_G(pcre_cache), regex);
     258     1986776 :         if (pce) {
     259             :                 /*
     260             :                  * We use a quick pcre_fullinfo() check to see whether cache is corrupted, and if it
     261             :                  * is, we flush it and compile the pattern from scratch.
     262             :                  */
     263             : //???           int     count = 0;
     264             : //???
     265             : //???           if (pcre_fullinfo(pce->re, NULL, PCRE_INFO_CAPTURECOUNT, &count) == PCRE_ERROR_BADMAGIC) {
     266             : //???                   zend_hash_clean(&PCRE_G(pcre_cache));
     267             : //???           } else {
     268             : #if HAVE_SETLOCALE
     269     1962462 :                         if (!strcmp(pce->locale, locale)) {
     270             : #endif
     271     1962460 :                                 return pce;
     272             : #if HAVE_SETLOCALE
     273             :                         }
     274             : #endif
     275             : //???           }
     276             :         }
     277             :         
     278       24316 :         p = regex->val;
     279             :         
     280             :         /* Parse through the leading whitespace, and display a warning if we
     281             :            get to the end without encountering a delimiter. */
     282       24316 :         while (isspace((int)*(unsigned char *)p)) p++;
     283       24316 :         if (*p == 0) {
     284           5 :                 php_error_docref(NULL TSRMLS_CC, E_WARNING, 
     285           5 :                                                  p < regex->val + regex->len ? "Null byte in regex" : "Empty regular expression");
     286           5 :                 return NULL;
     287             :         }
     288             :         
     289             :         /* Get the delimiter and display a warning if it is alphanumeric
     290             :            or a backslash. */
     291       24311 :         delimiter = *p++;
     292       24311 :         if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
     293           7 :                 php_error_docref(NULL TSRMLS_CC,E_WARNING, "Delimiter must not be alphanumeric or backslash");
     294           7 :                 return NULL;
     295             :         }
     296             : 
     297       24304 :         start_delimiter = delimiter;
     298       24304 :         if ((pp = strchr("([{< )]}> )]}>", delimiter))) /* five positions on: an opening bracket yields its closer, a closer yields itself */
     299          29 :                 delimiter = pp[5];
     300       24304 :         end_delimiter = delimiter;
     301             : 
     302       24304 :         pp = p;
     303             : 
     304       24304 :         if (start_delimiter == end_delimiter) {
     305             :                 /* We need to iterate through the pattern, searching for the ending delimiter,
     306             :                    but skipping the backslashed delimiters.  If the ending delimiter is not
     307             :                    found, display a warning. */
     308    16318758 :                 while (*pp != 0) {
     309    16294474 :                         if (*pp == '\\' && pp[1] != 0) pp++;
     310    15126146 :                         else if (*pp == delimiter)
     311       24266 :                                 break;
     312    16270208 :                         pp++;
     313             :                 }
     314             :         } else {
     315             :                 /* We iterate through the pattern, searching for the matching ending
     316             :                  * delimiter. For each matching starting delimiter, we increment nesting
     317             :                  * level, and decrement it for each matching ending delimiter. If we
     318             :                  * reach the end of the pattern without matching, display a warning.
     319             :                  */
     320          29 :                 int brackets = 1;       /* brackets nesting level */
     321         281 :                 while (*pp != 0) {
     322         249 :                         if (*pp == '\\' && pp[1] != 0) pp++;
     323         226 :                         else if (*pp == end_delimiter && --brackets <= 0)
     324             :                                 break;
     325         200 :                         else if (*pp == start_delimiter)
     326           1 :                                 brackets++;
     327         223 :                         pp++;
     328             :                 }
     329             :         }
     330             : 
     331       24304 :         if (*pp == 0) {
     332          12 :                 if (pp < regex->val + regex->len) {
     333           4 :                         php_error_docref(NULL TSRMLS_CC,E_WARNING, "Null byte in regex");
     334           8 :                 } else if (start_delimiter == end_delimiter) {
     335           7 :                         php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending delimiter '%c' found", delimiter);
     336             :                 } else {
     337           1 :                         php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
     338             :                 }
     339          12 :                 return NULL;
     340             :         }
     341             :         
     342             :         /* Make a copy of the actual pattern. */
     343       24292 :         pattern = estrndup(p, pp-p);
     344             : 
     345             :         /* Move on to the options */
     346       24292 :         pp++;
     347             : 
     348             :         /* Parse through the options, setting appropriate flags.  Display
     349             :            a warning if we encounter an unknown modifier. */    
     350       62375 :         while (pp < regex->val + regex->len) {
     351       13809 :                 switch (*pp++) {
     352             :                         /* Perl compatible options */
     353        1922 :                         case 'i':       coptions |= PCRE_CASELESS;              break;
     354        2122 :                         case 'm':       coptions |= PCRE_MULTILINE;             break;
     355        9630 :                         case 's':       coptions |= PCRE_DOTALL;                break;
     356           5 :                         case 'x':       coptions |= PCRE_EXTENDED;              break;
     357             :                         
     358             :                         /* PCRE specific options */
     359           2 :                         case 'A':       coptions |= PCRE_ANCHORED;              break;
     360           9 :                         case 'D':       coptions |= PCRE_DOLLAR_ENDONLY;break;
     361          28 :                         case 'S':       do_study  = 1;                                  break;
     362          31 :                         case 'U':       coptions |= PCRE_UNGREEDY;              break;
     363           1 :                         case 'X':       coptions |= PCRE_EXTRA;                 break;
     364          34 :                         case 'u':       coptions |= PCRE_UTF8;
     365             :         /* In  PCRE,  by  default, \d, \D, \s, \S, \w, and \W recognize only ASCII
     366             :        characters, even in UTF-8 mode. However, this can be changed by setting
     367             :        the PCRE_UCP option. */
     368             : #ifdef PCRE_UCP
     369          34 :                                                 coptions |= PCRE_UCP;
     370             : #endif                  
     371          34 :                                 break;
     372             : 
     373             :                         /* Custom preg options */
     374           5 :                         case 'e':       poptions |= PREG_REPLACE_EVAL;  break;
     375             :                         
     376             :                         case ' ':
     377             :                         case '\n':
     378           2 :                                 break;
     379             : 
     380             :                         default:
     381          18 :                                 if (pp[-1]) {
     382          13 :                                         php_error_docref(NULL TSRMLS_CC,E_WARNING, "Unknown modifier '%c'", pp[-1]);
     383             :                                 } else {
     384           5 :                                         php_error_docref(NULL TSRMLS_CC,E_WARNING, "Null byte in regex");
     385             :                                 }
     386          18 :                                 efree(pattern);
     387          18 :                                 return NULL;
     388             :                 }
     389             :         }
     390             : 
     391             : #if HAVE_SETLOCALE
     392       24274 :         if (strcmp(locale, "C")) /* build character tables only for a locale other than "C" */
     393           4 :                 tables = pcre_maketables();
     394             : #endif
     395             : 
     396             :         /* Compile pattern and display a warning if compilation failed. */
     397       24274 :         re = pcre_compile(pattern,
     398             :                                           coptions,
     399             :                                           &error,
     400             :                                           &erroffset,
     401             :                                           tables);
     402             : 
     403       24274 :         if (re == NULL) {
     404           6 :                 php_error_docref(NULL TSRMLS_CC,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
     405           6 :                 efree(pattern);
     406           6 :                 if (tables) {
     407           0 :                         pefree((void*)tables, 1);
     408             :                 }
     409           6 :                 return NULL;
     410             :         }
     411             : 
     412             : #ifdef PCRE_STUDY_JIT_COMPILE
     413       24268 :         if (PCRE_G(jit)) {
     414             :                 /* Enable PCRE JIT compiler */
     415       24264 :                 do_study = 1;
     416       24264 :                 soptions |= PCRE_STUDY_JIT_COMPILE;
     417             :         }
     418             : #endif
     419             : 
     420             :         /* If study option was specified, study the pattern and
     421             :            store the result in extra for passing to pcre_exec. */
     422       24268 :         if (do_study) {
     423       24264 :                 extra = pcre_study(re, soptions, &error);
     424       24264 :                 if (extra) {
     425       24264 :                         extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
     426       24264 :                         extra->match_limit = PCRE_G(backtrack_limit);
     427       24264 :                         extra->match_limit_recursion = PCRE_G(recursion_limit);
     428             :                 }
     429       24264 :                 if (error != NULL) {
     430           0 :                         php_error_docref(NULL TSRMLS_CC, E_WARNING, "Error while studying pattern");
     431             :                 }
     432             :         } else {
     433           4 :                 extra = NULL;
     434             :         }
     435             : 
     436       24268 :         efree(pattern);
     437             : 
     438             :         /*
     439             :          * If we reached cache limit, clean out the items from the head of the list;
     440             :          * these are supposedly the oldest ones (but not necessarily the least used
     441             :          * ones).
     442             :          */
     443       24268 :         if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
     444           9 :                 int num_clean = PCRE_CACHE_SIZE / 8;
     445           9 :                 zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean TSRMLS_CC);
     446             :         }
     447             : 
     448             :         /* Store the compiled pattern and extra info in the cache. */
     449       24268 :         new_entry.re = re;
     450       24268 :         new_entry.extra = extra;
     451       24268 :         new_entry.preg_options = poptions;
     452       24268 :         new_entry.compile_options = coptions;
     453             : #if HAVE_SETLOCALE
     454       24268 :         new_entry.locale = pestrdup(locale, 1);
     455       24268 :         new_entry.tables = tables;
     456             : #endif
     457             : 
     458       24268 :         rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &new_entry.capture_count);
     459       24268 :         if (rc < 0) {
     460           0 :                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
     461           0 :                 return NULL; /* NOTE(review): new_entry's resources are not released here - confirm */
     462             :         }
     463             : 
     464       24268 :         rc = pcre_fullinfo(re, extra, PCRE_INFO_NAMECOUNT, &new_entry.name_count);
     465       24268 :         if (rc < 0) {
     466           0 :                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
     467           0 :                 return NULL; /* NOTE(review): new_entry's resources are not released here - confirm */
     468             :         }
     469             : 
     470             :         /*
     471             :          * Interned strings are not duplicated when stored in HashTable,
     472             :          * but all the interned strings created during HTTP request are removed
     473             :          * at end of request. However PCRE_G(pcre_cache) must be consistent
     474             :          * on the next request as well. So we disable usage of interned strings
     475             :          * as hash keys especially for this table.
     476             :          * See bug #63180 
     477             :          */
     478       48536 :         pce = zend_hash_str_update_mem(&PCRE_G(pcre_cache), regex->val, regex->len, &new_entry, sizeof(pcre_cache_entry));
     479             : 
     480       24268 :         return pce;
     481             : }
     482             : /* }}} */
     483             : 
     484             : /* {{{ pcre_get_compiled_regex
                     :  * Convenience wrapper around pcre_get_compiled_regex_cache(): returns the
                     :  * compiled pattern of `regex`, or NULL when no cache entry could be
                     :  * obtained. `extra` and `preg_options` are optional out-parameters (a
                     :  * NULL pointer is skipped); they receive the entry's study data and preg
                     :  * options, or NULL / 0 when the lookup failed.
     485             :  */
     486      127853 : PHPAPI pcre* pcre_get_compiled_regex(zend_string *regex, pcre_extra **extra, int *preg_options TSRMLS_DC)
     487             : {
     488      127853 :         pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex TSRMLS_CC);
     489             : 
     490      127853 :         if (extra) {
     491      127853 :                 *extra = pce ? pce->extra : NULL;
     492             :         }
     493      127853 :         if (preg_options) {
     494      127853 :                 *preg_options = pce ? pce->preg_options : 0;
     495             :         }
     496             :         
     497      127853 :         return pce ? pce->re : NULL;
     498             : }
     499             : /* }}} */
     500             : 
     501             : /* {{{ pcre_get_compiled_regex_ex
                     :  * Same as pcre_get_compiled_regex(), with one more optional out-parameter:
                     :  * `compile_options` receives the entry's PCRE compile options, or 0 when
                     :  * no cache entry could be obtained. Every out-parameter may be NULL, in
                     :  * which case it is skipped. Returns the compiled pattern or NULL.
     502             :  */
     503           0 : PHPAPI pcre* pcre_get_compiled_regex_ex(zend_string *regex, pcre_extra **extra, int *preg_options, int *compile_options TSRMLS_DC)
     504             : {
     505           0 :         pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex TSRMLS_CC);
     506             :         
     507           0 :         if (extra) {
     508           0 :                 *extra = pce ? pce->extra : NULL;
     509             :         }
     510           0 :         if (preg_options) {
     511           0 :                 *preg_options = pce ? pce->preg_options : 0;
     512             :         }
     513           0 :         if (compile_options) {
     514           0 :                 *compile_options = pce ? pce->compile_options : 0;
     515             :         }
     516             :         
     517           0 :         return pce ? pce->re : NULL;
     518             : }
     519             : /* }}} */
     520             : 
     521             : /* {{{ add_offset_pair */
     522          73 : static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
     523             : {
     524             :         zval match_pair;
     525             : 
     526          73 :         array_init_size(&match_pair, 2);
     527             : 
     528             :         /* Add (match, offset) to the return value */
     529          73 :         add_next_index_stringl(&match_pair, str, len);
     530          73 :         add_next_index_long(&match_pair, offset);
     531             :         
     532          73 :         if (name) {
     533           2 :                 zval_add_ref(&match_pair);
     534           2 :                 zend_hash_str_update(Z_ARRVAL_P(result), name, strlen(name), &match_pair);
     535             :         }
     536          73 :         zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair);
     537          73 : }
     538             : /* }}} */
     539             : 
     540     1830501 : static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
     541             : {
     542             :         /* parameters */
     543             :         zend_string              *regex;                        /* Regular expression */
     544             :         zend_string              *subject;                      /* String to match against */
     545             :         pcre_cache_entry *pce;                          /* Compiled regular expression */
     546     1830501 :         zval                     *subpats = NULL;       /* Array for subpatterns */
     547     1830501 :         zend_long                 flags = 0;            /* Match control flags */
     548     1830501 :         zend_long                 start_offset = 0;     /* Where the new search starts */
     549             : 
     550             : #ifndef FAST_ZPP
     551             :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "SS|z/ll", &regex,
     552             :                                                           &subject, &subpats, &flags, &start_offset) == FAILURE) {
     553             :                 RETURN_FALSE;
     554             :         }
     555             : #else
     556     1830501 :         ZEND_PARSE_PARAMETERS_START(2, 5)
     557     5491479 :                 Z_PARAM_STR(regex)
     558     5491467 :                 Z_PARAM_STR(subject)
     559     1830485 :                 Z_PARAM_OPTIONAL
     560     4034167 :                 Z_PARAM_ZVAL_EX(subpats, 0, 1)
     561     1109667 :                 Z_PARAM_LONG(flags)
     562          56 :                 Z_PARAM_LONG(start_offset)
     563     1830501 :         ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
     564             : #endif
     565             :         
     566             :         /* Compile regex or get it from cache. */
     567     1830485 :         if ((pce = pcre_get_compiled_regex_cache(regex TSRMLS_CC)) == NULL) {
     568          27 :                 RETURN_FALSE;
     569             :         }
     570             : 
     571     3660916 :         php_pcre_match_impl(pce, subject->val, subject->len, return_value, subpats, 
     572     1830458 :                 global, ZEND_NUM_ARGS() >= 4, flags, start_offset TSRMLS_CC);
     573             : }
     574             : /* }}} */
     575             : 
     576             : /* {{{ php_pcre_match_impl() */
     577     1831303 : PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
     578             :         zval *subpats, int global, int use_flags, long flags, long start_offset TSRMLS_DC)
     579             : {
     580             :         zval                     result_set,            /* Holds a set of subpatterns after
     581             :                                                                                    a global match */
     582     1831303 :                                     *match_sets = NULL; /* An array of sets of matches for each
     583             :                                                                                    subpattern after a global match */
     584     1831303 :         pcre_extra              *extra = pce->extra;/* Holds results of studying */
     585             :         pcre_extra               extra_data;            /* Used locally for exec options */
     586     1831303 :         int                              exoptions = 0;         /* Execution options */
     587     1831303 :         int                              count = 0;                     /* Count of matched subpatterns */
     588             :         int                             *offsets;                       /* Array of subpattern offsets */
     589             :         int                              num_subpats;           /* Number of captured subpatterns */
     590             :         int                              size_offsets;          /* Size of the offsets array */
     591             :         int                              matched;                       /* Has anything matched */
     592     1831303 :         int                              g_notempty = 0;        /* If the match should not be empty */
     593             :         const char         **stringlist;                /* Holds list of subpatterns */
     594             :         char               **subpat_names;              /* Array for named subpatterns */
     595             :         int                              i;
     596             :         int                              subpats_order;         /* Order of subpattern matches */
     597             :         int                              offset_capture;    /* Capture match offsets: yes/no */
     598     1831303 :         unsigned char   *mark = NULL;       /* Target for MARK name */
     599             :         zval            marks;                  /* Array of marks for PREG_PATTERN_ORDER */
     600             :         ALLOCA_FLAG(use_heap);
     601             : 
     602     1831303 :         ZVAL_UNDEF(&marks);
     603             : 
     604             :         /* Overwrite the passed-in value for subpatterns with an empty array. */
     605     1831303 :         if (subpats != NULL) {
     606             :                 zval_dtor(subpats);
     607     1110435 :                 array_init(subpats);
     608             :         }
     609             : 
     610     1831303 :         subpats_order = global ? PREG_PATTERN_ORDER : 0;
     611             : 
     612     1831303 :         if (use_flags) {
     613         818 :                 offset_capture = flags & PREG_OFFSET_CAPTURE;
     614             : 
     615             :                 /*
     616             :                  * subpats_order is pre-set to pattern mode so we change it only if
     617             :                  * necessary.
     618             :                  */
     619         818 :                 if (flags & 0xff) {
     620          23 :                         subpats_order = flags & 0xff;
     621             :                 }
     622         818 :                 if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
     623             :                         (!global && subpats_order != 0)) {
     624           1 :                         php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid flags specified");
     625           1 :                         return;
     626             :                 }
     627             :         } else {
     628     1830485 :                 offset_capture = 0;
     629             :         }
     630             : 
     631             :         /* Negative offset counts from the end of the string. */
     632     1831302 :         if (start_offset < 0) {
     633           5 :                 start_offset = subject_len + start_offset;
     634           5 :                 if (start_offset < 0) {
     635           1 :                         start_offset = 0;
     636             :                 }
     637             :         }
     638             : 
     639     1831302 :         if (extra == NULL) {
     640           3 :                 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
     641           3 :                 extra = &extra_data;
     642             :         }
     643     1831302 :         extra->match_limit = PCRE_G(backtrack_limit);
     644     1831302 :         extra->match_limit_recursion = PCRE_G(recursion_limit);
     645             : #ifdef PCRE_EXTRA_MARK
     646     1831302 :         extra->mark = &mark;
     647     1831302 :         extra->flags |= PCRE_EXTRA_MARK;
     648             : #endif
     649             : 
     650             :         /* Calculate the size of the offsets array, and allocate memory for it. */
     651     1831302 :         num_subpats = pce->capture_count + 1;
     652     1831302 :         size_offsets = num_subpats * 3;
     653             : 
     654             :         /*
     655             :          * Build a mapping from subpattern numbers to their names. We will
     656             :          * allocate the table only if there are any named subpatterns.
     657             :          */
     658     1831302 :         subpat_names = NULL;
     659     1831302 :         if (pce->name_count > 0) {
     660           9 :                 subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
     661           9 :                 if (!subpat_names) {
     662           0 :                         RETURN_FALSE;
     663             :                 }
     664             :         }
     665             : 
     666     1831302 :         if (size_offsets <= 32) {
     667     1831288 :                 offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
     668             :         } else {
     669          14 :                 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
     670             :         }
     671             : 
     672             :         /* Allocate match sets array and initialize the values. */
     673     1831302 :         if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
     674         894 :                 match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0);
     675        2021 :                 for (i=0; i<num_subpats; i++) {
     676        1127 :                         array_init(&match_sets[i]);
     677             :                 }
     678             :         }
     679             : 
     680     1831302 :         matched = 0;
     681     1831302 :         PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
     682             :         
     683             :         do {
     684             :                 /* Execute the regular expression. */
     685     1831497 :                 count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
     686             :                                                   exoptions|g_notempty, offsets, size_offsets);
     687             : 
     688             :                 /* the string was already proved to be valid UTF-8 */
     689     1831497 :                 exoptions |= PCRE_NO_UTF8_CHECK;
     690             : 
     691             :                 /* Check for too many substrings condition. */
     692     1831497 :                 if (count == 0) {
     693           0 :                         php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
     694           0 :                         count = size_offsets/3;
     695             :                 }
     696             : 
     697             :                 /* If something has matched */
     698     1831497 :                 if (count > 0) {
     699      206135 :                         matched++;
     700             : 
     701             :                         /* If subpatterns array has been passed, fill it in with values. */
     702      206135 :                         if (subpats != NULL) {
     703             :                                 /* Try to get the list of substrings and display a warning if failed. */
     704       41007 :                                 if (pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) {
     705           0 :                                         if (subpat_names) {
     706           0 :                                                 efree(subpat_names);
     707             :                                         }
     708           0 :                                         if (size_offsets <= 32) {
     709           0 :                                                 free_alloca(offsets, use_heap);
     710             :                                         } else {
     711           0 :                                                 efree(offsets);
     712             :                                         }
     713           0 :                                         if (match_sets) efree(match_sets);
     714           0 :                                         php_error_docref(NULL TSRMLS_CC, E_WARNING, "Get subpatterns list failed");
     715           0 :                                         RETURN_FALSE;
     716             :                                 }
     717             : 
     718       41007 :                                 if (global) {   /* global pattern matching */
     719         324 :                                         if (subpats && subpats_order == PREG_PATTERN_ORDER) {
     720             :                                                 /* For each subpattern, insert it into the appropriate array. */
     721         140 :                                                 if (offset_capture) {
     722          27 :                                                         for (i = 0; i < count; i++) {
     723          30 :                                                                 add_offset_pair(&match_sets[i], (char *)stringlist[i],
     724          30 :                                                                                                 offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
     725             :                                                         }
     726             :                                                 } else {
     727         337 :                                                         for (i = 0; i < count; i++) {
     728         209 :                                                                 add_next_index_stringl(&match_sets[i], (char *)stringlist[i],
     729         209 :                                                                                                            offsets[(i<<1)+1] - offsets[i<<1]);
     730             :                                                         }
     731             :                                                 }
     732             :                                                 /* Add MARK, if available */
     733         140 :                                                 if (mark) {
     734           2 :                                                         if (Z_TYPE(marks) == IS_UNDEF) {
     735           1 :                                                                 array_init(&marks);
     736             :                                                         }
     737           2 :                                                         add_index_string(&marks, matched - 1, (char *) mark);
     738             :                                                 }
     739             :                                                 /*
     740             :                                                  * If the number of captured subpatterns on this run is
     741             :                                                  * less than the total possible number, pad the result
     742             :                                                  * arrays with empty strings.
     743             :                                                  */
     744         140 :                                                 if (count < num_subpats) {
     745          11 :                                                         for (; i < num_subpats; i++) {
     746           7 :                                                                 add_next_index_string(&match_sets[i], "");
     747             :                                                         }
     748             :                                                 }
     749             :                                         } else {
     750             :                                                 /* Allocate the result set array */
     751          44 :                                                 array_init_size(&result_set, count + (mark ? 1 : 0));
     752             :                                                 
     753             :                                                 /* Add all the subpatterns to it */
     754          44 :                                                 if (subpat_names) {
     755           2 :                                                         if (offset_capture) {
     756           0 :                                                                 for (i = 0; i < count; i++) {
     757           0 :                                                                         add_offset_pair(&result_set, (char *)stringlist[i],
     758           0 :                                                                                                         offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
     759             :                                                                 }
     760             :                                                         } else {
     761          14 :                                                                 for (i = 0; i < count; i++) {
     762          12 :                                                                         if (subpat_names[i]) {
     763           8 :                                                                                 add_assoc_stringl(&result_set, subpat_names[i], (char *)stringlist[i],
     764             :                                                                                                                            offsets[(i<<1)+1] - offsets[i<<1]);
     765             :                                                                         }
     766          12 :                                                                         add_next_index_stringl(&result_set, (char *)stringlist[i],
     767          12 :                                                                                                                    offsets[(i<<1)+1] - offsets[i<<1]);
     768             :                                                                 }
     769             :                                                         }
     770             :                                                 } else {
     771          42 :                                                         if (offset_capture) {
     772          10 :                                                                 for (i = 0; i < count; i++) {
     773          14 :                                                                         add_offset_pair(&result_set, (char *)stringlist[i],
     774          14 :                                                                                                         offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
     775             :                                                                 }
     776             :                                                         } else {
     777         343 :                                                                 for (i = 0; i < count; i++) {
     778         304 :                                                                         add_next_index_stringl(&result_set, (char *)stringlist[i],
     779         304 :                                                                                                                    offsets[(i<<1)+1] - offsets[i<<1]);
     780             :                                                                 }
     781             :                                                         }
     782             :                                                 }
     783             :                                                 /* Add MARK, if available */
     784          44 :                                                 if (mark) {
     785           2 :                                                         add_assoc_string_ex(&result_set, "MARK", sizeof("MARK") - 1, (char *)mark);
     786             :                                                 }
     787             :                                                 /* And add it to the output array */
     788          44 :                                                 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set);
     789             :                                         }
     790             :                                 } else {                        /* single pattern matching */
     791             :                                         /* For each subpattern, insert it into the subpatterns array. */
     792       40823 :                                         if (subpat_names) {
     793           5 :                                                 if (offset_capture) {
     794           5 :                                                         for (i = 0; i < count; i++) {
     795          12 :                                                                 add_offset_pair(subpats, (char *)stringlist[i],
     796           4 :                                                                                                 offsets[(i<<1)+1] - offsets[i<<1],
     797           8 :                                                                                                 offsets[i<<1], subpat_names[i]);
     798             :                                                         }
     799             :                                                 } else {
     800          24 :                                                         for (i = 0; i < count; i++) {
     801          20 :                                                                 if (subpat_names[i]) {
     802          13 :                                                                         add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
     803             :                                                                                                           offsets[(i<<1)+1] - offsets[i<<1]);
     804             :                                                                 }
     805          20 :                                                                 add_next_index_stringl(subpats, (char *)stringlist[i],
     806          20 :                                                                                                            offsets[(i<<1)+1] - offsets[i<<1]);
     807             :                                                         }
     808             :                                                 }
     809             :                                         } else {
     810       40818 :                                                 if (offset_capture) {
     811          10 :                                                         for (i = 0; i < count; i++) {
     812          12 :                                                                 add_offset_pair(subpats, (char *)stringlist[i],
     813           6 :                                                                                                 offsets[(i<<1)+1] - offsets[i<<1],
     814           6 :                                                                                                 offsets[i<<1], NULL);
     815             :                                                         }
     816             :                                                 } else {
     817      124602 :                                                         for (i = 0; i < count; i++) {
     818       83788 :                                                                 add_next_index_stringl(subpats, (char *)stringlist[i],
     819       83788 :                                                                                                            offsets[(i<<1)+1] - offsets[i<<1]);
     820             :                                                         }
     821             :                                                 }
     822             :                                         }
     823             :                                         /* Add MARK, if available */
     824       40823 :                                         if (mark) {
     825           1 :                                                 add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
     826             :                                         }
     827             :                                 }
     828             : 
     829       41007 :                                 pcre_free((void *) stringlist);
     830             :                         }
     831     1625362 :                 } else if (count == PCRE_ERROR_NOMATCH) {
     832             :                         /* If we previously set PCRE_NOTEMPTY after a null match,
     833             :                            this is not necessarily the end. We need to advance
     834             :                            the start offset, and continue. Fudge the offset values
     835             :                            to achieve this, unless we're already at the end of the string. */
     836     1625358 :                         if (g_notempty != 0 && start_offset < subject_len) {
     837           2 :                                 offsets[0] = start_offset;
     838           2 :                                 offsets[1] = start_offset + 1;
     839             :                         } else
     840             :                                 break;
     841             :                 } else {
     842           4 :                         pcre_handle_exec_error(count TSRMLS_CC);
     843           4 :                         break;
     844             :                 }
     845             :                 
     846             :                 /* If we have matched an empty string, mimic what Perl's /g options does.
     847             :                    This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
     848             :                    the match again at the same point. If this fails (picked up above) we
     849             :                    advance to the next character. */
     850      206137 :                 g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
     851             :                 
     852             :                 /* Advance to the position right after the last full match */
     853      206137 :                 start_offset = offsets[1];
     854      206137 :         } while (global);
     855             : 
     856             :         /* Add the match sets to the output array and clean up */
     857     1831302 :         if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
     858         894 :                 if (subpat_names) {
     859          10 :                         for (i = 0; i < num_subpats; i++) {
     860           8 :                                 if (subpat_names[i]) {
     861           5 :                                         zend_hash_str_update(Z_ARRVAL_P(subpats), subpat_names[i],
     862             :                                                                          strlen(subpat_names[i]), &match_sets[i]);
     863           5 :                                         Z_ADDREF(match_sets[i]);
     864             :                                 }
     865           8 :                                 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
     866             :                         }
     867             :                 } else {
     868        2011 :                         for (i = 0; i < num_subpats; i++) {
     869        1119 :                                 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
     870             :                         }
     871             :                 }
     872         894 :                 efree(match_sets);
     873             : 
     874         894 :                 if (Z_TYPE(marks) != IS_UNDEF) {
     875           1 :                         add_assoc_zval(subpats, "MARK", &marks);
     876             :                 }
     877             :         }
     878             :         
     879     1831302 :         if (size_offsets <= 32) {
     880     1831288 :                 free_alloca(offsets, use_heap);
     881             :         } else {
     882          14 :                 efree(offsets);
     883             :         }
     884     1831302 :         if (subpat_names) {
     885           9 :                 efree(subpat_names);
     886             :         }
     887             : 
     888             :         /* Did we encounter an error? */
     889     1831302 :         if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
     890     1831298 :                 RETVAL_LONG(matched);
     891             :         } else {
     892           4 :                 RETVAL_FALSE;
     893             :         }
     894             : }
     895             : /* }}} */
     896             : 
     897             : /* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
     898             :    Perform a Perl-style regular expression match */
     899     1830394 : static PHP_FUNCTION(preg_match)
     900             : {
     901     1830394 :         php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
     902     1830394 : }
     903             : /* }}} */
     904             : 
     905             : /* {{{ proto int preg_match_all(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
     906             :    Perform a Perl-style global regular expression match */
     907         107 : static PHP_FUNCTION(preg_match_all)
     908             : {
     909         107 :         php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
     910         107 : }
     911             : /* }}} */
     912             : 
     913             : /* {{{ preg_get_backref
     914             :  */
     915         146 : static int preg_get_backref(char **str, int *backref)
     916             : {
     917         146 :         register char in_brace = 0;
     918         146 :         register char *walk = *str;
     919             : 
     920         146 :         if (walk[1] == 0)
     921           9 :                 return 0;
     922             : 
     923         137 :         if (*walk == '$' && walk[1] == '{') {
     924          14 :                 in_brace = 1;
     925          14 :                 walk++;
     926             :         }
     927         137 :         walk++;
     928             : 
     929         250 :         if (*walk >= '0' && *walk <= '9') {
     930         113 :                 *backref = *walk - '0';
     931         113 :                 walk++;
     932             :         } else
     933          24 :                 return 0;
     934             :         
     935         113 :         if (*walk && *walk >= '0' && *walk <= '9') {
     936           2 :                 *backref = *backref * 10 + *walk - '0';
     937           2 :                 walk++;
     938             :         }
     939             : 
     940         113 :         if (in_brace) {
     941          14 :                 if (*walk == 0 || *walk != '}')
     942           6 :                         return 0;
     943             :                 else
     944           8 :                         walk++;
     945             :         }
     946             :         
     947         107 :         *str = walk;
     948         107 :         return 1;       
     949             : }
     950             : /* }}} */
     951             : 
     952             : /* {{{ preg_do_repl_func
     953             :  */
     954          52 : static zend_string *preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, unsigned char *mark TSRMLS_DC)
     955             : {
     956             :         zend_string *result_str;
     957             :         zval             retval;                        /* Function return value */
     958             :         zval         args[1];                   /* Argument to pass to function */
     959             :         int                      i;
     960             : 
     961          52 :         array_init_size(&args[0], count + (mark ? 1 : 0));
     962          52 :         if (subpat_names) {
     963           3 :                 for (i = 0; i < count; i++) {
     964           2 :                         if (subpat_names[i]) {
     965           1 :                                 add_assoc_stringl(&args[0], subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1]);
     966             :                         }
     967           2 :                         add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
     968             :                 }
     969             :         } else {
     970         133 :                 for (i = 0; i < count; i++) {
     971          82 :                         add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
     972             :                 }
     973             :         }
     974          52 :         if (mark) {
     975           2 :                 add_assoc_string(&args[0], "MARK", (char *) mark);
     976             :         }
     977             : 
     978         155 :         if (call_user_function_ex(EG(function_table), NULL, function, &retval, 1, args, 0, NULL TSRMLS_CC) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) {
     979          51 :                 result_str = zval_get_string(&retval);
     980          51 :                 zval_ptr_dtor(&retval);
     981             :         } else {
     982           1 :                 if (!EG(exception)) {
     983           0 :                         php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call custom replacement function");
     984             :                 }
     985             : 
     986           2 :                 result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
     987             :         }
     988             : 
     989          52 :         zval_ptr_dtor(&args[0]);
     990             : 
     991          52 :         return result_str;
     992             : }
     993             : /* }}} */
     994             : 
     995             : /* {{{ preg_do_eval
     996             :  */
     997           4 : static zend_string *preg_do_eval(char *eval_str, int eval_str_len, char *subject,
     998             :                                                 int *offsets, int count TSRMLS_DC)
     999             : {
    1000             :         zval             retval;                        /* Return value from evaluation */
    1001             :         char            *eval_str_end,          /* End of eval string */
    1002             :                                 *match,                         /* Current match for a backref */
    1003             :                                 *walk,                          /* Used to walk the code string */
    1004             :                                 *segment,                       /* Start of segment to append while walking */
    1005             :                                  walk_last;                     /* Last walked character */
    1006             :         int                      match_len;                     /* Length of the match */
    1007             :         int                      backref;                       /* Current backref */
    1008             :         zend_string *esc_match;                 /* Quote-escaped match */
    1009             :         zend_string *result_str;
    1010             :         char        *compiled_string_description;
    1011           4 :         smart_str    code = {0};
    1012             :         
    1013           4 :         eval_str_end = eval_str + eval_str_len;
    1014           4 :         walk = segment = eval_str;
    1015           4 :         walk_last = 0;
    1016             :         
    1017         178 :         while (walk < eval_str_end) {
    1018             :                 /* If found a backreference.. */
    1019         170 :                 if ('\\' == *walk || '$' == *walk) {
    1020          32 :                         smart_str_appendl(&code, segment, walk - segment);
    1021          32 :                         if (walk_last == '\\') {
    1022           0 :                                 code.s->val[code.s->len-1] = *walk++;
    1023           0 :                                 segment = walk;
    1024           0 :                                 walk_last = 0;
    1025           0 :                                 continue;
    1026             :                         }
    1027          32 :                         segment = walk;
    1028          32 :                         if (preg_get_backref(&walk, &backref)) {
    1029           7 :                                 if (backref < count) {
    1030             :                                         /* Find the corresponding string match and substitute it
    1031             :                                            in instead of the backref */
    1032           7 :                                         match = subject + offsets[backref<<1];
    1033           7 :                                         match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
    1034           7 :                                         if (match_len) {
    1035           7 :                                                 esc_match = php_addslashes(match, match_len, 0 TSRMLS_CC);
    1036             :                                         } else {
    1037           0 :                                                 esc_match = zend_string_init(match, match_len, 0);
    1038             :                                         }
    1039             :                                 } else {
    1040           0 :                                         esc_match = STR_EMPTY_ALLOC();
    1041             :                                 }
    1042           7 :                                 smart_str_appendl(&code, esc_match->val, esc_match->len);
    1043             : 
    1044           7 :                                 segment = walk;
    1045             : 
    1046             :                                 /* Clean up and reassign */
    1047             :                                 zend_string_release(esc_match);
    1048           7 :                                 continue;
    1049             :                         }
    1050             :                 }
    1051         163 :                 walk++;
    1052         163 :                 walk_last = walk[-1];
    1053             :         }
    1054           4 :         smart_str_appendl(&code, segment, walk - segment);
    1055             :         smart_str_0(&code);
    1056             : 
    1057           4 :         compiled_string_description = zend_make_compiled_string_description("regexp code" TSRMLS_CC);
    1058             :         /* Run the code */
    1059           4 :         if (zend_eval_stringl(code.s->val, code.s->len, &retval, compiled_string_description TSRMLS_CC) == FAILURE) {
    1060           1 :                 efree(compiled_string_description);
    1061           1 :                 php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, code.s->val);
    1062             :                 /* zend_error() does not return in this case */
    1063             :         }
    1064           3 :         efree(compiled_string_description);
    1065             :         
    1066             :         /* Save the return string */
    1067           3 :         result_str = zval_get_string(&retval);
    1068             :         
    1069             :         /* Clean up */
    1070             :         zval_dtor(&retval);
    1071             :         smart_str_free(&code);
    1072             :         
    1073           3 :         return result_str;
    1074             : }
    1075             : /* }}} */
    1076             : 
    1077             : /* {{{ php_pcre_replace
    1078             :  */
    1079       25192 : PHPAPI zend_string *php_pcre_replace(zend_string *regex,
    1080             :                                                           char *subject, int subject_len,
    1081             :                                                           zval *replace_val, int is_callable_replace,
    1082             :                                                           int limit, int *replace_count TSRMLS_DC)
    1083             : {
    1084             :         pcre_cache_entry        *pce;                       /* Compiled regular expression */
    1085             : 
    1086             :         /* Compile regex or get it from cache. */
    1087       25192 :         if ((pce = pcre_get_compiled_regex_cache(regex TSRMLS_CC)) == NULL) {
    1088          11 :                 return NULL;
    1089             :         }
    1090             : 
    1091       25181 :         return php_pcre_replace_impl(pce, subject, subject_len, replace_val, 
    1092             :                 is_callable_replace, limit, replace_count TSRMLS_CC);
    1093             : }
    1094             : /* }}} */
    1095             : 
    1096             : /* {{{ php_pcre_replace_impl() */
    1097       25193 : PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *replace_val, 
    1098             :         int is_callable_replace, int limit, int *replace_count TSRMLS_DC)
    1099             : {
    1100       25193 :         pcre_extra              *extra = pce->extra;/* Holds results of studying */
    1101             :         pcre_extra               extra_data;            /* Used locally for exec options */
    1102       25193 :         int                              exoptions = 0;         /* Execution options */
    1103       25193 :         int                              count = 0;                     /* Count of matched subpatterns */
    1104             :         int                             *offsets;                       /* Array of subpattern offsets */
    1105             :         char                    **subpat_names;         /* Array for named subpatterns */
    1106             :         int                              num_subpats;           /* Number of captured subpatterns */
    1107             :         int                              size_offsets;          /* Size of the offsets array */
    1108             :         int                              new_len;                       /* Length of needed storage */
    1109             :         int                              alloc_len;                     /* Actual allocated length */
    1110             :         int                              match_len;                     /* Length of the current match */
    1111             :         int                              backref;                       /* Backreference number */
    1112             :         int                              eval;                          /* If the replacement string should be eval'ed */
    1113             :         int                              start_offset;          /* Where the new search starts */
    1114       25193 :         int                              g_notempty=0;          /* If the match should not be empty */
    1115       25193 :         int                              replace_len=0;         /* Length of replacement string */
    1116       25193 :         char                    *replace=NULL,          /* Replacement string */
    1117             :                                         *walkbuf,                       /* Location of current replacement in the result */
    1118             :                                         *walk,                          /* Used to walk the replacement string */
    1119             :                                         *match,                         /* The current match */
    1120             :                                         *piece,                         /* The current piece of subject */
    1121       25193 :                                         *replace_end=NULL,      /* End of replacement string */
    1122             :                                          walk_last;                     /* Last walked character */
    1123             :         int                              result_len;            /* Length of result */
    1124       25193 :         unsigned char   *mark = NULL;       /* Target for MARK name */
    1125             :         zend_string             *result;                        /* Result of replacement */
    1126       25193 :         zend_string     *eval_result=NULL;  /* Result of eval or custom function */
    1127             :         ALLOCA_FLAG(use_heap);
    1128             : 
    1129       25193 :         if (extra == NULL) {
    1130           2 :                 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
    1131           2 :                 extra = &extra_data;
    1132             :         }
    1133       25193 :         extra->match_limit = PCRE_G(backtrack_limit);
    1134       25193 :         extra->match_limit_recursion = PCRE_G(recursion_limit);
    1135             : 
    1136       25193 :         eval = pce->preg_options & PREG_REPLACE_EVAL;
    1137       25193 :         if (is_callable_replace) {
    1138          34 :                 if (eval) {
    1139           1 :                         php_error_docref(NULL TSRMLS_CC, E_WARNING, "Modifier /e cannot be used with replacement callback");
    1140           1 :                         return NULL;
    1141             :                 }
    1142             :         } else {
    1143       25159 :                 replace = Z_STRVAL_P(replace_val);
    1144       25159 :                 replace_len = Z_STRLEN_P(replace_val);
    1145       25159 :                 replace_end = replace + replace_len;
    1146             :         }
    1147             : 
    1148       25192 :         if (eval) {
    1149           3 :                 php_error_docref(NULL TSRMLS_CC, E_DEPRECATED, "The /e modifier is deprecated, use preg_replace_callback instead");
    1150             :         }
    1151             : 
    1152             :         /* Calculate the size of the offsets array, and allocate memory for it. */
    1153       25192 :         num_subpats = pce->capture_count + 1;
    1154       25192 :         size_offsets = num_subpats * 3;
    1155       25192 :         if (size_offsets <= 32) {
    1156       25189 :                 offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
    1157             :         } else {
    1158           3 :                 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
    1159             :         }       
    1160             : 
    1161             :         /*
    1162             :          * Build a mapping from subpattern numbers to their names. We will
    1163             :          * allocate the table only if there are any named subpatterns.
    1164             :          */
    1165       25192 :         subpat_names = NULL;
    1166       25192 :         if (pce->name_count > 0) {
    1167           1 :                 subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
    1168           1 :                 if (!subpat_names) {
    1169           0 :                         return NULL;
    1170             :                 }
    1171             :         }
    1172             : 
    1173       25192 :         alloc_len = 2 * subject_len;
    1174       50384 :         result = zend_string_alloc(alloc_len * sizeof(char), 0);
    1175             : 
    1176             :         /* Initialize */
    1177       25192 :         match = NULL;
    1178       25192 :         start_offset = 0;
    1179       25192 :         result_len = 0;
    1180       25192 :         PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
    1181             :         
    1182             :         while (1) {
    1183             : #ifdef PCRE_EXTRA_MARK
    1184       31112 :                 extra->mark = &mark;
    1185       31112 :                 extra->flags |= PCRE_EXTRA_MARK;
    1186             : #endif
    1187             :                 /* Execute the regular expression. */
    1188       31112 :                 count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
    1189             :                                                   exoptions|g_notempty, offsets, size_offsets);
    1190             : 
    1191             :                 /* the string was already proved to be valid UTF-8 */
    1192       31112 :                 exoptions |= PCRE_NO_UTF8_CHECK;
    1193             : 
    1194             :                 /* Check for too many substrings condition. */
    1195       31112 :                 if (count == 0) {
    1196           0 :                         php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
    1197           0 :                         count = size_offsets/3;
    1198             :                 }
    1199             : 
    1200       31112 :                 piece = subject + start_offset;
    1201             : 
    1202       37032 :                 if (count > 0 && (limit == -1 || limit > 0)) {
    1203        5921 :                         if (replace_count) {
    1204        5921 :                                 ++*replace_count;
    1205             :                         }
    1206             :                         /* Set the match location in subject */
    1207        5921 :                         match = subject + offsets[0];
    1208             : 
    1209        5921 :                         new_len = result_len + offsets[0] - start_offset; /* part before the match */
    1210             :                         
    1211             :                         /* If evaluating, do it and add the return string's length */
    1212        5921 :                         if (eval) {
    1213           4 :                                 eval_result = preg_do_eval(replace, replace_len, subject,
    1214             :                                                                                            offsets, count TSRMLS_CC);
    1215           3 :                                 new_len += eval_result->len;
    1216        5917 :                         } else if (is_callable_replace) {
    1217             :                                 /* Use custom function to get replacement string and its length. */
    1218          52 :                                 eval_result = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, mark TSRMLS_CC);
    1219          52 :                                 new_len += eval_result->len;
    1220             :                         } else { /* do regular substitution */
    1221        5865 :                                 walk = replace;
    1222        5865 :                                 walk_last = 0;
    1223       17681 :                                 while (walk < replace_end) {
    1224        5951 :                                         if ('\\' == *walk || '$' == *walk) {
    1225          57 :                                                 if (walk_last == '\\') {
    1226           0 :                                                         walk++;
    1227           0 :                                                         walk_last = 0;
    1228           0 :                                                         continue;
    1229             :                                                 }
    1230          57 :                                                 if (preg_get_backref(&walk, &backref)) {
    1231          50 :                                                         if (backref < count)
    1232          49 :                                                                 new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
    1233          50 :                                                         continue;
    1234             :                                                 }
    1235             :                                         }
    1236        5901 :                                         new_len++;
    1237        5901 :                                         walk++;
    1238        5901 :                                         walk_last = walk[-1];
    1239             :                                 }
    1240             :                         }
    1241             : 
    1242        5920 :                         if (new_len > alloc_len) {
    1243          12 :                                 alloc_len = alloc_len + 2 * new_len;
    1244          24 :                                 result = zend_string_realloc(result, alloc_len, 0);
    1245             :                         }
    1246             :                         /* copy the part of the string before the match */
    1247        5920 :                         memcpy(&result->val[result_len], piece, match-piece);
    1248        5920 :                         result_len += match-piece;
    1249             : 
    1250             :                         /* copy replacement and backrefs */
    1251        5920 :                         walkbuf = result->val + result_len;
    1252             :                         
    1253             :                         /* If evaluating or using custom function, copy result to the buffer
    1254             :                          * and clean up. */
    1255        5975 :                         if (eval || is_callable_replace) {
    1256          55 :                                 memcpy(walkbuf, eval_result->val, eval_result->len);
    1257          55 :                                 result_len += eval_result->len;
    1258          55 :                                 if (eval_result) zend_string_release(eval_result);
    1259             :                         } else { /* do regular backreference copying */
    1260        5865 :                                 walk = replace;
    1261        5865 :                                 walk_last = 0;
    1262       17681 :                                 while (walk < replace_end) {
    1263        5951 :                                         if ('\\' == *walk || '$' == *walk) {
    1264          57 :                                                 if (walk_last == '\\') {
    1265           0 :                                                         *(walkbuf-1) = *walk++;
    1266           0 :                                                         walk_last = 0;
    1267           0 :                                                         continue;
    1268             :                                                 }
    1269          57 :                                                 if (preg_get_backref(&walk, &backref)) {
    1270          50 :                                                         if (backref < count) {
    1271          49 :                                                                 match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
    1272          49 :                                                                 memcpy(walkbuf, subject + offsets[backref<<1], match_len);
    1273          49 :                                                                 walkbuf += match_len;
    1274             :                                                         }
    1275          50 :                                                         continue;
    1276             :                                                 }
    1277             :                                         }
    1278        5901 :                                         *walkbuf++ = *walk++;
    1279        5901 :                                         walk_last = walk[-1];
    1280             :                                 }
    1281        5865 :                                 *walkbuf = '\0';
    1282             :                                 /* increment the result length by how much we've added to the string */
    1283        5865 :                                 result_len += walkbuf - (result->val + result_len);
    1284             :                         }
    1285             : 
    1286        5920 :                         if (limit != -1)
    1287          32 :                                 limit--;
    1288             : 
    1289       25191 :                 } else if (count == PCRE_ERROR_NOMATCH || limit == 0) {
    1290             :                         /* If we previously set PCRE_NOTEMPTY after a null match,
    1291             :                            this is not necessarily the end. We need to advance
    1292             :                            the start offset, and continue. Fudge the offset values
    1293             :                            to achieve this, unless we're already at the end of the string. */
    1294       25187 :                         if (g_notempty != 0 && start_offset < subject_len) {
    1295           0 :                                 offsets[0] = start_offset;
    1296           0 :                                 offsets[1] = start_offset + 1;
    1297           0 :                                 memcpy(&result->val[result_len], piece, 1);
    1298           0 :                                 result_len++;
    1299             :                         } else {
    1300       25187 :                                 new_len = result_len + subject_len - start_offset;
    1301       25187 :                                 if (new_len > alloc_len) {
    1302           1 :                                         alloc_len = new_len; /* now we know exactly how long it is */
    1303           2 :                                         result = zend_string_realloc(result, alloc_len, 0);
    1304             :                                 }
    1305             :                                 /* stick that last bit of string on our output */
    1306       25187 :                                 memcpy(&result->val[result_len], piece, subject_len - start_offset);
    1307       25187 :                                 result_len += subject_len - start_offset;
    1308       25187 :                                 result->val[result_len] = '\0';
    1309       25187 :                                 break;
    1310             :                         }
    1311             :                 } else {
    1312           4 :                         pcre_handle_exec_error(count TSRMLS_CC);
    1313             :                         zend_string_free(result);
    1314           4 :                         result = NULL;
    1315           4 :                         break;
    1316             :                 }
    1317             :                         
    1318             :                 /* If we have matched an empty string, mimic what Perl's /g options does.
    1319             :                    This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
    1320             :                    the match again at the same point. If this fails (picked up above) we
    1321             :                    advance to the next character. */
    1322        5920 :                 g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
    1323             :                 
    1324             :                 /* Advance to the next piece. */
    1325        5920 :                 start_offset = offsets[1];
    1326        5920 :         }
    1327             : 
    1328       25191 :         if (result) {
    1329       25187 :                 result->len = result_len;
    1330             :         }
    1331       25191 :         if (size_offsets <= 32) {
    1332       25188 :                 free_alloca(offsets, use_heap);
    1333             :         } else {
    1334           3 :                 efree(offsets);
    1335             :         }
    1336       25191 :         if (subpat_names) {
    1337           1 :                 efree(subpat_names);
    1338             :         }
    1339             : 
    1340       25191 :         return result;
    1341             : }
    1342             : /* }}} */
    1343             : 
    1344             : /* {{{ php_replace_in_subject
                     :    Applies the regex/replace pair(s) to one subject value and returns the
                     :    resulting string, or NULL as soon as php_pcre_replace() returns NULL.
                     :
                     :    regex                a string zval (read with Z_STR_P, no type check in this
                     :                         function) or an array of patterns; array patterns are
                     :                         applied in order, each one to the previous output
                     :    replace              replacement zval; when regex and replace are both arrays
                     :                         and is_callable_replace is 0, each pattern is paired with
                     :                         the next used slot of the replace array
                     :    subject              value to operate on, read through zval_get_string()
                     :    limit                forwarded unchanged to php_pcre_replace()
                     :    is_callable_replace  non-zero when replace is a callback instead of text
                     :    replace_count        forwarded unchanged to php_pcre_replace()
                     :
                     :    The returned zend_string is handed to the caller; preg_replace_impl()
                     :    either stores it in the return value or releases it.
    1345             :  */
    1346       25149 : static zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *subject, int limit, int is_callable_replace, int *replace_count TSRMLS_DC)
    1347             : {
    1348             :         zval            *regex_entry,
    1349       25149 :                                 *replace_entry = NULL,
    1350             :                                 *replace_value,
    1351             :                                  empty_replace; /* used once the replace array has no entries left */
    1352             :         zend_string *result;
    1353       25149 :         zend_string     *subject_str = zval_get_string(subject); /* every exit path below releases or returns this */
    1354             :         uint32_t replace_idx; /* next slot of the replace array to look at */
    1355             : 
    1356             :         /* FIXME: This might need to be changed to STR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */
    1357       25149 :         ZVAL_EMPTY_STRING(&empty_replace);
    1358             :         
    1359             :         /* If regex is an array: run every pattern in turn, feeding the output of
                     :            one replacement into the next. replace_value starts as the replace
                     :            zval itself and is only overridden per pattern when replace is a
                     :            (non-callback) array. */
    1360          22 :         if (Z_TYPE_P(regex) == IS_ARRAY) {
    1361          22 :                 replace_value = replace;
    1362          22 :                 replace_idx = 0;
    1363             : 
    1364             :                 /* For each entry in the regex array, get the entry */
    1365         151 :                 ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(regex), regex_entry) {
    1366             :                         /* Make sure we're dealing with strings. */     
    1367          65 :                         zend_string *regex_str = zval_get_string(regex_entry);
    1368             :                 
    1369             :                         /* If replace is an array and not a callable construct */
    1370          65 :                         if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) {
    1371             :                                 /* Get current entry: walk arData from replace_idx and stop at the
                     :                                    first slot whose value type is not IS_UNUSED. */
    1372          50 :                                 replace_entry = NULL;
    1373         100 :                                 while (replace_idx < Z_ARRVAL_P(replace)->nNumUsed) {
    1374          96 :                                         if (Z_TYPE(Z_ARRVAL_P(replace)->arData[replace_idx].val) != IS_UNUSED) {
    1375          48 :                                                 replace_entry = &Z_ARRVAL_P(replace)->arData[replace_idx].val;
    1376          48 :                                                 break;
    1377             :                                         }
    1378           0 :                                         replace_idx++;
    1379             :                                 }
    1380          50 :                                 if (replace_entry != NULL) {
    1381          48 :                                         if (!is_callable_replace) { /* always true here: the enclosing if already requires it */
    1382          48 :                                                 convert_to_string_ex(replace_entry);
    1383             :                                         }
    1384          48 :                                         replace_value = replace_entry;
    1385          48 :                                         replace_idx++; /* the next pattern starts after the slot just consumed */
    1386             :                                 } else {
    1387             :                                         /* We've run out of replacement strings, so use an empty one */
    1388           2 :                                         replace_value = &empty_replace;
    1389             :                                 }
    1390             :                         }
    1391             :                         
    1392             :                         /* Do the actual replacement and put the result back into subject_str
    1393             :                            for further replacements. */
    1394          65 :                         if ((result = php_pcre_replace(regex_str,
    1395             :                                                                                    subject_str->val,
    1396          65 :                                                                                    subject_str->len,
    1397             :                                                                                    replace_value,
    1398             :                                                                                    is_callable_replace,
    1399             :                                                                                    limit,
    1400             :                                                                                    replace_count TSRMLS_CC)) != NULL) {
    1401             :                                 zend_string_release(subject_str);
    1402          64 :                                 subject_str = result;
    1403             :                         } else {
    1404             :                                 zend_string_release(subject_str);
    1405             :                                 zend_string_release(regex_str);
    1406           1 :                                 return NULL; /* give up on the remaining patterns as well */
    1407             :                         }
    1408             : 
    1409             :                         zend_string_release(regex_str);
    1410             :                 } ZEND_HASH_FOREACH_END();
    1411             : 
    1412          21 :                 return subject_str;
    1413             :         } else {
    1414       25127 :                 result = php_pcre_replace(Z_STR_P(regex), /* single pattern; preg_replace_impl() converts a non-array regex to string first */
    1415             :                                                                   subject_str->val,
    1416       25127 :                                                                   subject_str->len,
    1417             :                                                                   replace,
    1418             :                                                                   is_callable_replace,
    1419             :                                                                   limit,
    1420             :                                                                   replace_count TSRMLS_CC);
    1421             :                 zend_string_release(subject_str);
    1422       25126 :                 return result;
    1423             :         }
    1424             : }
    1425             : /* }}} */
    1426             : 
    1427             : /* {{{ preg_replace_impl
                     :    Shared body of preg_replace(), preg_replace_callback() and preg_filter().
                     :
                     :    is_callable_replace  non-zero: the second argument is a callback
                     :    is_filter            non-zero: keep a result only when the replacement
                     :                         counter increased while processing that subject
                     :
                     :    Flow: parse 3 to 5 arguments; reject (warning + FALSE) an array replacement
                     :    combined with a non-array pattern in non-callback mode; normalise replace
                     :    and regex to strings where they are not arrays; in callback mode verify
                     :    the callback and, if it is invalid, warn and return a copy of subject;
                     :    then run php_replace_in_subject() once for a scalar subject or once per
                     :    element for an array subject (keys are kept, elements whose replacement
                     :    returned NULL are left out). With a 5th argument the total number of
                     :    replacements is written to it.
                     :
                     :    For a non-array subject nothing is assigned to return_value in this
                     :    function when php_replace_in_subject() returns NULL or when the result
                     :    is dropped by the is_filter test.
    1428             :  */
    1429       25157 : static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, int is_callable_replace, int is_filter)
    1430             : {
    1431             :         zval                *regex,
    1432             :                                     *replace,
    1433             :                                     *subject,
    1434             :                                     *subject_entry,
    1435       25157 :                                     *zcount = NULL;
    1436       25157 :         int                              limit_val = -1; /* limit actually passed down; stays -1 without a 4th argument */
    1437       25157 :         zend_long                limit = -1;
    1438             :         zend_string             *result;
    1439             :         zend_string             *string_key;
    1440             :         zend_ulong               num_key;
    1441             :         zend_string             *callback_name;
    1442       25157 :         int                              replace_count=0, old_replace_count;
    1443             : 
    1444             : #ifndef FAST_ZPP
    1445             :         /* Get function parameters and do error-checking. */
    1446             :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zzz|lz/", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
    1447             :                 return;
    1448             :         }
    1449             : #else
    1450       25157 :         ZEND_PARSE_PARAMETERS_START(3, 5)
    1451       25148 :                 Z_PARAM_ZVAL(regex)
    1452       25148 :                 Z_PARAM_ZVAL(replace)
    1453       25148 :                 Z_PARAM_ZVAL(subject)
    1454       25148 :                 Z_PARAM_OPTIONAL
    1455       25200 :                 Z_PARAM_LONG(limit)
    1456          33 :                 Z_PARAM_ZVAL_EX(zcount, 0, 1)
    1457       25157 :         ZEND_PARSE_PARAMETERS_END();
    1458             : #endif
    1459             :         
    1460       50262 :         if (!is_callable_replace && Z_TYPE_P(replace) == IS_ARRAY && Z_TYPE_P(regex) != IS_ARRAY) {
    1461           3 :                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
    1462           3 :                 RETURN_FALSE;
    1463             :         }
    1464             : 
    1465       75421 :         if (Z_TYPE_P(replace) != IS_ARRAY && (Z_TYPE_P(replace) != IS_OBJECT || !is_callable_replace)) { /* arrays, and objects used as callbacks, are left as they are */
    1466       25156 :                 SEPARATE_ZVAL(replace);
    1467       50262 :                 convert_to_string_ex(replace);
    1468             :         }
    1469       25141 :         if (is_callable_replace) {
    1470          39 :                 if (!zend_is_callable(replace, 0, &callback_name TSRMLS_CC)) { /* assumes callback_name is filled in even on failure - TODO confirm */
    1471           4 :                         php_error_docref(NULL TSRMLS_CC, E_WARNING, "Requires argument 2, '%s', to be a valid callback", callback_name->val);
    1472           4 :                         zend_string_release(callback_name);
    1473           4 :                         ZVAL_DUP(return_value, subject); /* invalid callback: the subject is handed back as the result */
    1474           4 :                         return;
    1475             :                 }
    1476          35 :                 zend_string_release(callback_name);
    1477             :         }
    1478             : 
    1479       25137 :         if (ZEND_NUM_ARGS() > 3) {
    1480          22 :                 limit_val = limit; /* NOTE(review): zend_long is narrowed to int here - confirm out-of-range limits are acceptable */
    1481             :         }
    1482             :                 
    1483       50274 :         if (Z_TYPE_P(regex) != IS_ARRAY) { /* php_replace_in_subject() reads a non-array regex with Z_STR_P */
    1484       25160 :                 SEPARATE_ZVAL(regex);
    1485       50253 :                 convert_to_string_ex(regex);
    1486             :         }
    1487             :                 
    1488             :         /* if subject is an array */
    1489       50272 :         if (Z_TYPE_P(subject) == IS_ARRAY) {
    1490           6 :                 array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));
    1491             : 
    1492             :                 /* For each subject entry, convert it to string, then perform replacement
    1493             :                    and add the result to the return_value array. */
    1494          44 :                 ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) {
    1495          19 :                         old_replace_count = replace_count; /* snapshot so the is_filter test sees this entry's replacements only */
    1496          19 :                         if ((result = php_replace_in_subject(regex, replace, subject_entry, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) {
    1497          36 :                                 if (!is_filter || replace_count > old_replace_count) {
    1498             :                                         /* Add to return array under the key the entry had in subject */
    1499          17 :                                         if (string_key) {
    1500           1 :                                                 add_assoc_str_ex(return_value, string_key->val, string_key->len, result);
    1501             :                                         } else {
    1502          16 :                                                 add_index_str(return_value, num_key, result);
    1503             :                                         }
    1504             :                                 } else {
    1505             :                                         zend_string_release(result);
    1506             :                                 }
    1507             :                         }
    1508             :                 } ZEND_HASH_FOREACH_END();
    1509             :         } else {        /* if subject is not an array */
    1510       25130 :                 old_replace_count = replace_count;
    1511       25130 :                 if ((result = php_replace_in_subject(regex, replace, subject, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) {
    1512       50226 :                         if (!is_filter || replace_count > old_replace_count) {
    1513       25113 :                                 RETVAL_STR(result);
    1514             :                         } else {
    1515             :                                 zend_string_release(result);
    1516             :                         }
    1517             :                 }
    1518             :         }
    1519       25135 :         if (ZEND_NUM_ARGS() > 4) { /* a 5th argument was supplied: overwrite it with the replacement total */
    1520           8 :                 zval_dtor(zcount);
    1521           8 :                 ZVAL_LONG(zcount, replace_count);
    1522             :         }
    1523             :         
    1524             : }
    1525             : /* }}} */
    1526             : 
    1527             : /* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
    1528             :    Perform Perl-style regular expression replacement.
                     :    Thin wrapper: forwards the call to preg_replace_impl() with
                     :    is_callable_replace = 0 and is_filter = 0. */
    1529       25109 : static PHP_FUNCTION(preg_replace)
    1530             : {
    1531       25109 :         preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
    1532       25105 : }
    1533             : /* }}} */
    1534             : 
    1535             : /* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
    1536             :    Perform Perl-style regular expression replacement using replacement callback.
                     :    Thin wrapper: forwards the call to preg_replace_impl() with
                     :    is_callable_replace = 1 and is_filter = 0. */
    1537          47 : static PHP_FUNCTION(preg_replace_callback)
    1538             : {
    1539          47 :         preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1, 0);
    1540          47 : }
    1541             : /* }}} */
    1542             : 
    1543             : /* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
    1544             :    Perform Perl-style regular expression replacement and only return matches.
                     :    Thin wrapper: forwards the call to preg_replace_impl() with
                     :    is_callable_replace = 0 and is_filter = 1, so results are kept only
                     :    for subjects in which at least one replacement was counted. */
    1545           1 : static PHP_FUNCTION(preg_filter)
    1546             : {
    1547           1 :         preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
    1548           1 : }
    1549             : /* }}} */
    1550             : 
    1551             : /* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]]) 
    1552             :    Split string into an array using a perl-style regular expression as a delimiter.
                     :    Returns FALSE when the arguments cannot be parsed or when
                     :    pcre_get_compiled_regex_cache() yields no compiled pattern; otherwise the
                     :    splitting itself is delegated to php_pcre_split_impl(), which fills
                     :    return_value. */
    1553        2408 : static PHP_FUNCTION(preg_split)
    1554             : {
    1555             :         zend_string                     *regex;                 /* Regular expression */
    1556             :         zend_string                     *subject;               /* String to match against */
    1557        2408 :         zend_long                        limit_val = -1;/* Integer value of limit */
    1558        2408 :         zend_long                        flags = 0;             /* Match control flags */
    1559             :         pcre_cache_entry        *pce;                   /* Compiled regular expression */
    1560             : 
    1561             :         /* Get function parameters and do error checking.
                     :            The "SS|ll" format takes exactly four receivers: two zend_string
                     :            pointers and two optional longs. The subject length is read from
                     :            subject->len further down, so no separate length receiver is passed
                     :            (the former extra &subject_len referred to an undeclared variable). */
    1562             : #ifndef FAST_ZPP
    1563             :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "SS|ll", &regex,
    1564             :                                                           &subject, &limit_val, &flags) == FAILURE) {
    1565             :                 RETURN_FALSE;
    1566             :         }
    1567             : #else
    1568        2408 :         ZEND_PARSE_PARAMETERS_START(2, 4)
    1569        7212 :                 Z_PARAM_STR(regex)
    1570        7206 :                 Z_PARAM_STR(subject)
    1571        2400 :                 Z_PARAM_OPTIONAL
    1572        2440 :                 Z_PARAM_LONG(limit_val)
    1573          52 :                 Z_PARAM_LONG(flags)
    1574        2408 :         ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
    1575             : #endif
    1576             :         
    1577             :         /* Compile regex or get it from cache. */
    1578        2400 :         if ((pce = pcre_get_compiled_regex_cache(regex TSRMLS_CC)) == NULL) {
    1579           5 :                 RETURN_FALSE;
    1580             :         }
    1581             : 
    1582        2395 :         php_pcre_split_impl(pce, subject->val, subject->len, return_value, limit_val, flags TSRMLS_CC);
    1583             : }
    1584             : /* }}} */
    1585             : 
    1586             : /* {{{ php_pcre_split_impl
                     :    Splits subject around the matches of the compiled pattern pce and appends
                     :    the pieces to return_value, which is initialised as an array here.
                     :
                     :    pce          compiled pattern; capture_count sizes the offsets buffer and
                     :                 compile_options is checked for PCRE_UTF8
                     :    subject      bytes to split; subject_len is its length
                     :    limit_val    -1 means no limit and 0 is rewritten to -1; otherwise matching
                     :                 stops once the counter has dropped to 1 and the rest of the
                     :                 subject becomes the final piece. The counter only drops when
                     :                 a piece was actually appended.
                     :    flags        PREG_SPLIT_NO_EMPTY skips zero-length pieces,
                     :                 PREG_SPLIT_DELIM_CAPTURE also appends the captured subpatterns
                     :                 of every delimiter match, PREG_SPLIT_OFFSET_CAPTURE appends
                     :                 through add_offset_pair() together with the piece's offset
                     :
                     :    NOTE(review): limit_val and flags are declared long while preg_split()
                     :    passes zend_long values - confirm the two types have the same width on
                     :    every supported platform.
    1587             :  */
    1588        2410 : PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
    1589             :         long limit_val, long flags TSRMLS_DC)
    1590             : {
    1591        2410 :         pcre_extra              *extra = NULL;          /* Holds results of studying */
    1592        2410 :         pcre                    *re_bump = NULL;        /* Regex instance for empty matches */
    1593        2410 :         pcre_extra              *extra_bump = NULL;     /* Almost dummy */
    1594             :         pcre_extra               extra_data;            /* Used locally for exec options */
    1595             :         int                             *offsets;                       /* Array of subpattern offsets */
    1596             :         int                              size_offsets;          /* Size of the offsets array */
    1597        2410 :         int                              exoptions = 0;         /* Execution options */
    1598        2410 :         int                              count = 0;                     /* Count of matched subpatterns */
    1599             :         int                              start_offset;          /* Where the new search starts */
    1600             :         int                              next_offset;           /* End of the last delimiter match + 1 */
    1601        2410 :         int                              g_notempty = 0;        /* If the match should not be empty */
    1602             :         char                    *last_match;            /* Location of last match */
    1603             :         int                              no_empty;                      /* If NO_EMPTY flag is set */
    1604             :         int                              delim_capture;         /* If delimiters should be captured */
    1605             :         int                              offset_capture;        /* If offsets should be captured */
    1606             :         ALLOCA_FLAG(use_heap);
    1607             : 
    1608        2410 :         no_empty = flags & PREG_SPLIT_NO_EMPTY;
    1609        2410 :         delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
    1610        2410 :         offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
    1611             :         
    1612        2410 :         if (limit_val == 0) { /* a limit of 0 is handled exactly like "no limit" */
    1613           1 :                 limit_val = -1;
    1614             :         }
    1615             : 
    1616        2410 :         if (extra == NULL) { /* always taken: extra is initialised to NULL above and not assigned before this test */
    1617        2410 :                 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
    1618        2410 :                 extra = &extra_data;
    1619             :         }
    1620        2410 :         extra->match_limit = PCRE_G(backtrack_limit);
    1621        2410 :         extra->match_limit_recursion = PCRE_G(recursion_limit);
    1622             : #ifdef PCRE_EXTRA_MARK
    1623        2410 :         extra->flags &= ~PCRE_EXTRA_MARK;
    1624             : #endif
    1625             :         
    1626             :         /* Initialize return value */
    1627        2410 :         array_init(return_value);
    1628             : 
    1629             :         /* Calculate the size of the offsets array, and allocate memory for it.
                     :            Up to 32 ints go through do_alloca(), larger buffers through
                     :            safe_emalloc(); the clean-up at the end of the function mirrors
                     :            this choice. */
    1630        2410 :         size_offsets = (pce->capture_count + 1) * 3;
    1631        2410 :         if (size_offsets <= 32) {
    1632        2410 :                 offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
    1633             :         } else {
    1634           0 :                 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
    1635             :         }
    1636             :         
    1637             :         /* Start at the beginning of the string */
    1638        2410 :         start_offset = 0;
    1639        2410 :         next_offset = 0;
    1640        2410 :         last_match = subject;
    1641        2410 :         PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
    1642             :         
    1643             :         /* Get next piece if no limit or limit not yet reached and something matched */
    1644        8956 :         while ((limit_val == -1 || limit_val > 1)) {
    1645        6544 :                 count = pcre_exec(pce->re, extra, subject,
    1646             :                                                   subject_len, start_offset,
    1647             :                                                   exoptions|g_notempty, offsets, size_offsets);
    1648             : 
    1649             :                 /* the string was already proved to be valid UTF-8 */
    1650        6544 :                 exoptions |= PCRE_NO_UTF8_CHECK;
    1651             : 
    1652             :                 /* Check for too many substrings condition. */
    1653        6544 :                 if (count == 0) {
    1654           0 :                         php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
    1655           0 :                         count = size_offsets/3;
    1656             :                 }
    1657             :                                 
    1658             :                 /* If something matched */
    1659        6544 :                 if (count > 0) {
    1660        4078 :                         if (!no_empty || &subject[offsets[0]] != last_match) { /* with NO_EMPTY, skip a piece that would have zero length */
    1661             : 
    1662        4019 :                                 if (offset_capture) {
    1663             :                                         /* Add (match, offset) pair to the return value */
    1664          26 :                                         add_offset_pair(return_value, last_match, &subject[offsets[0]]-last_match, next_offset, NULL);
    1665             :                                 } else {
    1666             :                                         /* Add the piece to the return value */
    1667        3993 :                                         add_next_index_stringl(return_value, last_match,
    1668        3993 :                                                                            &subject[offsets[0]]-last_match);
    1669             :                                 }
    1670             : 
    1671             :                                 /* One less left to do */
    1672        4019 :                                 if (limit_val != -1)
    1673           1 :                                         limit_val--;
    1674             :                         }
    1675             :                         
    1676        4078 :                         last_match = &subject[offsets[1]]; /* the next piece starts right after this delimiter match */
    1677        4078 :                         next_offset = offsets[1];
    1678             : 
    1679        4078 :                         if (delim_capture) {
    1680             :                                 int i, match_len;
    1681          62 :                                 for (i = 1; i < count; i++) { /* index 0 is the whole match, so captured groups start at 1 */
    1682          31 :                                         match_len = offsets[(i<<1)+1] - offsets[i<<1];
    1683             :                                         /* If we have matched a delimiter */
    1684          31 :                                         if (!no_empty || match_len > 0) {
    1685          21 :                                                 if (offset_capture) {
    1686          10 :                                                         add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
    1687             :                                                 } else {
    1688          22 :                                                         add_next_index_stringl(return_value,
    1689          11 :                                                                                                    &subject[offsets[i<<1]],
    1690             :                                                                                                    match_len);
    1691             :                                                 }
    1692             :                                         }
    1693             :                                 }
    1694             :                         }
    1695        2466 :                 } else if (count == PCRE_ERROR_NOMATCH) {
    1696             :                         /* If we previously set PCRE_NOTEMPTY after a null match,
    1697             :                            this is not necessarily the end. We need to advance
    1698             :                            the start offset, and continue. Fudge the offset values
    1699             :                            to achieve this, unless we're already at the end of the string.
                     :                            For PCRE_UTF8 patterns the helper pattern "/./us" is run at
                     :                            start_offset and its match fills offsets[]; otherwise
                     :                            offsets[] is set by hand to a span of exactly one byte. */
    1700        2465 :                         if (g_notempty != 0 && start_offset < subject_len) {
    1701          58 :                                 if (pce->compile_options & PCRE_UTF8) {
    1702          12 :                                         if (re_bump == NULL) { /* fetched at most once per call, on first need */
    1703             :                                                 int dummy;
    1704           2 :                                                 zend_string *regex = zend_string_init("/./us", sizeof("/./us")-1, 0);
    1705           2 :                                                 re_bump = pcre_get_compiled_regex(regex, &extra_bump, &dummy TSRMLS_CC);
    1706             :                                                 zend_string_release(regex);
    1707           2 :                                                 if (re_bump == NULL) {
    1708           0 :                                                         RETURN_FALSE; /* NOTE(review): this exit does not pass through the clean-up block at
                     :                                                                          the end of the function and happens after array_init();
                     :                                                                          confirm that offsets and the array are not leaked */
    1709             :                                                 }
    1710             :                                         }
    1711          12 :                                         count = pcre_exec(re_bump, extra_bump, subject,
    1712             :                                                           subject_len, start_offset,
    1713             :                                                           exoptions, offsets, size_offsets);
    1714          12 :                                         if (count < 1) {
    1715           0 :                                                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error");
    1716           0 :                                                 RETURN_FALSE; /* NOTE(review): same early exit as above, the clean-up block is not reached */
    1717             :                                         }
    1718             :                                 } else {
    1719          46 :                                         offsets[0] = start_offset;
    1720          46 :                                         offsets[1] = start_offset + 1;
    1721             :                                 }
    1722             :                         } else
    1723             :                                 break;
    1724             :                 } else {
    1725           1 :                         pcre_handle_exec_error(count TSRMLS_CC);
    1726           1 :                         break;
    1727             :                 }
    1728             : 
    1729             :                 /* If we have matched an empty string, mimic what Perl's /g options does.
    1730             :                    This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
    1731             :                    the match again at the same point. If this fails (picked up above) we
    1732             :                    advance to the next character. */
    1733        4136 :                 g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
    1734             :                 
    1735             :                 /* Advance to the position right after the last full match */
    1736        4136 :                 start_offset = offsets[1];
    1737             :         }
    1738             : 
    1739             : 
    1740        2410 :         start_offset = last_match - subject; /* the offset might have been incremented, but without further successful matches */
    1741             : 
    1742        2410 :         if (!no_empty || start_offset < subject_len) /* with NO_EMPTY, an empty tail is not appended */
    1743             :         {
    1744        2402 :                 if (offset_capture) {
    1745             :                         /* Add the last (match, offset) pair to the return value */
    1746           5 :                         add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
    1747             :                 } else {
    1748             :                         /* Add the last piece to the return value */
    1749        2397 :                         add_next_index_stringl(return_value, last_match, subject + subject_len - last_match);
    1750             :                 }
    1751             :         }
    1752             : 
    1753             :         
    1754             :         /* Clean up: release offsets the same way it was obtained above */
    1755        2410 :         if (size_offsets <= 32) {
    1756        2410 :                 free_alloca(offsets, use_heap);
    1757             :         } else {
    1758           0 :                 efree(offsets);
    1759             :         }
    1760             : }
    1761             : /* }}} */
    1762             : 
    1763             : /* {{{ proto string preg_quote(string str [, string delim_char])
    1764             :    Quote regular expression characters plus an optional character */
    1765        8073 : static PHP_FUNCTION(preg_quote)
    1766             : {
    1767             :         size_t           in_str_len;
    1768             :         char    *in_str;                /* Input string argument */
    1769             :         char    *in_str_end;    /* End of the input string */
    1770        8073 :         size_t           delim_len = 0;
    1771        8073 :         char    *delim = NULL;  /* Additional delimiter argument */
    1772             :         zend_string     *out_str;       /* Output string with quoted characters */
    1773             :         char    *p,                             /* Iterator for input string */
    1774             :                         *q,                             /* Iterator for output string */
    1775        8073 :                          delim_char=0,  /* Delimiter character to be quoted */
    1776             :                          c;                             /* Current character */
    1777        8073 :         zend_bool quote_delim = 0; /* Whether to quote additional delim char */
    1778             :         
    1779             :         /* Get the arguments and check for errors */
    1780             : #ifndef FAST_ZPP
    1781             :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", &in_str, &in_str_len,
    1782             :                                                           &delim, &delim_len) == FAILURE) {
    1783             :                 return;
    1784             :         }
    1785             : #else
    1786        8073 :         ZEND_PARSE_PARAMETERS_START(1, 2)
    1787       24210 :                 Z_PARAM_STRING(in_str, in_str_len)
    1788        8068 :                 Z_PARAM_OPTIONAL
    1789       24196 :                 Z_PARAM_STRING(delim, delim_len)
    1790        8073 :         ZEND_PARSE_PARAMETERS_END();
    1791             : #endif
    1792             :         
    1793        8068 :         in_str_end = in_str + in_str_len;
    1794             : 
    1795             :         /* Nothing to do if we got an empty string */
    1796        8068 :         if (in_str == in_str_end) {
    1797           5 :                 RETURN_EMPTY_STRING();
    1798             :         }
    1799             : 
    1800        8063 :         if (delim && *delim) {
    1801        8060 :                 delim_char = delim[0];
    1802        8060 :                 quote_delim = 1;
    1803             :         }
    1804             :         
    1805             :         /* Allocate enough memory so that even if each character
    1806             :            is quoted, we won't run out of room */
    1807       16126 :         out_str = zend_string_safe_alloc(4, in_str_len, 0, 0);
    1808             :         
    1809             :         /* Go through the string and quote necessary characters */
    1810     7535388 :         for (p = in_str, q = out_str->val; p != in_str_end; p++) {
    1811     7527325 :                 c = *p;
    1812     7527325 :                 switch(c) {
    1813             :                         case '.':
    1814             :                         case '\\':
    1815             :                         case '+':
    1816             :                         case '*':
    1817             :                         case '?':
    1818             :                         case '[':
    1819             :                         case '^':
    1820             :                         case ']':
    1821             :                         case '$':
    1822             :                         case '(':
    1823             :                         case ')':
    1824             :                         case '{':
    1825             :                         case '}':
    1826             :                         case '=':
    1827             :                         case '!':
    1828             :                         case '>':
    1829             :                         case '<':
    1830             :                         case '|':
    1831             :                         case ':':
    1832             :                         case '-':
    1833     1069713 :                                 *q++ = '\\';
    1834     1069713 :                                 *q++ = c;
    1835     1069713 :                                 break;
    1836             : 
    1837             :                         case '\0':
    1838        1029 :                                 *q++ = '\\';
    1839        1029 :                                 *q++ = '0';
    1840        1029 :                                 *q++ = '0';
    1841        1029 :                                 *q++ = '0';
    1842        1029 :                                 break;
    1843             : 
    1844             :                         default:
    1845     6456583 :                                 if (quote_delim && c == delim_char)
    1846       15028 :                                         *q++ = '\\';
    1847     6456583 :                                 *q++ = c;
    1848             :                                 break;
    1849             :                 }
    1850             :         }
    1851        8063 :         *q = '\0';
    1852             : 
    1853             :         /* Reallocate string and return it */
    1854       16126 :         out_str = zend_string_realloc(out_str, q - out_str->val, 0);
    1855        8063 :         RETURN_STR(out_str);
    1856             : }
    1857             : /* }}} */
    1858             : 
    1859             : /* {{{ proto array preg_grep(string regex, array input [, int flags])
    1860             :    Searches array and returns entries which match regex */
    1861          29 : static PHP_FUNCTION(preg_grep)
    1862             : {
    1863             :         zend_string                     *regex;                 /* Regular expression */
    1864             :         zval                            *input;                 /* Input array */
    1865          29 :         zend_long                        flags = 0;             /* Match control flags */
    1866             :         pcre_cache_entry        *pce;                   /* Compiled regular expression */
    1867             : 
    1868             :         /* Get arguments and do error checking */
    1869             : #ifndef FAST_ZPP
    1870             :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Sa|l", &regex,
    1871             :                                                           &input, &flags) == FAILURE) {
    1872             :                 return;
    1873             :         }
    1874             : #else
    1875          29 :         ZEND_PARSE_PARAMETERS_START(2, 3)
    1876          75 :                 Z_PARAM_STR(regex)
    1877          69 :                 Z_PARAM_ARRAY(input)
    1878          20 :                 Z_PARAM_OPTIONAL
    1879          26 :                 Z_PARAM_LONG(flags)
    1880          29 :         ZEND_PARSE_PARAMETERS_END();
    1881             : #endif
    1882             :         
    1883             :         /* Compile regex or get it from cache. */
    1884          20 :         if ((pce = pcre_get_compiled_regex_cache(regex TSRMLS_CC)) == NULL) {
    1885           5 :                 RETURN_FALSE;
    1886             :         }
    1887             :         
    1888          15 :         php_pcre_grep_impl(pce, input, return_value, flags TSRMLS_CC);
    1889             : }
    1890             : /* }}} */
    1891             : 
    1892          15 : PHPAPI void  php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, long flags TSRMLS_DC) /* {{{ */
    1893             : {
    1894             :         zval                *entry;                             /* An entry in the input array */
    1895          15 :         pcre_extra              *extra = pce->extra;/* Holds results of studying */
    1896             :         pcre_extra               extra_data;            /* Used locally for exec options */
    1897             :         int                             *offsets;                       /* Array of subpattern offsets */
    1898             :         int                              size_offsets;          /* Size of the offsets array */
    1899          15 :         int                              count = 0;                     /* Count of matched subpatterns */
    1900             :         zend_string             *string_key;
    1901             :         zend_ulong               num_key;
    1902             :         zend_bool                invert;                        /* Whether to return non-matching
    1903             :                                                                                    entries */
    1904             :         ALLOCA_FLAG(use_heap);
    1905             :         
    1906          15 :         invert = flags & PREG_GREP_INVERT ? 1 : 0;
    1907             :         
    1908          15 :         if (extra == NULL) {
    1909           0 :                 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
    1910           0 :                 extra = &extra_data;
    1911             :         }
    1912          15 :         extra->match_limit = PCRE_G(backtrack_limit);
    1913          15 :         extra->match_limit_recursion = PCRE_G(recursion_limit);
    1914             : #ifdef PCRE_EXTRA_MARK
    1915          15 :         extra->flags &= ~PCRE_EXTRA_MARK;
    1916             : #endif
    1917             : 
    1918             :         /* Calculate the size of the offsets array, and allocate memory for it. */
    1919          15 :         size_offsets = (pce->capture_count + 1) * 3;
    1920          15 :         if (size_offsets <= 32) {
    1921          15 :                 offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
    1922             :         } else {
    1923           0 :                 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
    1924             :         }
    1925             :         
    1926             :         /* Initialize return array */
    1927          15 :         array_init(return_value);
    1928             : 
    1929          15 :         PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
    1930             : 
    1931             :         /* Go through the input array */
    1932         171 :         ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
    1933          78 :                 zend_string *subject_str = zval_get_string(entry);
    1934             : 
    1935             :                 /* Perform the match */
    1936          78 :                 count = pcre_exec(pce->re, extra, subject_str->val,
    1937          78 :                                                   subject_str->len, 0,
    1938             :                                                   0, offsets, size_offsets);
    1939             : 
    1940             :                 /* Check for too many substrings condition. */
    1941          78 :                 if (count == 0) {
    1942           0 :                         php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
    1943           0 :                         count = size_offsets/3;
    1944          78 :                 } else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
    1945           0 :                         pcre_handle_exec_error(count TSRMLS_CC);
    1946             :                         zend_string_release(subject_str);
    1947           0 :                         break;
    1948             :                 }
    1949             : 
    1950             :                 /* If the entry fits our requirements */
    1951          78 :                 if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
    1952          33 :                         if (Z_REFCOUNTED_P(entry)) {
    1953             :                                 Z_ADDREF_P(entry);
    1954             :                         }
    1955             : 
    1956             :                         /* Add to return array */
    1957          33 :                         if (string_key) {
    1958           3 :                                 zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
    1959             :                         } else {
    1960          30 :                                 zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
    1961             :                         }
    1962             :                 }
    1963             : 
    1964             :                 zend_string_release(subject_str);
    1965             :         } ZEND_HASH_FOREACH_END();
    1966             : 
    1967             :         /* Clean up */
    1968          15 :         if (size_offsets <= 32) {
    1969          15 :                 free_alloca(offsets, use_heap);
    1970             :         } else {
    1971           0 :                 efree(offsets);
    1972             :         }
    1973          15 : }
    1974             : /* }}} */
    1975             : 
    1976             : /* {{{ proto int preg_last_error()
    1977             :    Returns the error code of the last regexp execution. */
    1978          17 : static PHP_FUNCTION(preg_last_error)
    1979             : {
    1980             : #ifndef FAST_ZPP
    1981             :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "") == FAILURE) {
    1982             :                 return;
    1983             :         }
    1984             : #else
    1985          17 :         ZEND_PARSE_PARAMETERS_START(0, 0)
    1986          17 :         ZEND_PARSE_PARAMETERS_END();
    1987             : #endif
    1988             : 
    1989          15 :         RETURN_LONG(PCRE_G(error_code));
    1990             : }
    1991             : /* }}} */
    1992             : 
    1993             : /* {{{ module definition structures */
    1994             : 
    1995             : /* {{{ arginfo */
    1996             : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
    1997             :     ZEND_ARG_INFO(0, pattern)
    1998             :     ZEND_ARG_INFO(0, subject)
    1999             :     ZEND_ARG_INFO(1, subpatterns) /* array */
    2000             :     ZEND_ARG_INFO(0, flags)
    2001             :     ZEND_ARG_INFO(0, offset)
    2002             : ZEND_END_ARG_INFO()
    2003             : 
    2004             : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 2)
    2005             :     ZEND_ARG_INFO(0, pattern)
    2006             :     ZEND_ARG_INFO(0, subject)
    2007             :     ZEND_ARG_INFO(1, subpatterns) /* array */
    2008             :     ZEND_ARG_INFO(0, flags)
    2009             :     ZEND_ARG_INFO(0, offset)
    2010             : ZEND_END_ARG_INFO()
    2011             : 
    2012             : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3)
    2013             :     ZEND_ARG_INFO(0, regex)
    2014             :     ZEND_ARG_INFO(0, replace)
    2015             :     ZEND_ARG_INFO(0, subject)
    2016             :     ZEND_ARG_INFO(0, limit)
    2017             :     ZEND_ARG_INFO(1, count)
    2018             : ZEND_END_ARG_INFO()
    2019             : 
    2020             : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
    2021             :     ZEND_ARG_INFO(0, regex)
    2022             :     ZEND_ARG_INFO(0, callback)
    2023             :     ZEND_ARG_INFO(0, subject)
    2024             :     ZEND_ARG_INFO(0, limit)
    2025             :     ZEND_ARG_INFO(1, count)
    2026             : ZEND_END_ARG_INFO()
    2027             : 
    2028             : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
    2029             :     ZEND_ARG_INFO(0, pattern)
    2030             :     ZEND_ARG_INFO(0, subject)
    2031             :     ZEND_ARG_INFO(0, limit)
    2032             :     ZEND_ARG_INFO(0, flags) 
    2033             : ZEND_END_ARG_INFO()
    2034             : 
    2035             : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
    2036             :     ZEND_ARG_INFO(0, str)
    2037             :     ZEND_ARG_INFO(0, delim_char)
    2038             : ZEND_END_ARG_INFO()
    2039             : 
    2040             : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2)
    2041             :     ZEND_ARG_INFO(0, regex)
    2042             :     ZEND_ARG_INFO(0, input) /* array */
    2043             :     ZEND_ARG_INFO(0, flags)
    2044             : ZEND_END_ARG_INFO()
    2045             : 
    2046             : ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0)
    2047             : ZEND_END_ARG_INFO()
    2048             : /* }}} */
    2049             : 
    2050             : static const zend_function_entry pcre_functions[] = {
    2051             :         PHP_FE(preg_match,                              arginfo_preg_match)
    2052             :         PHP_FE(preg_match_all,                  arginfo_preg_match_all)
    2053             :         PHP_FE(preg_replace,                    arginfo_preg_replace)
    2054             :         PHP_FE(preg_replace_callback,   arginfo_preg_replace_callback)
    2055             :         PHP_FE(preg_filter,                             arginfo_preg_replace)
    2056             :         PHP_FE(preg_split,                              arginfo_preg_split)
    2057             :         PHP_FE(preg_quote,                              arginfo_preg_quote)
    2058             :         PHP_FE(preg_grep,                               arginfo_preg_grep)
    2059             :         PHP_FE(preg_last_error,                 arginfo_preg_last_error)
    2060             :         PHP_FE_END
    2061             : };
    2062             : 
    2063             : zend_module_entry pcre_module_entry = {
    2064             :         STANDARD_MODULE_HEADER,
    2065             :    "pcre",
    2066             :         pcre_functions,
    2067             :         PHP_MINIT(pcre),
    2068             :         PHP_MSHUTDOWN(pcre),
    2069             :         NULL,
    2070             :         NULL,
    2071             :         PHP_MINFO(pcre),
    2072             :         NO_VERSION_YET,
    2073             :         PHP_MODULE_GLOBALS(pcre),
    2074             :         PHP_GINIT(pcre),
    2075             :         PHP_GSHUTDOWN(pcre),
    2076             :         NULL,
    2077             :         STANDARD_MODULE_PROPERTIES_EX
    2078             : };
    2079             : 
    2080             : #ifdef COMPILE_DL_PCRE
    2081             : ZEND_GET_MODULE(pcre)
    2082             : #endif
    2083             : 
    2084             : /* }}} */
    2085             : 
    2086             : #endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
    2087             : 
    2088             : /*
    2089             :  * Local variables:
    2090             :  * tab-width: 4
    2091             :  * c-basic-offset: 4
    2092             :  * End:
    2093             :  * vim600: sw=4 ts=4 fdm=marker
    2094             :  * vim<600: sw=4 ts=4
    2095             :  */

Generated by: LCOV version 1.10

Generated at Thu, 30 Oct 2014 07:41:35 +0000 (7 hours ago)

Copyright © 2005-2014 The PHP Group
All rights reserved.