PHP  
 PHP: Test and Code Coverage Analysis
downloads | QA | documentation | faq | getting help | mailing lists | reporting bugs | php.net sites | links | my php.net 
 

LCOV - code coverage report
Current view: top level - ext/pcre - php_pcre.c (source / functions) Hit Total Coverage
Test: PHP Code Coverage Lines: 875 949 92.2 %
Date: 2016-07-19 Functions: 33 34 97.1 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :    +----------------------------------------------------------------------+
       3             :    | PHP Version 7                                                        |
       4             :    +----------------------------------------------------------------------+
       5             :    | Copyright (c) 1997-2016 The PHP Group                                |
       6             :    +----------------------------------------------------------------------+
       7             :    | This source file is subject to version 3.01 of the PHP license,      |
       8             :    | that is bundled with this package in the file LICENSE, and is        |
       9             :    | available through the world-wide-web at the following url:           |
      10             :    | http://www.php.net/license/3_01.txt                                  |
      11             :    | If you did not receive a copy of the PHP license and are unable to   |
      12             :    | obtain it through the world-wide-web, please send a note to          |
      13             :    | license@php.net so we can mail you a copy immediately.               |
      14             :    +----------------------------------------------------------------------+
      15             :    | Author: Andrei Zmievski <andrei@php.net>                             |
      16             :    +----------------------------------------------------------------------+
      17             :  */
      18             : 
      19             : /* $Id$ */
      20             : 
      21             : #include "php.h"
      22             : #include "php_ini.h"
      23             : #include "php_globals.h"
      24             : #include "php_pcre.h"
      25             : #include "ext/standard/info.h"
      26             : #include "ext/standard/basic_functions.h"
      27             : #include "zend_smart_str.h"
      28             : 
      29             : #if HAVE_PCRE || HAVE_BUNDLED_PCRE
      30             : 
      31             : #include "ext/standard/php_string.h"
      32             : 
      33             : #define PREG_PATTERN_ORDER                      1 /* this and the other PREG_* values below, except PREG_REPLACE_EVAL, are registered as PHP constants in MINIT */
      34             : #define PREG_SET_ORDER                          2
      35             : #define PREG_OFFSET_CAPTURE                     (1<<8)
      36             : 
      37             : #define PREG_SPLIT_NO_EMPTY                     (1<<0)
      38             : #define PREG_SPLIT_DELIM_CAPTURE        (1<<1)
      39             : #define PREG_SPLIT_OFFSET_CAPTURE       (1<<2)
      40             : 
      41             : #define PREG_REPLACE_EVAL                       (1<<0) /* stored in a cache entry's preg_options when the 'e' pattern modifier is present */
      42             : 
      43             : #define PREG_GREP_INVERT                        (1<<0)
      44             : 
      45             : #define PCRE_CACHE_SIZE 4096 /* when the regex cache holds exactly this many entries, up to PCRE_CACHE_SIZE / 8 entries with a zero refcount are evicted */
      46             : 
      47             : /* not fully functional workaround for libpcre < 8.0, see bug #70232 */
      48             : #ifndef PCRE_NOTEMPTY_ATSTART
      49             : # define PCRE_NOTEMPTY_ATSTART PCRE_NOTEMPTY
      50             : #endif
      51             : 
      52             : enum { /* values stored in PCRE_G(error_code); registered in MINIT as the PREG_*_ERROR constants */
      53             :         PHP_PCRE_NO_ERROR = 0,
      54             :         PHP_PCRE_INTERNAL_ERROR, /* used for any pcre_exec() error code without a dedicated mapping */
      55             :         PHP_PCRE_BACKTRACK_LIMIT_ERROR, /* mapped from PCRE_ERROR_MATCHLIMIT */
      56             :         PHP_PCRE_RECURSION_LIMIT_ERROR, /* mapped from PCRE_ERROR_RECURSIONLIMIT */
      57             :         PHP_PCRE_BAD_UTF8_ERROR, /* mapped from PCRE_ERROR_BADUTF8 */
      58             :         PHP_PCRE_BAD_UTF8_OFFSET_ERROR, /* mapped from PCRE_ERROR_BADUTF8_OFFSET */
      59             :         PHP_PCRE_JIT_STACKLIMIT_ERROR /* mapped from PCRE_ERROR_JIT_STACKLIMIT when PCRE_STUDY_JIT_COMPILE is defined */
      60             : };
      61             : 
      62             : 
      63             : PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)
      64             : 
      65             : #ifdef PCRE_STUDY_JIT_COMPILE /* JIT support is compiled in only when the PCRE headers define this study option */
      66             : #define PCRE_JIT_STACK_MIN_SIZE (32 * 1024) /* first argument to pcre_jit_stack_alloc() in RINIT */
      67             : #define PCRE_JIT_STACK_MAX_SIZE (64 * 1024) /* second argument to pcre_jit_stack_alloc() in RINIT */
      68             : ZEND_TLS pcre_jit_stack *jit_stack = NULL; /* allocated in RINIT, assigned to studied patterns, freed in GSHUTDOWN */
      69             : #endif
      70             : 
      71          17 : static void pcre_handle_exec_error(int pcre_code) /* {{{ Translates a PCRE_ERROR_* code into the corresponding PHP_PCRE_* code and stores it in PCRE_G(error_code) */
      72             : {
      73          17 :         int preg_code = 0;
      74             : 
      75          17 :         switch (pcre_code) {
      76             :                 case PCRE_ERROR_MATCHLIMIT:
      77           4 :                         preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
      78           4 :                         break;
      79             : 
      80             :                 case PCRE_ERROR_RECURSIONLIMIT:
      81           2 :                         preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
      82           2 :                         break;
      83             : 
      84             :                 case PCRE_ERROR_BADUTF8:
      85           8 :                         preg_code = PHP_PCRE_BAD_UTF8_ERROR;
      86           8 :                         break;
      87             : 
      88             :                 case PCRE_ERROR_BADUTF8_OFFSET:
      89           1 :                         preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
      90           1 :                         break;
      91             :                 
      92             : #ifdef PCRE_STUDY_JIT_COMPILE
      93             :                 case PCRE_ERROR_JIT_STACKLIMIT:
      94           1 :                         preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
      95           1 :                         break;
      96             : #endif
      97             : 
      98             :                 default: /* every other code is reported as a generic internal error */
      99           1 :                         preg_code = PHP_PCRE_INTERNAL_ERROR;
     100             :                         break;
     101             :         }
     102             : 
     103          17 :         PCRE_G(error_code) = preg_code;
     104          17 : }
     105             : /* }}} */
     106             : 
     107       61484 : static void php_free_pcre_cache(zval *data) /* {{{ Element destructor of the regex cache: releases everything owned by one pcre_cache_entry */
     108             : {
     109       61484 :         pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
     110       61484 :         if (!pce) return;
     111       61484 :         pcre_free(pce->re); /* the compiled pattern */
     112       61484 :         if (pce->extra) { /* study data is only present when the pattern was studied */
     113       61476 :                 pcre_free_study(pce->extra);
     114             :         }
     115             : #if HAVE_SETLOCALE
     116       61484 :         if ((void*)pce->tables) pefree((void*)pce->tables, 1); /* character tables, set only when the pattern was compiled under a locale other than "C" */
     117       61484 :         if (pce->locale) { /* locale name recorded when the entry was created */
     118           8 :                 zend_string_release(pce->locale);
     119             :         }
     120             : #endif
     121       61484 :         pefree(pce, 1); /* finally the entry itself */
     122             : }
     123             : /* }}} */
     124             : 
     125       23409 : static PHP_GINIT_FUNCTION(pcre) /* {{{ Initializes the module globals: the regex cache hash, both limits, and the last error code */
     126             : {
     127       23409 :         zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1); /* php_free_pcre_cache is the element destructor */
     128       23409 :         pcre_globals->backtrack_limit = 0; /* the pcre.backtrack_limit INI entry declares its own default ("1000000") */
     129       23409 :         pcre_globals->recursion_limit = 0; /* the pcre.recursion_limit INI entry declares its own default ("100000") */
     130       23409 :         pcre_globals->error_code      = PHP_PCRE_NO_ERROR;
     131       23409 : }
     132             : /* }}} */
     133             : 
     134       23445 : static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ Destroys the regex cache and then frees the JIT stack, if one was allocated */
     135             : {
     136       23445 :         zend_hash_destroy(&pcre_globals->pcre_cache);
     137             : 
     138             : #ifdef PCRE_STUDY_JIT_COMPILE
     139             :         /* The stack may only be destroyed once no cached patterns
     140             :                 that might be associated with it exist. */
     141       23445 :         if (jit_stack) {
     142       23395 :                 pcre_jit_stack_free(jit_stack);
     143       23395 :                 jit_stack = NULL; /* reset so that RINIT can allocate a new stack */
     144             :         }
     145             : #endif
     146             : 
     147       23445 : }
     148             : /* }}} */
     149             : 
     150             : PHP_INI_BEGIN() /* INI settings; each entry names the zend_pcre_globals field it is bound to */
     151             :         STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
     152             :         STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000",  PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
     153             : #ifdef PCRE_STUDY_JIT_COMPILE /* pcre.jit is only declared when JIT support is compiled in */
     154             :         STD_PHP_INI_ENTRY("pcre.jit",             "1",       PHP_INI_ALL, OnUpdateBool, jit,             zend_pcre_globals, pcre_globals)
     155             : #endif
     156             : PHP_INI_END()
     157             : 
     158             : 
     159             : /* {{{ PHP_MINFO_FUNCTION(pcre): prints the module info table (PCRE support, library version, JIT availability) followed by the INI entries */
     160         150 : static PHP_MINFO_FUNCTION(pcre)
     161             : {
     162         150 :         int jit_yes = 0;
     163             : 
     164         150 :         php_info_print_table_start();
     165         150 :         php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
     166         150 :         php_info_print_table_row(2, "PCRE Library Version", pcre_version() );
     167             : 
     168         150 :         if (!pcre_config(PCRE_CONFIG_JIT, &jit_yes)) { /* a zero return is treated as success; jit_yes then holds the answer */
     169         150 :                 php_info_print_table_row(2, "PCRE JIT Support", jit_yes ? "enabled" : "disabled");
     170             :         } else {
     171           0 :                 php_info_print_table_row(2, "PCRE JIT Support", "unknown" ); /* the query itself failed */
     172             :         }
     173             : 
     174         150 :         php_info_print_table_end();
     175             : 
     176         150 :         DISPLAY_INI_ENTRIES();
     177         150 : }
     178             : /* }}} */
     179             : 
     180             : /* {{{ PHP_MINIT_FUNCTION(pcre): registers the INI entries and the PREG_* and PCRE_VERSION constants; always returns SUCCESS */
     181       23409 : static PHP_MINIT_FUNCTION(pcre)
     182             : {
     183       23409 :         REGISTER_INI_ENTRIES();
     184             : 
     185       23409 :         REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
     186       23409 :         REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
     187       23409 :         REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
     188       23409 :         REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
     189       23409 :         REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
     190       23409 :         REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
     191       23409 :         REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
     192             : 
     193       23409 :         REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT); /* the error constants mirror the PHP_PCRE_* enum */
     194       23409 :         REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
     195       23409 :         REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
     196       23409 :         REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
     197       23409 :         REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
     198       23409 :         REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
     199       23409 :         REGISTER_LONG_CONSTANT("PREG_JIT_STACKLIMIT_ERROR", PHP_PCRE_JIT_STACKLIMIT_ERROR, CONST_CS | CONST_PERSISTENT); /* registered even when JIT support is not compiled in */
     200       23409 :         REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT); /* version string reported by the PCRE library at runtime */
     201             : 
     202       23409 :         return SUCCESS;
     203             : }
     204             : /* }}} */
     205             : 
     206             : /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre): unregisters the INI entries registered in MINIT; always returns SUCCESS */
     207       23445 : static PHP_MSHUTDOWN_FUNCTION(pcre)
     208             : {
     209       23445 :         UNREGISTER_INI_ENTRIES();
     210             : 
     211       23445 :         return SUCCESS;
     212             : }
     213             : /* }}} */
     214             : 
     215             : #ifdef PCRE_STUDY_JIT_COMPILE
     216             : /* {{{ PHP_RINIT_FUNCTION(pcre): allocates the JIT stack if pcre.jit is enabled and no stack exists yet; always returns SUCCESS */
     217       23363 : static PHP_RINIT_FUNCTION(pcre)
     218             : {
     219       23363 :         if (PCRE_G(jit) && jit_stack == NULL) { /* an existing stack is kept and reused */
     220       23357 :                 jit_stack = pcre_jit_stack_alloc(PCRE_JIT_STACK_MIN_SIZE,PCRE_JIT_STACK_MAX_SIZE); /* NOTE(review): the result is not checked here; users of jit_stack test it for NULL */
     221             :         }
     222             : 
     223       23363 :         return SUCCESS;
     224             : }
     225             : /* }}} */
     226             : #endif
     227             : 
     228             : /* {{{ static pcre_clean_cache: hash apply callback that removes entries with a zero refcount until the budget pointed to by arg is used up */
     229      274432 : static int pcre_clean_cache(zval *data, void *arg)
     230             : {
     231      274432 :         pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
     232      274432 :         int *num_clean = (int *)arg; /* number of entries still to be removed; shared across all invocations of one apply pass */
     233             : 
     234      274432 :         if (*num_clean > 0 && !pce->refcount) { /* entries with a non-zero refcount are never removed */
     235       34304 :                 (*num_clean)--;
     236       34304 :                 return ZEND_HASH_APPLY_REMOVE;
     237             :         } else {
     238      240128 :                 return ZEND_HASH_APPLY_KEEP;
     239             :         }
     240             : }
     241             : /* }}} */
     242             : 
     243             : /* {{{ static make_subpats_table: returns an ecalloc()ed array of num_subpats entries, indexed by subpattern number, pointing at the name of each named subpattern (NULL for unnamed ones); the names point into PCRE's name table. Returns NULL after emitting a warning if pcre_fullinfo() fails or a name is numeric. */
     244          10 : static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce)
     245             : {
     246          10 :         pcre_extra *extra = pce->extra;
     247          10 :         int name_cnt = pce->name_count, name_size, ni = 0;
     248             :         int rc;
     249             :         char *name_table;
     250             :         unsigned short name_idx;
     251             :         char **subpat_names;
     252             :         int rc1, rc2;
     253             : 
     254          10 :         rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
     255          10 :         rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
     256          10 :         rc = rc2 ? rc2 : rc1; /* report the second call's error if it failed, otherwise the first call's result */
     257          10 :         if (rc < 0) {
     258           0 :                 php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
     259           0 :                 return NULL;
     260             :         }
     261             : 
     262          10 :         subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
     263         174 :         while (ni++ < name_cnt) { /* one fixed-size table entry per named subpattern */
     264         154 :                 name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1]; /* group number is a 16-bit value, most significant byte first; the former 0xff multiplier mis-indexed groups >= 256 */
     265         154 :                 subpat_names[name_idx] = name_table + 2; /* the NUL-terminated name follows the two number bytes */
     266         308 :                 if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
     267           0 :                         php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed");
     268           0 :                         efree(subpat_names);
     269           0 :                         return NULL;
     270             :                 }
     271         154 :                 name_table += name_size; /* advance to the next entry */
     272             :         }
     273          10 :         return subpat_names;
     274             : }
     275             : /* }}} */
     276             : 
     277             : /* {{{ static calculate_unit_length */
     278             : /* Calculates the byte length of the character that begins at start: 1 unless the pattern was compiled with PCRE_UTF8, in which case the following continuation bytes are counted too. Assumes valid UTF-8 for PCRE_UTF8. */
     279             : static zend_always_inline int calculate_unit_length(pcre_cache_entry *pce, char *start)
     280             : {
     281             :         int unit_len;
     282             : 
     283          79 :         if (pce->compile_options & PCRE_UTF8) {
     284          22 :                 char *end = start;
     285             : 
     286             :                 /* skip continuation bytes (10xxxxxx); always advances by at least one byte */
     287          35 :                 while ((*++end & 0xC0) == 0x80);
     288          22 :                 unit_len = end - start; /* lead byte plus the continuation bytes skipped above */
     289             :         } else {
     290          57 :                 unit_len = 1; /* without PCRE_UTF8 every character is one byte */
     291             :         }
     292          79 :         return unit_len;
     293             : }
     294             : /* }}} */
     295             : 
     296             : /* {{{ pcre_get_compiled_regex_cache
     297             :  */
     298     1970179 : PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
     299             : {
     300     1970179 :         pcre                            *re = NULL;
     301             :         pcre_extra                      *extra;
     302     1970179 :         int                                      coptions = 0;
     303     1970179 :         int                                      soptions = 0;
     304             :         const char                      *error;
     305             :         int                                      erroffset;
     306             :         char                             delimiter;
     307             :         char                             start_delimiter;
     308             :         char                             end_delimiter;
     309             :         char                            *p, *pp;
     310             :         char                            *pattern;
     311     1970179 :         int                                      do_study = 0;
     312     1970179 :         int                                      poptions = 0;
     313     1970179 :         unsigned const char *tables = NULL;
     314             :         pcre_cache_entry        *pce;
     315             :         pcre_cache_entry         new_entry;
     316             :         int                                      rc;
     317             : 
     318             :         /* Try to lookup the cached regex entry, and if successful, just pass
     319             :            back the compiled pattern, otherwise go on and compile it. */
     320     1970179 :         pce = zend_hash_find_ptr(&PCRE_G(pcre_cache), regex);
     321     1970179 :         if (pce) {
     322             : #if HAVE_SETLOCALE
     323     1908734 :                 if (pce->locale == BG(locale_string) ||
     324          40 :                     (pce->locale && BG(locale_string) &&
     325          20 :                      ZSTR_LEN(pce->locale) == ZSTR_LEN(BG(locale_string)) &&
     326          18 :                      !memcmp(ZSTR_VAL(pce->locale), ZSTR_VAL(BG(locale_string)), ZSTR_LEN(pce->locale))) ||
     327           2 :                     (!pce->locale &&
     328           0 :                      ZSTR_LEN(BG(locale_string)) == 1 &&
     329           0 :                      ZSTR_VAL(BG(locale_string))[0] == 'C') ||
     330           2 :                     (!BG(locale_string) &&
     331           0 :                      ZSTR_LEN(pce->locale) == 1 &&
     332           0 :                      ZSTR_VAL(pce->locale)[0] == 'C')) {
     333     1908650 :                         return pce;
     334             :                 }
     335             : #else
     336             :                 return pce;
     337             : #endif
     338             :         }
     339             : 
     340       61529 :         p = ZSTR_VAL(regex);
     341             : 
     342             :         /* Parse through the leading whitespace, and display a warning if we
     343             :            get to the end without encountering a delimiter. */
     344       61529 :         while (isspace((int)*(unsigned char *)p)) p++;
     345       61529 :         if (*p == 0) {
     346           5 :                 php_error_docref(NULL, E_WARNING,
     347           5 :                                                  p < ZSTR_VAL(regex) + ZSTR_LEN(regex) ? "Null byte in regex" : "Empty regular expression");
     348           5 :                 return NULL;
     349             :         }
     350             : 
     351             :         /* Get the delimiter and display a warning if it is alphanumeric
     352             :            or a backslash. */
     353       61524 :         delimiter = *p++;
     354       61524 :         if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
     355           7 :                 php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash");
     356           7 :                 return NULL;
     357             :         }
     358             : 
     359       61517 :         start_delimiter = delimiter;
     360       61517 :         if ((pp = strchr("([{< )]}> )]}>", delimiter)))
     361          30 :                 delimiter = pp[5];
     362       61517 :         end_delimiter = delimiter;
     363             : 
     364       61517 :         pp = p;
     365             : 
     366       61517 :         if (start_delimiter == end_delimiter) {
     367             :                 /* We need to iterate through the pattern, searching for the ending delimiter,
     368             :                    but skipping the backslashed delimiters.  If the ending delimiter is not
     369             :                    found, display a warning. */
     370    17149877 :                 while (*pp != 0) {
     371    17088380 :                         if (*pp == '\\' && pp[1] != 0) pp++;
     372    15875285 :                         else if (*pp == delimiter)
     373       61477 :                                 break;
     374    17026903 :                         pp++;
     375             :                 }
     376             :         } else {
     377             :                 /* We iterate through the pattern, searching for the matching ending
     378             :                  * delimiter. For each matching starting delimiter, we increment nesting
     379             :                  * level, and decrement it for each matching ending delimiter. If we
     380             :                  * reach the end of the pattern without matching, display a warning.
     381             :                  */
     382          30 :                 int brackets = 1;       /* brackets nesting level */
     383         923 :                 while (*pp != 0) {
     384         890 :                         if (*pp == '\\' && pp[1] != 0) pp++;
     385         768 :                         else if (*pp == end_delimiter && --brackets <= 0)
     386             :                                 break;
     387         741 :                         else if (*pp == start_delimiter)
     388          24 :                                 brackets++;
     389         863 :                         pp++;
     390             :                 }
     391             :         }
     392             : 
     393       61517 :         if (*pp == 0) {
     394          13 :                 if (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
     395           4 :                         php_error_docref(NULL,E_WARNING, "Null byte in regex");
     396           9 :                 } else if (start_delimiter == end_delimiter) {
     397           8 :                         php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
     398             :                 } else {
     399           1 :                         php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
     400             :                 }
     401          13 :                 return NULL;
     402             :         }
     403             : 
     404             :         /* Make a copy of the actual pattern. */
     405       61504 :         pattern = estrndup(p, pp-p);
     406             : 
     407             :         /* Move on to the options */
     408       61504 :         pp++;
     409             : 
     410             :         /* Parse through the options, setting appropriate flags.  Display
     411             :            a warning if we encounter an unknown modifier. */
     412      137878 :         while (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
     413       14888 :                 switch (*pp++) {
     414             :                         /* Perl compatible options */
     415        1932 :                         case 'i':       coptions |= PCRE_CASELESS;              break;
     416        2388 :                         case 'm':       coptions |= PCRE_MULTILINE;             break;
     417       10453 :                         case 's':       coptions |= PCRE_DOTALL;                break;
     418           5 :                         case 'x':       coptions |= PCRE_EXTENDED;              break;
     419             : 
     420             :                         /* PCRE specific options */
     421           2 :                         case 'A':       coptions |= PCRE_ANCHORED;              break;
     422           9 :                         case 'D':       coptions |= PCRE_DOLLAR_ENDONLY;break;
     423          25 :                         case 'S':       do_study  = 1;                                  break;
     424          20 :                         case 'U':       coptions |= PCRE_UNGREEDY;              break;
     425           1 :                         case 'X':       coptions |= PCRE_EXTRA;                 break;
     426          31 :                         case 'u':       coptions |= PCRE_UTF8;
     427             :         /* In  PCRE,  by  default, \d, \D, \s, \S, \w, and \W recognize only ASCII
     428             :        characters, even in UTF-8 mode. However, this can be changed by setting
     429             :        the PCRE_UCP option. */
     430             : #ifdef PCRE_UCP
     431          31 :                                                 coptions |= PCRE_UCP;
     432             : #endif
     433          31 :                                 break;
     434             : 
     435             :                         /* Custom preg options */
     436           2 :                         case 'e':       poptions |= PREG_REPLACE_EVAL;  break;
     437             : 
     438             :                         case ' ':
     439             :                         case '\n':
     440           2 :                                 break;
     441             : 
     442             :                         default:
     443          18 :                                 if (pp[-1]) {
     444          13 :                                         php_error_docref(NULL,E_WARNING, "Unknown modifier '%c'", pp[-1]);
     445             :                                 } else {
     446           5 :                                         php_error_docref(NULL,E_WARNING, "Null byte in regex");
     447             :                                 }
     448          18 :                                 efree(pattern);
     449          18 :                                 return NULL;
     450             :                 }
     451             :         }
     452             : 
     453             : #if HAVE_SETLOCALE
     454       61498 :         if (BG(locale_string) &&
     455          12 :             (ZSTR_LEN(BG(locale_string)) != 1 || ZSTR_VAL(BG(locale_string))[0] != 'C')) {
     456           4 :                 tables = pcre_maketables();
     457             :         }
     458             : #endif
     459             : 
     460             :         /* Compile pattern and display a warning if compilation failed. */
     461       61486 :         re = pcre_compile(pattern,
     462             :                                           coptions,
     463             :                                           &error,
     464             :                                           &erroffset,
     465             :                                           tables);
     466             : 
     467       61486 :         if (re == NULL) {
     468           6 :                 php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
     469           6 :                 efree(pattern);
     470           6 :                 if (tables) {
     471           0 :                         pefree((void*)tables, 1);
     472             :                 }
     473           6 :                 return NULL;
     474             :         }
     475             : 
     476             : #ifdef PCRE_STUDY_JIT_COMPILE
     477       61480 :         if (PCRE_G(jit)) {
     478             :                 /* Enable PCRE JIT compiler */
     479       61472 :                 do_study = 1;
     480       61472 :                 soptions |= PCRE_STUDY_JIT_COMPILE;
     481             :         }
     482             : #endif
     483             : 
     484             :         /* If study option was specified, study the pattern and
     485             :            store the result in extra for passing to pcre_exec. */
     486       61480 :         if (do_study) {
     487       61472 :                 extra = pcre_study(re, soptions, &error);
     488       61472 :                 if (extra) {
     489       61472 :                         extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
     490       61472 :                         extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
     491       61472 :                         extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
     492             : #ifdef PCRE_STUDY_JIT_COMPILE
     493       61472 :                         if (PCRE_G(jit) && jit_stack) {
     494       61472 :                                 pcre_assign_jit_stack(extra, NULL, jit_stack);
     495             :                         }
     496             : #endif
     497             :                 }
     498       61472 :                 if (error != NULL) {
     499           0 :                         php_error_docref(NULL, E_WARNING, "Error while studying pattern");
     500             :                 }
     501             :         } else {
     502           8 :                 extra = NULL;
     503             :         }
     504             : 
     505       61480 :         efree(pattern);
     506             : 
     507             :         /*
     508             :          * If we reached cache limit, clean out the items from the head of the list;
     509             :          * these are supposedly the oldest ones (but not necessarily the least used
     510             :          * ones).
     511             :          */
     512       61480 :         if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
     513          67 :                 int num_clean = PCRE_CACHE_SIZE / 8;
     514          67 :                 zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
     515             :         }
     516             : 
     517             :         /* Store the compiled pattern and extra info in the cache. */
     518       61480 :         new_entry.re = re;
     519       61480 :         new_entry.extra = extra;
     520       61480 :         new_entry.preg_options = poptions;
     521       61480 :         new_entry.compile_options = coptions;
     522             : #if HAVE_SETLOCALE
     523       61488 :         new_entry.locale = BG(locale_string) ?
     524          16 :                 ((GC_FLAGS(BG(locale_string)) & IS_STR_PERSISTENT) ?
     525           0 :                         zend_string_copy(BG(locale_string)) :
     526           8 :                         zend_string_init(ZSTR_VAL(BG(locale_string)), ZSTR_LEN(BG(locale_string)), 1)) :
     527             :                 NULL;
     528       61480 :         new_entry.tables = tables;
     529             : #endif
     530       61480 :         new_entry.refcount = 0;
     531             : 
     532       61480 :         rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &new_entry.capture_count);
     533       61480 :         if (rc < 0) {
     534           0 :                 php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
     535           0 :                 return NULL;
     536             :         }
     537             : 
     538       61480 :         rc = pcre_fullinfo(re, extra, PCRE_INFO_NAMECOUNT, &new_entry.name_count);
     539       61480 :         if (rc < 0) {
     540           0 :                 php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
     541           0 :                 return NULL;
     542             :         }
     543             : 
     544             :         /*
     545             :          * Interned strings are not duplicated when stored in HashTable,
     546             :          * but all the interned strings created during HTTP request are removed
     547             :          * at end of request. However PCRE_G(pcre_cache) must be consistent
     548             :          * on the next request as well. So we disable usage of interned strings
     549             :          * as hash keys especially for this table.
     550             :          * See bug #63180
     551             :          */
     552       61480 :         if (!ZSTR_IS_INTERNED(regex) || !(GC_FLAGS(regex) & IS_STR_PERMANENT)) {
     553      122960 :                 zend_string *str = zend_string_init(ZSTR_VAL(regex), ZSTR_LEN(regex), 1);
     554       61480 :                 GC_REFCOUNT(str) = 0; /* will be incremented by zend_hash_update_mem() */
     555       61480 :                 ZSTR_H(str) = ZSTR_H(regex);
     556       61480 :                 regex = str;
     557             :         }
     558             : 
     559       61480 :         pce = zend_hash_update_mem(&PCRE_G(pcre_cache), regex, &new_entry, sizeof(pcre_cache_entry));
     560             : 
     561       61480 :         return pce;
     562             : }
     563             : /* }}} */
     564             : 
     565             : /* {{{ pcre_get_compiled_regex
     566             :  * Look up regex through pcre_get_compiled_regex_cache() and return the entry's compiled pattern (pce->re), or NULL when no entry comes back. */
     567      131052 : PHPAPI pcre* pcre_get_compiled_regex(zend_string *regex, pcre_extra **extra, int *preg_options)
     568             : {
     569      131052 :         pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
     570             :         /* extra and preg_options are optional out-parameters: each is written only when non-NULL, and receives NULL / 0 if there is no cache entry. */
     571      131052 :         if (extra) {
     572      131052 :                 *extra = pce ? pce->extra : NULL;
     573             :         }
     574      131052 :         if (preg_options) {
     575      131052 :                 *preg_options = pce ? pce->preg_options : 0;
     576             :         }
     577             : 
     578      131052 :         return pce ? pce->re : NULL;
     579             : }
     580             : /* }}} */
     581             : 
     582             : /* {{{ pcre_get_compiled_regex_ex
     583             :  * Same lookup as pcre_get_compiled_regex(), with one more optional out-parameter, compile_options, filled from the entry's compile_options field. Returns pce->re, or NULL when no entry comes back. */
     584           0 : PHPAPI pcre* pcre_get_compiled_regex_ex(zend_string *regex, pcre_extra **extra, int *preg_options, int *compile_options)
     585             : {
     586           0 :         pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
     587             :         /* Every out-parameter may be NULL; the non-NULL ones are always written, falling back to NULL / 0 / 0 if there is no cache entry. */
     588           0 :         if (extra) {
     589           0 :                 *extra = pce ? pce->extra : NULL;
     590             :         }
     591           0 :         if (preg_options) {
     592           0 :                 *preg_options = pce ? pce->preg_options : 0;
     593             :         }
     594           0 :         if (compile_options) {
     595           0 :                 *compile_options = pce ? pce->compile_options : 0;
     596             :         }
     597             : 
     598           0 :         return pce ? pce->re : NULL;
     599             : }
     600             : /* }}} */
     601             : 
     602             : /* {{{ add_offset_pair - append a two-element array holding the first len bytes of str and offset to the array in result; when name is non-NULL the same array is also stored under that string key. */
     603          75 : static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
     604             : {
     605             :         zval match_pair, tmp;
     606             :         /* match_pair is the new two-element array; tmp is scratch storage reused for each element before it is inserted. */
     607          75 :         array_init_size(&match_pair, 2);
     608             : 
     609             :         /* Fill the pair: element 0 is the matched text, element 1 is its offset */
     610         150 :         ZVAL_STRINGL(&tmp, str, len);
     611          75 :         zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
     612          75 :         ZVAL_LONG(&tmp, offset);
     613          75 :         zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
     614             :         /* With a name the pair is inserted twice (string key, then next numeric index), so an extra reference is taken before the first insert. */
     615          75 :         if (name) {
     616             :                 Z_ADDREF(match_pair);
     617           2 :                 zend_hash_str_update(Z_ARRVAL_P(result), name, strlen(name), &match_pair);
     618             :         }
     619          75 :         zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair);
     620          75 : }
     621             : /* }}} */
     622             : 
     623     1781607 : static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ Parse the arguments, fetch the compiled pattern and delegate the matching to php_pcre_match_impl(); global is forwarded unchanged. */
     624             : {
     625             :         /* parameters */
     626             :         zend_string              *regex;                        /* Regular expression */
     627             :         zend_string              *subject;                      /* String to match against */
     628             :         pcre_cache_entry *pce;                          /* Compiled regular expression (cache entry, not a script argument) */
     629     1781607 :         zval                     *subpats = NULL;       /* Array for subpatterns */
     630     1781607 :         zend_long                 flags = 0;            /* Match control flags */
     631     1781607 :         zend_long                 start_offset = 0;     /* Where the new search starts */
     632             :         /* Accepts (regex, subject[, subpats[, flags[, start_offset]]]): 2 to 5 arguments. Both parsing variants below return FALSE when parsing fails. */
     633             : #ifndef FAST_ZPP
     634             :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "SS|z/ll", &regex,
     635             :                                                           &subject, &subpats, &flags, &start_offset) == FAILURE) {
     636             :                 RETURN_FALSE;
     637             :         }
     638             : #else
     639     1781607 :         ZEND_PARSE_PARAMETERS_START(2, 5)
     640     5344797 :                 Z_PARAM_STR(regex)
     641     5344785 :                 Z_PARAM_STR(subject)
     642     1781591 :                 Z_PARAM_OPTIONAL
     643     4188538 :                 Z_PARAM_ZVAL_EX(subpats, 0, 1)
     644     1212461 :                 Z_PARAM_LONG(flags)
     645          59 :                 Z_PARAM_LONG(start_offset)
     646     1781607 :         ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
     647             : #endif
     648             :         /* The subject length is narrowed to int in the php_pcre_match_impl() call below; ZEND_SIZE_T_INT_OVFL presumably detects lengths that do not fit in an int - confirm against its definition. */
     649     1781591 :         if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject))) {
     650           0 :                         php_error_docref(NULL, E_WARNING, "Subject is too long");
     651           0 :                         RETURN_FALSE;
     652             :         }
     653             : 
     654             :         /* Compile regex or get it from cache. */
     655     1781591 :         if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
     656          27 :                 RETURN_FALSE;
     657             :         }
     658             :         /* The entry's refcount is raised for the duration of the match call and dropped right after. NOTE(review): presumably this stops the cache from discarding an entry that is in use - confirm against pcre_clean_cache. */
     659     1781564 :         pce->refcount++;
     660     1781564 :         php_pcre_match_impl(pce, ZSTR_VAL(subject), (int)ZSTR_LEN(subject), return_value, subpats,
     661             :                 global, ZEND_NUM_ARGS() >= 4, flags, start_offset); /* 4th argument is flags, so use_flags is set only when the caller passed it */
     662     1781564 :         pce->refcount--;
     663             : }
     664             : /* }}} */
     665             : 
     666             : /* {{{ php_pcre_match_impl() */
     667     1783103 : PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
     668             :         zval *subpats, int global, int use_flags, zend_long flags, zend_long start_offset)
     669             : {
     670             :         zval                     result_set,            /* Holds a set of subpatterns after
     671             :                                                                                    a global match */
     672     1783103 :                                     *match_sets = NULL; /* An array of sets of matches for each
     673             :                                                                                    subpattern after a global match */
     674     1783103 :         pcre_extra              *extra = pce->extra;/* Holds results of studying */
     675             :         pcre_extra               extra_data;            /* Used locally for exec options */
     676     1783103 :         int                              exoptions = 0;         /* Execution options */
     677     1783103 :         int                              count = 0;                     /* Count of matched subpatterns */
     678             :         int                             *offsets;                       /* Array of subpattern offsets */
     679             :         int                              num_subpats;           /* Number of captured subpatterns */
     680             :         int                              size_offsets;          /* Size of the offsets array */
     681             :         int                              matched;                       /* Has anything matched */
     682     1783103 :         int                              g_notempty = 0;        /* If the match should not be empty */
     683             :         const char         **stringlist;                /* Holds list of subpatterns */
     684             :         char               **subpat_names;              /* Array for named subpatterns */
     685             :         int                              i;
     686             :         int                              subpats_order;         /* Order of subpattern matches */
     687             :         int                              offset_capture;    /* Capture match offsets: yes/no */
     688     1783103 :         unsigned char   *mark = NULL;       /* Target for MARK name */
     689             :         zval            marks;                  /* Array of marks for PREG_PATTERN_ORDER */
     690             :         ALLOCA_FLAG(use_heap);
     691             : 
     692     1783103 :         ZVAL_UNDEF(&marks);
     693             : 
     694             :         /* Overwrite the passed-in value for subpatterns with an empty array. */
     695     1783103 :         if (subpats != NULL) {
     696             :                 zval_dtor(subpats);
     697     1213917 :                 array_init(subpats);
     698             :         }
     699             : 
     700     1783103 :         subpats_order = global ? PREG_PATTERN_ORDER : 0;
     701             : 
     702     1783103 :         if (use_flags) {
     703        1515 :                 offset_capture = flags & PREG_OFFSET_CAPTURE;
     704             : 
     705             :                 /*
     706             :                  * subpats_order is pre-set to pattern mode so we change it only if
     707             :                  * necessary.
     708             :                  */
     709        1515 :                 if (flags & 0xff) {
     710          23 :                         subpats_order = flags & 0xff;
     711             :                 }
     712        1515 :                 if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
     713             :                         (!global && subpats_order != 0)) {
     714           1 :                         php_error_docref(NULL, E_WARNING, "Invalid flags specified");
     715           1 :                         return;
     716             :                 }
     717             :         } else {
     718     1781588 :                 offset_capture = 0;
     719             :         }
     720             : 
     721             :         /* Negative offset counts from the end of the string. */
     722     1783102 :         if (start_offset < 0) {
     723           5 :                 start_offset = subject_len + start_offset;
     724           5 :                 if (start_offset < 0) {
     725           1 :                         start_offset = 0;
     726             :                 }
     727             :         }
     728             : 
     729     1783102 :         if (extra == NULL) {
     730           6 :                 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
     731           6 :                 extra = &extra_data;
     732             :         }
     733     1783102 :         extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
     734     1783102 :         extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
     735             : #ifdef PCRE_EXTRA_MARK
     736     1783102 :         extra->mark = &mark;
     737     1783102 :         extra->flags |= PCRE_EXTRA_MARK;
     738             : #endif
     739             : 
     740             :         /* Calculate the size of the offsets array, and allocate memory for it. */
     741     1783102 :         num_subpats = pce->capture_count + 1;
     742     1783102 :         size_offsets = num_subpats * 3;
     743             : 
     744             :         /*
     745             :          * Build a mapping from subpattern numbers to their names. We will
     746             :          * allocate the table only if there are any named subpatterns.
     747             :          */
     748     1783102 :         subpat_names = NULL;
     749     1783102 :         if (pce->name_count > 0) {
     750           9 :                 subpat_names = make_subpats_table(num_subpats, pce);
     751           9 :                 if (!subpat_names) {
     752           0 :                         RETURN_FALSE;
     753             :                 }
     754             :         }
     755             : 
     756     1783102 :         if (size_offsets <= 32) {
     757     1783091 :                 offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
     758             :         } else {
     759          11 :                 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
     760             :         }
     761     1783102 :         memset(offsets, 0, size_offsets*sizeof(int));
     762             :         /* Allocate match sets array and initialize the values. */
     763     1783102 :         if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
     764         119 :                 match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0);
     765         380 :                 for (i=0; i<num_subpats; i++) {
     766         261 :                         array_init(&match_sets[i]);
     767             :                 }
     768             :         }
     769             : 
     770     1783102 :         matched = 0;
     771     1783102 :         PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
     772             : 
     773             :         do {
     774             :                 /* Execute the regular expression. */
     775     1783309 :                 count = pcre_exec(pce->re, extra, subject, (int)subject_len, (int)start_offset,
     776             :                                                   exoptions|g_notempty, offsets, size_offsets);
     777             : 
     778             :                 /* the string was already proved to be valid UTF-8 */
     779     1783309 :                 exoptions |= PCRE_NO_UTF8_CHECK;
     780             : 
     781             :                 /* Check for too many substrings condition. */
     782     1783309 :                 if (count == 0) {
     783           0 :                         php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
     784           0 :                         count = size_offsets/3;
     785             :                 }
     786             : 
     787             :                 /* If something has matched */
     788     1783309 :                 if (count > 0) {
     789       94873 :                         matched++;
     790             : 
     791             :                         /* If subpatterns array has been passed, fill it in with values. */
     792       94873 :                         if (subpats != NULL) {
     793             :                                 /* Try to get the list of substrings and display a warning if failed. */
     794       46935 :                                 if ((offsets[1] - offsets[0] < 0) || pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) {
     795           1 :                                         if (subpat_names) {
     796           0 :                                                 efree(subpat_names);
     797             :                                         }
     798           1 :                                         if (size_offsets <= 32) {
     799           1 :                                                 free_alloca(offsets, use_heap);
     800             :                                         } else {
     801           0 :                                                 efree(offsets);
     802             :                                         }
     803           1 :                                         if (match_sets) efree(match_sets);
     804           1 :                                         php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
     805           1 :                                         RETURN_FALSE;
     806             :                                 }
     807             : 
     808       46934 :                                 if (global) {   /* global pattern matching */
     809         340 :                                         if (subpats && subpats_order == PREG_PATTERN_ORDER) {
     810             :                                                 /* For each subpattern, insert it into the appropriate array. */
     811         148 :                                                 if (offset_capture) {
     812          22 :                                                         for (i = 0; i < count; i++) {
     813          24 :                                                                 add_offset_pair(&match_sets[i], (char *)stringlist[i],
     814          24 :                                                                                                 offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
     815             :                                                         }
     816             :                                                 } else {
     817         357 :                                                         for (i = 0; i < count; i++) {
     818         219 :                                                                 add_next_index_stringl(&match_sets[i], (char *)stringlist[i],
     819         219 :                                                                                                            offsets[(i<<1)+1] - offsets[i<<1]);
     820             :                                                         }
     821             :                                                 }
     822             :                                                 /* Add MARK, if available */
     823         148 :                                                 if (mark) {
     824           2 :                                                         if (Z_TYPE(marks) == IS_UNDEF) {
     825           1 :                                                                 array_init(&marks);
     826             :                                                         }
     827           2 :                                                         add_index_string(&marks, matched - 1, (char *) mark);
     828             :                                                 }
     829             :                                                 /*
     830             :                                                  * If the number of captured subpatterns on this run is
     831             :                                                  * less than the total possible number, pad the result
     832             :                                                  * arrays with empty strings.
     833             :                                                  */
     834         148 :                                                 if (count < num_subpats) {
     835          11 :                                                         for (; i < num_subpats; i++) {
     836           7 :                                                                 add_next_index_string(&match_sets[i], "");
     837             :                                                         }
     838             :                                                 }
     839             :                                         } else {
     840             :                                                 /* Allocate the result set array */
     841          44 :                                                 array_init_size(&result_set, count + (mark ? 1 : 0));
     842             : 
     843             :                                                 /* Add all the subpatterns to it */
     844          44 :                                                 if (subpat_names) {
     845           2 :                                                         if (offset_capture) {
     846           0 :                                                                 for (i = 0; i < count; i++) {
     847           0 :                                                                         add_offset_pair(&result_set, (char *)stringlist[i],
     848           0 :                                                                                                         offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
     849             :                                                                 }
     850             :                                                         } else {
     851          14 :                                                                 for (i = 0; i < count; i++) {
     852          12 :                                                                         if (subpat_names[i]) {
     853           8 :                                                                                 add_assoc_stringl(&result_set, subpat_names[i], (char *)stringlist[i],
     854             :                                                                                                                            offsets[(i<<1)+1] - offsets[i<<1]);
     855             :                                                                         }
     856          12 :                                                                         add_next_index_stringl(&result_set, (char *)stringlist[i],
     857          12 :                                                                                                                    offsets[(i<<1)+1] - offsets[i<<1]);
     858             :                                                                 }
     859             :                                                         }
     860             :                                                 } else {
     861          42 :                                                         if (offset_capture) {
     862          10 :                                                                 for (i = 0; i < count; i++) {
     863          14 :                                                                         add_offset_pair(&result_set, (char *)stringlist[i],
     864          14 :                                                                                                         offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
     865             :                                                                 }
     866             :                                                         } else {
     867         343 :                                                                 for (i = 0; i < count; i++) {
     868         304 :                                                                         add_next_index_stringl(&result_set, (char *)stringlist[i],
     869         304 :                                                                                                                    offsets[(i<<1)+1] - offsets[i<<1]);
     870             :                                                                 }
     871             :                                                         }
     872             :                                                 }
     873             :                                                 /* Add MARK, if available */
     874          44 :                                                 if (mark) {
     875           2 :                                                         add_assoc_string_ex(&result_set, "MARK", sizeof("MARK") - 1, (char *)mark);
     876             :                                                 }
     877             :                                                 /* And add it to the output array */
     878          44 :                                                 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set);
     879             :                                         }
     880             :                                 } else {                        /* single pattern matching */
     881             :                                         /* For each subpattern, insert it into the subpatterns array. */
     882       46742 :                                         if (subpat_names) {
     883           5 :                                                 if (offset_capture) {
     884           5 :                                                         for (i = 0; i < count; i++) {
     885          12 :                                                                 add_offset_pair(subpats, (char *)stringlist[i],
     886           4 :                                                                                                 offsets[(i<<1)+1] - offsets[i<<1],
     887           8 :                                                                                                 offsets[i<<1], subpat_names[i]);
     888             :                                                         }
     889             :                                                 } else {
     890          24 :                                                         for (i = 0; i < count; i++) {
     891          20 :                                                                 if (subpat_names[i]) {
     892          13 :                                                                         add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
     893             :                                                                                                           offsets[(i<<1)+1] - offsets[i<<1]);
     894             :                                                                 }
     895          20 :                                                                 add_next_index_stringl(subpats, (char *)stringlist[i],
     896          20 :                                                                                                            offsets[(i<<1)+1] - offsets[i<<1]);
     897             :                                                         }
     898             :                                                 }
     899             :                                         } else {
     900       46737 :                                                 if (offset_capture) {
     901          19 :                                                         for (i = 0; i < count; i++) {
     902          22 :                                                                 add_offset_pair(subpats, (char *)stringlist[i],
     903          11 :                                                                                                 offsets[(i<<1)+1] - offsets[i<<1],
     904          11 :                                                                                                 offsets[i<<1], NULL);
     905             :                                                         }
     906             :                                                 } else {
     907      142703 :                                                         for (i = 0; i < count; i++) {
     908       95974 :                                                                 add_next_index_stringl(subpats, (char *)stringlist[i],
     909       95974 :                                                                                                            offsets[(i<<1)+1] - offsets[i<<1]);
     910             :                                                         }
     911             :                                                 }
     912             :                                         }
     913             :                                         /* Add MARK, if available */
     914       46742 :                                         if (mark) {
     915           1 :                                                 add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
     916             :                                         }
     917             :                                 }
     918             : 
     919       46934 :                                 pcre_free((void *) stringlist);
     920             :                         }
     921     1688436 :                 } else if (count == PCRE_ERROR_NOMATCH) {
     922             :                         /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
     923             :                            this is not necessarily the end. We need to advance
     924             :                            the start offset, and continue. Fudge the offset values
     925             :                            to achieve this, unless we're already at the end of the string. */
     926     1688429 :                         if (g_notempty != 0 && start_offset < subject_len) {
     927          12 :                                 int unit_len = calculate_unit_length(pce, subject + start_offset);
     928             :                                 
     929           6 :                                 offsets[0] = (int)start_offset;
     930           6 :                                 offsets[1] = (int)(start_offset + unit_len);
     931             :                         } else
     932             :                                 break;
     933             :                 } else {
     934           7 :                         pcre_handle_exec_error(count);
     935           7 :                         break;
     936             :                 }
     937             : 
     938             :                 /* If we have matched an empty string, mimic what Perl's /g options does.
     939             :                    This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
     940             :                    the match again at the same point. If this fails (picked up above) we
     941             :                    advance to the next character. */
     942       94878 :                 g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
     943             : 
     944             :                 /* Advance to the position right after the last full match */
     945       94878 :                 start_offset = offsets[1];
     946       94878 :         } while (global);
     947             : 
     948             :         /* Add the match sets to the output array and clean up */
     949     1783101 :         if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
     950         119 :                 if (subpat_names) {
     951          10 :                         for (i = 0; i < num_subpats; i++) {
     952           8 :                                 if (subpat_names[i]) {
     953           5 :                                         zend_hash_str_update(Z_ARRVAL_P(subpats), subpat_names[i],
     954             :                                                                          strlen(subpat_names[i]), &match_sets[i]);
     955           5 :                                         Z_ADDREF(match_sets[i]);
     956             :                                 }
     957           8 :                                 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
     958             :                         }
     959             :                 } else {
     960         370 :                         for (i = 0; i < num_subpats; i++) {
     961         253 :                                 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
     962             :                         }
     963             :                 }
     964         119 :                 efree(match_sets);
     965             : 
     966         119 :                 if (Z_TYPE(marks) != IS_UNDEF) {
     967           1 :                         add_assoc_zval(subpats, "MARK", &marks);
     968             :                 }
     969             :         }
     970             : 
     971     1783101 :         if (size_offsets <= 32) {
     972     1783090 :                 free_alloca(offsets, use_heap);
     973             :         } else {
     974          11 :                 efree(offsets);
     975             :         }
     976     1783101 :         if (subpat_names) {
     977           9 :                 efree(subpat_names);
     978             :         }
     979             : 
     980             :         /* Did we encounter an error? */
     981     1783101 :         if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
     982     1783094 :                 RETVAL_LONG(matched);
     983             :         } else {
     984           7 :                 RETVAL_FALSE;
     985             :         }
     986             : }
     987             : /* }}} */
     988             : 
     989             : /* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
     990             :    Perform a Perl-style regular expression match.
                     :    Thin wrapper: forwards the PHP call arguments unchanged to
                     :    php_do_pcre_match() with 0 as the trailing argument (preg_match_all
                     :    passes 1). Presumably 0 selects non-global matching - confirm against
                     :    php_do_pcre_match. */
     991     1781496 : static PHP_FUNCTION(preg_match)
     992             : {
     993     1781496 :         php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
     994     1781496 : }
     995             : /* }}} */
     996             : 
     997             : /* {{{ proto int preg_match_all(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
     998             :    Perform a Perl-style global regular expression match.
                     :    Thin wrapper: forwards the PHP call arguments unchanged to
                     :    php_do_pcre_match() with 1 as the trailing argument (preg_match
                     :    passes 0). Presumably 1 selects global matching - confirm against
                     :    php_do_pcre_match. */
     999         111 : static PHP_FUNCTION(preg_match_all)
    1000             : {
    1001         111 :         php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
    1002         111 : }
    1003             : /* }}} */
    1004             : 
    1005             : /* {{{ preg_get_backref
                     :  * Tries to parse a backreference token starting at *str.
                     :  *
                     :  * *str must point at the introducer character; this function skips it
                     :  * without checking which character it is (the callers in this file only
                     :  * call it on a backslash or a dollar sign). Accepted forms after the
                     :  * introducer are one or two decimal digits; when the introducer is a
                     :  * dollar sign immediately followed by an opening brace, the digits must
                     :  * be followed by a closing brace.
                     :  *
                     :  *   str     - in/out cursor; advanced past the whole token on success,
                     :  *             left untouched on failure
                     :  *   backref - receives the parsed group number (0..99)
                     :  *
                     :  * Returns 1 when a token was parsed, 0 otherwise. Note that *backref may
                     :  * already have been written when 0 is returned because of a missing
                     :  * closing brace.
    1006             :  */
    1007         126 : static int preg_get_backref(char **str, int *backref)
    1008             : {
    1009         126 :         register char in_brace = 0;
    1010         126 :         register char *walk = *str;
    1011             : 
    1012         126 :         if (walk[1] == 0) /* nothing follows the introducer: not a backreference */
    1013           8 :                 return 0;
    1014             : 
    1015         118 :         if (*walk == '$' && walk[1] == '{') {
    1016          14 :                 in_brace = 1;
    1017          14 :                 walk++; /* step over the dollar sign; the brace is skipped just below */
    1018             :         }
    1019         118 :         walk++; /* step over the introducer, or over the opening brace */
    1020             : 
    1021         236 :         if (*walk >= '0' && *walk <= '9') { /* first digit is mandatory */
    1022         118 :                 *backref = *walk - '0';
    1023         118 :                 walk++;
    1024             :         } else
    1025           0 :                 return 0;
    1026             : 
    1027         118 :         if (*walk && *walk >= '0' && *walk <= '9') { /* optional second digit */
    1028           2 :                 *backref = *backref * 10 + *walk - '0';
    1029           2 :                 walk++;
    1030             :         }
    1031             : 
    1032         118 :         if (in_brace) {
    1033          14 :                 if (*walk != '}') /* braced form must be closed right after the digits */
    1034           6 :                         return 0;
    1035             :                 else
    1036           8 :                         walk++;
    1037             :         }
    1038             : 
    1039         112 :         *str = walk; /* commit the cursor only once the whole token was accepted */
    1040         112 :         return 1;
    1041             : }
    1042             : /* }}} */
    1043             : 
    1044             : /* {{{ preg_do_repl_func
                     :  * Calls the user supplied replacement callback for one match and returns
                     :  * its result as a string.
                     :  *
                     :  * The callback receives a single array argument holding one string per
                     :  * captured group (count entries), each cut out of subject using the pair
                     :  * offsets[2*i], offsets[2*i+1]. When subpat_names is non-NULL, a group
                     :  * that has a name is additionally stored under that name. When mark is
                     :  * non-NULL it is stored under the key MARK.
                     :  *
                     :  * If the call does not succeed, or leaves the return value undefined, a
                     :  * warning is raised unless an exception is pending, and the text of the
                     :  * whole match (offsets[0] up to offsets[1]) is returned instead.
                     :  *
                     :  * The returned zend_string is handed to the caller, which releases it
                     :  * (see zend_string_release(eval_result) in php_pcre_replace_impl).
    1045             :  */
    1046          69 : static zend_string *preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, unsigned char *mark)
    1047             : {
    1048             :         zend_string *result_str;
    1049             :         zval             retval;                        /* Function return value */
    1050             :         zval         args[1];                   /* Argument to pass to function */
    1051             :         int                      i;
    1052             : 
    1053          69 :         array_init_size(&args[0], count + (mark ? 1 : 0)); /* one slot per group, plus one for MARK */
    1054          69 :         if (subpat_names) {
    1055           3 :                 for (i = 0; i < count; i++) {
    1056           2 :                         if (subpat_names[i]) { /* named group: add it under its name as well */
    1057           1 :                                 add_assoc_stringl(&args[0], subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1]);
    1058             :                         }
    1059           2 :                         add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
    1060             :                 }
    1061             :         } else {
    1062         167 :                 for (i = 0; i < count; i++) {
    1063          99 :                         add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
    1064             :                 }
    1065             :         }
    1066          69 :         if (mark) {
    1067           2 :                 add_assoc_string(&args[0], "MARK", (char *) mark);
    1068             :         }
    1069             : 
    1070         205 :         if (call_user_function_ex(EG(function_table), NULL, function, &retval, 1, args, 0, NULL) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) {
    1071          67 :                 result_str = zval_get_string(&retval); /* string form of whatever the callback returned */
    1072          67 :                 zval_ptr_dtor(&retval);
    1073             :         } else {
    1074           2 :                 if (!EG(exception)) { /* stay silent when an exception is already pending */
    1075           0 :                         php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
    1076             :                 }
    1077             : 
    1078           4 :                 result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0); /* fall back to the unmodified match text */
    1079             :         }
    1080             : 
    1081          69 :         zval_ptr_dtor(&args[0]);
    1082             : 
    1083          69 :         return result_str;
    1084             : }
    1085             : /* }}} */
    1086             : 
    1087             : /* {{{ php_pcre_replace
                     :  * Convenience entry point that takes the regex as a string.
                     :  *
                     :  * Obtains the compiled pattern through pcre_get_compiled_regex_cache()
                     :  * and delegates to php_pcre_replace_impl() with all other arguments passed
                     :  * through unchanged. Returns NULL when the cache lookup yields NULL,
                     :  * otherwise whatever php_pcre_replace_impl() returns.
                     :  *
                     :  * The cache entry refcount is raised for the duration of the call,
                     :  * presumably so the entry is not discarded while it is in use - confirm
                     :  * against the cache implementation.
    1088             :  */
    1089       36972 : PHPAPI zend_string *php_pcre_replace(zend_string *regex,
    1090             :                                                           zend_string *subject_str,
    1091             :                                                           char *subject, int subject_len,
    1092             :                                                           zval *replace_val, int is_callable_replace,
    1093             :                                                           int limit, int *replace_count)
    1094             : {
    1095             :         pcre_cache_entry        *pce;                       /* Compiled regular expression */
    1096             :         zend_string                     *result;                        /* Function result */
    1097             : 
    1098             :         /* Compile regex or get it from cache. */
    1099       36972 :         if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
    1100          12 :                 return NULL;
    1101             :         }
    1102       36960 :         pce->refcount++;
    1103       36960 :         result = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_val,
    1104             :                 is_callable_replace, limit, replace_count);
    1105       36960 :         pce->refcount--;
    1106             : 
    1107       36960 :         return result;
    1108             : }
    1109             : /* }}} */
    1110             : 
    1111             : /* {{{ php_pcre_replace_impl()
                     :  * Replaces matches of the compiled pattern pce inside subject and returns
                     :  * the resulting string.
                     :  *
                     :  *   pce                 - compiled regex cache entry; re, extra,
                     :  *                         preg_options, capture_count and name_count are
                     :  *                         read here
                     :  *   subject_str         - optional zend_string for the subject; when it is
                     :  *                         non-NULL and no result buffer exists once
                     :  *                         matching ends, zend_string_copy() of it is
                     :  *                         returned
                     :  *   subject/subject_len - subject bytes handed to pcre_exec()
                     :  *   replace_val         - a string zval when is_callable_replace is 0,
                     :  *                         otherwise the callable that is passed on to
                     :  *                         preg_do_repl_func()
                     :  *   limit               - a match is replaced only while limit is
                     :  *                         non-zero; it is decremented after every
                     :  *                         replacement (the commented-out condition in the
                     :  *                         loop suggests -1 means unlimited - confirm with
                     :  *                         the callers)
                     :  *   replace_count       - optional counter, incremented once per
                     :  *                         replacement
                     :  *
                     :  * For string replacements the replacement text is scanned twice per match:
                     :  * a first pass only computes the required length, a second pass copies
                     :  * literal characters and backreferenced groups into the result buffer.
                     :  *
                     :  * Returns NULL when the pattern carries PREG_REPLACE_EVAL, when
                     :  * make_subpats_table() fails, or after pcre_exec() reports an error other
                     :  * than PCRE_ERROR_NOMATCH.
                     :  */
    1112       36975 : PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, int subject_len, zval *replace_val, int is_callable_replace, int limit, int *replace_count)
    1113             : {
    1114       36975 :         pcre_extra              *extra = pce->extra;/* Holds results of studying */
    1115             :         pcre_extra               extra_data;            /* Used locally for exec options */
    1116       36975 :         int                              exoptions = 0;         /* Execution options */
    1117       36975 :         int                              count = 0;                     /* Count of matched subpatterns */
    1118             :         int                             *offsets;                       /* Array of subpattern offsets */
    1119             :         char                    **subpat_names;         /* Array for named subpatterns */
    1120             :         int                              num_subpats;           /* Number of captured subpatterns */
    1121             :         int                              size_offsets;          /* Size of the offsets array */
    1122             :         size_t                   new_len;                       /* Length of needed storage */
    1123             :         size_t                   alloc_len;                     /* Actual allocated length */
    1124             :         int                              match_len;                     /* Length of the current match */
    1125             :         int                              backref;                       /* Backreference number */
    1126             :         int                              start_offset;          /* Where the new search starts */
    1127       36975 :         int                              g_notempty=0;          /* If the match should not be empty */
    1128       36975 :         int                              replace_len=0;         /* Length of replacement string */
    1129       36975 :         char                    *replace=NULL,          /* Replacement string */
    1130             :                                         *walkbuf,                       /* Location of current replacement in the result */
    1131             :                                         *walk,                          /* Used to walk the replacement string */
    1132             :                                         *match,                         /* The current match */
    1133             :                                         *piece,                         /* The current piece of subject */
    1134       36975 :                                         *replace_end=NULL,      /* End of replacement string */
    1135             :                                          walk_last;                     /* Last walked character */
    1136             :         int                              result_len;            /* Length of result */
    1137       36975 :         unsigned char   *mark = NULL;       /* Target for MARK name */
    1138             :         zend_string             *result;                        /* Result of replacement */
    1139       36975 :         zend_string     *eval_result=NULL;  /* Result of custom function */
    1140             : 
    1141             :         ALLOCA_FLAG(use_heap);
    1142             : 
    1143       36975 :         if (extra == NULL) { /* no study data on the cache entry: use a local pcre_extra for the limits */
    1144           2 :                 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
    1145           2 :                 extra = &extra_data;
    1146             :         }
    1147             : 
    1148       36975 :         extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
    1149       36975 :         extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
    1150             : 
    1151       36975 :         if (UNEXPECTED(pce->preg_options & PREG_REPLACE_EVAL)) {
    1152           1 :                 php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
    1153           1 :                 return NULL;
    1154             :         }
    1155             : 
    1156       36974 :         if (!is_callable_replace) { /* for callables replace stays NULL, which selects the callback branch below */
    1157       36924 :                 replace = Z_STRVAL_P(replace_val);
    1158       36924 :                 replace_len = (int)Z_STRLEN_P(replace_val);
    1159       36924 :                 replace_end = replace + replace_len;
    1160             :         }
    1161             : 
    1162             :         /* Calculate the size of the offsets array, and allocate memory for it. */
    1163       36974 :         num_subpats = pce->capture_count + 1;
    1164       36974 :         size_offsets = num_subpats * 3;
    1165       36974 :         if (size_offsets <= 32) {
    1166       36971 :                 offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
    1167             :         } else {
    1168           3 :                 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
    1169             :         }
    1170             : 
    1171             :         /*
    1172             :          * Build a mapping from subpattern numbers to their names. We will
    1173             :          * allocate the table only if there are any named subpatterns.
    1174             :          */
    1175       36974 :         subpat_names = NULL;
    1176       36974 :         if (UNEXPECTED(pce->name_count > 0)) {
    1177           1 :                 subpat_names = make_subpats_table(num_subpats, pce);
    1178           1 :                 if (!subpat_names) {
    1179           0 :                         return NULL; /* NOTE(review): offsets allocated above is not released on this path - confirm whether that is intended */
    1180             :                 }
    1181             :         }
    1182             : 
    1183       36974 :         alloc_len = 0;
    1184       36974 :         result = NULL;
    1185             : 
    1186             :         /* Initialize */
    1187       36974 :         match = NULL;
    1188       36974 :         start_offset = 0;
    1189       36974 :         result_len = 0;
    1190       36974 :         PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
    1191             : 
    1192             :         while (1) {
    1193             : #ifdef PCRE_EXTRA_MARK
    1194       51984 :                 extra->mark = &mark;
    1195       51984 :                 extra->flags |= PCRE_EXTRA_MARK;
    1196             : #endif
    1197             :                 /* Execute the regular expression. */
    1198       51984 :                 count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
    1199             :                                                   exoptions|g_notempty, offsets, size_offsets);
    1200             : 
    1201             :                 /* the string was already proved to be valid UTF-8 */
    1202       51984 :                 exoptions |= PCRE_NO_UTF8_CHECK;
    1203             : 
    1204             :                 /* Check for too many substrings condition. */
    1205       51984 :                 if (UNEXPECTED(count == 0)) {
    1206           0 :                         php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
    1207           0 :                         count = size_offsets / 3;
    1208             :                 }
    1209             : 
    1210       51984 :                 piece = subject + start_offset;
    1211             : 
    1212             :                 /* if (EXPECTED(count > 0 && (limit == -1 || limit > 0))) */
    1213       66982 :                 if (EXPECTED(count > 0 && (offsets[1] - offsets[0] >= 0) && limit)) { /* a match whose end is not before its start, and replacements still allowed */
    1214       14998 :                         if (UNEXPECTED(replace_count)) {
    1215       14998 :                                 ++*replace_count;
    1216             :                         }
    1217             : 
    1218             :                         /* Set the match location in subject */
    1219       14998 :                         match = subject + offsets[0];
    1220             : 
    1221       14998 :                         new_len = result_len + offsets[0] - start_offset; /* part before the match */
    1222             :                         
    1223             :                         /* if (!is_callable_replace) */
    1224       14998 :                         if (EXPECTED(replace)) {
    1225             :                                 /* do regular substitution: this first pass only adds up the length
                     :                                    the expanded replacement will need */
    1226       14929 :                                 walk = replace;
    1227       14929 :                                 walk_last = 0;
    1228             : 
    1229       61260 :                                 while (walk < replace_end) {
    1230       31402 :                                         if ('\\' == *walk || '$' == *walk) {
    1231          63 :                                                 if (walk_last == '\\') { /* escaped introducer: it will overwrite the backslash, so no extra byte is counted */
    1232           0 :                                                         walk++;
    1233           0 :                                                         walk_last = 0;
    1234           0 :                                                         continue;
    1235             :                                                 }
    1236          63 :                                                 if (preg_get_backref(&walk, &backref)) {
    1237          56 :                                                         if (backref < count) /* groups beyond this match contribute nothing */
    1238          55 :                                                                 new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
    1239          56 :                                                         continue;
    1240             :                                                 }
    1241             :                                         }
    1242       31346 :                                         new_len++;
    1243       31346 :                                         walk++;
    1244       31346 :                                         walk_last = walk[-1];
    1245             :                                 }
    1246             : 
    1247       14929 :                                 if (new_len >= alloc_len) {
    1248        9332 :                                         alloc_len = alloc_len + 2 * new_len; /* grow to the old capacity plus twice the needed length */
    1249        9332 :                                         if (result == NULL) {
    1250        8852 :                                                 result = zend_string_alloc(alloc_len, 0);
    1251             :                                         } else {
    1252         480 :                                                 result = zend_string_extend(result, alloc_len, 0);
    1253             :                                         }
    1254             :                                 }
    1255             : 
    1256             :                                 /* copy the part of the string before the match */
    1257       14929 :                                 memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece);
    1258       14929 :                                 result_len += (int)(match-piece);
    1259             : 
    1260             :                                 /* copy replacement and backrefs (second pass, mirrors the length pass above) */
    1261       14929 :                                 walkbuf = ZSTR_VAL(result) + result_len;
    1262             : 
    1263       14929 :                                 walk = replace;
    1264       14929 :                                 walk_last = 0;
    1265       61260 :                                 while (walk < replace_end) {
    1266       31402 :                                         if ('\\' == *walk || '$' == *walk) {
    1267          63 :                                                 if (walk_last == '\\') {
    1268           0 :                                                         *(walkbuf-1) = *walk++; /* replace the backslash copied on the previous step with the escaped character */
    1269           0 :                                                         walk_last = 0;
    1270           0 :                                                         continue;
    1271             :                                                 }
    1272          63 :                                                 if (preg_get_backref(&walk, &backref)) {
    1273          56 :                                                         if (backref < count) {
    1274          55 :                                                                 match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
    1275          55 :                                                                 memcpy(walkbuf, subject + offsets[backref<<1], match_len);
    1276          55 :                                                                 walkbuf += match_len;
    1277             :                                                         }
    1278          56 :                                                         continue;
    1279             :                                                 }
    1280             :                                         }
    1281       31346 :                                         *walkbuf++ = *walk++;
    1282       31346 :                                         walk_last = walk[-1];
    1283             :                                 }
    1284       14929 :                                 *walkbuf = '\0';
    1285             :                                 /* increment the result length by how much we've added to the string */
    1286       14929 :                                 result_len += (int)(walkbuf - (ZSTR_VAL(result) + result_len));
    1287             :                         } else {
    1288             :                                 /* Use custom function to get replacement string and its length. */
    1289          69 :                                 eval_result = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, mark);
    1290             :                                 ZEND_ASSERT(eval_result);
    1291          69 :                                 new_len += (int)ZSTR_LEN(eval_result);
    1292          69 :                                 if (new_len >= alloc_len) {
    1293          61 :                                         alloc_len = alloc_len + 2 * new_len;
    1294          61 :                                         if (result == NULL) {
    1295          38 :                                                 result = zend_string_alloc(alloc_len, 0);
    1296             :                                         } else {
    1297          23 :                                                 result = zend_string_extend(result, alloc_len, 0);
    1298             :                                         }
    1299             :                                 }
    1300             :                                 /* copy the part of the string before the match */
    1301          69 :                                 memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
    1302          69 :                                 result_len += (int)(match-piece);
    1303             : 
    1304             :                                 /* copy replacement and backrefs */
    1305          69 :                                 walkbuf = ZSTR_VAL(result) + result_len;
    1306             : 
    1307             :                                 /* If using custom function, copy result to the buffer and clean up. */
    1308          69 :                                 memcpy(walkbuf, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
    1309          69 :                                 result_len += (int)ZSTR_LEN(eval_result);
    1310             :                                 zend_string_release(eval_result);
    1311             :                         }
    1312             : 
    1313       14998 :                         if (EXPECTED(limit)) {
    1314       14998 :                                 limit--;
    1315             :                         }
    1316       36998 :                 } else if (count == PCRE_ERROR_NOMATCH || UNEXPECTED(limit == 0)) {
    1317             :                         /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
    1318             :                            this is not necessarily the end. We need to advance
    1319             :                            the start offset, and continue. Fudge the offset values
    1320             :                            to achieve this, unless we're already at the end of the string.
                     :                            NOTE(review): the memcpy in this branch writes unit_len bytes
                     :                            into result without comparing against alloc_len first - confirm
                     :                            that the buffer always has room at this point. */
    1321       36990 :                         if (g_notempty != 0 && start_offset < subject_len) {
    1322          12 :                                 int unit_len = calculate_unit_length(pce, piece);
    1323             : 
    1324          12 :                                 offsets[0] = start_offset;
    1325          12 :                                 offsets[1] = start_offset + unit_len;
    1326          12 :                                 memcpy(ZSTR_VAL(result) + result_len, piece, unit_len);
    1327          12 :                                 result_len += unit_len;
    1328             :                         } else {
    1329       36966 :                                 if (!result && subject_str) { /* no result buffer exists: hand back the subject string itself */
    1330       28076 :                                         result = zend_string_copy(subject_str);
    1331       28076 :                                         break;
    1332             :                                 }
    1333        8890 :                                 new_len = result_len + subject_len - start_offset;
    1334        8890 :                                 if (new_len >= alloc_len) {
    1335         294 :                                         alloc_len = new_len; /* now we know exactly how long it is */
    1336         294 :                                         if (NULL != result) {
    1337         294 :                                                 result = zend_string_realloc(result, alloc_len, 0);
    1338             :                                         } else {
    1339           0 :                                                 result = zend_string_alloc(alloc_len, 0);
    1340             :                                         }
    1341             :                                 }
    1342             :                                 /* stick that last bit of string on our output */
    1343        8890 :                                 memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - start_offset);
    1344        8890 :                                 result_len += subject_len - start_offset;
    1345        8890 :                                 ZSTR_VAL(result)[result_len] = '\0';
    1346        8890 :                                 ZSTR_LEN(result) = result_len; /* record the final length in the string header */
    1347        8890 :                                 break;
    1348             :                         }
    1349             :                 } else {
    1350           8 :                         pcre_handle_exec_error(count);
    1351           8 :                         if (result) { /* discard any partial output; the function returns NULL on error */
    1352             :                                 zend_string_free(result);
    1353           0 :                                 result = NULL;
    1354             :                         }
    1355           8 :                         break;
    1356             :                 }
    1357             : 
    1358             :                 /* If we have matched an empty string, mimic what Perl's /g options does.
    1359             :                    This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
    1360             :                    the match again at the same point. If this fails (picked up above) we
    1361             :                    advance to the next character. */
    1362       15010 :                 g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
    1363             : 
    1364             :                 /* Advance to the next piece. */
    1365       15010 :                 start_offset = offsets[1];
    1366       15010 :         }
    1367             : 
    1368       36974 :         if (size_offsets <= 32) { /* release offsets the same way it was obtained above */
    1369       36971 :                 free_alloca(offsets, use_heap);
    1370             :         } else {
    1371           3 :                 efree(offsets);
    1372             :         }
    1373       36974 :         if (UNEXPECTED(subpat_names)) {
    1374           1 :                 efree(subpat_names);
    1375             :         }
    1376             : 
    1377       36974 :         return result;
    1378             : }
    1379             : /* }}} */
    1380             : 
    1381             : /* {{{ php_replace_in_subject
    1382             :  */
    1383       36930 : static zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *subject, int limit, int is_callable_replace, int *replace_count)
    1384             : {
    1385             :         zval            *regex_entry,
    1386             :                                 *replace_value,
    1387             :                                  empty_replace;
    1388             :         zend_string *result;
    1389             :         uint32_t replace_idx;
    1390       36930 :         zend_string     *subject_str = zval_get_string(subject);
    1391             : 
    1392             :         /* FIXME: This might need to be changed to ZSTR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */
    1393       36930 :         ZVAL_EMPTY_STRING(&empty_replace);
    1394             : 
    1395       36930 :         if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject_str))) {
    1396           0 :                         php_error_docref(NULL, E_WARNING, "Subject is too long");
    1397           0 :                         return NULL;
    1398             :         }
    1399             : 
    1400             :         /* If regex is an array */
    1401       36930 :         if (Z_TYPE_P(regex) == IS_ARRAY) {
    1402          22 :                 replace_value = replace;
    1403          22 :                 replace_idx = 0;
    1404             : 
    1405             :                 /* For each entry in the regex array, get the entry */
    1406         149 :                 ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(regex), regex_entry) {
    1407             :                         zval replace_str;
    1408             :                         /* Make sure we're dealing with strings. */
    1409          64 :                         zend_string *regex_str = zval_get_string(regex_entry);
    1410             : 
    1411          64 :                         ZVAL_UNDEF(&replace_str);
    1412             :                         /* If replace is an array and not a callable construct */
    1413          64 :                         if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) {
    1414             :                                 /* Get current entry */
    1415          98 :                                 while (replace_idx < Z_ARRVAL_P(replace)->nNumUsed) {
    1416          94 :                                         if (Z_TYPE(Z_ARRVAL_P(replace)->arData[replace_idx].val) != IS_UNDEF) {
    1417          47 :                                                 ZVAL_COPY(&replace_str, &Z_ARRVAL_P(replace)->arData[replace_idx].val);
    1418          47 :                                                 break;
    1419             :                                         }
    1420           0 :                                         replace_idx++;
    1421             :                                 }
    1422          49 :                                 if (!Z_ISUNDEF(replace_str)) {
    1423          47 :                                         if (!is_callable_replace) {
    1424          47 :                                                 convert_to_string(&replace_str);
    1425             :                                         }
    1426          47 :                                         replace_value = &replace_str;
    1427          47 :                                         replace_idx++;
    1428             :                                 } else {
    1429             :                                         /* We've run out of replacement strings, so use an empty one */
    1430           2 :                                         replace_value = &empty_replace;
    1431             :                                 }
    1432             :                         }
    1433             : 
    1434             :                         /* Do the actual replacement and put the result back into subject_str
    1435             :                            for further replacements. */
    1436          64 :                         if ((result = php_pcre_replace(regex_str,
    1437             :                                                                                    subject_str,
    1438             :                                                                                    ZSTR_VAL(subject_str),
    1439             :                                                                                    (int)ZSTR_LEN(subject_str),
    1440             :                                                                                    replace_value,
    1441             :                                                                                    is_callable_replace,
    1442             :                                                                                    limit,
    1443             :                                                                                    replace_count)) != NULL) {
    1444             :                                 zend_string_release(subject_str);
    1445          63 :                                 subject_str = result;
    1446             :                         } else {
    1447             :                                 zend_string_release(subject_str);
    1448             :                                 zend_string_release(regex_str);
    1449             :                                 zval_dtor(&replace_str);
    1450           1 :                                 return NULL;
    1451             :                         }
    1452             : 
    1453             :                         zend_string_release(regex_str);
    1454             :                         zval_dtor(&replace_str);
    1455             :                 } ZEND_HASH_FOREACH_END();
    1456             : 
    1457          21 :                 return subject_str;
    1458             :         } else {
    1459       36908 :                 result = php_pcre_replace(Z_STR_P(regex),
    1460             :                                                                   subject_str,
    1461             :                                                                   ZSTR_VAL(subject_str),
    1462             :                                                                   (int)ZSTR_LEN(subject_str),
    1463             :                                                                   replace,
    1464             :                                                                   is_callable_replace,
    1465             :                                                                   limit,
    1466             :                                                                   replace_count);
    1467             :                 zend_string_release(subject_str);
    1468       36908 :                 return result;
    1469             :         }
    1470             : }
    1471             : /* }}} */
    1472             : 
    1473             : /* {{{ preg_replace_impl
    1474             :  */
    1475       36920 : static int preg_replace_impl(zval *return_value, zval *regex, zval *replace, zval *subject, zend_long limit_val, int is_callable_replace, int is_filter)
    1476             : {
    1477             :         zval            *subject_entry;
    1478             :         zend_string     *result;
    1479             :         zend_string     *string_key;
    1480             :         zend_ulong       num_key;
    1481       36920 :         int                      replace_count = 0, old_replace_count;
    1482             : 
    1483       73829 :         if (Z_TYPE_P(replace) != IS_ARRAY && (Z_TYPE_P(replace) != IS_OBJECT || !is_callable_replace)) {
    1484       36899 :                 convert_to_string_ex(replace);
    1485             :         }
    1486             : 
    1487       36918 :         if (Z_TYPE_P(regex) != IS_ARRAY) {
    1488       36908 :                 convert_to_string_ex(regex);
    1489             :         }
    1490             : 
    1491             :         /* if subject is an array */
    1492       36917 :         if (Z_TYPE_P(subject) == IS_ARRAY) {
    1493           6 :                 array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));
    1494             : 
    1495             :                 /* For each subject entry, convert it to string, then perform replacement
    1496             :                    and add the result to the return_value array. */
    1497          44 :                 ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) {
    1498          19 :                         old_replace_count = replace_count;
    1499          19 :                         if ((result = php_replace_in_subject(regex, replace, subject_entry, limit_val, is_callable_replace, &replace_count)) != NULL) {
    1500          36 :                                 if (!is_filter || replace_count > old_replace_count) {
    1501             :                                         /* Add to return array */
    1502             :                                         zval zv;
    1503             : 
    1504          17 :                                         ZVAL_STR(&zv, result);
    1505          17 :                                         if (string_key) {
    1506           1 :                                                 zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv);
    1507             :                                         } else {
    1508          16 :                                                 zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv);
    1509             :                                         }
    1510             :                                 } else {
    1511             :                                         zend_string_release(result);
    1512             :                                 }
    1513             :                         }
    1514             :                 } ZEND_HASH_FOREACH_END();
    1515             :         } else {        
    1516             :                 /* if subject is not an array */
    1517       36911 :                 old_replace_count = replace_count;
    1518       36911 :                 if ((result = php_replace_in_subject(regex, replace, subject, limit_val, is_callable_replace, &replace_count)) != NULL) {
    1519       73780 :                         if (!is_filter || replace_count > old_replace_count) {
    1520       36890 :                                 RETVAL_STR(result);
    1521             :                         } else {
    1522             :                                 zend_string_release(result);
    1523             :                         }
    1524             :                 }
    1525             :         }
    1526             :         
    1527       36917 :         return replace_count;
    1528             : }
    1529             : /* }}} */
    1530             : 
    1531             : /* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
    1532             :    Perform Perl-style regular expression replacement. */
    1533       36873 : static PHP_FUNCTION(preg_replace)
    1534             : {
    1535       36873 :         zval *regex, *replace, *subject, *zcount = NULL;
    1536       36873 :         zend_long limit = -1;
    1537             :         int replace_count;
    1538             : 
    1539             : #ifndef FAST_ZPP
    1540             :         /* Get function parameters and do error-checking. */
    1541             :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "zzz|lz/", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
    1542             :                 return;
    1543             :         }
    1544             : #else
    1545       36873 :         ZEND_PARSE_PARAMETERS_START(3, 5)
    1546       36870 :                 Z_PARAM_ZVAL(regex)
    1547       36870 :                 Z_PARAM_ZVAL(replace)
    1548       36870 :                 Z_PARAM_ZVAL(subject)
    1549       36870 :                 Z_PARAM_OPTIONAL
    1550       36910 :                 Z_PARAM_LONG(limit)
    1551          27 :                 Z_PARAM_ZVAL_EX(zcount, 0, 1)
    1552       36873 :         ZEND_PARSE_PARAMETERS_END();
    1553             : #endif
    1554             : 
    1555       73748 :         if (Z_TYPE_P(replace) == IS_ARRAY && Z_TYPE_P(regex) != IS_ARRAY) {
    1556           3 :                 php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
    1557           3 :                 RETURN_FALSE;
    1558             :         }
    1559             : 
    1560       36867 :         replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 0, 0);
    1561       36864 :         if (zcount) {
    1562           7 :                 zval_dtor(zcount);
    1563           7 :                 ZVAL_LONG(zcount, replace_count);
    1564             :         }
    1565             : }
    1566             : /* }}} */
    1567             : 
    1568             : /* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
    1569             :    Perform Perl-style regular expression replacement using replacement callback. */
    1570          50 : static PHP_FUNCTION(preg_replace_callback)
    1571             : {
    1572          50 :         zval *regex, *replace, *subject, *zcount = NULL;
    1573          50 :         zend_long limit = -1;
    1574             :         zend_string     *callback_name;
    1575             :         int replace_count;
    1576             : 
    1577             : #ifndef FAST_ZPP
    1578             :         /* Get function parameters and do error-checking. */
    1579             :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "zzz|lz/", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
    1580             :                 return;
    1581             :         }    
    1582             : #else
    1583          50 :         ZEND_PARSE_PARAMETERS_START(3, 5)
    1584          44 :                 Z_PARAM_ZVAL(regex)
    1585          44 :                 Z_PARAM_ZVAL(replace)
    1586          44 :                 Z_PARAM_ZVAL(subject)
    1587          44 :                 Z_PARAM_OPTIONAL
    1588          58 :                 Z_PARAM_LONG(limit)
    1589           6 :                 Z_PARAM_ZVAL_EX(zcount, 0, 1)
    1590          50 :         ZEND_PARSE_PARAMETERS_END();
    1591             : #endif
    1592             : 
    1593          41 :         if (!zend_is_callable(replace, 0, &callback_name)) {
    1594           3 :                 php_error_docref(NULL, E_WARNING, "Requires argument 2, '%s', to be a valid callback", ZSTR_VAL(callback_name));
    1595           3 :                 zend_string_release(callback_name);
    1596           3 :                 ZVAL_COPY(return_value, subject);
    1597           3 :                 return;
    1598             :         }
    1599          38 :         zend_string_release(callback_name);
    1600             : 
    1601          38 :         replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 1, 0);
    1602          38 :         if (zcount) {
    1603           1 :                 zval_dtor(zcount);
    1604           1 :                 ZVAL_LONG(zcount, replace_count);
    1605             :         }
    1606             : }
    1607             : /* }}} */
    1608             : 
    1609             : /* {{{ proto mixed preg_replace_callback_array(array pattern, mixed subject [, int limit [, int &count]])
    1610             :    Perform Perl-style regular expression replacement using replacement callback. */
    1611          12 : static PHP_FUNCTION(preg_replace_callback_array)
    1612             : {
    1613          12 :         zval regex, zv, *replace, *subject, *pattern, *zcount = NULL;
    1614          12 :         zend_long limit = -1;
    1615             :         zend_string *str_idx;
    1616             :         zend_string *callback_name;
    1617          12 :         int replace_count = 0;
    1618             : 
    1619             : #ifndef FAST_ZPP
    1620             :         /* Get function parameters and do error-checking. */
    1621             :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "az|lz/", &pattern, &subject, &limit, &zcount) == FAILURE) {
    1622             :                 return;
    1623             :         }
    1624             : #else
    1625          12 :         ZEND_PARSE_PARAMETERS_START(2, 4)
    1626          30 :                 Z_PARAM_ARRAY(pattern)
    1627           7 :                 Z_PARAM_ZVAL(subject)
    1628           7 :                 Z_PARAM_OPTIONAL
    1629          13 :                 Z_PARAM_LONG(limit)
    1630           4 :                 Z_PARAM_ZVAL_EX(zcount, 0, 1)
    1631          12 :         ZEND_PARSE_PARAMETERS_END();
    1632             : #endif
    1633             :         
    1634           6 :         ZVAL_UNDEF(&zv);
    1635          33 :         ZEND_HASH_FOREACH_STR_KEY_VAL(Z_ARRVAL_P(pattern), str_idx, replace) {
    1636          15 :                 if (str_idx) {
    1637          15 :                         ZVAL_STR_COPY(&regex, str_idx);
    1638             :                 } else {
    1639           0 :                         php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric or backslash");
    1640           0 :                         zval_ptr_dtor(return_value);
    1641           0 :                         RETURN_NULL();
    1642             :                 }               
    1643             : 
    1644          15 :                 if (!zend_is_callable(replace, 0, &callback_name)) {
    1645           1 :                         php_error_docref(NULL, E_WARNING, "'%s' is not a valid callback", ZSTR_VAL(callback_name));
    1646           1 :                         zend_string_release(callback_name);
    1647           1 :                         zval_ptr_dtor(&regex);
    1648           1 :                         zval_ptr_dtor(return_value);
    1649           1 :                         ZVAL_COPY(return_value, subject);
    1650           1 :                         return;
    1651             :                 }
    1652          14 :                 zend_string_release(callback_name);
    1653             : 
    1654          14 :                 if (Z_ISNULL_P(return_value)) {
    1655           5 :                         replace_count += preg_replace_impl(&zv, &regex, replace, subject, limit, 1, 0);
    1656             :                 } else {
    1657           9 :                         replace_count += preg_replace_impl(&zv, &regex, replace, return_value, limit, 1, 0);
    1658           9 :                         zval_ptr_dtor(return_value);
    1659             :                 }
    1660             : 
    1661          14 :                 zval_ptr_dtor(&regex);
    1662             : 
    1663          14 :                 if (Z_ISUNDEF(zv)) {
    1664           1 :                         RETURN_NULL();  
    1665             :                 }
    1666             : 
    1667          13 :                 ZVAL_COPY_VALUE(return_value, &zv);
    1668             : 
    1669          13 :                 if (UNEXPECTED(EG(exception))) {
    1670           1 :                         zval_ptr_dtor(return_value);
    1671           1 :                         RETURN_NULL();  
    1672             :                 }
    1673             :         } ZEND_HASH_FOREACH_END();
    1674             : 
    1675           3 :         if (zcount) {
    1676           1 :                 zval_dtor(zcount);
    1677           1 :                 ZVAL_LONG(zcount, replace_count);
    1678             :         }
    1679             : }
    1680             : /* }}} */
    1681             : 
    1682             : /* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
    1683             :    Perform Perl-style regular expression replacement and only return matches. */
    1684           1 : static PHP_FUNCTION(preg_filter)
    1685             : {
    1686           1 :         zval *regex, *replace, *subject, *zcount = NULL;
    1687           1 :         zend_long limit = -1;
    1688             :         int replace_count;
    1689             : 
    1690             : #ifndef FAST_ZPP
    1691             :         /* Get function parameters and do error-checking. */
    1692             :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "zzz|lz/", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
    1693             :                 return;
    1694             :         }    
    1695             : #else
    1696           1 :         ZEND_PARSE_PARAMETERS_START(3, 5)
    1697           1 :                 Z_PARAM_ZVAL(regex)
    1698           1 :                 Z_PARAM_ZVAL(replace)
    1699           1 :                 Z_PARAM_ZVAL(subject)
    1700           1 :                 Z_PARAM_OPTIONAL
    1701           1 :                 Z_PARAM_LONG(limit)
    1702           0 :                 Z_PARAM_ZVAL_EX(zcount, 0, 1)
    1703           1 :         ZEND_PARSE_PARAMETERS_END();
    1704             : #endif
    1705             : 
    1706           3 :         if (Z_TYPE_P(replace) == IS_ARRAY && Z_TYPE_P(regex) != IS_ARRAY) {
    1707           0 :                 php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
    1708           0 :                 RETURN_FALSE;
    1709             :         }
    1710             : 
    1711           1 :         replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 0, 1);
    1712           1 :         if (zcount) {
    1713           0 :                 zval_dtor(zcount);
    1714           0 :                 ZVAL_LONG(zcount, replace_count);
    1715             :         }
    1716             : }
    1717             : /* }}} */
    1718             : 
    1719             : /* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
    1720             :    Split string into an array using a perl-style regular expression as a delimiter */
    1721       10838 : static PHP_FUNCTION(preg_split)
    1722             : {
    1723             :         zend_string                     *regex;                 /* Regular expression */
    1724             :         zend_string                     *subject;               /* String to match against */
    1725       10838 :         zend_long                        limit_val = -1;/* Integer value of limit */
    1726       10838 :         zend_long                        flags = 0;             /* Match control flags */
    1727             :         pcre_cache_entry        *pce;                   /* Compiled regular expression */
    1728             : 
    1729             :         /* Get function parameters and do error checking */
    1730             : #ifndef FAST_ZPP
    1731             :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "SS|ll", &regex,
    1732             :                                                           &subject, &limit_val, &flags) == FAILURE) {
    1733             :                 RETURN_FALSE;
    1734             :         }
    1735             : #else
    1736       10838 :         ZEND_PARSE_PARAMETERS_START(2, 4)
    1737       32502 :                 Z_PARAM_STR(regex)
    1738       32496 :                 Z_PARAM_STR(subject)
    1739       10830 :                 Z_PARAM_OPTIONAL
    1740       10886 :                 Z_PARAM_LONG(limit_val)
    1741          60 :                 Z_PARAM_LONG(flags)
    1742       10838 :         ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
    1743             : #endif
    1744             : 
    1745       10830 :         if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject))) {
    1746           0 :                         php_error_docref(NULL, E_WARNING, "Subject is too long");
    1747           0 :                         RETURN_FALSE;
    1748             :         }
    1749             : 
    1750             :         /* Compile regex or get it from cache. */
    1751       10830 :         if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
    1752           5 :                 RETURN_FALSE;
    1753             :         }
    1754             : 
    1755       10825 :         pce->refcount++;
    1756       10825 :         php_pcre_split_impl(pce, ZSTR_VAL(subject), (int)ZSTR_LEN(subject), return_value, (int)limit_val, flags);
    1757       10825 :         pce->refcount--;
    1758             : }
    1759             : /* }}} */
    1760             : 
    1761             : /* {{{ php_pcre_split_impl
    1762             :  */
    1763       10840 : PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
    1764             :         zend_long limit_val, zend_long flags)
    1765             : {
    1766       10840 :         pcre_extra              *extra = pce->extra;/* Holds results of studying */
    1767             :         pcre_extra               extra_data;            /* Used locally for exec options */
    1768             :         int                             *offsets;                       /* Array of subpattern offsets */
    1769             :         int                              size_offsets;          /* Size of the offsets array */
    1770       10840 :         int                              exoptions = 0;         /* Execution options */
    1771       10840 :         int                              count = 0;                     /* Count of matched subpatterns */
    1772             :         int                              start_offset;          /* Where the new search starts */
    1773             :         int                              next_offset;           /* End of the last delimiter match + 1 */
    1774       10840 :         int                              g_notempty = 0;        /* If the match should not be empty */
    1775             :         char                    *last_match;            /* Location of last match */
    1776             :         int                              no_empty;                      /* If NO_EMPTY flag is set */
    1777             :         int                              delim_capture;         /* If delimiters should be captured */
    1778             :         int                              offset_capture;        /* If offsets should be captured */
    1779             :         zval                     tmp;
    1780             :         ALLOCA_FLAG(use_heap);
    1781             : 
    1782       10840 :         no_empty = flags & PREG_SPLIT_NO_EMPTY;
    1783       10840 :         delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
    1784       10840 :         offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
    1785             : 
    1786       10840 :         if (limit_val == 0) {
    1787           1 :                 limit_val = -1;
    1788             :         }
    1789             : 
    1790       10840 :         if (extra == NULL) {
    1791           7 :                 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
    1792           7 :                 extra = &extra_data;
    1793             :         }
    1794       10840 :         extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
    1795       10840 :         extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
    1796             : #ifdef PCRE_EXTRA_MARK
    1797       10840 :         extra->flags &= ~PCRE_EXTRA_MARK;
    1798             : #endif
    1799             : 
    1800             :         /* Initialize return value */
    1801       10840 :         array_init(return_value);
    1802             : 
    1803             :         /* Calculate the size of the offsets array, and allocate memory for it. */
    1804       10840 :         size_offsets = (pce->capture_count + 1) * 3;
    1805       10840 :         if (size_offsets <= 32) {
    1806       10840 :                 offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
    1807             :         } else {
    1808           0 :                 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
    1809             :         }
    1810             : 
    1811             :         /* Start at the beginning of the string */
    1812       10840 :         start_offset = 0;
    1813       10840 :         next_offset = 0;
    1814       10840 :         last_match = subject;
    1815       10840 :         PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
    1816             : 
    1817             :         /* Get next piece if no limit or limit not yet reached and something matched*/
    1818       34345 :         while ((limit_val == -1 || limit_val > 1)) {
    1819       23495 :                 count = pcre_exec(pce->re, extra, subject,
    1820             :                                                   subject_len, start_offset,
    1821             :                                                   exoptions|g_notempty, offsets, size_offsets);
    1822             : 
    1823             :                 /* the string was already proved to be valid UTF-8 */
    1824       23495 :                 exoptions |= PCRE_NO_UTF8_CHECK;
    1825             : 
    1826             :                 /* Check for too many substrings condition. */
    1827       23495 :                 if (count == 0) {
    1828           0 :                         php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
    1829           0 :                         count = size_offsets/3;
    1830             :                 }
    1831             : 
    1832             :                 /* If something matched */
    1833       36099 :                 if (count > 0 && (offsets[1] - offsets[0] >= 0)) {
    1834       12604 :                         if (!no_empty || &subject[offsets[0]] != last_match) {
    1835             : 
    1836       12545 :                                 if (offset_capture) {
    1837             :                                         /* Add (match, offset) pair to the return value */
    1838          26 :                                         add_offset_pair(return_value, last_match, (int)(&subject[offsets[0]]-last_match), next_offset, NULL);
    1839             :                                 } else {
    1840             :                                         /* Add the piece to the return value */
    1841       25038 :                                         ZVAL_STRINGL(&tmp, last_match, &subject[offsets[0]]-last_match);
    1842       12519 :                                         zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
    1843             :                                 }
    1844             : 
    1845             :                                 /* One less left to do */
    1846       12545 :                                 if (limit_val != -1)
    1847          13 :                                         limit_val--;
    1848             :                         }
    1849             : 
    1850       12604 :                         last_match = &subject[offsets[1]];
    1851       12604 :                         next_offset = offsets[1];
    1852             : 
    1853       12604 :                         if (delim_capture) {
    1854             :                                 int i, match_len;
    1855          62 :                                 for (i = 1; i < count; i++) {
    1856          31 :                                         match_len = offsets[(i<<1)+1] - offsets[i<<1];
    1857             :                                         /* If we have matched a delimiter */
    1858          31 :                                         if (!no_empty || match_len > 0) {
    1859          21 :                                                 if (offset_capture) {
    1860          10 :                                                         add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
    1861             :                                                 } else {
    1862          22 :                                                         ZVAL_STRINGL(&tmp, &subject[offsets[i<<1]], match_len);
    1863          11 :                                                         zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
    1864             :                                                 }
    1865             :                                         }
    1866             :                                 }
    1867             :                         }
    1868       10891 :                 } else if (count == PCRE_ERROR_NOMATCH) {
    1869             :                         /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
    1870             :                            this is not necessarily the end. We need to advance
    1871             :                            the start offset, and continue. Fudge the offset values
    1872             :                            to achieve this, unless we're already at the end of the string. */
    1873       10889 :                         if (g_notempty != 0 && start_offset < subject_len) {
    1874          61 :                                 offsets[0] = start_offset;
    1875         122 :                                 offsets[1] = start_offset + calculate_unit_length(pce, subject + start_offset);
    1876             :                         } else {
    1877             :                                 break;
    1878             :                         }
    1879             :                 } else {
    1880           2 :                         pcre_handle_exec_error(count);
    1881           2 :                         break;
    1882             :                 }
    1883             : 
    1884             :                 /* If we have matched an empty string, mimic what Perl's /g options does.
    1885             :                    This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
    1886             :                    the match again at the same point. If this fails (picked up above) we
    1887             :                    advance to the next character. */
    1888       12665 :                 g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
    1889             : 
    1890             :                 /* Advance to the position right after the last full match */
    1891       12665 :                 start_offset = offsets[1];
    1892             :         }
    1893             : 
    1894             : 
    1895       10840 :         start_offset = (int)(last_match - subject); /* the offset might have been incremented, but without further successful matches */
    1896             : 
    1897       10840 :         if (!no_empty || start_offset < subject_len)
    1898             :         {
    1899       10832 :                 if (offset_capture) {
    1900             :                         /* Add the last (match, offset) pair to the return value */
    1901           5 :                         add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
    1902             :                 } else {
    1903             :                         /* Add the last piece to the return value */
    1904       21654 :                         ZVAL_STRINGL(&tmp, last_match, subject + subject_len - last_match);
    1905       10827 :                         zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
    1906             :                 }
    1907             :         }
    1908             : 
    1909             : 
    1910             :         /* Clean up */
    1911       10840 :         if (size_offsets <= 32) {
    1912       10840 :                 free_alloca(offsets, use_heap);
    1913             :         } else {
    1914           0 :                 efree(offsets);
    1915             :         }
    1916       10840 : }
    1917             : /* }}} */
    1918             : 
    1919             : /* {{{ proto string preg_quote(string str [, string delim_char])
    1920             :    Backslash-escape regular expression metacharacters in str, plus the first byte of delim_char when one is given; NUL bytes are written as \000 */
    1921        9096 : static PHP_FUNCTION(preg_quote)
    1922             : {
    1923             :         size_t           in_str_len;            /* Length of in_str in bytes */
    1924             :         char    *in_str;                /* Input string argument */
    1925             :         char    *in_str_end;    /* End of the input string */
    1926        9096 :         size_t           delim_len = 0;         /* Length of delim; filled by argument parsing, not read afterwards */
    1927        9096 :         char    *delim = NULL;  /* Additional delimiter argument */
    1928             :         zend_string     *out_str;       /* Output string with quoted characters */
    1929             :         char    *p,                             /* Iterator for input string */
    1930             :                         *q,                             /* Iterator for output string */
    1931        9096 :                          delim_char=0,  /* Delimiter character to be quoted */
    1932             :                          c;                             /* Current character */
    1933        9096 :         zend_bool quote_delim = 0; /* Whether to quote additional delim char */
    1934             : 
    1935             :         /* Get the arguments and check for errors: str is required, delim_char is optional */
    1936             : #ifndef FAST_ZPP
    1937             :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", &in_str, &in_str_len,
    1938             :                                                           &delim, &delim_len) == FAILURE) {
    1939             :                 return;
    1940             :         }
    1941             : #else
    1942        9096 :         ZEND_PARSE_PARAMETERS_START(1, 2)
    1943       27279 :                 Z_PARAM_STRING(in_str, in_str_len)
    1944        9091 :                 Z_PARAM_OPTIONAL
    1945       27265 :                 Z_PARAM_STRING(delim, delim_len)
    1946        9096 :         ZEND_PARSE_PARAMETERS_END();
    1947             : #endif
    1948             : 
    1949        9091 :         in_str_end = in_str + in_str_len;
    1950             : 
    1951             :         /* Nothing to do if we got an empty string */
    1952        9091 :         if (in_str == in_str_end) {
    1953           5 :                 RETURN_EMPTY_STRING();
    1954             :         }
    1955             : 
    1956        9086 :         if (delim && *delim) { /* a delimiter was supplied and its first byte is not NUL */
    1957        9083 :                 delim_char = delim[0]; /* only the first byte of delim is ever used */
    1958        9083 :                 quote_delim = 1;
    1959             :         }
    1960             : 
    1961             :         /* Reserve 4 output bytes per input byte: the worst case below is a NUL
    1962             :            byte, which expands to the four bytes \000, so the loop cannot overrun */
    1963       18172 :         out_str = zend_string_safe_alloc(4, in_str_len, 0, 0);
    1964             : 
    1965             :         /* Go through the string and quote necessary characters */
    1966     7871623 :         for (p = in_str, q = ZSTR_VAL(out_str); p != in_str_end; p++) {
    1967     7862537 :                 c = *p;
    1968     7862537 :                 switch(c) {
    1969             :                         case '.':
    1970             :                         case '\\':
    1971             :                         case '+':
    1972             :                         case '*':
    1973             :                         case '?':
    1974             :                         case '[':
    1975             :                         case '^':
    1976             :                         case ']':
    1977             :                         case '$':
    1978             :                         case '(':
    1979             :                         case ')':
    1980             :                         case '{':
    1981             :                         case '}':
    1982             :                         case '=':
    1983             :                         case '!':
    1984             :                         case '>':
    1985             :                         case '<':
    1986             :                         case '|':
    1987             :                         case ':':
    1988             :                         case '-':
    1989     1104109 :                                 *q++ = '\\'; /* listed metacharacter: emit a backslash, then the character itself */
    1990     1104109 :                                 *q++ = c;
    1991     1104109 :                                 break;
    1992             : 
    1993             :                         case '\0': /* a NUL byte is written as the four characters \000 */
    1994        1086 :                                 *q++ = '\\';
    1995        1086 :                                 *q++ = '0';
    1996        1086 :                                 *q++ = '0';
    1997        1086 :                                 *q++ = '0';
    1998        1086 :                                 break;
    1999             : 
    2000             :                         default: /* the delimiter is only tested here, so a delimiter that is also listed above gets exactly one backslash */
    2001     6757342 :                                 if (quote_delim && c == delim_char)
    2002       15419 :                                         *q++ = '\\';
    2003     6757342 :                                 *q++ = c;
    2004             :                                 break;
    2005             :                 }
    2006             :         }
    2007        9086 :         *q = '\0'; /* terminate the output */
    2008             : 
    2009             :         /* Reallocate string to the number of bytes actually written (q - start) and return it */
    2010       18172 :         out_str = zend_string_truncate(out_str, q - ZSTR_VAL(out_str), 0);
    2011        9086 :         RETURN_NEW_STR(out_str);
    2012             : }
    2013             : /* }}} */
    2014             : 
    2015             : /* {{{ proto array preg_grep(string regex, array input [, int flags])
    2016             :    Searches array and returns entries which match regex; returns false when no compiled regex can be obtained */
    2017        8221 : static PHP_FUNCTION(preg_grep)
    2018             : {
    2019             :         zend_string                     *regex;                 /* Regular expression */
    2020             :         zval                            *input;                 /* Input array */
    2021        8221 :         zend_long                        flags = 0;             /* Match control flags */
    2022             :         pcre_cache_entry        *pce;                   /* Compiled regular expression */
    2023             : 
    2024             :         /* Get arguments and do error checking: regex and input are required, flags is optional */
    2025             : #ifndef FAST_ZPP
    2026             :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "Sa|l", &regex,
    2027             :                                                           &input, &flags) == FAILURE) {
    2028             :                 return;
    2029             :         }
    2030             : #else
    2031        8221 :         ZEND_PARSE_PARAMETERS_START(2, 3)
    2032       24651 :                 Z_PARAM_STR(regex)
    2033       24645 :                 Z_PARAM_ARRAY(input)
    2034        8212 :                 Z_PARAM_OPTIONAL
    2035        8218 :                 Z_PARAM_LONG(flags)
    2036        8221 :         ZEND_PARSE_PARAMETERS_END();
    2037             : #endif
    2038             : 
    2039             :         /* Compile regex or get it from cache. */
    2040        8212 :         if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
    2041           5 :                 RETURN_FALSE;
    2042             :         }
    2043             : 
    2044        8207 :         pce->refcount++; /* refcount is raised for the duration of the call; presumably keeps the cache entry alive while in use */
    2045        8207 :         php_pcre_grep_impl(pce, input, return_value, flags); /* fills return_value with the selected entries */
    2046        8207 :         pce->refcount--;
    2047             : }
    2048             : /* }}} */
    2049             : 
    2050        8207 : PHPAPI void  php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
    2051             : { /* Matches pce against the string form of every element of input; matching elements (or, with PREG_GREP_INVERT, the non-matching ones) are stored in return_value under their original keys */
    2052             :         zval                *entry;                             /* An entry in the input array */
    2053        8207 :         pcre_extra              *extra = pce->extra;/* Holds results of studying */
    2054             :         pcre_extra               extra_data;            /* Used locally for exec options */
    2055             :         int                             *offsets;                       /* Array of subpattern offsets */
    2056             :         int                              size_offsets;          /* Size of the offsets array */
    2057        8207 :         int                              count = 0;                     /* Count of matched subpatterns */
    2058             :         zend_string             *string_key;            /* String key of the current entry, if it has one */
    2059             :         zend_ulong               num_key;               /* Integer key, used when string_key is not set */
    2060             :         zend_bool                invert;                        /* Whether to return non-matching
    2061             :                                                                                    entries */
    2062             :         ALLOCA_FLAG(use_heap);
    2063             : 
    2064        8207 :         invert = flags & PREG_GREP_INVERT ? 1 : 0;
    2065             : 
    2066        8207 :         if (extra == NULL) { /* the cache entry has no extra block: use the local one so the limits below can be set */
    2067           0 :                 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
    2068           0 :                 extra = &extra_data;
    2069             :         }
    2070        8207 :         extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
    2071        8207 :         extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
    2072             : #ifdef PCRE_EXTRA_MARK
    2073        8207 :         extra->flags &= ~PCRE_EXTRA_MARK; /* clear the MARK bit in the flags */
    2074             : #endif
    2075             : 
    2076             :         /* Calculate the size of the offsets array, and allocate memory for it: three ints per capture group plus the whole match; up to 32 ints go through do_alloca, larger arrays through safe_emalloc. */
    2077        8207 :         size_offsets = (pce->capture_count + 1) * 3;
    2078        8207 :         if (size_offsets <= 32) {
    2079        8207 :                 offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
    2080             :         } else {
    2081           0 :                 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
    2082             :         }
    2083             : 
    2084             :         /* Initialize return array */
    2085        8207 :         array_init(return_value);
    2086             : 
    2087        8207 :         PCRE_G(error_code) = PHP_PCRE_NO_ERROR; /* reset the recorded error before matching */
    2088             : 
    2089             :         /* Go through the input array */
    2090       24747 :         ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
    2091        8270 :                 zend_string *subject_str = zval_get_string(entry); /* string form of the entry; released on every path below */
    2092             : 
    2093             :                 /* Perform the match from offset 0 with no extra exec options */
    2094        8270 :                 count = pcre_exec(pce->re, extra, ZSTR_VAL(subject_str),
    2095             :                                                   (int)ZSTR_LEN(subject_str), 0,
    2096             :                                                   0, offsets, size_offsets);
    2097             : 
    2098             :                 /* Check for too many substrings condition: a return of 0 is still treated as a match. */
    2099        8270 :                 if (count == 0) {
    2100           0 :                         php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
    2101           0 :                         count = size_offsets/3;
    2102        8270 :                 } else if (count < 0 && count != PCRE_ERROR_NOMATCH) { /* any error other than no-match: report it and stop scanning */
    2103           0 :                         pcre_handle_exec_error(count);
    2104             :                         zend_string_release(subject_str);
    2105           0 :                         break;
    2106             :                 }
    2107             : 
    2108             :                 /* If the entry fits our requirements */
    2109        8270 :                 if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
    2110        8225 :                         if (Z_REFCOUNTED_P(entry)) { /* the result array stores the same value, so take a reference for it */
    2111             :                                 Z_ADDREF_P(entry);
    2112             :                         }
    2113             : 
    2114             :                         /* Add to return array, keeping the entry's original string or integer key */
    2115        8225 :                         if (string_key) {
    2116           3 :                                 zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
    2117             :                         } else {
    2118        8222 :                                 zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
    2119             :                         }
    2120             :                 }
    2121             : 
    2122             :                 zend_string_release(subject_str);
    2123             :         } ZEND_HASH_FOREACH_END();
    2124             : 
    2125             :         /* Clean up: release offsets with the counterpart of whichever allocator was used above */
    2126        8207 :         if (size_offsets <= 32) {
    2127        8207 :                 free_alloca(offsets, use_heap);
    2128             :         } else {
    2129           0 :                 efree(offsets);
    2130             :         }
    2131        8207 : }
    2132             : /* }}} */
    2133             : 
    2134             : /* {{{ proto int preg_last_error()
    2135             :    Returns the error code of the last regexp execution; accepts no arguments. */
    2136          18 : static PHP_FUNCTION(preg_last_error)
    2137             : {
    2138             : #ifndef FAST_ZPP
    2139             :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "") == FAILURE) {
    2140             :                 return;
    2141             :         }
    2142             : #else
    2143          18 :         ZEND_PARSE_PARAMETERS_START(0, 0)
    2144          18 :         ZEND_PARSE_PARAMETERS_END();
    2145             : #endif
    2146             : 
    2147          16 :         RETURN_LONG(PCRE_G(error_code)); /* the value the matching functions store in PCRE_G(error_code) */
    2148             : }
    2149             : /* }}} */
    2150             : 
    2151             : /* {{{ module definition structures */
    2152             : 
    2153             : /* {{{ arginfo -- per-function parameter metadata. The last ZEND_BEGIN_ARG_INFO_EX argument appears to be the required-parameter count: it is 1 for preg_quote and 2 for preg_grep, matching the minimums in their ZEND_PARSE_PARAMETERS_START calls */
    2154             : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
    2155             :     ZEND_ARG_INFO(0, pattern)
    2156             :     ZEND_ARG_INFO(0, subject)
    2157             :     ZEND_ARG_INFO(1, subpatterns) /* array; the leading 1 (0 elsewhere) presumably flags pass-by-reference -- confirm against the Zend arginfo macros */
    2158             :     ZEND_ARG_INFO(0, flags)
    2159             :     ZEND_ARG_INFO(0, offset)
    2160             : ZEND_END_ARG_INFO()
    2161             : 
    2162             : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 2)
    2163             :     ZEND_ARG_INFO(0, pattern)
    2164             :     ZEND_ARG_INFO(0, subject)
    2165             :     ZEND_ARG_INFO(1, subpatterns) /* array */
    2166             :     ZEND_ARG_INFO(0, flags)
    2167             :     ZEND_ARG_INFO(0, offset)
    2168             : ZEND_END_ARG_INFO()
    2169             : 
    2170             : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3) /* also used for preg_filter in pcre_functions */
    2171             :     ZEND_ARG_INFO(0, regex)
    2172             :     ZEND_ARG_INFO(0, replace)
    2173             :     ZEND_ARG_INFO(0, subject)
    2174             :     ZEND_ARG_INFO(0, limit)
    2175             :     ZEND_ARG_INFO(1, count)
    2176             : ZEND_END_ARG_INFO()
    2177             : 
    2178             : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
    2179             :     ZEND_ARG_INFO(0, regex)
    2180             :     ZEND_ARG_INFO(0, callback)
    2181             :     ZEND_ARG_INFO(0, subject)
    2182             :     ZEND_ARG_INFO(0, limit)
    2183             :     ZEND_ARG_INFO(1, count)
    2184             : ZEND_END_ARG_INFO()
    2185             : 
    2186             : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback_array, 0, 0, 2)
    2187             :     ZEND_ARG_INFO(0, pattern)
    2188             :     ZEND_ARG_INFO(0, subject)
    2189             :     ZEND_ARG_INFO(0, limit)
    2190             :     ZEND_ARG_INFO(1, count)
    2191             : ZEND_END_ARG_INFO()
    2192             : 
    2193             : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
    2194             :     ZEND_ARG_INFO(0, pattern)
    2195             :     ZEND_ARG_INFO(0, subject)
    2196             :     ZEND_ARG_INFO(0, limit)
    2197             :     ZEND_ARG_INFO(0, flags)
    2198             : ZEND_END_ARG_INFO()
    2199             : 
    2200             : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
    2201             :     ZEND_ARG_INFO(0, str)
    2202             :     ZEND_ARG_INFO(0, delim_char)
    2203             : ZEND_END_ARG_INFO()
    2204             : 
    2205             : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2)
    2206             :     ZEND_ARG_INFO(0, regex)
    2207             :     ZEND_ARG_INFO(0, input) /* array */
    2208             :     ZEND_ARG_INFO(0, flags)
    2209             : ZEND_END_ARG_INFO()
    2210             : 
    2211             : ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0) /* no parameters */
    2212             : ZEND_END_ARG_INFO()
    2213             : /* }}} */
    2214             : 
    2215             : static const zend_function_entry pcre_functions[] = { /* Pairs each preg_* function with its arginfo; the list ends with PHP_FE_END */
    2216             :         PHP_FE(preg_match,                                      arginfo_preg_match)
    2217             :         PHP_FE(preg_match_all,                          arginfo_preg_match_all)
    2218             :         PHP_FE(preg_replace,                            arginfo_preg_replace)
    2219             :         PHP_FE(preg_replace_callback,           arginfo_preg_replace_callback)
    2220             :         PHP_FE(preg_replace_callback_array,     arginfo_preg_replace_callback_array)
    2221             :         PHP_FE(preg_filter,                                     arginfo_preg_replace) /* shares preg_replace's arginfo */
    2222             :         PHP_FE(preg_split,                                      arginfo_preg_split)
    2223             :         PHP_FE(preg_quote,                                      arginfo_preg_quote)
    2224             :         PHP_FE(preg_grep,                                       arginfo_preg_grep)
    2225             :         PHP_FE(preg_last_error,                         arginfo_preg_last_error)
    2226             :         PHP_FE_END
    2227             : };
    2228             : 
    2229             : zend_module_entry pcre_module_entry = { /* Module descriptor for the "pcre" extension; initializers are positional, so every slot needs its own comma-terminated entry */
    2230             :         STANDARD_MODULE_HEADER,
    2231             :    "pcre",
    2232             :         pcre_functions,
    2233             :         PHP_MINIT(pcre),
    2234             :         PHP_MSHUTDOWN(pcre),
    2235             : #ifdef PCRE_STUDY_JIT_COMPILE
    2236             :         PHP_RINIT(pcre), /* request-startup hook, registered only when PCRE_STUDY_JIT_COMPILE is defined */
    2237             : #else
    2238             :         NULL, /* no request-startup hook; the trailing comma keeps this slot separate from the NULL that follows */
    2239             : #endif
    2240             :         NULL, /* slot after request startup (request shutdown in zend_module_entry): unused */
    2241             :         PHP_MINFO(pcre),
    2242             :         PHP_PCRE_VERSION,
    2243             :         PHP_MODULE_GLOBALS(pcre),
    2244             :         PHP_GINIT(pcre),
    2245             :         PHP_GSHUTDOWN(pcre),
    2246             :         NULL,
    2247             :         STANDARD_MODULE_PROPERTIES_EX
    2248             : };
    2249             : 
    2250             : #ifdef COMPILE_DL_PCRE
    2251             : ZEND_GET_MODULE(pcre) /* emitted only when COMPILE_DL_PCRE is defined */
    2252             : #endif /* COMPILE_DL_PCRE */
    2253             : 
    2254             : /* }}} */
    2255             : 
    2256             : #endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
    2257             : 
    2258             : /*
    2259             :  * Local variables:
    2260             :  * tab-width: 4
    2261             :  * c-basic-offset: 4
    2262             :  * End:
    2263             :  * vim600: sw=4 ts=4 fdm=marker
    2264             :  * vim<600: sw=4 ts=4
    2265             :  */

Generated by: LCOV version 1.10

Generated at Wed, 20 Jul 2016 02:56:23 +0000 (5 days ago)

Copyright © 2005-2016 The PHP Group
All rights reserved.