PHP  
 PHP: Test and Code Coverage Analysis
downloads | QA | documentation | faq | getting help | mailing lists | reporting bugs | php.net sites | links | my php.net 
 

LCOV - code coverage report
Current view: top level - ext/pcre - php_pcre.c (source / functions) Hit Total Coverage
Test: PHP Code Coverage Lines: 870 954 91.2 %
Date: 2015-08-29 Functions: 32 33 97.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :    +----------------------------------------------------------------------+
       3             :    | PHP Version 7                                                        |
       4             :    +----------------------------------------------------------------------+
       5             :    | Copyright (c) 1997-2015 The PHP Group                                |
       6             :    +----------------------------------------------------------------------+
       7             :    | This source file is subject to version 3.01 of the PHP license,      |
       8             :    | that is bundled with this package in the file LICENSE, and is        |
       9             :    | available through the world-wide-web at the following url:           |
      10             :    | http://www.php.net/license/3_01.txt                                  |
      11             :    | If you did not receive a copy of the PHP license and are unable to   |
      12             :    | obtain it through the world-wide-web, please send a note to          |
      13             :    | license@php.net so we can mail you a copy immediately.               |
      14             :    +----------------------------------------------------------------------+
      15             :    | Author: Andrei Zmievski <andrei@php.net>                             |
      16             :    +----------------------------------------------------------------------+
      17             :  */
      18             : 
      19             : /* $Id$ */
      20             : 
      21             : #include "php.h"
      22             : #include "php_ini.h"
      23             : #include "php_globals.h"
      24             : #include "php_pcre.h"
      25             : #include "ext/standard/info.h"
      26             : #include "ext/standard/basic_functions.h"
      27             : #include "zend_smart_str.h"
      28             : 
      29             : #if HAVE_PCRE || HAVE_BUNDLED_PCRE
      30             : 
      31             : #include "ext/standard/php_string.h"
      32             : 
      33             : #define PREG_PATTERN_ORDER                      1
      34             : #define PREG_SET_ORDER                          2
      35             : #define PREG_OFFSET_CAPTURE                     (1<<8)
      36             : 
      37             : #define PREG_SPLIT_NO_EMPTY                     (1<<0)
      38             : #define PREG_SPLIT_DELIM_CAPTURE        (1<<1)
      39             : #define PREG_SPLIT_OFFSET_CAPTURE       (1<<2)
      40             : 
      41             : #define PREG_REPLACE_EVAL                       (1<<0)
      42             : 
      43             : #define PREG_GREP_INVERT                        (1<<0)
      44             : 
      45             : #define PCRE_CACHE_SIZE 4096
      46             : 
      47             : /* not fully functional workaround for libpcre < 8.0, see bug #70232 */
      48             : #ifndef PCRE_NOTEMPTY_ATSTART
      49             : # define PCRE_NOTEMPTY_ATSTART PCRE_NOTEMPTY
      50             : #endif
      51             : 
      52             : enum {
      53             :         PHP_PCRE_NO_ERROR = 0,
      54             :         PHP_PCRE_INTERNAL_ERROR,
      55             :         PHP_PCRE_BACKTRACK_LIMIT_ERROR,
      56             :         PHP_PCRE_RECURSION_LIMIT_ERROR,
      57             :         PHP_PCRE_BAD_UTF8_ERROR,
      58             :         PHP_PCRE_BAD_UTF8_OFFSET_ERROR,
      59             :         PHP_PCRE_JIT_STACKLIMIT_ERROR
      60             : };
      61             : 
      62             : 
/* Module globals for the pcre extension. NOTE(review): presumably this is the
 * storage that PCRE_G(...) resolves to (pcre_cache, backtrack_limit,
 * recursion_limit, error_code, jit) - confirm against php_pcre.h. */
PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)
      64             : 
      65             : 
      66          16 : static void pcre_handle_exec_error(int pcre_code) /* {{{ */
      67             : {
      68          16 :         int preg_code = 0;
      69             : 
      70          16 :         switch (pcre_code) {
      71             :                 case PCRE_ERROR_MATCHLIMIT:
      72           4 :                         preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
      73           4 :                         break;
      74             : 
      75             :                 case PCRE_ERROR_RECURSIONLIMIT:
      76           2 :                         preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
      77           2 :                         break;
      78             : 
      79             :                 case PCRE_ERROR_BADUTF8:
      80           8 :                         preg_code = PHP_PCRE_BAD_UTF8_ERROR;
      81           8 :                         break;
      82             : 
      83             :                 case PCRE_ERROR_BADUTF8_OFFSET:
      84           1 :                         preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
      85           1 :                         break;
      86             :                 
      87             : #ifdef PCRE_STUDY_JIT_COMPILE
      88             :                 case PCRE_ERROR_JIT_STACKLIMIT:
      89           1 :                         preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
      90           1 :                         break;
      91             : #endif
      92             : 
      93             :                 default:
      94           0 :                         preg_code = PHP_PCRE_INTERNAL_ERROR;
      95             :                         break;
      96             :         }
      97             : 
      98          16 :         PCRE_G(error_code) = preg_code;
      99          16 : }
     100             : /* }}} */
     101             : 
     102       27815 : static void php_free_pcre_cache(zval *data) /* {{{ */
     103             : {
     104       27815 :         pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
     105       27815 :         if (!pce) return;
     106       27815 :         pcre_free(pce->re);
     107       27815 :         if (pce->extra) {
     108       27810 :                 pcre_free_study(pce->extra);
     109             :         }
     110             : #if HAVE_SETLOCALE
     111       27815 :         if ((void*)pce->tables) pefree((void*)pce->tables, 1);
     112       27815 :         if (pce->locale) {
     113           8 :                 zend_string_release(pce->locale);
     114             :         }
     115             : #endif
     116       27815 :         pefree(pce, 1);
     117             : }
     118             : /* }}} */
     119             : 
     120       21291 : static PHP_GINIT_FUNCTION(pcre) /* {{{ */
     121             : {
     122       21291 :         zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
     123       21291 :         pcre_globals->backtrack_limit = 0;
     124       21291 :         pcre_globals->recursion_limit = 0;
     125       21291 :         pcre_globals->error_code      = PHP_PCRE_NO_ERROR;
     126       21291 : }
     127             : /* }}} */
     128             : 
     129       21327 : static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
     130             : {
     131       21327 :         zend_hash_destroy(&pcre_globals->pcre_cache);
     132       21327 : }
     133             : /* }}} */
     134             : 
/* INI settings bound to fields of zend_pcre_globals, all declared PHP_INI_ALL.
 * Defaults: pcre.backtrack_limit = 1000000, pcre.recursion_limit = 100000 and,
 * only when the PCRE headers define PCRE_STUDY_JIT_COMPILE, pcre.jit = 1. */
PHP_INI_BEGIN()
	STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
	STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000",  PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
#ifdef PCRE_STUDY_JIT_COMPILE
	STD_PHP_INI_ENTRY("pcre.jit",             "1",       PHP_INI_ALL, OnUpdateBool, jit,             zend_pcre_globals, pcre_globals)
#endif
PHP_INI_END()
     142             : 
     143             : 
     144             : /* {{{ PHP_MINFO_FUNCTION(pcre) */
     145         142 : static PHP_MINFO_FUNCTION(pcre)
     146             : {
     147         142 :         int jit_yes = 0;
     148             : 
     149         142 :         php_info_print_table_start();
     150         142 :         php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
     151         142 :         php_info_print_table_row(2, "PCRE Library Version", pcre_version() );
     152             : 
     153         142 :         if (!pcre_config(PCRE_CONFIG_JIT, &jit_yes)) {
     154         142 :                 php_info_print_table_row(2, "PCRE JIT Support", jit_yes ? "enabled" : "disabled");
     155             :         } else {
     156           0 :                 php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
     157             :         }
     158             : 
     159         142 :         php_info_print_table_end();
     160             : 
     161         142 :         DISPLAY_INI_ENTRIES();
     162         142 : }
     163             : /* }}} */
     164             : 
     165             : /* {{{ PHP_MINIT_FUNCTION(pcre) */
     166       21291 : static PHP_MINIT_FUNCTION(pcre)
     167             : {
     168       21291 :         REGISTER_INI_ENTRIES();
     169             : 
     170       21291 :         REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
     171       21291 :         REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
     172       21291 :         REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
     173       21291 :         REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
     174       21291 :         REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
     175       21291 :         REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
     176       21291 :         REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
     177             : 
     178       21291 :         REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
     179       21291 :         REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
     180       21291 :         REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
     181       21291 :         REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
     182       21291 :         REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
     183       21291 :         REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
     184       21291 :         REGISTER_LONG_CONSTANT("PREG_JIT_STACKLIMIT_ERROR", PHP_PCRE_JIT_STACKLIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
     185       21291 :         REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT);
     186             : 
     187       21291 :         return SUCCESS;
     188             : }
     189             : /* }}} */
     190             : 
/* {{{ PHP_MSHUTDOWN_FUNCTION(pcre)
 * Module shutdown: unregisters the INI entries that PHP_MINIT_FUNCTION(pcre)
 * registered. Always returns SUCCESS. */
static PHP_MSHUTDOWN_FUNCTION(pcre)
{
	UNREGISTER_INI_ENTRIES();

	return SUCCESS;
}
/* }}} */
     199             : 
     200             : /* {{{ static pcre_clean_cache */
     201       36864 : static int pcre_clean_cache(zval *data, void *arg)
     202             : {
     203       36864 :         pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
     204       36864 :         int *num_clean = (int *)arg;
     205             : 
     206       36864 :         if (*num_clean > 0 && !pce->refcount) {
     207        4608 :                 (*num_clean)--;
     208        4608 :                 return ZEND_HASH_APPLY_REMOVE;
     209             :         } else {
     210       32256 :                 return ZEND_HASH_APPLY_KEEP;
     211             :         }
     212             : }
     213             : /* }}} */
     214             : 
     215             : /* {{{ static make_subpats_table */
     216          10 : static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce)
     217             : {
     218          10 :         pcre_extra *extra = pce->extra;
     219          10 :         int name_cnt = pce->name_count, name_size, ni = 0;
     220             :         int rc;
     221             :         char *name_table;
     222             :         unsigned short name_idx;
     223             :         char **subpat_names;
     224             :         int rc1, rc2;
     225             : 
     226          10 :         rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
     227          10 :         rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
     228          10 :         rc = rc2 ? rc2 : rc1;
     229          10 :         if (rc < 0) {
     230           0 :                 php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
     231           0 :                 return NULL;
     232             :         }
     233             : 
     234          10 :         subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
     235         174 :         while (ni++ < name_cnt) {
     236         154 :                 name_idx = 0xff * (unsigned char)name_table[0] + (unsigned char)name_table[1];
     237         154 :                 subpat_names[name_idx] = name_table + 2;
     238         308 :                 if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
     239           0 :                         php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed");
     240           0 :                         efree(subpat_names);
     241           0 :                         return NULL;
     242             :                 }
     243         154 :                 name_table += name_size;
     244             :         }
     245          10 :         return subpat_names;
     246             : }
     247             : /* }}} */
     248             : 
     249             : /* {{{ static calculate_unit_length */
     250             : /* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE_UTF8. */
     251             : static zend_always_inline int calculate_unit_length(pcre_cache_entry *pce, char *start)
     252             : {
     253             :         int unit_len;
     254             : 
     255          18 :         if (pce->compile_options & PCRE_UTF8) {
     256          10 :                 char *end = start;
     257             : 
     258             :                 /* skip continuation bytes */
     259          21 :                 while ((*++end & 0xC0) == 0x80);
     260          10 :                 unit_len = end - start;
     261             :         } else {
     262           8 :                 unit_len = 1;
     263             :         }
     264          18 :         return unit_len;
     265             : }
     266             : /* }}} */
     267             : 
     268             : /* {{{ pcre_get_compiled_regex_cache
     269             :  */
     270     1809543 : PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
     271             : {
     272     1809543 :         pcre                            *re = NULL;
     273             :         pcre_extra                      *extra;
     274     1809543 :         int                                      coptions = 0;
     275     1809543 :         int                                      soptions = 0;
     276             :         const char                      *error;
     277             :         int                                      erroffset;
     278             :         char                             delimiter;
     279             :         char                             start_delimiter;
     280             :         char                             end_delimiter;
     281             :         char                            *p, *pp;
     282             :         char                            *pattern;
     283     1809543 :         int                                      do_study = 0;
     284     1809543 :         int                                      poptions = 0;
     285     1809543 :         unsigned const char *tables = NULL;
     286             :         pcre_cache_entry        *pce;
     287             :         pcre_cache_entry         new_entry;
     288             :         int                                      rc;
     289             : 
     290             :         /* Try to lookup the cached regex entry, and if successful, just pass
     291             :            back the compiled pattern, otherwise go on and compile it. */
     292     1809543 :         pce = zend_hash_find_ptr(&PCRE_G(pcre_cache), regex);
     293     1809543 :         if (pce) {
     294             : #if HAVE_SETLOCALE
     295     1781767 :                 if (pce->locale == BG(locale_string) ||
     296          40 :                     (pce->locale && BG(locale_string) &&
     297          20 :                      ZSTR_LEN(pce->locale) == ZSTR_LEN(BG(locale_string)) &&
     298          18 :                      !memcmp(ZSTR_VAL(pce->locale), ZSTR_VAL(BG(locale_string)), ZSTR_LEN(pce->locale))) ||
     299           2 :                     (!pce->locale &&
     300           0 :                      ZSTR_LEN(BG(locale_string)) == 1 &&
     301           0 :                      ZSTR_VAL(BG(locale_string))[0] == 'C') ||
     302           2 :                     (!BG(locale_string) &&
     303           0 :                      ZSTR_LEN(pce->locale) == 1 &&
     304           0 :                      ZSTR_VAL(pce->locale)[0] == 'C')) {
     305     1781683 :                         return pce;
     306             :                 }
     307             : #else
     308             :                 return pce;
     309             : #endif
     310             :         }
     311             : 
     312       27860 :         p = ZSTR_VAL(regex);
     313             : 
     314             :         /* Parse through the leading whitespace, and display a warning if we
     315             :            get to the end without encountering a delimiter. */
     316       27860 :         while (isspace((int)*(unsigned char *)p)) p++;
     317       27860 :         if (*p == 0) {
     318           5 :                 php_error_docref(NULL, E_WARNING,
     319           5 :                                                  p < ZSTR_VAL(regex) + ZSTR_LEN(regex) ? "Null byte in regex" : "Empty regular expression");
     320           5 :                 return NULL;
     321             :         }
     322             : 
     323             :         /* Get the delimiter and display a warning if it is alphanumeric
     324             :            or a backslash. */
     325       27855 :         delimiter = *p++;
     326       27855 :         if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
     327           7 :                 php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash");
     328           7 :                 return NULL;
     329             :         }
     330             : 
     331       27848 :         start_delimiter = delimiter;
     332       27848 :         if ((pp = strchr("([{< )]}> )]}>", delimiter)))
     333          29 :                 delimiter = pp[5];
     334       27848 :         end_delimiter = delimiter;
     335             : 
     336       27848 :         pp = p;
     337             : 
     338       27848 :         if (start_delimiter == end_delimiter) {
     339             :                 /* We need to iterate through the pattern, searching for the ending delimiter,
     340             :                    but skipping the backslashed delimiters.  If the ending delimiter is not
     341             :                    found, display a warning. */
     342    16359616 :                 while (*pp != 0) {
     343    16331787 :                         if (*pp == '\\' && pp[1] != 0) pp++;
     344    15167133 :                         else if (*pp == delimiter)
     345       27809 :                                 break;
     346    16303978 :                         pp++;
     347             :                 }
     348             :         } else {
     349             :                 /* We iterate through the pattern, searching for the matching ending
     350             :                  * delimiter. For each matching starting delimiter, we increment nesting
     351             :                  * level, and decrement it for each matching ending delimiter. If we
     352             :                  * reach the end of the pattern without matching, display a warning.
     353             :                  */
     354          29 :                 int brackets = 1;       /* brackets nesting level */
     355         281 :                 while (*pp != 0) {
     356         249 :                         if (*pp == '\\' && pp[1] != 0) pp++;
     357         226 :                         else if (*pp == end_delimiter && --brackets <= 0)
     358             :                                 break;
     359         200 :                         else if (*pp == start_delimiter)
     360           1 :                                 brackets++;
     361         223 :                         pp++;
     362             :                 }
     363             :         }
     364             : 
     365       27848 :         if (*pp == 0) {
     366          13 :                 if (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
     367           4 :                         php_error_docref(NULL,E_WARNING, "Null byte in regex");
     368           9 :                 } else if (start_delimiter == end_delimiter) {
     369           8 :                         php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
     370             :                 } else {
     371           1 :                         php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
     372             :                 }
     373          13 :                 return NULL;
     374             :         }
     375             : 
     376             :         /* Make a copy of the actual pattern. */
     377       27835 :         pattern = estrndup(p, pp-p);
     378             : 
     379             :         /* Move on to the options */
     380       27835 :         pp++;
     381             : 
     382             :         /* Parse through the options, setting appropriate flags.  Display
     383             :            a warning if we encounter an unknown modifier. */
     384       69718 :         while (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
     385       14066 :                 switch (*pp++) {
     386             :                         /* Perl compatible options */
     387        1914 :                         case 'i':       coptions |= PCRE_CASELESS;              break;
     388        2233 :                         case 'm':       coptions |= PCRE_MULTILINE;             break;
     389        9802 :                         case 's':       coptions |= PCRE_DOTALL;                break;
     390           5 :                         case 'x':       coptions |= PCRE_EXTENDED;              break;
     391             : 
     392             :                         /* PCRE specific options */
     393           2 :                         case 'A':       coptions |= PCRE_ANCHORED;              break;
     394           9 :                         case 'D':       coptions |= PCRE_DOLLAR_ENDONLY;break;
     395          25 :                         case 'S':       do_study  = 1;                                  break;
     396          20 :                         case 'U':       coptions |= PCRE_UNGREEDY;              break;
     397           1 :                         case 'X':       coptions |= PCRE_EXTRA;                 break;
     398          33 :                         case 'u':       coptions |= PCRE_UTF8;
     399             :         /* In  PCRE,  by  default, \d, \D, \s, \S, \w, and \W recognize only ASCII
     400             :        characters, even in UTF-8 mode. However, this can be changed by setting
     401             :        the PCRE_UCP option. */
     402             : #ifdef PCRE_UCP
     403          33 :                                                 coptions |= PCRE_UCP;
     404             : #endif
     405          33 :                                 break;
     406             : 
     407             :                         /* Custom preg options */
     408           2 :                         case 'e':       poptions |= PREG_REPLACE_EVAL;  break;
     409             : 
     410             :                         case ' ':
     411             :                         case '\n':
     412           2 :                                 break;
     413             : 
     414             :                         default:
     415          18 :                                 if (pp[-1]) {
     416          13 :                                         php_error_docref(NULL,E_WARNING, "Unknown modifier '%c'", pp[-1]);
     417             :                                 } else {
     418           5 :                                         php_error_docref(NULL,E_WARNING, "Null byte in regex");
     419             :                                 }
     420          18 :                                 efree(pattern);
     421          18 :                                 return NULL;
     422             :                 }
     423             :         }
     424             : 
     425             : #if HAVE_SETLOCALE
     426       27829 :         if (BG(locale_string) &&
     427          12 :             (ZSTR_LEN(BG(locale_string)) != 1 || ZSTR_VAL(BG(locale_string))[0] != 'C')) {
     428           4 :                 tables = pcre_maketables();
     429             :         }
     430             : #endif
     431             : 
     432             :         /* Compile pattern and display a warning if compilation failed. */
     433       27817 :         re = pcre_compile(pattern,
     434             :                                           coptions,
     435             :                                           &error,
     436             :                                           &erroffset,
     437             :                                           tables);
     438             : 
     439       27817 :         if (re == NULL) {
     440           6 :                 php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
     441           6 :                 efree(pattern);
     442           6 :                 if (tables) {
     443           0 :                         pefree((void*)tables, 1);
     444             :                 }
     445           6 :                 return NULL;
     446             :         }
     447             : 
     448             : #ifdef PCRE_STUDY_JIT_COMPILE
     449       27811 :         if (PCRE_G(jit)) {
     450             :                 /* Enable PCRE JIT compiler */
     451       27806 :                 do_study = 1;
     452       27806 :                 soptions |= PCRE_STUDY_JIT_COMPILE;
     453             :         }
     454             : #endif
     455             : 
     456             :         /* If study option was specified, study the pattern and
     457             :            store the result in extra for passing to pcre_exec. */
     458       27811 :         if (do_study) {
     459       27806 :                 extra = pcre_study(re, soptions, &error);
     460       27806 :                 if (extra) {
     461       27806 :                         extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
     462       27806 :                         extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
     463       27806 :                         extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
     464             :                 }
     465       27806 :                 if (error != NULL) {
     466           0 :                         php_error_docref(NULL, E_WARNING, "Error while studying pattern");
     467             :                 }
     468             :         } else {
     469           5 :                 extra = NULL;
     470             :         }
     471             : 
     472       27811 :         efree(pattern);
     473             : 
     474             :         /*
     475             :          * If we reached cache limit, clean out the items from the head of the list;
     476             :          * these are supposedly the oldest ones (but not necessarily the least used
     477             :          * ones).
     478             :          */
     479       27811 :         if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
     480           9 :                 int num_clean = PCRE_CACHE_SIZE / 8;
     481           9 :                 zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
     482             :         }
     483             : 
     484             :         /* Store the compiled pattern and extra info in the cache. */
     485       27811 :         new_entry.re = re;
     486       27811 :         new_entry.extra = extra;
     487       27811 :         new_entry.preg_options = poptions;
     488       27811 :         new_entry.compile_options = coptions;
     489             : #if HAVE_SETLOCALE
     490       27819 :         new_entry.locale = BG(locale_string) ?
     491          16 :                 ((GC_FLAGS(BG(locale_string)) & IS_STR_PERSISTENT) ?
     492           0 :                         zend_string_copy(BG(locale_string)) :
     493           8 :                         zend_string_init(ZSTR_VAL(BG(locale_string)), ZSTR_LEN(BG(locale_string)), 1)) :
     494             :                 NULL;
     495       27811 :         new_entry.tables = tables;
     496             : #endif
     497       27811 :         new_entry.refcount = 0;
     498             : 
     499       27811 :         rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &new_entry.capture_count);
     500       27811 :         if (rc < 0) {
     501           0 :                 php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
     502           0 :                 return NULL;
     503             :         }
     504             : 
     505       27811 :         rc = pcre_fullinfo(re, extra, PCRE_INFO_NAMECOUNT, &new_entry.name_count);
     506       27811 :         if (rc < 0) {
     507           0 :                 php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
     508           0 :                 return NULL;
     509             :         }
     510             : 
     511             :         /*
     512             :          * Interned strings are not duplicated when stored in HashTable,
     513             :          * but all the interned strings created during HTTP request are removed
     514             :          * at end of request. However PCRE_G(pcre_cache) must be consistent
     515             :          * on the next request as well. So we disable usage of interned strings
     516             :          * as hash keys especially for this table.
     517             :          * See bug #63180
     518             :          */
     519       27811 :         if (!ZSTR_IS_INTERNED(regex) || !(GC_FLAGS(regex) & IS_STR_PERMANENT)) {
     520       55622 :                 zend_string *str = zend_string_init(ZSTR_VAL(regex), ZSTR_LEN(regex), 1);
     521       27811 :                 GC_REFCOUNT(str) = 0; /* will be incremented by zend_hash_update_mem() */
     522       27811 :                 ZSTR_H(str) = ZSTR_H(regex);
     523       27811 :                 regex = str;
     524             :         }
     525             : 
     526       27811 :         pce = zend_hash_update_mem(&PCRE_G(pcre_cache), regex, &new_entry, sizeof(pcre_cache_entry));
     527             : 
     528       27811 :         return pce;
     529             : }
     530             : /* }}} */
     531             : 
     532             : /* {{{ pcre_get_compiled_regex
     533             :  */
     534      131053 : PHPAPI pcre* pcre_get_compiled_regex(zend_string *regex, pcre_extra **extra, int *preg_options)
     535             : {
     536      131053 :         pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
     537             : 
     538      131053 :         if (extra) {
     539      131053 :                 *extra = pce ? pce->extra : NULL;
     540             :         }
     541      131053 :         if (preg_options) {
     542      131053 :                 *preg_options = pce ? pce->preg_options : 0;
     543             :         }
     544             : 
     545      131053 :         return pce ? pce->re : NULL;
     546             : }
     547             : /* }}} */
     548             : 
     549             : /* {{{ pcre_get_compiled_regex_ex
     550             :  */
     551           0 : PHPAPI pcre* pcre_get_compiled_regex_ex(zend_string *regex, pcre_extra **extra, int *preg_options, int *compile_options)
     552             : {
     553           0 :         pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
     554             : 
     555           0 :         if (extra) {
     556           0 :                 *extra = pce ? pce->extra : NULL;
     557             :         }
     558           0 :         if (preg_options) {
     559           0 :                 *preg_options = pce ? pce->preg_options : 0;
     560             :         }
     561           0 :         if (compile_options) {
     562           0 :                 *compile_options = pce ? pce->compile_options : 0;
     563             :         }
     564             : 
     565           0 :         return pce ? pce->re : NULL;
     566             : }
     567             : /* }}} */
     568             : 
     569             : /* {{{ add_offset_pair */
     570          73 : static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
     571             : {
     572             :         zval match_pair, tmp;
     573             : 
     574          73 :         array_init_size(&match_pair, 2);
     575             : 
     576             :         /* Add (match, offset) to the return value */
     577         146 :         ZVAL_STRINGL(&tmp, str, len);
     578          73 :         zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
     579          73 :         ZVAL_LONG(&tmp, offset);
     580          73 :         zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
     581             : 
     582          73 :         if (name) {
     583             :                 Z_ADDREF(match_pair);
     584           2 :                 zend_hash_str_update(Z_ARRVAL_P(result), name, strlen(name), &match_pair);
     585             :         }
     586          73 :         zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair);
     587          73 : }
     588             : /* }}} */
     589             : 
     590     1648608 : static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
     591             : {
     592             :         /* parameters */
     593             :         zend_string              *regex;                        /* Regular expression */
     594             :         zend_string              *subject;                      /* String to match against */
     595             :         pcre_cache_entry *pce;                          /* Compiled regular expression */
     596     1648608 :         zval                     *subpats = NULL;       /* Array for subpatterns */
     597     1648608 :         zend_long                 flags = 0;            /* Match control flags */
     598     1648608 :         zend_long                 start_offset = 0;     /* Where the new search starts */
     599             : 
     600             : #ifndef FAST_ZPP
     601             :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "SS|z/ll", &regex,
     602             :                                                           &subject, &subpats, &flags, &start_offset) == FAILURE) {
     603             :                 RETURN_FALSE;
     604             :         }
     605             : #else
     606     1648608 :         ZEND_PARSE_PARAMETERS_START(2, 5)
     607     4945800 :                 Z_PARAM_STR(regex)
     608     4945788 :                 Z_PARAM_STR(subject)
     609     1648592 :                 Z_PARAM_OPTIONAL
     610     3872459 :                 Z_PARAM_ZVAL_EX(subpats, 0, 1)
     611     1120066 :                 Z_PARAM_LONG(flags)
     612          59 :                 Z_PARAM_LONG(start_offset)
     613     1648608 :         ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
     614             : #endif
     615             : 
     616     1648592 :         if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject))) {
     617           0 :                         php_error_docref(NULL, E_WARNING, "Subject is too long");
     618           0 :                         RETURN_FALSE;
     619             :         }
     620             : 
     621             :         /* Compile regex or get it from cache. */
     622     1648592 :         if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
     623          27 :                 RETURN_FALSE;
     624             :         }
     625             : 
     626     1648565 :         pce->refcount++;
     627     1648565 :         php_pcre_match_impl(pce, ZSTR_VAL(subject), (int)ZSTR_LEN(subject), return_value, subpats,
     628             :                 global, ZEND_NUM_ARGS() >= 4, flags, start_offset);
     629     1648565 :         pce->refcount--;
     630             : }
     631             : /* }}} */
     632             : 
     633             : /* {{{ php_pcre_match_impl() */
     634     1649503 : PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
     635             :         zval *subpats, int global, int use_flags, zend_long flags, zend_long start_offset)
     636             : {
     637             :         zval                     result_set,            /* Holds a set of subpatterns after
     638             :                                                                                    a global match */
     639     1649503 :                                     *match_sets = NULL; /* An array of sets of matches for each
     640             :                                                                                    subpattern after a global match */
     641     1649503 :         pcre_extra              *extra = pce->extra;/* Holds results of studying */
     642             :         pcre_extra               extra_data;            /* Used locally for exec options */
     643     1649503 :         int                              exoptions = 0;         /* Execution options */
     644     1649503 :         int                              count = 0;                     /* Count of matched subpatterns */
     645             :         int                             *offsets;                       /* Array of subpattern offsets */
     646             :         int                              num_subpats;           /* Number of captured subpatterns */
     647             :         int                              size_offsets;          /* Size of the offsets array */
     648             :         int                              matched;                       /* Has anything matched */
     649     1649503 :         int                              g_notempty = 0;        /* If the match should not be empty */
     650             :         const char         **stringlist;                /* Holds list of subpatterns */
     651             :         char               **subpat_names;              /* Array for named subpatterns */
     652             :         int                              i;
     653             :         int                              subpats_order;         /* Order of subpattern matches */
     654             :         int                              offset_capture;    /* Capture match offsets: yes/no */
     655     1649503 :         unsigned char   *mark = NULL;       /* Target for MARK name */
     656             :         zval            marks;                  /* Array of marks for PREG_PATTERN_ORDER */
     657             :         ALLOCA_FLAG(use_heap);
     658             : 
     659     1649503 :         ZVAL_UNDEF(&marks);
     660             : 
     661             :         /* Overwrite the passed-in value for subpatterns with an empty array. */
     662     1649503 :         if (subpats != NULL) {
     663             :                 zval_dtor(subpats);
     664     1120921 :                 array_init(subpats);
     665             :         }
     666             : 
     667     1649503 :         subpats_order = global ? PREG_PATTERN_ORDER : 0;
     668             : 
     669     1649503 :         if (use_flags) {
     670         914 :                 offset_capture = flags & PREG_OFFSET_CAPTURE;
     671             : 
     672             :                 /*
     673             :                  * subpats_order is pre-set to pattern mode so we change it only if
     674             :                  * necessary.
     675             :                  */
     676         914 :                 if (flags & 0xff) {
     677          23 :                         subpats_order = flags & 0xff;
     678             :                 }
     679         914 :                 if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
     680             :                         (!global && subpats_order != 0)) {
     681           1 :                         php_error_docref(NULL, E_WARNING, "Invalid flags specified");
     682           1 :                         return;
     683             :                 }
     684             :         } else {
     685     1648589 :                 offset_capture = 0;
     686             :         }
     687             : 
     688             :         /* Negative offset counts from the end of the string. */
     689     1649502 :         if (start_offset < 0) {
     690           5 :                 start_offset = subject_len + start_offset;
     691           5 :                 if (start_offset < 0) {
     692           1 :                         start_offset = 0;
     693             :                 }
     694             :         }
     695             : 
     696     1649502 :         if (extra == NULL) {
     697           3 :                 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
     698           3 :                 extra = &extra_data;
     699             :         }
     700     1649502 :         extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
     701     1649502 :         extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
     702             : #ifdef PCRE_EXTRA_MARK
     703     1649502 :         extra->mark = &mark;
     704     1649502 :         extra->flags |= PCRE_EXTRA_MARK;
     705             : #endif
     706             : 
     707             :         /* Calculate the size of the offsets array, and allocate memory for it. */
     708     1649502 :         num_subpats = pce->capture_count + 1;
     709     1649502 :         size_offsets = num_subpats * 3;
     710             : 
     711             :         /*
     712             :          * Build a mapping from subpattern numbers to their names. We will
     713             :          * allocate the table only if there are any named subpatterns.
     714             :          */
     715     1649502 :         subpat_names = NULL;
     716     1649502 :         if (pce->name_count > 0) {
     717           9 :                 subpat_names = make_subpats_table(num_subpats, pce);
     718           9 :                 if (!subpat_names) {
     719           0 :                         RETURN_FALSE;
     720             :                 }
     721             :         }
     722             : 
     723     1649502 :         if (size_offsets <= 32) {
     724     1649491 :                 offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
     725             :         } else {
     726          11 :                 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
     727             :         }
     728     1649502 :         memset(offsets, 0, size_offsets*sizeof(int));
     729             :         /* Allocate match sets array and initialize the values. */
     730     1649502 :         if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
     731         991 :                 match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0);
     732        2224 :                 for (i=0; i<num_subpats; i++) {
     733        1233 :                         array_init(&match_sets[i]);
     734             :                 }
     735             :         }
     736             : 
     737     1649502 :         matched = 0;
     738     1649502 :         PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
     739             : 
     740             :         do {
     741             :                 /* Execute the regular expression. */
     742     1649712 :                 count = pcre_exec(pce->re, extra, subject, (int)subject_len, (int)start_offset,
     743             :                                                   exoptions|g_notempty, offsets, size_offsets);
     744             : 
     745             :                 /* the string was already proved to be valid UTF-8 */
     746     1649712 :                 exoptions |= PCRE_NO_UTF8_CHECK;
     747             : 
     748             :                 /* Check for too many substrings condition. */
     749     1649712 :                 if (count == 0) {
     750           0 :                         php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
     751           0 :                         count = size_offsets/3;
     752             :                 }
     753             : 
     754             :                 /* If something has matched */
     755     1649712 :                 if (count > 0) {
     756       88243 :                         matched++;
     757             : 
     758             :                         /* If subpatterns array has been passed, fill it in with values. */
     759       88243 :                         if (subpats != NULL) {
     760             :                                 /* Try to get the list of substrings and display a warning if failed. */
     761       42597 :                                 if (pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) {
     762           0 :                                         if (subpat_names) {
     763           0 :                                                 efree(subpat_names);
     764             :                                         }
     765           0 :                                         if (size_offsets <= 32) {
     766           0 :                                                 free_alloca(offsets, use_heap);
     767             :                                         } else {
     768           0 :                                                 efree(offsets);
     769             :                                         }
     770           0 :                                         if (match_sets) efree(match_sets);
     771           0 :                                         php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
     772           0 :                                         RETURN_FALSE;
     773             :                                 }
     774             : 
     775       42597 :                                 if (global) {   /* global pattern matching */
     776         346 :                                         if (subpats && subpats_order == PREG_PATTERN_ORDER) {
     777             :                                                 /* For each subpattern, insert it into the appropriate array. */
     778         151 :                                                 if (offset_capture) {
     779          28 :                                                         for (i = 0; i < count; i++) {
     780          30 :                                                                 add_offset_pair(&match_sets[i], (char *)stringlist[i],
     781          30 :                                                                                                 offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
     782             :                                                         }
     783             :                                                 } else {
     784         357 :                                                         for (i = 0; i < count; i++) {
     785         219 :                                                                 add_next_index_stringl(&match_sets[i], (char *)stringlist[i],
     786         219 :                                                                                                            offsets[(i<<1)+1] - offsets[i<<1]);
     787             :                                                         }
     788             :                                                 }
     789             :                                                 /* Add MARK, if available */
     790         151 :                                                 if (mark) {
     791           2 :                                                         if (Z_TYPE(marks) == IS_UNDEF) {
     792           1 :                                                                 array_init(&marks);
     793             :                                                         }
     794           2 :                                                         add_index_string(&marks, matched - 1, (char *) mark);
     795             :                                                 }
     796             :                                                 /*
     797             :                                                  * If the number of captured subpatterns on this run is
     798             :                                                  * less than the total possible number, pad the result
     799             :                                                  * arrays with empty strings.
     800             :                                                  */
     801         151 :                                                 if (count < num_subpats) {
     802          11 :                                                         for (; i < num_subpats; i++) {
     803           7 :                                                                 add_next_index_string(&match_sets[i], "");
     804             :                                                         }
     805             :                                                 }
     806             :                                         } else {
     807             :                                                 /* Allocate the result set array */
     808          44 :                                                 array_init_size(&result_set, count + (mark ? 1 : 0));
     809             : 
     810             :                                                 /* Add all the subpatterns to it */
     811          44 :                                                 if (subpat_names) {
     812           2 :                                                         if (offset_capture) {
     813           0 :                                                                 for (i = 0; i < count; i++) {
     814           0 :                                                                         add_offset_pair(&result_set, (char *)stringlist[i],
     815           0 :                                                                                                         offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
     816             :                                                                 }
     817             :                                                         } else {
     818          14 :                                                                 for (i = 0; i < count; i++) {
     819          12 :                                                                         if (subpat_names[i]) {
     820           8 :                                                                                 add_assoc_stringl(&result_set, subpat_names[i], (char *)stringlist[i],
     821             :                                                                                                                            offsets[(i<<1)+1] - offsets[i<<1]);
     822             :                                                                         }
     823          12 :                                                                         add_next_index_stringl(&result_set, (char *)stringlist[i],
     824          12 :                                                                                                                    offsets[(i<<1)+1] - offsets[i<<1]);
     825             :                                                                 }
     826             :                                                         }
     827             :                                                 } else {
     828          42 :                                                         if (offset_capture) {
     829          10 :                                                                 for (i = 0; i < count; i++) {
     830          14 :                                                                         add_offset_pair(&result_set, (char *)stringlist[i],
     831          14 :                                                                                                         offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
     832             :                                                                 }
     833             :                                                         } else {
     834         343 :                                                                 for (i = 0; i < count; i++) {
     835         304 :                                                                         add_next_index_stringl(&result_set, (char *)stringlist[i],
     836         304 :                                                                                                                    offsets[(i<<1)+1] - offsets[i<<1]);
     837             :                                                                 }
     838             :                                                         }
     839             :                                                 }
     840             :                                                 /* Add MARK, if available */
     841          44 :                                                 if (mark) {
     842           2 :                                                         add_assoc_string_ex(&result_set, "MARK", sizeof("MARK") - 1, (char *)mark);
     843             :                                                 }
     844             :                                                 /* And add it to the output array */
     845          44 :                                                 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set);
     846             :                                         }
     847             :                                 } else {                        /* single pattern matching */
     848             :                                         /* For each subpattern, insert it into the subpatterns array. */
     849       42402 :                                         if (subpat_names) {
     850           5 :                                                 if (offset_capture) {
     851           5 :                                                         for (i = 0; i < count; i++) {
     852          12 :                                                                 add_offset_pair(subpats, (char *)stringlist[i],
     853           4 :                                                                                                 offsets[(i<<1)+1] - offsets[i<<1],
     854           8 :                                                                                                 offsets[i<<1], subpat_names[i]);
     855             :                                                         }
     856             :                                                 } else {
     857          24 :                                                         for (i = 0; i < count; i++) {
     858          20 :                                                                 if (subpat_names[i]) {
     859          13 :                                                                         add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
     860             :                                                                                                           offsets[(i<<1)+1] - offsets[i<<1]);
     861             :                                                                 }
     862          20 :                                                                 add_next_index_stringl(subpats, (char *)stringlist[i],
     863          20 :                                                                                                            offsets[(i<<1)+1] - offsets[i<<1]);
     864             :                                                         }
     865             :                                                 }
     866             :                                         } else {
     867       42397 :                                                 if (offset_capture) {
     868          10 :                                                         for (i = 0; i < count; i++) {
     869          12 :                                                                 add_offset_pair(subpats, (char *)stringlist[i],
     870           6 :                                                                                                 offsets[(i<<1)+1] - offsets[i<<1],
     871           6 :                                                                                                 offsets[i<<1], NULL);
     872             :                                                         }
     873             :                                                 } else {
     874      129337 :                                                         for (i = 0; i < count; i++) {
     875       86944 :                                                                 add_next_index_stringl(subpats, (char *)stringlist[i],
     876       86944 :                                                                                                            offsets[(i<<1)+1] - offsets[i<<1]);
     877             :                                                         }
     878             :                                                 }
     879             :                                         }
     880             :                                         /* Add MARK, if available */
     881       42402 :                                         if (mark) {
     882           1 :                                                 add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
     883             :                                         }
     884             :                                 }
     885             : 
     886       42597 :                                 pcre_free((void *) stringlist);
     887             :                         }
     888     1561469 :                 } else if (count == PCRE_ERROR_NOMATCH) {
     889             :                         /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
     890             :                            this is not necessarily the end. We need to advance
     891             :                            the start offset, and continue. Fudge the offset values
     892             :                            to achieve this, unless we're already at the end of the string. */
     893     1561462 :                         if (g_notempty != 0 && start_offset < subject_len) {
     894          12 :                                 int unit_len = calculate_unit_length(pce, subject + start_offset);
     895             :                                 
     896           6 :                                 offsets[0] = (int)start_offset;
     897           6 :                                 offsets[1] = (int)(start_offset + unit_len);
     898             :                         } else
     899             :                                 break;
     900             :                 } else {
     901           7 :                         pcre_handle_exec_error(count);
     902           7 :                         break;
     903             :                 }
     904             : 
     905             :                 /* If we have matched an empty string, mimic what Perl's /g option does.
     906             :                    This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
     907             :                    the match again at the same point. If this fails (picked up above) we
     908             :                    advance to the next character. */
     909       88249 :                 g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
     910             : 
     911             :                 /* Advance to the position right after the last full match */
     912       88249 :                 start_offset = offsets[1];
     913       88249 :         } while (global);
     914             : 
     915             :         /* Add the match sets to the output array and clean up */
     916     1649502 :         if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
     917         991 :                 if (subpat_names) {
     918          10 :                         for (i = 0; i < num_subpats; i++) {
     919           8 :                                 if (subpat_names[i]) {
     920           5 :                                         zend_hash_str_update(Z_ARRVAL_P(subpats), subpat_names[i],
     921             :                                                                          strlen(subpat_names[i]), &match_sets[i]);
     922           5 :                                         Z_ADDREF(match_sets[i]);
     923             :                                 }
     924           8 :                                 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
     925             :                         }
     926             :                 } else {
     927        2214 :                         for (i = 0; i < num_subpats; i++) {
     928        1225 :                                 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
     929             :                         }
     930             :                 }
     931         991 :                 efree(match_sets);
     932             : 
     933         991 :                 if (Z_TYPE(marks) != IS_UNDEF) {
     934           1 :                         add_assoc_zval(subpats, "MARK", &marks);
     935             :                 }
     936             :         }
     937             : 
     938     1649502 :         if (size_offsets <= 32) {
     939     1649491 :                 free_alloca(offsets, use_heap);
     940             :         } else {
     941          11 :                 efree(offsets);
     942             :         }
     943     1649502 :         if (subpat_names) {
     944           9 :                 efree(subpat_names);
     945             :         }
     946             : 
     947             :         /* Did we encounter an error? */
     948     1649502 :         if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
     949     1649495 :                 RETVAL_LONG(matched);
     950             :         } else {
     951           7 :                 RETVAL_FALSE;
     952             :         }
     953             : }
     954             : /* }}} */
     955             : 
     956             : /* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
     957             :    Perform a Perl-style regular expression match */
     958     1648497 : static PHP_FUNCTION(preg_match)
     959             : {
     960     1648497 :         php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
     961     1648497 : }
     962             : /* }}} */
     963             : 
     964             : /* {{{ proto int preg_match_all(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
     965             :    Perform a Perl-style global regular expression match */
     966         111 : static PHP_FUNCTION(preg_match_all)
     967             : {
     968         111 :         php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
     969         111 : }
     970             : /* }}} */
     971             : 
     972             : /* {{{ preg_get_backref
     973             :  */
     974         126 : static int preg_get_backref(char **str, int *backref)
     975             : {
     976         126 :         register char in_brace = 0;
     977         126 :         register char *walk = *str;
     978             : 
     979         126 :         if (walk[1] == 0)
     980           8 :                 return 0;
     981             : 
     982         118 :         if (*walk == '$' && walk[1] == '{') {
     983          14 :                 in_brace = 1;
     984          14 :                 walk++;
     985             :         }
     986         118 :         walk++;
     987             : 
     988         236 :         if (*walk >= '0' && *walk <= '9') {
     989         118 :                 *backref = *walk - '0';
     990         118 :                 walk++;
     991             :         } else
     992           0 :                 return 0;
     993             : 
     994         118 :         if (*walk && *walk >= '0' && *walk <= '9') {
     995           2 :                 *backref = *backref * 10 + *walk - '0';
     996           2 :                 walk++;
     997             :         }
     998             : 
     999         118 :         if (in_brace) {
    1000          14 :                 if (*walk == 0 || *walk != '}')
    1001           6 :                         return 0;
    1002             :                 else
    1003           8 :                         walk++;
    1004             :         }
    1005             : 
    1006         112 :         *str = walk;
    1007         112 :         return 1;
    1008             : }
    1009             : /* }}} */
    1010             : 
    1011             : /* {{{ preg_do_repl_func
    1012             :  */
    1013          61 : static zend_string *preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, unsigned char *mark)
    1014             : {
    1015             :         zend_string *result_str;
    1016             :         zval             retval;                        /* Function return value */
    1017             :         zval         args[1];                   /* Argument to pass to function */
    1018             :         int                      i;
    1019             : 
    1020          61 :         array_init_size(&args[0], count + (mark ? 1 : 0));
    1021          61 :         if (subpat_names) {
    1022           3 :                 for (i = 0; i < count; i++) {
    1023           2 :                         if (subpat_names[i]) {
    1024           1 :                                 add_assoc_stringl(&args[0], subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1]);
    1025             :                         }
    1026           2 :                         add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
    1027             :                 }
    1028             :         } else {
    1029         151 :                 for (i = 0; i < count; i++) {
    1030          91 :                         add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
    1031             :                 }
    1032             :         }
    1033          61 :         if (mark) {
    1034           2 :                 add_assoc_string(&args[0], "MARK", (char *) mark);
    1035             :         }
    1036             : 
    1037         181 :         if (call_user_function_ex(EG(function_table), NULL, function, &retval, 1, args, 0, NULL) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) {
    1038          59 :                 result_str = zval_get_string(&retval);
    1039          59 :                 zval_ptr_dtor(&retval);
    1040             :         } else {
    1041           2 :                 if (!EG(exception)) {
    1042           0 :                         php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
    1043             :                 }
    1044             : 
    1045           4 :                 result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
    1046             :         }
    1047             : 
    1048          61 :         zval_ptr_dtor(&args[0]);
    1049             : 
    1050          61 :         return result_str;
    1051             : }
    1052             : /* }}} */
    1053             : 
    1054             : /* {{{ php_pcre_replace
    1055             :  */
    1056       26465 : PHPAPI zend_string *php_pcre_replace(zend_string *regex,
    1057             :                                                           zend_string *subject_str,
    1058             :                                                           char *subject, int subject_len,
    1059             :                                                           zval *replace_val, int is_callable_replace,
    1060             :                                                           int limit, int *replace_count)
    1061             : {
    1062             :         pcre_cache_entry        *pce;                       /* Compiled regular expression */
    1063             :         zend_string                     *result;                        /* Function result */
    1064             : 
    1065             :         /* Compile regex or get it from cache. */
    1066       26465 :         if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
    1067          12 :                 return NULL;
    1068             :         }
    1069       26453 :         pce->refcount++;
    1070       26453 :         result = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_val,
    1071             :                 is_callable_replace, limit, replace_count);
    1072       26453 :         pce->refcount--;
    1073             : 
    1074       26453 :         return result;
    1075             : }
    1076             : /* }}} */
    1077             : 
    1078             : /* {{{ php_pcre_replace_impl() */
    1079       26468 : PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, int subject_len, zval *replace_val, int is_callable_replace, int limit, int *replace_count)
    1080             : {
    1081       26468 :         pcre_extra              *extra = pce->extra;/* Holds results of studying */
    1082             :         pcre_extra               extra_data;            /* Used locally for exec options */
    1083       26468 :         int                              exoptions = 0;         /* Execution options */
    1084       26468 :         int                              count = 0;                     /* Count of matched subpatterns */
    1085             :         int                             *offsets;                       /* Array of subpattern offsets */
    1086             :         char                    **subpat_names;         /* Array for named subpatterns */
    1087             :         int                              num_subpats;           /* Number of captured subpatterns */
    1088             :         int                              size_offsets;          /* Size of the offsets array */
    1089             :         int                              new_len;                       /* Length of needed storage */
    1090             :         int                              alloc_len;                     /* Actual allocated length */
    1091             :         int                              match_len;                     /* Length of the current match */
    1092             :         int                              backref;                       /* Backreference number */
    1093             :         int                              start_offset;          /* Where the new search starts */
    1094       26468 :         int                              g_notempty=0;          /* If the match should not be empty */
    1095       26468 :         int                              replace_len=0;         /* Length of replacement string */
    1096       26468 :         char                    *replace=NULL,          /* Replacement string */
    1097             :                                         *walkbuf,                       /* Location of current replacement in the result */
    1098             :                                         *walk,                          /* Used to walk the replacement string */
    1099             :                                         *match,                         /* The current match */
    1100             :                                         *piece,                         /* The current piece of subject */
    1101       26468 :                                         *replace_end=NULL,      /* End of replacement string */
    1102             :                                          walk_last;                     /* Last walked character */
    1103             :         int                              result_len;            /* Length of result */
    1104       26468 :         unsigned char   *mark = NULL;       /* Target for MARK name */
    1105             :         zend_string             *result;                        /* Result of replacement */
    1106       26468 :         zend_string     *eval_result=NULL;  /* Result of custom function */
    1107             : 
    1108             :         ALLOCA_FLAG(use_heap);
    1109             : 
    1110       26468 :         if (extra == NULL) {
    1111           2 :                 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
    1112           2 :                 extra = &extra_data;
    1113             :         }
    1114             : 
    1115       26468 :         extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
    1116       26468 :         extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
    1117             : 
    1118       26468 :         if (UNEXPECTED(pce->preg_options & PREG_REPLACE_EVAL)) {
    1119           1 :                 php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
    1120           1 :                 return NULL;
    1121             :         }
    1122             : 
    1123       26467 :         if (!is_callable_replace) {
    1124       26421 :                 replace = Z_STRVAL_P(replace_val);
    1125       26421 :                 replace_len = (int)Z_STRLEN_P(replace_val);
    1126       26421 :                 replace_end = replace + replace_len;
    1127             :         }
    1128             : 
    1129             :         /* Calculate the size of the offsets array, and allocate memory for it. */
    1130       26467 :         num_subpats = pce->capture_count + 1;
    1131       26467 :         size_offsets = num_subpats * 3;
    1132       26467 :         if (size_offsets <= 32) {
    1133       26464 :                 offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
    1134             :         } else {
    1135           3 :                 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
    1136             :         }
    1137             : 
    1138             :         /*
    1139             :          * Build a mapping from subpattern numbers to their names. We will
    1140             :          * allocate the table only if there are any named subpatterns.
    1141             :          */
    1142       26467 :         subpat_names = NULL;
    1143       26467 :         if (UNEXPECTED(pce->name_count > 0)) {
    1144           1 :                 subpat_names = make_subpats_table(num_subpats, pce);
    1145           1 :                 if (!subpat_names) {
    1146           0 :                         return NULL;
    1147             :                 }
    1148             :         }
    1149             : 
    1150       26467 :         alloc_len = 0;
    1151       26467 :         result = NULL;
    1152             : 
    1153             :         /* Initialize */
    1154       26467 :         match = NULL;
    1155       26467 :         start_offset = 0;
    1156       26467 :         result_len = 0;
    1157       26467 :         PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
    1158             : 
    1159             :         while (1) {
    1160             : #ifdef PCRE_EXTRA_MARK
    1161       33212 :                 extra->mark = &mark;
    1162       33212 :                 extra->flags |= PCRE_EXTRA_MARK;
    1163             : #endif
    1164             :                 /* Execute the regular expression. */
    1165       33212 :                 count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
    1166             :                                                   exoptions|g_notempty, offsets, size_offsets);
    1167             : 
    1168             :                 /* the string was already proved to be valid UTF-8 */
    1169       33212 :                 exoptions |= PCRE_NO_UTF8_CHECK;
    1170             : 
    1171             :                 /* Check for too many substrings condition. */
    1172       33212 :                 if (UNEXPECTED(count == 0)) {
    1173           0 :                         php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
    1174           0 :                         count = size_offsets / 3;
    1175             :                 }
    1176             : 
    1177       33212 :                 piece = subject + start_offset;
    1178             : 
    1179             :                 /* if (EXPECTED(count > 0 && (limit == -1 || limit > 0))) */
    1180       39945 :                 if (EXPECTED(count > 0 && limit)) {
    1181        6733 :                         if (UNEXPECTED(replace_count)) {
    1182        6733 :                                 ++*replace_count;
    1183             :                         }
    1184             : 
    1185             :                         /* Set the match location in subject */
    1186        6733 :                         match = subject + offsets[0];
    1187             : 
    1188        6733 :                         new_len = result_len + offsets[0] - start_offset; /* part before the match */
    1189             :                         
    1190             :                         /* if (!is_callable_replace) */
    1191        6733 :                         if (EXPECTED(replace)) {
    1192             :                                 /* do regular substitution */
    1193        6672 :                                 walk = replace;
    1194        6672 :                                 walk_last = 0;
    1195             : 
    1196       20111 :                                 while (walk < replace_end) {
    1197        6767 :                                         if ('\\' == *walk || '$' == *walk) {
    1198          63 :                                                 if (walk_last == '\\') {
    1199           0 :                                                         walk++;
    1200           0 :                                                         walk_last = 0;
    1201           0 :                                                         continue;
    1202             :                                                 }
    1203          63 :                                                 if (preg_get_backref(&walk, &backref)) {
    1204          56 :                                                         if (backref < count)
    1205          55 :                                                                 new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
    1206          56 :                                                         continue;
    1207             :                                                 }
    1208             :                                         }
    1209        6711 :                                         new_len++;
    1210        6711 :                                         walk++;
    1211        6711 :                                         walk_last = walk[-1];
    1212             :                                 }
    1213             : 
    1214        6672 :                                 if (new_len >= alloc_len) {
    1215        1120 :                                         alloc_len = alloc_len + 2 * new_len;
    1216        1120 :                                         if (result == NULL) {
    1217        1288 :                                                 result = zend_string_alloc(alloc_len, 0);
    1218             :                                         } else {
    1219         952 :                                                 result = zend_string_extend(result, alloc_len, 0);
    1220             :                                         }
    1221             :                                 }
    1222             : 
    1223             :                                 /* copy the part of the string before the match */
    1224        6672 :                                 memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece);
    1225        6672 :                                 result_len += (int)(match-piece);
    1226             : 
    1227             :                                 /* copy replacement and backrefs */
    1228        6672 :                                 walkbuf = ZSTR_VAL(result) + result_len;
    1229             : 
    1230        6672 :                                 walk = replace;
    1231        6672 :                                 walk_last = 0;
    1232       20111 :                                 while (walk < replace_end) {
    1233        6767 :                                         if ('\\' == *walk || '$' == *walk) {
    1234          63 :                                                 if (walk_last == '\\') {
    1235           0 :                                                         *(walkbuf-1) = *walk++;
    1236           0 :                                                         walk_last = 0;
    1237           0 :                                                         continue;
    1238             :                                                 }
    1239          63 :                                                 if (preg_get_backref(&walk, &backref)) {
    1240          56 :                                                         if (backref < count) {
    1241          55 :                                                                 match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
    1242          55 :                                                                 memcpy(walkbuf, subject + offsets[backref<<1], match_len);
    1243          55 :                                                                 walkbuf += match_len;
    1244             :                                                         }
    1245          56 :                                                         continue;
    1246             :                                                 }
    1247             :                                         }
    1248        6711 :                                         *walkbuf++ = *walk++;
    1249        6711 :                                         walk_last = walk[-1];
    1250             :                                 }
    1251        6672 :                                 *walkbuf = '\0';
    1252             :                                 /* increment the result length by how much we've added to the string */
    1253        6672 :                                 result_len += (int)(walkbuf - (ZSTR_VAL(result) + result_len));
    1254             :                         } else {
    1255             :                                 /* Use custom function to get replacement string and its length. */
    1256          61 :                                 eval_result = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, mark);
    1257             :                                 ZEND_ASSERT(eval_result);
    1258          61 :                                 new_len += (int)ZSTR_LEN(eval_result);
    1259          61 :                                 if (new_len >= alloc_len) {
    1260          53 :                                         alloc_len = alloc_len + 2 * new_len;
    1261          53 :                                         if (result == NULL) {
    1262          68 :                                                 result = zend_string_alloc(alloc_len, 0);
    1263             :                                         } else {
    1264          38 :                                                 result = zend_string_extend(result, alloc_len, 0);
    1265             :                                         }
    1266             :                                 }
    1267             :                                 /* copy the part of the string before the match */
    1268          61 :                                 memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
    1269          61 :                                 result_len += (int)(match-piece);
    1270             : 
    1271             :                                 /* copy replacement and backrefs */
    1272          61 :                                 walkbuf = ZSTR_VAL(result) + result_len;
    1273             : 
    1274             :                                 /* If using custom function, copy result to the buffer and clean up. */
    1275          61 :                                 memcpy(walkbuf, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
    1276          61 :                                 result_len += (int)ZSTR_LEN(eval_result);
    1277             :                                 zend_string_release(eval_result);
    1278             :                         }
    1279             : 
    1280        6733 :                         if (EXPECTED(limit)) {
    1281        6733 :                                 limit--;
    1282             :                         }
    1283       26491 :                 } else if (count == PCRE_ERROR_NOMATCH || UNEXPECTED(limit == 0)) {
    1284             :                         /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
    1285             :                            this is not necessarily the end. We need to advance
    1286             :                            the start offset, and continue. Fudge the offset values
    1287             :                            to achieve this, unless we're already at the end of the string. */
    1288       26483 :                         if (g_notempty != 0 && start_offset < subject_len) {
    1289          12 :                                 int unit_len = calculate_unit_length(pce, piece);
    1290             : 
    1291          12 :                                 offsets[0] = start_offset;
    1292          12 :                                 offsets[1] = start_offset + unit_len;
    1293          12 :                                 memcpy(ZSTR_VAL(result) + result_len, piece, unit_len);
    1294          12 :                                 result_len += unit_len;
    1295             :                         } else {
    1296       26459 :                                 if (!result && subject_str) {
    1297       25781 :                                         result = zend_string_copy(subject_str);
    1298       25781 :                                         break;
    1299             :                                 }
    1300         678 :                                 new_len = result_len + subject_len - start_offset;
    1301         678 :                                 if (new_len > alloc_len) {
    1302         262 :                                         alloc_len = new_len; /* now we know exactly how long it is */
    1303         262 :                                         if (NULL != result) {
    1304         524 :                                                 result = zend_string_realloc(result, alloc_len, 0);
    1305             :                                         } else {
    1306           0 :                                                 result = zend_string_alloc(alloc_len, 0);
    1307             :                                         }
    1308             :                                 }
    1309             :                                 /* stick that last bit of string on our output */
    1310         678 :                                 memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - start_offset);
    1311         678 :                                 result_len += subject_len - start_offset;
    1312         678 :                                 ZSTR_VAL(result)[result_len] = '\0';
    1313         678 :                                 ZSTR_LEN(result) = result_len;
    1314         678 :                                 break;
    1315             :                         }
    1316             :                 } else {
    1317           8 :                         pcre_handle_exec_error(count);
    1318           8 :                         if (result) {
    1319             :                                 zend_string_free(result);
    1320           0 :                                 result = NULL;
    1321             :                         }
    1322           8 :                         break;
    1323             :                 }
    1324             : 
    1325             :                 /* If we have matched an empty string, mimic what Perl's /g options does.
    1326             :                    This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
    1327             :                    the match again at the same point. If this fails (picked up above) we
    1328             :                    advance to the next character. */
    1329        6745 :                 g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
    1330             : 
    1331             :                 /* Advance to the next piece. */
    1332        6745 :                 start_offset = offsets[1];
    1333        6745 :         }
    1334             : 
    1335       26467 :         if (size_offsets <= 32) {
    1336       26464 :                 free_alloca(offsets, use_heap);
    1337             :         } else {
    1338           3 :                 efree(offsets);
    1339             :         }
    1340       26467 :         if (UNEXPECTED(subpat_names)) {
    1341           1 :                 efree(subpat_names);
    1342             :         }
    1343             : 
    1344       26467 :         return result;
    1345             : }
    1346             : /* }}} */
    1347             : 
    1348             : /* {{{ php_replace_in_subject
    1349             :  * Replaces in the string form of `subject`: one php_pcre_replace() call for a non-array `regex`, or one chained call per entry of an array `regex`. Returns the final string, or NULL on failure. */
    1350       26423 : static zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *subject, int limit, int is_callable_replace, int *replace_count)
    1351             : {
    1352             :         zval            *regex_entry,
    1353       26423 :                                 *replace_entry = NULL,
    1354             :                                 *replace_value,
    1355             :                                  empty_replace; /* stands in once the replace array is exhausted */
    1356             :         zend_string *result;
    1357             :         uint32_t replace_idx; /* next slot of the replace array to look at */
    1358       26423 :         zend_string     *subject_str = zval_get_string(subject);
    1359             : 
    1360             :         /* FIXME: This might need to be changed to ZSTR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */
    1361       26423 :         ZVAL_EMPTY_STRING(&empty_replace);
    1362             : 
    1363       26423 :         if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject_str))) { /* the length is cast to int for php_pcre_replace() below */
    1364           0 :                         php_error_docref(NULL, E_WARNING, "Subject is too long");
    1365           0 :                         return NULL; /* NOTE(review): subject_str is not released here, unlike the other exits below - looks like a leak; confirm */
    1366             :         }
    1367             : 
    1368             :         /* If regex is an array, apply every pattern in turn; each result becomes the subject of the next one */
    1369       26423 :         if (Z_TYPE_P(regex) == IS_ARRAY) {
    1370          21 :                 replace_value = replace; /* used for every pattern unless replace is a non-callable array */
    1371          21 :                 replace_idx = 0;
    1372             : 
    1373             :                 /* For each entry in the regex array, get the entry */
    1374         146 :                 ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(regex), regex_entry) {
    1375             :                         /* Make sure we're dealing with strings. */
    1376          63 :                         zend_string *regex_str = zval_get_string(regex_entry);
    1377             : 
    1378             :                         /* If replace is an array and not a callable construct, pair this pattern with the next replacement */
    1379          63 :                         if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) {
    1380             :                                 /* Get current entry, skipping slots whose type is IS_UNUSED */
    1381          48 :                                 replace_entry = NULL;
    1382          96 :                                 while (replace_idx < Z_ARRVAL_P(replace)->nNumUsed) {
    1383          92 :                                         if (Z_TYPE(Z_ARRVAL_P(replace)->arData[replace_idx].val) != IS_UNUSED) {
    1384          46 :                                                 replace_entry = &Z_ARRVAL_P(replace)->arData[replace_idx].val;
    1385          46 :                                                 break;
    1386             :                                         }
    1387           0 :                                         replace_idx++;
    1388             :                                 }
    1389          48 :                                 if (replace_entry != NULL) {
    1390          46 :                                         if (!is_callable_replace) { /* NOTE(review): always true here; the enclosing if already requires !is_callable_replace */
    1391          46 :                                                 convert_to_string_ex(replace_entry);
    1392             :                                         }
    1393          46 :                                         replace_value = replace_entry;
    1394          46 :                                         replace_idx++; /* the next pattern continues after this entry */
    1395             :                                 } else {
    1396             :                                         /* We've run out of replacement strings, so use an empty one */
    1397           2 :                                         replace_value = &empty_replace;
    1398             :                                 }
    1399             :                         }
    1400             : 
    1401             :                         /* Do the actual replacement and put the result back into subject_str
    1402             :                            for further replacements. */
    1403          63 :                         if ((result = php_pcre_replace(regex_str,
    1404             :                                                                                    subject_str,
    1405             :                                                                                    ZSTR_VAL(subject_str),
    1406             :                                                                                    (int)ZSTR_LEN(subject_str),
    1407             :                                                                                    replace_value,
    1408             :                                                                                    is_callable_replace,
    1409             :                                                                                    limit,
    1410             :                                                                                    replace_count)) != NULL) {
    1411             :                                 zend_string_release(subject_str);
    1412          62 :                                 subject_str = result; /* the result is the input for the next pattern */
    1413             :                         } else {
    1414             :                                 zend_string_release(subject_str);
    1415             :                                 zend_string_release(regex_str);
    1416           1 :                                 return NULL; /* a NULL result from any pattern aborts the whole chain */
    1417             :                         }
    1418             : 
    1419             :                         zend_string_release(regex_str);
    1420             :                 } ZEND_HASH_FOREACH_END();
    1421             : 
    1422          20 :                 return subject_str;
    1423             :         } else {
    1424       26402 :                 result = php_pcre_replace(Z_STR_P(regex), /* regex is read as a string here; presumably the caller has already converted it - verify */
    1425             :                                                                   subject_str,
    1426             :                                                                   ZSTR_VAL(subject_str),
    1427             :                                                                   (int)ZSTR_LEN(subject_str),
    1428             :                                                                   replace,
    1429             :                                                                   is_callable_replace,
    1430             :                                                                   limit,
    1431             :                                                                   replace_count);
    1432             :                 zend_string_release(subject_str);
    1433       26402 :                 return result; /* may be NULL if php_pcre_replace() returned NULL */
    1434             :         }
    1435             : }
    1436             : /* }}} */
    1437             : 
    1438             : /* {{{ preg_replace_impl
    1439             :  * Shared worker for the preg_replace family: converts scalar `replace`/`regex` to strings, runs php_replace_in_subject() on `subject` (per element when it is an array), stores the outcome in `return_value` and returns replace_count. */
    1440       26413 : static int preg_replace_impl(zval *return_value, zval *regex, zval *replace, zval *subject, zend_long limit_val, int is_callable_replace, int is_filter)
    1441             : {
    1442             :         zval            *subject_entry;
    1443             :         zend_string     *result;
    1444             :         zend_string     *string_key;
    1445             :         zend_ulong       num_key;
    1446       26413 :         int                      replace_count = 0, old_replace_count;
    1447             : 
    1448       52816 :         if (Z_TYPE_P(replace) != IS_ARRAY && (Z_TYPE_P(replace) != IS_OBJECT || !is_callable_replace)) { /* everything except arrays and objects used as callbacks becomes a string */
    1449       26425 :                 SEPARATE_ZVAL(replace);
    1450       26397 :                 convert_to_string_ex(replace);
    1451             :         }
    1452             : 
    1453       26411 :         if (Z_TYPE_P(regex) != IS_ARRAY) { /* a non-array pattern is likewise separated and stringified */
    1454       26427 :                 SEPARATE_ZVAL(regex);
    1455       26402 :                 convert_to_string_ex(regex);
    1456             :         }
    1457             : 
    1458             :         /* if subject is an array */
    1459       26410 :         if (Z_TYPE_P(subject) == IS_ARRAY) {
    1460           6 :                 array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));
    1461             : 
    1462             :                 /* For each subject entry, convert it to string, then perform replacement
    1463             :                    and add the result to the return_value array. */
    1464          44 :                 ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) {
    1465          19 :                         old_replace_count = replace_count;
    1466          19 :                         if ((result = php_replace_in_subject(regex, replace, subject_entry, limit_val, is_callable_replace, &replace_count)) != NULL) { /* NOTE(review): limit_val (zend_long) is passed to an int parameter - implicit narrowing; confirm intended */
    1467          36 :                                 if (!is_filter || replace_count > old_replace_count) { /* filter mode keeps an entry only if the count grew for it */
    1468             :                                         /* Add to return array under the same string or numeric key as in subject */
    1469             :                                         zval zv;
    1470             : 
    1471          17 :                                         ZVAL_STR(&zv, result);
    1472          17 :                                         if (string_key) {
    1473           1 :                                                 zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv);
    1474             :                                         } else {
    1475          16 :                                                 zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv);
    1476             :                                         }
    1477             :                                 } else {
    1478             :                                         zend_string_release(result);
    1479             :                                 }
    1480             :                         } /* a NULL result simply leaves this key out of return_value */
    1481             :                 } ZEND_HASH_FOREACH_END();
    1482             :         } else {        
    1483             :                 /* if subject is not an array */
    1484       26404 :                 old_replace_count = replace_count;
    1485       26404 :                 if ((result = php_replace_in_subject(regex, replace, subject, limit_val, is_callable_replace, &replace_count)) != NULL) {
    1486       52766 :                         if (!is_filter || replace_count > old_replace_count) {
    1487       26383 :                                 RETVAL_STR(result);
    1488             :                         } else {
    1489             :                                 zend_string_release(result);
    1490             :                         }
    1491             :                 } /* on a NULL or filtered-out result return_value is not written by this function */
    1492             :         }
    1493             :         
    1494       26410 :         return replace_count;
    1495             : }
    1496             : /* }}} */
    1497             : 
    1498             : /* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
    1499             :    Perform Perl-style regular expression replacement. Returns FALSE with a warning when replace is an array but regex is not. */
    1500       26370 : static PHP_FUNCTION(preg_replace)
    1501             : {
    1502       26370 :         zval *regex, *replace, *subject, *zcount = NULL;
    1503       26370 :         zend_long limit = -1; /* default used when no limit argument is passed */
    1504             :         int replace_count;
    1505             : 
    1506             : #ifndef FAST_ZPP
    1507             :         /* Get function parameters and do error-checking. */
    1508             :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "zzz|lz/", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
    1509             :                 return;
    1510             :         }
    1511             : #else
    1512       26370 :         ZEND_PARSE_PARAMETERS_START(3, 5)
    1513       26367 :                 Z_PARAM_ZVAL(regex)
    1514       26367 :                 Z_PARAM_ZVAL(replace)
    1515       26367 :                 Z_PARAM_ZVAL(subject)
    1516       26367 :                 Z_PARAM_OPTIONAL
    1517       26407 :                 Z_PARAM_LONG(limit)
    1518          27 :                 Z_PARAM_ZVAL_EX(zcount, 0, 1)
    1519       26370 :         ZEND_PARSE_PARAMETERS_END();
    1520             : #endif
    1521             : 
    1522       52741 :         if (Z_TYPE_P(replace) == IS_ARRAY && Z_TYPE_P(regex) != IS_ARRAY) { /* an array of replacements needs an array of patterns */
    1523           3 :                 php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
    1524           3 :                 RETURN_FALSE;
    1525             :         }
    1526             : 
    1527       26364 :         replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 0, 0); /* is_callable_replace = 0, is_filter = 0 */
    1528       26361 :         if (zcount) { /* count argument supplied: destroy its old value and store the total */
    1529           7 :                 zval_dtor(zcount);
    1530           7 :                 ZVAL_LONG(zcount, replace_count);
    1531             :         }
    1532             : }
    1533             : /* }}} */
    1534             : 
    1535             : /* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
    1536             :    Perform Perl-style regular expression replacement using replacement callback. An invalid callback raises a warning and returns a copy of subject. */
    1537          46 : static PHP_FUNCTION(preg_replace_callback)
    1538             : {
    1539          46 :         zval *regex, *replace, *subject, *zcount = NULL;
    1540          46 :         zend_long limit = -1; /* default used when no limit argument is passed */
    1541             :         zend_string     *callback_name; /* filled in by zend_is_callable(); released on both paths below */
    1542             :         int replace_count;
    1543             : 
    1544             : #ifndef FAST_ZPP
    1545             :         /* Get function parameters and do error-checking. */
    1546             :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "zzz|lz/", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
    1547             :                 return;
    1548             :         }    
    1549             : #else
    1550          46 :         ZEND_PARSE_PARAMETERS_START(3, 5)
    1551          40 :                 Z_PARAM_ZVAL(regex)
    1552          40 :                 Z_PARAM_ZVAL(replace)
    1553          40 :                 Z_PARAM_ZVAL(subject)
    1554          40 :                 Z_PARAM_OPTIONAL
    1555          54 :                 Z_PARAM_LONG(limit)
    1556           6 :                 Z_PARAM_ZVAL_EX(zcount, 0, 1)
    1557          46 :         ZEND_PARSE_PARAMETERS_END();
    1558             : #endif
    1559             : 
    1560          37 :         if (!zend_is_callable(replace, 0, &callback_name)) { /* argument 2 must be callable; otherwise warn and hand the subject back */
    1561           3 :                 php_error_docref(NULL, E_WARNING, "Requires argument 2, '%s', to be a valid callback", ZSTR_VAL(callback_name));
    1562           3 :                 zend_string_release(callback_name);
    1563           3 :                 ZVAL_COPY(return_value, subject);
    1564           3 :                 return; /* the count argument is not written on this path */
    1565             :         }
    1566          34 :         zend_string_release(callback_name);
    1567             : 
    1568          34 :         replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 1, 0); /* is_callable_replace = 1, is_filter = 0 */
    1569          34 :         if (zcount) { /* count argument supplied: destroy its old value and store the total */
    1570           1 :                 zval_dtor(zcount);
    1571           1 :                 ZVAL_LONG(zcount, replace_count);
    1572             :         }
    1573             : }
    1574             : /* }}} */
    1575             : 
    1576             : /* {{{ proto mixed preg_replace_callback_array(array pattern, mixed subject [, int limit [, int &count]])
    1577             :    Perform Perl-style regular expression replacement using an array whose string keys are patterns and whose values are callbacks, applied in array order. */
    1578          12 : static PHP_FUNCTION(preg_replace_callback_array)
    1579             : {
    1580          12 :         zval regex, zv, *replace, *subject, *pattern, *zcount = NULL;
    1581          12 :         zend_long limit = -1; /* default used when no limit argument is passed */
    1582             :         zend_string *str_idx; /* string key of the current entry, i.e. the pattern */
    1583             :         zend_string *callback_name;
    1584          12 :         int replace_count = 0; /* summed over all patterns */
    1585             : 
    1586             : #ifndef FAST_ZPP
    1587             :         /* Get function parameters and do error-checking. */
    1588             :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "az|lz/", &pattern, &subject, &limit, &zcount) == FAILURE) {
    1589             :                 return;
    1590             :         }
    1591             : #else
    1592          12 :         ZEND_PARSE_PARAMETERS_START(2, 4)
    1593          30 :                 Z_PARAM_ARRAY(pattern)
    1594           7 :                 Z_PARAM_ZVAL(subject)
    1595           7 :                 Z_PARAM_OPTIONAL
    1596          13 :                 Z_PARAM_LONG(limit)
    1597           4 :                 Z_PARAM_ZVAL_EX(zcount, 0, 1)
    1598          12 :         ZEND_PARSE_PARAMETERS_END();
    1599             : #endif
    1600             :         
    1601           6 :         ZVAL_UNDEF(&zv); /* NOTE(review): zv is reset only here, not per iteration; confirm a later pattern whose replacement fails cannot leave the previous, already released value in zv */
    1602          33 :         ZEND_HASH_FOREACH_STR_KEY_VAL(Z_ARRVAL_P(pattern), str_idx, replace) {
    1603          15 :                 if (str_idx) {
    1604          15 :                         ZVAL_STR_COPY(&regex, str_idx);
    1605             :                 } else { /* entry without a string key: warn, drop any result so far and return NULL */
    1606           0 :                         php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric or backslash");
    1607           0 :                         zval_ptr_dtor(return_value);
    1608           0 :                         RETURN_NULL();
    1609             :                 }               
    1610             : 
    1611          15 :                 if (!zend_is_callable(replace, 0, &callback_name)) { /* invalid callback: warn, discard any result so far and return a copy of the original subject */
    1612           1 :                         php_error_docref(NULL, E_WARNING, "'%s' is not a valid callback", ZSTR_VAL(callback_name));
    1613           1 :                         zend_string_release(callback_name);
    1614           1 :                         zval_ptr_dtor(&regex);
    1615           1 :                         zval_ptr_dtor(return_value);
    1616           1 :                         ZVAL_COPY(return_value, subject);
    1617           1 :                         return;
    1618             :                 }
    1619          14 :                 zend_string_release(callback_name);
    1620             : 
    1621          14 :                 if (Z_ISNULL_P(return_value)) { /* no earlier result in return_value (presumably the first pattern): start from subject */
    1622           5 :                         replace_count += preg_replace_impl(&zv, &regex, replace, subject, limit, 1, 0);
    1623             :                 } else { /* later patterns work on the previous result, which is released afterwards */
    1624           9 :                         replace_count += preg_replace_impl(&zv, &regex, replace, return_value, limit, 1, 0);
    1625           9 :                         zval_ptr_dtor(return_value);
    1626             :                 }
    1627             : 
    1628          14 :                 zval_ptr_dtor(&regex);
    1629             : 
    1630          14 :                 if (Z_ISUNDEF(zv)) { /* preg_replace_impl() wrote no result */
    1631           1 :                         RETURN_NULL();  
    1632             :                 }
    1633             : 
    1634          13 :                 ZVAL_COPY_VALUE(return_value, &zv); /* the new result becomes the running return value */
    1635             : 
    1636          13 :                 if (UNEXPECTED(EG(exception))) { /* an exception is pending: discard the result and return NULL */
    1637           1 :                         zval_ptr_dtor(return_value);
    1638           1 :                         RETURN_NULL();  
    1639             :                 }
    1640             :         } ZEND_HASH_FOREACH_END();
    1641             : 
    1642           3 :         if (zcount) { /* only reached when the loop ran to completion; the early returns above leave count untouched */
    1643           1 :                 zval_dtor(zcount);
    1644           1 :                 ZVAL_LONG(zcount, replace_count);
    1645             :         }
    1646             : }
    1647             : /* }}} */
    1648             : 
    1649             : /* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
    1650             :    Perform Perl-style regular expression replacement and only return matches. Same flow as preg_replace, but calls preg_replace_impl() with is_filter set. */
    1651           1 : static PHP_FUNCTION(preg_filter)
    1652             : {
    1653           1 :         zval *regex, *replace, *subject, *zcount = NULL;
    1654           1 :         zend_long limit = -1; /* default used when no limit argument is passed */
    1655             :         int replace_count;
    1656             : 
    1657             : #ifndef FAST_ZPP
    1658             :         /* Get function parameters and do error-checking. */
    1659             :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "zzz|lz/", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
    1660             :                 return;
    1661             :         }    
    1662             : #else
    1663           1 :         ZEND_PARSE_PARAMETERS_START(3, 5)
    1664           1 :                 Z_PARAM_ZVAL(regex)
    1665           1 :                 Z_PARAM_ZVAL(replace)
    1666           1 :                 Z_PARAM_ZVAL(subject)
    1667           1 :                 Z_PARAM_OPTIONAL
    1668           1 :                 Z_PARAM_LONG(limit)
    1669           0 :                 Z_PARAM_ZVAL_EX(zcount, 0, 1)
    1670           1 :         ZEND_PARSE_PARAMETERS_END();
    1671             : #endif
    1672             : 
    1673           3 :         if (Z_TYPE_P(replace) == IS_ARRAY && Z_TYPE_P(regex) != IS_ARRAY) { /* an array of replacements needs an array of patterns */
    1674           0 :                 php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
    1675           0 :                 RETURN_FALSE;
    1676             :         }
    1677             : 
    1678           1 :         replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 0, 1); /* is_callable_replace = 0, is_filter = 1 */
    1679           1 :         if (zcount) { /* count argument supplied: destroy its old value and store the total */
    1680           0 :                 zval_dtor(zcount);
    1681           0 :                 ZVAL_LONG(zcount, replace_count);
    1682             :         }
    1683             : }
    1684             : /* }}} */
    1685             : 
    1686             : /* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
    1687             :    Split string into an array using a perl-style regular expression as a delimiter. Returns FALSE on bad arguments, an over-long subject or a pattern that fails to compile. */
    1688        2501 : static PHP_FUNCTION(preg_split)
    1689             : {
    1690             :         zend_string                     *regex;                 /* Regular expression */
    1691             :         zend_string                     *subject;               /* String to match against */
    1692        2501 :         zend_long                        limit_val = -1;/* Integer value of limit */
    1693        2501 :         zend_long                        flags = 0;             /* Match control flags */
    1694             :         pcre_cache_entry        *pce;                   /* Compiled regular expression */
    1695             : 
    1696             :         /* Get function parameters and do error checking */
    1697             : #ifndef FAST_ZPP
    1698             :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "SS|ll", &regex,
    1699             :                                                           &subject, &limit_val, &flags) == FAILURE) {
    1700             :                 RETURN_FALSE;
    1701             :         }
    1702             : #else
    1703        2501 :         ZEND_PARSE_PARAMETERS_START(2, 4)
    1704        7491 :                 Z_PARAM_STR(regex)
    1705        7485 :                 Z_PARAM_STR(subject)
    1706        2493 :                 Z_PARAM_OPTIONAL
    1707        2549 :                 Z_PARAM_LONG(limit_val)
    1708          60 :                 Z_PARAM_LONG(flags)
    1709        2501 :         ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
    1710             : #endif
    1711             : 
    1712        2493 :         if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject))) { /* the length is cast to int for php_pcre_split_impl() below */
    1713           0 :                         php_error_docref(NULL, E_WARNING, "Subject is too long");
    1714           0 :                         RETURN_FALSE;
    1715             :         }
    1716             : 
    1717             :         /* Compile regex or get it from cache. */
    1718        2493 :         if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
    1719           5 :                 RETURN_FALSE;
    1720             :         }
    1721             : 
    1722        2488 :         pce->refcount++; /* presumably keeps the cache entry alive while the split runs - confirm */
    1723        2488 :         php_pcre_split_impl(pce, ZSTR_VAL(subject), (int)ZSTR_LEN(subject), return_value, (int)limit_val, flags); /* NOTE(review): limit_val is narrowed to int although the callee declares it zend_long - confirm intended */
    1724        2488 :         pce->refcount--;
    1725             : }
    1726             : /* }}} */
    1727             : 
    1728             : /* {{{ php_pcre_split_impl
    1729             :  */
    1730        2503 : PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
    1731             :         zend_long limit_val, zend_long flags)
    1732             : {
    1733        2503 :         pcre_extra              *extra = pce->extra;/* Holds results of studying */
    1734        2503 :         pcre                    *re_bump = NULL;        /* Regex instance for empty matches */
    1735        2503 :         pcre_extra              *extra_bump = NULL;     /* Almost dummy */
    1736             :         pcre_extra               extra_data;            /* Used locally for exec options */
    1737             :         int                             *offsets;                       /* Array of subpattern offsets */
    1738             :         int                              size_offsets;          /* Size of the offsets array */
    1739        2503 :         int                              exoptions = 0;         /* Execution options */
    1740        2503 :         int                              count = 0;                     /* Count of matched subpatterns */
    1741             :         int                              start_offset;          /* Where the new search starts */
    1742             :         int                              next_offset;           /* End of the last delimiter match + 1 */
    1743        2503 :         int                              g_notempty = 0;        /* If the match should not be empty */
    1744             :         char                    *last_match;            /* Location of last match */
    1745             :         int                              no_empty;                      /* If NO_EMPTY flag is set */
    1746             :         int                              delim_capture;         /* If delimiters should be captured */
    1747             :         int                              offset_capture;        /* If offsets should be captured */
    1748             :         zval                     tmp;
    1749             :         ALLOCA_FLAG(use_heap);
    1750             : 
    1751        2503 :         no_empty = flags & PREG_SPLIT_NO_EMPTY;
    1752        2503 :         delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
    1753        2503 :         offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
    1754             : 
    1755        2503 :         if (limit_val == 0) {
    1756           1 :                 limit_val = -1;
    1757             :         }
    1758             : 
    1759        2503 :         if (extra == NULL) {
    1760           7 :                 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
    1761           7 :                 extra = &extra_data;
    1762             :         }
    1763        2503 :         extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
    1764        2503 :         extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
    1765             : #ifdef PCRE_EXTRA_MARK
    1766        2503 :         extra->flags &= ~PCRE_EXTRA_MARK;
    1767             : #endif
    1768             : 
    1769             :         /* Initialize return value */
    1770        2503 :         array_init(return_value);
    1771             : 
    1772             :         /* Calculate the size of the offsets array, and allocate memory for it. */
    1773        2503 :         size_offsets = (pce->capture_count + 1) * 3;
    1774        2503 :         if (size_offsets <= 32) {
    1775        2503 :                 offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
    1776             :         } else {
    1777           0 :                 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
    1778             :         }
    1779             : 
    1780             :         /* Start at the beginning of the string */
    1781        2503 :         start_offset = 0;
    1782        2503 :         next_offset = 0;
    1783        2503 :         last_match = subject;
    1784        2503 :         PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
    1785             : 
    1786             :         /* Get next piece if no limit or limit not yet reached and something matched*/
    1787        9281 :         while ((limit_val == -1 || limit_val > 1)) {
    1788        6768 :                 count = pcre_exec(pce->re, extra, subject,
    1789             :                                                   subject_len, start_offset,
    1790             :                                                   exoptions|g_notempty, offsets, size_offsets);
    1791             : 
    1792             :                 /* the string was already proved to be valid UTF-8 */
    1793        6768 :                 exoptions |= PCRE_NO_UTF8_CHECK;
    1794             : 
    1795             :                 /* Check for too many substrings condition. */
    1796        6768 :                 if (count == 0) {
    1797           0 :                         php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
    1798           0 :                         count = size_offsets/3;
    1799             :                 }
    1800             : 
    1801             :                 /* If something matched */
    1802        6768 :                 if (count > 0) {
    1803        4214 :                         if (!no_empty || &subject[offsets[0]] != last_match) {
    1804             : 
    1805        4155 :                                 if (offset_capture) {
    1806             :                                         /* Add (match, offset) pair to the return value */
    1807          26 :                                         add_offset_pair(return_value, last_match, (int)(&subject[offsets[0]]-last_match), next_offset, NULL);
    1808             :                                 } else {
    1809             :                                         /* Add the piece to the return value */
    1810        8258 :                                         ZVAL_STRINGL(&tmp, last_match, &subject[offsets[0]]-last_match);
    1811        4129 :                                         zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
    1812             :                                 }
    1813             : 
    1814             :                                 /* One less left to do */
    1815        4155 :                                 if (limit_val != -1)
    1816          13 :                                         limit_val--;
    1817             :                         }
    1818             : 
    1819        4214 :                         last_match = &subject[offsets[1]];
    1820        4214 :                         next_offset = offsets[1];
    1821             : 
    1822        4214 :                         if (delim_capture) {
    1823             :                                 int i, match_len;
    1824          62 :                                 for (i = 1; i < count; i++) {
    1825          31 :                                         match_len = offsets[(i<<1)+1] - offsets[i<<1];
    1826             :                                         /* If we have matched a delimiter */
    1827          31 :                                         if (!no_empty || match_len > 0) {
    1828          21 :                                                 if (offset_capture) {
    1829          10 :                                                         add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
    1830             :                                                 } else {
    1831          22 :                                                         ZVAL_STRINGL(&tmp, &subject[offsets[i<<1]], match_len);
    1832          11 :                                                         zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
    1833             :                                                 }
    1834             :                                         }
    1835             :                                 }
    1836             :                         }
    1837        2554 :                 } else if (count == PCRE_ERROR_NOMATCH) {
    1838             :                         /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
    1839             :                            this is not necessarily the end. We need to advance
    1840             :                            the start offset, and continue. Fudge the offset values
    1841             :                            to achieve this, unless we're already at the end of the string. */
    1842        2553 :                         if (g_notempty != 0 && start_offset < subject_len) {
    1843          61 :                                 if (pce->compile_options & PCRE_UTF8) {
    1844          12 :                                         if (re_bump == NULL) {
    1845             :                                                 int dummy;
    1846           2 :                                                 zend_string *regex = zend_string_init("/./us", sizeof("/./us")-1, 0);
    1847           2 :                                                 re_bump = pcre_get_compiled_regex(regex, &extra_bump, &dummy);
    1848             :                                                 zend_string_release(regex);
    1849           2 :                                                 if (re_bump == NULL) {
    1850           0 :                                                         RETURN_FALSE;
    1851             :                                                 }
    1852             :                                         }
    1853          12 :                                         count = pcre_exec(re_bump, extra_bump, subject,
    1854             :                                                           subject_len, start_offset,
    1855             :                                                           exoptions, offsets, size_offsets);
    1856          12 :                                         if (count < 1) {
    1857           0 :                                                 php_error_docref(NULL, E_WARNING, "Unknown error");
    1858           0 :                                                 RETURN_FALSE;
    1859             :                                         }
    1860             :                                 } else {
    1861          49 :                                         offsets[0] = start_offset;
    1862          49 :                                         offsets[1] = start_offset + 1;
    1863             :                                 }
    1864             :                         } else
    1865             :                                 break;
    1866             :                 } else {
    1867           1 :                         pcre_handle_exec_error(count);
    1868           1 :                         break;
    1869             :                 }
    1870             : 
    1871             :                 /* If we have matched an empty string, mimic what Perl's /g options does.
    1872             :                    This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
    1873             :                    the match again at the same point. If this fails (picked up above) we
    1874             :                    advance to the next character. */
    1875        4275 :                 g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
    1876             : 
    1877             :                 /* Advance to the position right after the last full match */
    1878        4275 :                 start_offset = offsets[1];
    1879             :         }
    1880             : 
    1881             : 
    1882        2503 :         start_offset = (int)(last_match - subject); /* the offset might have been incremented, but without further successful matches */
    1883             : 
    1884        2503 :         if (!no_empty || start_offset < subject_len)
    1885             :         {
    1886        2495 :                 if (offset_capture) {
    1887             :                         /* Add the last (match, offset) pair to the return value */
    1888           5 :                         add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
    1889             :                 } else {
    1890             :                         /* Add the last piece to the return value */
    1891        4980 :                         ZVAL_STRINGL(&tmp, last_match, subject + subject_len - last_match);
    1892        2490 :                         zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
    1893             :                 }
    1894             :         }
    1895             : 
    1896             : 
    1897             :         /* Clean up */
    1898        2503 :         if (size_offsets <= 32) {
    1899        2503 :                 free_alloca(offsets, use_heap);
    1900             :         } else {
    1901           0 :                 efree(offsets);
    1902             :         }
    1903             : }
    1904             : /* }}} */
    1905             : 
    1906             : /* {{{ proto string preg_quote(string str [, string delim_char])
    1907             :    Quote regular expression characters plus an optional character.
                     :    Each of . \ + * ? [ ^ ] $ ( ) { } = ! > < | : - is prefixed with a
                     :    backslash and a NUL byte is written as the four characters \000.
                     :    When delim_char is given and non-empty, occurrences of its first byte
                     :    are backslash-escaped as well. An empty str yields an empty string. */
    1908        8380 : static PHP_FUNCTION(preg_quote)
    1909             : {
    1910             :         size_t           in_str_len;    /* Length of the input string */
    1911             :         char    *in_str;                /* Input string argument */
    1912             :         char    *in_str_end;    /* End of the input string */
    1913        8380 :         size_t           delim_len = 0; /* Length of delim, filled in by parameter parsing */
    1914        8380 :         char    *delim = NULL;  /* Additional delimiter argument */
    1915             :         zend_string     *out_str;       /* Output string with quoted characters */
    1916             :         char    *p,                             /* Iterator for input string */
    1917             :                         *q,                             /* Iterator for output string */
    1918        8380 :                          delim_char=0,  /* Delimiter character to be quoted */
    1919             :                          c;                             /* Current character */
    1920        8380 :         zend_bool quote_delim = 0; /* Whether to quote additional delim char */
    1921             : 
    1922             :         /* Get the arguments and check for errors */
    1923             : #ifndef FAST_ZPP
    1924             :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", &in_str, &in_str_len,
    1925             :                                                           &delim, &delim_len) == FAILURE) {
    1926             :                 return;
    1927             :         }
    1928             : #else
    1929        8380 :         ZEND_PARSE_PARAMETERS_START(1, 2)
    1930       25131 :                 Z_PARAM_STRING(in_str, in_str_len)
    1931        8375 :                 Z_PARAM_OPTIONAL
    1932       25117 :                 Z_PARAM_STRING(delim, delim_len)
    1933        8380 :         ZEND_PARSE_PARAMETERS_END();
    1934             : #endif
    1935             : 
    1936        8375 :         in_str_end = in_str + in_str_len; /* One past the last input byte; loop sentinel below */
    1937             : 
    1938             :         /* Nothing to do if we got an empty string */
    1939        8375 :         if (in_str == in_str_end) {
    1940           5 :                 RETURN_EMPTY_STRING();
    1941             :         }
    1942             : 
    1943        8370 :         if (delim && *delim) { /* Only the first byte of a non-empty delim is honoured */
    1944        8367 :                 delim_char = delim[0];
    1945        8367 :                 quote_delim = 1;
    1946             :         }
    1947             : 
    1948             :         /* Allocate enough memory so that even if each character
    1949             :            is quoted, we won't run out of room. The worst case is a NUL
                     :            byte, which expands to four output bytes, hence the factor
                     :            of 4 per input byte */
    1950       16740 :         out_str = zend_string_safe_alloc(4, in_str_len, 0, 0);
    1951             : 
    1952             :         /* Go through the string and quote necessary characters */
    1953     7442791 :         for (p = in_str, q = ZSTR_VAL(out_str); p != in_str_end; p++) {
    1954     7434421 :                 c = *p;
    1955     7434421 :                 switch(c) {
    1956             :                         case '.':
    1957             :                         case '\\':
    1958             :                         case '+':
    1959             :                         case '*':
    1960             :                         case '?':
    1961             :                         case '[':
    1962             :                         case '^':
    1963             :                         case ']':
    1964             :                         case '$':
    1965             :                         case '(':
    1966             :                         case ')':
    1967             :                         case '{':
    1968             :                         case '}':
    1969             :                         case '=':
    1970             :                         case '!':
    1971             :                         case '>':
    1972             :                         case '<':
    1973             :                         case '|':
    1974             :                         case ':':
    1975             :                         case '-':
    1976     1057164 :                                 *q++ = '\\'; /* Listed characters always get a backslash prefix */
    1977     1057164 :                                 *q++ = c;
    1978     1057164 :                                 break;
    1979             : 
    1980             :                         case '\0': /* A NUL byte is emitted as a backslash followed by "000" */
    1981        1031 :                                 *q++ = '\\';
    1982        1031 :                                 *q++ = '0';
    1983        1031 :                                 *q++ = '0';
    1984        1031 :                                 *q++ = '0';
    1985        1031 :                                 break;
    1986             : 
    1987             :                         default: /* Any other byte is copied; escaped only if it equals the delimiter */
    1988     6376226 :                                 if (quote_delim && c == delim_char)
    1989       15114 :                                         *q++ = '\\';
    1990     6376226 :                                 *q++ = c;
    1991             :                                 break;
    1992             :                 }
    1993             :         }
    1994        8370 :         *q = '\0'; /* NUL-terminate the quoted output */
    1995             : 
    1996             :         /* Reallocate string and return it; the new length is the number of
                     :            bytes actually written (q minus the start of the buffer) */
    1997       16740 :         out_str = zend_string_truncate(out_str, q - ZSTR_VAL(out_str), 0);
    1998        8370 :         RETURN_NEW_STR(out_str);
    1999             : }
    2000             : /* }}} */
    2001             : 
    2002             : /* {{{ proto array preg_grep(string regex, array input [, int flags])
    2003             :    Searches array and returns entries which match regex.
                     :    Returns FALSE when pcre_get_compiled_regex_cache() yields no compiled
                     :    pattern; otherwise the filtering itself is done by php_pcre_grep_impl(),
                     :    which fills return_value. */
    2004          29 : static PHP_FUNCTION(preg_grep)
    2005             : {
    2006             :         zend_string                     *regex;                 /* Regular expression */
    2007             :         zval                            *input;                 /* Input array */
    2008          29 :         zend_long                        flags = 0;             /* Match control flags */
    2009             :         pcre_cache_entry        *pce;                   /* Compiled regular expression */
    2010             : 
    2011             :         /* Get arguments and do error checking */
    2012             : #ifndef FAST_ZPP
    2013             :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "Sa|l", &regex,
    2014             :                                                           &input, &flags) == FAILURE) {
    2015             :                 return;
    2016             :         }
    2017             : #else
    2018          29 :         ZEND_PARSE_PARAMETERS_START(2, 3)
    2019          75 :                 Z_PARAM_STR(regex)
    2020          69 :                 Z_PARAM_ARRAY(input)
    2021          20 :                 Z_PARAM_OPTIONAL
    2022          26 :                 Z_PARAM_LONG(flags)
    2023          29 :         ZEND_PARSE_PARAMETERS_END();
    2024             : #endif
    2025             : 
    2026             :         /* Compile regex or get it from cache. */
    2027          20 :         if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
    2028           5 :                 RETURN_FALSE;
    2029             :         }
    2030             : 
    2031          15 :         pce->refcount++; /* Bumped around the call; presumably keeps the cache entry alive while in use - confirm against the cache code */
    2032          15 :         php_pcre_grep_impl(pce, input, return_value, flags);
    2033          15 :         pce->refcount--;
    2034             : }
    2035             : /* }}} */
    2036             : 
    2037          15 : PHPAPI void  php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{
                     :    Initializes return_value as an array and adds to it, under their
                     :    original string or integer keys, the entries of the input array whose
                     :    string form matches pce->re. When PREG_GREP_INVERT is set in flags the
                     :    non-matching entries are added instead. PCRE_G(error_code) is reset
                     :    before the scan. A pcre_exec() error other than PCRE_ERROR_NOMATCH is
                     :    passed to pcre_handle_exec_error() and stops the scan, leaving the
                     :    entries collected so far in return_value.
                     :  */
    2038             : {
    2039             :         zval                *entry;                             /* An entry in the input array */
    2040          15 :         pcre_extra              *extra = pce->extra;/* Holds results of studying */
    2041             :         pcre_extra               extra_data;            /* Used locally for exec options */
    2042             :         int                             *offsets;                       /* Array of subpattern offsets */
    2043             :         int                              size_offsets;          /* Size of the offsets array */
    2044          15 :         int                              count = 0;                     /* Count of matched subpatterns */
    2045             :         zend_string             *string_key;    /* String key of the current entry, NULL for integer keys */
    2046             :         zend_ulong               num_key;       /* Integer key of the current entry */
    2047             :         zend_bool                invert;                        /* Whether to return non-matching
    2048             :                                                                                    entries */
    2049             :         ALLOCA_FLAG(use_heap);
    2050             : 
    2051          15 :         invert = flags & PREG_GREP_INVERT ? 1 : 0;
    2052             : 
    2053          15 :         if (extra == NULL) { /* No extra block on the cache entry: use a local one that only carries the limits */
    2054           0 :                 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
    2055           0 :                 extra = &extra_data;
    2056             :         }
    2057          15 :         extra->match_limit = (unsigned long)PCRE_G(backtrack_limit); /* Limits are refreshed from the module globals on every call */
    2058          15 :         extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
    2059             : #ifdef PCRE_EXTRA_MARK
    2060          15 :         extra->flags &= ~PCRE_EXTRA_MARK; /* Clear the MARK flag bit for this call */
    2061             : #endif
    2062             : 
    2063             :         /* Calculate the size of the offsets array, and allocate memory for it.
                     :            Up to 32 ints are obtained with do_alloca(), larger arrays with
                     :            safe_emalloc(); the clean-up at the end mirrors this split. */
    2064          15 :         size_offsets = (pce->capture_count + 1) * 3;
    2065          15 :         if (size_offsets <= 32) {
    2066          15 :                 offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
    2067             :         } else {
    2068           0 :                 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
    2069             :         }
    2070             : 
    2071             :         /* Initialize return array */
    2072          15 :         array_init(return_value);
    2073             : 
    2074          15 :         PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
    2075             : 
    2076             :         /* Go through the input array */
    2077         171 :         ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
    2078          78 :                 zend_string *subject_str = zval_get_string(entry); /* String form of the entry; released on every path out of this iteration */
    2079             : 
    2080             :                 /* Perform the match */
    2081          78 :                 count = pcre_exec(pce->re, extra, ZSTR_VAL(subject_str),
    2082             :                                                   (int)ZSTR_LEN(subject_str), 0,
    2083             :                                                   0, offsets, size_offsets);
    2084             : 
    2085             :                 /* Check for too many substrings condition. */
    2086          78 :                 if (count == 0) {
    2087           0 :                         php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
    2088           0 :                         count = size_offsets/3; /* Positive again, so the entry is still treated as a match below */
    2089          78 :                 } else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
    2090           0 :                         pcre_handle_exec_error(count);
    2091             :                         zend_string_release(subject_str);
    2092           0 :                         break;
    2093             :                 }
    2094             : 
    2095             :                 /* If the entry fits our requirements */
    2096          78 :                 if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
    2097          33 :                         if (Z_REFCOUNTED_P(entry)) { /* The same zval is stored in the result, so take a reference rather than copying */
    2098             :                                 Z_ADDREF_P(entry);
    2099             :                         }
    2100             : 
    2101             :                         /* Add to return array, keeping the key the entry had in input */
    2102          33 :                         if (string_key) {
    2103           3 :                                 zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
    2104             :                         } else {
    2105          30 :                                 zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
    2106             :                         }
    2107             :                 }
    2108             : 
    2109             :                 zend_string_release(subject_str);
    2110             :         } ZEND_HASH_FOREACH_END();
    2111             : 
    2112             :         /* Clean up */
    2113          15 :         if (size_offsets <= 32) {
    2114          15 :                 free_alloca(offsets, use_heap);
    2115             :         } else {
    2116           0 :                 efree(offsets);
    2117             :         }
    2118          15 : }
    2119             : /* }}} */
    2120             : 
    2121             : /* {{{ proto int preg_last_error()
    2122             :    Returns the error code of the last regexp execution, i.e. the current
                     :    value of PCRE_G(error_code). Accepts no arguments. */
    2123          18 : static PHP_FUNCTION(preg_last_error)
    2124             : {
    2125             : #ifndef FAST_ZPP
    2126             :         if (zend_parse_parameters(ZEND_NUM_ARGS(), "") == FAILURE) {
    2127             :                 return;
    2128             :         }
    2129             : #else
    2130          18 :         ZEND_PARSE_PARAMETERS_START(0, 0)
    2131          18 :         ZEND_PARSE_PARAMETERS_END();
    2132             : #endif
    2133             : 
    2134          16 :         RETURN_LONG(PCRE_G(error_code));
    2135             : }
    2136             : /* }}} */
    2137             : 
    2138             : /* {{{ module definition structures */
    2139             : 
    2140             : /* {{{ arginfo
                     :    Argument descriptors referenced by the pcre_functions table.
                     :    The last argument of ZEND_BEGIN_ARG_INFO_EX is the number of required
                     :    parameters (it agrees with the parameter-parsing minimums visible in
                     :    preg_quote and preg_grep). A leading 1 in ZEND_ARG_INFO marks a
                     :    parameter passed by reference, per the Zend API macro definitions.
                     :  */
    2141             : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2) /* required: pattern, subject */
    2142             :     ZEND_ARG_INFO(0, pattern)
    2143             :     ZEND_ARG_INFO(0, subject)
    2144             :     ZEND_ARG_INFO(1, subpatterns) /* array */
    2145             :     ZEND_ARG_INFO(0, flags)
    2146             :     ZEND_ARG_INFO(0, offset)
    2147             : ZEND_END_ARG_INFO()
    2148             : 
    2149             : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 2) /* required: pattern, subject */
    2150             :     ZEND_ARG_INFO(0, pattern)
    2151             :     ZEND_ARG_INFO(0, subject)
    2152             :     ZEND_ARG_INFO(1, subpatterns) /* array */
    2153             :     ZEND_ARG_INFO(0, flags)
    2154             :     ZEND_ARG_INFO(0, offset)
    2155             : ZEND_END_ARG_INFO()
    2156             : 
    2157             : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3) /* required: regex, replace, subject; also used for preg_filter */
    2158             :     ZEND_ARG_INFO(0, regex)
    2159             :     ZEND_ARG_INFO(0, replace)
    2160             :     ZEND_ARG_INFO(0, subject)
    2161             :     ZEND_ARG_INFO(0, limit)
    2162             :     ZEND_ARG_INFO(1, count)
    2163             : ZEND_END_ARG_INFO()
    2164             : 
    2165             : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3) /* required: regex, callback, subject */
    2166             :     ZEND_ARG_INFO(0, regex)
    2167             :     ZEND_ARG_INFO(0, callback)
    2168             :     ZEND_ARG_INFO(0, subject)
    2169             :     ZEND_ARG_INFO(0, limit)
    2170             :     ZEND_ARG_INFO(1, count)
    2171             : ZEND_END_ARG_INFO()
    2172             : 
    2173             : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback_array, 0, 0, 2) /* required: pattern, subject */
    2174             :     ZEND_ARG_INFO(0, pattern)
    2175             :     ZEND_ARG_INFO(0, subject)
    2176             :     ZEND_ARG_INFO(0, limit)
    2177             :     ZEND_ARG_INFO(1, count)
    2178             : ZEND_END_ARG_INFO()
    2179             : 
    2180             : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2) /* required: pattern, subject */
    2181             :     ZEND_ARG_INFO(0, pattern)
    2182             :     ZEND_ARG_INFO(0, subject)
    2183             :     ZEND_ARG_INFO(0, limit)
    2184             :     ZEND_ARG_INFO(0, flags)
    2185             : ZEND_END_ARG_INFO()
    2186             : 
    2187             : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1) /* required: str */
    2188             :     ZEND_ARG_INFO(0, str)
    2189             :     ZEND_ARG_INFO(0, delim_char)
    2190             : ZEND_END_ARG_INFO()
    2191             : 
    2192             : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2) /* required: regex, input */
    2193             :     ZEND_ARG_INFO(0, regex)
    2194             :     ZEND_ARG_INFO(0, input) /* array */
    2195             :     ZEND_ARG_INFO(0, flags)
    2196             : ZEND_END_ARG_INFO()
    2197             : 
    2198             : ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0) /* declares no parameters */
    2199             : ZEND_END_ARG_INFO()
    2200             : /* }}} */
    2201             : 
    2202             : static const zend_function_entry pcre_functions[] = { /* Function table handed to pcre_module_entry; each entry pairs a function with its arginfo */
    2203             :         PHP_FE(preg_match,                                      arginfo_preg_match)
    2204             :         PHP_FE(preg_match_all,                          arginfo_preg_match_all)
    2205             :         PHP_FE(preg_replace,                            arginfo_preg_replace)
    2206             :         PHP_FE(preg_replace_callback,           arginfo_preg_replace_callback)
    2207             :         PHP_FE(preg_replace_callback_array,     arginfo_preg_replace_callback_array)
    2208             :         PHP_FE(preg_filter,                                     arginfo_preg_replace) /* Reuses the preg_replace argument descriptor */
    2209             :         PHP_FE(preg_split,                                      arginfo_preg_split)
    2210             :         PHP_FE(preg_quote,                                      arginfo_preg_quote)
    2211             :         PHP_FE(preg_grep,                                       arginfo_preg_grep)
    2212             :         PHP_FE(preg_last_error,                         arginfo_preg_last_error)
    2213             :         PHP_FE_END /* Terminates the table */
    2214             : };
    2215             : 
    2216             : zend_module_entry pcre_module_entry = { /* Module descriptor for the "pcre" extension; slot notes below follow the zend_module_entry field order - confirm against zend_modules.h */
    2217             :         STANDARD_MODULE_HEADER,
    2218             :    "pcre",
    2219             :         pcre_functions,
    2220             :         PHP_MINIT(pcre),
    2221             :         PHP_MSHUTDOWN(pcre),
    2222             :         NULL, /* presumably the request-startup slot, unused here */
    2223             :         NULL, /* presumably the request-shutdown slot, unused here */
    2224             :         PHP_MINFO(pcre),
    2225             :         PHP_PCRE_VERSION,
    2226             :         PHP_MODULE_GLOBALS(pcre),
    2227             :         PHP_GINIT(pcre),
    2228             :         PHP_GSHUTDOWN(pcre),
    2229             :         NULL, /* presumably the post-deactivate slot, unused here */
    2230             :         STANDARD_MODULE_PROPERTIES_EX
    2231             : };
    2232             : 
    2233             : #ifdef COMPILE_DL_PCRE
    2234             : ZEND_GET_MODULE(pcre) /* Only emitted when COMPILE_DL_PCRE is defined */
    2235             : #endif
    2236             : 
    2237             : /* }}} */
    2238             : 
    2239             : #endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
    2240             : 
    2241             : /*
    2242             :  * Local variables:
    2243             :  * tab-width: 4
    2244             :  * c-basic-offset: 4
    2245             :  * End:
    2246             :  * vim600: sw=4 ts=4 fdm=marker
    2247             :  * vim<600: sw=4 ts=4
    2248             :  */

Generated by: LCOV version 1.10

Generated at Sat, 29 Aug 2015 10:22:20 +0000 (2 days ago)

Copyright © 2005-2015 The PHP Group
All rights reserved.