PHP  
 PHP: Test and Code Coverage Analysis
downloads | QA | documentation | faq | getting help | mailing lists | reporting bugs | php.net sites | links | my php.net 
 

LCOV - code coverage report
Current view: top level - ext/pcre - php_pcre.c (source / functions) Hit Total Coverage
Test: PHP Code Coverage Lines: 901 976 92.3 %
Date: 2022-01-16 Functions: 33 34 97.1 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :    +----------------------------------------------------------------------+
       3             :    | PHP Version 7                                                        |
       4             :    +----------------------------------------------------------------------+
       5             :    | Copyright (c) 1997-2018 The PHP Group                                |
       6             :    +----------------------------------------------------------------------+
       7             :    | This source file is subject to version 3.01 of the PHP license,      |
       8             :    | that is bundled with this package in the file LICENSE, and is        |
       9             :    | available through the world-wide-web at the following url:           |
      10             :    | http://www.php.net/license/3_01.txt                                  |
      11             :    | If you did not receive a copy of the PHP license and are unable to   |
      12             :    | obtain it through the world-wide-web, please send a note to          |
      13             :    | license@php.net so we can mail you a copy immediately.               |
      14             :    +----------------------------------------------------------------------+
      15             :    | Author: Andrei Zmievski <andrei@php.net>                             |
      16             :    +----------------------------------------------------------------------+
      17             :  */
      18             : 
      19             : /* $Id$ */
      20             : 
      21             : #include "php.h"
      22             : #include "php_ini.h"
      23             : #include "php_globals.h"
      24             : #include "php_pcre.h"
      25             : #include "ext/standard/info.h"
      26             : #include "ext/standard/basic_functions.h"
      27             : #include "zend_smart_str.h"
      28             : 
      29             : #if HAVE_PCRE || HAVE_BUNDLED_PCRE
      30             : 
      31             : #include "ext/standard/php_string.h"
      32             : /* Flag values used by this extension. Every PREG_* macro below except
                     :  * PREG_REPLACE_EVAL is registered as a PHP constant of the same name in
                     :  * PHP_MINIT_FUNCTION(pcre). */
      33             : #define PREG_PATTERN_ORDER                      1
      34             : #define PREG_SET_ORDER                          2
      35             : #define PREG_OFFSET_CAPTURE                     (1<<8)
      36             : 
      37             : #define PREG_SPLIT_NO_EMPTY                     (1<<0)
      38             : #define PREG_SPLIT_DELIM_CAPTURE        (1<<1)
      39             : #define PREG_SPLIT_OFFSET_CAPTURE       (1<<2)
      40             : /* Internal flag: set in poptions when the pattern carries the 'e' modifier. */
      41             : #define PREG_REPLACE_EVAL                       (1<<0)
      42             : 
      43             : #define PREG_GREP_INVERT                        (1<<0)
      44             : /* NOTE(review): presumably the maximum number of cached patterns; its use is not visible in this part of the file - confirm. */
      45             : #define PCRE_CACHE_SIZE 4096
      46             : 
      47             : /* not fully functional workaround for libpcre < 8.0, see bug #70232 */
      48             : #ifndef PCRE_NOTEMPTY_ATSTART
      49             : # define PCRE_NOTEMPTY_ATSTART PCRE_NOTEMPTY
      50             : #endif
      51             : /* Values stored in PCRE_G(error_code); PHP_MINIT_FUNCTION(pcre) registers them as the PREG_*_ERROR constants. */
      52             : enum {
      53             :         PHP_PCRE_NO_ERROR = 0,
      54             :         PHP_PCRE_INTERNAL_ERROR,
      55             :         PHP_PCRE_BACKTRACK_LIMIT_ERROR,
      56             :         PHP_PCRE_RECURSION_LIMIT_ERROR,
      57             :         PHP_PCRE_BAD_UTF8_ERROR,
      58             :         PHP_PCRE_BAD_UTF8_OFFSET_ERROR,
      59             :         PHP_PCRE_JIT_STACKLIMIT_ERROR
      60             : };
      61             : 
      62             : /* Declares the extension's module globals; the rest of the file reads them through PCRE_G(). */
      63             : PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)
      64             : /* JIT stack: allocated in PHP_RINIT_FUNCTION(pcre) with the min/max sizes below, freed in PHP_GSHUTDOWN_FUNCTION(pcre). */
      65             : #ifdef HAVE_PCRE_JIT_SUPPORT
      66             : #define PCRE_JIT_STACK_MIN_SIZE (32 * 1024)
      67             : #define PCRE_JIT_STACK_MAX_SIZE (64 * 1024)
      68             : ZEND_TLS pcre_jit_stack *jit_stack = NULL;
      69             : #endif
      70             : #if defined(ZTS) /* ZTS builds wrap pcre_jit_stack_alloc() and pcre_study() in one mutex,
                     :                   * allocated in MINIT and released in MSHUTDOWN.
                     :                   * NOTE(review): these helpers expand to bare statements (an unbraced `if`,
                     :                   * two statements in _free, trailing `;`), so they are only safe when used
                     :                   * as a standalone statement, which is how every call site in view uses them. */
      71             : static MUTEX_T pcre_mt = NULL;
      72             : #define php_pcre_mutex_alloc() if (!pcre_mt) pcre_mt = tsrm_mutex_alloc();
      73             : #define php_pcre_mutex_free() if (pcre_mt) tsrm_mutex_free(pcre_mt); pcre_mt = NULL;
      74             : #define php_pcre_mutex_lock() tsrm_mutex_lock(pcre_mt);
      75             : #define php_pcre_mutex_unlock() tsrm_mutex_unlock(pcre_mt);
      76             : #else /* without ZTS the four helpers expand to nothing */
      77             : #define php_pcre_mutex_alloc()
      78             : #define php_pcre_mutex_free()
      79             : #define php_pcre_mutex_lock()
      80             : #define php_pcre_mutex_unlock()
      81             : #endif
      82             : /* Translates a PCRE library status code into the matching PHP_PCRE_*_ERROR
                     :  * value and stores it in PCRE_G(error_code).
                     :  *
                     :  * pcre_code: a PCRE_ERROR_* value. Codes without a dedicated case (for
                     :  * example PCRE_ERROR_INTERNAL) are recorded as PHP_PCRE_INTERNAL_ERROR.
                     :  * The JIT stack-limit case exists only when HAVE_PCRE_JIT_SUPPORT is defined.
                     :  * Returns nothing. */
      83          72 : static void pcre_handle_exec_error(int pcre_code) /* {{{ */
      84             : {
      85          72 :         int preg_code = 0;
      86             : 
      87          72 :         switch (pcre_code) {
      88           4 :                 case PCRE_ERROR_MATCHLIMIT:
      89           4 :                         preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
      90           4 :                         break;
      91             : 
      92           2 :                 case PCRE_ERROR_RECURSIONLIMIT:
      93           2 :                         preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
      94           2 :                         break;
      95             : 
      96           8 :                 case PCRE_ERROR_BADUTF8:
      97           8 :                         preg_code = PHP_PCRE_BAD_UTF8_ERROR;
      98           8 :                         break;
      99             : 
     100           1 :                 case PCRE_ERROR_BADUTF8_OFFSET:
     101           1 :                         preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
     102           1 :                         break;
     103             : 
     104             : #ifdef HAVE_PCRE_JIT_SUPPORT
     105           1 :                 case PCRE_ERROR_JIT_STACKLIMIT:
     106           1 :                         preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
     107           1 :                         break;
     108             : #endif
     109             : 
     110          56 :                 default:
     111          56 :                         preg_code = PHP_PCRE_INTERNAL_ERROR;
     112          56 :                         break;
     113             :         }
     114             : 
     115          72 :         PCRE_G(error_code) = preg_code;
     116          72 : }
     117             : /* }}} */
     118             : /* Element destructor for the pattern cache (passed to zend_hash_init() in
                     :  * PHP_GINIT_FUNCTION(pcre)).
                     :  *
                     :  * data: zval whose pointer payload is a pcre_cache_entry; a NULL payload is
                     :  * ignored. Frees the compiled regex, the study data when present, the
                     :  * character tables when built with HAVE_SETLOCALE, and then the entry. */
     119       49979 : static void php_free_pcre_cache(zval *data) /* {{{ */
     120             : {
     121       49979 :         pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
     122       49979 :         if (!pce) return;
     123       49979 :         pcre_free(pce->re);
     124       49979 :         if (pce->extra) {
     125       49971 :                 pcre_free_study(pce->extra);
     126             :         }
     127             : #if HAVE_SETLOCALE
     128       49979 :         if ((void*)pce->tables) pefree((void*)pce->tables, 1);
     129             : #endif
     130       49979 :         pefree(pce, 1);
     131             : }
     132             : /* }}} */
     133             : /* Globals constructor: creates the pattern cache hash table with
                     :  * php_free_pcre_cache as its element destructor, zeroes both limits and
                     :  * resets the error code to PHP_PCRE_NO_ERROR. */
     134       24801 : static PHP_GINIT_FUNCTION(pcre) /* {{{ */
     135             : {
     136       24801 :         zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
     137       24801 :         pcre_globals->backtrack_limit = 0;
     138       24801 :         pcre_globals->recursion_limit = 0;
     139       24801 :         pcre_globals->error_code      = PHP_PCRE_NO_ERROR;
     140       24801 : }
     141             : /* }}} */
     142             : /* Globals destructor: destroys the pattern cache first and then, in builds
                     :  * with HAVE_PCRE_JIT_SUPPORT, frees the JIT stack and clears its pointer. */
     143       24843 : static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
     144             : {
     145       24843 :         zend_hash_destroy(&pcre_globals->pcre_cache);
     146             : 
     147             : #ifdef HAVE_PCRE_JIT_SUPPORT
     148             :         /* Stack may only be destroyed when no cached patterns
     149             :                 possibly associated with it do exist. */
     150       24843 :         if (jit_stack) {
     151       24791 :                 pcre_jit_stack_free(jit_stack);
     152       24791 :                 jit_stack = NULL;
     153             :         }
     154             : #endif
     155             : 
     156       24843 : }
     157             : /* }}} */
     158             : /* INI settings, each bound to the like-named field of zend_pcre_globals and
                     :  * declared PHP_INI_ALL. Defaults: backtrack limit 1000000, recursion limit
                     :  * 100000, JIT on. pcre.jit exists only when HAVE_PCRE_JIT_SUPPORT is defined. */
     159             : PHP_INI_BEGIN()
     160             :         STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
     161             :         STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000",  PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
     162             : #ifdef HAVE_PCRE_JIT_SUPPORT
     163             :         STD_PHP_INI_ENTRY("pcre.jit",             "1",       PHP_INI_ALL, OnUpdateBool, jit,             zend_pcre_globals, pcre_globals)
     164             : #endif
     165             : PHP_INI_END()
     166             : 
     167             : 
     168             : /* {{{ PHP_MINFO_FUNCTION(pcre)
                     :  * Prints this extension's info table: a support banner, the library version
                     :  * from pcre_version(), and a "PCRE JIT Support" row, followed by the INI
                     :  * entries. The JIT row shows "enabled"/"disabled" from
                     :  * pcre_config(PCRE_CONFIG_JIT), "unknown" when that call returns non-zero,
                     :  * and "not compiled in" when HAVE_PCRE_JIT_SUPPORT is undefined. */
     169         146 : static PHP_MINFO_FUNCTION(pcre)
     170             : {
     171             : #ifdef HAVE_PCRE_JIT_SUPPORT
     172         146 :         int jit_yes = 0;
     173             : #endif
     174             : 
     175         146 :         php_info_print_table_start();
     176         146 :         php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
     177         146 :         php_info_print_table_row(2, "PCRE Library Version", pcre_version() );
     178             : 
     179             : #ifdef HAVE_PCRE_JIT_SUPPORT
     180         146 :         if (!pcre_config(PCRE_CONFIG_JIT, &jit_yes)) {
     181         146 :                 php_info_print_table_row(2, "PCRE JIT Support", jit_yes ? "enabled" : "disabled");
     182             :         } else {
     183           0 :                 php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
     184             :         }
     185             : #else
     186             :         php_info_print_table_row(2, "PCRE JIT Support", "not compiled in" );
     187             : #endif
     188             : 
     189         146 :         php_info_print_table_end();
     190             : 
     191         146 :         DISPLAY_INI_ENTRIES();
     192         146 : }
     193             : /* }}} */
     194             : 
     195             : /* {{{ PHP_MINIT_FUNCTION(pcre)
                     :  * Module startup: registers the INI entries, allocates the module mutex
                     :  * (the macro expands to nothing without ZTS), then registers the PREG_*
                     :  * flag constants, the PREG_*_ERROR constants and PCRE_VERSION, all as
                     :  * case-sensitive persistent constants. Always returns SUCCESS. */
     196       24801 : static PHP_MINIT_FUNCTION(pcre)
     197             : {
     198       24801 :         REGISTER_INI_ENTRIES();
     199             : 
     200             :         php_pcre_mutex_alloc();
     201             : 
     202       24801 :         REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
     203       24801 :         REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
     204       24801 :         REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
     205       24801 :         REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
     206       24801 :         REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
     207       24801 :         REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
     208       24801 :         REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
     209             : 
     210       24801 :         REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
     211       24801 :         REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
     212       24801 :         REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
     213       24801 :         REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
     214       24801 :         REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
     215       24801 :         REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
     216       24801 :         REGISTER_LONG_CONSTANT("PREG_JIT_STACKLIMIT_ERROR", PHP_PCRE_JIT_STACKLIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
     217       24801 :         REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT);
     218             : 
     219       24801 :         return SUCCESS;
     220             : }
     221             : /* }}} */
     222             : 
     223             : /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre)
                     :  * Module shutdown: unregisters the INI entries and frees the module mutex
                     :  * (the macro expands to nothing without ZTS). Always returns SUCCESS. */
     224       24843 : static PHP_MSHUTDOWN_FUNCTION(pcre)
     225             : {
     226       24843 :         UNREGISTER_INI_ENTRIES();
     227             : 
     228             :         php_pcre_mutex_free();
     229             : 
     230       24843 :         return SUCCESS;
     231             : }
     232             : /* }}} */
     233             : 
     234             : #ifdef HAVE_PCRE_JIT_SUPPORT
     235             : /* {{{ PHP_RINIT_FUNCTION(pcre)
                     :  * Request startup (JIT builds only): when pcre.jit is on and jit_stack is
                     :  * still NULL, allocates the JIT stack with the PCRE_JIT_STACK_MIN_SIZE /
                     :  * PCRE_JIT_STACK_MAX_SIZE bounds while holding the module mutex. The result
                     :  * of the allocation is not checked here. Always returns SUCCESS. */
     236       24751 : static PHP_RINIT_FUNCTION(pcre)
     237             : {
     238       24751 :         if (PCRE_G(jit) && jit_stack == NULL) {
     239             :                 php_pcre_mutex_lock();
     240       24745 :                 jit_stack = pcre_jit_stack_alloc(PCRE_JIT_STACK_MIN_SIZE,PCRE_JIT_STACK_MAX_SIZE);
     241             :                 php_pcre_mutex_unlock();
     242             :         }
     243             : 
     244       24751 :         return SUCCESS;
     245             : }
     246             : /* }}} */
     247             : #endif
     248             : 
     249             : /* {{{ static pcre_clean_cache
                     :  * Per-element callback that evicts unreferenced cache entries.
                     :  *
                     :  * data: zval whose pointer payload is a pcre_cache_entry.
                     :  * arg:  pointer to an int holding how many entries may still be removed.
                     :  *
                     :  * Returns ZEND_HASH_APPLY_REMOVE (and decrements the counter) when the
                     :  * counter is positive and the entry's refcount is zero; otherwise returns
                     :  * ZEND_HASH_APPLY_KEEP and leaves the counter untouched. */
     250      278528 : static int pcre_clean_cache(zval *data, void *arg)
     251             : {
     252      278528 :         pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
     253      278528 :         int *num_clean = (int *)arg;
     254             : 
     255      278528 :         if (*num_clean > 0 && !pce->refcount) {
     256       34816 :                 (*num_clean)--;
     257       34816 :                 return ZEND_HASH_APPLY_REMOVE;
     258             :         } else {
     259      243712 :                 return ZEND_HASH_APPLY_KEEP;
     260             :         }
     261             : }
     262             : /* }}} */
     263             : 
     264             : /* {{{ static make_subpats_table
                     :  * Builds a lookup table from subpattern number to subpattern name.
                     :  *
                     :  * num_subpats: number of slots to allocate in the returned array.
                     :  * pce:         cache entry supplying the compiled regex, its study data and
                     :  *              name_count (how many name-table entries to walk).
                     :  *
                     :  * Returns an ecalloc()ed array of num_subpats pointers; slots with no name
                     :  * stay NULL and named slots point into PCRE's own name table (the strings
                     :  * are not copied). Returns NULL after an E_WARNING when pcre_fullinfo()
                     :  * reports an error or when a name is a numeric string.
                     :  * NOTE(review): name_idx is not range-checked against num_subpats before the
                     :  * store; presumably callers pass a count that covers every group - confirm. */
     265          13 : static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce)
     266             : {
     267          13 :         pcre_extra *extra = pce->extra;
     268          13 :         int name_cnt = pce->name_count, name_size, ni = 0;
     269             :         int rc;
     270             :         char *name_table;
     271             :         unsigned short name_idx;
     272             :         char **subpat_names;
     273             :         int rc1, rc2;
     274             : 
     275          13 :         rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
     276          13 :         rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
     277          13 :         rc = rc2 ? rc2 : rc1; /* report the second call's status if non-zero, else the first's */
     278          13 :         if (rc < 0) {
     279           0 :                 php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
     280           0 :                 return NULL;
     281             :         }
     282             : 
     283          13 :         subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
     284         486 :         while (ni++ < name_cnt) {
     285         460 :                 name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1]; /* first two bytes: group number, high byte first */
     286         460 :                 subpat_names[name_idx] = name_table + 2; /* the name string follows the two-byte number */
     287         920 :                 if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
     288           0 :                         php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed");
     289           0 :                         efree(subpat_names);
     290           0 :                         return NULL;
     291             :                 }
     292         460 :                 name_table += name_size; /* entries are name_size bytes apart */
     293             :         }
     294          13 :         return subpat_names;
     295             : }
     296             : /* }}} */
     297             : 
     298             : /* {{{ static calculate_unit_length */
     299             : /* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE_UTF8.
                     :  *
                     :  * pce:   cache entry; only compile_options is read.
                     :  * start: pointer to the first byte of the character.
                     :  *
                     :  * Returns 1 when PCRE_UTF8 is not set. With PCRE_UTF8 it returns 1 plus the
                     :  * number of bytes after start whose top two bits are 10. The loop always
                     :  * reads start[1] and stops at the first byte that is not of that form, so
                     :  * the buffer must contain such a byte after start. */
     300             : static zend_always_inline int calculate_unit_length(pcre_cache_entry *pce, char *start)
     301             : {
     302             :         int unit_len;
     303             : 
     304          79 :         if (pce->compile_options & PCRE_UTF8) {
     305          22 :                 char *end = start;
     306             : 
     307             :                 /* skip continuation bytes */
     308          35 :                 while ((*++end & 0xC0) == 0x80);
     309          22 :                 unit_len = end - start;
     310             :         } else {
     311          57 :                 unit_len = 1;
     312             :         }
     313          79 :         return unit_len;
     314             : }
     315             : /* }}} */
     316             : 
     317             : /* {{{ pcre_get_compiled_regex_cache
     318             :  */
     319     1957397 : PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
     320             : {
     321     1957397 :         pcre                            *re = NULL;
     322             :         pcre_extra                      *extra;
     323     1957397 :         int                                      coptions = 0;
     324     1957397 :         int                                      soptions = 0;
     325             :         const char                      *error;
     326             :         int                                      erroffset;
     327             :         char                             delimiter;
     328             :         char                             start_delimiter;
     329             :         char                             end_delimiter;
     330             :         char                            *p, *pp;
     331             :         char                            *pattern;
     332     1957397 :         int                                      do_study = 0;
     333     1957397 :         int                                      poptions = 0;
     334     1957397 :         unsigned const char *tables = NULL;
     335             :         pcre_cache_entry        *pce;
     336             :         pcre_cache_entry         new_entry;
     337             :         int                                      rc;
     338             :         zend_string             *key;
     339             : 
     340             : #if HAVE_SETLOCALE
     341     1957425 :         if (BG(locale_string) &&
     342          33 :                 (ZSTR_LEN(BG(locale_string)) != 1 && ZSTR_VAL(BG(locale_string))[0] != 'C')) {
     343          10 :                 key = zend_string_alloc(ZSTR_LEN(regex) + ZSTR_LEN(BG(locale_string)) + 1, 0);
     344           5 :                 memcpy(ZSTR_VAL(key), ZSTR_VAL(BG(locale_string)), ZSTR_LEN(BG(locale_string)) + 1);
     345           5 :                 memcpy(ZSTR_VAL(key) + ZSTR_LEN(BG(locale_string)), ZSTR_VAL(regex), ZSTR_LEN(regex) + 1);
     346             :         } else
     347             : #endif
     348             :         {
     349     1957392 :                 key = regex;
     350             :         }
     351             : 
     352             :         /* Try to lookup the cached regex entry, and if successful, just pass
     353             :            back the compiled pattern, otherwise go on and compile it. */
     354     1957397 :         pce = zend_hash_find_ptr(&PCRE_G(pcre_cache), key);
     355     1957397 :         if (pce) {
     356             : #if HAVE_SETLOCALE
     357     1907367 :                 if (key != regex) {
     358             :                         zend_string_release(key);
     359             :                 }
     360             : #endif
     361     1907367 :                 return pce;
     362             :         }
     363             : 
     364       50030 :         p = ZSTR_VAL(regex);
     365             : 
     366             :         /* Parse through the leading whitespace, and display a warning if we
     367             :            get to the end without encountering a delimiter. */
     368       50030 :         while (isspace((int)*(unsigned char *)p)) p++;
     369       50030 :         if (*p == 0) {
     370             : #if HAVE_SETLOCALE
     371           5 :                 if (key != regex) {
     372             :                         zend_string_release(key);
     373             :                 }
     374             : #endif
     375           5 :                 php_error_docref(NULL, E_WARNING,
     376           5 :                                                  p < ZSTR_VAL(regex) + ZSTR_LEN(regex) ? "Null byte in regex" : "Empty regular expression");
     377           5 :                 pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
     378           5 :                 return NULL;
     379             :         }
     380             : 
     381             :         /* Get the delimiter and display a warning if it is alphanumeric
     382             :            or a backslash. */
     383       50025 :         delimiter = *p++;
     384       50025 :         if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
     385             : #if HAVE_SETLOCALE
     386           9 :                 if (key != regex) {
     387             :                         zend_string_release(key);
     388             :                 }
     389             : #endif
     390           9 :                 php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash");
     391           9 :                 pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
     392           9 :                 return NULL;
     393             :         }
     394             : 
     395       50016 :         start_delimiter = delimiter;
     396       50016 :         if ((pp = strchr("([{< )]}> )]}>", delimiter)))
     397          30 :                 delimiter = pp[5];
     398       50016 :         end_delimiter = delimiter;
     399             : 
     400       50016 :         pp = p;
     401             : 
     402       50016 :         if (start_delimiter == end_delimiter) {
     403             :                 /* We need to iterate through the pattern, searching for the ending delimiter,
     404             :                    but skipping the backslashed delimiters.  If the ending delimiter is not
     405             :                    found, display a warning. */
     406    16744870 :                 while (*pp != 0) {
     407    16694874 :                         if (*pp == '\\' && pp[1] != 0) pp++;
     408    15510031 :                         else if (*pp == delimiter)
     409       49976 :                                 break;
     410    16644898 :                         pp++;
     411             :                 }
     412             :         } else {
     413             :                 /* We iterate through the pattern, searching for the matching ending
     414             :                  * delimiter. For each matching starting delimiter, we increment nesting
     415             :                  * level, and decrement it for each matching ending delimiter. If we
     416             :                  * reach the end of the pattern without matching, display a warning.
     417             :                  */
     418          30 :                 int brackets = 1;       /* brackets nesting level */
     419         923 :                 while (*pp != 0) {
     420         890 :                         if (*pp == '\\' && pp[1] != 0) pp++;
     421         768 :                         else if (*pp == end_delimiter && --brackets <= 0)
     422             :                                 break;
     423         741 :                         else if (*pp == start_delimiter)
     424          24 :                                 brackets++;
     425         863 :                         pp++;
     426             :                 }
     427             :         }
     428             : 
     429       50016 :         if (*pp == 0) {
     430             : #if HAVE_SETLOCALE
     431          13 :                 if (key != regex) {
     432             :                         zend_string_release(key);
     433             :                 }
     434             : #endif
     435          13 :                 if (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
     436           4 :                         php_error_docref(NULL,E_WARNING, "Null byte in regex");
     437           9 :                 } else if (start_delimiter == end_delimiter) {
     438           8 :                         php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
     439             :                 } else {
     440           1 :                         php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
     441             :                 }
     442          13 :                 pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
     443          13 :                 return NULL;
     444             :         }
     445             : 
     446             :         /* Make a copy of the actual pattern. */
     447       50003 :         pattern = estrndup(p, pp-p);
     448             : 
     449             :         /* Move on to the options */
     450       50003 :         pp++;
     451             : 
     452             :         /* Parse through the options, setting appropriate flags.  Display
     453             :            a warning if we encounter an unknown modifier. */
     454      115386 :         while (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
     455       15398 :                 switch (*pp++) {
     456             :                         /* Perl compatible options */
     457        1999 :                         case 'i':       coptions |= PCRE_CASELESS;              break;
     458        2446 :                         case 'm':       coptions |= PCRE_MULTILINE;             break;
     459       10833 :                         case 's':       coptions |= PCRE_DOTALL;                break;
     460           6 :                         case 'x':       coptions |= PCRE_EXTENDED;              break;
     461             : 
     462             :                         /* PCRE specific options */
     463           2 :                         case 'A':       coptions |= PCRE_ANCHORED;              break;
     464          10 :                         case 'D':       coptions |= PCRE_DOLLAR_ENDONLY;break;
     465          25 :                         case 'S':       do_study  = 1;                                  break;
     466          20 :                         case 'U':       coptions |= PCRE_UNGREEDY;              break;
     467           1 :                         case 'X':       coptions |= PCRE_EXTRA;                 break;
     468          33 :                         case 'u':       coptions |= PCRE_UTF8;
     469             :         /* In  PCRE,  by  default, \d, \D, \s, \S, \w, and \W recognize only ASCII
     470             :        characters, even in UTF-8 mode. However, this can be changed by setting
     471             :        the PCRE_UCP option. */
     472             : #ifdef PCRE_UCP
     473          33 :                                                 coptions |= PCRE_UCP;
     474             : #endif
     475          33 :                                 break;
     476           1 :                         case 'J':       coptions |= PCRE_DUPNAMES;              break;
     477             : 
     478             :                         /* Custom preg options */
     479           2 :                         case 'e':       poptions |= PREG_REPLACE_EVAL;  break;
     480             : 
     481           2 :                         case ' ':
     482             :                         case '\n':
     483           2 :                                 break;
     484             : 
     485          18 :                         default:
     486          18 :                                 if (pp[-1]) {
     487          13 :                                         php_error_docref(NULL,E_WARNING, "Unknown modifier '%c'", pp[-1]);
     488             :                                 } else {
     489           5 :                                         php_error_docref(NULL,E_WARNING, "Null byte in regex");
     490             :                                 }
     491          18 :                                 pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
     492          18 :                                 efree(pattern);
     493             : #if HAVE_SETLOCALE
     494          18 :                                 if (key != regex) {
     495             :                                         zend_string_release(key);
     496             :                                 }
     497             : #endif
     498          18 :                                 return NULL;
     499             :                 }
     500             :         }
     501             : 
     502             : #if HAVE_SETLOCALE
     503       49985 :         if (key != regex) {
     504           5 :                 tables = pcre_maketables();
     505             :         }
     506             : #endif
     507             : 
     508             :         /* Compile pattern and display a warning if compilation failed. */
     509       49985 :         re = pcre_compile(pattern,
     510             :                                           coptions,
     511             :                                           &error,
     512             :                                           &erroffset,
     513             :                                           tables);
     514             : 
     515       49985 :         if (re == NULL) {
     516             : #if HAVE_SETLOCALE
     517          10 :                 if (key != regex) {
     518             :                         zend_string_release(key);
     519             :                 }
     520             : #endif
     521          10 :                 php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
     522          10 :                 pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
     523          10 :                 efree(pattern);
     524          10 :                 if (tables) {
     525           0 :                         pefree((void*)tables, 1);
     526             :                 }
     527          10 :                 return NULL;
     528             :         }
     529             : 
     530             : #ifdef HAVE_PCRE_JIT_SUPPORT
     531       49975 :         if (PCRE_G(jit)) {
     532             :                 /* Enable PCRE JIT compiler */
     533       49967 :                 do_study = 1;
     534       49967 :                 soptions |= PCRE_STUDY_JIT_COMPILE;
     535             :         }
     536             : #endif
     537             : 
     538             :         /* If study option was specified, study the pattern and
     539             :            store the result in extra for passing to pcre_exec. */
     540       49975 :         if (do_study) {
     541             :                 php_pcre_mutex_lock();
     542       49967 :                 extra = pcre_study(re, soptions, &error);
     543             :                 php_pcre_mutex_unlock();
     544       49967 :                 if (extra) {
     545       49967 :                         extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
     546       49967 :                         extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
     547       49967 :                         extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
     548             : #ifdef HAVE_PCRE_JIT_SUPPORT
     549       49967 :                         if (PCRE_G(jit) && jit_stack) {
     550       49967 :                                 pcre_assign_jit_stack(extra, NULL, jit_stack);
     551             :                         }
     552             : #endif
     553             :                 }
     554       49967 :                 if (error != NULL) {
     555           0 :                         php_error_docref(NULL, E_WARNING, "Error while studying pattern");
     556           0 :                         pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
     557             :                 }
     558             :         } else {
     559           8 :                 extra = NULL;
     560             :         }
     561             : 
     562       49975 :         efree(pattern);
     563             : 
     564             :         /*
     565             :          * If we reached cache limit, clean out the items from the head of the list;
     566             :          * these are supposedly the oldest ones (but not necessarily the least used
     567             :          * ones).
     568             :          */
     569       49975 :         if (!pce && zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
     570          68 :                 int num_clean = PCRE_CACHE_SIZE / 8;
     571          68 :                 zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
     572             :         }
     573             : 
     574             :         /* Store the compiled pattern and extra info in the cache. */
     575       49975 :         new_entry.re = re;
     576       49975 :         new_entry.extra = extra;
     577       49975 :         new_entry.preg_options = poptions;
     578       49975 :         new_entry.compile_options = coptions;
     579             : #if HAVE_SETLOCALE
     580       49975 :         new_entry.locale = NULL;
     581       49975 :         new_entry.tables = tables;
     582             : #endif
     583       49975 :         new_entry.refcount = 0;
     584             : 
     585       49975 :         rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &new_entry.capture_count);
     586       49975 :         if (rc < 0) {
     587             : #if HAVE_SETLOCALE
     588           0 :                 if (key != regex) {
     589             :                         zend_string_release(key);
     590             :                 }
     591             : #endif
     592           0 :                 php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
     593           0 :                 pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
     594           0 :                 return NULL;
     595             :         }
     596             : 
     597       49975 :         rc = pcre_fullinfo(re, extra, PCRE_INFO_NAMECOUNT, &new_entry.name_count);
     598       49975 :         if (rc < 0) {
     599             : #if HAVE_SETLOCALE
     600           0 :                 if (key != regex) {
     601             :                         zend_string_release(key);
     602             :                 }
     603             : #endif
     604           0 :                 php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
     605           0 :                 pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
     606           0 :                 return NULL;
     607             :         }
     608             : 
     609             :         /*
     610             :          * Interned strings are not duplicated when stored in HashTable,
     611             :          * but all the interned strings created during HTTP request are removed
     612             :          * at end of request. However PCRE_G(pcre_cache) must be consistent
     613             :          * on the next request as well. So we disable usage of interned strings
     614             :          * as hash keys especially for this table.
     615             :          * See bug #63180
     616             :          */
     617       49975 :         if (!ZSTR_IS_INTERNED(key) || !(GC_FLAGS(key) & IS_STR_PERMANENT)) {
     618       99948 :                 pce = zend_hash_str_update_mem(&PCRE_G(pcre_cache),
     619       49974 :                                 ZSTR_VAL(key), ZSTR_LEN(key), &new_entry, sizeof(pcre_cache_entry));
     620             : #if HAVE_SETLOCALE
     621       99948 :                 if (key != regex) {
     622             :                         zend_string_release(key);
     623             :                 }
     624             : #endif
     625             :         } else {
     626           1 :                 pce = zend_hash_update_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry));
     627             :         }
     628             : 
     629       49975 :         return pce;
     630             : }
     631             : /* }}} */
     632             : 
     633             : /* {{{ pcre_get_compiled_regex
     634             :  * Obtains the cache entry for `regex` through pcre_get_compiled_regex_cache() and returns its compiled pattern (pce->re), or NULL when no entry was obtained. `extra` and `preg_options` are optional out-parameters: each is written only when non-NULL, and receives NULL / 0 respectively when there is no entry. */
     635       14179 : PHPAPI pcre* pcre_get_compiled_regex(zend_string *regex, pcre_extra **extra, int *preg_options)
     636             : {
     637       14179 :         pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
     638             : 
     639       14179 :         if (extra) { /* caller wants the entry's `extra` field */
     640       14179 :                 *extra = pce ? pce->extra : NULL;
     641             :         }
     642       14179 :         if (preg_options) { /* caller wants the entry's preg_options */
     643       14179 :                 *preg_options = pce ? pce->preg_options : 0;
     644             :         }
     645             : 
     646       14179 :         return pce ? pce->re : NULL;
     647             : }
     648             : /* }}} */
     649             : 
     650             : /* {{{ pcre_get_compiled_regex_ex
     651             :  * Same lookup as pcre_get_compiled_regex() with one more optional out-parameter: `compile_options` receives the entry's compile_options field. Every out-parameter is written only when non-NULL and gets NULL / 0 when pcre_get_compiled_regex_cache() yields no entry. Returns pce->re, or NULL when there is no entry. */
     652           0 : PHPAPI pcre* pcre_get_compiled_regex_ex(zend_string *regex, pcre_extra **extra, int *preg_options, int *compile_options)
     653             : {
     654           0 :         pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
     655             : 
     656           0 :         if (extra) { /* caller wants the entry's `extra` field */
     657           0 :                 *extra = pce ? pce->extra : NULL;
     658             :         }
     659           0 :         if (preg_options) { /* caller wants the entry's preg_options */
     660           0 :                 *preg_options = pce ? pce->preg_options : 0;
     661             :         }
     662           0 :         if (compile_options) { /* caller wants the entry's compile_options */
     663           0 :                 *compile_options = pce ? pce->compile_options : 0;
     664             :         }
     665             : 
     666           0 :         return pce ? pce->re : NULL;
     667             : }
     668             : /* }}} */
     669             : 
     670             : /* {{{ add_offset_pair: builds a two-element array (substring of `len` bytes taken from `str`, then `offset`) and appends it to the array `result`; when `name` is non-NULL the same array is additionally stored in `result` under that string key. */
     671          75 : static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
     672             : {
     673             :         zval match_pair, tmp;
     674             : 
     675          75 :         array_init_size(&match_pair, 2);
     676             : 
     677             :         /* Fill the pair: index 0 holds the matched substring, index 1 holds its offset */
     678         150 :         ZVAL_STRINGL(&tmp, str, len);
     679          75 :         zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
     680          75 :         ZVAL_LONG(&tmp, offset);
     681          75 :         zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
     682             : 
     683          75 :         if (name) {
     684             :                 Z_ADDREF(match_pair); /* the pair is inserted twice (by name here, by index below), so take one extra reference */
     685           2 :                 zend_hash_str_update(Z_ARRVAL_P(result), name, strlen(name), &match_pair);
     686             :         }
     687          75 :         zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair); /* always appended under the next numeric index */
     688          75 : }
     689             : /* }}} */
     690             : 
     691     1883970 : static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ Parses the call arguments, fetches the compiled pattern for `regex` and delegates the matching to php_pcre_match_impl(), forwarding `global` unchanged. Returns FALSE on argument-parse failure, on an over-long subject, or when no compiled pattern is obtained. */
     692             : {
     693             :         /* parameters: two required (regex, subject), then three optional (subpats, flags, start_offset) */
     694             :         zend_string              *regex;                        /* Regular expression */
     695             :         zend_string              *subject;                      /* String to match against */
     696             :         pcre_cache_entry *pce;                          /* Compiled regular expression */
     697     1883970 :         zval                     *subpats = NULL;       /* Array for subpatterns */
     698     1883970 :         zend_long                 flags = 0;            /* Match control flags */
     699     1883970 :         zend_long                 start_offset = 0;     /* Where the new search starts */
     700             : 
     701     1883970 :         ZEND_PARSE_PARAMETERS_START(2, 5)
     702     5651886 :                 Z_PARAM_STR(regex)
     703     5651874 :                 Z_PARAM_STR(subject)
     704     1883954 :                 Z_PARAM_OPTIONAL
     705     5599574 :                 Z_PARAM_ZVAL_EX(subpats, 0, 1)
     706     1244898 :                 Z_PARAM_LONG(flags)
     707          61 :                 Z_PARAM_LONG(start_offset)
     708     1883970 :         ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
     709             : 
     710     1883954 :         if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject))) { /* NOTE(review): presumably guards the (int) cast of the subject length below - confirm against the macro definition */
     711           0 :                         php_error_docref(NULL, E_WARNING, "Subject is too long");
     712           0 :                         RETURN_FALSE;
     713             :         }
     714             : 
     715             :         /* Compile regex or get it from cache; a NULL entry makes the call return FALSE. */
     716     1883954 :         if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
     717          31 :                 RETURN_FALSE;
     718             :         }
     719             : 
     720     1883923 :         pce->refcount++; /* raised only for the duration of the match call; NOTE(review): presumably protects the entry from cache cleanup while in use - confirm against pcre_clean_cache */
     721     3767846 :         php_pcre_match_impl(pce, ZSTR_VAL(subject), (int)ZSTR_LEN(subject), return_value, subpats,
     722     1883923 :                 global, ZEND_NUM_ARGS() >= 4, flags, start_offset); /* use_flags is true only when a 4th argument was actually passed */
     723     1883923 :         pce->refcount--;
     724             : }
     725             : /* }}} */
     726             : 
     727             : /* {{{ php_pcre_match_impl() */
     728     1885465 : PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
     729             :         zval *subpats, int global, int use_flags, zend_long flags, zend_long start_offset)
     730             : {
     731             :         zval                     result_set,            /* Holds a set of subpatterns after
     732             :                                                                                    a global match */
     733     1885465 :                                     *match_sets = NULL; /* An array of sets of matches for each
     734             :                                                                                    subpattern after a global match */
     735     1885465 :         pcre_extra              *extra = pce->extra;/* Holds results of studying */
     736             :         pcre_extra               extra_data;            /* Used locally for exec options */
     737     1885465 :         int                              exoptions = 0;         /* Execution options */
     738     1885465 :         int                              count = 0;                     /* Count of matched subpatterns */
     739             :         int                             *offsets;                       /* Array of subpattern offsets */
     740             :         int                              num_subpats;           /* Number of captured subpatterns */
     741             :         int                              size_offsets;          /* Size of the offsets array */
     742             :         int                              matched;                       /* Has anything matched */
     743     1885465 :         int                              g_notempty = 0;        /* If the match should not be empty */
     744             :         const char         **stringlist;                /* Holds list of subpatterns */
     745             :         char               **subpat_names;              /* Array for named subpatterns */
     746             :         int                              i;
     747             :         int                              subpats_order;         /* Order of subpattern matches */
     748             :         int                              offset_capture;    /* Capture match offsets: yes/no */
     749     1885465 :         unsigned char   *mark = NULL;       /* Target for MARK name */
     750             :         zval            marks;                  /* Array of marks for PREG_PATTERN_ORDER */
     751             :         ALLOCA_FLAG(use_heap);
     752             : 
     753     1885465 :         ZVAL_UNDEF(&marks);
     754             : 
     755             :         /* Overwrite the passed-in value for subpatterns with an empty array. */
     756     1885465 :         if (subpats != NULL) {
     757     1246352 :                 zval_ptr_dtor(subpats);
     758     1246352 :                 array_init(subpats);
     759             :         }
     760             : 
     761     1885465 :         subpats_order = global ? PREG_PATTERN_ORDER : 0;
     762             : 
     763     1885465 :         if (use_flags) {
     764        1517 :                 offset_capture = flags & PREG_OFFSET_CAPTURE;
     765             : 
     766             :                 /*
     767             :                  * subpats_order is pre-set to pattern mode so we change it only if
     768             :                  * necessary.
     769             :                  */
     770        1517 :                 if (flags & 0xff) {
     771          25 :                         subpats_order = flags & 0xff;
     772             :                 }
     773        1517 :                 if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
     774        1484 :                         (!global && subpats_order != 0)) {
     775           1 :                         php_error_docref(NULL, E_WARNING, "Invalid flags specified");
     776           1 :                         return;
     777             :                 }
     778             :         } else {
     779     1883948 :                 offset_capture = 0;
     780             :         }
     781             : 
     782             :         /* Negative offset counts from the end of the string. */
     783     1885464 :         if (start_offset < 0) {
     784           5 :                 start_offset = subject_len + start_offset;
     785           5 :                 if (start_offset < 0) {
     786           1 :                         start_offset = 0;
     787             :                 }
     788             :         }
     789             : 
     790     1885464 :         if (extra == NULL) {
     791           6 :                 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
     792           6 :                 extra = &extra_data;
     793             :         }
     794     1885464 :         extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
     795     1885464 :         extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
     796             : #ifdef PCRE_EXTRA_MARK
     797     1885464 :         extra->mark = &mark;
     798     1885464 :         extra->flags |= PCRE_EXTRA_MARK;
     799             : #endif
     800             : 
     801             :         /* Calculate the size of the offsets array, and allocate memory for it. */
     802     1885464 :         num_subpats = pce->capture_count + 1;
     803     1885464 :         size_offsets = num_subpats * 3;
     804             : 
     805             :         /*
     806             :          * Build a mapping from subpattern numbers to their names. We will
     807             :          * allocate the table only if there are any named subpatterns.
     808             :          */
     809     1885464 :         subpat_names = NULL;
     810     1885464 :         if (pce->name_count > 0) {
     811          12 :                 subpat_names = make_subpats_table(num_subpats, pce);
     812          12 :                 if (!subpat_names) {
     813           0 :                         RETURN_FALSE;
     814             :                 }
     815             :         }
     816             : 
     817     1885464 :         if (size_offsets <= 32) {
     818     1885450 :                 offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
     819             :         } else {
     820          14 :                 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
     821             :         }
     822     1885464 :         memset(offsets, 0, size_offsets*sizeof(int));
     823             :         /* Allocate match sets array and initialize the values. */
     824     1885464 :         if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
     825         121 :                 match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0);
     826         387 :                 for (i=0; i<num_subpats; i++) {
     827         266 :                         array_init(&match_sets[i]);
     828             :                 }
     829             :         }
     830             : 
     831     1885464 :         matched = 0;
     832     1885464 :         PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
     833             : 
     834             :         do {
     835             :                 /* Execute the regular expression. */
     836     1885678 :                 count = pcre_exec(pce->re, extra, subject, (int)subject_len, (int)start_offset,
     837             :                                                   exoptions|g_notempty, offsets, size_offsets);
     838             : 
     839             :                 /* the string was already proved to be valid UTF-8 */
     840     1885678 :                 exoptions |= PCRE_NO_UTF8_CHECK;
     841             : 
     842             :                 /* Check for too many substrings condition. */
     843     1885678 :                 if (count == 0) {
     844           0 :                         php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
     845           0 :                         count = size_offsets/3;
     846             :                 }
     847             : 
     848             :                 /* If something has matched */
     849     1885678 :                 if (count > 0) {
     850      100569 :                         matched++;
     851             : 
     852             :                         /* If subpatterns array has been passed, fill it in with values. */
     853      100569 :                         if (subpats != NULL) {
     854             :                                 /* Try to get the list of substrings and display a warning if failed. */
     855       49625 :                                 if ((offsets[1] - offsets[0] < 0) || pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) {
     856           1 :                                         if (subpat_names) {
     857           0 :                                                 efree(subpat_names);
     858             :                                         }
     859           1 :                                         if (size_offsets <= 32) {
     860           1 :                                                 free_alloca(offsets, use_heap);
     861             :                                         } else {
     862           0 :                                                 efree(offsets);
     863             :                                         }
     864           1 :                                         if (match_sets) efree(match_sets);
     865           1 :                                         php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
     866           1 :                                         RETURN_FALSE;
     867             :                                 }
     868             : 
     869       49624 :                                 if (global) {   /* global pattern matching */
     870         199 :                                         if (subpats && subpats_order == PREG_PATTERN_ORDER) {
     871             :                                                 /* For each subpattern, insert it into the appropriate array. */
     872         149 :                                                 if (offset_capture) {
     873          22 :                                                         for (i = 0; i < count; i++) {
     874          24 :                                                                 add_offset_pair(&match_sets[i], (char *)stringlist[i],
     875          24 :                                                                                                 offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
     876             :                                                         }
     877             :                                                 } else {
     878         360 :                                                         for (i = 0; i < count; i++) {
     879         221 :                                                                 add_next_index_stringl(&match_sets[i], (char *)stringlist[i],
     880         221 :                                                                                                            offsets[(i<<1)+1] - offsets[i<<1]);
     881             :                                                         }
     882             :                                                 }
     883             :                                                 /* Add MARK, if available */
     884         149 :                                                 if (mark) {
     885           2 :                                                         if (Z_TYPE(marks) == IS_UNDEF) {
     886           1 :                                                                 array_init(&marks);
     887             :                                                         }
     888           2 :                                                         add_index_string(&marks, matched - 1, (char *) mark);
     889             :                                                 }
     890             :                                                 /*
     891             :                                                  * If the number of captured subpatterns on this run is
     892             :                                                  * less than the total possible number, pad the result
     893             :                                                  * arrays with empty strings.
     894             :                                                  */
     895         298 :                                                 if (count < num_subpats) {
     896          11 :                                                         for (; i < num_subpats; i++) {
     897           7 :                                                                 add_next_index_string(&match_sets[i], "");
     898             :                                                         }
     899             :                                                 }
     900             :                                         } else {
     901             :                                                 /* Allocate the result set array */
     902          50 :                                                 array_init_size(&result_set, count + (mark ? 1 : 0));
     903             : 
     904             :                                                 /* Add all the subpatterns to it */
     905          50 :                                                 if (subpat_names) {
     906           8 :                                                         if (offset_capture) {
     907           0 :                                                                 for (i = 0; i < count; i++) {
     908           0 :                                                                         add_offset_pair(&result_set, (char *)stringlist[i],
     909           0 :                                                                                                         offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
     910             :                                                                 }
     911             :                                                         } else {
     912          40 :                                                                 for (i = 0; i < count; i++) {
     913          32 :                                                                         if (subpat_names[i]) {
     914          22 :                                                                                 add_assoc_stringl(&result_set, subpat_names[i], (char *)stringlist[i],
     915             :                                                                                                                            offsets[(i<<1)+1] - offsets[i<<1]);
     916             :                                                                         }
     917          32 :                                                                         add_next_index_stringl(&result_set, (char *)stringlist[i],
     918          32 :                                                                                                                    offsets[(i<<1)+1] - offsets[i<<1]);
     919             :                                                                 }
     920             :                                                         }
     921             :                                                 } else {
     922          42 :                                                         if (offset_capture) {
     923          10 :                                                                 for (i = 0; i < count; i++) {
     924          14 :                                                                         add_offset_pair(&result_set, (char *)stringlist[i],
     925          14 :                                                                                                         offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
     926             :                                                                 }
     927             :                                                         } else {
     928         343 :                                                                 for (i = 0; i < count; i++) {
     929         304 :                                                                         add_next_index_stringl(&result_set, (char *)stringlist[i],
     930         304 :                                                                                                                    offsets[(i<<1)+1] - offsets[i<<1]);
     931             :                                                                 }
     932             :                                                         }
     933             :                                                 }
     934             :                                                 /* Add MARK, if available */
     935          50 :                                                 if (mark) {
     936           2 :                                                         add_assoc_string_ex(&result_set, "MARK", sizeof("MARK") - 1, (char *)mark);
     937             :                                                 }
     938             :                                                 /* And add it to the output array */
     939          50 :                                                 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set);
     940             :                                         }
     941             :                                 } else {                        /* single pattern matching */
     942             :                                         /* For each subpattern, insert it into the subpatterns array. */
     943       49425 :                                         if (subpat_names) {
     944           6 :                                                 if (offset_capture) {
     945           5 :                                                         for (i = 0; i < count; i++) {
     946          12 :                                                                 add_offset_pair(subpats, (char *)stringlist[i],
     947           4 :                                                                                                 offsets[(i<<1)+1] - offsets[i<<1],
     948           8 :                                                                                                 offsets[i<<1], subpat_names[i]);
     949             :                                                         }
     950             :                                                 } else {
     951         317 :                                                         for (i = 0; i < count; i++) {
     952         312 :                                                                 if (subpat_names[i]) {
     953         304 :                                                                         add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
     954             :                                                                                                           offsets[(i<<1)+1] - offsets[i<<1]);
     955             :                                                                 }
     956         312 :                                                                 add_next_index_stringl(subpats, (char *)stringlist[i],
     957         312 :                                                                                                            offsets[(i<<1)+1] - offsets[i<<1]);
     958             :                                                         }
     959             :                                                 }
     960             :                                         } else {
     961       49419 :                                                 if (offset_capture) {
     962          19 :                                                         for (i = 0; i < count; i++) {
     963          22 :                                                                 add_offset_pair(subpats, (char *)stringlist[i],
     964          11 :                                                                                                 offsets[(i<<1)+1] - offsets[i<<1],
     965          11 :                                                                                                 offsets[i<<1], NULL);
     966             :                                                         }
     967             :                                                 } else {
     968      150798 :                                                         for (i = 0; i < count; i++) {
     969      101387 :                                                                 add_next_index_stringl(subpats, (char *)stringlist[i],
     970      101387 :                                                                                                            offsets[(i<<1)+1] - offsets[i<<1]);
     971             :                                                         }
     972             :                                                 }
     973             :                                         }
     974             :                                         /* Add MARK, if available */
     975       49425 :                                         if (mark) {
     976           1 :                                                 add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
     977             :                                         }
     978             :                                 }
     979             : 
     980       49624 :                                 pcre_free((void *) stringlist);
     981             :                         }
     982     1785109 :                 } else if (count == PCRE_ERROR_NOMATCH) {
     983             :                         /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
     984             :                            this is not necessarily the end. We need to advance
     985             :                            the start offset, and continue. Fudge the offset values
     986             :                            to achieve this, unless we're already at the end of the string. */
     987     1785102 :                         if (g_notempty != 0 && start_offset < subject_len) {
     988          12 :                                 int unit_len = calculate_unit_length(pce, subject + start_offset);
     989             : 
     990           6 :                                 offsets[0] = (int)start_offset;
     991           6 :                                 offsets[1] = (int)(start_offset + unit_len);
     992             :                         } else
     993             :                                 break;
     994             :                 } else {
     995           7 :                         pcre_handle_exec_error(count);
     996           7 :                         break;
     997             :                 }
     998             : 
     999             :                 /* If we have matched an empty string, mimic what Perl's /g options does.
    1000             :                    This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
    1001             :                    the match again at the same point. If this fails (picked up above) we
    1002             :                    advance to the next character. */
    1003      100574 :                 g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
    1004             : 
    1005             :                 /* Advance to the position right after the last full match */
    1006      100574 :                 start_offset = offsets[1];
    1007      100574 :         } while (global);
    1008             : 
    1009             :         /* Add the match sets to the output array and clean up */
    1010     1885463 :         if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
    1011         121 :                 if (subpat_names) {
    1012          10 :                         for (i = 0; i < num_subpats; i++) {
    1013           8 :                                 if (subpat_names[i]) {
    1014           5 :                                         zend_hash_str_update(Z_ARRVAL_P(subpats), subpat_names[i],
    1015             :                                                                          strlen(subpat_names[i]), &match_sets[i]);
    1016           5 :                                         Z_ADDREF(match_sets[i]);
    1017             :                                 }
    1018           8 :                                 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
    1019             :                         }
    1020             :                 } else {
    1021         377 :                         for (i = 0; i < num_subpats; i++) {
    1022         258 :                                 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
    1023             :                         }
    1024             :                 }
    1025         121 :                 efree(match_sets);
    1026             : 
    1027         121 :                 if (Z_TYPE(marks) != IS_UNDEF) {
    1028           1 :                         add_assoc_zval(subpats, "MARK", &marks);
    1029             :                 }
    1030             :         }
    1031             : 
    1032     1885463 :         if (size_offsets <= 32) {
    1033     1885449 :                 free_alloca(offsets, use_heap);
    1034             :         } else {
    1035          14 :                 efree(offsets);
    1036             :         }
    1037     1885463 :         if (subpat_names) {
    1038          12 :                 efree(subpat_names);
    1039             :         }
    1040             : 
    1041             :         /* Did we encounter an error? */
    1042     1885463 :         if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
    1043     1885456 :                 RETVAL_LONG(matched);
    1044             :         } else {
    1045           7 :                 RETVAL_FALSE;
    1046             :         }
    1047             : }
    1048             : /* }}} */
    1049             : 
    1050             : /* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
    1051             :    Perform a Perl-style regular expression match */
    1052     1883857 : static PHP_FUNCTION(preg_match)
    1053             : {
    1054     1883857 :         php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
    1055     1883857 : }
    1056             : /* }}} */
    1057             : 
    1058             : /* {{{ proto int preg_match_all(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
    1059             :    Perform a Perl-style global regular expression match */
    1060         113 : static PHP_FUNCTION(preg_match_all)
    1061             : {
    1062         113 :         php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
    1063         113 : }
    1064             : /* }}} */
    1065             : 
    1066             : /* {{{ preg_get_backref
    1067             :  */
    1068         126 : static int preg_get_backref(char **str, int *backref)
    1069             : {
    1070         126 :         register char in_brace = 0;
    1071         126 :         register char *walk = *str;
    1072             : 
    1073         126 :         if (walk[1] == 0)
    1074           8 :                 return 0;
    1075             : 
    1076         118 :         if (*walk == '$' && walk[1] == '{') {
    1077          14 :                 in_brace = 1;
    1078          14 :                 walk++;
    1079             :         }
    1080         118 :         walk++;
    1081             : 
    1082         118 :         if (*walk >= '0' && *walk <= '9') {
    1083         118 :                 *backref = *walk - '0';
    1084         118 :                 walk++;
    1085             :         } else
    1086           0 :                 return 0;
    1087             : 
    1088         168 :         if (*walk && *walk >= '0' && *walk <= '9') {
    1089           2 :                 *backref = *backref * 10 + *walk - '0';
    1090           2 :                 walk++;
    1091             :         }
    1092             : 
    1093         118 :         if (in_brace) {
    1094          14 :                 if (*walk != '}')
    1095           6 :                         return 0;
    1096             :                 else
    1097           8 :                         walk++;
    1098             :         }
    1099             : 
    1100         112 :         *str = walk;
    1101         112 :         return 1;
    1102             : }
    1103             : /* }}} */
    1104             : 
    1105             : /* {{{ preg_do_repl_func
    1106             :  */
    1107          75 : static zend_string *preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, unsigned char *mark)
    1108             : {
    1109             :         zend_string *result_str;
    1110             :         zval             retval;                        /* Function return value */
    1111             :         zval         args[1];                   /* Argument to pass to function */
    1112             :         int                      i;
    1113             : 
    1114          75 :         array_init_size(&args[0], count + (mark ? 1 : 0));
    1115          75 :         if (subpat_names) {
    1116           3 :                 for (i = 0; i < count; i++) {
    1117           2 :                         if (subpat_names[i]) {
    1118           1 :                                 add_assoc_stringl(&args[0], subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1]);
    1119             :                         }
    1120           2 :                         add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
    1121             :                 }
    1122             :         } else {
    1123         179 :                 for (i = 0; i < count; i++) {
    1124         105 :                         add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
    1125             :                 }
    1126             :         }
    1127          75 :         if (mark) {
    1128           2 :                 add_assoc_string(&args[0], "MARK", (char *) mark);
    1129             :         }
    1130             : 
    1131         150 :         if (call_user_function_ex(EG(function_table), NULL, function, &retval, 1, args, 0, NULL) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) {
    1132          73 :                 result_str = zval_get_string(&retval);
    1133          73 :                 zval_ptr_dtor(&retval);
    1134             :         } else {
    1135           2 :                 if (!EG(exception)) {
    1136           0 :                         php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
    1137             :                 }
    1138             : 
    1139           4 :                 result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
    1140             :         }
    1141             : 
    1142          75 :         zval_ptr_dtor(&args[0]);
    1143             : 
    1144          75 :         return result_str;
    1145             : }
    1146             : /* }}} */
    1147             : 
    1148             : /* {{{ php_pcre_replace
    1149             :  */
    1150       38567 : PHPAPI zend_string *php_pcre_replace(zend_string *regex,
    1151             :                                                           zend_string *subject_str,
    1152             :                                                           char *subject, int subject_len,
    1153             :                                                           zval *replace_val, int is_callable_replace,
    1154             :                                                           int limit, int *replace_count)
    1155             : {
    1156             :         pcre_cache_entry        *pce;                       /* Compiled regular expression */
    1157             :         zend_string                     *result;                        /* Function result */
    1158             : 
    1159             :         /* Compile regex or get it from cache. */
    1160       38567 :         if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
    1161          14 :                 return NULL;
    1162             :         }
    1163       38553 :         pce->refcount++;
    1164       38553 :         result = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_val,
    1165             :                 is_callable_replace, limit, replace_count);
    1166       38553 :         pce->refcount--;
    1167             : 
    1168       38553 :         return result;
    1169             : }
    1170             : /* }}} */
    1171             : 
    1172             : /* {{{ php_pcre_replace_impl() */
    1173       38568 : PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, int subject_len, zval *replace_val, int is_callable_replace, int limit, int *replace_count)
    1174             : {
    1175       38568 :         pcre_extra              *extra = pce->extra;/* Holds results of studying */
    1176             :         pcre_extra               extra_data;            /* Used locally for exec options */
    1177       38568 :         int                              exoptions = 0;         /* Execution options */
    1178       38568 :         int                              count = 0;                     /* Count of matched subpatterns */
    1179             :         int                             *offsets;                       /* Array of subpattern offsets */
    1180             :         char                    **subpat_names;         /* Array for named subpatterns */
    1181             :         int                              num_subpats;           /* Number of captured subpatterns */
    1182             :         int                              size_offsets;          /* Size of the offsets array */
    1183             :         size_t                   new_len;                       /* Length of needed storage */
    1184             :         size_t                   alloc_len;                     /* Actual allocated length */
    1185             :         int                              match_len;                     /* Length of the current match */
    1186             :         int                              backref;                       /* Backreference number */
    1187             :         int                              start_offset;          /* Where the new search starts */
    1188       38568 :         int                              g_notempty=0;          /* If the match should not be empty */
    1189       38568 :         char                    *replace=NULL,          /* Replacement string */
    1190             :                                         *walkbuf,                       /* Location of current replacement in the result */
    1191             :                                         *walk,                          /* Used to walk the replacement string */
    1192             :                                         *match,                         /* The current match */
    1193             :                                         *piece,                         /* The current piece of subject */
    1194       38568 :                                         *replace_end=NULL,      /* End of replacement string */
    1195             :                                          walk_last;                     /* Last walked character */
    1196             :         size_t                  result_len;             /* Length of result */
    1197       38568 :         unsigned char   *mark = NULL;       /* Target for MARK name */
    1198             :         zend_string             *result;                        /* Result of replacement */
    1199       38568 :         zend_string     *eval_result=NULL;  /* Result of custom function */
    1200             : 
    1201             :         ALLOCA_FLAG(use_heap);
    1202             : 
    1203       38568 :         if (extra == NULL) {
    1204           2 :                 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
    1205           2 :                 extra = &extra_data;
    1206             :         }
    1207             : 
    1208       38568 :         extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
    1209       38568 :         extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
    1210             : 
    1211       38568 :         if (UNEXPECTED(pce->preg_options & PREG_REPLACE_EVAL)) {
    1212           1 :                 php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
    1213           1 :                 return NULL;
    1214             :         }
    1215             : 
    1216       38567 :         if (!is_callable_replace) {
    1217       38511 :                 replace = Z_STRVAL_P(replace_val);
    1218       38511 :                 replace_end = replace + Z_STRLEN_P(replace_val);
    1219             :         }
    1220             : 
    1221             :         /* Calculate the size of the offsets array, and allocate memory for it. */
    1222       38567 :         num_subpats = pce->capture_count + 1;
    1223       38567 :         size_offsets = num_subpats * 3;
    1224       38567 :         if (size_offsets <= 32) {
    1225       38564 :                 offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
    1226             :         } else {
    1227           3 :                 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
    1228             :         }
    1229             : 
    1230             :         /*
    1231             :          * Build a mapping from subpattern numbers to their names. We will
    1232             :          * allocate the table only if there are any named subpatterns.
    1233             :          */
    1234       38567 :         subpat_names = NULL;
    1235       38567 :         if (UNEXPECTED(pce->name_count > 0)) {
    1236           1 :                 subpat_names = make_subpats_table(num_subpats, pce);
    1237           1 :                 if (!subpat_names) {
    1238           0 :                         if (size_offsets <= 32) {
    1239           0 :                                 free_alloca(offsets, use_heap);
    1240             :                         } else {
    1241           0 :                                 efree(offsets);
    1242             :                         }
    1243           0 :                         return NULL;
    1244             :                 }
    1245             :         }
    1246             : 
    1247       38567 :         alloc_len = 0;
    1248       38567 :         result = NULL;
    1249             : 
    1250             :         /* Initialize */
    1251       38567 :         match = NULL;
    1252       38567 :         start_offset = 0;
    1253       38567 :         result_len = 0;
    1254       38567 :         PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
    1255             : 
    1256             :         while (1) {
    1257             : #ifdef PCRE_EXTRA_MARK
    1258       60133 :                 extra->mark = &mark;
    1259       49350 :                 extra->flags |= PCRE_EXTRA_MARK;
    1260             : #endif
    1261             :                 /* Execute the regular expression. */
    1262       49350 :                 count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
    1263             :                                                   exoptions|g_notempty, offsets, size_offsets);
    1264             : 
    1265             :                 /* the string was already proved to be valid UTF-8 */
    1266       49350 :                 exoptions |= PCRE_NO_UTF8_CHECK;
    1267             : 
    1268             :                 /* Check for too many substrings condition. */
    1269       49350 :                 if (UNEXPECTED(count == 0)) {
    1270           0 :                         php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
    1271           0 :                         count = size_offsets / 3;
    1272             :                 }
    1273             : 
    1274       49350 :                 piece = subject + start_offset;
    1275             : 
    1276             :                 /* if (EXPECTED(count > 0 && (limit == -1 || limit > 0))) */
    1277       49350 :                 if (EXPECTED(count > 0 && (offsets[1] - offsets[0] >= 0) && limit)) {
    1278       10771 :                         if (UNEXPECTED(replace_count)) {
    1279       10771 :                                 ++*replace_count;
    1280             :                         }
    1281             : 
    1282             :                         /* Set the match location in subject */
    1283       10771 :                         match = subject + offsets[0];
    1284             : 
    1285       10771 :                         new_len = result_len + offsets[0] - start_offset; /* part before the match */
    1286             : 
    1287             :                         /* if (!is_callable_replace) */
    1288       10771 :                         if (EXPECTED(replace)) {
    1289             :                                 /* do regular substitution */
    1290       10696 :                                 walk = replace;
    1291       10696 :                                 walk_last = 0;
    1292             : 
    1293       48571 :                                 while (walk < replace_end) {
    1294       27179 :                                         if ('\\' == *walk || '$' == *walk) {
    1295          63 :                                                 if (walk_last == '\\') {
    1296           0 :                                                         walk++;
    1297           0 :                                                         walk_last = 0;
    1298           0 :                                                         continue;
    1299             :                                                 }
    1300          63 :                                                 if (preg_get_backref(&walk, &backref)) {
    1301          56 :                                                         if (backref < count)
    1302          55 :                                                                 new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
    1303          56 :                                                         continue;
    1304             :                                                 }
    1305             :                                         }
    1306       27123 :                                         new_len++;
    1307       27123 :                                         walk++;
    1308       27123 :                                         walk_last = walk[-1];
    1309             :                                 }
    1310             : 
    1311       10696 :                                 if (new_len >= alloc_len) {
    1312        9082 :                                         alloc_len = zend_safe_address_guarded(2, new_len, alloc_len);
    1313        9082 :                                         if (result == NULL) {
    1314        8744 :                                                 result = zend_string_alloc(alloc_len, 0);
    1315             :                                         } else {
    1316         338 :                                                 result = zend_string_extend(result, alloc_len, 0);
    1317             :                                         }
    1318             :                                 }
    1319             : 
    1320             :                                 /* copy the part of the string before the match */
    1321       10696 :                                 memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece);
    1322       10696 :                                 result_len += (match-piece);
    1323             : 
    1324             :                                 /* copy replacement and backrefs */
    1325       10696 :                                 walkbuf = ZSTR_VAL(result) + result_len;
    1326             : 
    1327       10696 :                                 walk = replace;
    1328       10696 :                                 walk_last = 0;
    1329       48571 :                                 while (walk < replace_end) {
    1330       27179 :                                         if ('\\' == *walk || '$' == *walk) {
    1331          63 :                                                 if (walk_last == '\\') {
    1332           0 :                                                         *(walkbuf-1) = *walk++;
    1333           0 :                                                         walk_last = 0;
    1334           0 :                                                         continue;
    1335             :                                                 }
    1336          63 :                                                 if (preg_get_backref(&walk, &backref)) {
    1337          56 :                                                         if (backref < count) {
    1338          55 :                                                                 match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
    1339          55 :                                                                 memcpy(walkbuf, subject + offsets[backref<<1], match_len);
    1340          55 :                                                                 walkbuf += match_len;
    1341             :                                                         }
    1342          56 :                                                         continue;
    1343             :                                                 }
    1344             :                                         }
    1345       27123 :                                         *walkbuf++ = *walk++;
    1346       27123 :                                         walk_last = walk[-1];
    1347             :                                 }
    1348       10696 :                                 *walkbuf = '\0';
    1349             :                                 /* increment the result length by how much we've added to the string */
    1350       10696 :                                 result_len += (walkbuf - (ZSTR_VAL(result) + result_len));
    1351             :                         } else {
    1352             :                                 /* Use custom function to get replacement string and its length. */
    1353          75 :                                 eval_result = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, mark);
    1354          75 :                                 ZEND_ASSERT(eval_result);
    1355         150 :                                 new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result), new_len);
    1356          75 :                                 if (new_len >= alloc_len) {
    1357          67 :                                         alloc_len = zend_safe_address_guarded(2, new_len, alloc_len);
    1358          67 :                                         if (result == NULL) {
    1359          44 :                                                 result = zend_string_alloc(alloc_len, 0);
    1360             :                                         } else {
    1361          23 :                                                 result = zend_string_extend(result, alloc_len, 0);
    1362             :                                         }
    1363             :                                 }
    1364             :                                 /* copy the part of the string before the match */
    1365          75 :                                 memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
    1366          75 :                                 result_len += (int)(match-piece);
    1367             : 
    1368             :                                 /* copy replacement and backrefs */
    1369          75 :                                 walkbuf = ZSTR_VAL(result) + result_len;
    1370             : 
    1371             :                                 /* If using custom function, copy result to the buffer and clean up. */
    1372          75 :                                 memcpy(walkbuf, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
    1373          75 :                                 result_len += (int)ZSTR_LEN(eval_result);
    1374             :                                 zend_string_release(eval_result);
    1375             :                         }
    1376             : 
    1377       21542 :                         if (EXPECTED(limit)) {
    1378       10771 :                                 limit--;
    1379             :                         }
    1380       38579 :                 } else if (count == PCRE_ERROR_NOMATCH || UNEXPECTED(limit == 0)) {
    1381             :                         /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
    1382             :                            this is not necessarily the end. We need to advance
    1383             :                            the start offset, and continue. Fudge the offset values
    1384             :                            to achieve this, unless we're already at the end of the string. */
    1385       38595 :                         if (g_notempty != 0 && start_offset < subject_len) {
    1386          12 :                                 int unit_len = calculate_unit_length(pce, piece);
    1387             : 
    1388          12 :                                 offsets[0] = start_offset;
    1389          12 :                                 offsets[1] = start_offset + unit_len;
    1390          12 :                                 memcpy(ZSTR_VAL(result) + result_len, piece, unit_len);
    1391          12 :                                 result_len += unit_len;
    1392             :                         } else {
    1393       38559 :                                 if (!result && subject_str) {
    1394       29771 :                                         result = zend_string_copy(subject_str);
    1395       29771 :                                         break;
    1396             :                                 }
    1397        8788 :                                 new_len = result_len + subject_len - start_offset;
    1398        8788 :                                 if (new_len >= alloc_len) {
    1399         294 :                                         alloc_len = new_len; /* now we know exactly how long it is */
    1400         294 :                                         if (NULL != result) {
    1401         294 :                                                 result = zend_string_realloc(result, alloc_len, 0);
    1402             :                                         } else {
    1403           0 :                                                 result = zend_string_alloc(alloc_len, 0);
    1404             :                                         }
    1405             :                                 }
    1406             :                                 /* stick that last bit of string on our output */
    1407        8788 :                                 memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - start_offset);
    1408        8788 :                                 result_len += subject_len - start_offset;
    1409        8788 :                                 ZSTR_VAL(result)[result_len] = '\0';
    1410        8788 :                                 ZSTR_LEN(result) = result_len;
    1411        8788 :                                 break;
    1412             :                         }
    1413             :                 } else {
    1414           8 :                         pcre_handle_exec_error(count);
    1415           8 :                         if (result) {
    1416             :                                 zend_string_free(result);
    1417           0 :                                 result = NULL;
    1418             :                         }
    1419           8 :                         break;
    1420             :                 }
    1421             : 
    1422             :                 /* If we have matched an empty string, mimic what Perl's /g options does.
    1423             :                    This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
    1424             :                    the match again at the same point. If this fails (picked up above) we
    1425             :                    advance to the next character. */
    1426       10783 :                 g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
    1427             : 
    1428             :                 /* Advance to the next piece. */
    1429       10783 :                 start_offset = offsets[1];
    1430             :         }
    1431             : 
    1432       38567 :         if (size_offsets <= 32) {
    1433       38564 :                 free_alloca(offsets, use_heap);
    1434             :         } else {
    1435           3 :                 efree(offsets);
    1436             :         }
    1437       38567 :         if (UNEXPECTED(subpat_names)) {
    1438           1 :                 efree(subpat_names);
    1439             :         }
    1440             : 
    1441       38567 :         return result;
    1442             : }
    1443             : /* }}} */
    1444             : 
    1445             : /* {{{ php_replace_in_subject
                     :  * Convert `subject` to a string and run the replacement(s) over it.
                     :  *
                     :  * When `regex` is an array, every entry is converted to a string and applied
                     :  * in iteration order; the output of one replacement becomes the subject of
                     :  * the next. If `replace` is also an array (and is not being used as a
                     :  * callable), each pattern consumes the next defined entry of `replace`, and
                     :  * an empty string is used once those entries run out. Otherwise the same
                     :  * `replace` value is used for every pattern.
                     :  *
                     :  * When `regex` is not an array it is read with Z_STR_P(), so the caller must
                     :  * already have made it a string.
                     :  *
                     :  * `limit`, `is_callable_replace` and `replace_count` are passed through to
                     :  * php_pcre_replace() unchanged.
                     :  *
                     :  * Returns the resulting string, which the caller must release, or NULL when
                     :  * the subject fails the ZEND_SIZE_T_INT_OVFL() length check or
                     :  * php_pcre_replace() returns NULL. The temporary string created from
                     :  * `subject` is released on every exit path.
    1446             :  */
    1447       38525 : static zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *subject, int limit, int is_callable_replace, int *replace_count)
    1448             : {
    1449             :         zval            *regex_entry,
    1450             :                                 *replace_value,
    1451             :                                  empty_replace;
    1452             :         zend_string *result;
    1453             :         uint32_t replace_idx;
    1454       38525 :         zend_string     *subject_str = zval_get_string(subject);
    1455             : 
    1456             :         /* FIXME: This might need to be changed to ZSTR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */
    1457       38525 :         ZVAL_EMPTY_STRING(&empty_replace);
    1458             : 
    1459       38525 :         if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject_str))) {
    1460           0 :                         php_error_docref(NULL, E_WARNING, "Subject is too long");
                     :                         /* Drop the reference taken by zval_get_string() above. */
                     :                         zend_string_release(subject_str);
    1461           0 :                         return NULL;
    1462             :         }
    1463             : 
    1464             :         /* If regex is an array */
    1465       38525 :         if (Z_TYPE_P(regex) == IS_ARRAY) {
    1466          22 :                 replace_value = replace;
    1467          22 :                 replace_idx = 0;
    1468             : 
    1469             :                 /* For each entry in the regex array, get the entry */
    1470         149 :                 ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(regex), regex_entry) {
    1471             :                         zval replace_str;
    1472             :                         /* Make sure we're dealing with strings. */
    1473          64 :                         zend_string *regex_str = zval_get_string(regex_entry);
    1474             : 
    1475          64 :                         ZVAL_UNDEF(&replace_str);
    1476             :                         /* If replace is an array and not a callable construct */
    1477          64 :                         if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) {
    1478             :                                 /* Get current entry, skipping slots that are IS_UNDEF */
    1479          98 :                                 while (replace_idx < Z_ARRVAL_P(replace)->nNumUsed) {
    1480          94 :                                         if (Z_TYPE(Z_ARRVAL_P(replace)->arData[replace_idx].val) != IS_UNDEF) {
    1481          47 :                                                 ZVAL_COPY(&replace_str, &Z_ARRVAL_P(replace)->arData[replace_idx].val);
    1482          47 :                                                 break;
    1483             :                                         }
    1484           0 :                                         replace_idx++;
    1485             :                                 }
    1486          49 :                                 if (!Z_ISUNDEF(replace_str)) {
    1487          47 :                                         if (!is_callable_replace) {
    1488          47 :                                                 convert_to_string(&replace_str);
    1489             :                                         }
    1490          47 :                                         replace_value = &replace_str;
    1491          47 :                                         replace_idx++;
    1492             :                                 } else {
    1493             :                                         /* We've run out of replacement strings, so use an empty one */
    1494           2 :                                         replace_value = &empty_replace;
    1495             :                                 }
    1496             :                         }
    1497             : 
    1498             :                         /* Do the actual replacement and put the result back into subject_str
    1499             :                            for further replacements. */
    1500         128 :                         if ((result = php_pcre_replace(regex_str,
    1501             :                                                                                    subject_str,
    1502          64 :                                                                                    ZSTR_VAL(subject_str),
    1503          64 :                                                                                    (int)ZSTR_LEN(subject_str),
    1504             :                                                                                    replace_value,
    1505             :                                                                                    is_callable_replace,
    1506             :                                                                                    limit,
    1507             :                                                                                    replace_count)) != NULL) {
    1508             :                                 zend_string_release(subject_str);
    1509          63 :                                 subject_str = result;
    1510             :                         } else {
    1511             :                                 zend_string_release(subject_str);
    1512             :                                 zend_string_release(regex_str);
    1513             :                                 zval_dtor(&replace_str);
    1514           1 :                                 return NULL;
    1515             :                         }
    1516             : 
    1517             :                         zend_string_release(regex_str);
    1518             :                         zval_dtor(&replace_str);
    1519             :                 } ZEND_HASH_FOREACH_END();
    1520             : 
    1521          21 :                 return subject_str;
    1522             :         } else {
    1523       77006 :                 result = php_pcre_replace(Z_STR_P(regex),
    1524             :                                                                   subject_str,
    1525       38503 :                                                                   ZSTR_VAL(subject_str),
    1526       38503 :                                                                   (int)ZSTR_LEN(subject_str),
    1527             :                                                                   replace,
    1528             :                                                                   is_callable_replace,
    1529             :                                                                   limit,
    1530             :                                                                   replace_count);
    1531             :                 zend_string_release(subject_str);
    1532       38503 :                 return result;
    1533             :         }
    1534             : }
    1535             : /* }}} */
    1536             : 
    1537             : /* {{{ preg_replace_impl
                     :  * Shared worker called by preg_replace(), preg_replace_callback(),
                     :  * preg_replace_callback_array() and preg_filter().
                     :  *
                     :  * Argument normalisation: `replace` is converted to a string unless it is an
                     :  * array, or an object while is_callable_replace is set; `regex` is converted
                     :  * to a string unless it is an array.
                     :  *
                     :  * Array subject: return_value becomes an array and every entry is run
                     :  * through php_replace_in_subject(). A result is stored under the entry's
                     :  * original string or numeric key. Entries for which the helper returns NULL
                     :  * are left out, and with is_filter set so are entries that did not increase
                     :  * replace_count.
                     :  *
                     :  * Scalar subject: return_value is the resulting string, or NULL when the
                     :  * helper returns NULL or (with is_filter set) nothing was replaced.
                     :  *
                     :  * Returns the accumulated number of replacements.
                     :  *
                     :  * NOTE(review): limit_val is a zend_long but php_replace_in_subject() takes
                     :  * an int limit, so the value is implicitly narrowed at both call sites -
                     :  * confirm that out-of-range limits are acceptable.
    1538             :  */
    1539       38515 : static int preg_replace_impl(zval *return_value, zval *regex, zval *replace, zval *subject, zend_long limit_val, int is_callable_replace, int is_filter)
    1540             : {
    1541             :         zval            *subject_entry;
    1542             :         zend_string     *result;
    1543             :         zend_string     *string_key;
    1544             :         zend_ulong       num_key;
    1545       38515 :         int                      replace_count = 0, old_replace_count;
    1546             : 
    1547       77017 :         if (Z_TYPE_P(replace) != IS_ARRAY && (Z_TYPE_P(replace) != IS_OBJECT || !is_callable_replace)) {
    1548       38489 :                 convert_to_string_ex(replace);
    1549             :         }
    1550             : 
    1551       38513 :         if (Z_TYPE_P(regex) != IS_ARRAY) {
    1552       38503 :                 convert_to_string_ex(regex);
    1553             :         }
    1554             : 
    1555             :         /* if subject is an array */
    1556       38512 :         if (Z_TYPE_P(subject) == IS_ARRAY) {
    1557           6 :                 array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));
    1558             : 
    1559             :                 /* For each subject entry, convert it to string, then perform replacement
    1560             :                    and add the result to the return_value array. */
    1561          44 :                 ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) {
    1562          19 :                         old_replace_count = replace_count; /* snapshot: lets is_filter detect whether this entry changed */
    1563          19 :                         if ((result = php_replace_in_subject(regex, replace, subject_entry, limit_val, is_callable_replace, &replace_count)) != NULL) {
    1564          36 :                                 if (!is_filter || replace_count > old_replace_count) {
    1565             :                                         /* Add to return array; the array takes over the reference to result */
    1566             :                                         zval zv;
    1567             : 
    1568          17 :                                         ZVAL_STR(&zv, result);
    1569          17 :                                         if (string_key) {
    1570           1 :                                                 zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv);
    1571             :                                         } else {
    1572          16 :                                                 zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv);
    1573             :                                         }
    1574             :                                 } else {
    1575             :                                         zend_string_release(result);
    1576             :                                 }
    1577             :                         }
    1578             :                 } ZEND_HASH_FOREACH_END();
    1579             :         } else {
    1580             :                 /* if subject is not an array */
    1581       38506 :                 old_replace_count = replace_count;
    1582       38506 :                 if ((result = php_replace_in_subject(regex, replace, subject, limit_val, is_callable_replace, &replace_count)) != NULL) {
    1583       38483 :                         if (!is_filter || replace_count > old_replace_count) {
    1584       38483 :                                 RETVAL_STR(result);
    1585             :                         } else {
    1586             :                                 zend_string_release(result);
    1587           0 :                                 RETVAL_NULL();
    1588             :                         }
    1589             :                 } else {
    1590          23 :                         RETVAL_NULL();
    1591             :                 }
    1592             :         }
    1593             : 
    1594       38512 :         return replace_count;
    1595             : }
    1596             : /* }}} */
    1597             : 
    1598             : /* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
    1599             :    Perform Perl-style regular expression replacement.
                     :    Emits an E_WARNING and returns FALSE when `replace` is an array but `regex`
                     :    is not. Otherwise the work is delegated to preg_replace_impl() with
                     :    is_callable_replace = 0 and is_filter = 0, which fills return_value.
                     :    If a count argument was passed, its previous value is destroyed and it is
                     :    set to the number of replacements reported by preg_replace_impl(). */
    1600       38459 : static PHP_FUNCTION(preg_replace)
    1601             : {
    1602       38459 :         zval *regex, *replace, *subject, *zcount = NULL;
    1603       38459 :         zend_long limit = -1;
    1604             :         int replace_count;
    1605             : 
    1606             :         /* Get function parameters and do error-checking. */
    1607       38459 :         ZEND_PARSE_PARAMETERS_START(3, 5)
    1608       38456 :                 Z_PARAM_ZVAL(regex)
    1609       38456 :                 Z_PARAM_ZVAL(replace)
    1610       38456 :                 Z_PARAM_ZVAL(subject)
    1611       38456 :                 Z_PARAM_OPTIONAL
    1612       38498 :                 Z_PARAM_LONG(limit)
    1613          37 :                 Z_PARAM_ZVAL_EX(zcount, 0, 1)
    1614       38459 :         ZEND_PARSE_PARAMETERS_END();
    1615             : 
    1616       76920 :         if (Z_TYPE_P(replace) == IS_ARRAY && Z_TYPE_P(regex) != IS_ARRAY) {
    1617           3 :                 php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
    1618           3 :                 RETURN_FALSE;
    1619             :         }
    1620             : 
    1621       38453 :         replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 0, 0);
    1622       38450 :         if (zcount) {
    1623           8 :                 zval_ptr_dtor(zcount);
    1624           8 :                 ZVAL_LONG(zcount, replace_count);
    1625             :         }
    1626             : }
    1627             : /* }}} */
    1628             : 
    1629             : /* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
    1630             :    Perform Perl-style regular expression replacement using replacement callback.
                     :    If the second argument is not callable, an E_WARNING naming it is raised
                     :    and the subject, converted to a string, is returned unchanged. Otherwise
                     :    the work is delegated to preg_replace_impl() with is_callable_replace = 1
                     :    and is_filter = 0. The name string filled in by zend_is_callable() is
                     :    released on both paths. If a count argument was passed, its previous value
                     :    is destroyed and it is set to the number of replacements. */
    1631          52 : static PHP_FUNCTION(preg_replace_callback)
    1632             : {
    1633          52 :         zval *regex, *replace, *subject, *zcount = NULL;
    1634          52 :         zend_long limit = -1;
    1635             :         zend_string     *callback_name;
    1636             :         int replace_count;
    1637             : 
    1638             :         /* Get function parameters and do error-checking. */
    1639          52 :         ZEND_PARSE_PARAMETERS_START(3, 5)
    1640          46 :                 Z_PARAM_ZVAL(regex)
    1641          46 :                 Z_PARAM_ZVAL(replace)
    1642          46 :                 Z_PARAM_ZVAL(subject)
    1643          46 :                 Z_PARAM_OPTIONAL
    1644          62 :                 Z_PARAM_LONG(limit)
    1645          11 :                 Z_PARAM_ZVAL_EX(zcount, 0, 1)
    1646          52 :         ZEND_PARSE_PARAMETERS_END();
    1647             : 
    1648          43 :         if (!zend_is_callable(replace, 0, &callback_name)) {
    1649           3 :                 php_error_docref(NULL, E_WARNING, "Requires argument 2, '%s', to be a valid callback", ZSTR_VAL(callback_name));
    1650           3 :                 zend_string_release(callback_name);
    1651           6 :                 ZVAL_STR(return_value, zval_get_string(subject));
    1652           3 :                 return;
    1653             :         }
    1654          40 :         zend_string_release(callback_name);
    1655             : 
    1656          40 :         replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 1, 0);
    1657          40 :         if (zcount) {
    1658           2 :                 zval_ptr_dtor(zcount);
    1659           2 :                 ZVAL_LONG(zcount, replace_count);
    1660             :         }
    1661             : }
    1662             : /* }}} */
    1663             : 
    1664             : /* {{{ proto mixed preg_replace_callback_array(array pattern, mixed subject [, int limit [, int &count]])
    1665             :    Perform Perl-style regular expression replacement using replacement callback.
                     :    Each element of `pattern` supplies a regex (its string key) and a callback
                     :    (its value); the pairs are applied in iteration order. While return_value
                     :    is still NULL the original subject is the input; after that the previous
                     :    result is the input and is destroyed once the next result exists.
                     :    Early exits:
                     :      - an element without a string key raises an E_WARNING and returns NULL;
                     :      - a value that is not callable raises an E_WARNING and returns a copy of
                     :        the original subject;
                     :      - a pending exception after a step discards the result and returns NULL.
                     :    If a count argument was passed and the loop completes, its previous value
                     :    is destroyed and it is set to the sum of replacements over all patterns. */
    1666          14 : static PHP_FUNCTION(preg_replace_callback_array)
    1667             : {
    1668          14 :         zval regex, zv, *replace, *subject, *pattern, *zcount = NULL;
    1669          14 :         zend_long limit = -1;
    1670             :         zend_string *str_idx;
    1671             :         zend_string *callback_name;
    1672          14 :         int replace_count = 0;
    1673             : 
    1674             :         /* Get function parameters and do error-checking. */
    1675          14 :         ZEND_PARSE_PARAMETERS_START(2, 4)
    1676          36 :                 Z_PARAM_ARRAY(pattern)
    1677           9 :                 Z_PARAM_ZVAL(subject)
    1678           9 :                 Z_PARAM_OPTIONAL
    1679          17 :                 Z_PARAM_LONG(limit)
    1680           9 :                 Z_PARAM_ZVAL_EX(zcount, 0, 1)
    1681          14 :         ZEND_PARSE_PARAMETERS_END();
    1682             : 
    1683          48 :         ZEND_HASH_FOREACH_STR_KEY_VAL(Z_ARRVAL_P(pattern), str_idx, replace) {
    1684          21 :                 if (str_idx) {
    1685          21 :                         ZVAL_STR_COPY(&regex, str_idx); /* the string key is the regex for this callback */
    1686             :                 } else {
    1687           0 :                         php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric or backslash");
    1688           0 :                         zval_ptr_dtor(return_value);
    1689           0 :                         RETURN_NULL();
    1690             :                 }
    1691             : 
    1692          21 :                 if (!zend_is_callable(replace, 0, &callback_name)) {
    1693           1 :                         php_error_docref(NULL, E_WARNING, "'%s' is not a valid callback", ZSTR_VAL(callback_name));
    1694           1 :                         zend_string_release(callback_name);
    1695           1 :                         zval_ptr_dtor(&regex);
    1696           1 :                         zval_ptr_dtor(return_value);
    1697           1 :                         ZVAL_COPY(return_value, subject);
    1698           1 :                         return;
    1699             :                 }
    1700          20 :                 zend_string_release(callback_name);
    1701             : 
    1702          20 :                 if (Z_ISNULL_P(return_value)) {
    1703           8 :                         replace_count += preg_replace_impl(&zv, &regex, replace, subject, limit, 1, 0);
    1704             :                 } else {
    1705          12 :                         replace_count += preg_replace_impl(&zv, &regex, replace, return_value, limit, 1, 0);
    1706          12 :                         zval_ptr_dtor(return_value); /* previous intermediate result is no longer needed */
    1707             :                 }
    1708             : 
    1709          20 :                 zval_ptr_dtor(&regex);
    1710             : 
    1711          20 :                 ZVAL_COPY_VALUE(return_value, &zv);
    1712             : 
    1713          20 :                 if (UNEXPECTED(EG(exception))) {
    1714           1 :                         zval_ptr_dtor(return_value);
    1715           1 :                         RETURN_NULL();
    1716             :                 }
    1717             :         } ZEND_HASH_FOREACH_END();
    1718             : 
    1719           6 :         if (zcount) {
    1720           2 :                 zval_ptr_dtor(zcount);
    1721           2 :                 ZVAL_LONG(zcount, replace_count);
    1722             :         }
    1723             : }
    1724             : /* }}} */
    1725             : 
    1726             : /* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
    1727             :    Perform Perl-style regular expression replacement and only return matches.
                     :    Emits an E_WARNING and returns FALSE when `replace` is an array but `regex`
                     :    is not. Otherwise the work is delegated to preg_replace_impl() with
                     :    is_callable_replace = 0 and is_filter = 1, so subjects in which nothing was
                     :    replaced are dropped from the result (NULL for a non-array subject).
                     :    If a count argument was passed, its previous value is destroyed and it is
                     :    set to the number of replacements reported by preg_replace_impl(). */
    1728           2 : static PHP_FUNCTION(preg_filter)
    1729             : {
    1730           2 :         zval *regex, *replace, *subject, *zcount = NULL;
    1731           2 :         zend_long limit = -1;
    1732             :         int replace_count;
    1733             : 
    1734             :         /* Get function parameters and do error-checking. */
    1735           2 :         ZEND_PARSE_PARAMETERS_START(3, 5)
    1736           2 :                 Z_PARAM_ZVAL(regex)
    1737           2 :                 Z_PARAM_ZVAL(replace)
    1738           2 :                 Z_PARAM_ZVAL(subject)
    1739           2 :                 Z_PARAM_OPTIONAL
    1740           4 :                 Z_PARAM_LONG(limit)
    1741           3 :                 Z_PARAM_ZVAL_EX(zcount, 0, 1)
    1742           2 :         ZEND_PARSE_PARAMETERS_END();
    1743             : 
    1744           5 :         if (Z_TYPE_P(replace) == IS_ARRAY && Z_TYPE_P(regex) != IS_ARRAY) {
    1745           0 :                 php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
    1746           0 :                 RETURN_FALSE;
    1747             :         }
    1748             : 
    1749           2 :         replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 0, 1);
    1750           2 :         if (zcount) {
    1751           1 :                 zval_ptr_dtor(zcount);
    1752           1 :                 ZVAL_LONG(zcount, replace_count);
    1753             :         }
    1754             : }
    1755             : /* }}} */
    1756             : 
    1757             : /* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
    1758             :    Split string into an array using a perl-style regular expression as a delimiter.
                     :    Returns FALSE if parameter parsing fails, if the subject fails the
                     :    ZEND_SIZE_T_INT_OVFL() length check (E_WARNING "Subject is too long"), or if
                     :    pcre_get_compiled_regex_cache() returns NULL. Otherwise the work is done by
                     :    php_pcre_split_impl(), with the cache entry's refcount raised for the
                     :    duration of the call. `limit` is handed over as a zend_long, matching the
                     :    callee's parameter type, so it is not narrowed to int on the way. */
    1759       10966 : static PHP_FUNCTION(preg_split)
    1760             : {
    1761             :         zend_string                     *regex;                 /* Regular expression */
    1762             :         zend_string                     *subject;               /* String to match against */
    1763       10966 :         zend_long                        limit_val = -1;/* Integer value of limit */
    1764       10966 :         zend_long                        flags = 0;             /* Match control flags */
    1765             :         pcre_cache_entry        *pce;                   /* Compiled regular expression */
    1766             : 
    1767             :         /* Get function parameters and do error checking */
    1768       10966 :         ZEND_PARSE_PARAMETERS_START(2, 4)
    1769       32886 :                 Z_PARAM_STR(regex)
    1770       32880 :                 Z_PARAM_STR(subject)
    1771       10958 :                 Z_PARAM_OPTIONAL
    1772       11014 :                 Z_PARAM_LONG(limit_val)
    1773          60 :                 Z_PARAM_LONG(flags)
    1774       10966 :         ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
    1775             : 
    1776       10958 :         if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject))) {
    1777           0 :                         php_error_docref(NULL, E_WARNING, "Subject is too long");
    1778           0 :                         RETURN_FALSE;
    1779             :         }
    1780             : 
    1781             :         /* Compile regex or get it from cache. */
    1782       10958 :         if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
    1783           5 :                 RETURN_FALSE;
    1784             :         }
    1785             : 
    1786       10953 :         pce->refcount++;
    1787       10953 :         php_pcre_split_impl(pce, ZSTR_VAL(subject), (int)ZSTR_LEN(subject), return_value, limit_val, flags);
    1788       10953 :         pce->refcount--;
    1789             : }
    1790             : /* }}} */
    1791             : 
    1792             : /* {{{ php_pcre_split_impl
    1793             :  */
    1794       10968 : PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
    1795             :         zend_long limit_val, zend_long flags)
    1796             : {
    1797       10968 :         pcre_extra              *extra = pce->extra;/* Holds results of studying */
    1798             :         pcre_extra               extra_data;            /* Used locally for exec options */
    1799             :         int                             *offsets;                       /* Array of subpattern offsets */
    1800             :         int                              size_offsets;          /* Size of the offsets array */
    1801       10968 :         int                              exoptions = 0;         /* Execution options */
    1802       10968 :         int                              count = 0;                     /* Count of matched subpatterns */
    1803             :         int                              start_offset;          /* Where the new search starts */
    1804             :         int                              next_offset;           /* End of the last delimiter match + 1 */
    1805       10968 :         int                              g_notempty = 0;        /* If the match should not be empty */
    1806             :         char                    *last_match;            /* Location of last match */
    1807             :         int                              no_empty;                      /* If NO_EMPTY flag is set */
    1808             :         int                              delim_capture;         /* If delimiters should be captured */
    1809             :         int                              offset_capture;        /* If offsets should be captured */
    1810             :         zval                     tmp;
    1811             :         ALLOCA_FLAG(use_heap);
    1812             : 
    1813       10968 :         no_empty = flags & PREG_SPLIT_NO_EMPTY;
    1814       10968 :         delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
    1815       10968 :         offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
    1816             : 
    1817       10968 :         if (limit_val == 0) {
    1818           1 :                 limit_val = -1;
    1819             :         }
    1820             : 
    1821       10968 :         if (extra == NULL) {
    1822           7 :                 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
    1823           7 :                 extra = &extra_data;
    1824             :         }
    1825       10968 :         extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
    1826       10968 :         extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
    1827             : #ifdef PCRE_EXTRA_MARK
    1828       10968 :         extra->flags &= ~PCRE_EXTRA_MARK;
    1829             : #endif
    1830             : 
    1831             :         /* Initialize return value */
    1832       10968 :         array_init(return_value);
    1833             : 
    1834             :         /* Calculate the size of the offsets array, and allocate memory for it. */
    1835       10968 :         size_offsets = (pce->capture_count + 1) * 3;
    1836       10968 :         if (size_offsets <= 32) {
    1837       10968 :                 offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
    1838             :         } else {
    1839           0 :                 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
    1840             :         }
    1841             : 
    1842             :         /* Start at the beginning of the string */
    1843       10968 :         start_offset = 0;
    1844       10968 :         next_offset = 0;
    1845       10968 :         last_match = subject;
    1846       10968 :         PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
    1847             : 
    1848             :         /* Get next piece if no limit or limit not yet reached and something matched*/
    1849       34925 :         while ((limit_val == -1 || limit_val > 1)) {
    1850       23947 :                 count = pcre_exec(pce->re, extra, subject,
    1851             :                                                   subject_len, start_offset,
    1852             :                                                   exoptions|g_notempty, offsets, size_offsets);
    1853             : 
    1854             :                 /* the string was already proved to be valid UTF-8 */
    1855       23947 :                 exoptions |= PCRE_NO_UTF8_CHECK;
    1856             : 
    1857             :                 /* Check for too many substrings condition. */
    1858       23947 :                 if (count == 0) {
    1859           0 :                         php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
    1860           0 :                         count = size_offsets/3;
    1861             :                 }
    1862             : 
    1863             :                 /* If something matched */
    1864       23947 :                 if (count > 0 && (offsets[1] - offsets[0] >= 0)) {
    1865       12928 :                         if (!no_empty || &subject[offsets[0]] != last_match) {
    1866             : 
    1867       12869 :                                 if (offset_capture) {
    1868             :                                         /* Add (match, offset) pair to the return value */
    1869          26 :                                         add_offset_pair(return_value, last_match, (int)(&subject[offsets[0]]-last_match), next_offset, NULL);
    1870             :                                 } else {
    1871             :                                         /* Add the piece to the return value */
    1872       25686 :                                         ZVAL_STRINGL(&tmp, last_match, &subject[offsets[0]]-last_match);
    1873       12843 :                                         zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
    1874             :                                 }
    1875             : 
    1876             :                                 /* One less left to do */
    1877       12869 :                                 if (limit_val != -1)
    1878          13 :                                         limit_val--;
    1879             :                         }
    1880             : 
    1881       12928 :                         last_match = &subject[offsets[1]];
    1882       12928 :                         next_offset = offsets[1];
    1883             : 
    1884       25856 :                         if (delim_capture) {
    1885             :                                 int i, match_len;
    1886          62 :                                 for (i = 1; i < count; i++) {
    1887          31 :                                         match_len = offsets[(i<<1)+1] - offsets[i<<1];
    1888             :                                         /* If we have matched a delimiter */
    1889          31 :                                         if (!no_empty || match_len > 0) {
    1890          21 :                                                 if (offset_capture) {
    1891          10 :                                                         add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
    1892             :                                                 } else {
    1893          22 :                                                         ZVAL_STRINGL(&tmp, &subject[offsets[i<<1]], match_len);
    1894          11 :                                                         zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
    1895             :                                                 }
    1896             :                                         }
    1897             :                                 }
    1898             :                         }
    1899       11019 :                 } else if (count == PCRE_ERROR_NOMATCH) {
    1900             :                         /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
    1901             :                            this is not necessarily the end. We need to advance
    1902             :                            the start offset, and continue. Fudge the offset values
    1903             :                            to achieve this, unless we're already at the end of the string. */
    1904       11017 :                         if (g_notempty != 0 && start_offset < subject_len) {
    1905          61 :                                 offsets[0] = start_offset;
    1906         122 :                                 offsets[1] = start_offset + calculate_unit_length(pce, subject + start_offset);
    1907             :                         } else {
    1908             :                                 break;
    1909             :                         }
    1910             :                 } else {
    1911           2 :                         pcre_handle_exec_error(count);
    1912           2 :                         break;
    1913             :                 }
    1914             : 
    1915             :                 /* If we have matched an empty string, mimic what Perl's /g options does.
    1916             :                    This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
    1917             :                    the match again at the same point. If this fails (picked up above) we
    1918             :                    advance to the next character. */
    1919       12989 :                 g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
    1920             : 
    1921             :                 /* Advance to the position right after the last full match */
    1922       12989 :                 start_offset = offsets[1];
    1923             :         }
    1924             : 
    1925             : 
    1926       10968 :         start_offset = (int)(last_match - subject); /* the offset might have been incremented, but without further successful matches */
    1927             : 
    1928       10968 :         if (!no_empty || start_offset < subject_len)
    1929             :         {
    1930       10960 :                 if (offset_capture) {
    1931             :                         /* Add the last (match, offset) pair to the return value */
    1932           5 :                         add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
    1933             :                 } else {
    1934             :                         /* Add the last piece to the return value */
    1935       21910 :                         ZVAL_STRINGL(&tmp, last_match, subject + subject_len - last_match);
    1936       10955 :                         zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
    1937             :                 }
    1938             :         }
    1939             : 
    1940             : 
    1941             :         /* Clean up */
    1942       10968 :         if (size_offsets <= 32) {
    1943       10968 :                 free_alloca(offsets, use_heap);
    1944             :         } else {
    1945           0 :                 efree(offsets);
    1946             :         }
    1947       10968 : }
    1948             : /* }}} */
    1949             : 
    1950             : /* {{{ proto string preg_quote(string str [, string delim_char])
    1951             :    Returns a copy of str with every regex special character preceded by a backslash; the first byte of delim_char, when given, is escaped too, and NUL bytes are written as \000 */
    1952        9342 : static PHP_FUNCTION(preg_quote)
    1953             : {
    1954             :         size_t           in_str_len;    /* Length of in_str in bytes */
    1955             :         char    *in_str;                /* Input string argument (str) */
    1956             :         char    *in_str_end;    /* One past the last byte of in_str */
    1957        9342 :         size_t           delim_len = 0; /* Length of delim; filled by parameter parsing but not read afterwards */
    1958        9342 :         char    *delim = NULL;  /* Optional delimiter argument (delim_char); stays NULL when omitted */
    1959             :         zend_string     *out_str;       /* Output string with quoted characters */
    1960             :         char    *p,                             /* Read cursor over the input string */
    1961             :                         *q,                             /* Write cursor into the output buffer */
    1962        9342 :                          delim_char=0,  /* First byte of delim, the extra character to escape */
    1963             :                          c;                             /* Byte currently being examined */
    1964        9342 :         zend_bool quote_delim = 0; /* Set to 1 only when a non-empty delim was supplied */
    1965             : 
    1966             :         /* Parse one required string (str) and one optional string (delim_char) */
    1967        9342 :         ZEND_PARSE_PARAMETERS_START(1, 2)
    1968       28017 :                 Z_PARAM_STRING(in_str, in_str_len)
    1969        9337 :                 Z_PARAM_OPTIONAL
    1970       28003 :                 Z_PARAM_STRING(delim, delim_len)
    1971        9342 :         ZEND_PARSE_PARAMETERS_END();
    1972             : 
    1973        9337 :         in_str_end = in_str + in_str_len;
    1974             : 
    1975             :         /* Empty input: return an empty string before any buffer is allocated */
    1976        9337 :         if (in_str == in_str_end) {
    1977           5 :                 RETURN_EMPTY_STRING();
    1978             :         }
    1979             : 
    1980        9332 :         if (delim && *delim) { /* ignore a missing or empty delimiter; only its first byte is used */
    1981        9329 :                 delim_char = delim[0];
    1982        9329 :                 quote_delim = 1;
    1983             :         }
    1984             : 
    1985             :         /* Size the buffer for the worst case: a NUL byte expands to four output
    1986             :            bytes, hence the factor 4 (presumably 4 * in_str_len bytes are requested -- confirm against zend_string_safe_alloc) */
    1987       18664 :         out_str = zend_string_safe_alloc(4, in_str_len, 0, 0);
    1988             : 
    1989             :         /* Copy the input byte by byte, inserting escapes where needed */
    1990     7908752 :         for (p = in_str, q = ZSTR_VAL(out_str); p != in_str_end; p++) {
    1991     7899420 :                 c = *p;
    1992     7899420 :                 switch(c) {
    1993     1103499 :                         case '.': /* this case list: always escaped with one backslash, regardless of the delimiter */
    1994             :                         case '\\':
    1995             :                         case '+':
    1996             :                         case '*':
    1997             :                         case '?':
    1998             :                         case '[':
    1999             :                         case '^':
    2000             :                         case ']':
    2001             :                         case '$':
    2002             :                         case '(':
    2003             :                         case ')':
    2004             :                         case '{':
    2005             :                         case '}':
    2006             :                         case '=':
    2007             :                         case '!':
    2008             :                         case '>':
    2009             :                         case '<':
    2010             :                         case '|':
    2011             :                         case ':':
    2012             :                         case '-':
    2013     1103499 :                                 *q++ = '\\';
    2014     1103499 :                                 *q++ = c;
    2015     1103499 :                                 break;
    2016             : 
    2017        1086 :                         case '\0': /* a NUL byte is replaced by the four characters \000 */
    2018        1086 :                                 *q++ = '\\';
    2019        1086 :                                 *q++ = '0';
    2020        1086 :                                 *q++ = '0';
    2021        1086 :                                 *q++ = '0';
    2022        1086 :                                 break;
    2023             : 
    2024     6794835 :                         default: /* any other byte is copied as is, escaped only when it equals the delimiter */
    2025     6794835 :                                 if (quote_delim && c == delim_char)
    2026       15605 :                                         *q++ = '\\';
    2027     6794835 :                                 *q++ = c;
    2028     6794835 :                                 break;
    2029             :                 }
    2030             :         }
    2031        9332 :         *q = '\0'; /* terminate the output after the last byte written */
    2032             : 
    2033             :         /* Truncate the string to the number of bytes actually written and return it */
    2034       18664 :         out_str = zend_string_truncate(out_str, q - ZSTR_VAL(out_str), 0);
    2035        9332 :         RETURN_NEW_STR(out_str);
    2036             : }
    2037             : /* }}} */
    2038             : 
    2039             : /* {{{ proto array preg_grep(string regex, array input [, int flags])
    2040             :    Userland entry point: parses the arguments, obtains the compiled pattern and lets php_pcre_grep_impl() build the result array; returns FALSE when no compiled pattern is available */
    2041        8223 : static PHP_FUNCTION(preg_grep)
    2042             : {
    2043             :         zend_string                     *regex;                 /* Regular expression */
    2044             :         zval                            *input;                 /* Input array */
    2045        8223 :         zend_long                        flags = 0;             /* Match control flags; 0 when the argument is omitted */
    2046             :         pcre_cache_entry        *pce;                   /* Compiled regular expression */
    2047             : 
    2048             :         /* Two required arguments (regex, input) and one optional (flags) */
    2049        8223 :         ZEND_PARSE_PARAMETERS_START(2, 3)
    2050       24657 :                 Z_PARAM_STR(regex)
    2051       24651 :                 Z_PARAM_ARRAY(input)
    2052        8214 :                 Z_PARAM_OPTIONAL
    2053        8220 :                 Z_PARAM_LONG(flags)
    2054        8223 :         ZEND_PARSE_PARAMETERS_END();
    2055             : 
    2056             :         /* Compile regex or get it from cache; a NULL result makes the function return FALSE. */
    2057        8214 :         if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
    2058           5 :                 RETURN_FALSE;
    2059             :         }
    2060             : 
    2061        8209 :         pce->refcount++; /* NOTE(review): presumably pins the cache entry while it is in use -- confirm against the cache code */
    2062        8209 :         php_pcre_grep_impl(pce, input, return_value, flags);
    2063        8209 :         pce->refcount--; /* undo the increment made before the call */
    2064             : }
    2065             : /* }}} */
    2066             : 
    2067        8209 : PHPAPI void  php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ Initialises return_value as an array and stores in it, under its original key, every entry of input whose string form matches pce->re (or does not match, when flags contains PREG_GREP_INVERT) */
    2068             : {
    2069             :         zval                *entry;                             /* An entry in the input array */
    2070        8209 :         pcre_extra              *extra = pce->extra;/* Extra data attached to the compiled pattern; may be NULL */
    2071             :         pcre_extra               extra_data;            /* Local fallback used when pce->extra is NULL */
    2072             :         int                             *offsets;                       /* Array of subpattern offsets */
    2073             :         int                              size_offsets;          /* Number of ints in the offsets array */
    2074        8209 :         int                              count = 0;                     /* Return value of pcre_exec for the current entry */
    2075             :         zend_string             *string_key;
    2076             :         zend_ulong               num_key;
    2077             :         zend_bool                invert;                        /* Whether to return non-matching
    2078             :                                                                                    entries */
    2079             :         ALLOCA_FLAG(use_heap);
    2080             : 
    2081        8209 :         invert = flags & PREG_GREP_INVERT ? 1 : 0;
    2082             : 
    2083        8209 :         if (extra == NULL) { /* no extra data on the pattern: use the local struct so the limits below can still be set */
    2084           0 :                 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
    2085           0 :                 extra = &extra_data;
    2086             :         }
    2087        8209 :         extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
    2088        8209 :         extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
    2089             : #ifdef PCRE_EXTRA_MARK
    2090        8209 :         extra->flags &= ~PCRE_EXTRA_MARK;
    2091             : #endif
    2092             : 
    2093             :         /* Three ints per group (whole match plus each capture); up to 32 ints go through do_alloca, larger arrays through safe_emalloc. */
    2094        8209 :         size_offsets = (pce->capture_count + 1) * 3;
    2095        8209 :         if (size_offsets <= 32) {
    2096        8209 :                 offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
    2097             :         } else {
    2098           0 :                 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
    2099             :         }
    2100             : 
    2101             :         /* Initialize return array */
    2102        8209 :         array_init(return_value);
    2103             : 
    2104        8209 :         PCRE_G(error_code) = PHP_PCRE_NO_ERROR; /* clear the error code left by any earlier call */
    2105             : 
    2106             :         /* Go through the input array; each entry has either a string key or a numeric key */
    2107       24761 :         ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
    2108        8276 :                 zend_string *subject_str = zval_get_string(entry);
    2109             : 
    2110             :                 /* Match from offset 0 with no exec options. NOTE(review): the length is narrowed to int, so a subject longer than INT_MAX would be mis-sized -- confirm whether that is reachable */
    2111        8276 :                 count = pcre_exec(pce->re, extra, ZSTR_VAL(subject_str),
    2112        8276 :                                                   (int)ZSTR_LEN(subject_str), 0,
    2113             :                                                   0, offsets, size_offsets);
    2114             : 
    2115             :                 /* A result of 0 is reported as a match with too many substrings: raise a notice and treat it as a match. */
    2116        8276 :                 if (count == 0) {
    2117           0 :                         php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
    2118           0 :                         count = size_offsets/3;
    2119        8276 :                 } else if (count < 0 && count != PCRE_ERROR_NOMATCH) { /* any other negative result: report it and stop; entries already added stay in return_value */
    2120           0 :                         pcre_handle_exec_error(count);
    2121             :                         zend_string_release(subject_str);
    2122           0 :                         break;
    2123             :                 }
    2124             : 
    2125             :                 /* Keep the entry when it matched (normal mode) or when it did not match (inverted mode) */
    2126        8276 :                 if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
    2127        8225 :                         if (Z_REFCOUNTED_P(entry)) { /* the result array stores the same zval, so take a reference first */
    2128             :                                 Z_ADDREF_P(entry);
    2129             :                         }
    2130             : 
    2131             :                         /* Add to return array under the key it had in the input */
    2132        8225 :                         if (string_key) {
    2133           3 :                                 zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
    2134             :                         } else {
    2135        8222 :                                 zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
    2136             :                         }
    2137             :                 }
    2138             : 
    2139             :                 zend_string_release(subject_str);
    2140             :         } ZEND_HASH_FOREACH_END();
    2141             : 
    2142             :         /* Release the offsets array with the counterpart of the allocator chosen above */
    2143        8209 :         if (size_offsets <= 32) {
    2144        8209 :                 free_alloca(offsets, use_heap);
    2145             :         } else {
    2146           0 :                 efree(offsets);
    2147             :         }
    2148        8209 : }
    2149             : /* }}} */
    2150             : 
    2151             : /* {{{ proto int preg_last_error()
    2152             :    Returns the module-global PCRE_G(error_code); takes no arguments. The grep implementation in this file resets that value to PHP_PCRE_NO_ERROR before it starts matching. */
    2153          20 : static PHP_FUNCTION(preg_last_error)
    2154             : {
    2155          20 :         ZEND_PARSE_PARAMETERS_START(0, 0)
    2156          20 :         ZEND_PARSE_PARAMETERS_END();
    2157             : 
    2158          18 :         RETURN_LONG(PCRE_G(error_code));
    2159             : }
    2160             : /* }}} */
    2161             : 
    2162             : /* {{{ module definition structures */
    2163             : 
    2164             : /* {{{ arginfo: one argument descriptor per userland function, referenced from pcre_functions[]. NOTE(review): the last ZEND_BEGIN_ARG_INFO_EX argument appears to be the number of required parameters (it equals the minimum passed to ZEND_PARSE_PARAMETERS_START in preg_quote and preg_grep), and ZEND_ARG_INFO(1, ...) presumably marks a by-reference parameter -- confirm against the Zend API */
    2165             : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
    2166             :     ZEND_ARG_INFO(0, pattern)
    2167             :     ZEND_ARG_INFO(0, subject)
    2168             :     ZEND_ARG_INFO(1, subpatterns) /* array */
    2169             :     ZEND_ARG_INFO(0, flags)
    2170             :     ZEND_ARG_INFO(0, offset)
    2171             : ZEND_END_ARG_INFO()
    2172             : 
    2173             : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 2) /* same parameter list as arginfo_preg_match */
    2174             :     ZEND_ARG_INFO(0, pattern)
    2175             :     ZEND_ARG_INFO(0, subject)
    2176             :     ZEND_ARG_INFO(1, subpatterns) /* array */
    2177             :     ZEND_ARG_INFO(0, flags)
    2178             :     ZEND_ARG_INFO(0, offset)
    2179             : ZEND_END_ARG_INFO()
    2180             : 
    2181             : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3) /* also registered for preg_filter in pcre_functions[] */
    2182             :     ZEND_ARG_INFO(0, regex)
    2183             :     ZEND_ARG_INFO(0, replace)
    2184             :     ZEND_ARG_INFO(0, subject)
    2185             :     ZEND_ARG_INFO(0, limit)
    2186             :     ZEND_ARG_INFO(1, count)
    2187             : ZEND_END_ARG_INFO()
    2188             : 
    2189             : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3) /* like arginfo_preg_replace, with callback in place of replace */
    2190             :     ZEND_ARG_INFO(0, regex)
    2191             :     ZEND_ARG_INFO(0, callback)
    2192             :     ZEND_ARG_INFO(0, subject)
    2193             :     ZEND_ARG_INFO(0, limit)
    2194             :     ZEND_ARG_INFO(1, count)
    2195             : ZEND_END_ARG_INFO()
    2196             : 
    2197             : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback_array, 0, 0, 2)
    2198             :     ZEND_ARG_INFO(0, pattern)
    2199             :     ZEND_ARG_INFO(0, subject)
    2200             :     ZEND_ARG_INFO(0, limit)
    2201             :     ZEND_ARG_INFO(1, count)
    2202             : ZEND_END_ARG_INFO()
    2203             : 
    2204             : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
    2205             :     ZEND_ARG_INFO(0, pattern)
    2206             :     ZEND_ARG_INFO(0, subject)
    2207             :     ZEND_ARG_INFO(0, limit)
    2208             :     ZEND_ARG_INFO(0, flags)
    2209             : ZEND_END_ARG_INFO()
    2210             : 
    2211             : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1) /* preg_quote parses 1 required and 1 optional string */
    2212             :     ZEND_ARG_INFO(0, str)
    2213             :     ZEND_ARG_INFO(0, delim_char)
    2214             : ZEND_END_ARG_INFO()
    2215             : 
    2216             : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2) /* preg_grep parses 2 required arguments and 1 optional */
    2217             :     ZEND_ARG_INFO(0, regex)
    2218             :     ZEND_ARG_INFO(0, input) /* array */
    2219             :     ZEND_ARG_INFO(0, flags)
    2220             : ZEND_END_ARG_INFO()
    2221             : 
    2222             : ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0) /* no parameters declared */
    2223             : ZEND_END_ARG_INFO()
    2224             : /* }}} */
    2225             : 
    2226             : static const zend_function_entry pcre_functions[] = { /* table pairing each preg_* function with its arginfo; referenced by pcre_module_entry */
    2227             :         PHP_FE(preg_match,                                      arginfo_preg_match)
    2228             :         PHP_FE(preg_match_all,                          arginfo_preg_match_all)
    2229             :         PHP_FE(preg_replace,                            arginfo_preg_replace)
    2230             :         PHP_FE(preg_replace_callback,           arginfo_preg_replace_callback)
    2231             :         PHP_FE(preg_replace_callback_array,     arginfo_preg_replace_callback_array)
    2232             :         PHP_FE(preg_filter,                                     arginfo_preg_replace) /* no dedicated arginfo: reuses the preg_replace descriptor */
    2233             :         PHP_FE(preg_split,                                      arginfo_preg_split)
    2234             :         PHP_FE(preg_quote,                                      arginfo_preg_quote)
    2235             :         PHP_FE(preg_grep,                                       arginfo_preg_grep)
    2236             :         PHP_FE(preg_last_error,                         arginfo_preg_last_error)
    2237             :         PHP_FE_END /* terminates the table */
    2238             : };
    2239             : 
    2240             : zend_module_entry pcre_module_entry = { /* module descriptor for the pcre extension; fields are positional */
    2241             :         STANDARD_MODULE_HEADER,
    2242             :    "pcre", /* extension name */
    2243             :         pcre_functions, /* function table defined above */
    2244             :         PHP_MINIT(pcre),
    2245             :         PHP_MSHUTDOWN(pcre),
    2246             : #ifdef HAVE_PCRE_JIT_SUPPORT
    2247             :         PHP_RINIT(pcre), /* a request-init hook is registered only when HAVE_PCRE_JIT_SUPPORT is defined */
    2248             : #else
    2249             :         NULL, /* no request-init hook in builds without HAVE_PCRE_JIT_SUPPORT */
    2250             : #endif
    2251             :         NULL, /* NOTE(review): presumably the request-shutdown slot, left unset -- confirm against zend_module_entry */
    2252             :         PHP_MINFO(pcre),
    2253             :         PHP_PCRE_VERSION,
    2254             :         PHP_MODULE_GLOBALS(pcre),
    2255             :         PHP_GINIT(pcre),
    2256             :         PHP_GSHUTDOWN(pcre),
    2257             :         NULL, /* NOTE(review): presumably the post-deactivate slot, left unset -- confirm against zend_module_entry */
    2258             :         STANDARD_MODULE_PROPERTIES_EX
    2259             : };
    2260             : 
    2261             : #ifdef COMPILE_DL_PCRE /* NOTE(review): going by the macro name, set when pcre is built as a loadable module -- confirm in the build config */
    2262             : ZEND_GET_MODULE(pcre)
    2263             : #endif
    2264             : 
    2265             : /* }}} */
    2266             : 
    2267             : #endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
    2268             : 
    2269             : /*
    2270             :  * Local variables:
    2271             :  * tab-width: 4
    2272             :  * c-basic-offset: 4
    2273             :  * End:
    2274             :  * vim600: sw=4 ts=4 fdm=marker
    2275             :  * vim<600: sw=4 ts=4
    2276             :  */

Generated by: LCOV version 1.10

Generated at Sun, 16 Jan 2022 08:19:18 +0000 (6 days ago)

Copyright © 2005-2022 The PHP Group
All rights reserved.