1 : /*
2 : +----------------------------------------------------------------------+
3 : | PHP Version 5 |
4 : +----------------------------------------------------------------------+
5 : | Copyright (c) 1997-2009 The PHP Group |
6 : +----------------------------------------------------------------------+
7 : | This source file is subject to version 3.01 of the PHP license, |
8 : | that is bundled with this package in the file LICENSE, and is |
9 : | available through the world-wide-web at the following url: |
10 : | http://www.php.net/license/3_01.txt |
11 : | If you did not receive a copy of the PHP license and are unable to |
12 : | obtain it through the world-wide-web, please send a note to |
13 : | license@php.net so we can mail you a copy immediately. |
14 : +----------------------------------------------------------------------+
15 : | Author: Andrei Zmievski <andrei@php.net> |
16 : +----------------------------------------------------------------------+
17 : */
18 :
19 : /* $Id: php_pcre.c 281840 2009-06-08 21:52:08Z scottmac $ */
20 :
21 : #include "php.h"
22 : #include "php_ini.h"
23 : #include "php_globals.h"
24 : #include "php_pcre.h"
25 : #include "ext/standard/info.h"
26 : #include "ext/standard/php_smart_str.h"
27 :
28 : #if HAVE_PCRE || HAVE_BUNDLED_PCRE
29 :
30 : #include "ext/standard/php_string.h"
31 :
/* Result ordering selectors (low byte of the match flags) and the
   offset-capture bit tested by php_pcre_match_impl(). */
#define PREG_PATTERN_ORDER			1
#define PREG_SET_ORDER				2
#define PREG_OFFSET_CAPTURE			0x100

/* PREG_SPLIT_* bit flags. */
#define PREG_SPLIT_NO_EMPTY			0x01
#define PREG_SPLIT_DELIM_CAPTURE	0x02
#define PREG_SPLIT_OFFSET_CAPTURE	0x04

/* Recorded in a cache entry's preg_options when the 'e' modifier is seen. */
#define PREG_REPLACE_EVAL			0x01

/* PREG_GREP_* bit flag. */
#define PREG_GREP_INVERT			0x01

/* Element count at which the compiled-pattern cache gets trimmed. */
#define PCRE_CACHE_SIZE				4096
45 :
/* Error codes kept in PCRE_G(error_code). MINIT registers each of them as a
   userland PREG_*_ERROR constant, so the numeric values are spelled out. */
enum {
	PHP_PCRE_NO_ERROR              = 0,
	PHP_PCRE_INTERNAL_ERROR        = 1,
	PHP_PCRE_BACKTRACK_LIMIT_ERROR = 2,
	PHP_PCRE_RECURSION_LIMIT_ERROR = 3,
	PHP_PCRE_BAD_UTF8_ERROR        = 4,
	PHP_PCRE_BAD_UTF8_OFFSET_ERROR = 5
};
54 :
55 :
56 : ZEND_DECLARE_MODULE_GLOBALS(pcre)
57 :
58 :
59 : static void pcre_handle_exec_error(int pcre_code TSRMLS_DC) /* {{{ */
60 7 : {
61 7 : int preg_code = 0;
62 :
63 7 : switch (pcre_code) {
64 : case PCRE_ERROR_MATCHLIMIT:
65 3 : preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
66 3 : break;
67 :
68 : case PCRE_ERROR_RECURSIONLIMIT:
69 3 : preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
70 3 : break;
71 :
72 : case PCRE_ERROR_BADUTF8:
73 1 : preg_code = PHP_PCRE_BAD_UTF8_ERROR;
74 1 : break;
75 :
76 : case PCRE_ERROR_BADUTF8_OFFSET:
77 0 : preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
78 0 : break;
79 :
80 : default:
81 0 : preg_code = PHP_PCRE_INTERNAL_ERROR;
82 : break;
83 : }
84 :
85 7 : PCRE_G(error_code) = preg_code;
86 7 : }
87 : /* }}} */
88 :
89 : static void php_free_pcre_cache(void *data) /* {{{ */
90 10061 : {
91 10061 : pcre_cache_entry *pce = (pcre_cache_entry *) data;
92 10061 : if (!pce) return;
93 10061 : pefree(pce->re, 1);
94 10061 : if (pce->extra) pefree(pce->extra, 1);
95 : #if HAVE_SETLOCALE
96 10061 : if ((void*)pce->tables) pefree((void*)pce->tables, 1);
97 10061 : pefree(pce->locale, 1);
98 : #endif
99 : }
100 : /* }}} */
101 :
102 : static PHP_GINIT_FUNCTION(pcre) /* {{{ */
103 13565 : {
104 13565 : zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
105 13565 : pcre_globals->backtrack_limit = 0;
106 13565 : pcre_globals->recursion_limit = 0;
107 13565 : pcre_globals->error_code = PHP_PCRE_NO_ERROR;
108 13565 : }
109 : /* }}} */
110 :
111 : static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
112 13597 : {
113 13597 : zend_hash_destroy(&pcre_globals->pcre_cache);
114 13597 : }
115 : /* }}} */
116 :
117 : PHP_INI_BEGIN()
118 : STD_PHP_INI_ENTRY("pcre.backtrack_limit", "100000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
119 : STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
120 : PHP_INI_END()
121 :
122 :
/* {{{ PHP_MINFO_FUNCTION(pcre)
   Emits the extension's info section: a two-row table (support status and
   the version string reported by pcre_version()) followed by the INI
   entries. */
static PHP_MINFO_FUNCTION(pcre)
{
	const char *library_version = pcre_version();

	php_info_print_table_start();
	php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled");
	php_info_print_table_row(2, "PCRE Library Version", library_version);
	php_info_print_table_end();

	DISPLAY_INI_ENTRIES();
}
133 : /* }}} */
134 :
135 : /* {{{ PHP_MINIT_FUNCTION(pcre) */
136 : static PHP_MINIT_FUNCTION(pcre)
137 13565 : {
138 13565 : REGISTER_INI_ENTRIES();
139 :
140 13565 : REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
141 13565 : REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
142 13565 : REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
143 13565 : REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
144 13565 : REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
145 13565 : REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
146 13565 : REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
147 :
148 13565 : REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
149 13565 : REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
150 13565 : REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
151 13565 : REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
152 13565 : REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
153 13565 : REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
154 13565 : REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT);
155 :
156 13565 : return SUCCESS;
157 : }
158 : /* }}} */
159 :
160 : /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
161 : static PHP_MSHUTDOWN_FUNCTION(pcre)
162 13597 : {
163 13597 : UNREGISTER_INI_ENTRIES();
164 :
165 13597 : return SUCCESS;
166 : }
167 : /* }}} */
168 :
169 : /* {{{ static pcre_clean_cache */
170 : static int pcre_clean_cache(void *data, void *arg TSRMLS_DC)
171 16384 : {
172 16384 : int *num_clean = (int *)arg;
173 :
174 16384 : if (*num_clean > 0) {
175 2048 : (*num_clean)--;
176 2048 : return 1;
177 : } else {
178 14336 : return 0;
179 : }
180 : }
181 : /* }}} */
182 :
183 : /* {{{ static make_subpats_table */
184 : static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce TSRMLS_DC)
185 1283129 : {
186 1283129 : pcre_extra *extra = pce->extra;
187 1283129 : int name_cnt = 0, name_size, ni = 0;
188 : int rc;
189 : char *name_table;
190 : unsigned short name_idx;
191 1283129 : char **subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
192 :
193 1283129 : rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMECOUNT, &name_cnt);
194 1283129 : if (rc < 0) {
195 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
196 0 : efree(subpat_names);
197 0 : return NULL;
198 : }
199 1283129 : if (name_cnt > 0) {
200 : int rc1, rc2;
201 :
202 12 : rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
203 12 : rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
204 12 : rc = rc2 ? rc2 : rc1;
205 12 : if (rc < 0) {
206 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
207 0 : efree(subpat_names);
208 0 : return NULL;
209 : }
210 :
211 1076 : while (ni++ < name_cnt) {
212 1054 : name_idx = 0xff * (unsigned char)name_table[0] + (unsigned char)name_table[1];
213 1054 : subpat_names[name_idx] = name_table + 2;
214 1054 : if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
215 2 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Numeric named subpatterns are not allowed");
216 2 : efree(subpat_names);
217 2 : return NULL;
218 : }
219 1052 : name_table += name_size;
220 : }
221 : }
222 :
223 1283127 : return subpat_names;
224 : }
225 : /* }}} */
226 :
227 : /* {{{ pcre_get_compiled_regex_cache
228 : */
229 : PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_len TSRMLS_DC)
230 1284328 : {
231 1284328 : pcre *re = NULL;
232 : pcre_extra *extra;
233 1284328 : int coptions = 0;
234 1284328 : int soptions = 0;
235 : const char *error;
236 : int erroffset;
237 : char delimiter;
238 : char start_delimiter;
239 : char end_delimiter;
240 : char *p, *pp;
241 : char *pattern;
242 1284328 : int do_study = 0;
243 1284328 : int poptions = 0;
244 1284328 : unsigned const char *tables = NULL;
245 : #if HAVE_SETLOCALE
246 1284328 : char *locale = setlocale(LC_CTYPE, NULL);
247 : #endif
248 : pcre_cache_entry *pce;
249 : pcre_cache_entry new_entry;
250 :
251 : /* Try to lookup the cached regex entry, and if successful, just pass
252 : back the compiled pattern, otherwise go on and compile it. */
253 1284328 : regex_len = strlen(regex);
254 1284328 : if (zend_hash_find(&PCRE_G(pcre_cache), regex, regex_len+1, (void **)&pce) == SUCCESS) {
255 : /*
256 : * We use a quick pcre_info() check to see whether cache is corrupted, and if it
257 : * is, we flush it and compile the pattern from scratch.
258 : */
259 1274234 : if (pcre_info(pce->re, NULL, NULL) == PCRE_ERROR_BADMAGIC) {
260 0 : zend_hash_clean(&PCRE_G(pcre_cache));
261 : } else {
262 : #if HAVE_SETLOCALE
263 1274234 : if (!strcmp(pce->locale, locale)) {
264 : #endif
265 1274232 : return pce;
266 : #if HAVE_SETLOCALE
267 : }
268 : #endif
269 : }
270 : }
271 :
272 10096 : p = regex;
273 :
274 : /* Parse through the leading whitespace, and display a warning if we
275 : get to the end without encountering a delimiter. */
276 10096 : while (isspace((int)*(unsigned char *)p)) p++;
277 10096 : if (*p == 0) {
278 3 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
279 3 : return NULL;
280 : }
281 :
282 : /* Get the delimiter and display a warning if it is alphanumeric
283 : or a backslash. */
284 10093 : delimiter = *p++;
285 10093 : if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
286 7 : php_error_docref(NULL TSRMLS_CC,E_WARNING, "Delimiter must not be alphanumeric or backslash");
287 7 : return NULL;
288 : }
289 :
290 10086 : start_delimiter = delimiter;
291 10086 : if ((pp = strchr("([{< )]}> )]}>", delimiter)))
292 20 : delimiter = pp[5];
293 10086 : end_delimiter = delimiter;
294 :
295 10086 : if (start_delimiter == end_delimiter) {
296 : /* We need to iterate through the pattern, searching for the ending delimiter,
297 : but skipping the backslashed delimiters. If the ending delimiter is not
298 : found, display a warning. */
299 10066 : pp = p;
300 14929503 : while (*pp != 0) {
301 14919430 : if (*pp == '\\' && pp[1] != 0) pp++;
302 14049321 : else if (*pp == delimiter)
303 10059 : break;
304 14909371 : pp++;
305 : }
306 10066 : if (*pp == 0) {
307 7 : php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending delimiter '%c' found", delimiter);
308 7 : return NULL;
309 : }
310 : } else {
311 : /* We iterate through the pattern, searching for the matching ending
312 : * delimiter. For each matching starting delimiter, we increment nesting
313 : * level, and decrement it for each matching ending delimiter. If we
314 : * reach the end of the pattern without matching, display a warning.
315 : */
316 20 : int brackets = 1; /* brackets nesting level */
317 20 : pp = p;
318 180 : while (*pp != 0) {
319 159 : if (*pp == '\\' && pp[1] != 0) pp++;
320 147 : else if (*pp == end_delimiter && --brackets <= 0)
321 : break;
322 128 : else if (*pp == start_delimiter)
323 1 : brackets++;
324 140 : pp++;
325 : }
326 20 : if (*pp == 0) {
327 1 : php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending matching delimiter '%c' found", end_delimiter);
328 1 : return NULL;
329 : }
330 : }
331 :
332 : /* Make a copy of the actual pattern. */
333 10078 : pattern = estrndup(p, pp-p);
334 :
335 : /* Move on to the options */
336 10078 : pp++;
337 :
338 : /* Parse through the options, setting appropriate flags. Display
339 : a warning if we encounter an unknown modifier. */
340 25694 : while (*pp != 0) {
341 5551 : switch (*pp++) {
342 : /* Perl compatible options */
343 20 : case 'i': coptions |= PCRE_CASELESS; break;
344 3 : case 'm': coptions |= PCRE_MULTILINE; break;
345 5446 : case 's': coptions |= PCRE_DOTALL; break;
346 4 : case 'x': coptions |= PCRE_EXTENDED; break;
347 :
348 : /* PCRE specific options */
349 2 : case 'A': coptions |= PCRE_ANCHORED; break;
350 7 : case 'D': coptions |= PCRE_DOLLAR_ENDONLY;break;
351 28 : case 'S': do_study = 1; break;
352 6 : case 'U': coptions |= PCRE_UNGREEDY; break;
353 1 : case 'X': coptions |= PCRE_EXTRA; break;
354 14 : case 'u': coptions |= PCRE_UTF8; break;
355 :
356 : /* Custom preg options */
357 5 : case 'e': poptions |= PREG_REPLACE_EVAL; break;
358 :
359 : case ' ':
360 : case '\n':
361 2 : break;
362 :
363 : default:
364 13 : php_error_docref(NULL TSRMLS_CC,E_WARNING, "Unknown modifier '%c'", pp[-1]);
365 13 : efree(pattern);
366 13 : return NULL;
367 : }
368 : }
369 :
370 : #if HAVE_SETLOCALE
371 10065 : if (strcmp(locale, "C"))
372 4 : tables = pcre_maketables();
373 : #endif
374 :
375 : /* Compile pattern and display a warning if compilation failed. */
376 10065 : re = pcre_compile(pattern,
377 : coptions,
378 : &error,
379 : &erroffset,
380 : tables);
381 :
382 10065 : if (re == NULL) {
383 4 : php_error_docref(NULL TSRMLS_CC,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
384 4 : efree(pattern);
385 4 : if (tables) {
386 0 : pefree((void*)tables, 1);
387 : }
388 4 : return NULL;
389 : }
390 :
391 : /* If study option was specified, study the pattern and
392 : store the result in extra for passing to pcre_exec. */
393 10061 : if (do_study) {
394 28 : extra = pcre_study(re, soptions, &error);
395 28 : if (extra) {
396 4 : extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
397 : }
398 28 : if (error != NULL) {
399 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Error while studying pattern");
400 : }
401 : } else {
402 10033 : extra = NULL;
403 : }
404 :
405 10061 : efree(pattern);
406 :
407 : /*
408 : * If we reached cache limit, clean out the items from the head of the list;
409 : * these are supposedly the oldest ones (but not necessarily the least used
410 : * ones).
411 : */
412 10061 : if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
413 4 : int num_clean = PCRE_CACHE_SIZE / 8;
414 4 : zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean TSRMLS_CC);
415 : }
416 :
417 : /* Store the compiled pattern and extra info in the cache. */
418 10061 : new_entry.re = re;
419 10061 : new_entry.extra = extra;
420 10061 : new_entry.preg_options = poptions;
421 10061 : new_entry.compile_options = coptions;
422 : #if HAVE_SETLOCALE
423 10061 : new_entry.locale = pestrdup(locale, 1);
424 10061 : new_entry.tables = tables;
425 : #endif
426 10061 : zend_hash_update(&PCRE_G(pcre_cache), regex, regex_len+1, (void *)&new_entry,
427 : sizeof(pcre_cache_entry), (void**)&pce);
428 :
429 10061 : return pce;
430 : }
431 : /* }}} */
432 :
433 : /* {{{ pcre_get_compiled_regex
434 : */
435 : PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *preg_options TSRMLS_DC)
436 47 : {
437 47 : pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC);
438 :
439 47 : if (extra) {
440 47 : *extra = pce ? pce->extra : NULL;
441 : }
442 47 : if (preg_options) {
443 47 : *preg_options = pce ? pce->preg_options : 0;
444 : }
445 :
446 47 : return pce ? pce->re : NULL;
447 : }
448 : /* }}} */
449 :
450 : /* {{{ pcre_get_compiled_regex_ex
451 : */
452 : PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int *preg_options, int *compile_options TSRMLS_DC)
453 0 : {
454 0 : pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC);
455 :
456 0 : if (extra) {
457 0 : *extra = pce ? pce->extra : NULL;
458 : }
459 0 : if (preg_options) {
460 0 : *preg_options = pce ? pce->preg_options : 0;
461 : }
462 0 : if (compile_options) {
463 0 : *compile_options = pce ? pce->compile_options : 0;
464 : }
465 :
466 0 : return pce ? pce->re : NULL;
467 : }
468 : /* }}} */
469 :
470 : /* {{{ add_offset_pair */
471 : static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
472 64 : {
473 : zval *match_pair;
474 :
475 64 : ALLOC_ZVAL(match_pair);
476 64 : array_init(match_pair);
477 64 : INIT_PZVAL(match_pair);
478 :
479 : /* Add (match, offset) to the return value */
480 64 : add_next_index_stringl(match_pair, str, len, 1);
481 64 : add_next_index_long(match_pair, offset);
482 :
483 64 : if (name) {
484 2 : zval_add_ref(&match_pair);
485 2 : zend_hash_update(Z_ARRVAL_P(result), name, strlen(name)+1, &match_pair, sizeof(zval *), NULL);
486 : }
487 64 : zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), NULL);
488 64 : }
489 : /* }}} */
490 :
491 : static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
492 1266049 : {
493 : /* parameters */
494 : char *regex; /* Regular expression */
495 : char *subject; /* String to match against */
496 : int regex_len;
497 : int subject_len;
498 : pcre_cache_entry *pce; /* Compiled regular expression */
499 1266049 : zval *subpats = NULL; /* Array for subpatterns */
500 1266049 : long flags = 0; /* Match control flags */
501 1266049 : long start_offset = 0; /* Where the new search starts */
502 :
503 1266049 : if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, ((global) ? "ssz|ll" : "ss|zll"), ®ex, ®ex_len,
504 : &subject, &subject_len, &subpats, &flags, &start_offset) == FAILURE) {
505 16 : RETURN_FALSE;
506 : }
507 :
508 : /* Compile regex or get it from cache. */
509 1266033 : if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
510 16 : RETURN_FALSE;
511 : }
512 :
513 1266017 : php_pcre_match_impl(pce, subject, subject_len, return_value, subpats,
514 : global, ZEND_NUM_ARGS() >= 4, flags, start_offset TSRMLS_CC);
515 : }
516 : /* }}} */
517 :
518 : /* {{{ php_pcre_match_impl() */
519 : PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
520 : zval *subpats, int global, int use_flags, long flags, long start_offset TSRMLS_DC)
521 1266085 : {
522 : zval *result_set, /* Holds a set of subpatterns after
523 : a global match */
524 1266085 : **match_sets = NULL; /* An array of sets of matches for each
525 : subpattern after a global match */
526 1266085 : pcre_extra *extra = pce->extra;/* Holds results of studying */
527 : pcre_extra extra_data; /* Used locally for exec options */
528 1266085 : int exoptions = 0; /* Execution options */
529 1266085 : int count = 0; /* Count of matched subpatterns */
530 : int *offsets; /* Array of subpattern offsets */
531 : int num_subpats; /* Number of captured subpatterns */
532 : int size_offsets; /* Size of the offsets array */
533 : int matched; /* Has anything matched */
534 1266085 : int g_notempty = 0; /* If the match should not be empty */
535 : const char **stringlist; /* Holds list of subpatterns */
536 : char *match; /* The current match */
537 : char **subpat_names; /* Array for named subpatterns */
538 : int i, rc;
539 : int subpats_order; /* Order of subpattern matches */
540 : int offset_capture; /* Capture match offsets: yes/no */
541 :
542 : /* Overwrite the passed-in value for subpatterns with an empty array. */
543 1266085 : if (subpats != NULL) {
544 872180 : zval_dtor(subpats);
545 872180 : array_init(subpats);
546 : }
547 :
548 1266085 : subpats_order = global ? PREG_PATTERN_ORDER : 0;
549 :
550 1266085 : if (use_flags) {
551 24 : offset_capture = flags & PREG_OFFSET_CAPTURE;
552 :
553 : /*
554 : * subpats_order is pre-set to pattern mode so we change it only if
555 : * necessary.
556 : */
557 24 : if (flags & 0xff) {
558 14 : subpats_order = flags & 0xff;
559 : }
560 24 : if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
561 : (!global && subpats_order != 0)) {
562 1 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid flags specified");
563 1 : return;
564 : }
565 : } else {
566 1266061 : offset_capture = 0;
567 : }
568 :
569 : /* Negative offset counts from the end of the string. */
570 1266084 : if (start_offset < 0) {
571 4 : start_offset = subject_len + start_offset;
572 4 : if (start_offset < 0) {
573 1 : start_offset = 0;
574 : }
575 : }
576 :
577 1266084 : if (extra == NULL) {
578 1266082 : extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
579 1266082 : extra = &extra_data;
580 : }
581 1266084 : extra->match_limit = PCRE_G(backtrack_limit);
582 1266084 : extra->match_limit_recursion = PCRE_G(recursion_limit);
583 :
584 : /* Calculate the size of the offsets array, and allocate memory for it. */
585 1266084 : rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
586 1266084 : if (rc < 0) {
587 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
588 0 : RETURN_FALSE;
589 : }
590 1266084 : num_subpats++;
591 1266084 : size_offsets = num_subpats * 3;
592 :
593 : /*
594 : * Build a mapping from subpattern numbers to their names. We will always
595 : * allocate the table, even though there may be no named subpatterns. This
596 : * avoids somewhat more complicated logic in the inner loops.
597 : */
598 1266084 : subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
599 1266084 : if (!subpat_names) {
600 1 : RETURN_FALSE;
601 : }
602 :
603 1266083 : offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
604 :
605 : /* Allocate match sets array and initialize the values. */
606 1266083 : if (global && subpats_order == PREG_PATTERN_ORDER) {
607 110 : match_sets = (zval **)safe_emalloc(num_subpats, sizeof(zval *), 0);
608 352 : for (i=0; i<num_subpats; i++) {
609 242 : ALLOC_ZVAL(match_sets[i]);
610 242 : array_init(match_sets[i]);
611 242 : INIT_PZVAL(match_sets[i]);
612 : }
613 : }
614 :
615 1266083 : match = NULL;
616 1266083 : matched = 0;
617 1266083 : PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
618 :
619 : do {
620 : /* Execute the regular expression. */
621 1266248 : count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
622 : exoptions|g_notempty, offsets, size_offsets);
623 :
624 : /* the string was already proved to be valid UTF-8 */
625 1266248 : exoptions |= PCRE_NO_UTF8_CHECK;
626 :
627 : /* Check for too many substrings condition. */
628 1266248 : if (count == 0) {
629 0 : php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
630 0 : count = size_offsets/3;
631 : }
632 :
633 : /* If something has matched */
634 1266248 : if (count > 0) {
635 50167 : matched++;
636 50167 : match = subject + offsets[0];
637 :
638 : /* If subpatterns array has been passed, fill it in with values. */
639 50167 : if (subpats != NULL) {
640 : /* Try to get the list of substrings and display a warning if failed. */
641 25732 : if (pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) {
642 0 : efree(subpat_names);
643 0 : efree(offsets);
644 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Get subpatterns list failed");
645 0 : RETURN_FALSE;
646 : }
647 :
648 25732 : if (global) { /* global pattern matching */
649 163 : if (subpats_order == PREG_PATTERN_ORDER) {
650 : /* For each subpattern, insert it into the appropriate array. */
651 316 : for (i = 0; i < count; i++) {
652 191 : if (offset_capture) {
653 7 : add_offset_pair(match_sets[i], (char *)stringlist[i],
654 : offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
655 : } else {
656 184 : add_next_index_stringl(match_sets[i], (char *)stringlist[i],
657 : offsets[(i<<1)+1] - offsets[i<<1], 1);
658 : }
659 : }
660 : /*
661 : * If the number of captured subpatterns on this run is
662 : * less than the total possible number, pad the result
663 : * arrays with empty strings.
664 : */
665 125 : if (count < num_subpats) {
666 2 : for (; i < num_subpats; i++) {
667 1 : add_next_index_string(match_sets[i], "", 1);
668 : }
669 : }
670 : } else {
671 : /* Allocate the result set array */
672 38 : ALLOC_ZVAL(result_set);
673 38 : array_init(result_set);
674 38 : INIT_PZVAL(result_set);
675 :
676 : /* Add all the subpatterns to it */
677 343 : for (i = 0; i < count; i++) {
678 305 : if (offset_capture) {
679 7 : add_offset_pair(result_set, (char *)stringlist[i],
680 : offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
681 : } else {
682 298 : if (subpat_names[i]) {
683 8 : add_assoc_stringl(result_set, subpat_names[i], (char *)stringlist[i],
684 : offsets[(i<<1)+1] - offsets[i<<1], 1);
685 : }
686 298 : add_next_index_stringl(result_set, (char *)stringlist[i],
687 : offsets[(i<<1)+1] - offsets[i<<1], 1);
688 : }
689 : }
690 : /* And add it to the output array */
691 38 : zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set, sizeof(zval *), NULL);
692 : }
693 : } else { /* single pattern matching */
694 : /* For each subpattern, insert it into the subpatterns array. */
695 76869 : for (i = 0; i < count; i++) {
696 51300 : if (offset_capture) {
697 9 : add_offset_pair(subpats, (char *)stringlist[i],
698 : offsets[(i<<1)+1] - offsets[i<<1],
699 : offsets[i<<1], subpat_names[i]);
700 : } else {
701 51291 : if (subpat_names[i]) {
702 13 : add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
703 : offsets[(i<<1)+1] - offsets[i<<1], 1);
704 : }
705 51291 : add_next_index_stringl(subpats, (char *)stringlist[i],
706 : offsets[(i<<1)+1] - offsets[i<<1], 1);
707 : }
708 : }
709 : }
710 :
711 25732 : pcre_free((void *) stringlist);
712 : }
713 1216081 : } else if (count == PCRE_ERROR_NOMATCH) {
714 : /* If we previously set PCRE_NOTEMPTY after a null match,
715 : this is not necessarily the end. We need to advance
716 : the start offset, and continue. Fudge the offset values
717 : to achieve this, unless we're already at the end of the string. */
718 1216079 : if (g_notempty != 0 && start_offset < subject_len) {
719 2 : offsets[0] = start_offset;
720 2 : offsets[1] = start_offset + 1;
721 : } else
722 : break;
723 : } else {
724 2 : pcre_handle_exec_error(count TSRMLS_CC);
725 2 : break;
726 : }
727 :
728 : /* If we have matched an empty string, mimic what Perl's /g options does.
729 : This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
730 : the match again at the same point. If this fails (picked up above) we
731 : advance to the next character. */
732 50169 : g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
733 :
734 : /* Advance to the position right after the last full match */
735 50169 : start_offset = offsets[1];
736 50169 : } while (global);
737 :
738 : /* Add the match sets to the output array and clean up */
739 1266083 : if (global && subpats_order == PREG_PATTERN_ORDER) {
740 352 : for (i = 0; i < num_subpats; i++) {
741 242 : if (subpat_names[i]) {
742 5 : zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i],
743 : strlen(subpat_names[i])+1, &match_sets[i], sizeof(zval *), NULL);
744 5 : ZVAL_ADDREF(match_sets[i]);
745 : }
746 242 : zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i], sizeof(zval *), NULL);
747 : }
748 110 : efree(match_sets);
749 : }
750 :
751 1266083 : efree(offsets);
752 1266083 : efree(subpat_names);
753 :
754 1266083 : RETVAL_LONG(matched);
755 : }
756 : /* }}} */
757 :
758 : /* {{{ proto int preg_match(string pattern, string subject [, array subpatterns [, int flags [, int offset]]])
759 : Perform a Perl-style regular expression match */
760 : PHP_FUNCTION(preg_match)
761 1265953 : {
762 1265953 : php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
763 1265953 : }
764 : /* }}} */
765 :
766 : /* {{{ proto int preg_match_all(string pattern, string subject, array subpatterns [, int flags [, int offset]])
767 : Perform a Perl-style global regular expression match */
768 : PHP_FUNCTION(preg_match_all)
769 96 : {
770 96 : php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
771 96 : }
772 : /* }}} */
773 :
/* {{{ preg_get_backref
   Tries to read a back-reference at *str, which must point at the
   introducing character. Accepted shapes are the introducer followed by one
   or two decimal digits, or '$' followed by "{", one or two digits and "}".

   On success the number is stored in *backref, *str is advanced past the
   reference and 1 is returned. On failure 0 is returned and *str is left
   unchanged; *backref may already have been overwritten when a braced
   reference lacks its closing '}'. */
static int preg_get_backref(char **str, int *backref)
{
	char *cursor = *str;
	int braced = 0;

	/* A lone introducer at the very end of the string is not a reference. */
	if (cursor[1] == 0) {
		return 0;
	}

	/* "${" opens the braced form: step over the '$' here, the '{' below. */
	if (cursor[0] == '$' && cursor[1] == '{') {
		braced = 1;
		cursor++;
	}
	cursor++;

	/* First digit is mandatory. */
	if (*cursor < '0' || *cursor > '9') {
		return 0;
	}
	*backref = *cursor - '0';
	cursor++;

	/* Second digit is optional. */
	if (*cursor >= '0' && *cursor <= '9') {
		*backref = *backref * 10 + *cursor - '0';
		cursor++;
	}

	if (braced) {
		if (*cursor != '}') {
			return 0;
		}
		cursor++;
	}

	*str = cursor;
	return 1;
}
811 : /* }}} */
812 :
813 : /* {{{ preg_do_repl_func
814 : */
815 : static int preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, char **result TSRMLS_DC)
816 22 : {
817 : zval *retval_ptr; /* Function return value */
818 : zval **args[1]; /* Argument to pass to function */
819 : zval *subpats; /* Captured subpatterns */
820 : int result_len; /* Return value length */
821 : int i;
822 :
823 22 : MAKE_STD_ZVAL(subpats);
824 22 : array_init(subpats);
825 50 : for (i = 0; i < count; i++) {
826 28 : if (subpat_names[i]) {
827 1 : add_assoc_stringl(subpats, subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1], 1);
828 : }
829 28 : add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1);
830 : }
831 22 : args[0] = &subpats;
832 :
833 43 : if (call_user_function_ex(EG(function_table), NULL, function, &retval_ptr, 1, args, 0, NULL TSRMLS_CC) == SUCCESS && retval_ptr) {
834 21 : convert_to_string_ex(&retval_ptr);
835 21 : *result = estrndup(Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr));
836 21 : result_len = Z_STRLEN_P(retval_ptr);
837 21 : zval_ptr_dtor(&retval_ptr);
838 : } else {
839 1 : if (!EG(exception)) {
840 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call custom replacement function");
841 : }
842 1 : result_len = offsets[1] - offsets[0];
843 1 : *result = estrndup(&subject[offsets[0]], result_len);
844 : }
845 :
846 22 : zval_ptr_dtor(&subpats);
847 :
848 22 : return result_len;
849 : }
850 : /* }}} */
851 :
852 : /* {{{ preg_do_eval
853 : */
854 : static int preg_do_eval(char *eval_str, int eval_str_len, char *subject,
855 : int *offsets, int count, char **result TSRMLS_DC)
856 5 : {
857 : zval retval; /* Return value from evaluation */
858 : char *eval_str_end, /* End of eval string */
859 : *match, /* Current match for a backref */
860 : *esc_match, /* Quote-escaped match */
861 : *walk, /* Used to walk the code string */
862 : *segment, /* Start of segment to append while walking */
863 : walk_last; /* Last walked character */
864 : int match_len; /* Length of the match */
865 : int esc_match_len; /* Length of the quote-escaped match */
866 : int result_len; /* Length of the result of the evaluation */
867 : int backref; /* Current backref */
868 : char *compiled_string_description;
869 5 : smart_str code = {0};
870 :
871 5 : eval_str_end = eval_str + eval_str_len;
872 5 : walk = segment = eval_str;
873 5 : walk_last = 0;
874 :
875 195 : while (walk < eval_str_end) {
876 : /* If found a backreference.. */
877 185 : if ('\\' == *walk || '$' == *walk) {
878 33 : smart_str_appendl(&code, segment, walk - segment);
879 33 : if (walk_last == '\\') {
880 0 : code.c[code.len-1] = *walk++;
881 0 : segment = walk;
882 0 : walk_last = 0;
883 0 : continue;
884 : }
885 33 : segment = walk;
886 33 : if (preg_get_backref(&walk, &backref)) {
887 8 : if (backref < count) {
888 : /* Find the corresponding string match and substitute it
889 : in instead of the backref */
890 8 : match = subject + offsets[backref<<1];
891 8 : match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
892 8 : if (match_len) {
893 8 : esc_match = php_addslashes_ex(match, match_len, &esc_match_len, 0, 1 TSRMLS_CC);
894 : } else {
895 0 : esc_match = match;
896 0 : esc_match_len = 0;
897 : }
898 : } else {
899 0 : esc_match = "";
900 0 : esc_match_len = 0;
901 0 : match_len = 0;
902 : }
903 8 : smart_str_appendl(&code, esc_match, esc_match_len);
904 :
905 8 : segment = walk;
906 :
907 : /* Clean up and reassign */
908 8 : if (esc_match_len)
909 8 : efree(esc_match);
910 8 : continue;
911 : }
912 : }
913 177 : walk++;
914 177 : walk_last = walk[-1];
915 : }
916 5 : smart_str_appendl(&code, segment, walk - segment);
917 5 : smart_str_0(&code);
918 :
919 5 : compiled_string_description = zend_make_compiled_string_description("regexp code" TSRMLS_CC);
920 : /* Run the code */
921 5 : if (zend_eval_string(code.c, &retval, compiled_string_description TSRMLS_CC) == FAILURE) {
922 1 : efree(compiled_string_description);
923 1 : php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, code.c);
924 : /* zend_error() does not return in this case */
925 : }
926 4 : efree(compiled_string_description);
927 4 : convert_to_string(&retval);
928 :
929 : /* Save the return value and its length */
930 4 : *result = estrndup(Z_STRVAL(retval), Z_STRLEN(retval));
931 4 : result_len = Z_STRLEN(retval);
932 :
933 : /* Clean up */
934 4 : zval_dtor(&retval);
935 4 : smart_str_free(&code);
936 :
937 4 : return result_len;
938 : }
939 : /* }}} */
940 :
941 : /* {{{ php_pcre_replace
942 : */
943 : PHPAPI char *php_pcre_replace(char *regex, int regex_len,
944 : char *subject, int subject_len,
945 : zval *replace_val, int is_callable_replace,
946 : int *result_len, int limit, int *replace_count TSRMLS_DC)
947 17055 : {
948 : pcre_cache_entry *pce; /* Compiled regular expression */
949 :
950 : /* Compile regex or get it from cache. */
951 17055 : if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
952 9 : return NULL;
953 : }
954 :
955 17046 : return php_pcre_replace_impl(pce, subject, subject_len, replace_val,
956 : is_callable_replace, result_len, limit, replace_count TSRMLS_CC);
957 : }
958 : /* }}} */
959 :
960 : /* {{{ php_pcre_replace_impl() */
961 : PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *replace_val,
962 : int is_callable_replace, int *result_len, int limit, int *replace_count TSRMLS_DC)
963 17046 : {
964 17046 : pcre_extra *extra = pce->extra;/* Holds results of studying */
965 : pcre_extra extra_data; /* Used locally for exec options */
966 17046 : int exoptions = 0; /* Execution options */
967 17046 : int count = 0; /* Count of matched subpatterns */
968 : int *offsets; /* Array of subpattern offsets */
969 : char **subpat_names; /* Array for named subpatterns */
970 : int num_subpats; /* Number of captured subpatterns */
971 : int size_offsets; /* Size of the offsets array */
972 : int new_len; /* Length of needed storage */
973 : int alloc_len; /* Actual allocated length */
974 17046 : int eval_result_len=0; /* Length of the eval'ed or
975 : function-returned string */
976 : int match_len; /* Length of the current match */
977 : int backref; /* Backreference number */
978 : int eval; /* If the replacement string should be eval'ed */
979 : int start_offset; /* Where the new search starts */
980 17046 : int g_notempty=0; /* If the match should not be empty */
981 17046 : int replace_len=0; /* Length of replacement string */
982 : char *result, /* Result of replacement */
983 17046 : *replace=NULL, /* Replacement string */
984 : *new_buf, /* Temporary buffer for re-allocation */
985 : *walkbuf, /* Location of current replacement in the result */
986 : *walk, /* Used to walk the replacement string */
987 : *match, /* The current match */
988 : *piece, /* The current piece of subject */
989 17046 : *replace_end=NULL, /* End of replacement string */
990 : *eval_result, /* Result of eval or custom function */
991 : walk_last; /* Last walked character */
992 : int rc;
993 :
994 17046 : if (extra == NULL) {
995 17044 : extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
996 17044 : extra = &extra_data;
997 : }
998 17046 : extra->match_limit = PCRE_G(backtrack_limit);
999 17046 : extra->match_limit_recursion = PCRE_G(recursion_limit);
1000 :
1001 17046 : eval = pce->preg_options & PREG_REPLACE_EVAL;
1002 17046 : if (is_callable_replace) {
1003 21 : if (eval) {
1004 1 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Modifier /e cannot be used with replacement callback");
1005 1 : return NULL;
1006 : }
1007 : } else {
1008 17025 : replace = Z_STRVAL_P(replace_val);
1009 17025 : replace_len = Z_STRLEN_P(replace_val);
1010 17025 : replace_end = replace + replace_len;
1011 : }
1012 :
1013 : /* Calculate the size of the offsets array, and allocate memory for it. */
1014 17045 : rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
1015 17045 : if (rc < 0) {
1016 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
1017 0 : return NULL;
1018 : }
1019 17045 : num_subpats++;
1020 17045 : size_offsets = num_subpats * 3;
1021 :
1022 : /*
1023 : * Build a mapping from subpattern numbers to their names. We will always
1024 : * allocate the table, even though there may be no named subpatterns. This
1025 : * avoids somewhat more complicated logic in the inner loops.
1026 : */
1027 17045 : subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
1028 17045 : if (!subpat_names) {
1029 1 : return NULL;
1030 : }
1031 :
1032 17044 : offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1033 :
1034 17044 : alloc_len = 2 * subject_len + 1;
1035 17044 : result = safe_emalloc(alloc_len, sizeof(char), 0);
1036 :
1037 : /* Initialize */
1038 17044 : match = NULL;
1039 17044 : *result_len = 0;
1040 17044 : start_offset = 0;
1041 17044 : PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1042 :
1043 : while (1) {
1044 : /* Execute the regular expression. */
1045 21475 : count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
1046 : exoptions|g_notempty, offsets, size_offsets);
1047 :
1048 : /* the string was already proved to be valid UTF-8 */
1049 21475 : exoptions |= PCRE_NO_UTF8_CHECK;
1050 :
1051 : /* Check for too many substrings condition. */
1052 21475 : if (count == 0) {
1053 0 : php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
1054 0 : count = size_offsets/3;
1055 : }
1056 :
1057 21475 : piece = subject + start_offset;
1058 :
1059 25906 : if (count > 0 && (limit == -1 || limit > 0)) {
1060 4432 : if (replace_count) {
1061 28 : ++*replace_count;
1062 : }
1063 : /* Set the match location in subject */
1064 4432 : match = subject + offsets[0];
1065 :
1066 4432 : new_len = *result_len + offsets[0] - start_offset; /* part before the match */
1067 :
1068 : /* If evaluating, do it and add the return string's length */
1069 4432 : if (eval) {
1070 5 : eval_result_len = preg_do_eval(replace, replace_len, subject,
1071 : offsets, count, &eval_result TSRMLS_CC);
1072 4 : new_len += eval_result_len;
1073 4427 : } else if (is_callable_replace) {
1074 : /* Use custom function to get replacement string and its length. */
1075 22 : eval_result_len = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, &eval_result TSRMLS_CC);
1076 22 : new_len += eval_result_len;
1077 : } else { /* do regular substitution */
1078 4405 : walk = replace;
1079 4405 : walk_last = 0;
1080 13333 : while (walk < replace_end) {
1081 4523 : if ('\\' == *walk || '$' == *walk) {
1082 35 : if (walk_last == '\\') {
1083 0 : walk++;
1084 0 : walk_last = 0;
1085 0 : continue;
1086 : }
1087 35 : if (preg_get_backref(&walk, &backref)) {
1088 28 : if (backref < count)
1089 27 : new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1090 28 : continue;
1091 : }
1092 : }
1093 4495 : new_len++;
1094 4495 : walk++;
1095 4495 : walk_last = walk[-1];
1096 : }
1097 : }
1098 :
1099 4431 : if (new_len + 1 > alloc_len) {
1100 5 : alloc_len = 1 + alloc_len + 2 * new_len;
1101 5 : new_buf = emalloc(alloc_len);
1102 5 : memcpy(new_buf, result, *result_len);
1103 5 : efree(result);
1104 5 : result = new_buf;
1105 : }
1106 : /* copy the part of the string before the match */
1107 4431 : memcpy(&result[*result_len], piece, match-piece);
1108 4431 : *result_len += match-piece;
1109 :
1110 : /* copy replacement and backrefs */
1111 4431 : walkbuf = result + *result_len;
1112 :
1113 : /* If evaluating or using custom function, copy result to the buffer
1114 : * and clean up. */
1115 4457 : if (eval || is_callable_replace) {
1116 26 : memcpy(walkbuf, eval_result, eval_result_len);
1117 26 : *result_len += eval_result_len;
1118 26 : STR_FREE(eval_result);
1119 : } else { /* do regular backreference copying */
1120 4405 : walk = replace;
1121 4405 : walk_last = 0;
1122 13333 : while (walk < replace_end) {
1123 4523 : if ('\\' == *walk || '$' == *walk) {
1124 35 : if (walk_last == '\\') {
1125 0 : *(walkbuf-1) = *walk++;
1126 0 : walk_last = 0;
1127 0 : continue;
1128 : }
1129 35 : if (preg_get_backref(&walk, &backref)) {
1130 28 : if (backref < count) {
1131 27 : match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1132 27 : memcpy(walkbuf, subject + offsets[backref<<1], match_len);
1133 27 : walkbuf += match_len;
1134 : }
1135 28 : continue;
1136 : }
1137 : }
1138 4495 : *walkbuf++ = *walk++;
1139 4495 : walk_last = walk[-1];
1140 : }
1141 4405 : *walkbuf = '\0';
1142 : /* increment the result length by how much we've added to the string */
1143 4405 : *result_len += walkbuf - (result + *result_len);
1144 : }
1145 :
1146 4431 : if (limit != -1)
1147 15 : limit--;
1148 :
1149 17043 : } else if (count == PCRE_ERROR_NOMATCH || limit == 0) {
1150 : /* If we previously set PCRE_NOTEMPTY after a null match,
1151 : this is not necessarily the end. We need to advance
1152 : the start offset, and continue. Fudge the offset values
1153 : to achieve this, unless we're already at the end of the string. */
1154 17040 : if (g_notempty != 0 && start_offset < subject_len) {
1155 0 : offsets[0] = start_offset;
1156 0 : offsets[1] = start_offset + 1;
1157 0 : memcpy(&result[*result_len], piece, 1);
1158 0 : (*result_len)++;
1159 : } else {
1160 17040 : new_len = *result_len + subject_len - start_offset;
1161 17040 : if (new_len + 1 > alloc_len) {
1162 0 : alloc_len = new_len + 1; /* now we know exactly how long it is */
1163 0 : new_buf = safe_emalloc(alloc_len, sizeof(char), 0);
1164 0 : memcpy(new_buf, result, *result_len);
1165 0 : efree(result);
1166 0 : result = new_buf;
1167 : }
1168 : /* stick that last bit of string on our output */
1169 17040 : memcpy(&result[*result_len], piece, subject_len - start_offset);
1170 17040 : *result_len += subject_len - start_offset;
1171 17040 : result[*result_len] = '\0';
1172 17040 : break;
1173 : }
1174 : } else {
1175 3 : pcre_handle_exec_error(count TSRMLS_CC);
1176 3 : efree(result);
1177 3 : result = NULL;
1178 3 : break;
1179 : }
1180 :
1181 : /* If we have matched an empty string, mimic what Perl's /g options does.
1182 : This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1183 : the match again at the same point. If this fails (picked up above) we
1184 : advance to the next character. */
1185 4431 : g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1186 :
1187 : /* Advance to the next piece. */
1188 4431 : start_offset = offsets[1];
1189 4431 : }
1190 :
1191 17043 : efree(offsets);
1192 17043 : efree(subpat_names);
1193 :
1194 17043 : return result;
1195 : }
1196 : /* }}} */
1197 :
1198 : /* {{{ php_replace_in_subject
1199 : */
1200 : static char *php_replace_in_subject(zval *regex, zval *replace, zval **subject, int *result_len, int limit, zend_bool is_callable_replace, int *replace_count TSRMLS_DC)
1201 17028 : {
1202 : zval **regex_entry,
1203 17028 : **replace_entry = NULL,
1204 : *replace_value,
1205 : empty_replace;
1206 : char *subject_value,
1207 : *result;
1208 : int subject_len;
1209 :
1210 : /* Make sure we're dealing with strings. */
1211 17028 : convert_to_string_ex(subject);
1212 : /* FIXME: This might need to be changed to STR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */
1213 17028 : ZVAL_STRINGL(&empty_replace, "", 0, 0);
1214 :
1215 : /* If regex is an array */
1216 17028 : if (Z_TYPE_P(regex) == IS_ARRAY) {
1217 : /* Duplicate subject string for repeated replacement */
1218 14 : subject_value = estrndup(Z_STRVAL_PP(subject), Z_STRLEN_PP(subject));
1219 14 : subject_len = Z_STRLEN_PP(subject);
1220 14 : *result_len = subject_len;
1221 :
1222 14 : zend_hash_internal_pointer_reset(Z_ARRVAL_P(regex));
1223 :
1224 14 : replace_value = replace;
1225 14 : if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace)
1226 7 : zend_hash_internal_pointer_reset(Z_ARRVAL_P(replace));
1227 :
1228 : /* For each entry in the regex array, get the entry */
1229 68 : while (zend_hash_get_current_data(Z_ARRVAL_P(regex), (void **)®ex_entry) == SUCCESS) {
1230 : /* Make sure we're dealing with strings. */
1231 41 : convert_to_string_ex(regex_entry);
1232 :
1233 : /* If replace is an array and not a callable construct */
1234 41 : if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) {
1235 : /* Get current entry */
1236 26 : if (zend_hash_get_current_data(Z_ARRVAL_P(replace), (void **)&replace_entry) == SUCCESS) {
1237 24 : if (!is_callable_replace) {
1238 24 : convert_to_string_ex(replace_entry);
1239 : }
1240 24 : replace_value = *replace_entry;
1241 24 : zend_hash_move_forward(Z_ARRVAL_P(replace));
1242 : } else {
1243 : /* We've run out of replacement strings, so use an empty one */
1244 2 : replace_value = &empty_replace;
1245 : }
1246 : }
1247 :
1248 : /* Do the actual replacement and put the result back into subject_value
1249 : for further replacements. */
1250 41 : if ((result = php_pcre_replace(Z_STRVAL_PP(regex_entry),
1251 : Z_STRLEN_PP(regex_entry),
1252 : subject_value,
1253 : subject_len,
1254 : replace_value,
1255 : is_callable_replace,
1256 : result_len,
1257 : limit,
1258 : replace_count TSRMLS_CC)) != NULL) {
1259 40 : efree(subject_value);
1260 40 : subject_value = result;
1261 40 : subject_len = *result_len;
1262 : } else {
1263 1 : efree(subject_value);
1264 1 : return NULL;
1265 : }
1266 :
1267 40 : zend_hash_move_forward(Z_ARRVAL_P(regex));
1268 : }
1269 :
1270 13 : return subject_value;
1271 : } else {
1272 17014 : result = php_pcre_replace(Z_STRVAL_P(regex),
1273 : Z_STRLEN_P(regex),
1274 : Z_STRVAL_PP(subject),
1275 : Z_STRLEN_PP(subject),
1276 : replace,
1277 : is_callable_replace,
1278 : result_len,
1279 : limit,
1280 : replace_count TSRMLS_CC);
1281 17013 : return result;
1282 : }
1283 : }
1284 : /* }}} */
1285 :
1286 : /* {{{ preg_replace_impl
1287 : */
1288 : static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, zend_bool is_callable_replace)
1289 17042 : {
1290 : zval **regex,
1291 : **replace,
1292 : **subject,
1293 : **limit,
1294 : **subject_entry,
1295 : **zcount;
1296 : char *result;
1297 : int result_len;
1298 17042 : int limit_val = -1;
1299 : char *string_key;
1300 : ulong num_key;
1301 : char *callback_name;
1302 17042 : int replace_count=0;
1303 17042 : int *replace_count_ptr=NULL;
1304 :
1305 : /* Get function parameters and do error-checking. */
1306 17042 : if (ZEND_NUM_ARGS() < 3 || ZEND_NUM_ARGS() > 5 ||
1307 : zend_get_parameters_ex(ZEND_NUM_ARGS(), ®ex, &replace, &subject, &limit, &zcount) == FAILURE) {
1308 9 : WRONG_PARAM_COUNT;
1309 : }
1310 17033 : if (!is_callable_replace && Z_TYPE_PP(replace) == IS_ARRAY && Z_TYPE_PP(regex) != IS_ARRAY) {
1311 3 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
1312 3 : RETURN_FALSE;
1313 : }
1314 :
1315 17030 : SEPARATE_ZVAL(replace);
1316 17030 : if (Z_TYPE_PP(replace) != IS_ARRAY)
1317 17022 : convert_to_string_ex(replace);
1318 17029 : if (is_callable_replace) {
1319 27 : if (!zend_is_callable(*replace, 0, &callback_name)) {
1320 6 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Requires argument 2, '%s', to be a valid callback", callback_name);
1321 6 : efree(callback_name);
1322 6 : *return_value = **subject;
1323 6 : zval_copy_ctor(return_value);
1324 6 : INIT_PZVAL(return_value);
1325 6 : return;
1326 : }
1327 21 : efree(callback_name);
1328 : }
1329 :
1330 17023 : SEPARATE_ZVAL(regex);
1331 17023 : SEPARATE_ZVAL(subject);
1332 :
1333 17023 : if (ZEND_NUM_ARGS() > 3) {
1334 9 : convert_to_long_ex(limit);
1335 9 : limit_val = Z_LVAL_PP(limit);
1336 : }
1337 17023 : if (ZEND_NUM_ARGS() > 4) {
1338 7 : replace_count_ptr =& replace_count;
1339 : }
1340 :
1341 17023 : if (Z_TYPE_PP(regex) != IS_ARRAY)
1342 17013 : convert_to_string_ex(regex);
1343 :
1344 : /* if subject is an array */
1345 17022 : if (Z_TYPE_PP(subject) == IS_ARRAY) {
1346 5 : array_init(return_value);
1347 5 : zend_hash_internal_pointer_reset(Z_ARRVAL_PP(subject));
1348 :
1349 : /* For each subject entry, convert it to string, then perform replacement
1350 : and add the result to the return_value array. */
1351 21 : while (zend_hash_get_current_data(Z_ARRVAL_PP(subject), (void **)&subject_entry) == SUCCESS) {
1352 11 : SEPARATE_ZVAL(subject_entry);
1353 11 : if ((result = php_replace_in_subject(*regex, *replace, subject_entry, &result_len, limit_val, is_callable_replace, replace_count_ptr TSRMLS_CC)) != NULL) {
1354 : /* Add to return array */
1355 11 : switch(zend_hash_get_current_key(Z_ARRVAL_PP(subject), &string_key, &num_key, 0))
1356 : {
1357 : case HASH_KEY_IS_STRING:
1358 1 : add_assoc_stringl(return_value, string_key, result, result_len, 0);
1359 1 : break;
1360 :
1361 : case HASH_KEY_IS_LONG:
1362 10 : add_index_stringl(return_value, num_key, result, result_len, 0);
1363 : break;
1364 : }
1365 : }
1366 :
1367 11 : zend_hash_move_forward(Z_ARRVAL_PP(subject));
1368 : }
1369 : } else { /* if subject is not an array */
1370 17017 : if ((result = php_replace_in_subject(*regex, *replace, subject, &result_len, limit_val, is_callable_replace, replace_count_ptr TSRMLS_CC)) != NULL) {
1371 17002 : RETVAL_STRINGL(result, result_len, 0);
1372 : }
1373 : }
1374 17021 : if (replace_count_ptr) {
1375 7 : zval_dtor(*zcount);
1376 7 : ZVAL_LONG(*zcount, replace_count);
1377 : }
1378 :
1379 : }
1380 : /* }}} */
1381 :
1382 : /* {{{ proto string preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, count]])
1383 : Perform Perl-style regular expression replacement. */
1384 : PHP_FUNCTION(preg_replace)
1385 17009 : {
1386 17009 : preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
1387 17006 : }
1388 : /* }}} */
1389 :
1390 : /* {{{ proto string preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, count]])
1391 : Perform Perl-style regular expression replacement using replacement callback. */
1392 : PHP_FUNCTION(preg_replace_callback)
1393 33 : {
1394 33 : preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
1395 33 : }
1396 : /* }}} */
1397 :
1398 : /* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
1399 : Split string into an array using a perl-style regular expression as a delimiter */
1400 : PHP_FUNCTION(preg_split)
1401 1161 : {
1402 : char *regex; /* Regular expression */
1403 : char *subject; /* String to match against */
1404 : int regex_len;
1405 : int subject_len;
1406 1161 : long limit_val = -1;/* Integer value of limit */
1407 1161 : long flags = 0; /* Match control flags */
1408 : pcre_cache_entry *pce; /* Compiled regular expression */
1409 :
1410 : /* Get function parameters and do error checking */
1411 1161 : if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ll", ®ex, ®ex_len,
1412 : &subject, &subject_len, &limit_val, &flags) == FAILURE) {
1413 8 : RETURN_FALSE;
1414 : }
1415 :
1416 : /* Compile regex or get it from cache. */
1417 1153 : if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
1418 5 : RETURN_FALSE;
1419 : }
1420 :
1421 1148 : php_pcre_split_impl(pce, subject, subject_len, return_value, limit_val, flags TSRMLS_CC);
1422 : }
1423 : /* }}} */
1424 :
1425 : /* {{{ php_pcre_split
1426 : */
1427 : PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
1428 : long limit_val, long flags TSRMLS_DC)
1429 1164 : {
1430 1164 : pcre_extra *extra = NULL; /* Holds results of studying */
1431 1164 : pcre *re_bump = NULL; /* Regex instance for empty matches */
1432 1164 : pcre_extra *extra_bump = NULL; /* Almost dummy */
1433 : pcre_extra extra_data; /* Used locally for exec options */
1434 : int *offsets; /* Array of subpattern offsets */
1435 : int size_offsets; /* Size of the offsets array */
1436 1164 : int exoptions = 0; /* Execution options */
1437 1164 : int count = 0; /* Count of matched subpatterns */
1438 : int start_offset; /* Where the new search starts */
1439 : int next_offset; /* End of the last delimiter match + 1 */
1440 1164 : int g_notempty = 0; /* If the match should not be empty */
1441 : char *match, /* The current match */
1442 : *last_match; /* Location of last match */
1443 : int rc;
1444 : int no_empty; /* If NO_EMPTY flag is set */
1445 : int delim_capture; /* If delimiters should be captured */
1446 : int offset_capture; /* If offsets should be captured */
1447 :
1448 1164 : no_empty = flags & PREG_SPLIT_NO_EMPTY;
1449 1164 : delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
1450 1164 : offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
1451 :
1452 1164 : if (limit_val == 0) {
1453 1 : limit_val = -1;
1454 : }
1455 :
1456 1164 : if (extra == NULL) {
1457 1164 : extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1458 1164 : extra = &extra_data;
1459 : }
1460 1164 : extra->match_limit = PCRE_G(backtrack_limit);
1461 1164 : extra->match_limit_recursion = PCRE_G(recursion_limit);
1462 :
1463 : /* Initialize return value */
1464 1164 : array_init(return_value);
1465 :
1466 : /* Calculate the size of the offsets array, and allocate memory for it. */
1467 1164 : rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
1468 1164 : if (rc < 0) {
1469 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
1470 0 : RETURN_FALSE;
1471 : }
1472 1164 : size_offsets = (size_offsets + 1) * 3;
1473 1164 : offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1474 :
1475 : /* Start at the beginning of the string */
1476 1164 : start_offset = 0;
1477 1164 : next_offset = 0;
1478 1164 : last_match = subject;
1479 1164 : match = NULL;
1480 1164 : PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1481 :
1482 : /* Get next piece if no limit or limit not yet reached and something matched*/
1483 4193 : while ((limit_val == -1 || limit_val > 1)) {
1484 3027 : count = pcre_exec(pce->re, extra, subject,
1485 : subject_len, start_offset,
1486 : exoptions|g_notempty, offsets, size_offsets);
1487 :
1488 : /* the string was already proved to be valid UTF-8 */
1489 3027 : exoptions |= PCRE_NO_UTF8_CHECK;
1490 :
1491 : /* Check for too many substrings condition. */
1492 3027 : if (count == 0) {
1493 0 : php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
1494 0 : count = size_offsets/3;
1495 : }
1496 :
1497 : /* If something matched */
1498 3027 : if (count > 0) {
1499 1807 : match = subject + offsets[0];
1500 :
1501 1807 : if (!no_empty || &subject[offsets[0]] != last_match) {
1502 :
1503 1748 : if (offset_capture) {
1504 : /* Add (match, offset) pair to the return value */
1505 26 : add_offset_pair(return_value, last_match, &subject[offsets[0]]-last_match, next_offset, NULL);
1506 : } else {
1507 : /* Add the piece to the return value */
1508 1722 : add_next_index_stringl(return_value, last_match,
1509 : &subject[offsets[0]]-last_match, 1);
1510 : }
1511 :
1512 : /* One less left to do */
1513 1748 : if (limit_val != -1)
1514 1 : limit_val--;
1515 : }
1516 :
1517 1807 : last_match = &subject[offsets[1]];
1518 1807 : next_offset = offsets[1];
1519 :
1520 1807 : if (delim_capture) {
1521 : int i, match_len;
1522 62 : for (i = 1; i < count; i++) {
1523 31 : match_len = offsets[(i<<1)+1] - offsets[i<<1];
1524 : /* If we have matched a delimiter */
1525 31 : if (!no_empty || match_len > 0) {
1526 21 : if (offset_capture) {
1527 10 : add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
1528 : } else {
1529 11 : add_next_index_stringl(return_value,
1530 : &subject[offsets[i<<1]],
1531 : match_len, 1);
1532 : }
1533 : }
1534 : }
1535 : }
1536 1220 : } else if (count == PCRE_ERROR_NOMATCH) {
1537 : /* If we previously set PCRE_NOTEMPTY after a null match,
1538 : this is not necessarily the end. We need to advance
1539 : the start offset, and continue. Fudge the offset values
1540 : to achieve this, unless we're already at the end of the string. */
1541 1219 : if (g_notempty != 0 && start_offset < subject_len) {
1542 58 : if (pce->compile_options & PCRE_UTF8) {
1543 12 : if (re_bump == NULL) {
1544 : int dummy;
1545 :
1546 2 : if ((re_bump = pcre_get_compiled_regex("/./us", &extra_bump, &dummy TSRMLS_CC)) == NULL) {
1547 0 : RETURN_FALSE;
1548 : }
1549 : }
1550 12 : count = pcre_exec(re_bump, extra_bump, subject,
1551 : subject_len, start_offset,
1552 : exoptions, offsets, size_offsets);
1553 12 : if (count < 1) {
1554 0 : php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Unknown error");
1555 0 : offsets[0] = start_offset;
1556 0 : offsets[1] = start_offset + 1;
1557 : }
1558 : } else {
1559 46 : offsets[0] = start_offset;
1560 46 : offsets[1] = start_offset + 1;
1561 : }
1562 : } else
1563 : break;
1564 : } else {
1565 1 : pcre_handle_exec_error(count TSRMLS_CC);
1566 1 : break;
1567 : }
1568 :
1569 : /* If we have matched an empty string, mimic what Perl's /g options does.
1570 : This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1571 : the match again at the same point. If this fails (picked up above) we
1572 : advance to the next character. */
1573 1865 : g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1574 :
1575 : /* Advance to the position right after the last full match */
1576 1865 : start_offset = offsets[1];
1577 : }
1578 :
1579 :
1580 1164 : start_offset = last_match - subject; /* the offset might have been incremented, but without further successful matches */
1581 :
1582 1164 : if (!no_empty || start_offset < subject_len)
1583 : {
1584 1156 : if (offset_capture) {
1585 : /* Add the last (match, offset) pair to the return value */
1586 5 : add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
1587 : } else {
1588 : /* Add the last piece to the return value */
1589 1151 : add_next_index_stringl(return_value, last_match, subject + subject_len - last_match, 1);
1590 : }
1591 : }
1592 :
1593 :
1594 : /* Clean up */
1595 1164 : efree(offsets);
1596 : }
1597 : /* }}} */
1598 :
1599 : /* {{{ proto string preg_quote(string str [, string delim_char])
1600 : Quote regular expression characters plus an optional character */
1601 : PHP_FUNCTION(preg_quote)
1602 5535 : {
1603 : int in_str_len;
1604 : char *in_str; /* Input string argument */
1605 : char *in_str_end; /* End of the input string */
1606 : int delim_len;
1607 5535 : char *delim = NULL; /* Additional delimiter argument */
1608 : char *out_str, /* Output string with quoted characters */
1609 : *p, /* Iterator for input string */
1610 : *q, /* Iterator for output string */
1611 5535 : delim_char=0, /* Delimiter character to be quoted */
1612 : c; /* Current character */
1613 5535 : zend_bool quote_delim = 0; /* Whether to quote additional delim char */
1614 :
1615 : /* Get the arguments and check for errors */
1616 5535 : if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", &in_str, &in_str_len,
1617 : &delim, &delim_len) == FAILURE) {
1618 5 : return;
1619 : }
1620 :
1621 5530 : in_str_end = in_str + in_str_len;
1622 :
1623 : /* Nothing to do if we got an empty string */
1624 5530 : if (in_str == in_str_end) {
1625 2 : RETURN_EMPTY_STRING();
1626 : }
1627 :
1628 5528 : if (delim && *delim) {
1629 5526 : delim_char = delim[0];
1630 5526 : quote_delim = 1;
1631 : }
1632 :
1633 : /* Allocate enough memory so that even if each character
1634 : is quoted, we won't run out of room */
1635 5528 : out_str = safe_emalloc(4, in_str_len, 1);
1636 :
1637 : /* Go through the string and quote necessary characters */
1638 6535547 : for(p = in_str, q = out_str; p != in_str_end; p++) {
1639 6530019 : c = *p;
1640 6530019 : switch(c) {
1641 : case '.':
1642 : case '\\':
1643 : case '+':
1644 : case '*':
1645 : case '?':
1646 : case '[':
1647 : case '^':
1648 : case ']':
1649 : case '$':
1650 : case '(':
1651 : case ')':
1652 : case '{':
1653 : case '}':
1654 : case '=':
1655 : case '!':
1656 : case '>':
1657 : case '<':
1658 : case '|':
1659 : case ':':
1660 809223 : *q++ = '\\';
1661 809223 : *q++ = c;
1662 809223 : break;
1663 :
1664 : case '\0':
1665 1112 : *q++ = '\\';
1666 1112 : *q++ = '0';
1667 1112 : *q++ = '0';
1668 1112 : *q++ = '0';
1669 1112 : break;
1670 :
1671 : default:
1672 5719684 : if (quote_delim && c == delim_char)
1673 12864 : *q++ = '\\';
1674 5719684 : *q++ = c;
1675 : break;
1676 : }
1677 : }
1678 5528 : *q = '\0';
1679 :
1680 : /* Reallocate string and return it */
1681 5528 : RETVAL_STRINGL(erealloc(out_str, q - out_str + 1), q - out_str, 0);
1682 : }
1683 : /* }}} */
1684 :
1685 : /* {{{ proto array preg_grep(string regex, array input [, int flags])
1686 : Searches array and returns entries which match regex */
1687 : PHP_FUNCTION(preg_grep)
1688 29 : {
1689 : char *regex; /* Regular expression */
1690 : int regex_len;
1691 : zval *input; /* Input array */
1692 29 : long flags = 0; /* Match control flags */
1693 : pcre_cache_entry *pce; /* Compiled regular expression */
1694 :
1695 : /* Get arguments and do error checking */
1696 29 : if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sa|l", ®ex, ®ex_len,
1697 : &input, &flags) == FAILURE) {
1698 9 : return;
1699 : }
1700 :
1701 : /* Compile regex or get it from cache. */
1702 20 : if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
1703 5 : RETURN_FALSE;
1704 : }
1705 :
1706 15 : php_pcre_grep_impl(pce, input, return_value, flags TSRMLS_CC);
1707 : }
1708 : /* }}} */
1709 :
1710 : PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, long flags TSRMLS_DC) /* {{{ */
1711 15 : {
1712 : zval **entry; /* An entry in the input array */
1713 15 : pcre_extra *extra = pce->extra;/* Holds results of studying */
1714 : pcre_extra extra_data; /* Used locally for exec options */
1715 : int *offsets; /* Array of subpattern offsets */
1716 : int size_offsets; /* Size of the offsets array */
1717 15 : int count = 0; /* Count of matched subpatterns */
1718 : char *string_key;
1719 : ulong num_key;
1720 : zend_bool invert; /* Whether to return non-matching
1721 : entries */
1722 : int rc;
1723 :
1724 15 : invert = flags & PREG_GREP_INVERT ? 1 : 0;
1725 :
1726 15 : if (extra == NULL) {
1727 15 : extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1728 15 : extra = &extra_data;
1729 : }
1730 15 : extra->match_limit = PCRE_G(backtrack_limit);
1731 15 : extra->match_limit_recursion = PCRE_G(recursion_limit);
1732 :
1733 : /* Calculate the size of the offsets array, and allocate memory for it. */
1734 15 : rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
1735 15 : if (rc < 0) {
1736 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
1737 0 : RETURN_FALSE;
1738 : }
1739 15 : size_offsets = (size_offsets + 1) * 3;
1740 15 : offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1741 :
1742 : /* Initialize return array */
1743 15 : array_init(return_value);
1744 :
1745 15 : PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1746 :
1747 : /* Go through the input array */
1748 15 : zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));
1749 104 : while (zend_hash_get_current_data(Z_ARRVAL_P(input), (void **)&entry) == SUCCESS) {
1750 75 : zval subject = **entry;
1751 :
1752 75 : if (Z_TYPE_PP(entry) != IS_STRING) {
1753 23 : zval_copy_ctor(&subject);
1754 23 : convert_to_string(&subject);
1755 : }
1756 :
1757 : /* Perform the match */
1758 75 : count = pcre_exec(pce->re, extra, Z_STRVAL(subject),
1759 : Z_STRLEN(subject), 0,
1760 : 0, offsets, size_offsets);
1761 :
1762 : /* Check for too many substrings condition. */
1763 75 : if (count == 0) {
1764 0 : php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
1765 0 : count = size_offsets/3;
1766 75 : } else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
1767 1 : pcre_handle_exec_error(count TSRMLS_CC);
1768 1 : break;
1769 : }
1770 :
1771 : /* If the entry fits our requirements */
1772 74 : if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
1773 30 : ZVAL_ADDREF(*entry);
1774 :
1775 : /* Add to return array */
1776 30 : switch (zend_hash_get_current_key(Z_ARRVAL_P(input), &string_key, &num_key, 0))
1777 : {
1778 : case HASH_KEY_IS_STRING:
1779 2 : zend_hash_update(Z_ARRVAL_P(return_value), string_key,
1780 : strlen(string_key)+1, entry, sizeof(zval *), NULL);
1781 2 : break;
1782 :
1783 : case HASH_KEY_IS_LONG:
1784 28 : zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry,
1785 : sizeof(zval *), NULL);
1786 : break;
1787 : }
1788 : }
1789 :
1790 74 : if (Z_TYPE_PP(entry) != IS_STRING) {
1791 23 : zval_dtor(&subject);
1792 : }
1793 :
1794 74 : zend_hash_move_forward(Z_ARRVAL_P(input));
1795 : }
1796 15 : zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));
1797 : /* Clean up */
1798 15 : efree(offsets);
1799 : }
1800 : /* }}} */
1801 :
1802 : /* {{{ proto int preg_last_error()
1803 : Returns the error code of the last regexp execution. */
1804 : PHP_FUNCTION(preg_last_error)
1805 13 : {
1806 13 : if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "") == FAILURE) {
1807 2 : return;
1808 : }
1809 :
1810 11 : RETURN_LONG(PCRE_G(error_code));
1811 : }
1812 : /* }}} */
1813 :
1814 : /* {{{ module definition structures */
1815 :
1816 : zend_function_entry pcre_functions[] = {
1817 : PHP_FE(preg_match, third_arg_force_ref)
1818 : PHP_FE(preg_match_all, third_arg_force_ref)
1819 : PHP_FE(preg_replace, fifth_arg_force_ref)
1820 : PHP_FE(preg_replace_callback, fifth_arg_force_ref)
1821 : PHP_FE(preg_split, NULL)
1822 : PHP_FE(preg_quote, NULL)
1823 : PHP_FE(preg_grep, NULL)
1824 : PHP_FE(preg_last_error, NULL)
1825 : {NULL, NULL, NULL}
1826 : };
1827 :
1828 : zend_module_entry pcre_module_entry = {
1829 : STANDARD_MODULE_HEADER,
1830 : "pcre",
1831 : pcre_functions,
1832 : PHP_MINIT(pcre),
1833 : PHP_MSHUTDOWN(pcre),
1834 : NULL,
1835 : NULL,
1836 : PHP_MINFO(pcre),
1837 : NO_VERSION_YET,
1838 : PHP_MODULE_GLOBALS(pcre),
1839 : PHP_GINIT(pcre),
1840 : PHP_GSHUTDOWN(pcre),
1841 : NULL,
1842 : STANDARD_MODULE_PROPERTIES_EX
1843 : };
1844 :
/* ZEND_GET_MODULE(pcre) is emitted only when COMPILE_DL_PCRE is defined
   (presumably the dynamically loadable build - confirm against the build
   configuration); on PHP_WIN32 that build also pulls in zend_arg_defs.c. */
#if defined(COMPILE_DL_PCRE)
ZEND_GET_MODULE(pcre)
# if defined(PHP_WIN32)
#  include "zend_arg_defs.c"
# endif
#endif
1851 :
1852 : /* }}} */
1853 :
1854 : #endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
1855 :
1856 : /*
1857 : * Local variables:
1858 : * tab-width: 4
1859 : * c-basic-offset: 4
1860 : * End:
1861 : * vim600: sw=4 ts=4 fdm=marker
1862 : * vim<600: sw=4 ts=4
1863 : */
|