1 : /*
2 : +----------------------------------------------------------------------+
3 : | PHP Version 5 |
4 : +----------------------------------------------------------------------+
5 : | Copyright (c) 1997-2009 The PHP Group |
6 : +----------------------------------------------------------------------+
7 : | This source file is subject to version 3.01 of the PHP license, |
8 : | that is bundled with this package in the file LICENSE, and is |
9 : | available through the world-wide-web at the following url: |
10 : | http://www.php.net/license/3_01.txt |
11 : | If you did not receive a copy of the PHP license and are unable to |
12 : | obtain it through the world-wide-web, please send a note to |
13 : | license@php.net so we can mail you a copy immediately. |
14 : +----------------------------------------------------------------------+
15 : | Author: Andrei Zmievski <andrei@php.net> |
16 : +----------------------------------------------------------------------+
17 : */
18 :
19 : /* $Id: php_pcre.c 288111 2009-09-06 17:41:34Z felipe $ */
20 :
21 : #include "php.h"
22 : #include "php_ini.h"
23 : #include "php_globals.h"
24 : #include "php_pcre.h"
25 : #include "ext/standard/info.h"
26 : #include "ext/standard/php_smart_str.h"
27 :
28 : #if HAVE_PCRE || HAVE_BUNDLED_PCRE
29 :
30 : #include "ext/standard/php_string.h"
31 :
/*
 * preg_match_all() result ordering. The ordering mode lives in the low byte
 * of the user supplied flags; PREG_OFFSET_CAPTURE is a separate bit above it.
 */
#define PREG_PATTERN_ORDER			1
#define PREG_SET_ORDER				2
#define PREG_OFFSET_CAPTURE			(1 << 8)

/* Bit flags registered for the split API (consumed outside this section). */
#define PREG_SPLIT_NO_EMPTY			(1 << 0)
#define PREG_SPLIT_DELIM_CAPTURE	(1 << 1)
#define PREG_SPLIT_OFFSET_CAPTURE	(1 << 2)

/* Internal option bit recorded when the pattern carries the 'e' modifier. */
#define PREG_REPLACE_EVAL			(1 << 0)

/* Bit flag registered for the grep API (consumed outside this section). */
#define PREG_GREP_INVERT			(1 << 0)

/* Number of compiled patterns kept before the cache starts evicting. */
#define PCRE_CACHE_SIZE				4096

/* Error codes stored in PCRE_G(error_code) and exported as PREG_*_ERROR. */
enum {
	PHP_PCRE_NO_ERROR				= 0,
	PHP_PCRE_INTERNAL_ERROR			= 1,
	PHP_PCRE_BACKTRACK_LIMIT_ERROR	= 2,
	PHP_PCRE_RECURSION_LIMIT_ERROR	= 3,
	PHP_PCRE_BAD_UTF8_ERROR			= 4,
	PHP_PCRE_BAD_UTF8_OFFSET_ERROR	= 5
};
54 :
55 :
56 : ZEND_DECLARE_MODULE_GLOBALS(pcre)
57 :
58 :
59 : static void pcre_handle_exec_error(int pcre_code TSRMLS_DC) /* {{{ */
60 9 : {
61 9 : int preg_code = 0;
62 :
63 9 : switch (pcre_code) {
64 : case PCRE_ERROR_MATCHLIMIT:
65 3 : preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
66 3 : break;
67 :
68 : case PCRE_ERROR_RECURSIONLIMIT:
69 3 : preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
70 3 : break;
71 :
72 : case PCRE_ERROR_BADUTF8:
73 2 : preg_code = PHP_PCRE_BAD_UTF8_ERROR;
74 2 : break;
75 :
76 : case PCRE_ERROR_BADUTF8_OFFSET:
77 1 : preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
78 1 : break;
79 :
80 : default:
81 0 : preg_code = PHP_PCRE_INTERNAL_ERROR;
82 : break;
83 : }
84 :
85 9 : PCRE_G(error_code) = preg_code;
86 9 : }
87 : /* }}} */
88 :
89 : static void php_free_pcre_cache(void *data) /* {{{ */
90 22054 : {
91 22054 : pcre_cache_entry *pce = (pcre_cache_entry *) data;
92 22054 : if (!pce) return;
93 22054 : pefree(pce->re, 1);
94 22054 : if (pce->extra) pefree(pce->extra, 1);
95 : #if HAVE_SETLOCALE
96 22054 : if ((void*)pce->tables) pefree((void*)pce->tables, 1);
97 22054 : pefree(pce->locale, 1);
98 : #endif
99 : }
100 : /* }}} */
101 :
102 : static PHP_GINIT_FUNCTION(pcre) /* {{{ */
103 17633 : {
104 17633 : zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
105 17633 : pcre_globals->backtrack_limit = 0;
106 17633 : pcre_globals->recursion_limit = 0;
107 17633 : pcre_globals->error_code = PHP_PCRE_NO_ERROR;
108 17633 : }
109 : /* }}} */
110 :
111 : static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
112 17665 : {
113 17665 : zend_hash_destroy(&pcre_globals->pcre_cache);
114 17665 : }
115 : /* }}} */
116 :
117 : PHP_INI_BEGIN()
118 : STD_PHP_INI_ENTRY("pcre.backtrack_limit", "100000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
119 : STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
120 : PHP_INI_END()
121 :
122 :
/* {{{ PHP_MINFO_FUNCTION(pcre)
 * Emits the module's info section: a two-row table (support status and the
 * PCRE library version string) followed by the module's INI entries.
 */
static PHP_MINFO_FUNCTION(pcre)
{
	const char *library_version = pcre_version();

	php_info_print_table_start();
	php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled");
	php_info_print_table_row(2, "PCRE Library Version", library_version);
	php_info_print_table_end();

	DISPLAY_INI_ENTRIES();
}
/* }}} */
134 :
135 : /* {{{ PHP_MINIT_FUNCTION(pcre) */
136 : static PHP_MINIT_FUNCTION(pcre)
137 17633 : {
138 17633 : REGISTER_INI_ENTRIES();
139 :
140 17633 : REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
141 17633 : REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
142 17633 : REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
143 17633 : REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
144 17633 : REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
145 17633 : REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
146 17633 : REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
147 :
148 17633 : REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
149 17633 : REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
150 17633 : REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
151 17633 : REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
152 17633 : REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
153 17633 : REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
154 17633 : REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT);
155 :
156 17633 : return SUCCESS;
157 : }
158 : /* }}} */
159 :
160 : /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
161 : static PHP_MSHUTDOWN_FUNCTION(pcre)
162 17665 : {
163 17665 : UNREGISTER_INI_ENTRIES();
164 :
165 17665 : return SUCCESS;
166 : }
167 : /* }}} */
168 :
169 : /* {{{ static pcre_clean_cache */
170 : static int pcre_clean_cache(void *data, void *arg TSRMLS_DC)
171 28672 : {
172 28672 : int *num_clean = (int *)arg;
173 :
174 28672 : if (*num_clean > 0) {
175 3584 : (*num_clean)--;
176 3584 : return 1;
177 : } else {
178 25088 : return 0;
179 : }
180 : }
181 : /* }}} */
182 :
183 : /* {{{ static make_subpats_table */
184 : static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce TSRMLS_DC)
185 1530098 : {
186 1530098 : pcre_extra *extra = pce->extra;
187 1530098 : int name_cnt = 0, name_size, ni = 0;
188 : int rc;
189 : char *name_table;
190 : unsigned short name_idx;
191 1530098 : char **subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
192 :
193 1530098 : rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMECOUNT, &name_cnt);
194 1530098 : if (rc < 0) {
195 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
196 0 : efree(subpat_names);
197 0 : return NULL;
198 : }
199 1530098 : if (name_cnt > 0) {
200 : int rc1, rc2;
201 :
202 12 : rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
203 12 : rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
204 12 : rc = rc2 ? rc2 : rc1;
205 12 : if (rc < 0) {
206 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
207 0 : efree(subpat_names);
208 0 : return NULL;
209 : }
210 :
211 1076 : while (ni++ < name_cnt) {
212 1054 : name_idx = 0xff * (unsigned char)name_table[0] + (unsigned char)name_table[1];
213 1054 : subpat_names[name_idx] = name_table + 2;
214 1054 : if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
215 2 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Numeric named subpatterns are not allowed");
216 2 : efree(subpat_names);
217 2 : return NULL;
218 : }
219 1052 : name_table += name_size;
220 : }
221 : }
222 :
223 1530096 : return subpat_names;
224 : }
225 : /* }}} */
226 :
227 : /* {{{ pcre_get_compiled_regex_cache
228 : */
229 : PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_len TSRMLS_DC)
230 1659991 : {
231 1659991 : pcre *re = NULL;
232 : pcre_extra *extra;
233 1659991 : int coptions = 0;
234 1659991 : int soptions = 0;
235 : const char *error;
236 : int erroffset;
237 : char delimiter;
238 : char start_delimiter;
239 : char end_delimiter;
240 : char *p, *pp;
241 : char *pattern;
242 1659991 : int do_study = 0;
243 1659991 : int poptions = 0;
244 1659991 : unsigned const char *tables = NULL;
245 : #if HAVE_SETLOCALE
246 1659991 : char *locale = setlocale(LC_CTYPE, NULL);
247 : #endif
248 : pcre_cache_entry *pce;
249 : pcre_cache_entry new_entry;
250 :
251 : /* Try to lookup the cached regex entry, and if successful, just pass
252 : back the compiled pattern, otherwise go on and compile it. */
253 1659991 : if (zend_hash_find(&PCRE_G(pcre_cache), regex, regex_len+1, (void **)&pce) == SUCCESS) {
254 : /*
255 : * We use a quick pcre_info() check to see whether cache is corrupted, and if it
256 : * is, we flush it and compile the pattern from scratch.
257 : */
258 1637904 : if (pcre_info(pce->re, NULL, NULL) == PCRE_ERROR_BADMAGIC) {
259 0 : zend_hash_clean(&PCRE_G(pcre_cache));
260 : } else {
261 : #if HAVE_SETLOCALE
262 1637904 : if (!strcmp(pce->locale, locale)) {
263 : #endif
264 1637902 : return pce;
265 : #if HAVE_SETLOCALE
266 : }
267 : #endif
268 : }
269 : }
270 :
271 22089 : p = regex;
272 :
273 : /* Parse through the leading whitespace, and display a warning if we
274 : get to the end without encountering a delimiter. */
275 22089 : while (isspace((int)*(unsigned char *)p)) p++;
276 22089 : if (*p == 0) {
277 3 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
278 3 : return NULL;
279 : }
280 :
281 : /* Get the delimiter and display a warning if it is alphanumeric
282 : or a backslash. */
283 22086 : delimiter = *p++;
284 22086 : if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
285 7 : php_error_docref(NULL TSRMLS_CC,E_WARNING, "Delimiter must not be alphanumeric or backslash");
286 7 : return NULL;
287 : }
288 :
289 22079 : start_delimiter = delimiter;
290 22079 : if ((pp = strchr("([{< )]}> )]}>", delimiter)))
291 25 : delimiter = pp[5];
292 22079 : end_delimiter = delimiter;
293 :
294 22079 : if (start_delimiter == end_delimiter) {
295 : /* We need to iterate through the pattern, searching for the ending delimiter,
296 : but skipping the backslashed delimiters. If the ending delimiter is not
297 : found, display a warning. */
298 22054 : pp = p;
299 16192110 : while (*pp != 0) {
300 16170049 : if (*pp == '\\' && pp[1] != 0) pp++;
301 15000329 : else if (*pp == delimiter)
302 22047 : break;
303 16148002 : pp++;
304 : }
305 22054 : if (*pp == 0) {
306 7 : php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending delimiter '%c' found", delimiter);
307 7 : return NULL;
308 : }
309 : } else {
310 : /* We iterate through the pattern, searching for the matching ending
311 : * delimiter. For each matching starting delimiter, we increment nesting
312 : * level, and decrement it for each matching ending delimiter. If we
313 : * reach the end of the pattern without matching, display a warning.
314 : */
315 25 : int brackets = 1; /* brackets nesting level */
316 25 : pp = p;
317 272 : while (*pp != 0) {
318 246 : if (*pp == '\\' && pp[1] != 0) pp++;
319 223 : else if (*pp == end_delimiter && --brackets <= 0)
320 : break;
321 199 : else if (*pp == start_delimiter)
322 1 : brackets++;
323 222 : pp++;
324 : }
325 25 : if (*pp == 0) {
326 1 : php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending matching delimiter '%c' found", end_delimiter);
327 1 : return NULL;
328 : }
329 : }
330 :
331 : /* Make a copy of the actual pattern. */
332 22071 : pattern = estrndup(p, pp-p);
333 :
334 : /* Move on to the options */
335 22071 : pp++;
336 :
337 : /* Parse through the options, setting appropriate flags. Display
338 : a warning if we encounter an unknown modifier. */
339 53759 : while (*pp != 0) {
340 9630 : switch (*pp++) {
341 : /* Perl compatible options */
342 928 : case 'i': coptions |= PCRE_CASELESS; break;
343 868 : case 'm': coptions |= PCRE_MULTILINE; break;
344 7714 : case 's': coptions |= PCRE_DOTALL; break;
345 4 : case 'x': coptions |= PCRE_EXTENDED; break;
346 :
347 : /* PCRE specific options */
348 2 : case 'A': coptions |= PCRE_ANCHORED; break;
349 7 : case 'D': coptions |= PCRE_DOLLAR_ENDONLY;break;
350 28 : case 'S': do_study = 1; break;
351 27 : case 'U': coptions |= PCRE_UNGREEDY; break;
352 1 : case 'X': coptions |= PCRE_EXTRA; break;
353 31 : case 'u': coptions |= PCRE_UTF8; break;
354 :
355 : /* Custom preg options */
356 5 : case 'e': poptions |= PREG_REPLACE_EVAL; break;
357 :
358 : case ' ':
359 : case '\n':
360 2 : break;
361 :
362 : default:
363 13 : php_error_docref(NULL TSRMLS_CC,E_WARNING, "Unknown modifier '%c'", pp[-1]);
364 13 : efree(pattern);
365 13 : return NULL;
366 : }
367 : }
368 :
369 : #if HAVE_SETLOCALE
370 22058 : if (strcmp(locale, "C"))
371 4 : tables = pcre_maketables();
372 : #endif
373 :
374 : /* Compile pattern and display a warning if compilation failed. */
375 22058 : re = pcre_compile(pattern,
376 : coptions,
377 : &error,
378 : &erroffset,
379 : tables);
380 :
381 22058 : if (re == NULL) {
382 4 : php_error_docref(NULL TSRMLS_CC,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
383 4 : efree(pattern);
384 4 : if (tables) {
385 0 : pefree((void*)tables, 1);
386 : }
387 4 : return NULL;
388 : }
389 :
390 : /* If study option was specified, study the pattern and
391 : store the result in extra for passing to pcre_exec. */
392 22054 : if (do_study) {
393 28 : extra = pcre_study(re, soptions, &error);
394 28 : if (extra) {
395 28 : extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
396 : }
397 28 : if (error != NULL) {
398 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Error while studying pattern");
399 : }
400 : } else {
401 22026 : extra = NULL;
402 : }
403 :
404 22054 : efree(pattern);
405 :
406 : /*
407 : * If we reached cache limit, clean out the items from the head of the list;
408 : * these are supposedly the oldest ones (but not necessarily the least used
409 : * ones).
410 : */
411 22054 : if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
412 7 : int num_clean = PCRE_CACHE_SIZE / 8;
413 7 : zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean TSRMLS_CC);
414 : }
415 :
416 : /* Store the compiled pattern and extra info in the cache. */
417 22054 : new_entry.re = re;
418 22054 : new_entry.extra = extra;
419 22054 : new_entry.preg_options = poptions;
420 22054 : new_entry.compile_options = coptions;
421 : #if HAVE_SETLOCALE
422 22054 : new_entry.locale = pestrdup(locale, 1);
423 22054 : new_entry.tables = tables;
424 : #endif
425 22054 : zend_hash_update(&PCRE_G(pcre_cache), regex, regex_len+1, (void *)&new_entry,
426 : sizeof(pcre_cache_entry), (void**)&pce);
427 :
428 22054 : return pce;
429 : }
430 : /* }}} */
431 :
432 : /* {{{ pcre_get_compiled_regex
433 : */
434 : PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *preg_options TSRMLS_DC)
435 127847 : {
436 127847 : pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC);
437 :
438 127847 : if (extra) {
439 127847 : *extra = pce ? pce->extra : NULL;
440 : }
441 127847 : if (preg_options) {
442 127847 : *preg_options = pce ? pce->preg_options : 0;
443 : }
444 :
445 127847 : return pce ? pce->re : NULL;
446 : }
447 : /* }}} */
448 :
449 : /* {{{ pcre_get_compiled_regex_ex
450 : */
451 : PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int *preg_options, int *compile_options TSRMLS_DC)
452 0 : {
453 0 : pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC);
454 :
455 0 : if (extra) {
456 0 : *extra = pce ? pce->extra : NULL;
457 : }
458 0 : if (preg_options) {
459 0 : *preg_options = pce ? pce->preg_options : 0;
460 : }
461 0 : if (compile_options) {
462 0 : *compile_options = pce ? pce->compile_options : 0;
463 : }
464 :
465 0 : return pce ? pce->re : NULL;
466 : }
467 : /* }}} */
468 :
469 : /* {{{ add_offset_pair */
470 : static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
471 66 : {
472 : zval *match_pair;
473 :
474 66 : ALLOC_ZVAL(match_pair);
475 66 : array_init(match_pair);
476 66 : INIT_PZVAL(match_pair);
477 :
478 : /* Add (match, offset) to the return value */
479 66 : add_next_index_stringl(match_pair, str, len, 1);
480 66 : add_next_index_long(match_pair, offset);
481 :
482 66 : if (name) {
483 2 : zval_add_ref(&match_pair);
484 2 : zend_hash_update(Z_ARRVAL_P(result), name, strlen(name)+1, &match_pair, sizeof(zval *), NULL);
485 : }
486 66 : zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), NULL);
487 66 : }
488 : /* }}} */
489 :
490 : static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
491 1508445 : {
492 : /* parameters */
493 : char *regex; /* Regular expression */
494 : char *subject; /* String to match against */
495 : int regex_len;
496 : int subject_len;
497 : pcre_cache_entry *pce; /* Compiled regular expression */
498 1508445 : zval *subpats = NULL; /* Array for subpatterns */
499 1508445 : long flags = 0; /* Match control flags */
500 1508445 : long start_offset = 0; /* Where the new search starts */
501 :
502 1508445 : if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, ((global) ? "ssz|ll" : "ss|zll"), ®ex, ®ex_len,
503 : &subject, &subject_len, &subpats, &flags, &start_offset) == FAILURE) {
504 16 : RETURN_FALSE;
505 : }
506 :
507 : /* Compile regex or get it from cache. */
508 1508429 : if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
509 16 : RETURN_FALSE;
510 : }
511 :
512 1508413 : php_pcre_match_impl(pce, subject, subject_len, return_value, subpats,
513 : global, ZEND_NUM_ARGS() >= 4, flags, start_offset TSRMLS_CC);
514 : }
515 : /* }}} */
516 :
517 : /* {{{ php_pcre_match_impl() */
518 : PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
519 : zval *subpats, int global, int use_flags, long flags, long start_offset TSRMLS_DC)
520 1508676 : {
521 : zval *result_set, /* Holds a set of subpatterns after
522 : a global match */
523 1508676 : **match_sets = NULL; /* An array of sets of matches for each
524 : subpattern after a global match */
525 1508676 : pcre_extra *extra = pce->extra;/* Holds results of studying */
526 : pcre_extra extra_data; /* Used locally for exec options */
527 1508676 : int exoptions = 0; /* Execution options */
528 1508676 : int count = 0; /* Count of matched subpatterns */
529 : int *offsets; /* Array of subpattern offsets */
530 : int num_subpats; /* Number of captured subpatterns */
531 : int size_offsets; /* Size of the offsets array */
532 : int matched; /* Has anything matched */
533 1508676 : int g_notempty = 0; /* If the match should not be empty */
534 : const char **stringlist; /* Holds list of subpatterns */
535 : char **subpat_names; /* Array for named subpatterns */
536 : int i, rc;
537 : int subpats_order; /* Order of subpattern matches */
538 : int offset_capture; /* Capture match offsets: yes/no */
539 :
540 : /* Overwrite the passed-in value for subpatterns with an empty array. */
541 1508676 : if (subpats != NULL) {
542 1033040 : zval_dtor(subpats);
543 1033040 : array_init(subpats);
544 : }
545 :
546 1508676 : subpats_order = global ? PREG_PATTERN_ORDER : 0;
547 :
548 1508676 : if (use_flags) {
549 226 : offset_capture = flags & PREG_OFFSET_CAPTURE;
550 :
551 : /*
552 : * subpats_order is pre-set to pattern mode so we change it only if
553 : * necessary.
554 : */
555 226 : if (flags & 0xff) {
556 17 : subpats_order = flags & 0xff;
557 : }
558 226 : if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
559 : (!global && subpats_order != 0)) {
560 1 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid flags specified");
561 1 : return;
562 : }
563 : } else {
564 1508450 : offset_capture = 0;
565 : }
566 :
567 : /* Negative offset counts from the end of the string. */
568 1508675 : if (start_offset < 0) {
569 5 : start_offset = subject_len + start_offset;
570 5 : if (start_offset < 0) {
571 1 : start_offset = 0;
572 : }
573 : }
574 :
575 1508675 : if (extra == NULL) {
576 1508667 : extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
577 1508667 : extra = &extra_data;
578 : }
579 1508675 : extra->match_limit = PCRE_G(backtrack_limit);
580 1508675 : extra->match_limit_recursion = PCRE_G(recursion_limit);
581 :
582 : /* Calculate the size of the offsets array, and allocate memory for it. */
583 1508675 : rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
584 1508675 : if (rc < 0) {
585 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
586 0 : RETURN_FALSE;
587 : }
588 1508675 : num_subpats++;
589 1508675 : size_offsets = num_subpats * 3;
590 :
591 : /*
592 : * Build a mapping from subpattern numbers to their names. We will always
593 : * allocate the table, even though there may be no named subpatterns. This
594 : * avoids somewhat more complicated logic in the inner loops.
595 : */
596 1508675 : subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
597 1508675 : if (!subpat_names) {
598 1 : RETURN_FALSE;
599 : }
600 :
601 1508674 : offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
602 :
603 : /* Allocate match sets array and initialize the values. */
604 1508674 : if (global && subpats_order == PREG_PATTERN_ORDER) {
605 306 : match_sets = (zval **)safe_emalloc(num_subpats, sizeof(zval *), 0);
606 747 : for (i=0; i<num_subpats; i++) {
607 441 : ALLOC_ZVAL(match_sets[i]);
608 441 : array_init(match_sets[i]);
609 441 : INIT_PZVAL(match_sets[i]);
610 : }
611 : }
612 :
613 1508674 : matched = 0;
614 1508674 : PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
615 :
616 : do {
617 : /* Execute the regular expression. */
618 1508843 : count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
619 : exoptions|g_notempty, offsets, size_offsets);
620 :
621 : /* the string was already proved to be valid UTF-8 */
622 1508843 : exoptions |= PCRE_NO_UTF8_CHECK;
623 :
624 : /* Check for too many substrings condition. */
625 1508843 : if (count == 0) {
626 0 : php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
627 0 : count = size_offsets/3;
628 : }
629 :
630 : /* If something has matched */
631 1508843 : if (count > 0) {
632 61738 : matched++;
633 :
634 : /* If subpatterns array has been passed, fill it in with values. */
635 61738 : if (subpats != NULL) {
636 : /* Try to get the list of substrings and display a warning if failed. */
637 34765 : if (pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) {
638 0 : efree(subpat_names);
639 0 : efree(offsets);
640 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Get subpatterns list failed");
641 0 : RETURN_FALSE;
642 : }
643 :
644 34765 : if (global) { /* global pattern matching */
645 167 : if (subpats_order == PREG_PATTERN_ORDER) {
646 : /* For each subpattern, insert it into the appropriate array. */
647 321 : for (i = 0; i < count; i++) {
648 194 : if (offset_capture) {
649 8 : add_offset_pair(match_sets[i], (char *)stringlist[i],
650 : offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
651 : } else {
652 186 : add_next_index_stringl(match_sets[i], (char *)stringlist[i],
653 : offsets[(i<<1)+1] - offsets[i<<1], 1);
654 : }
655 : }
656 : /*
657 : * If the number of captured subpatterns on this run is
658 : * less than the total possible number, pad the result
659 : * arrays with empty strings.
660 : */
661 127 : if (count < num_subpats) {
662 2 : for (; i < num_subpats; i++) {
663 1 : add_next_index_string(match_sets[i], "", 1);
664 : }
665 : }
666 : } else {
667 : /* Allocate the result set array */
668 40 : ALLOC_ZVAL(result_set);
669 40 : array_init(result_set);
670 40 : INIT_PZVAL(result_set);
671 :
672 : /* Add all the subpatterns to it */
673 349 : for (i = 0; i < count; i++) {
674 309 : if (offset_capture) {
675 7 : add_offset_pair(result_set, (char *)stringlist[i],
676 : offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
677 : } else {
678 302 : if (subpat_names[i]) {
679 8 : add_assoc_stringl(result_set, subpat_names[i], (char *)stringlist[i],
680 : offsets[(i<<1)+1] - offsets[i<<1], 1);
681 : }
682 302 : add_next_index_stringl(result_set, (char *)stringlist[i],
683 : offsets[(i<<1)+1] - offsets[i<<1], 1);
684 : }
685 : }
686 : /* And add it to the output array */
687 40 : zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set, sizeof(zval *), NULL);
688 : }
689 : } else { /* single pattern matching */
690 : /* For each subpattern, insert it into the subpatterns array. */
691 104079 : for (i = 0; i < count; i++) {
692 69481 : if (offset_capture) {
693 10 : add_offset_pair(subpats, (char *)stringlist[i],
694 : offsets[(i<<1)+1] - offsets[i<<1],
695 : offsets[i<<1], subpat_names[i]);
696 : } else {
697 69471 : if (subpat_names[i]) {
698 13 : add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
699 : offsets[(i<<1)+1] - offsets[i<<1], 1);
700 : }
701 69471 : add_next_index_stringl(subpats, (char *)stringlist[i],
702 : offsets[(i<<1)+1] - offsets[i<<1], 1);
703 : }
704 : }
705 : }
706 :
707 34765 : pcre_free((void *) stringlist);
708 : }
709 1447105 : } else if (count == PCRE_ERROR_NOMATCH) {
710 : /* If we previously set PCRE_NOTEMPTY after a null match,
711 : this is not necessarily the end. We need to advance
712 : the start offset, and continue. Fudge the offset values
713 : to achieve this, unless we're already at the end of the string. */
714 1447102 : if (g_notempty != 0 && start_offset < subject_len) {
715 2 : offsets[0] = start_offset;
716 2 : offsets[1] = start_offset + 1;
717 : } else
718 : break;
719 : } else {
720 3 : pcre_handle_exec_error(count TSRMLS_CC);
721 3 : break;
722 : }
723 :
724 : /* If we have matched an empty string, mimic what Perl's /g options does.
725 : This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
726 : the match again at the same point. If this fails (picked up above) we
727 : advance to the next character. */
728 61740 : g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
729 :
730 : /* Advance to the position right after the last full match */
731 61740 : start_offset = offsets[1];
732 61740 : } while (global);
733 :
734 : /* Add the match sets to the output array and clean up */
735 1508674 : if (global && subpats_order == PREG_PATTERN_ORDER) {
736 747 : for (i = 0; i < num_subpats; i++) {
737 441 : if (subpat_names[i]) {
738 5 : zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i],
739 : strlen(subpat_names[i])+1, &match_sets[i], sizeof(zval *), NULL);
740 5 : Z_ADDREF_P(match_sets[i]);
741 : }
742 441 : zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i], sizeof(zval *), NULL);
743 : }
744 306 : efree(match_sets);
745 : }
746 :
747 1508674 : efree(offsets);
748 1508674 : efree(subpat_names);
749 :
750 1508674 : RETVAL_LONG(matched);
751 : }
752 : /* }}} */
753 :
754 : /* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
755 : Perform a Perl-style regular expression match */
756 : static PHP_FUNCTION(preg_match)
757 1508347 : {
758 1508347 : php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
759 1508347 : }
760 : /* }}} */
761 :
762 : /* {{{ proto int preg_match_all(string pattern, string subject, array &subpatterns [, int flags [, int offset]])
763 : Perform a Perl-style global regular expression match */
764 : static PHP_FUNCTION(preg_match_all)
765 98 : {
766 98 : php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
767 98 : }
768 : /* }}} */
769 :
/* {{{ preg_get_backref
 * Parses a back-reference of the form \N, $N or ${N} (N being one or two
 * decimal digits) located at *str. The first character is not validated
 * beyond the "${" check; the caller is expected to have seen '\' or '$'.
 *
 * On success returns 1, stores the reference number in *backref and advances
 * *str past the reference. On failure returns 0 and leaves both *str and
 * *backref untouched.
 */
static int preg_get_backref(char **str, int *backref)
{
	char *walk = *str;
	int in_brace = 0;
	int number;

	/* A trailing '\' or '$' cannot start a reference. */
	if (walk[1] == 0)
		return 0;

	if (*walk == '$' && walk[1] == '{') {
		in_brace = 1;
		walk++;
	}
	walk++;

	/* First digit is mandatory. */
	if (*walk < '0' || *walk > '9')
		return 0;
	number = *walk++ - '0';

	/* Optional second digit. */
	if (*walk >= '0' && *walk <= '9')
		number = number * 10 + (*walk++ - '0');

	/* The braced form must be closed right after the digits. */
	if (in_brace) {
		if (*walk != '}')
			return 0;
		walk++;
	}

	/* Only publish results once the whole reference is known to be valid. */
	*backref = number;
	*str = walk;
	return 1;
}
/* }}} */
808 :
809 : /* {{{ preg_do_repl_func
810 : */
811 : static int preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, char **result TSRMLS_DC)
812 36 : {
813 : zval *retval_ptr; /* Function return value */
814 : zval **args[1]; /* Argument to pass to function */
815 : zval *subpats; /* Captured subpatterns */
816 : int result_len; /* Return value length */
817 : int i;
818 :
819 36 : MAKE_STD_ZVAL(subpats);
820 36 : array_init(subpats);
821 82 : for (i = 0; i < count; i++) {
822 46 : if (subpat_names[i]) {
823 1 : add_assoc_stringl(subpats, subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1], 1);
824 : }
825 46 : add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1);
826 : }
827 36 : args[0] = &subpats;
828 :
829 71 : if (call_user_function_ex(EG(function_table), NULL, function, &retval_ptr, 1, args, 0, NULL TSRMLS_CC) == SUCCESS && retval_ptr) {
830 35 : convert_to_string_ex(&retval_ptr);
831 35 : *result = estrndup(Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr));
832 35 : result_len = Z_STRLEN_P(retval_ptr);
833 35 : zval_ptr_dtor(&retval_ptr);
834 : } else {
835 1 : if (!EG(exception)) {
836 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call custom replacement function");
837 : }
838 1 : result_len = offsets[1] - offsets[0];
839 1 : *result = estrndup(&subject[offsets[0]], result_len);
840 : }
841 :
842 36 : zval_ptr_dtor(&subpats);
843 :
844 36 : return result_len;
845 : }
846 : /* }}} */
847 :
848 : /* {{{ preg_do_eval
849 : */
850 : static int preg_do_eval(char *eval_str, int eval_str_len, char *subject,
851 : int *offsets, int count, char **result TSRMLS_DC)
852 5 : {
853 : zval retval; /* Return value from evaluation */
854 : char *eval_str_end, /* End of eval string */
855 : *match, /* Current match for a backref */
856 : *esc_match, /* Quote-escaped match */
857 : *walk, /* Used to walk the code string */
858 : *segment, /* Start of segment to append while walking */
859 : walk_last; /* Last walked character */
860 : int match_len; /* Length of the match */
861 : int esc_match_len; /* Length of the quote-escaped match */
862 : int result_len; /* Length of the result of the evaluation */
863 : int backref; /* Current backref */
864 : char *compiled_string_description;
865 5 : smart_str code = {0};
866 :
867 5 : eval_str_end = eval_str + eval_str_len;
868 5 : walk = segment = eval_str;
869 5 : walk_last = 0;
870 :
871 195 : while (walk < eval_str_end) {
872 : /* If found a backreference.. */
873 185 : if ('\\' == *walk || '$' == *walk) {
874 33 : smart_str_appendl(&code, segment, walk - segment);
875 33 : if (walk_last == '\\') {
876 0 : code.c[code.len-1] = *walk++;
877 0 : segment = walk;
878 0 : walk_last = 0;
879 0 : continue;
880 : }
881 33 : segment = walk;
882 33 : if (preg_get_backref(&walk, &backref)) {
883 8 : if (backref < count) {
884 : /* Find the corresponding string match and substitute it
885 : in instead of the backref */
886 8 : match = subject + offsets[backref<<1];
887 8 : match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
888 8 : if (match_len) {
889 8 : esc_match = php_addslashes_ex(match, match_len, &esc_match_len, 0, 1 TSRMLS_CC);
890 : } else {
891 0 : esc_match = match;
892 0 : esc_match_len = 0;
893 : }
894 : } else {
895 0 : esc_match = "";
896 0 : esc_match_len = 0;
897 : }
898 8 : smart_str_appendl(&code, esc_match, esc_match_len);
899 :
900 8 : segment = walk;
901 :
902 : /* Clean up and reassign */
903 8 : if (esc_match_len)
904 8 : efree(esc_match);
905 8 : continue;
906 : }
907 : }
908 177 : walk++;
909 177 : walk_last = walk[-1];
910 : }
911 5 : smart_str_appendl(&code, segment, walk - segment);
912 5 : smart_str_0(&code);
913 :
914 5 : compiled_string_description = zend_make_compiled_string_description("regexp code" TSRMLS_CC);
915 : /* Run the code */
916 5 : if (zend_eval_stringl(code.c, code.len, &retval, compiled_string_description TSRMLS_CC) == FAILURE) {
917 1 : efree(compiled_string_description);
918 1 : php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, code.c);
919 : /* zend_error() does not return in this case */
920 : }
921 4 : efree(compiled_string_description);
922 4 : convert_to_string(&retval);
923 :
924 : /* Save the return value and its length */
925 4 : *result = estrndup(Z_STRVAL(retval), Z_STRLEN(retval));
926 4 : result_len = Z_STRLEN(retval);
927 :
928 : /* Clean up */
929 4 : zval_dtor(&retval);
930 4 : smart_str_free(&code);
931 :
932 4 : return result_len;
933 : }
934 : /* }}} */
935 :
936 : /* {{{ php_pcre_replace
937 : */
938 : PHPAPI char *php_pcre_replace(char *regex, int regex_len,
939 : char *subject, int subject_len,
940 : zval *replace_val, int is_callable_replace,
941 : int *result_len, int limit, int *replace_count TSRMLS_DC)
942 21433 : {
943 : pcre_cache_entry *pce; /* Compiled regular expression */
944 :
945 : /* Compile regex or get it from cache. */
946 21433 : if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
947 9 : return NULL;
948 : }
949 :
950 21424 : return php_pcre_replace_impl(pce, subject, subject_len, replace_val,
951 : is_callable_replace, result_len, limit, replace_count TSRMLS_CC);
952 : }
953 : /* }}} */
954 :
955 : /* {{{ php_pcre_replace_impl() */
956 : PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *replace_val,
957 : int is_callable_replace, int *result_len, int limit, int *replace_count TSRMLS_DC)
958 21424 : {
959 21424 : pcre_extra *extra = pce->extra;/* Holds results of studying */
960 : pcre_extra extra_data; /* Used locally for exec options */
961 21424 : int exoptions = 0; /* Execution options */
962 21424 : int count = 0; /* Count of matched subpatterns */
963 : int *offsets; /* Array of subpattern offsets */
964 : char **subpat_names; /* Array for named subpatterns */
965 : int num_subpats; /* Number of captured subpatterns */
966 : int size_offsets; /* Size of the offsets array */
967 : int new_len; /* Length of needed storage */
968 : int alloc_len; /* Actual allocated length */
969 21424 : int eval_result_len=0; /* Length of the eval'ed or
970 : function-returned string */
971 : int match_len; /* Length of the current match */
972 : int backref; /* Backreference number */
973 : int eval; /* If the replacement string should be eval'ed */
974 : int start_offset; /* Where the new search starts */
975 21424 : int g_notempty=0; /* If the match should not be empty */
976 21424 : int replace_len=0; /* Length of replacement string */
977 : char *result, /* Result of replacement */
978 21424 : *replace=NULL, /* Replacement string */
979 : *new_buf, /* Temporary buffer for re-allocation */
980 : *walkbuf, /* Location of current replacement in the result */
981 : *walk, /* Used to walk the replacement string */
982 : *match, /* The current match */
983 : *piece, /* The current piece of subject */
984 21424 : *replace_end=NULL, /* End of replacement string */
985 : *eval_result, /* Result of eval or custom function */
986 : walk_last; /* Last walked character */
987 : int rc;
988 :
989 21424 : if (extra == NULL) {
990 21405 : extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
991 21405 : extra = &extra_data;
992 : }
993 21424 : extra->match_limit = PCRE_G(backtrack_limit);
994 21424 : extra->match_limit_recursion = PCRE_G(recursion_limit);
995 :
996 21424 : eval = pce->preg_options & PREG_REPLACE_EVAL;
997 21424 : if (is_callable_replace) {
998 28 : if (eval) {
999 1 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Modifier /e cannot be used with replacement callback");
1000 1 : return NULL;
1001 : }
1002 : } else {
1003 21396 : replace = Z_STRVAL_P(replace_val);
1004 21396 : replace_len = Z_STRLEN_P(replace_val);
1005 21396 : replace_end = replace + replace_len;
1006 : }
1007 :
1008 : /* Calculate the size of the offsets array, and allocate memory for it. */
1009 21423 : rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
1010 21423 : if (rc < 0) {
1011 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
1012 0 : return NULL;
1013 : }
1014 21423 : num_subpats++;
1015 21423 : size_offsets = num_subpats * 3;
1016 :
1017 : /*
1018 : * Build a mapping from subpattern numbers to their names. We will always
1019 : * allocate the table, even though there may be no named subpatterns. This
1020 : * avoids somewhat more complicated logic in the inner loops.
1021 : */
1022 21423 : subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
1023 21423 : if (!subpat_names) {
1024 1 : return NULL;
1025 : }
1026 :
1027 21422 : offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1028 :
1029 21422 : alloc_len = 2 * subject_len + 1;
1030 21422 : result = safe_emalloc(alloc_len, sizeof(char), 0);
1031 :
1032 : /* Initialize */
1033 21422 : match = NULL;
1034 21422 : *result_len = 0;
1035 21422 : start_offset = 0;
1036 21422 : PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1037 :
1038 : while (1) {
1039 : /* Execute the regular expression. */
1040 26341 : count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
1041 : exoptions|g_notempty, offsets, size_offsets);
1042 :
1043 : /* the string was already proved to be valid UTF-8 */
1044 26341 : exoptions |= PCRE_NO_UTF8_CHECK;
1045 :
1046 : /* Check for too many substrings condition. */
1047 26341 : if (count == 0) {
1048 0 : php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
1049 0 : count = size_offsets/3;
1050 : }
1051 :
1052 26341 : piece = subject + start_offset;
1053 :
1054 31260 : if (count > 0 && (limit == -1 || limit > 0)) {
1055 4920 : if (replace_count) {
1056 4920 : ++*replace_count;
1057 : }
1058 : /* Set the match location in subject */
1059 4920 : match = subject + offsets[0];
1060 :
1061 4920 : new_len = *result_len + offsets[0] - start_offset; /* part before the match */
1062 :
1063 : /* If evaluating, do it and add the return string's length */
1064 4920 : if (eval) {
1065 5 : eval_result_len = preg_do_eval(replace, replace_len, subject,
1066 : offsets, count, &eval_result TSRMLS_CC);
1067 4 : new_len += eval_result_len;
1068 4915 : } else if (is_callable_replace) {
1069 : /* Use custom function to get replacement string and its length. */
1070 36 : eval_result_len = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, &eval_result TSRMLS_CC);
1071 36 : new_len += eval_result_len;
1072 : } else { /* do regular substitution */
1073 4879 : walk = replace;
1074 4879 : walk_last = 0;
1075 14772 : while (walk < replace_end) {
1076 5014 : if ('\\' == *walk || '$' == *walk) {
1077 51 : if (walk_last == '\\') {
1078 0 : walk++;
1079 0 : walk_last = 0;
1080 0 : continue;
1081 : }
1082 51 : if (preg_get_backref(&walk, &backref)) {
1083 44 : if (backref < count)
1084 43 : new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1085 44 : continue;
1086 : }
1087 : }
1088 4970 : new_len++;
1089 4970 : walk++;
1090 4970 : walk_last = walk[-1];
1091 : }
1092 : }
1093 :
1094 4919 : if (new_len + 1 > alloc_len) {
1095 12 : alloc_len = 1 + alloc_len + 2 * new_len;
1096 12 : new_buf = emalloc(alloc_len);
1097 12 : memcpy(new_buf, result, *result_len);
1098 12 : efree(result);
1099 12 : result = new_buf;
1100 : }
1101 : /* copy the part of the string before the match */
1102 4919 : memcpy(&result[*result_len], piece, match-piece);
1103 4919 : *result_len += match-piece;
1104 :
1105 : /* copy replacement and backrefs */
1106 4919 : walkbuf = result + *result_len;
1107 :
1108 : /* If evaluating or using custom function, copy result to the buffer
1109 : * and clean up. */
1110 4959 : if (eval || is_callable_replace) {
1111 40 : memcpy(walkbuf, eval_result, eval_result_len);
1112 40 : *result_len += eval_result_len;
1113 40 : STR_FREE(eval_result);
1114 : } else { /* do regular backreference copying */
1115 4879 : walk = replace;
1116 4879 : walk_last = 0;
1117 14772 : while (walk < replace_end) {
1118 5014 : if ('\\' == *walk || '$' == *walk) {
1119 51 : if (walk_last == '\\') {
1120 0 : *(walkbuf-1) = *walk++;
1121 0 : walk_last = 0;
1122 0 : continue;
1123 : }
1124 51 : if (preg_get_backref(&walk, &backref)) {
1125 44 : if (backref < count) {
1126 43 : match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1127 43 : memcpy(walkbuf, subject + offsets[backref<<1], match_len);
1128 43 : walkbuf += match_len;
1129 : }
1130 44 : continue;
1131 : }
1132 : }
1133 4970 : *walkbuf++ = *walk++;
1134 4970 : walk_last = walk[-1];
1135 : }
1136 4879 : *walkbuf = '\0';
1137 : /* increment the result length by how much we've added to the string */
1138 4879 : *result_len += walkbuf - (result + *result_len);
1139 : }
1140 :
1141 4919 : if (limit != -1)
1142 20 : limit--;
1143 :
1144 21421 : } else if (count == PCRE_ERROR_NOMATCH || limit == 0) {
1145 : /* If we previously set PCRE_NOTEMPTY after a null match,
1146 : this is not necessarily the end. We need to advance
1147 : the start offset, and continue. Fudge the offset values
1148 : to achieve this, unless we're already at the end of the string. */
1149 21417 : if (g_notempty != 0 && start_offset < subject_len) {
1150 0 : offsets[0] = start_offset;
1151 0 : offsets[1] = start_offset + 1;
1152 0 : memcpy(&result[*result_len], piece, 1);
1153 0 : (*result_len)++;
1154 : } else {
1155 21417 : new_len = *result_len + subject_len - start_offset;
1156 21417 : if (new_len + 1 > alloc_len) {
1157 1 : alloc_len = new_len + 1; /* now we know exactly how long it is */
1158 1 : new_buf = safe_emalloc(alloc_len, sizeof(char), 0);
1159 1 : memcpy(new_buf, result, *result_len);
1160 1 : efree(result);
1161 1 : result = new_buf;
1162 : }
1163 : /* stick that last bit of string on our output */
1164 21417 : memcpy(&result[*result_len], piece, subject_len - start_offset);
1165 21417 : *result_len += subject_len - start_offset;
1166 21417 : result[*result_len] = '\0';
1167 21417 : break;
1168 : }
1169 : } else {
1170 4 : pcre_handle_exec_error(count TSRMLS_CC);
1171 4 : efree(result);
1172 4 : result = NULL;
1173 4 : break;
1174 : }
1175 :
1176 : /* If we have matched an empty string, mimic what Perl's /g options does.
1177 : This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1178 : the match again at the same point. If this fails (picked up above) we
1179 : advance to the next character. */
1180 4919 : g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1181 :
1182 : /* Advance to the next piece. */
1183 4919 : start_offset = offsets[1];
1184 4919 : }
1185 :
1186 21421 : efree(offsets);
1187 21421 : efree(subpat_names);
1188 :
1189 21421 : return result;
1190 : }
1191 : /* }}} */
1192 :
1193 : /* {{{ php_replace_in_subject
1194 : */
1195 : static char *php_replace_in_subject(zval *regex, zval *replace, zval **subject, int *result_len, int limit, int is_callable_replace, int *replace_count TSRMLS_DC)
1196 21390 : {
1197 : zval **regex_entry,
1198 21390 : **replace_entry = NULL,
1199 : *replace_value,
1200 : empty_replace;
1201 : char *subject_value,
1202 : *result;
1203 : int subject_len;
1204 :
1205 : /* Make sure we're dealing with strings. */
1206 21390 : convert_to_string_ex(subject);
1207 : /* FIXME: This might need to be changed to STR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */
1208 21390 : ZVAL_STRINGL(&empty_replace, "", 0, 0);
1209 :
1210 : /* If regex is an array */
1211 21390 : if (Z_TYPE_P(regex) == IS_ARRAY) {
1212 : /* Duplicate subject string for repeated replacement */
1213 22 : subject_value = estrndup(Z_STRVAL_PP(subject), Z_STRLEN_PP(subject));
1214 22 : subject_len = Z_STRLEN_PP(subject);
1215 22 : *result_len = subject_len;
1216 :
1217 22 : zend_hash_internal_pointer_reset(Z_ARRVAL_P(regex));
1218 :
1219 22 : replace_value = replace;
1220 22 : if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace)
1221 15 : zend_hash_internal_pointer_reset(Z_ARRVAL_P(replace));
1222 :
1223 : /* For each entry in the regex array, get the entry */
1224 108 : while (zend_hash_get_current_data(Z_ARRVAL_P(regex), (void **)®ex_entry) == SUCCESS) {
1225 : /* Make sure we're dealing with strings. */
1226 65 : convert_to_string_ex(regex_entry);
1227 :
1228 : /* If replace is an array and not a callable construct */
1229 65 : if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) {
1230 : /* Get current entry */
1231 50 : if (zend_hash_get_current_data(Z_ARRVAL_P(replace), (void **)&replace_entry) == SUCCESS) {
1232 48 : if (!is_callable_replace) {
1233 48 : convert_to_string_ex(replace_entry);
1234 : }
1235 48 : replace_value = *replace_entry;
1236 48 : zend_hash_move_forward(Z_ARRVAL_P(replace));
1237 : } else {
1238 : /* We've run out of replacement strings, so use an empty one */
1239 2 : replace_value = &empty_replace;
1240 : }
1241 : }
1242 :
1243 : /* Do the actual replacement and put the result back into subject_value
1244 : for further replacements. */
1245 65 : if ((result = php_pcre_replace(Z_STRVAL_PP(regex_entry),
1246 : Z_STRLEN_PP(regex_entry),
1247 : subject_value,
1248 : subject_len,
1249 : replace_value,
1250 : is_callable_replace,
1251 : result_len,
1252 : limit,
1253 : replace_count TSRMLS_CC)) != NULL) {
1254 64 : efree(subject_value);
1255 64 : subject_value = result;
1256 64 : subject_len = *result_len;
1257 : } else {
1258 1 : efree(subject_value);
1259 1 : return NULL;
1260 : }
1261 :
1262 64 : zend_hash_move_forward(Z_ARRVAL_P(regex));
1263 : }
1264 :
1265 21 : return subject_value;
1266 : } else {
1267 21368 : result = php_pcre_replace(Z_STRVAL_P(regex),
1268 : Z_STRLEN_P(regex),
1269 : Z_STRVAL_PP(subject),
1270 : Z_STRLEN_PP(subject),
1271 : replace,
1272 : is_callable_replace,
1273 : result_len,
1274 : limit,
1275 : replace_count TSRMLS_CC);
1276 21367 : return result;
1277 : }
1278 : }
1279 : /* }}} */
1280 :
1281 : /* {{{ preg_replace_impl
1282 : */
1283 : static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, int is_callable_replace, int is_filter)
1284 21397 : {
1285 : zval **regex,
1286 : **replace,
1287 : **subject,
1288 : **subject_entry,
1289 21397 : **zcount = NULL;
1290 : char *result;
1291 : int result_len;
1292 21397 : int limit_val = -1;
1293 21397 : long limit = -1;
1294 : char *string_key;
1295 : ulong num_key;
1296 : char *callback_name;
1297 21397 : int replace_count=0, old_replace_count;
1298 :
1299 : /* Get function parameters and do error-checking. */
1300 21397 : if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ZZZ|lZ", ®ex, &replace, &subject, &limit, &zcount) == FAILURE) {
1301 11 : return;
1302 : }
1303 :
1304 21386 : if (!is_callable_replace && Z_TYPE_PP(replace) == IS_ARRAY && Z_TYPE_PP(regex) != IS_ARRAY) {
1305 3 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
1306 3 : RETURN_FALSE;
1307 : }
1308 :
1309 21383 : SEPARATE_ZVAL(replace);
1310 21383 : if (Z_TYPE_PP(replace) != IS_ARRAY && (Z_TYPE_PP(replace) != IS_OBJECT || !is_callable_replace)) {
1311 21372 : convert_to_string_ex(replace);
1312 : }
1313 21382 : if (is_callable_replace) {
1314 32 : if (!zend_is_callable(*replace, 0, &callback_name TSRMLS_CC)) {
1315 4 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Requires argument 2, '%s', to be a valid callback", callback_name);
1316 4 : efree(callback_name);
1317 4 : *return_value = **subject;
1318 4 : zval_copy_ctor(return_value);
1319 4 : INIT_PZVAL(return_value);
1320 4 : return;
1321 : }
1322 28 : efree(callback_name);
1323 : }
1324 :
1325 21378 : SEPARATE_ZVAL(regex);
1326 21378 : SEPARATE_ZVAL(subject);
1327 :
1328 21378 : if (ZEND_NUM_ARGS() > 3) {
1329 10 : limit_val = limit;
1330 : }
1331 :
1332 21378 : if (Z_TYPE_PP(regex) != IS_ARRAY)
1333 21367 : convert_to_string_ex(regex);
1334 :
1335 : /* if subject is an array */
1336 21377 : if (Z_TYPE_PP(subject) == IS_ARRAY) {
1337 6 : array_init(return_value);
1338 6 : zend_hash_internal_pointer_reset(Z_ARRVAL_PP(subject));
1339 :
1340 : /* For each subject entry, convert it to string, then perform replacement
1341 : and add the result to the return_value array. */
1342 31 : while (zend_hash_get_current_data(Z_ARRVAL_PP(subject), (void **)&subject_entry) == SUCCESS) {
1343 19 : SEPARATE_ZVAL(subject_entry);
1344 19 : old_replace_count = replace_count;
1345 19 : if ((result = php_replace_in_subject(*regex, *replace, subject_entry, &result_len, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) {
1346 36 : if (!is_filter || replace_count > old_replace_count) {
1347 : /* Add to return array */
1348 17 : switch(zend_hash_get_current_key(Z_ARRVAL_PP(subject), &string_key, &num_key, 0))
1349 : {
1350 : case HASH_KEY_IS_STRING:
1351 1 : add_assoc_stringl(return_value, string_key, result, result_len, 0);
1352 1 : break;
1353 :
1354 : case HASH_KEY_IS_LONG:
1355 16 : add_index_stringl(return_value, num_key, result, result_len, 0);
1356 : break;
1357 : }
1358 : } else {
1359 2 : efree(result);
1360 : }
1361 : }
1362 :
1363 19 : zend_hash_move_forward(Z_ARRVAL_PP(subject));
1364 : }
1365 : } else { /* if subject is not an array */
1366 21371 : old_replace_count = replace_count;
1367 21371 : if ((result = php_replace_in_subject(*regex, *replace, subject, &result_len, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) {
1368 42710 : if (!is_filter || replace_count > old_replace_count) {
1369 21355 : RETVAL_STRINGL(result, result_len, 0);
1370 : } else {
1371 0 : efree(result);
1372 : }
1373 : }
1374 : }
1375 21376 : if (ZEND_NUM_ARGS() > 4) {
1376 8 : zval_dtor(*zcount);
1377 8 : ZVAL_LONG(*zcount, replace_count);
1378 : }
1379 :
1380 : }
1381 : /* }}} */
1382 :
1383 : /* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1384 : Perform Perl-style regular expression replacement. */
1385 : static PHP_FUNCTION(preg_replace)
1386 21356 : {
1387 21356 : preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
1388 21353 : }
1389 : /* }}} */
1390 :
1391 : /* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
1392 : Perform Perl-style regular expression replacement using replacement callback. */
1393 : static PHP_FUNCTION(preg_replace_callback)
1394 40 : {
1395 40 : preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1, 0);
1396 40 : }
1397 : /* }}} */
1398 :
1399 : /* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1400 : Perform Perl-style regular expression replacement and only return matches. */
1401 : static PHP_FUNCTION(preg_filter)
1402 1 : {
1403 1 : preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
1404 1 : }
1405 : /* }}} */
1406 :
1407 : /* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
1408 : Split string into an array using a perl-style regular expression as a delimiter */
1409 : static PHP_FUNCTION(preg_split)
1410 2046 : {
1411 : char *regex; /* Regular expression */
1412 : char *subject; /* String to match against */
1413 : int regex_len;
1414 : int subject_len;
1415 2046 : long limit_val = -1;/* Integer value of limit */
1416 2046 : long flags = 0; /* Match control flags */
1417 : pcre_cache_entry *pce; /* Compiled regular expression */
1418 :
1419 : /* Get function parameters and do error checking */
1420 2046 : if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ll", ®ex, ®ex_len,
1421 : &subject, &subject_len, &limit_val, &flags) == FAILURE) {
1422 8 : RETURN_FALSE;
1423 : }
1424 :
1425 : /* Compile regex or get it from cache. */
1426 2038 : if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
1427 5 : RETURN_FALSE;
1428 : }
1429 :
1430 2033 : php_pcre_split_impl(pce, subject, subject_len, return_value, limit_val, flags TSRMLS_CC);
1431 : }
1432 : /* }}} */
1433 :
1434 : /* {{{ php_pcre_split
1435 : */
1436 : PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
1437 : long limit_val, long flags TSRMLS_DC)
1438 2049 : {
1439 2049 : pcre_extra *extra = NULL; /* Holds results of studying */
1440 2049 : pcre *re_bump = NULL; /* Regex instance for empty matches */
1441 2049 : pcre_extra *extra_bump = NULL; /* Almost dummy */
1442 : pcre_extra extra_data; /* Used locally for exec options */
1443 : int *offsets; /* Array of subpattern offsets */
1444 : int size_offsets; /* Size of the offsets array */
1445 2049 : int exoptions = 0; /* Execution options */
1446 2049 : int count = 0; /* Count of matched subpatterns */
1447 : int start_offset; /* Where the new search starts */
1448 : int next_offset; /* End of the last delimiter match + 1 */
1449 2049 : int g_notempty = 0; /* If the match should not be empty */
1450 : char *last_match; /* Location of last match */
1451 : int rc;
1452 : int no_empty; /* If NO_EMPTY flag is set */
1453 : int delim_capture; /* If delimiters should be captured */
1454 : int offset_capture; /* If offsets should be captured */
1455 :
1456 2049 : no_empty = flags & PREG_SPLIT_NO_EMPTY;
1457 2049 : delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
1458 2049 : offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
1459 :
1460 2049 : if (limit_val == 0) {
1461 1 : limit_val = -1;
1462 : }
1463 :
1464 2049 : if (extra == NULL) {
1465 2049 : extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1466 2049 : extra = &extra_data;
1467 : }
1468 2049 : extra->match_limit = PCRE_G(backtrack_limit);
1469 2049 : extra->match_limit_recursion = PCRE_G(recursion_limit);
1470 :
1471 : /* Initialize return value */
1472 2049 : array_init(return_value);
1473 :
1474 : /* Calculate the size of the offsets array, and allocate memory for it. */
1475 2049 : rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
1476 2049 : if (rc < 0) {
1477 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
1478 0 : RETURN_FALSE;
1479 : }
1480 2049 : size_offsets = (size_offsets + 1) * 3;
1481 2049 : offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1482 :
1483 : /* Start at the beginning of the string */
1484 2049 : start_offset = 0;
1485 2049 : next_offset = 0;
1486 2049 : last_match = subject;
1487 2049 : PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1488 :
1489 : /* Get next piece if no limit or limit not yet reached and something matched*/
1490 7424 : while ((limit_val == -1 || limit_val > 1)) {
1491 5373 : count = pcre_exec(pce->re, extra, subject,
1492 : subject_len, start_offset,
1493 : exoptions|g_notempty, offsets, size_offsets);
1494 :
1495 : /* the string was already proved to be valid UTF-8 */
1496 5373 : exoptions |= PCRE_NO_UTF8_CHECK;
1497 :
1498 : /* Check for too many substrings condition. */
1499 5373 : if (count == 0) {
1500 0 : php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
1501 0 : count = size_offsets/3;
1502 : }
1503 :
1504 : /* If something matched */
1505 5373 : if (count > 0) {
1506 3268 : if (!no_empty || &subject[offsets[0]] != last_match) {
1507 :
1508 3209 : if (offset_capture) {
1509 : /* Add (match, offset) pair to the return value */
1510 26 : add_offset_pair(return_value, last_match, &subject[offsets[0]]-last_match, next_offset, NULL);
1511 : } else {
1512 : /* Add the piece to the return value */
1513 3183 : add_next_index_stringl(return_value, last_match,
1514 : &subject[offsets[0]]-last_match, 1);
1515 : }
1516 :
1517 : /* One less left to do */
1518 3209 : if (limit_val != -1)
1519 1 : limit_val--;
1520 : }
1521 :
1522 3268 : last_match = &subject[offsets[1]];
1523 3268 : next_offset = offsets[1];
1524 :
1525 3268 : if (delim_capture) {
1526 : int i, match_len;
1527 62 : for (i = 1; i < count; i++) {
1528 31 : match_len = offsets[(i<<1)+1] - offsets[i<<1];
1529 : /* If we have matched a delimiter */
1530 31 : if (!no_empty || match_len > 0) {
1531 21 : if (offset_capture) {
1532 10 : add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
1533 : } else {
1534 11 : add_next_index_stringl(return_value,
1535 : &subject[offsets[i<<1]],
1536 : match_len, 1);
1537 : }
1538 : }
1539 : }
1540 : }
1541 2105 : } else if (count == PCRE_ERROR_NOMATCH) {
1542 : /* If we previously set PCRE_NOTEMPTY after a null match,
1543 : this is not necessarily the end. We need to advance
1544 : the start offset, and continue. Fudge the offset values
1545 : to achieve this, unless we're already at the end of the string. */
1546 2104 : if (g_notempty != 0 && start_offset < subject_len) {
1547 58 : if (pce->compile_options & PCRE_UTF8) {
1548 12 : if (re_bump == NULL) {
1549 : int dummy;
1550 :
1551 2 : if ((re_bump = pcre_get_compiled_regex("/./us", &extra_bump, &dummy TSRMLS_CC)) == NULL) {
1552 0 : RETURN_FALSE;
1553 : }
1554 : }
1555 12 : count = pcre_exec(re_bump, extra_bump, subject,
1556 : subject_len, start_offset,
1557 : exoptions, offsets, size_offsets);
1558 12 : if (count < 1) {
1559 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error");
1560 0 : RETURN_FALSE;
1561 : }
1562 : } else {
1563 46 : offsets[0] = start_offset;
1564 46 : offsets[1] = start_offset + 1;
1565 : }
1566 : } else
1567 : break;
1568 : } else {
1569 1 : pcre_handle_exec_error(count TSRMLS_CC);
1570 1 : break;
1571 : }
1572 :
1573 : /* If we have matched an empty string, mimic what Perl's /g options does.
1574 : This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1575 : the match again at the same point. If this fails (picked up above) we
1576 : advance to the next character. */
1577 3326 : g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1578 :
1579 : /* Advance to the position right after the last full match */
1580 3326 : start_offset = offsets[1];
1581 : }
1582 :
1583 :
1584 2049 : start_offset = last_match - subject; /* the offset might have been incremented, but without further successful matches */
1585 :
1586 2049 : if (!no_empty || start_offset < subject_len)
1587 : {
1588 2041 : if (offset_capture) {
1589 : /* Add the last (match, offset) pair to the return value */
1590 5 : add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
1591 : } else {
1592 : /* Add the last piece to the return value */
1593 2036 : add_next_index_stringl(return_value, last_match, subject + subject_len - last_match, 1);
1594 : }
1595 : }
1596 :
1597 :
1598 : /* Clean up */
1599 2049 : efree(offsets);
1600 : }
1601 : /* }}} */
1602 :
1603 : /* {{{ proto string preg_quote(string str [, string delim_char])
1604 : Quote regular expression characters plus an optional character */
1605 : static PHP_FUNCTION(preg_quote)
1606 7041 : {
1607 : int in_str_len;
1608 : char *in_str; /* Input string argument */
1609 : char *in_str_end; /* End of the input string */
1610 7041 : int delim_len = 0;
1611 7041 : char *delim = NULL; /* Additional delimiter argument */
1612 : char *out_str, /* Output string with quoted characters */
1613 : *p, /* Iterator for input string */
1614 : *q, /* Iterator for output string */
1615 7041 : delim_char=0, /* Delimiter character to be quoted */
1616 : c; /* Current character */
1617 7041 : zend_bool quote_delim = 0; /* Whether to quote additional delim char */
1618 :
1619 : /* Get the arguments and check for errors */
1620 7041 : if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", &in_str, &in_str_len,
1621 : &delim, &delim_len) == FAILURE) {
1622 5 : return;
1623 : }
1624 :
1625 7036 : in_str_end = in_str + in_str_len;
1626 :
1627 : /* Nothing to do if we got an empty string */
1628 7036 : if (in_str == in_str_end) {
1629 2 : RETURN_EMPTY_STRING();
1630 : }
1631 :
1632 7034 : if (delim && *delim) {
1633 7031 : delim_char = delim[0];
1634 7031 : quote_delim = 1;
1635 : }
1636 :
1637 : /* Allocate enough memory so that even if each character
1638 : is quoted, we won't run out of room */
1639 7034 : out_str = safe_emalloc(4, in_str_len, 1);
1640 :
1641 : /* Go through the string and quote necessary characters */
1642 7374812 : for(p = in_str, q = out_str; p != in_str_end; p++) {
1643 7367778 : c = *p;
1644 7367778 : switch(c) {
1645 : case '.':
1646 : case '\\':
1647 : case '+':
1648 : case '*':
1649 : case '?':
1650 : case '[':
1651 : case '^':
1652 : case ']':
1653 : case '$':
1654 : case '(':
1655 : case ')':
1656 : case '{':
1657 : case '}':
1658 : case '=':
1659 : case '!':
1660 : case '>':
1661 : case '<':
1662 : case '|':
1663 : case ':':
1664 : case '-':
1665 1072495 : *q++ = '\\';
1666 1072495 : *q++ = c;
1667 1072495 : break;
1668 :
1669 : case '\0':
1670 1291 : *q++ = '\\';
1671 1291 : *q++ = '0';
1672 1291 : *q++ = '0';
1673 1291 : *q++ = '0';
1674 1291 : break;
1675 :
1676 : default:
1677 6293992 : if (quote_delim && c == delim_char)
1678 14627 : *q++ = '\\';
1679 6293992 : *q++ = c;
1680 : break;
1681 : }
1682 : }
1683 7034 : *q = '\0';
1684 :
1685 : /* Reallocate string and return it */
1686 7034 : RETVAL_STRINGL(erealloc(out_str, q - out_str + 1), q - out_str, 0);
1687 : }
1688 : /* }}} */
1689 :
1690 : /* {{{ proto array preg_grep(string regex, array input [, int flags])
1691 : Searches array and returns entries which match regex */
1692 : static PHP_FUNCTION(preg_grep)
1693 29 : {
1694 : char *regex; /* Regular expression */
1695 : int regex_len;
1696 : zval *input; /* Input array */
1697 29 : long flags = 0; /* Match control flags */
1698 : pcre_cache_entry *pce; /* Compiled regular expression */
1699 :
1700 : /* Get arguments and do error checking */
1701 29 : if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sa|l", ®ex, ®ex_len,
1702 : &input, &flags) == FAILURE) {
1703 9 : return;
1704 : }
1705 :
1706 : /* Compile regex or get it from cache. */
1707 20 : if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
1708 5 : RETURN_FALSE;
1709 : }
1710 :
1711 15 : php_pcre_grep_impl(pce, input, return_value, flags TSRMLS_CC);
1712 : }
1713 : /* }}} */
1714 :
1715 : PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, long flags TSRMLS_DC) /* {{{ */
1716 15 : {
1717 : zval **entry; /* An entry in the input array */
1718 15 : pcre_extra *extra = pce->extra;/* Holds results of studying */
1719 : pcre_extra extra_data; /* Used locally for exec options */
1720 : int *offsets; /* Array of subpattern offsets */
1721 : int size_offsets; /* Size of the offsets array */
1722 15 : int count = 0; /* Count of matched subpatterns */
1723 : char *string_key;
1724 : ulong num_key;
1725 : zend_bool invert; /* Whether to return non-matching
1726 : entries */
1727 : int rc;
1728 :
1729 15 : invert = flags & PREG_GREP_INVERT ? 1 : 0;
1730 :
1731 15 : if (extra == NULL) {
1732 15 : extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1733 15 : extra = &extra_data;
1734 : }
1735 15 : extra->match_limit = PCRE_G(backtrack_limit);
1736 15 : extra->match_limit_recursion = PCRE_G(recursion_limit);
1737 :
1738 : /* Calculate the size of the offsets array, and allocate memory for it. */
1739 15 : rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
1740 15 : if (rc < 0) {
1741 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
1742 0 : RETURN_FALSE;
1743 : }
1744 15 : size_offsets = (size_offsets + 1) * 3;
1745 15 : offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1746 :
1747 : /* Initialize return array */
1748 15 : array_init(return_value);
1749 :
1750 15 : PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1751 :
1752 : /* Go through the input array */
1753 15 : zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));
1754 104 : while (zend_hash_get_current_data(Z_ARRVAL_P(input), (void **)&entry) == SUCCESS) {
1755 75 : zval subject = **entry;
1756 :
1757 75 : if (Z_TYPE_PP(entry) != IS_STRING) {
1758 23 : zval_copy_ctor(&subject);
1759 23 : convert_to_string(&subject);
1760 : }
1761 :
1762 : /* Perform the match */
1763 75 : count = pcre_exec(pce->re, extra, Z_STRVAL(subject),
1764 : Z_STRLEN(subject), 0,
1765 : 0, offsets, size_offsets);
1766 :
1767 : /* Check for too many substrings condition. */
1768 75 : if (count == 0) {
1769 0 : php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
1770 0 : count = size_offsets/3;
1771 75 : } else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
1772 1 : pcre_handle_exec_error(count TSRMLS_CC);
1773 1 : break;
1774 : }
1775 :
1776 : /* If the entry fits our requirements */
1777 74 : if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
1778 :
1779 30 : Z_ADDREF_PP(entry);
1780 :
1781 : /* Add to return array */
1782 30 : switch (zend_hash_get_current_key(Z_ARRVAL_P(input), &string_key, &num_key, 0))
1783 : {
1784 : case HASH_KEY_IS_STRING:
1785 2 : zend_hash_update(Z_ARRVAL_P(return_value), string_key,
1786 : strlen(string_key)+1, entry, sizeof(zval *), NULL);
1787 2 : break;
1788 :
1789 : case HASH_KEY_IS_LONG:
1790 28 : zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry,
1791 : sizeof(zval *), NULL);
1792 : break;
1793 : }
1794 : }
1795 :
1796 74 : if (Z_TYPE_PP(entry) != IS_STRING) {
1797 23 : zval_dtor(&subject);
1798 : }
1799 :
1800 74 : zend_hash_move_forward(Z_ARRVAL_P(input));
1801 : }
1802 15 : zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));
1803 : /* Clean up */
1804 15 : efree(offsets);
1805 : }
1806 : /* }}} */
1807 :
1808 : /* {{{ proto int preg_last_error()
1809 : Returns the error code of the last regexp execution. */
1810 : static PHP_FUNCTION(preg_last_error)
1811 17 : {
1812 17 : if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "") == FAILURE) {
1813 2 : return;
1814 : }
1815 :
1816 15 : RETURN_LONG(PCRE_G(error_code));
1817 : }
1818 : /* }}} */
1819 :
1820 : /* {{{ module definition structures */
1821 :
1822 : /* {{{ arginfo */
/*
 * One argument descriptor per public preg_* function; each is referenced by
 * name from the pcre_functions table.
 * NOTE(review): by the Zend API convention the last argument of
 * ZEND_BEGIN_ARG_INFO_EX is the required-argument count and the first
 * argument of ZEND_ARG_INFO is the pass-by-reference flag -- confirm against
 * zend_API.h for this PHP version.
 */
1823 : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
1824 : ZEND_ARG_INFO(0, pattern)
1825 : ZEND_ARG_INFO(0, subject)
1826 : ZEND_ARG_INFO(1, subpatterns) /* array; the only argument flagged 1 in this descriptor */
1827 : ZEND_ARG_INFO(0, flags)
1828 : ZEND_ARG_INFO(0, offset)
1829 : ZEND_END_ARG_INFO()
1830 :
1831 : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 3) /* same argument list as arginfo_preg_match, but with 3 as the final macro argument */
1832 : ZEND_ARG_INFO(0, pattern)
1833 : ZEND_ARG_INFO(0, subject)
1834 : ZEND_ARG_INFO(1, subpatterns) /* array; the only argument flagged 1 in this descriptor */
1835 : ZEND_ARG_INFO(0, flags)
1836 : ZEND_ARG_INFO(0, offset)
1837 : ZEND_END_ARG_INFO()
1838 :
1839 : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3) /* used for both preg_replace and preg_filter in pcre_functions */
1840 : ZEND_ARG_INFO(0, regex)
1841 : ZEND_ARG_INFO(0, replace)
1842 : ZEND_ARG_INFO(0, subject)
1843 : ZEND_ARG_INFO(0, limit)
1844 : ZEND_ARG_INFO(1, count) /* flagged 1, unlike the other arguments */
1845 : ZEND_END_ARG_INFO()
1846 :
1847 : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
1848 : ZEND_ARG_INFO(0, regex)
1849 : ZEND_ARG_INFO(0, callback)
1850 : ZEND_ARG_INFO(0, subject)
1851 : ZEND_ARG_INFO(0, limit)
1852 : ZEND_ARG_INFO(1, count) /* flagged 1, unlike the other arguments */
1853 : ZEND_END_ARG_INFO()
1854 :
1855 : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
1856 : ZEND_ARG_INFO(0, pattern)
1857 : ZEND_ARG_INFO(0, subject)
1858 : ZEND_ARG_INFO(0, limit)
1859 : ZEND_ARG_INFO(0, flags)
1860 : ZEND_END_ARG_INFO()
1861 :
1862 : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
1863 : ZEND_ARG_INFO(0, str)
1864 : ZEND_ARG_INFO(0, delim_char)
1865 : ZEND_END_ARG_INFO()
1866 :
1867 : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2) /* preg_grep parses its arguments with "sa|l" */
1868 : ZEND_ARG_INFO(0, regex)
1869 : ZEND_ARG_INFO(0, input) /* array */
1870 : ZEND_ARG_INFO(0, flags)
1871 : ZEND_END_ARG_INFO()
1872 :
1873 : ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0) /* no ZEND_ARG_INFO entries: preg_last_error parses an empty argument spec */
1874 : ZEND_END_ARG_INFO()
1875 : /* }}} */
1876 :
/*
 * Table of the PHP-visible functions of this extension, pairing each
 * PHP_FUNCTION handler with its argument descriptor. The table is passed to
 * pcre_module_entry below.
 */
1877 : static const zend_function_entry pcre_functions[] = {
1878 : PHP_FE(preg_match, arginfo_preg_match)
1879 : PHP_FE(preg_match_all, arginfo_preg_match_all)
1880 : PHP_FE(preg_replace, arginfo_preg_replace)
1881 : PHP_FE(preg_replace_callback, arginfo_preg_replace_callback)
1882 : PHP_FE(preg_filter, arginfo_preg_replace) /* reuses preg_replace's descriptor */
1883 : PHP_FE(preg_split, arginfo_preg_split)
1884 : PHP_FE(preg_quote, arginfo_preg_quote)
1885 : PHP_FE(preg_grep, arginfo_preg_grep)
1886 : PHP_FE(preg_last_error, arginfo_preg_last_error)
1887 : {NULL, NULL, NULL} /* all-NULL last entry; presumably the end-of-table marker -- confirm */
1888 : };
1889 :
/*
 * Module descriptor for the "pcre" extension.
 * NOTE(review): the per-slot remarks below follow the usual
 * zend_module_entry field order and cannot be verified from this file
 * alone -- confirm against zend_modules.h.
 */
1890 : zend_module_entry pcre_module_entry = {
1891 : STANDARD_MODULE_HEADER,
1892 : "pcre", /* module name */
1893 : pcre_functions, /* function table defined above */
1894 : PHP_MINIT(pcre), /* presumably the module startup hook */
1895 : PHP_MSHUTDOWN(pcre), /* presumably the module shutdown hook */
1896 : NULL, /* presumably request startup: none */
1897 : NULL, /* presumably request shutdown: none */
1898 : PHP_MINFO(pcre), /* presumably the module info hook */
1899 : NO_VERSION_YET,
1900 : PHP_MODULE_GLOBALS(pcre),
1901 : PHP_GINIT(pcre), /* presumably the globals constructor */
1902 : PHP_GSHUTDOWN(pcre), /* presumably the globals destructor */
1903 : NULL, /* presumably the post-deactivate hook: none */
1904 : STANDARD_MODULE_PROPERTIES_EX
1905 : };
1906 :
/* Emitted only when COMPILE_DL_PCRE is defined; presumably provides the
   entry point used when pcre is built as a loadable module -- confirm. */
1907 : #ifdef COMPILE_DL_PCRE
1908 : ZEND_GET_MODULE(pcre)
1909 : #endif
1910 :
1911 : /* }}} */
1912 :
1913 : #endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
1914 :
1915 : /*
1916 : * Local variables:
1917 : * tab-width: 4
1918 : * c-basic-offset: 4
1919 : * End:
1920 : * vim600: sw=4 ts=4 fdm=marker
1921 : * vim<600: sw=4 ts=4
1922 : */
|