1 : /*
2 : +----------------------------------------------------------------------+
3 : | PHP Version 6 |
4 : +----------------------------------------------------------------------+
5 : | Copyright (c) 1997-2009 The PHP Group |
6 : +----------------------------------------------------------------------+
7 : | This source file is subject to version 3.01 of the PHP license, |
8 : | that is bundled with this package in the file LICENSE, and is |
9 : | available through the world-wide-web at the following url: |
10 : | http://www.php.net/license/3_01.txt |
11 : | If you did not receive a copy of the PHP license and are unable to |
12 : | obtain it through the world-wide-web, please send a note to |
13 : | license@php.net so we can mail you a copy immediately. |
14 : +----------------------------------------------------------------------+
15 : | Author: Andrei Zmievski <andrei@php.net> |
16 : +----------------------------------------------------------------------+
17 : */
18 :
19 : /* $Id: php_pcre.c 288111 2009-09-06 17:41:34Z felipe $ */
20 :
21 : /* TODO
22 : * php_pcre_replace_impl():
23 : * - should use fcall info cache (enhancement)
24 : */
25 :
26 : #include "php.h"
27 : #include "php_ini.h"
28 : #include "php_globals.h"
29 : #include "php_pcre.h"
30 : #include "ext/standard/info.h"
31 : #include "ext/standard/php_smart_str.h"
32 :
33 : #if HAVE_PCRE || HAVE_BUNDLED_PCRE
34 :
35 : #include "ext/standard/php_string.h"
36 :
/* Result-ordering modes for global matching. php_pcre_match_impl() reads the
 * mode from the low byte of its flags argument (flags & 0xff). */
37 : #define PREG_PATTERN_ORDER 1
38 : #define PREG_SET_ORDER 2
/* Flag bit: report every captured string as a (text, offset) pair. */
39 : #define PREG_OFFSET_CAPTURE (1<<8)
40 :
/* Flag bits registered below as PREG_SPLIT_* constants; the code consuming
 * them is not in this part of the file. */
41 : #define PREG_SPLIT_NO_EMPTY (1<<0)
42 : #define PREG_SPLIT_DELIM_CAPTURE (1<<1)
43 : #define PREG_SPLIT_OFFSET_CAPTURE (1<<2)
44 :
/* Stored in pcre_cache_entry.preg_options when the 'e' modifier is parsed. */
45 : #define PREG_REPLACE_EVAL (1<<0)
46 :
/* Registered below as PREG_GREP_INVERT; consumer not in this part of the file. */
47 : #define PREG_GREP_INVERT (1<<0)
48 :
/* Number of compiled patterns kept in the cache; when the cache holds exactly
 * this many entries, PCRE_CACHE_SIZE / 8 of them are evicted before inserting. */
49 : #define PCRE_CACHE_SIZE 4096
50 :
/* Error codes stored in PCRE_G(error_code) and exported to userland as the
 * PREG_*_ERROR constants registered in MINIT. pcre_handle_exec_error() maps
 * pcre_exec() failures onto these values. */
51 : enum {
52 : PHP_PCRE_NO_ERROR = 0,
53 : PHP_PCRE_INTERNAL_ERROR, /* any pcre_exec() error without a dedicated code */
54 : PHP_PCRE_BACKTRACK_LIMIT_ERROR, /* PCRE_ERROR_MATCHLIMIT */
55 : PHP_PCRE_RECURSION_LIMIT_ERROR, /* PCRE_ERROR_RECURSIONLIMIT */
56 : PHP_PCRE_BAD_UTF8_ERROR, /* PCRE_ERROR_BADUTF8 */
57 : PHP_PCRE_BAD_UTF8_OFFSET_ERROR /* PCRE_ERROR_BADUTF8_OFFSET */
58 : };
59 :
/* Running byte-offset -> code-point-offset conversion state used by
 * add_offset_pair() while it walks a UTF-8 subject. */
60 : typedef struct {
61 : char *str; /* subject buffer that the offsets index into */
62 : int byte_offset; /* byte position that has already been converted */
63 : int cp_offset; /* number of code points counted up to byte_offset */
64 : } offset_map_t;
65 :
/* Declares the storage for this module's globals (pattern cache, limits and
 * last error code are the fields initialised in GINIT). Presumably this is
 * what PCRE_G() resolves to; the macro is defined elsewhere - confirm. */
66 : ZEND_DECLARE_MODULE_GLOBALS(pcre)
67 :
68 :
69 : static void pcre_handle_exec_error(int pcre_code TSRMLS_DC) /* {{{ */
70 8 : {
71 8 : int preg_code = 0;
72 :
73 8 : switch (pcre_code) {
74 : case PCRE_ERROR_MATCHLIMIT:
75 3 : preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
76 3 : break;
77 :
78 : case PCRE_ERROR_RECURSIONLIMIT:
79 3 : preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
80 3 : break;
81 :
82 : case PCRE_ERROR_BADUTF8:
83 1 : preg_code = PHP_PCRE_BAD_UTF8_ERROR;
84 1 : break;
85 :
86 : case PCRE_ERROR_BADUTF8_OFFSET:
87 1 : preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
88 1 : break;
89 :
90 : default:
91 0 : preg_code = PHP_PCRE_INTERNAL_ERROR;
92 : break;
93 : }
94 :
95 8 : PCRE_G(error_code) = preg_code;
96 8 : }
97 : /* }}} */
98 :
99 :
100 : static void php_free_pcre_cache(void *data) /* {{{ */
101 23837 : {
102 23837 : pcre_cache_entry *pce = (pcre_cache_entry *) data;
103 23837 : if (!pce) return;
104 23837 : pefree(pce->re, 1);
105 23837 : if (pce->extra) pefree(pce->extra, 1);
106 : #if HAVE_SETLOCALE
107 23837 : if ((void*)pce->tables) pefree((void*)pce->tables, 1);
108 23837 : pefree(pce->locale, 1);
109 : #endif
110 : }
111 : /* }}} */
112 :
113 :
114 : static PHP_GINIT_FUNCTION(pcre) /* {{{ */
115 17007 : {
116 17007 : zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
117 17007 : pcre_globals->backtrack_limit = 0;
118 17007 : pcre_globals->recursion_limit = 0;
119 17007 : pcre_globals->error_code = PHP_PCRE_NO_ERROR;
120 17007 : }
121 : /* }}} */
122 :
123 : static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
124 17039 : {
125 17039 : zend_hash_destroy(&pcre_globals->pcre_cache);
126 17039 : }
127 : /* }}} */
128 :
/* INI settings of the extension. Both default to "100000", are declared with
 * PHP_INI_ALL and are bound to the backtrack_limit / recursion_limit fields of
 * the module globals. php_pcre_match_impl() copies those fields into
 * pcre_extra.match_limit and pcre_extra.match_limit_recursion before matching. */
129 : PHP_INI_BEGIN()
130 : STD_PHP_INI_ENTRY("pcre.backtrack_limit", "100000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
131 : STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
132 : PHP_INI_END()
133 :
134 :
/* {{{ PHP_MINFO_FUNCTION(pcre)
 * Emits the extension's info section: a two-row table (support status and the
 * version string reported by pcre_version()) followed by the INI entries.
 */
static PHP_MINFO_FUNCTION(pcre)
{
	/* Status table */
	php_info_print_table_start();
	php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled");
	php_info_print_table_row(2, "PCRE Library Version", pcre_version());
	php_info_print_table_end();

	/* pcre.* INI settings */
	DISPLAY_INI_ENTRIES();
}
/* }}} */
146 :
147 : /* {{{ PHP_MINIT_FUNCTION(pcre) */
148 : static PHP_MINIT_FUNCTION(pcre)
149 17007 : {
150 17007 : REGISTER_INI_ENTRIES();
151 :
152 17007 : REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
153 17007 : REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
154 17007 : REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
155 17007 : REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
156 17007 : REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
157 17007 : REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
158 17007 : REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
159 :
160 17007 : REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
161 17007 : REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
162 17007 : REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
163 17007 : REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
164 17007 : REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
165 17007 : REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
166 17007 : REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT);
167 :
168 17007 : return SUCCESS;
169 : }
170 : /* }}} */
171 :
172 : /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
173 : static PHP_MSHUTDOWN_FUNCTION(pcre)
174 17039 : {
175 17039 : UNREGISTER_INI_ENTRIES();
176 :
177 17039 : return SUCCESS;
178 : }
179 : /* }}} */
180 :
181 : /* {{{ static pcre_clean_cache */
182 : static int pcre_clean_cache(void *data, void *arg TSRMLS_DC)
183 40960 : {
184 40960 : int *num_clean = (int *)arg;
185 :
186 40960 : if (*num_clean > 0) {
187 5120 : (*num_clean)--;
188 5120 : return 1;
189 : } else {
190 35840 : return 0;
191 : }
192 : }
193 : /* }}} */
194 :
195 : /* {{{ static make_subpats_table */
196 : static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce TSRMLS_DC)
197 3560667 : {
198 3560667 : pcre_extra *extra = pce->extra;
199 3560667 : int name_cnt = 0, name_size, ni = 0;
200 : int rc;
201 : char *name_table;
202 : unsigned short name_idx;
203 3560667 : char **subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
204 :
205 3560667 : rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMECOUNT, &name_cnt);
206 3560667 : if (rc < 0) {
207 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
208 0 : efree(subpat_names);
209 0 : return NULL;
210 : }
211 3560667 : if (name_cnt > 0) {
212 : int rc1, rc2;
213 12 : rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
214 12 : rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
215 12 : rc = rc2 ? rc2 : rc1;
216 12 : if (rc < 0) {
217 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
218 0 : efree(subpat_names);
219 0 : return NULL;
220 : }
221 :
222 1076 : while (ni++ < name_cnt) {
223 1054 : name_idx = 0xff * (unsigned char)name_table[0] + (unsigned char)name_table[1];
224 1054 : subpat_names[name_idx] = name_table + 2;
225 1054 : if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
226 2 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Numeric named subpatterns are not allowed");
227 2 : efree(subpat_names);
228 2 : return NULL;
229 : }
230 1052 : name_table += name_size;
231 : }
232 : }
233 :
234 3560665 : return subpat_names;
235 : }
236 : /* }}} */
237 :
238 : /* {{{ pcre_get_compiled_regex_cache
239 : */
240 : PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_uchar utype, char *regex, int regex_len TSRMLS_DC)
241 3690455 : {
242 3690455 : pcre *re = NULL;
243 : pcre_extra *extra;
244 3690455 : int coptions = 0;
245 3690455 : int soptions = 0;
246 : const char *error;
247 : int erroffset;
248 : char delimiter;
249 : char start_delimiter;
250 : char end_delimiter;
251 : char *p, *pp;
252 : char *pattern;
253 3690455 : int do_study = 0;
254 3690455 : int poptions = 0;
255 3690455 : unsigned const char *tables = NULL;
256 : #if HAVE_SETLOCALE
257 3690455 : char *locale = setlocale(LC_CTYPE, NULL);
258 : #endif
259 : pcre_cache_entry *pce;
260 : pcre_cache_entry new_entry;
261 :
262 : /* Try to lookup the cached regex entry, and if successful, just pass
263 : back the compiled pattern, otherwise go on and compile it. */
264 3690455 : if (zend_hash_find(&PCRE_G(pcre_cache), regex, regex_len+1, (void **)&pce) == SUCCESS) {
265 : /*
266 : * We use a quick pcre_info() check to see whether cache is corrupted, and if it
267 : * is, we flush it and compile the pattern from scratch.
268 : */
269 3666543 : if (pcre_info(pce->re, NULL, NULL) == PCRE_ERROR_BADMAGIC) {
270 0 : zend_hash_clean(&PCRE_G(pcre_cache));
271 : } else {
272 : #if HAVE_SETLOCALE
273 3666543 : if (!strcmp(pce->locale, locale)) {
274 3666543 : return pce;
275 : }
276 : #else
277 : return pce;
278 : #endif
279 : }
280 : }
281 :
282 23912 : p = regex;
283 :
284 : /* Parse through the leading whitespace, and display a warning if we
285 : get to the end without encountering a delimiter. */
286 23912 : while (isspace((int)*(unsigned char *)p)) p++;
287 23912 : if (*p == 0) {
288 3 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
289 3 : return NULL;
290 : }
291 :
292 : /* Get the delimiter and display a warning if it is alphanumeric
293 : or a backslash. */
294 23909 : delimiter = *p++;
295 23909 : if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
296 7 : php_error_docref(NULL TSRMLS_CC,E_WARNING, "Delimiter must not be alphanumeric or backslash");
297 7 : return NULL;
298 : }
299 :
300 23902 : start_delimiter = delimiter;
301 23902 : if ((pp = strchr("([{< )]}> )]}>", delimiter)))
302 20 : delimiter = pp[5];
303 23902 : end_delimiter = delimiter;
304 :
305 23902 : if (start_delimiter == end_delimiter) {
306 : /* We need to iterate through the pattern, searching for the ending delimiter,
307 : but skipping the backslashed delimiters. If the ending delimiter is not
308 : found, display a warning. */
309 23882 : pp = p;
310 14844533 : while (*pp != 0) {
311 14820644 : if (*pp == '\\' && pp[1] != 0) pp++;
312 13889661 : else if (*pp == delimiter)
313 23875 : break;
314 14796769 : pp++;
315 : }
316 23882 : if (*pp == 0) {
317 7 : php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending delimiter '%c' found", delimiter);
318 7 : return NULL;
319 : }
320 : } else {
321 : /* We iterate through the pattern, searching for the matching ending
322 : * delimiter. For each matching starting delimiter, we increment nesting
323 : * level, and decrement it for each matching ending delimiter. If we
324 : * reach the end of the pattern without matching, display a warning.
325 : */
326 20 : int brackets = 1; /* brackets nesting level */
327 20 : pp = p;
328 180 : while (*pp != 0) {
329 159 : if (*pp == '\\' && pp[1] != 0) pp++;
330 147 : else if (*pp == end_delimiter && --brackets <= 0)
331 : break;
332 128 : else if (*pp == start_delimiter)
333 1 : brackets++;
334 140 : pp++;
335 : }
336 20 : if (*pp == 0) {
337 1 : php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending matching delimiter '%c' found", end_delimiter);
338 1 : return NULL;
339 : }
340 : }
341 :
342 : /* Make a copy of the actual pattern. */
343 23894 : pattern = estrndup(p, pp-p);
344 :
345 : /* Move on to the options */
346 23894 : pp++;
347 :
348 : /* Parse through the options, setting appropriate flags. Display
349 : a warning if we encounter an unknown modifier. */
350 59048 : while (*pp != 0) {
351 11273 : switch (*pp++) {
352 : /* Perl compatible options */
353 913 : case 'i': coptions |= PCRE_CASELESS; break;
354 853 : case 'm': coptions |= PCRE_MULTILINE; break;
355 9389 : case 's': coptions |= PCRE_DOTALL; break;
356 4 : case 'x': coptions |= PCRE_EXTENDED; break;
357 :
358 : /* PCRE specific options */
359 2 : case 'A': coptions |= PCRE_ANCHORED; break;
360 7 : case 'D': coptions |= PCRE_DOLLAR_ENDONLY;break;
361 28 : case 'S': do_study = 1; break;
362 27 : case 'U': coptions |= PCRE_UNGREEDY; break;
363 1 : case 'X': coptions |= PCRE_EXTRA; break;
364 29 : case 'u': coptions |= PCRE_UTF8; break;
365 :
366 : /* Custom preg options */
367 5 : case 'e': poptions |= PREG_REPLACE_EVAL; break;
368 :
369 : case ' ':
370 : case '\n':
371 2 : break;
372 :
373 : default:
374 13 : php_error_docref(NULL TSRMLS_CC,E_WARNING, "Unknown modifier '%c'", pp[-1]);
375 13 : efree(pattern);
376 13 : return NULL;
377 : }
378 : }
379 :
380 23881 : if (utype == IS_UNICODE) {
381 15293 : coptions |= PCRE_UTF8;
382 : }
383 :
384 : #if HAVE_SETLOCALE
385 23881 : if (strcmp(locale, "C"))
386 2 : tables = pcre_maketables();
387 : #endif
388 :
389 : /* Compile pattern and display a warning if compilation failed. */
390 23881 : re = pcre_compile(pattern,
391 : coptions,
392 : &error,
393 : &erroffset,
394 : tables);
395 :
396 23881 : if (re == NULL) {
397 44 : php_error_docref(NULL TSRMLS_CC,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
398 44 : efree(pattern);
399 44 : if (tables) {
400 0 : pefree((void*)tables, 1);
401 : }
402 44 : return NULL;
403 : }
404 :
405 : /* If study option was specified, study the pattern and
406 : store the result in extra for passing to pcre_exec. */
407 23837 : if (do_study) {
408 28 : extra = pcre_study(re, soptions, &error);
409 28 : if (extra) {
410 28 : extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
411 : }
412 28 : if (error != NULL) {
413 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Error while studying pattern");
414 : }
415 : } else {
416 23809 : extra = NULL;
417 : }
418 :
419 23837 : efree(pattern);
420 :
421 : /*
422 : * If we reached cache limit, clean out the items from the head of the list;
423 : * these are supposedly the oldest ones (but not necessarily the least used
424 : * ones).
425 : */
426 23837 : if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
427 10 : int num_clean = PCRE_CACHE_SIZE / 8;
428 10 : zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean TSRMLS_CC);
429 : }
430 :
431 : /* Store the compiled pattern and extra info in the cache. */
432 23837 : new_entry.re = re;
433 23837 : new_entry.extra = extra;
434 23837 : new_entry.preg_options = poptions;
435 23837 : new_entry.compile_options = coptions;
436 : #if HAVE_SETLOCALE
437 23837 : new_entry.locale = pestrdup(locale, 1);
438 23837 : new_entry.tables = tables;
439 : #endif
440 23837 : zend_hash_update(&PCRE_G(pcre_cache), regex, regex_len+1, (void *)&new_entry,
441 : sizeof(pcre_cache_entry), (void**)&pce);
442 :
443 23837 : return pce;
444 : }
445 : /* }}} */
446 :
447 : /* {{{ pcre_get_compiled_regex
448 : */
449 : PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *preg_options TSRMLS_DC)
450 127831 : {
451 127831 : pcre_cache_entry * pce = pcre_get_compiled_regex_cache(IS_UNICODE, regex, strlen(regex) TSRMLS_CC);
452 :
453 127831 : if (extra) {
454 127831 : *extra = pce ? pce->extra : NULL;
455 : }
456 127831 : if (preg_options) {
457 127831 : *preg_options = pce ? pce->preg_options : 0;
458 : }
459 :
460 127831 : return pce ? pce->re : NULL;
461 : }
462 : /* }}} */
463 :
464 : /* {{{ pcre_get_compiled_regex_ex
465 : */
466 : PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int *preg_options, int *compile_options TSRMLS_DC)
467 0 : {
468 0 : pcre_cache_entry * pce = pcre_get_compiled_regex_cache(IS_UNICODE, regex, strlen(regex) TSRMLS_CC);
469 :
470 0 : if (extra) {
471 0 : *extra = pce ? pce->extra : NULL;
472 : }
473 0 : if (preg_options) {
474 0 : *preg_options = pce ? pce->preg_options : 0;
475 : }
476 0 : if (compile_options) {
477 0 : *compile_options = pce ? pce->compile_options : 0;
478 : }
479 :
480 0 : return pce ? pce->re : NULL;
481 : }
482 : /* }}} */
483 :
484 : /* {{{ add_offset_pair */
485 : static inline void add_offset_pair(zval *result, zend_uchar utype, char *str, int len, int offset, char *name, offset_map_t *prev TSRMLS_DC)
486 64 : {
487 : zval *match_pair;
488 : int tmp;
489 :
490 64 : ALLOC_ZVAL(match_pair);
491 64 : array_init(match_pair);
492 64 : INIT_PZVAL(match_pair);
493 :
494 : /* Add (match, offset) to the return value */
495 64 : if (utype == IS_UNICODE) {
496 64 : add_next_index_utf8_stringl(match_pair, str, len, 1);
497 : } else {
498 0 : add_next_index_stringl(match_pair, str, len, 1);
499 : }
500 :
501 : /* Calculate codepoint offset from the previous chunk */
502 64 : if (offset) {
503 47 : if (utype == IS_UNICODE) {
504 47 : tmp = prev->byte_offset;
505 282 : while (tmp < offset) {
506 188 : U8_FWD_1(prev->str, tmp, offset);
507 188 : prev->cp_offset++;
508 : }
509 47 : prev->byte_offset = tmp;
510 : } else {
511 0 : prev->cp_offset = offset;
512 0 : prev->byte_offset = offset;
513 : }
514 : }
515 64 : add_next_index_long(match_pair, prev->cp_offset);
516 :
517 64 : if (name) {
518 2 : UErrorCode status = U_ZERO_ERROR;
519 2 : UChar *u = NULL;
520 : int u_len;
521 2 : zval_add_ref(&match_pair);
522 2 : zend_string_to_unicode_ex(UG(utf8_conv), &u, &u_len, name, strlen(name), &status);
523 2 : zend_u_hash_update(Z_ARRVAL_P(result), IS_UNICODE, ZSTR(u), u_len+1, &match_pair, sizeof(zval *), NULL);
524 2 : efree(u);
525 : }
526 64 : zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), NULL);
527 64 : }
528 : /* }}} */
529 :
530 : /* {{{ php_do_pcre_match */
531 : static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global)
532 3539703 : {
533 : /* parameters */
534 : zstr regex; /* Regular expression */
535 : zstr subject; /* String to match against */
536 : int regex_len;
537 : int subject_len;
538 : pcre_cache_entry *pce; /* Compiled regular expression */
539 3539703 : zval *subpats = NULL; /* Array for subpatterns */
540 3539703 : long flags = 0; /* Match control flags */
541 3539703 : long start_offset = 0; /* Where the new search starts */
542 : zend_uchar str_type;
543 3539703 : char *regex_utf8 = NULL, *subject_utf8 = NULL;
544 : int regex_utf8_len, subject_utf8_len;
545 3539703 : UErrorCode status = U_ZERO_ERROR;
546 :
547 3539703 : if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, ((global) ? "TTz|ll" : "TT|zll"), ®ex, ®ex_len, &str_type,
548 : &subject, &subject_len, &str_type, &subpats, &flags, &start_offset) == FAILURE) {
549 16 : RETURN_FALSE;
550 : }
551 :
552 3539687 : if (str_type == IS_UNICODE) {
553 1366322 : zend_unicode_to_string_ex(UG(utf8_conv), ®ex_utf8, ®ex_utf8_len, regex.u, regex_len, &status);
554 1366322 : zend_unicode_to_string_ex(UG(utf8_conv), &subject_utf8, &subject_utf8_len, subject.u, subject_len, &status);
555 1366322 : regex.s = regex_utf8;
556 1366322 : regex_len = regex_utf8_len;
557 1366322 : subject.s = subject_utf8;
558 1366322 : subject_len = subject_utf8_len;
559 : }
560 :
561 : /* Compile regex or get it from cache. */
562 3539687 : if ((pce = pcre_get_compiled_regex_cache(str_type, regex.s, regex_len TSRMLS_CC)) == NULL) {
563 16 : if (str_type == IS_UNICODE) {
564 16 : efree(regex_utf8);
565 16 : efree(subject_utf8);
566 : }
567 16 : RETURN_FALSE;
568 : }
569 :
570 3539671 : php_pcre_match_impl(pce, str_type, subject.s, subject_len, return_value, subpats,
571 : global, ZEND_NUM_ARGS() >= 4, flags, start_offset TSRMLS_CC);
572 :
573 3539671 : if (str_type == IS_UNICODE) {
574 1366306 : efree(regex_utf8);
575 1366306 : efree(subject_utf8);
576 : }
577 : }
578 : /* }}} */
579 :
580 : /* {{{ php_pcre_match_impl */
581 : PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_uchar utype, char *subject, int subject_len, zval *return_value,
582 : zval *subpats, int global, int use_flags, long flags, long start_offset TSRMLS_DC)
583 3539934 : {
584 : zval *result_set, /* Holds a set of subpatterns after
585 : a global match */
586 3539934 : **match_sets = NULL; /* An array of sets of matches for each
587 : subpattern after a global match */
588 3539934 : pcre_extra *extra = pce->extra;/* Holds results of studying */
589 : pcre_extra extra_data; /* Used locally for exec options */
590 3539934 : int exoptions = 0; /* Execution options */
591 3539934 : int count = 0; /* Count of matched subpatterns */
592 : int *offsets; /* Array of subpattern offsets */
593 : int num_subpats; /* Number of captured subpatterns */
594 : int size_offsets; /* Size of the offsets array */
595 : int matched; /* Has anything matched */
596 3539934 : int g_notempty = 0; /* If the match should not be empty */
597 : const char **stringlist; /* Holds list of subpatterns */
598 : char **subpat_names; /* Array for named subpatterns */
599 : int i, rc;
600 : int subpats_order; /* Order of subpattern matches */
601 : int offset_capture; /* Capture match offsets: yes/no */
602 :
603 : /* Overwrite the passed-in value for subpatterns with an empty array. */
604 3539934 : if (subpats != NULL) {
605 995592 : zval_dtor(subpats);
606 995592 : array_init(subpats);
607 : }
608 :
609 3539934 : subpats_order = global ? PREG_PATTERN_ORDER : 0;
610 :
611 3539934 : if (use_flags) {
612 221 : offset_capture = flags & PREG_OFFSET_CAPTURE;
613 :
614 : /*
615 : * subpats_order is pre-set to pattern mode so we change it only if
616 : * necessary.
617 : */
618 221 : if (flags & 0xff) {
619 14 : subpats_order = flags & 0xff;
620 : }
621 221 : if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
622 : (!global && subpats_order != 0)) {
623 1 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid flags specified");
624 1 : return;
625 : }
626 : } else {
627 3539713 : offset_capture = 0;
628 : }
629 :
630 3539933 : if (utype == IS_UNICODE) {
631 1366373 : int k = 0;
632 : /* Calculate byte offset from codepoint offset */
633 1366373 : if (start_offset < 0) {
634 4 : k = subject_len;
635 4 : U8_BACK_N((unsigned char*)subject, 0, k, -start_offset);
636 : } else {
637 1366369 : U8_FWD_N(subject, k, subject_len, start_offset);
638 : }
639 1366373 : start_offset = k;
640 : } else {
641 : /* Negative offset counts from the end of the string. */
642 2173560 : if (start_offset < 0) {
643 0 : start_offset = subject_len + start_offset;
644 0 : if (start_offset < 0) {
645 0 : start_offset = 0;
646 : }
647 : }
648 2173560 : if (!(pce->compile_options & PCRE_UTF8)) {
649 2173557 : exoptions |= PCRE_NO_UTF8_CHECK;
650 : }
651 : }
652 :
653 3539933 : if (extra == NULL) {
654 3539925 : extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
655 3539925 : extra = &extra_data;
656 : }
657 3539933 : extra->match_limit = PCRE_G(backtrack_limit);
658 3539933 : extra->match_limit_recursion = PCRE_G(recursion_limit);
659 :
660 : /* Calculate the size of the offsets array, and allocate memory for it. */
661 3539933 : rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
662 3539933 : if (rc < 0) {
663 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
664 0 : RETURN_FALSE;
665 : }
666 3539933 : num_subpats++;
667 3539933 : size_offsets = num_subpats * 3;
668 :
669 : /*
670 : * Build a mapping from subpattern numbers to their names. We will always
671 : * allocate the table, even though there may be no named subpatterns. This
672 : * avoids somewhat more complicated logic in the inner loops.
673 : */
674 3539933 : subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
675 3539933 : if (!subpat_names) {
676 1 : RETURN_FALSE;
677 : }
678 :
679 3539932 : offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
680 :
681 : /* Allocate match sets array and initialize the values. */
682 3539932 : if (global && subpats_order == PREG_PATTERN_ORDER) {
683 305 : match_sets = (zval **)safe_emalloc(num_subpats, sizeof(zval *), 0);
684 742 : for (i=0; i<num_subpats; i++) {
685 437 : ALLOC_ZVAL(match_sets[i]);
686 437 : array_init(match_sets[i]);
687 437 : INIT_PZVAL(match_sets[i]);
688 : }
689 : }
690 :
691 3539932 : matched = 0;
692 3539932 : PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
693 :
694 : do {
695 : /* Execute the regular expression. */
696 3540097 : count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
697 : exoptions|g_notempty, offsets, size_offsets);
698 :
699 : /* the string was already proved to be valid UTF-8 */
700 3540097 : exoptions |= PCRE_NO_UTF8_CHECK;
701 :
702 : /* Check for too many substrings condition. */
703 3540097 : if (count == 0) {
704 0 : php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
705 0 : count = size_offsets/3;
706 : }
707 :
708 : /* If something has matched */
709 3540097 : if (count > 0) {
710 419920 : matched++;
711 :
712 : /* If subpatterns array has been passed, fill it in with values. */
713 419920 : if (subpats != NULL) {
714 33834 : offset_map_t map = { subject, 0, 0 };
715 :
716 : /* Try to get the list of substrings and display a warning if failed. */
717 33834 : if (pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) {
718 0 : efree(subpat_names);
719 0 : efree(offsets);
720 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Get subpatterns list failed");
721 0 : RETURN_FALSE;
722 : }
723 :
724 33834 : if (global) { /* global pattern matching */
725 163 : if (subpats_order == PREG_PATTERN_ORDER) {
726 : /* For each subpattern, insert it into the appropriate array. */
727 316 : for (i = 0; i < count; i++) {
728 191 : if (offset_capture) {
729 7 : add_offset_pair(match_sets[i], utype, (char *)stringlist[i],
730 : offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, &map TSRMLS_CC);
731 184 : } else if (utype == IS_UNICODE) {
732 174 : add_next_index_utf8_stringl(match_sets[i], (char *)stringlist[i],
733 : offsets[(i<<1)+1] - offsets[i<<1], 1);
734 : } else {
735 10 : add_next_index_stringl(match_sets[i], (char *)stringlist[i],
736 : offsets[(i<<1)+1] - offsets[i<<1], 1);
737 : }
738 : }
739 : /*
740 : * If the number of captured subpatterns on this run is
741 : * less than the total possible number, pad the result
742 : * arrays with empty strings.
743 : */
744 125 : if (count < num_subpats) {
745 2 : for (; i < num_subpats; i++) {
746 1 : add_next_index_unicode(match_sets[i], EMPTY_STR, 1);
747 : }
748 : }
749 : } else {
750 : /* Allocate the result set array */
751 38 : ALLOC_ZVAL(result_set);
752 38 : array_init(result_set);
753 38 : INIT_PZVAL(result_set);
754 :
755 : /* Add all the subpatterns to it */
756 343 : for (i = 0; i < count; i++) {
757 305 : if (offset_capture) {
758 7 : add_offset_pair(result_set, utype, (char *)stringlist[i],
759 : offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1],
760 : subpat_names[i], &map TSRMLS_CC);
761 298 : } else if (utype == IS_UNICODE) {
762 298 : if (subpat_names[i]) {
763 8 : add_utf8_assoc_utf8_stringl(result_set, subpat_names[i], (char *)stringlist[i],
764 : offsets[(i<<1)+1] - offsets[i<<1], 1);
765 : }
766 298 : add_next_index_utf8_stringl(result_set, (char *)stringlist[i],
767 : offsets[(i<<1)+1] - offsets[i<<1], 1);
768 : } else {
769 0 : if (subpat_names[i]) {
770 0 : add_rt_assoc_stringl(result_set, subpat_names[i], (char *)stringlist[i],
771 : offsets[(i<<1)+1] - offsets[i<<1], 1);
772 : }
773 0 : add_next_index_stringl(result_set, (char *)stringlist[i],
774 : offsets[(i<<1)+1] - offsets[i<<1], 1);
775 : }
776 : }
777 : /* And add it to the output array */
778 38 : zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set, sizeof(zval *), NULL);
779 : }
780 : } else { /* single pattern matching */
781 : /* For each subpattern, insert it into the subpatterns array. */
782 101185 : for (i = 0; i < count; i++) {
783 67514 : if (offset_capture) {
784 9 : add_offset_pair(subpats, utype, (char *)stringlist[i],
785 : offsets[(i<<1)+1] - offsets[i<<1],
786 : offsets[i<<1], subpat_names[i], &map TSRMLS_CC);
787 67505 : } else if (utype == IS_UNICODE) {
788 67003 : if (subpat_names[i]) {
789 13 : add_utf8_assoc_utf8_stringl(subpats, subpat_names[i], (char *)stringlist[i],
790 : offsets[(i<<1)+1] - offsets[i<<1], 1);
791 : }
792 67003 : add_next_index_utf8_stringl(subpats, (char *)stringlist[i],
793 : offsets[(i<<1)+1] - offsets[i<<1], 1);
794 : } else {
795 502 : if (subpat_names[i]) {
796 0 : add_rt_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
797 : offsets[(i<<1)+1] - offsets[i<<1], 1);
798 : }
799 502 : add_next_index_stringl(subpats, (char *)stringlist[i],
800 : offsets[(i<<1)+1] - offsets[i<<1], 1);
801 : }
802 : }
803 : }
804 :
805 33834 : pcre_free((void *) stringlist);
806 : }
807 3120177 : } else if (count == PCRE_ERROR_NOMATCH) {
808 : /* If we previously set PCRE_NOTEMPTY after a null match,
809 : this is not necessarily the end. We need to advance
810 : the start offset, and continue. Fudge the offset values
811 : to achieve this, unless we're already at the end of the string. */
812 3120174 : if (g_notempty != 0 && start_offset < subject_len) {
813 2 : offsets[0] = start_offset;
814 4 : if (utype == IS_UNICODE || pce->compile_options & PCRE_UTF8) {
815 2 : offsets[1] = start_offset;
816 2 : U8_FWD_1(subject, offsets[1], subject_len);
817 : } else {
818 0 : offsets[1] = start_offset + 1;
819 : }
820 : } else
821 : break;
822 : } else {
823 3 : pcre_handle_exec_error(count TSRMLS_CC);
824 3 : break;
825 : }
826 :
827 : /* If we have matched an empty string, mimic what Perl's /g options does.
828 : This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
829 : the match again at the same point. If this fails (picked up above) we
830 : advance to the next character. */
831 419922 : g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
832 :
833 : /* Advance to the position right after the last full match */
834 419922 : start_offset = offsets[1];
835 419922 : } while (global);
836 :
837 : /* Add the match sets to the output array and clean up */
838 3539932 : if (global && subpats_order == PREG_PATTERN_ORDER) {
839 305 : UErrorCode status = U_ZERO_ERROR;
840 305 : UChar *u = NULL;
841 : int u_len;
842 742 : for (i = 0; i < num_subpats; i++) {
843 437 : if (subpat_names[i]) {
844 5 : zend_string_to_unicode_ex(UG(utf8_conv), &u, &u_len, subpat_names[i], strlen(subpat_names[i]), &status);
845 5 : zend_u_hash_update(Z_ARRVAL_P(subpats), IS_UNICODE, ZSTR(u),
846 : u_len+1, &match_sets[i], sizeof(zval *), NULL);
847 5 : Z_ADDREF_P(match_sets[i]);
848 5 : efree(u);
849 5 : status = U_ZERO_ERROR;
850 : }
851 437 : zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i], sizeof(zval *), NULL);
852 : }
853 305 : efree(match_sets);
854 : }
855 :
856 3539932 : efree(offsets);
857 3539932 : efree(subpat_names);
858 :
859 3539932 : RETVAL_LONG(matched);
860 : }
861 : /* }}} */
862 :
863 : /* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]]) U
864 : Perform a Perl-style regular expression match */
865 : static PHP_FUNCTION(preg_match)
866 3539607 : {
867 3539607 : php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
868 3539607 : }
869 : /* }}} */
870 :
871 : /* {{{ proto int preg_match_all(string pattern, string subject, array &subpatterns [, int flags [, int offset]]) U
872 : Perform a Perl-style global regular expression match */
873 : static PHP_FUNCTION(preg_match_all)
874 96 : {
875 96 : php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
876 96 : }
877 : /* }}} */
878 :
/* {{{ preg_get_backref
 * Try to parse a back-reference at *str.
 *
 * *str must point at the introducer character ('\\' or '$') of a
 * NUL-terminated string.  Accepted forms are the introducer followed by one
 * or two decimal digits ("\1", "$12") and the braced dollar form ("${1}",
 * "${12}").
 *
 * On success the reference number is stored in *backref, *str is advanced
 * past the whole reference and 1 is returned.  On failure 0 is returned and
 * *str is left untouched; *backref may already have been overwritten, so
 * callers must only read it after a successful return.
 */
static int preg_get_backref(char **str, int *backref)
{
	char *cursor = *str;
	int braced;

	/* The introducer is the last character: nothing can follow it. */
	if (cursor[1] == '\0') {
		return 0;
	}

	/* Only '$' may open a braced reference; skip "${" or the lone introducer. */
	braced = (cursor[0] == '$' && cursor[1] == '{');
	cursor += braced ? 2 : 1;

	/* First digit is mandatory. */
	if (*cursor < '0' || *cursor > '9') {
		return 0;
	}
	*backref = *cursor++ - '0';

	/* Optional second digit; a third digit is never consumed. */
	if (*cursor >= '0' && *cursor <= '9') {
		*backref = *backref * 10 + (*cursor++ - '0');
	}

	/* A braced reference must be closed right after the digits. */
	if (braced) {
		if (*cursor != '}') {
			return 0;
		}
		cursor++;
	}

	*str = cursor;
	return 1;
}
/* }}} */
917 :
918 : /* {{{ preg_do_repl_func
919 : */
920 : static int preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, char **result TSRMLS_DC)
921 42 : {
922 : zval *retval_ptr; /* Function return value */
923 : zval **args[1]; /* Argument to pass to function */
924 : zval *subpats; /* Captured subpatterns */
925 : int result_len; /* Return value length */
926 : int i;
927 :
928 42 : MAKE_STD_ZVAL(subpats);
929 42 : array_init(subpats);
930 94 : for (i = 0; i < count; i++) {
931 52 : if (subpat_names[i]) {
932 1 : add_utf8_assoc_utf8_stringl(subpats, subpat_names[i], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1);
933 : }
934 52 : add_next_index_utf8_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1);
935 : }
936 42 : args[0] = &subpats;
937 :
938 83 : if (call_user_function_ex(EG(function_table), NULL, function, &retval_ptr, 1, args, 0, NULL TSRMLS_CC) == SUCCESS && retval_ptr) {
939 41 : convert_to_string_with_converter_ex(&retval_ptr, UG(utf8_conv));
940 41 : *result = estrndup(Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr));
941 41 : result_len = Z_STRLEN_P(retval_ptr);
942 41 : zval_ptr_dtor(&retval_ptr);
943 : } else {
944 1 : if (!EG(exception)) {
945 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call custom replacement function");
946 : }
947 1 : result_len = offsets[1] - offsets[0];
948 1 : *result = estrndup(&subject[offsets[0]], result_len);
949 : }
950 :
951 42 : zval_ptr_dtor(&subpats);
952 :
953 42 : return result_len;
954 : }
955 : /* }}} */
956 :
957 : /* {{{ preg_do_eval
958 : */
959 : static int preg_do_eval(char *eval_str, int eval_str_len, char *subject,
960 : int *offsets, int count, char **result TSRMLS_DC)
961 5 : {
962 : zval retval; /* Return value from evaluation */
963 : char *eval_str_end, /* End of eval string */
964 : *match, /* Current match for a backref */
965 : *esc_match, /* Quote-escaped match */
966 : *walk, /* Used to walk the code string */
967 : *segment, /* Start of segment to append while walking */
968 : walk_last; /* Last walked character */
969 : int match_len; /* Length of the match */
970 : int esc_match_len; /* Length of the quote-escaped match */
971 : int result_len; /* Length of the result of the evaluation */
972 : int backref; /* Current backref */
973 : char *compiled_string_description;
974 : UConverter *orig_runtime_conv;
975 5 : smart_str code = {0};
976 :
977 5 : eval_str_end = eval_str + eval_str_len;
978 5 : walk = segment = eval_str;
979 5 : walk_last = 0;
980 :
981 195 : while (walk < eval_str_end) {
982 : /* If found a backreference.. */
983 185 : if ('\\' == *walk || '$' == *walk) {
984 33 : smart_str_appendl(&code, segment, walk - segment);
985 33 : if (walk_last == '\\') {
986 0 : code.c[code.len-1] = *walk++;
987 0 : segment = walk;
988 0 : walk_last = 0;
989 0 : continue;
990 : }
991 33 : segment = walk;
992 33 : if (preg_get_backref(&walk, &backref)) {
993 8 : if (backref < count) {
994 : /* Find the corresponding string match and substitute it
995 : in instead of the backref */
996 8 : match = subject + offsets[backref<<1];
997 8 : match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
998 8 : if (match_len) {
999 8 : esc_match = php_addslashes_ex(match, match_len, &esc_match_len, 0 TSRMLS_CC);
1000 : } else {
1001 0 : esc_match = match;
1002 0 : esc_match_len = 0;
1003 : }
1004 : } else {
1005 0 : esc_match = "";
1006 0 : esc_match_len = 0;
1007 : }
1008 8 : smart_str_appendl(&code, esc_match, esc_match_len);
1009 :
1010 8 : segment = walk;
1011 :
1012 : /* Clean up and reassign */
1013 8 : if (esc_match_len)
1014 8 : efree(esc_match);
1015 8 : continue;
1016 : }
1017 : }
1018 177 : walk++;
1019 177 : walk_last = walk[-1];
1020 : }
1021 5 : smart_str_appendl(&code, segment, walk - segment);
1022 5 : smart_str_0(&code);
1023 :
1024 5 : orig_runtime_conv = UG(runtime_encoding_conv);
1025 5 : UG(runtime_encoding_conv) = UG(utf8_conv);
1026 5 : compiled_string_description = zend_make_compiled_string_description("regexp code" TSRMLS_CC);
1027 : /* Run the code */
1028 5 : if (zend_eval_stringl(code.c, code.len, &retval, compiled_string_description TSRMLS_CC) == FAILURE) {
1029 1 : efree(compiled_string_description);
1030 1 : UG(runtime_encoding_conv) = orig_runtime_conv;
1031 1 : php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, code.c);
1032 : /* zend_error() does not return in this case */
1033 : }
1034 4 : efree(compiled_string_description);
1035 4 : UG(runtime_encoding_conv) = orig_runtime_conv;
1036 4 : convert_to_string_with_converter(&retval, UG(utf8_conv));
1037 :
1038 : /* Save the return value and its length */
1039 4 : *result = estrndup(Z_STRVAL(retval), Z_STRLEN(retval));
1040 4 : result_len = Z_STRLEN(retval);
1041 :
1042 : /* Clean up */
1043 4 : zval_dtor(&retval);
1044 4 : smart_str_free(&code);
1045 :
1046 4 : return result_len;
1047 : }
1048 : /* }}} */
1049 :
1050 : /* {{{ php_pcre_replace
1051 : */
1052 : PHPAPI char *php_pcre_replace(zend_uchar utype,
1053 : char *regex, int regex_len,
1054 : char *subject, int subject_len,
1055 : zval *replace_val, int is_callable_replace,
1056 : int *result_len, int limit, int *replace_count TSRMLS_DC)
1057 20744 : {
1058 : pcre_cache_entry *pce; /* Compiled regular expression */
1059 :
1060 : /* Compile regex or get it from cache. */
1061 20744 : if ((pce = pcre_get_compiled_regex_cache(utype, regex, regex_len TSRMLS_CC)) == NULL) {
1062 9 : return NULL;
1063 : }
1064 :
1065 20735 : return php_pcre_replace_impl(pce, utype, subject, subject_len, replace_val,
1066 : is_callable_replace, result_len, limit, replace_count TSRMLS_CC);
1067 : }
1068 : /* }}} */
1069 :
1070 : /* {{{ php_pcre_replace_impl() */
1071 : PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, zend_uchar utype, char *subject, int subject_len, zval *replace_val,
1072 : int is_callable_replace, int *result_len, int limit, int *replace_count TSRMLS_DC)
1073 20735 : {
1074 20735 : pcre_extra *extra = pce->extra;/* Holds results of studying */
1075 : pcre_extra extra_data; /* Used locally for exec options */
1076 20735 : int exoptions = 0; /* Execution options */
1077 20735 : int count = 0; /* Count of matched subpatterns */
1078 : int *offsets; /* Array of subpattern offsets */
1079 : char **subpat_names; /* Array for named subpatterns */
1080 : int num_subpats; /* Number of captured subpatterns */
1081 : int size_offsets; /* Size of the offsets array */
1082 : int new_len; /* Length of needed storage */
1083 : int alloc_len; /* Actual allocated length */
1084 20735 : int eval_result_len=0; /* Length of the eval'ed or
1085 : function-returned string */
1086 : int match_len; /* Length of the current match */
1087 : int backref; /* Backreference number */
1088 : int eval; /* If the replacement string should be eval'ed */
1089 : int start_offset; /* Where the new search starts */
1090 20735 : int g_notempty=0; /* If the match should not be empty */
1091 20735 : int replace_len=0; /* Length of replacement string */
1092 : char *result, /* Result of replacement */
1093 20735 : *replace=NULL, /* Replacement string */
1094 : *new_buf, /* Temporary buffer for re-allocation */
1095 : *walkbuf, /* Location of current replacement in the result */
1096 : *walk, /* Used to walk the replacement string */
1097 : *match, /* The current match */
1098 : *piece, /* The current piece of subject */
1099 20735 : *replace_end=NULL, /* End of replacement string */
1100 : *eval_result, /* Result of eval or custom function */
1101 : walk_last; /* Last walked character */
1102 : int rc;
1103 :
1104 20735 : if (extra == NULL) {
1105 20716 : extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1106 20716 : extra = &extra_data;
1107 : }
1108 20735 : extra->match_limit = PCRE_G(backtrack_limit);
1109 20735 : extra->match_limit_recursion = PCRE_G(recursion_limit);
1110 :
1111 20735 : eval = pce->preg_options & PREG_REPLACE_EVAL;
1112 20735 : if (is_callable_replace) {
1113 28 : if (eval) {
1114 1 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Modifier /e cannot be used with replacement callback");
1115 1 : return NULL;
1116 : }
1117 : } else {
1118 20707 : replace = Z_STRVAL_P(replace_val);
1119 20707 : replace_len = Z_STRLEN_P(replace_val);
1120 20707 : replace_end = replace + replace_len;
1121 : }
1122 :
1123 : /* Calculate the size of the offsets array, and allocate memory for it. */
1124 20734 : rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
1125 20734 : if (rc < 0) {
1126 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
1127 0 : return NULL;
1128 : }
1129 20734 : num_subpats++;
1130 20734 : size_offsets = num_subpats * 3;
1131 :
1132 : /*
1133 : * Build a mapping from subpattern numbers to their names. We will always
1134 : * allocate the table, even though there may be no named subpatterns. This
1135 : * avoids somewhat more complicated logic in the inner loops.
1136 : */
1137 20734 : subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
1138 20734 : if (!subpat_names) {
1139 1 : return NULL;
1140 : }
1141 :
1142 20733 : offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1143 :
1144 20733 : alloc_len = 2 * subject_len + 1;
1145 20733 : result = safe_emalloc(alloc_len, sizeof(char), 0);
1146 :
1147 : /* Initialize */
1148 20733 : match = NULL;
1149 20733 : *result_len = 0;
1150 20733 : start_offset = 0;
1151 20733 : PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1152 :
1153 20733 : if (utype != IS_UNICODE && !(pce->compile_options & PCRE_UTF8)) {
1154 14319 : exoptions |= PCRE_NO_UTF8_CHECK;
1155 : }
1156 :
1157 : while (1) {
1158 : /* Execute the regular expression. */
1159 25603 : count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
1160 : exoptions|g_notempty, offsets, size_offsets);
1161 :
1162 : /* the string was already proved to be valid UTF-8 */
1163 25603 : exoptions |= PCRE_NO_UTF8_CHECK;
1164 :
1165 : /* Check for too many substrings condition. */
1166 25603 : if (count == 0) {
1167 0 : php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
1168 0 : count = size_offsets/3;
1169 : }
1170 :
1171 25603 : piece = subject + start_offset;
1172 :
1173 30473 : if (count > 0 && (limit == -1 || limit > 0)) {
1174 4871 : if (replace_count) {
1175 4871 : ++*replace_count;
1176 : }
1177 : /* Set the match location in subject */
1178 4871 : match = subject + offsets[0];
1179 :
1180 4871 : new_len = *result_len + offsets[0] - start_offset; /* part before the match */
1181 :
1182 : /* If evaluating, do it and add the return string's length */
1183 4871 : if (eval) {
1184 5 : eval_result_len = preg_do_eval(replace, replace_len, subject,
1185 : offsets, count, &eval_result TSRMLS_CC);
1186 4 : new_len += eval_result_len;
1187 4866 : } else if (is_callable_replace) {
1188 : /* Use custom function to get replacement string and its length. */
1189 42 : eval_result_len = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, &eval_result TSRMLS_CC);
1190 42 : new_len += eval_result_len;
1191 : } else { /* do regular substitution */
1192 4824 : walk = replace;
1193 4824 : walk_last = 0;
1194 14580 : while (walk < replace_end) {
1195 4932 : if ('\\' == *walk || '$' == *walk) {
1196 43 : if (walk_last == '\\') {
1197 0 : walk++;
1198 0 : walk_last = 0;
1199 0 : continue;
1200 : }
1201 43 : if (preg_get_backref(&walk, &backref)) {
1202 36 : if (backref < count)
1203 35 : new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1204 36 : continue;
1205 : }
1206 : }
1207 4896 : new_len++;
1208 4896 : walk++;
1209 4896 : walk_last = walk[-1];
1210 : }
1211 : }
1212 :
1213 4870 : if (new_len + 1 > alloc_len) {
1214 12 : alloc_len = 1 + alloc_len + 2 * new_len;
1215 12 : new_buf = emalloc(alloc_len);
1216 12 : memcpy(new_buf, result, *result_len);
1217 12 : efree(result);
1218 12 : result = new_buf;
1219 : }
1220 : /* copy the part of the string before the match */
1221 4870 : memcpy(&result[*result_len], piece, match-piece);
1222 4870 : *result_len += match-piece;
1223 :
1224 : /* copy replacement and backrefs */
1225 4870 : walkbuf = result + *result_len;
1226 :
1227 : /* If evaluating or using custom function, copy result to the buffer
1228 : * and clean up. */
1229 4916 : if (eval || is_callable_replace) {
1230 46 : memcpy(walkbuf, eval_result, eval_result_len);
1231 46 : *result_len += eval_result_len;
1232 46 : STR_FREE(eval_result);
1233 : } else { /* do regular backreference copying */
1234 4824 : walk = replace;
1235 4824 : walk_last = 0;
1236 14580 : while (walk < replace_end) {
1237 4932 : if ('\\' == *walk || '$' == *walk) {
1238 43 : if (walk_last == '\\') {
1239 0 : *(walkbuf-1) = *walk++;
1240 0 : walk_last = 0;
1241 0 : continue;
1242 : }
1243 43 : if (preg_get_backref(&walk, &backref)) {
1244 36 : if (backref < count) {
1245 35 : match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1246 35 : memcpy(walkbuf, subject + offsets[backref<<1], match_len);
1247 35 : walkbuf += match_len;
1248 : }
1249 36 : continue;
1250 : }
1251 : }
1252 4896 : *walkbuf++ = *walk++;
1253 4896 : walk_last = walk[-1];
1254 : }
1255 4824 : *walkbuf = '\0';
1256 : /* increment the result length by how much we've added to the string */
1257 4824 : *result_len += walkbuf - (result + *result_len);
1258 : }
1259 :
1260 4870 : if (limit != -1)
1261 15 : limit--;
1262 :
1263 20732 : } else if (count == PCRE_ERROR_NOMATCH || limit == 0) {
1264 : /* If we previously set PCRE_NOTEMPTY after a null match,
1265 : this is not necessarily the end. We need to advance
1266 : the start offset, and continue. Fudge the offset values
1267 : to achieve this, unless we're already at the end of the string. */
1268 20729 : if (g_notempty != 0 && start_offset < subject_len) {
1269 0 : offsets[0] = start_offset;
1270 0 : if (utype == IS_UNICODE || pce->compile_options & PCRE_UTF8) {
1271 0 : offsets[1] = start_offset;
1272 0 : U8_FWD_1(subject, offsets[1], subject_len);
1273 : } else {
1274 0 : offsets[1] = start_offset + 1;
1275 : }
1276 0 : memcpy(&result[*result_len], piece, 1);
1277 0 : (*result_len)++;
1278 : } else {
1279 20729 : new_len = *result_len + subject_len - start_offset;
1280 20729 : if (new_len + 1 > alloc_len) {
1281 1 : alloc_len = new_len + 1; /* now we know exactly how long it is */
1282 1 : new_buf = safe_emalloc(alloc_len, sizeof(char), 0);
1283 1 : memcpy(new_buf, result, *result_len);
1284 1 : efree(result);
1285 1 : result = new_buf;
1286 : }
1287 : /* stick that last bit of string on our output */
1288 20729 : memcpy(&result[*result_len], piece, subject_len - start_offset);
1289 20729 : *result_len += subject_len - start_offset;
1290 20729 : result[*result_len] = '\0';
1291 20729 : break;
1292 : }
1293 : } else {
1294 3 : pcre_handle_exec_error(count TSRMLS_CC);
1295 3 : efree(result);
1296 3 : result = NULL;
1297 3 : break;
1298 : }
1299 :
1300 : /* If we have matched an empty string, mimic what Perl's /g options does.
1301 : This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1302 : the match again at the same point. If this fails (picked up above) we
1303 : advance to the next character. */
1304 4870 : g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1305 :
1306 : /* Advance to the next piece. */
1307 4870 : start_offset = offsets[1];
1308 4870 : }
1309 :
1310 20732 : efree(offsets);
1311 20732 : efree(subpat_names);
1312 :
1313 20732 : return result;
1314 : }
1315 : /* }}} */
1316 :
1317 : /* {{{ php_replace_in_subject
1318 : */
1319 : static char *php_replace_in_subject(zval *regex, zval *replace, zval **subject, int *result_len, int limit, int is_callable_replace, int *replace_count TSRMLS_DC)
1320 20701 : {
1321 : zval **regex_entry,
1322 20701 : **replace_entry = NULL,
1323 : *replace_value,
1324 : empty_replace;
1325 : char *subject_value,
1326 : *result;
1327 : int subject_len;
1328 : zend_uchar utype;
1329 :
1330 : /* Make sure we're dealing with strings. */
1331 20701 : utype = Z_TYPE_PP(subject);
1332 20701 : convert_to_string_with_converter_ex(subject, UG(utf8_conv));
1333 :
1334 20701 : ZVAL_STRINGL(&empty_replace, "", 0, 0);
1335 :
1336 : /* If regex is an array */
1337 20701 : if (Z_TYPE_P(regex) == IS_ARRAY) {
1338 : /* Duplicate subject string for repeated replacement */
1339 22 : subject_value = estrndup(Z_STRVAL_PP(subject), Z_STRLEN_PP(subject));
1340 22 : subject_len = Z_STRLEN_PP(subject);
1341 22 : *result_len = subject_len;
1342 :
1343 22 : zend_hash_internal_pointer_reset(Z_ARRVAL_P(regex));
1344 :
1345 22 : replace_value = replace;
1346 22 : if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace)
1347 15 : zend_hash_internal_pointer_reset(Z_ARRVAL_P(replace));
1348 :
1349 : /* For each entry in the regex array, get the entry */
1350 108 : while (zend_hash_get_current_data(Z_ARRVAL_P(regex), (void **)®ex_entry) == SUCCESS) {
1351 : /* Make sure we're dealing with strings. */
1352 65 : convert_to_string_with_converter_ex(regex_entry, UG(utf8_conv));
1353 :
1354 : /* If replace is an array and not a callable construct */
1355 65 : if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) {
1356 : /* Get current entry */
1357 50 : if (zend_hash_get_current_data(Z_ARRVAL_P(replace), (void **)&replace_entry) == SUCCESS) {
1358 48 : if (!is_callable_replace) {
1359 48 : convert_to_string_with_converter_ex(replace_entry, UG(utf8_conv));
1360 : }
1361 48 : replace_value = *replace_entry;
1362 48 : zend_hash_move_forward(Z_ARRVAL_P(replace));
1363 : } else {
1364 : /* We've run out of replacement strings, so use an empty one */
1365 2 : replace_value = &empty_replace;
1366 : }
1367 : }
1368 :
1369 : /* Do the actual replacement and put the result back into subject_value
1370 : for further replacements. */
1371 65 : if ((result = php_pcre_replace(utype,
1372 : Z_STRVAL_PP(regex_entry),
1373 : Z_STRLEN_PP(regex_entry),
1374 : subject_value,
1375 : subject_len,
1376 : replace_value,
1377 : is_callable_replace,
1378 : result_len,
1379 : limit,
1380 : replace_count TSRMLS_CC)) != NULL) {
1381 64 : efree(subject_value);
1382 64 : subject_value = result;
1383 64 : subject_len = *result_len;
1384 : } else {
1385 1 : efree(subject_value);
1386 1 : return NULL;
1387 : }
1388 :
1389 64 : zend_hash_move_forward(Z_ARRVAL_P(regex));
1390 : }
1391 :
1392 21 : return subject_value;
1393 : } else {
1394 20679 : result = php_pcre_replace(utype,
1395 : Z_STRVAL_P(regex),
1396 : Z_STRLEN_P(regex),
1397 : Z_STRVAL_PP(subject),
1398 : Z_STRLEN_PP(subject),
1399 : replace,
1400 : is_callable_replace,
1401 : result_len,
1402 : limit,
1403 : replace_count TSRMLS_CC);
1404 20678 : return result;
1405 : }
1406 : }
1407 : /* }}} */
1408 :
1409 : /* {{{ preg_replace_impl
1410 : */
1411 : static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, int is_callable_replace, int is_filter)
1412 20708 : {
1413 : zval *regex,
1414 : *replace,
1415 : *subject,
1416 : **subject_entry,
1417 20708 : *zcount = NULL;
1418 : char *result;
1419 : int result_len;
1420 20708 : long limit = -1;
1421 : zstr string_key;
1422 : uint string_key_len;
1423 : ulong num_key;
1424 : zval callback_name;
1425 20708 : int replace_count=0, old_replace_count;
1426 : zend_uchar utype;
1427 :
1428 20708 : if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z/z/z/|lz", ®ex,
1429 : &replace, &subject, &limit, &zcount) == FAILURE) {
1430 11 : return;
1431 : }
1432 :
1433 20697 : if (!is_callable_replace && Z_TYPE_P(replace) == IS_ARRAY && Z_TYPE_P(regex) != IS_ARRAY) {
1434 3 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
1435 3 : RETURN_FALSE;
1436 : }
1437 :
1438 20694 : if (is_callable_replace) {
1439 32 : if (Z_TYPE_P(replace) != IS_ARRAY && Z_TYPE_P(replace) != IS_OBJECT) {
1440 27 : convert_to_unicode(replace);
1441 : }
1442 32 : if (!zend_is_callable(replace, 0, &callback_name TSRMLS_CC)) {
1443 4 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Requires argument 2, '%R', to be a valid callback", Z_TYPE(callback_name), Z_UNIVAL(callback_name));
1444 4 : zval_dtor(&callback_name);
1445 4 : *return_value = *subject;
1446 4 : zval_copy_ctor(return_value);
1447 4 : INIT_PZVAL(return_value);
1448 4 : return;
1449 : }
1450 28 : zval_dtor(&callback_name);
1451 20662 : } else if (Z_TYPE_P(replace) != IS_ARRAY) {
1452 20656 : convert_to_string_with_converter(replace, UG(utf8_conv));
1453 : }
1454 :
1455 20689 : if (Z_TYPE_P(regex) != IS_ARRAY) {
1456 20678 : convert_to_string_with_converter(regex, UG(utf8_conv));
1457 : }
1458 :
1459 : /* if subject is an array */
1460 20688 : if (Z_TYPE_P(subject) == IS_ARRAY) {
1461 6 : array_init(return_value);
1462 6 : zend_hash_internal_pointer_reset(Z_ARRVAL_P(subject));
1463 :
1464 : /* For each subject entry, convert it to string, then perform replacement
1465 : and add the result to the return_value array. */
1466 31 : while (zend_hash_get_current_data(Z_ARRVAL_P(subject), (void **)&subject_entry) == SUCCESS) {
1467 19 : SEPARATE_ZVAL(subject_entry);
1468 19 : utype = Z_TYPE_PP(subject_entry);
1469 19 : old_replace_count = replace_count;
1470 19 : if ((result = php_replace_in_subject(regex, replace, subject_entry, &result_len, limit, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) {
1471 36 : if (!is_filter || replace_count > old_replace_count) {
1472 : /* Add to return array */
1473 17 : switch (zend_hash_get_current_key_ex(Z_ARRVAL_P(subject), &string_key, &string_key_len, &num_key, 0, NULL))
1474 : {
1475 : case HASH_KEY_IS_UNICODE:
1476 2 : if (utype == IS_UNICODE || utype != IS_STRING) {
1477 1 : add_u_assoc_utf8_stringl_ex(return_value, IS_UNICODE, string_key, string_key_len, result, result_len, ZSTR_AUTOFREE);
1478 : } else {
1479 0 : add_u_assoc_stringl_ex(return_value, IS_UNICODE, string_key, string_key_len, result, result_len, 0);
1480 : }
1481 1 : break;
1482 :
1483 : case HASH_KEY_IS_STRING:
1484 0 : if (utype == IS_UNICODE || utype != IS_STRING) {
1485 0 : add_u_assoc_utf8_stringl_ex(return_value, IS_STRING, string_key, string_key_len, result, result_len, ZSTR_AUTOFREE);
1486 : } else {
1487 0 : add_u_assoc_stringl_ex(return_value, IS_STRING, string_key, string_key_len, result, result_len, 0);
1488 : }
1489 0 : break;
1490 :
1491 : case HASH_KEY_IS_LONG:
1492 32 : if (utype == IS_UNICODE || utype != IS_STRING) {
1493 16 : add_index_utf8_stringl(return_value, num_key, result, result_len, ZSTR_AUTOFREE);
1494 : } else {
1495 0 : add_index_stringl(return_value, num_key, result, result_len, 0);
1496 : }
1497 : break;
1498 : }
1499 : } else {
1500 2 : efree(result);
1501 : }
1502 : }
1503 :
1504 19 : zend_hash_move_forward(Z_ARRVAL_P(subject));
1505 : }
1506 : } else { /* if subject is not an array */
1507 20682 : utype = Z_TYPE_P(subject);
1508 20682 : old_replace_count = replace_count;
1509 20682 : if ((result = php_replace_in_subject(regex, replace, &subject, &result_len, limit, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) {
1510 41334 : if (!is_filter || replace_count > old_replace_count) {
1511 26870 : if (utype == IS_UNICODE || utype != IS_STRING) {
1512 6203 : RETVAL_UTF8_STRINGL(result, result_len, ZSTR_AUTOFREE);
1513 : } else {
1514 14464 : RETVAL_STRINGL(result, result_len, 0);
1515 : }
1516 : } else {
1517 0 : efree(result);
1518 : }
1519 : }
1520 : }
1521 20687 : if (ZEND_NUM_ARGS() > 4) {
1522 7 : zval_dtor(zcount);
1523 7 : ZVAL_LONG(zcount, replace_count);
1524 : }
1525 :
1526 : }
1527 : /* }}} */
1528 :
1529 : /* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]]) U
1530 : Perform Perl-style regular expression replacement. */
1531 : static PHP_FUNCTION(preg_replace)
1532 20667 : {
1533 20667 : preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
1534 20664 : }
1535 : /* }}} */
1536 :
1537 : /* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]]) U
1538 : Perform Perl-style regular expression replacement using replacement callback. */
1539 : static PHP_FUNCTION(preg_replace_callback)
1540 40 : {
1541 40 : preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1, 0);
1542 40 : }
1543 : /* }}} */
1544 :
1545 : /* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]]) U
1546 : Perform Perl-style regular expression replacement and only return matches. */
1547 : static PHP_FUNCTION(preg_filter)
1548 1 : {
1549 1 : preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
1550 1 : }
1551 : /* }}} */
1552 :
1553 : /* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]]) U
1554 : Split string into an array using a perl-style regular expression as a delimiter */
1555 : static PHP_FUNCTION(preg_split)
1556 1957 : {
1557 : zstr regex; /* Regular expression */
1558 : zstr subject; /* String to match against */
1559 : int regex_len;
1560 : int subject_len;
1561 1957 : long limit_val = -1;/* Integer value of limit */
1562 1957 : long flags = 0; /* Match control flags */
1563 : pcre_cache_entry *pce; /* Compiled regular expression */
1564 : zend_uchar str_type;
1565 1957 : char *regex_utf8 = NULL, *subject_utf8 = NULL;
1566 : int regex_utf8_len, subject_utf8_len;
1567 1957 : UErrorCode status = U_ZERO_ERROR;
1568 :
1569 : /* Get function parameters and do error checking */
1570 1957 : if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "TT|ll", ®ex, ®ex_len, &str_type,
1571 : &subject, &subject_len, &str_type, &limit_val, &flags) == FAILURE) {
1572 8 : RETURN_FALSE;
1573 : }
1574 :
1575 1949 : if (str_type == IS_UNICODE) {
1576 1687 : zend_unicode_to_string_ex(UG(utf8_conv), ®ex_utf8, ®ex_utf8_len, regex.u, regex_len, &status);
1577 1687 : zend_unicode_to_string_ex(UG(utf8_conv), &subject_utf8, &subject_utf8_len, subject.u, subject_len, &status);
1578 1687 : regex.s = regex_utf8;
1579 1687 : regex_len = regex_utf8_len;
1580 1687 : subject.s = subject_utf8;
1581 1687 : subject_len = subject_utf8_len;
1582 : }
1583 :
1584 : /* Compile regex or get it from cache. */
1585 1949 : if ((pce = pcre_get_compiled_regex_cache(str_type, regex.s, regex_len TSRMLS_CC)) == NULL) {
1586 5 : if (str_type == IS_UNICODE) {
1587 5 : efree(regex_utf8);
1588 5 : efree(subject_utf8);
1589 : }
1590 5 : RETURN_FALSE;
1591 : }
1592 :
1593 1944 : php_pcre_split_impl(pce, str_type, subject.s, subject_len, return_value, limit_val, flags TSRMLS_CC);
1594 :
1595 1944 : if (str_type == IS_UNICODE) {
1596 1682 : efree(regex_utf8);
1597 1682 : efree(subject_utf8);
1598 : }
1599 : }
1600 : /* }}} */
1601 :
1602 : /* {{{ php_pcre_split_impl
1603 : */
1604 : PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_uchar utype, char *subject, int subject_len, zval *return_value,
1605 : long limit_val, long flags TSRMLS_DC)
1606 1960 : {
1607 1960 : pcre_extra *extra = NULL; /* Holds results of studying */
1608 : pcre_extra extra_data; /* Used locally for exec options */
1609 : int *offsets; /* Array of subpattern offsets */
1610 : int size_offsets; /* Size of the offsets array */
1611 1960 : int exoptions = 0; /* Execution options */
1612 1960 : int count = 0; /* Count of matched subpatterns */
1613 : int start_offset; /* Where the new search starts */
1614 : int next_offset; /* End of the last delimiter match + 1 */
1615 1960 : int g_notempty = 0; /* If the match should not be empty */
1616 : char *last_match; /* Location of last match */
1617 : int rc;
1618 : int no_empty; /* If NO_EMPTY flag is set */
1619 : int delim_capture; /* If delimiters should be captured */
1620 : int offset_capture; /* If offsets should be captured */
1621 1960 : offset_map_t map = { subject, 0, 0 };
1622 :
1623 1960 : no_empty = flags & PREG_SPLIT_NO_EMPTY;
1624 1960 : delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
1625 1960 : offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
1626 :
1627 1960 : if (limit_val == 0) {
1628 1 : limit_val = -1;
1629 : }
1630 :
1631 1960 : if (extra == NULL) {
1632 1960 : extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1633 1960 : extra = &extra_data;
1634 : }
1635 1960 : extra->match_limit = PCRE_G(backtrack_limit);
1636 1960 : extra->match_limit_recursion = PCRE_G(recursion_limit);
1637 :
1638 : /* Initialize return value */
1639 1960 : array_init(return_value);
1640 :
1641 : /* Calculate the size of the offsets array, and allocate memory for it. */
1642 1960 : rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
1643 1960 : if (rc < 0) {
1644 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
1645 0 : RETURN_FALSE;
1646 : }
1647 1960 : size_offsets = (size_offsets + 1) * 3;
1648 1960 : offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1649 :
1650 : /* Start at the beginning of the string */
1651 1960 : start_offset = 0;
1652 1960 : next_offset = 0;
1653 1960 : last_match = subject;
1654 1960 : PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1655 :
1656 1960 : if (utype != IS_UNICODE && !(pce->compile_options & PCRE_UTF8)) {
1657 0 : exoptions |= PCRE_NO_UTF8_CHECK;
1658 : }
1659 :
1660 : /* Get next piece if no limit or limit not yet reached and something matched*/
1661 7129 : while ((limit_val == -1 || limit_val > 1)) {
1662 5167 : count = pcre_exec(pce->re, extra, subject,
1663 : subject_len, start_offset,
1664 : exoptions|g_notempty, offsets, size_offsets);
1665 :
1666 : /* the string was already proved to be valid UTF-8 */
1667 5167 : exoptions |= PCRE_NO_UTF8_CHECK;
1668 :
1669 : /* Check for too many substrings condition. */
1670 5167 : if (count == 0) {
1671 0 : php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
1672 0 : count = size_offsets/3;
1673 : }
1674 :
1675 : /* If something matched */
1676 5167 : if (count > 0) {
1677 3151 : if (!no_empty || &subject[offsets[0]] != last_match) {
1678 :
1679 3092 : if (offset_capture) {
1680 : /* Add (match, offset) pair to the return value */
1681 26 : add_offset_pair(return_value, utype, last_match, &subject[offsets[0]]-last_match, next_offset, NULL, &map TSRMLS_CC);
1682 3066 : } else if (utype == IS_UNICODE) {
1683 : /* Add the piece to the return value */
1684 2675 : add_next_index_utf8_stringl(return_value, last_match,
1685 : &subject[offsets[0]]-last_match, 1);
1686 : } else {
1687 : /* Add the piece to the return value */
1688 391 : add_next_index_stringl(return_value, last_match,
1689 : &subject[offsets[0]]-last_match, 1);
1690 : }
1691 :
1692 : /* One less left to do */
1693 3092 : if (limit_val != -1)
1694 1 : limit_val--;
1695 : }
1696 :
1697 3151 : last_match = &subject[offsets[1]];
1698 3151 : next_offset = offsets[1];
1699 :
1700 3151 : if (delim_capture) {
1701 : int i, match_len;
1702 62 : for (i = 1; i < count; i++) {
1703 31 : match_len = offsets[(i<<1)+1] - offsets[i<<1];
1704 : /* If we have matched a delimiter */
1705 31 : if (!no_empty || match_len > 0) {
1706 21 : if (offset_capture) {
1707 10 : add_offset_pair(return_value, utype, &subject[offsets[i<<1]], match_len,
1708 : offsets[i<<1], NULL, &map TSRMLS_CC);
1709 11 : } else if (utype == IS_UNICODE) {
1710 11 : add_next_index_utf8_stringl(return_value, &subject[offsets[i<<1]],
1711 : match_len, 1);
1712 : } else {
1713 0 : add_next_index_stringl(return_value, &subject[offsets[i<<1]],
1714 : match_len, 1);
1715 : }
1716 : }
1717 : }
1718 : }
1719 2016 : } else if (count == PCRE_ERROR_NOMATCH) {
1720 : /* If we previously set PCRE_NOTEMPTY after a null match,
1721 : this is not necessarily the end. We need to advance
1722 : the start offset, and continue. Fudge the offset values
1723 : to achieve this, unless we're already at the end of the string. */
1724 2015 : if (g_notempty != 0 && start_offset < subject_len) {
1725 58 : offsets[0] = start_offset;
1726 116 : if (utype == IS_UNICODE || pce->compile_options & PCRE_UTF8) {
1727 58 : offsets[1] = start_offset;
1728 58 : U8_FWD_1(subject, offsets[1], subject_len);
1729 : } else {
1730 0 : offsets[1] = start_offset + 1;
1731 : }
1732 : } else
1733 : break;
1734 : } else {
1735 1 : pcre_handle_exec_error(count TSRMLS_CC);
1736 1 : break;
1737 : }
1738 :
1739 : /* If we have matched an empty string, mimic what Perl's /g options does.
1740 : This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1741 : the match again at the same point. If this fails (picked up above) we
1742 : advance to the next character. */
1743 3209 : g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1744 :
1745 : /* Advance to the position right after the last full match */
1746 3209 : start_offset = offsets[1];
1747 : }
1748 :
1749 :
1750 1960 : start_offset = last_match - subject; /* the offset might have been incremented, but without further successful matches */
1751 :
1752 1960 : if (!no_empty || start_offset < subject_len)
1753 : {
1754 1952 : if (offset_capture) {
1755 : /* Add the last (match, offset) pair to the return value */
1756 5 : add_offset_pair(return_value, utype, &subject[start_offset],
1757 : subject_len - start_offset, start_offset, NULL, &map TSRMLS_CC);
1758 1947 : } else if (utype == IS_UNICODE) {
1759 : /* Add the last piece to the return value */
1760 1686 : add_next_index_utf8_stringl(return_value, last_match, subject + subject_len - last_match, 1);
1761 : } else {
1762 : /* Add the last piece to the return value */
1763 261 : add_next_index_stringl(return_value, last_match, subject + subject_len - last_match, 1);
1764 : }
1765 : }
1766 :
1767 : /* Clean up */
1768 1960 : efree(offsets);
1769 : }
1770 : /* }}} */
1771 :
1772 : /* {{{ proto string preg_quote(string str [, string delim_char]) U
1773 : Quote regular expression characters plus an optional character */
1774 : static PHP_FUNCTION(preg_quote)
1775 6145 : {
1776 : int in_str_len;
1777 : char *in_str; /* Input string argument */
1778 : char *in_str_end; /* End of the input string */
1779 6145 : int delim_len = 0;
1780 6145 : char *delim = NULL; /* Additional delimiter argument */
1781 : char *out_str, /* Output string with quoted characters */
1782 : *p, /* Iterator for input string */
1783 : *q, /* Iterator for output string */
1784 : c; /* Current character */
1785 6145 : UChar32 delim_char=0; /* Delimiter character to be quoted */
1786 6145 : zend_bool quote_delim = 0; /* Whether to quote additional delim char */
1787 :
1788 : /* Get the arguments and check for errors */
1789 6145 : if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s&|s&", &in_str, &in_str_len, UG(utf8_conv),
1790 : &delim, &delim_len, UG(utf8_conv)) == FAILURE) {
1791 5 : return;
1792 : }
1793 :
1794 6140 : in_str_end = in_str + in_str_len;
1795 :
1796 : /* Nothing to do if we got an empty string */
1797 6140 : if (in_str == in_str_end) {
1798 2 : RETURN_EMPTY_UNICODE();
1799 : }
1800 :
1801 6138 : if (delim && *delim) {
1802 6135 : U8_GET((unsigned char*)delim, 0, 0, delim_len, delim_char);
1803 6135 : quote_delim = 1;
1804 : }
1805 :
1806 : /* Allocate enough memory so that even if each character
1807 : is quoted, we won't run out of room. In Unicode mode, the longest UTF-8
1808 : sequence is 4 bytes, so the multiplier is (4+1). In non-Unicode mode, we
1809 : have to assume that any character can be '\0', which needs 4 chars to
1810 : be escaped. */
1811 6138 : out_str = safe_emalloc(5, in_str_len, 1);
1812 :
1813 : /* Go through the string and quote necessary characters */
1814 5945129 : for(p = in_str, q = out_str; p != in_str_end; p++) {
1815 5938991 : c = *p;
1816 5938991 : switch(c) {
1817 : case '.':
1818 : case '\\':
1819 : case '+':
1820 : case '*':
1821 : case '?':
1822 : case '[':
1823 : case '^':
1824 : case ']':
1825 : case '$':
1826 : case '(':
1827 : case ')':
1828 : case '{':
1829 : case '}':
1830 : case '=':
1831 : case '!':
1832 : case '>':
1833 : case '<':
1834 : case '|':
1835 : case ':':
1836 : case '-':
1837 823380 : *q++ = '\\';
1838 823380 : *q++ = c;
1839 823380 : break;
1840 :
1841 : case '\0':
1842 808 : *q++ = '\\';
1843 808 : *q++ = '0';
1844 808 : *q++ = '0';
1845 808 : *q++ = '0';
1846 808 : break;
1847 :
1848 : default:
1849 5114803 : if ((UChar32)(unsigned char)c > 0x7f) { /* non-ASCII char */
1850 1413 : int tmp = 0;
1851 1413 : UChar32 cp = 0;
1852 1413 : U8_NEXT(p, tmp, in_str_end-p, cp);
1853 1413 : if (quote_delim && cp == delim_char) {
1854 0 : *q++ = '\\';
1855 : }
1856 1413 : memcpy(q, p, tmp);
1857 1413 : q += tmp;
1858 1413 : p += tmp-1; /* going to be incremented by the loop */
1859 : } else {
1860 5113390 : if (quote_delim && c == delim_char)
1861 14361 : *q++ = '\\';
1862 5113390 : *q++ = c;
1863 : }
1864 : break;
1865 : }
1866 : }
1867 6138 : *q = '\0';
1868 :
1869 : /* Reallocate string and return it */
1870 6138 : RETVAL_UTF8_STRINGL(erealloc(out_str, q - out_str + 1), q - out_str, ZSTR_AUTOFREE);
1871 : }
1872 : /* }}} */
1873 :
1874 : /* {{{ proto array preg_grep(string regex, array input [, int flags]) U
1875 : Searches array and returns entries which match regex */
1876 : static PHP_FUNCTION(preg_grep)
1877 29 : {
1878 : zstr regex; /* Regular expression */
1879 : int regex_len;
1880 : char* regex_utf8;
1881 : int regex_utf8_len;
1882 : zend_uchar regex_type;
1883 : zval *input; /* Input array */
1884 29 : long flags = 0; /* Match control flags */
1885 : pcre_cache_entry *pce; /* Compiled regular expression */
1886 29 : UErrorCode status = U_ZERO_ERROR;
1887 :
1888 : /* Get arguments and do error checking */
1889 29 : if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ta|l", ®ex,
1890 : ®ex_len, ®ex_type, &input, &flags) == FAILURE) {
1891 9 : return;
1892 : }
1893 :
1894 20 : if (regex_type == IS_UNICODE) {
1895 20 : zend_unicode_to_string_ex(UG(utf8_conv), ®ex_utf8, ®ex_utf8_len, regex.u, regex_len, &status);
1896 20 : regex.s = regex_utf8;
1897 20 : regex_len = regex_utf8_len;
1898 : }
1899 :
1900 : /* Compile regex or get it from cache. */
1901 20 : if ((pce = pcre_get_compiled_regex_cache(regex_type, regex.s, regex_len TSRMLS_CC)) == NULL) {
1902 5 : if (regex_type == IS_UNICODE) {
1903 5 : efree(regex_utf8);
1904 : }
1905 5 : RETURN_FALSE;
1906 : }
1907 :
1908 15 : php_pcre_grep_impl(pce, input, return_value, flags TSRMLS_CC);
1909 :
1910 15 : if (regex_type == IS_UNICODE) {
1911 15 : efree(regex_utf8);
1912 : }
1913 : }
1914 : /* }}} */
1915 :
1916 : /* {{{ php_pcre_grep_impl */
1917 : PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value,
1918 : long flags TSRMLS_DC)
1919 15 : {
1920 : zval **entry; /* An entry in the input array */
1921 15 : pcre_extra *extra = pce->extra;/* Holds results of studying */
1922 : pcre_extra extra_data; /* Used locally for exec options */
1923 : int *offsets; /* Array of subpattern offsets */
1924 : int size_offsets; /* Size of the offsets array */
1925 15 : int count = 0; /* Count of matched subpatterns */
1926 : zstr string_key;
1927 : uint string_key_len;
1928 : ulong num_key;
1929 : zend_bool invert; /* Whether to return non-matching
1930 : entries */
1931 : int rc;
1932 15 : int exoptions = 0; /* Execution options */
1933 :
1934 :
1935 15 : invert = flags & PREG_GREP_INVERT ? 1 : 0;
1936 :
1937 15 : if (extra == NULL) {
1938 15 : extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1939 15 : extra = &extra_data;
1940 : }
1941 15 : extra->match_limit = PCRE_G(backtrack_limit);
1942 15 : extra->match_limit_recursion = PCRE_G(recursion_limit);
1943 :
1944 : /* Calculate the size of the offsets array, and allocate memory for it. */
1945 15 : rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
1946 15 : if (rc < 0) {
1947 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
1948 0 : RETURN_FALSE;
1949 : }
1950 15 : size_offsets = (size_offsets + 1) * 3;
1951 15 : offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1952 :
1953 : /* Initialize return array */
1954 15 : array_init(return_value);
1955 :
1956 15 : PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1957 :
1958 : /* Go through the input array */
1959 15 : zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));
1960 104 : while(zend_hash_get_current_data(Z_ARRVAL_P(input), (void **)&entry) == SUCCESS) {
1961 75 : zval subject = **entry;
1962 :
1963 75 : if (Z_TYPE_PP(entry) != IS_STRING) {
1964 75 : zval_copy_ctor(&subject);
1965 75 : convert_to_string_with_converter(&subject, UG(utf8_conv));
1966 : }
1967 :
1968 : /* Perform the match */
1969 75 : count = pcre_exec(pce->re, extra, Z_STRVAL(subject), Z_STRLEN(subject),
1970 : 0, exoptions | ((Z_TYPE_PP(entry) != IS_UNICODE && !(pce->compile_options & PCRE_UTF8))?PCRE_NO_UTF8_CHECK:0), offsets, size_offsets);
1971 :
1972 : /* Check for too many substrings condition. */
1973 75 : if (count == 0) {
1974 0 : php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
1975 0 : count = size_offsets/3;
1976 75 : } else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
1977 1 : if (Z_TYPE_PP(entry) != IS_STRING) {
1978 1 : zval_dtor(&subject);
1979 : }
1980 1 : pcre_handle_exec_error(count TSRMLS_CC);
1981 1 : break;
1982 : }
1983 :
1984 : /* If the entry fits our requirements */
1985 74 : if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
1986 :
1987 30 : Z_ADDREF_PP(entry);
1988 :
1989 : /* Add to return array */
1990 30 : switch (zend_hash_get_current_key_ex(Z_ARRVAL_P(input), &string_key, &string_key_len, &num_key, 0, NULL))
1991 : {
1992 : case HASH_KEY_IS_UNICODE:
1993 2 : add_u_assoc_zval_ex(return_value, IS_UNICODE, string_key, string_key_len, *entry);
1994 2 : break;
1995 :
1996 : case HASH_KEY_IS_STRING:
1997 0 : add_u_assoc_zval_ex(return_value, IS_STRING, string_key, string_key_len, *entry);
1998 0 : break;
1999 :
2000 : case HASH_KEY_IS_LONG:
2001 28 : add_index_zval(return_value, num_key, *entry);
2002 : break;
2003 : }
2004 : }
2005 :
2006 74 : if (Z_TYPE_PP(entry) != IS_STRING) {
2007 74 : zval_dtor(&subject);
2008 : }
2009 :
2010 74 : zend_hash_move_forward(Z_ARRVAL_P(input));
2011 : }
2012 15 : zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));
2013 : /* Clean up */
2014 15 : efree(offsets);
2015 : }
2016 : /* }}} */
2017 :
2018 : /* {{{ proto int preg_last_error()
2019 : Returns the error code of the last regexp execution. */
2020 : static PHP_FUNCTION(preg_last_error)
2021 17 : {
2022 17 : if (zend_parse_parameters_none() == FAILURE) {
2023 2 : return;
2024 : }
2025 :
2026 15 : RETURN_LONG(PCRE_G(error_code));
2027 : }
2028 : /* }}} */
2029 :
2030 : /* {{{ module definition structures */
2031 :
2032 : /* {{{ arginfo */
2033 : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
2034 : ZEND_ARG_INFO(0, pattern)
2035 : ZEND_ARG_INFO(0, subject)
2036 : ZEND_ARG_INFO(1, subpatterns) /* array */
2037 : ZEND_ARG_INFO(0, flags)
2038 : ZEND_ARG_INFO(0, offset)
2039 : ZEND_END_ARG_INFO()
2040 :
2041 : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 3)
2042 : ZEND_ARG_INFO(0, pattern)
2043 : ZEND_ARG_INFO(0, subject)
2044 : ZEND_ARG_INFO(1, subpatterns) /* array */
2045 : ZEND_ARG_INFO(0, flags)
2046 : ZEND_ARG_INFO(0, offset)
2047 : ZEND_END_ARG_INFO()
2048 :
2049 : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3)
2050 : ZEND_ARG_INFO(0, regex)
2051 : ZEND_ARG_INFO(0, replace)
2052 : ZEND_ARG_INFO(0, subject)
2053 : ZEND_ARG_INFO(0, limit)
2054 : ZEND_ARG_INFO(1, count)
2055 : ZEND_END_ARG_INFO()
2056 :
2057 : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
2058 : ZEND_ARG_INFO(0, regex)
2059 : ZEND_ARG_INFO(0, callback)
2060 : ZEND_ARG_INFO(0, subject)
2061 : ZEND_ARG_INFO(0, limit)
2062 : ZEND_ARG_INFO(1, count)
2063 : ZEND_END_ARG_INFO()
2064 :
2065 : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
2066 : ZEND_ARG_INFO(0, pattern)
2067 : ZEND_ARG_INFO(0, subject)
2068 : ZEND_ARG_INFO(0, limit)
2069 : ZEND_ARG_INFO(0, flags)
2070 : ZEND_END_ARG_INFO()
2071 :
2072 : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
2073 : ZEND_ARG_INFO(0, str)
2074 : ZEND_ARG_INFO(0, delim_char)
2075 : ZEND_END_ARG_INFO()
2076 :
2077 : ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2)
2078 : ZEND_ARG_INFO(0, regex)
2079 : ZEND_ARG_INFO(0, input) /* array */
2080 : ZEND_ARG_INFO(0, flags)
2081 : ZEND_END_ARG_INFO()
2082 :
2083 : ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0)
2084 : ZEND_END_ARG_INFO()
2085 : /* }}} */
2086 :
2087 : static const zend_function_entry pcre_functions[] = {
2088 : PHP_FE(preg_match, arginfo_preg_match)
2089 : PHP_FE(preg_match_all, arginfo_preg_match_all)
2090 : PHP_FE(preg_replace, arginfo_preg_replace)
2091 : PHP_FE(preg_replace_callback, arginfo_preg_replace_callback)
2092 : PHP_FE(preg_filter, arginfo_preg_replace)
2093 : PHP_FE(preg_split, arginfo_preg_split)
2094 : PHP_FE(preg_quote, arginfo_preg_quote)
2095 : PHP_FE(preg_grep, arginfo_preg_grep)
2096 : PHP_FE(preg_last_error, arginfo_preg_last_error)
2097 : {NULL, NULL, NULL}
2098 : };
2099 :
2100 : zend_module_entry pcre_module_entry = {
2101 : STANDARD_MODULE_HEADER,
2102 : "pcre",
2103 : pcre_functions,
2104 : PHP_MINIT(pcre),
2105 : PHP_MSHUTDOWN(pcre),
2106 : NULL,
2107 : NULL,
2108 : PHP_MINFO(pcre),
2109 : NO_VERSION_YET,
2110 : PHP_MODULE_GLOBALS(pcre),
2111 : PHP_GINIT(pcre),
2112 : PHP_GSHUTDOWN(pcre),
2113 : NULL,
2114 : STANDARD_MODULE_PROPERTIES_EX
2115 : };
2116 :
/* Emitted only when COMPILE_DL_PCRE is defined */
#if defined(COMPILE_DL_PCRE)
ZEND_GET_MODULE(pcre)
#endif
2120 :
2121 : /* }}} */
2122 :
2123 : #endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
2124 :
2125 : /*
2126 : * Local variables:
2127 : * tab-width: 4
2128 : * c-basic-offset: 4
2129 : * End:
2130 : * vim600: sw=4 ts=4 fdm=marker
2131 : * vim<600: sw=4 ts=4
2132 : */
|