1 : /*************************************************
2 : * Perl-Compatible Regular Expressions *
3 : *************************************************/
4 :
5 : /* PCRE is a library of functions to support regular expressions whose syntax
6 : and semantics are as close as possible to those of the Perl 5 language.
7 :
8 : Written by Philip Hazel
9 : Copyright (c) 1997-2008 University of Cambridge
10 :
11 : -----------------------------------------------------------------------------
12 : Redistribution and use in source and binary forms, with or without
13 : modification, are permitted provided that the following conditions are met:
14 :
15 : * Redistributions of source code must retain the above copyright notice,
16 : this list of conditions and the following disclaimer.
17 :
18 : * Redistributions in binary form must reproduce the above copyright
19 : notice, this list of conditions and the following disclaimer in the
20 : documentation and/or other materials provided with the distribution.
21 :
22 : * Neither the name of the University of Cambridge nor the names of its
23 : contributors may be used to endorse or promote products derived from
24 : this software without specific prior written permission.
25 :
26 : THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 : AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 : IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 : ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 : LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 : CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 : SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 : INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 : CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 : ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 : POSSIBILITY OF SUCH DAMAGE.
37 : -----------------------------------------------------------------------------
38 : */
39 :
40 :
41 : /* This module contains some convenience functions for extracting substrings
42 : from the subject string after a regex match has succeeded. The original idea
43 : for these functions came from Scott Wimer. */
44 :
45 :
46 : #include "config.h"
47 :
48 : #include "pcre_internal.h"
49 :
50 :
51 : /*************************************************
52 : * Find number for named string *
53 : *************************************************/
54 :
55 : /* This function is used by the get_first_set() function below, as well
56 : as being generally available. It assumes that names are unique.
57 :
58 : Arguments:
59 : code the compiled regex
60 : stringname the name whose number is required
61 :
62 : Returns: the number of the named parentheses, or a negative number
63 : (PCRE_ERROR_NOSUBSTRING) if not found
64 : */
65 :
66 : PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
67 : pcre_get_stringnumber(const pcre *code, const char *stringname)
68 0 : {
69 : int rc;
70 : int entrysize;
71 : int top, bot;
72 : uschar *nametable;
73 :
74 0 : if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
75 0 : return rc;
76 0 : if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
77 :
78 0 : if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
79 0 : return rc;
80 0 : if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
81 0 : return rc;
82 :
83 0 : bot = 0;
84 0 : while (top > bot)
85 : {
86 0 : int mid = (top + bot) / 2;
87 0 : uschar *entry = nametable + entrysize*mid;
88 0 : int c = strcmp(stringname, (char *)(entry + 2));
89 0 : if (c == 0) return (entry[0] << 8) + entry[1];
90 0 : if (c > 0) bot = mid + 1; else top = mid;
91 : }
92 :
93 0 : return PCRE_ERROR_NOSUBSTRING;
94 : }
95 :
96 :
97 :
98 : /*************************************************
99 : * Find (multiple) entries for named string *
100 : *************************************************/
101 :
102 : /* This is used by the get_first_set() function below, as well as being
103 : generally available. It is used when duplicated names are permitted.
104 :
105 : Arguments:
106 : code the compiled regex
107 : stringname the name whose entries required
108 : firstptr where to put the pointer to the first entry
109 : lastptr where to put the pointer to the last entry
110 :
111 : Returns: the length of each entry, or a negative number
112 : (PCRE_ERROR_NOSUBSTRING) if not found
113 : */
114 :
115 : PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
116 : pcre_get_stringtable_entries(const pcre *code, const char *stringname,
117 : char **firstptr, char **lastptr)
118 0 : {
119 : int rc;
120 : int entrysize;
121 : int top, bot;
122 : uschar *nametable, *lastentry;
123 :
124 0 : if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
125 0 : return rc;
126 0 : if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
127 :
128 0 : if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
129 0 : return rc;
130 0 : if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
131 0 : return rc;
132 :
133 0 : lastentry = nametable + entrysize * (top - 1);
134 0 : bot = 0;
135 0 : while (top > bot)
136 : {
137 0 : int mid = (top + bot) / 2;
138 0 : uschar *entry = nametable + entrysize*mid;
139 0 : int c = strcmp(stringname, (char *)(entry + 2));
140 0 : if (c == 0)
141 : {
142 0 : uschar *first = entry;
143 0 : uschar *last = entry;
144 0 : while (first > nametable)
145 : {
146 0 : if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
147 0 : first -= entrysize;
148 : }
149 0 : while (last < lastentry)
150 : {
151 0 : if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
152 0 : last += entrysize;
153 : }
154 0 : *firstptr = (char *)first;
155 0 : *lastptr = (char *)last;
156 0 : return entrysize;
157 : }
158 0 : if (c > 0) bot = mid + 1; else top = mid;
159 : }
160 :
161 0 : return PCRE_ERROR_NOSUBSTRING;
162 : }
163 :
164 :
165 :
166 : /*************************************************
167 : * Find first set of multiple named strings *
168 : *************************************************/
169 :
170 : /* This function allows for duplicate names in the table of named substrings.
171 : It returns the number of the first one that was set in a pattern match.
172 :
173 : Arguments:
174 : code the compiled regex
175 : stringname the name of the capturing substring
176 : ovector the vector of matched substrings
177 :
178 : Returns: the number of the first that is set,
179 : or the number of the last one if none are set,
180 : or a negative number on error
181 : */
182 :
183 : static int
184 : get_first_set(const pcre *code, const char *stringname, int *ovector)
185 0 : {
186 0 : const real_pcre *re = (const real_pcre *)code;
187 : int entrysize;
188 : char *first, *last;
189 : uschar *entry;
190 0 : if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
191 0 : return pcre_get_stringnumber(code, stringname);
192 0 : entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
193 0 : if (entrysize <= 0) return entrysize;
194 0 : for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
195 : {
196 0 : int n = (entry[0] << 8) + entry[1];
197 0 : if (ovector[n*2] >= 0) return n;
198 : }
199 0 : return (first[0] << 8) + first[1];
200 : }
201 :
202 :
203 :
204 :
205 : /*************************************************
206 : * Copy captured string to given buffer *
207 : *************************************************/
208 :
209 : /* This function copies a single captured substring into a given buffer.
210 : Note that we use memcpy() rather than strncpy() in case there are binary zeros
211 : in the string.
212 :
213 : Arguments:
214 : subject the subject string that was matched
215 : ovector pointer to the offsets table
216 : stringcount the number of substrings that were captured
217 : (i.e. the yield of the pcre_exec call, unless
218 : that was zero, in which case it should be 1/3
219 : of the offset table size)
220 : stringnumber the number of the required substring
221 : buffer where to put the substring
222 : size the size of the buffer
223 :
224 : Returns: if successful:
225 : the length of the copied string, not including the zero
226 : that is put on the end; can be zero
227 : if not successful:
228 : PCRE_ERROR_NOMEMORY (-6) buffer too small
229 : PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
230 : */
231 :
232 : PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
233 : pcre_copy_substring(const char *subject, int *ovector, int stringcount,
234 : int stringnumber, char *buffer, int size)
235 0 : {
236 : int yield;
237 0 : if (stringnumber < 0 || stringnumber >= stringcount)
238 0 : return PCRE_ERROR_NOSUBSTRING;
239 0 : stringnumber *= 2;
240 0 : yield = ovector[stringnumber+1] - ovector[stringnumber];
241 0 : if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
242 0 : memcpy(buffer, subject + ovector[stringnumber], yield);
243 0 : buffer[yield] = 0;
244 0 : return yield;
245 : }
246 :
247 :
248 :
249 : /*************************************************
250 : * Copy named captured string to given buffer *
251 : *************************************************/
252 :
253 : /* This function copies a single captured substring into a given buffer,
254 : identifying it by name. If the regex permits duplicate names, the first
255 : substring that is set is chosen.
256 :
257 : Arguments:
258 : code the compiled regex
259 : subject the subject string that was matched
260 : ovector pointer to the offsets table
261 : stringcount the number of substrings that were captured
262 : (i.e. the yield of the pcre_exec call, unless
263 : that was zero, in which case it should be 1/3
264 : of the offset table size)
265 : stringname the name of the required substring
266 : buffer where to put the substring
267 : size the size of the buffer
268 :
269 : Returns: if successful:
270 : the length of the copied string, not including the zero
271 : that is put on the end; can be zero
272 : if not successful:
273 : PCRE_ERROR_NOMEMORY (-6) buffer too small
274 : PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
275 : */
276 :
277 : PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
278 : pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
279 : int stringcount, const char *stringname, char *buffer, int size)
280 0 : {
281 0 : int n = get_first_set(code, stringname, ovector);
282 0 : if (n <= 0) return n;
283 0 : return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
284 : }
285 :
286 :
287 :
288 : /*************************************************
289 : * Copy all captured strings to new store *
290 : *************************************************/
291 :
292 : /* This function gets one chunk of store and builds a list of pointers and all
293 : of the captured substrings in it. A NULL pointer is put on the end of the list.
294 :
295 : Arguments:
296 : subject the subject string that was matched
297 : ovector pointer to the offsets table
298 : stringcount the number of substrings that were captured
299 : (i.e. the yield of the pcre_exec call, unless
300 : that was zero, in which case it should be 1/3
301 : of the offset table size)
302 : listptr set to point to the list of pointers
303 :
304 : Returns: if successful: 0
305 : if not successful:
306 : PCRE_ERROR_NOMEMORY (-6) failed to get store
307 : */
308 :
309 : PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
310 : pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
311 : const char ***listptr)
312 25732 : {
313 : int i;
314 25732 : int size = sizeof(char *);
315 25732 : int double_count = stringcount * 2;
316 : char **stringlist;
317 : char *p;
318 :
319 77528 : for (i = 0; i < double_count; i += 2)
320 51796 : size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
321 :
322 25732 : stringlist = (char **)(pcre_malloc)(size);
323 25732 : if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
324 :
325 25732 : *listptr = (const char **)stringlist;
326 25732 : p = (char *)(stringlist + stringcount + 1);
327 :
328 77528 : for (i = 0; i < double_count; i += 2)
329 : {
330 51796 : int len = ovector[i+1] - ovector[i];
331 51796 : memcpy(p, subject + ovector[i], len);
332 51796 : *stringlist++ = p;
333 51796 : p += len;
334 51796 : *p++ = 0;
335 : }
336 :
337 25732 : *stringlist = NULL;
338 25732 : return 0;
339 : }
340 :
341 :
342 :
343 : /*************************************************
344 : * Free store obtained by get_substring_list *
345 : *************************************************/
346 :
347 : /* This function exists for the benefit of people calling PCRE from non-C
348 : programs that can call its functions, but not free() or (pcre_free)() directly.
349 :
350 : Argument: the result of a previous pcre_get_substring_list()
351 : Returns: nothing
352 : */
353 :
354 : PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
355 : pcre_free_substring_list(const char **pointer)
356 0 : {
357 0 : (pcre_free)((void *)pointer);
358 0 : }
359 :
360 :
361 :
362 : /*************************************************
363 : * Copy captured string to new store *
364 : *************************************************/
365 :
366 : /* This function copies a single captured substring into a piece of new
367 : store
368 :
369 : Arguments:
370 : subject the subject string that was matched
371 : ovector pointer to the offsets table
372 : stringcount the number of substrings that were captured
373 : (i.e. the yield of the pcre_exec call, unless
374 : that was zero, in which case it should be 1/3
375 : of the offset table size)
376 : stringnumber the number of the required substring
377 : stringptr where to put a pointer to the substring
378 :
379 : Returns: if successful:
380 : the length of the string, not including the zero that
381 : is put on the end; can be zero
382 : if not successful:
383 : PCRE_ERROR_NOMEMORY (-6) failed to get store
384 : PCRE_ERROR_NOSUBSTRING (-7) substring not present
385 : */
386 :
387 : PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
388 : pcre_get_substring(const char *subject, int *ovector, int stringcount,
389 : int stringnumber, const char **stringptr)
390 0 : {
391 : int yield;
392 : char *substring;
393 0 : if (stringnumber < 0 || stringnumber >= stringcount)
394 0 : return PCRE_ERROR_NOSUBSTRING;
395 0 : stringnumber *= 2;
396 0 : yield = ovector[stringnumber+1] - ovector[stringnumber];
397 0 : substring = (char *)(pcre_malloc)(yield + 1);
398 0 : if (substring == NULL) return PCRE_ERROR_NOMEMORY;
399 0 : memcpy(substring, subject + ovector[stringnumber], yield);
400 0 : substring[yield] = 0;
401 0 : *stringptr = substring;
402 0 : return yield;
403 : }
404 :
405 :
406 :
407 : /*************************************************
408 : * Copy named captured string to new store *
409 : *************************************************/
410 :
411 : /* This function copies a single captured substring, identified by name, into
412 : new store. If the regex permits duplicate names, the first substring that is
413 : set is chosen.
414 :
415 : Arguments:
416 : code the compiled regex
417 : subject the subject string that was matched
418 : ovector pointer to the offsets table
419 : stringcount the number of substrings that were captured
420 : (i.e. the yield of the pcre_exec call, unless
421 : that was zero, in which case it should be 1/3
422 : of the offset table size)
423 : stringname the name of the required substring
424 : stringptr where to put the pointer
425 :
426 : Returns: if successful:
427 : the length of the copied string, not including the zero
428 : that is put on the end; can be zero
429 : if not successful:
430 : PCRE_ERROR_NOMEMORY (-6) couldn't get memory
431 : PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
432 : */
433 :
434 : PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
435 : pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
436 : int stringcount, const char *stringname, const char **stringptr)
437 0 : {
438 0 : int n = get_first_set(code, stringname, ovector);
439 0 : if (n <= 0) return n;
440 0 : return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
441 : }
442 :
443 :
444 :
445 :
446 : /*************************************************
447 : * Free store obtained by get_substring *
448 : *************************************************/
449 :
450 : /* This function exists for the benefit of people calling PCRE from non-C
451 : programs that can call its functions, but not free() or (pcre_free)() directly.
452 :
453 : Argument: the result of a previous pcre_get_substring()
454 : Returns: nothing
455 : */
456 :
457 : PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
458 : pcre_free_substring(const char *pointer)
459 0 : {
460 0 : (pcre_free)((void *)pointer);
461 0 : }
462 :
463 : /* End of pcre_get.c */
|