1 : /*
2 : +----------------------------------------------------------------------+
3 : | PHP Version 5 |
4 : +----------------------------------------------------------------------+
5 : | Copyright (c) 1997-2009 The PHP Group |
6 : +----------------------------------------------------------------------+
7 : | This source file is subject to version 3.01 of the PHP license, |
8 : | that is bundled with this package in the file LICENSE, and is |
9 : | available through the world-wide-web at the following url: |
10 : | http://www.php.net/license/3_01.txt |
11 : | If you did not receive a copy of the PHP license and are unable to |
12 : | obtain it through the world-wide-web, please send a note to |
13 : | license@php.net so we can mail you a copy immediately. |
14 : +----------------------------------------------------------------------+
15 : | Author: Clayton Collie <clcollie@mindspring.com> |
16 : +----------------------------------------------------------------------+
17 : */
18 :
19 : /* $Id: scanf.c 275270 2009-02-06 10:22:34Z felipe $ */
20 :
21 : /*
22 : scanf.c --
23 :
24 : This file contains the base code which implements sscanf and by extension
25 : fscanf. Original code is from TCL8.3.0 and bears the following copyright:
26 :
27 : This software is copyrighted by the Regents of the University of
28 : California, Sun Microsystems, Inc., Scriptics Corporation,
29 : and other parties. The following terms apply to all files associated
30 : with the software unless explicitly disclaimed in individual files.
31 :
32 : The authors hereby grant permission to use, copy, modify, distribute,
33 : and license this software and its documentation for any purpose, provided
34 : that existing copyright notices are retained in all copies and that this
35 : notice is included verbatim in any distributions. No written agreement,
36 : license, or royalty fee is required for any of the authorized uses.
37 : Modifications to this software may be copyrighted by their authors
38 : and need not follow the licensing terms described here, provided that
39 : the new terms are clearly indicated on the first page of each file where
40 : they apply.
41 :
42 : IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
43 : FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
44 : ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
45 : DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
46 : POSSIBILITY OF SUCH DAMAGE.
47 :
48 : THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
49 : INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
50 : FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE
51 : IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
52 : NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
53 : MODIFICATIONS.
54 :
55 : GOVERNMENT USE: If you are acquiring this software on behalf of the
56 : U.S. government, the Government shall have only "Restricted Rights"
57 : in the software and related documentation as defined in the Federal
58 : Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you
59 : are acquiring the software on behalf of the Department of Defense, the
60 : software shall be classified as "Commercial Computer Software" and the
61 : Government shall have only "Restricted Rights" as defined in Clause
62 : 252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the
63 : authors grant the U.S. Government and others acting in its behalf
64 : permission to use and distribute the software in accordance with the
65 : terms specified in this license.
66 : */
67 :
68 : #include <stdio.h>
69 : #include <limits.h>
70 : #include <ctype.h>
71 : #include "php.h"
72 : #include "php_variables.h"
73 : #ifdef HAVE_LOCALE_H
74 : #include <locale.h>
75 : #endif
76 : #include "zend_execute.h"
77 : #include "zend_operators.h"
78 : #include "zend_strtod.h"
79 : #include "php_globals.h"
80 : #include "basic_functions.h"
81 : #include "scanf.h"
82 :
/*
 * Flag values used internally by [f|s]canf.
 *
 * The first group describes the conversion as a whole; the second group
 * is scratch state used while accumulating the characters of a number.
 */
#define SCAN_NOSKIP     0x1   /* Don't skip blanks. */
#define SCAN_SUPPRESS   0x2   /* Suppress assignment. */
#define SCAN_UNSIGNED   0x4   /* Read an unsigned value. */
#define SCAN_WIDTH      0x8   /* A width value was supplied. */

#define SCAN_SIGNOK     0x10  /* A +/- character is allowed. */
#define SCAN_NODIGITS   0x20  /* No digits have been scanned. */
#define SCAN_NOZERO     0x40  /* No zero digits have been scanned. */
#define SCAN_XOK        0x80  /* An 'x' is allowed. */
#define SCAN_PTOK       0x100 /* Decimal point is allowed. */
#define SCAN_EXPOK      0x200 /* An exponent is allowed. */

/*
 * Convert a (possibly signed) char to zend_uchar so it can be handed to
 * the <ctype.h> classifiers. The whole expansion is parenthesized so the
 * cast cannot bind to a neighbouring postfix operator or to sizeof.
 */
#define UCHAR(x) ((zend_uchar)(x))
99 :
/*
 * A compiled "%[...]" character set. A byte belongs to the set when it
 * equals one of the listed chars or lies inside one of the inclusive
 * ranges; `exclude` inverts that answer.
 */
struct Range {
	char start;             /* Lower bound of the range (inclusive). */
	char end;               /* Upper bound of the range (inclusive). */
};

typedef struct CharSet {
	int exclude;            /* 1 if this is an exclusion set. */
	int nchars;             /* Number of entries used in chars. */
	char *chars;            /* Individually listed characters. */
	int nranges;            /* Number of entries used in ranges. */
	struct Range *ranges;   /* Character ranges, or NULL when there are none. */
} CharSet;
114 :
115 : /*
116 : * Declarations for functions used only in this file.
117 : */
118 : static char *BuildCharSet(CharSet *cset, char *format);
119 : static int CharInSet(CharSet *cset, int ch);
120 : static void ReleaseCharSet(CharSet *cset);
121 : static inline void scan_set_error_return(int numVars, zval **return_value);
122 :
123 :
124 : /* {{{ BuildCharSet
125 : *----------------------------------------------------------------------
126 : *
127 : * BuildCharSet --
128 : *
129 : * This function examines a character set format specification
130 : * and builds a CharSet containing the individual characters and
131 : * character ranges specified.
132 : *
133 : * Results:
134 : * Returns the next format position.
135 : *
136 : * Side effects:
137 : * Initializes the charset.
138 : *
139 : *----------------------------------------------------------------------
140 : */
141 : static char * BuildCharSet(CharSet *cset, char *format)
142 579 : {
143 : char *ch, start;
144 : int nranges;
145 : char *end;
146 :
147 579 : memset(cset, 0, sizeof(CharSet));
148 :
149 579 : ch = format;
150 579 : if (*ch == '^') {
151 2 : cset->exclude = 1;
152 2 : ch = ++format;
153 : }
154 579 : end = format + 1; /* verify this - cc */
155 :
156 : /*
157 : * Find the close bracket so we can overallocate the set.
158 : */
159 579 : if (*ch == ']') {
160 0 : ch = end++;
161 : }
162 579 : nranges = 0;
163 4219 : while (*ch != ']') {
164 3061 : if (*ch == '-') {
165 921 : nranges++;
166 : }
167 3061 : ch = end++;
168 : }
169 :
170 579 : cset->chars = (char *) safe_emalloc(sizeof(char), (end - format - 1), 0);
171 579 : if (nranges > 0) {
172 577 : cset->ranges = (struct Range *) safe_emalloc(sizeof(struct Range), nranges, 0);
173 : } else {
174 2 : cset->ranges = NULL;
175 : }
176 :
177 : /*
178 : * Now build the character set.
179 : */
180 579 : cset->nchars = cset->nranges = 0;
181 579 : ch = format++;
182 579 : start = *ch;
183 579 : if (*ch == ']' || *ch == '-') {
184 0 : cset->chars[cset->nchars++] = *ch;
185 0 : ch = format++;
186 : }
187 3298 : while (*ch != ']') {
188 2140 : if (*format == '-') {
189 : /*
190 : * This may be the first character of a range, so don't add
191 : * it yet.
192 : */
193 921 : start = *ch;
194 1219 : } else if (*ch == '-') {
195 : /*
196 : * Check to see if this is the last character in the set, in which
197 : * case it is not a range and we should add the previous character
198 : * as well as the dash.
199 : */
200 921 : if (*format == ']') {
201 0 : cset->chars[cset->nchars++] = start;
202 0 : cset->chars[cset->nchars++] = *ch;
203 : } else {
204 921 : ch = format++;
205 :
206 : /*
207 : * Check to see if the range is in reverse order.
208 : */
209 921 : if (start < *ch) {
210 921 : cset->ranges[cset->nranges].start = start;
211 921 : cset->ranges[cset->nranges].end = *ch;
212 : } else {
213 0 : cset->ranges[cset->nranges].start = *ch;
214 0 : cset->ranges[cset->nranges].end = start;
215 : }
216 921 : cset->nranges++;
217 : }
218 : } else {
219 298 : cset->chars[cset->nchars++] = *ch;
220 : }
221 2140 : ch = format++;
222 : }
223 579 : return format;
224 : }
225 : /* }}} */
226 :
227 : /* {{{ CharInSet
228 : *----------------------------------------------------------------------
229 : *
230 : * CharInSet --
231 : *
232 : * Check to see if a character matches the given set.
233 : *
234 : * Results:
235 : * Returns non-zero if the character matches the given set.
236 : *
237 : * Side effects:
238 : * None.
239 : *
240 : *----------------------------------------------------------------------
241 : */
242 : static int CharInSet(CharSet *cset, int c)
243 1736 : {
244 1736 : char ch = (char) c;
245 1736 : int i, match = 0;
246 :
247 2770 : for (i = 0; i < cset->nchars; i++) {
248 1034 : if (cset->chars[i] == ch) {
249 0 : match = 1;
250 0 : break;
251 : }
252 : }
253 1736 : if (!match) {
254 3139 : for (i = 0; i < cset->nranges; i++) {
255 2556 : if ((cset->ranges[i].start <= ch)
256 : && (ch <= cset->ranges[i].end)) {
257 1153 : match = 1;
258 1153 : break;
259 : }
260 : }
261 : }
262 1736 : return (cset->exclude ? !match : match);
263 : }
264 : /* }}} */
265 :
266 : /* {{{ ReleaseCharSet
267 : *----------------------------------------------------------------------
268 : *
269 : * ReleaseCharSet --
270 : *
271 : * Free the storage associated with a character set.
272 : *
273 : * Results:
274 : * None.
275 : *
276 : * Side effects:
277 : * None.
278 : *
279 : *----------------------------------------------------------------------
280 : */
281 : static void ReleaseCharSet(CharSet *cset)
282 579 : {
283 579 : efree((char *)cset->chars);
284 579 : if (cset->ranges) {
285 577 : efree((char *)cset->ranges);
286 : }
287 579 : }
288 : /* }}} */
289 :
290 : /* {{{ ValidateFormat
291 : *----------------------------------------------------------------------
292 : *
293 : * ValidateFormat --
294 : *
295 : * Parse the format string and verify that it is properly formed
296 : * and that there are exactly enough variables on the command line.
297 : *
298 : * Results:
299 : * FAILURE or SUCCESS.
300 : *
301 : * Side effects:
302 : * May set php_error based on abnormal conditions.
303 : *
304 : * Parameters :
305 : * format The format string.
306 : * numVars The number of variables passed to the scan command.
307 : * totalSubs The number of variables that will be required.
308 : *
309 : *----------------------------------------------------------------------
310 : */
311 : PHPAPI int ValidateFormat(char *format, int numVars, int *totalSubs)
312 7901 : {
313 : #define STATIC_LIST_SIZE 16
314 : int gotXpg, gotSequential, value, i, flags;
315 7901 : char *end, *ch = NULL;
316 : int staticAssign[STATIC_LIST_SIZE];
317 7901 : int *nassign = staticAssign;
318 7901 : int objIndex, xpgSize, nspace = STATIC_LIST_SIZE;
319 : TSRMLS_FETCH();
320 :
321 : /*
322 : * Initialize an array that records the number of times a variable
323 : * is assigned to by the format string. We use this to detect if
324 : * a variable is multiply assigned or left unassigned.
325 : */
326 7901 : if (numVars > nspace) {
327 0 : nassign = (int*)safe_emalloc(sizeof(int), numVars, 0);
328 0 : nspace = numVars;
329 : }
330 134317 : for (i = 0; i < nspace; i++) {
331 126416 : nassign[i] = 0;
332 : }
333 :
334 7901 : xpgSize = objIndex = gotXpg = gotSequential = 0;
335 :
336 25907 : while (*format != '\0') {
337 10690 : ch = format++;
338 10690 : flags = 0;
339 :
340 10690 : if (*ch != '%') {
341 2719 : continue;
342 : }
343 7971 : ch = format++;
344 7971 : if (*ch == '%') {
345 0 : continue;
346 : }
347 7971 : if (*ch == '*') {
348 578 : flags |= SCAN_SUPPRESS;
349 578 : ch = format++;
350 578 : goto xpgCheckDone;
351 : }
352 :
353 7393 : if ( isdigit( (int)*ch ) ) {
354 : /*
355 : * Check for an XPG3-style %n$ specification. Note: there
356 : * must not be a mixture of XPG3 specs and non-XPG3 specs
357 : * in the same format string.
358 : */
359 1168 : value = strtoul(format-1, &end, 10);
360 1168 : if (*end != '$') {
361 1156 : goto notXpg;
362 : }
363 12 : format = end+1;
364 12 : ch = format++;
365 12 : gotXpg = 1;
366 12 : if (gotSequential) {
367 0 : goto mixedXPG;
368 : }
369 12 : objIndex = value - 1;
370 12 : if ((objIndex < 0) || (numVars && (objIndex >= numVars))) {
371 : goto badIndex;
372 12 : } else if (numVars == 0) {
373 : /*
374 : * In the case where no vars are specified, the user can
375 : * specify %9999$ legally, so we have to consider special
376 : * rules for growing the assign array. 'value' is
377 : * guaranteed to be > 0.
378 : */
379 :
380 : /* set a lower artificial limit on this
381 : * in the interest of security and resource friendliness
382 : * 255 arguments should be more than enough. - cc
383 : */
384 7 : if (value > SCAN_MAX_ARGS) {
385 0 : goto badIndex;
386 : }
387 :
388 7 : xpgSize = (xpgSize > value) ? xpgSize : value;
389 : }
390 12 : goto xpgCheckDone;
391 : }
392 :
393 7381 : notXpg:
394 7381 : gotSequential = 1;
395 7381 : if (gotXpg) {
396 0 : mixedXPG:
397 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "cannot mix \"\%\" and \"\%n$\" conversion specifiers");
398 0 : goto error;
399 : }
400 :
401 7971 : xpgCheckDone:
402 : /*
403 : * Parse any width specifier.
404 : */
405 7971 : if (isdigit(UCHAR(*ch))) {
406 1156 : value = strtoul(format-1, &format, 10);
407 1156 : flags |= SCAN_WIDTH;
408 1156 : ch = format++;
409 : }
410 :
411 : /*
412 : * Ignore size specifier.
413 : */
414 7971 : if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
415 1717 : ch = format++;
416 : }
417 :
418 7971 : if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
419 2 : goto badIndex;
420 : }
421 :
422 : /*
423 : * Handle the various field types.
424 : */
425 7969 : switch (*ch) {
426 : case 'n':
427 : case 'd':
428 : case 'D':
429 : case 'i':
430 : case 'o':
431 : case 'x':
432 : case 'X':
433 : case 'u':
434 : case 'f':
435 : case 'e':
436 : case 'E':
437 : case 'g':
438 : case 's':
439 5948 : break;
440 :
441 : case 'c':
442 : /* we differ here with the TCL implementation in allowing for */
443 : /* a character width specification, to be more consistent with */
444 : /* ANSI. since Zend auto allocates space for vars, this is no */
445 : /* problem - cc */
446 : /*
447 : if (flags & SCAN_WIDTH) {
448 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Field width may not be specified in %c conversion");
449 : goto error;
450 : }
451 : */
452 858 : break;
453 :
454 : case '[':
455 580 : if (*format == '\0') {
456 0 : goto badSet;
457 : }
458 580 : ch = format++;
459 580 : if (*ch == '^') {
460 2 : if (*format == '\0') {
461 0 : goto badSet;
462 : }
463 2 : ch = format++;
464 : }
465 580 : if (*ch == ']') {
466 0 : if (*format == '\0') {
467 0 : goto badSet;
468 : }
469 0 : ch = format++;
470 : }
471 4230 : while (*ch != ']') {
472 3070 : if (*format == '\0') {
473 0 : goto badSet;
474 : }
475 3070 : ch = format++;
476 : }
477 580 : break;
478 0 : badSet:
479 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unmatched [ in format string");
480 0 : goto error;
481 :
482 : default: {
483 583 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Bad scan conversion character \"%c\"", *ch);
484 583 : goto error;
485 : }
486 : }
487 :
488 7386 : if (!(flags & SCAN_SUPPRESS)) {
489 6808 : if (objIndex >= nspace) {
490 : /*
491 : * Expand the nassign buffer. If we are using XPG specifiers,
492 : * make sure that we grow to a large enough size. xpgSize is
493 : * guaranteed to be at least one larger than objIndex.
494 : */
495 0 : value = nspace;
496 0 : if (xpgSize) {
497 0 : nspace = xpgSize;
498 : } else {
499 0 : nspace += STATIC_LIST_SIZE;
500 : }
501 0 : if (nassign == staticAssign) {
502 0 : nassign = (void *)safe_emalloc(nspace, sizeof(int), 0);
503 0 : for (i = 0; i < STATIC_LIST_SIZE; ++i) {
504 0 : nassign[i] = staticAssign[i];
505 : }
506 : } else {
507 0 : nassign = (void *)erealloc((void *)nassign, nspace * sizeof(int));
508 : }
509 0 : for (i = value; i < nspace; i++) {
510 0 : nassign[i] = 0;
511 : }
512 : }
513 6808 : nassign[objIndex]++;
514 6808 : objIndex++;
515 : }
516 : } /* while (*format != '\0') */
517 :
518 : /*
519 : * Verify that all of the variable were assigned exactly once.
520 : */
521 7316 : if (numVars == 0) {
522 7292 : if (xpgSize) {
523 4 : numVars = xpgSize;
524 : } else {
525 7288 : numVars = objIndex;
526 : }
527 : }
528 7316 : if (totalSubs) {
529 7316 : *totalSubs = numVars;
530 : }
531 14120 : for (i = 0; i < numVars; i++) {
532 6807 : if (nassign[i] > 1) {
533 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "Variable is assigned by multiple \"%n$\" conversion specifiers");
534 0 : goto error;
535 6807 : } else if (!xpgSize && (nassign[i] == 0)) {
536 : /*
537 : * If the space is empty, and xpgSize is 0 (means XPG wasn't
538 : * used, and/or numVars != 0), then too many vars were given
539 : */
540 3 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Variable is not assigned by any conversion specifiers");
541 3 : goto error;
542 : }
543 : }
544 :
545 7313 : if (nassign != staticAssign) {
546 0 : efree((char *)nassign);
547 : }
548 7313 : return SCAN_SUCCESS;
549 :
550 2 : badIndex:
551 2 : if (gotXpg) {
552 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "\"%n$\" argument index out of range");
553 : } else {
554 2 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Different numbers of variable names and field specifiers");
555 : }
556 :
557 588 : error:
558 588 : if (nassign != staticAssign) {
559 0 : efree((char *)nassign);
560 : }
561 588 : return SCAN_ERROR_INVALID_FORMAT;
562 : #undef STATIC_LIST_SIZE
563 : }
564 : /* }}} */
565 :
566 : /* {{{ php_sscanf_internal
567 : * This is the internal function which does processing on behalf of
568 : * both sscanf() and fscanf()
569 : *
570 : * parameters :
571 : * string literal string to be processed
572 : * format format string
573 : * argCount total number of elements in the args array
574 : * args arguments passed in from user function (f|s)scanf
575 : * varStart offset (in args) of 1st variable passed in to (f|s)scanf
576 : * return_value set with the results of the scan
577 : */
578 :
579 : PHPAPI int php_sscanf_internal( char *string, char *format,
580 : int argCount, zval ***args,
581 : int varStart, zval **return_value TSRMLS_DC)
582 7901 : {
583 7901 : int numVars, nconversions, totalVars = -1;
584 : int i, value, result;
585 : int objIndex;
586 : char *end, *baseString;
587 : zval **current;
588 7901 : char op = 0;
589 7901 : int base = 0;
590 7901 : int underflow = 0;
591 : size_t width;
592 7901 : long (*fn)() = NULL;
593 : char *ch, sch;
594 : int flags;
595 : char buf[64]; /* Temporary buffer to hold scanned number
596 : * strings before they are passed to strtoul() */
597 :
598 : /* do some sanity checking */
599 7901 : if ((varStart > argCount) || (varStart < 0)){
600 0 : varStart = SCAN_MAX_ARGS + 1;
601 : }
602 7901 : numVars = argCount - varStart;
603 7901 : if (numVars < 0) {
604 0 : numVars = 0;
605 : }
606 :
607 : #if 0
608 : zend_printf("<br>in sscanf_internal : <br> string is \"%s\", format = \"%s\"<br> NumVars = %d. VarStart = %d<br>-------------------------<br>",
609 : string, format, numVars, varStart);
610 : #endif
611 : /*
612 : * Check for errors in the format string.
613 : */
614 7901 : if (ValidateFormat(format, numVars, &totalVars) != SCAN_SUCCESS) {
615 588 : scan_set_error_return( numVars, return_value );
616 588 : return SCAN_ERROR_INVALID_FORMAT;
617 : }
618 :
619 7313 : objIndex = numVars ? varStart : 0;
620 :
621 : /*
622 : * If any variables are passed, make sure they are all passed by reference
623 : */
624 7313 : if (numVars) {
625 90 : for (i = varStart;i < argCount;i++){
626 69 : if ( ! PZVAL_IS_REF( *args[ i ] ) ) {
627 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter %d must be passed by reference", i);
628 0 : scan_set_error_return(numVars, return_value);
629 0 : return SCAN_ERROR_VAR_PASSED_BYVAL;
630 : }
631 : }
632 : }
633 :
634 : /*
635 : * Allocate space for the result objects. Only happens when no variables
636 : * are specified
637 : */
638 7313 : if (!numVars) {
639 : zval *tmp;
640 :
641 : /* allocate an array for return */
642 7292 : array_init(*return_value);
643 :
644 14024 : for (i = 0; i < totalVars; i++) {
645 6732 : MAKE_STD_ZVAL(tmp);
646 6732 : ZVAL_NULL(tmp);
647 6732 : if (add_next_index_zval(*return_value, tmp) == FAILURE) {
648 0 : scan_set_error_return(0, return_value);
649 0 : return FAILURE;
650 : }
651 : }
652 7292 : varStart = 0; /* Array index starts from 0 */
653 : }
654 :
655 7313 : baseString = string;
656 :
657 : /*
658 : * Iterate over the format string filling in the result objects until
659 : * we reach the end of input, the end of the format string, or there
660 : * is a mismatch.
661 : */
662 7313 : nconversions = 0;
663 : /* note ! - we need to limit the loop for objIndex to keep it in bounds */
664 :
665 22066 : while (*format != '\0') {
666 9813 : ch = format++;
667 9813 : flags = 0;
668 :
669 : /*
670 : * If we see whitespace in the format, skip whitespace in the string.
671 : */
672 9813 : if ( isspace( (int)*ch ) ) {
673 2248 : sch = *string;
674 5033 : while ( isspace( (int)sch ) ) {
675 537 : if (*string == '\0') {
676 0 : goto done;
677 : }
678 537 : string++;
679 537 : sch = *string;
680 : }
681 2248 : continue;
682 : }
683 :
684 7565 : if (*ch != '%') {
685 194 : literal:
686 194 : if (*string == '\0') {
687 3 : underflow = 1;
688 3 : goto done;
689 : }
690 191 : sch = *string;
691 191 : string++;
692 191 : if (*ch != sch) {
693 34 : goto done;
694 : }
695 157 : continue;
696 : }
697 :
698 7371 : ch = format++;
699 7371 : if (*ch == '%') {
700 0 : goto literal;
701 : }
702 :
703 : /*
704 : * Check for assignment suppression ('*') or an XPG3-style
705 : * assignment ('%n$').
706 : */
707 7371 : if (*ch == '*') {
708 578 : flags |= SCAN_SUPPRESS;
709 578 : ch = format++;
710 6793 : } else if ( isdigit(UCHAR(*ch))) {
711 1167 : value = strtoul(format-1, &end, 10);
712 1167 : if (*end == '$') {
713 11 : format = end+1;
714 11 : ch = format++;
715 11 : objIndex = varStart + value - 1;
716 : }
717 : }
718 :
719 : /*
720 : * Parse any width specifier.
721 : */
722 7371 : if ( isdigit(UCHAR(*ch))) {
723 1156 : width = strtoul(format-1, &format, 10);
724 1156 : ch = format++;
725 : } else {
726 6215 : width = 0;
727 : }
728 :
729 : /*
730 : * Ignore size specifier.
731 : */
732 7371 : if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
733 1716 : ch = format++;
734 : }
735 :
736 : /*
737 : * Handle the various field types.
738 : */
739 7371 : switch (*ch) {
740 : case 'n':
741 2 : if (!(flags & SCAN_SUPPRESS)) {
742 2 : if (numVars && objIndex >= argCount) {
743 : break;
744 2 : } else if (numVars) {
745 : zend_uint refcount;
746 :
747 1 : current = args[objIndex++];
748 1 : refcount = (*current)->refcount;
749 1 : zval_dtor( *current );
750 1 : ZVAL_LONG( *current, (long)(string - baseString) );
751 1 : (*current)->refcount = refcount;
752 1 : (*current)->is_ref = 1;
753 : } else {
754 1 : add_index_long(*return_value, objIndex++, string - baseString);
755 : }
756 : }
757 2 : nconversions++;
758 2 : continue;
759 :
760 : case 'd':
761 : case 'D':
762 805 : op = 'i';
763 805 : base = 10;
764 805 : fn = (long (*)())strtol;
765 805 : break;
766 : case 'i':
767 0 : op = 'i';
768 0 : base = 0;
769 0 : fn = (long (*)())strtol;
770 0 : break;
771 : case 'o':
772 806 : op = 'i';
773 806 : base = 8;
774 806 : fn = (long (*)())strtol;
775 806 : break;
776 : case 'x':
777 : case 'X':
778 820 : op = 'i';
779 820 : base = 16;
780 820 : fn = (long (*)())strtol;
781 820 : break;
782 : case 'u':
783 806 : op = 'i';
784 806 : base = 10;
785 806 : flags |= SCAN_UNSIGNED;
786 806 : fn = (long (*)())strtoul;
787 806 : break;
788 :
789 : case 'f':
790 : case 'e':
791 : case 'E':
792 : case 'g':
793 1782 : op = 'f';
794 1782 : break;
795 :
796 : case 's':
797 912 : op = 's';
798 912 : break;
799 :
800 : case 'c':
801 858 : op = 's';
802 858 : flags |= SCAN_NOSKIP;
803 : /*-cc-*/
804 858 : if (0 == width) {
805 710 : width = 1;
806 : }
807 : /*-cc-*/
808 858 : break;
809 : case '[':
810 580 : op = '[';
811 580 : flags |= SCAN_NOSKIP;
812 : break;
813 : } /* switch */
814 :
815 : /*
816 : * At this point, we will need additional characters from the
817 : * string to proceed.
818 : */
819 7369 : if (*string == '\0') {
820 129 : underflow = 1;
821 129 : goto done;
822 : }
823 :
824 : /*
825 : * Skip any leading whitespace at the beginning of a field unless
826 : * the format suppresses this behavior.
827 : */
828 7240 : if (!(flags & SCAN_NOSKIP)) {
829 11918 : while (*string != '\0') {
830 5839 : sch = *string;
831 5839 : if (! isspace((int)sch) ) {
832 5551 : break;
833 : }
834 288 : string++;
835 : }
836 5815 : if (*string == '\0') {
837 264 : underflow = 1;
838 264 : goto done;
839 : }
840 : }
841 :
842 : /*
843 : * Perform the requested scanning operation.
844 : */
845 6976 : switch (op) {
846 : case 'c':
847 : case 's':
848 : /*
849 : * Scan a string up to width characters or whitespace.
850 : */
851 1642 : if (width == 0) {
852 666 : width = (size_t) ~0;
853 : }
854 1642 : end = string;
855 8128 : while (*end != '\0') {
856 6462 : sch = *end;
857 6462 : if ( isspace( (int)sch ) ) {
858 846 : break;
859 : }
860 5616 : end++;
861 5616 : if (--width == 0) {
862 772 : break;
863 : }
864 : }
865 1642 : if (!(flags & SCAN_SUPPRESS)) {
866 1503 : if (numVars && objIndex >= argCount) {
867 : break;
868 1503 : } else if (numVars) {
869 : zend_uint refcount;
870 :
871 20 : current = args[objIndex++];
872 20 : refcount = (*current)->refcount;
873 20 : zval_dtor( *current );
874 20 : ZVAL_STRINGL( *current, string, end-string, 1);
875 20 : (*current)->refcount = refcount;
876 20 : (*current)->is_ref = 1;
877 : } else {
878 1483 : add_index_stringl( *return_value, objIndex++, string, end-string, 1);
879 : }
880 : }
881 1642 : string = end;
882 1642 : break;
883 :
884 : case '[': {
885 : CharSet cset;
886 :
887 579 : if (width == 0) {
888 579 : width = (size_t) ~0;
889 : }
890 579 : end = string;
891 :
892 579 : format = BuildCharSet(&cset, format);
893 2317 : while (*end != '\0') {
894 1736 : sch = *end;
895 1736 : if (!CharInSet(&cset, (int)sch)) {
896 577 : break;
897 : }
898 1159 : end++;
899 1159 : if (--width == 0) {
900 0 : break;
901 : }
902 : }
903 579 : ReleaseCharSet(&cset);
904 :
905 579 : if (string == end) {
906 : /*
907 : * Nothing matched the range, stop processing
908 : */
909 320 : goto done;
910 : }
911 259 : if (!(flags & SCAN_SUPPRESS)) {
912 259 : if (numVars && objIndex >= argCount) {
913 : break;
914 259 : } else if (numVars) {
915 2 : current = args[objIndex++];
916 2 : zval_dtor( *current );
917 2 : ZVAL_STRINGL( *current, string, end-string, 1);
918 : } else {
919 257 : add_index_stringl(*return_value, objIndex++, string, end-string, 1);
920 : }
921 : }
922 259 : string = end;
923 259 : break;
924 : }
925 : /*
926 : case 'c':
927 : / Scan a single character./
928 :
929 : sch = *string;
930 : string++;
931 : if (!(flags & SCAN_SUPPRESS)) {
932 : if (numVars) {
933 : char __buf[2];
934 : __buf[0] = sch;
935 : __buf[1] = '\0';;
936 : current = args[objIndex++];
937 : convert_to_string_ex( current );
938 : ZVAL_STRINGL( *current, __buf, 1, 1);
939 : } else {
940 : add_index_stringl(*return_value, objIndex++, &sch, 1, 1);
941 : }
942 : }
943 : break;
944 : */
945 : case 'i':
946 : /*
947 : * Scan an unsigned or signed integer.
948 : */
949 : /*-cc-*/
950 3061 : buf[0] = '\0';
951 : /*-cc-*/
952 3061 : if ((width == 0) || (width > sizeof(buf) - 1)) {
953 2541 : width = sizeof(buf) - 1;
954 : }
955 :
956 3061 : flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
957 12805 : for (end = buf; width > 0; width--) {
958 12715 : switch (*string) {
959 : /*
960 : * The 0 digit has special meaning at the beginning of
961 : * a number. If we are unsure of the base, it
962 : * indicates that we are in base 8 or base 16 (if it is
963 : * followed by an 'x').
964 : */
965 : case '0':
966 : /*-cc-*/
967 1822 : if (base == 16) {
968 459 : flags |= SCAN_XOK;
969 : }
970 : /*-cc-*/
971 1822 : if (base == 0) {
972 0 : base = 8;
973 0 : flags |= SCAN_XOK;
974 : }
975 1822 : if (flags & SCAN_NOZERO) {
976 638 : flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO);
977 : } else {
978 1184 : flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
979 : }
980 1822 : goto addToInt;
981 :
982 : case '1': case '2': case '3': case '4':
983 : case '5': case '6': case '7':
984 6276 : if (base == 0) {
985 0 : base = 10;
986 : }
987 6276 : flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
988 6276 : goto addToInt;
989 :
990 : case '8': case '9':
991 1080 : if (base == 0) {
992 0 : base = 10;
993 : }
994 1080 : if (base <= 8) {
995 196 : break;
996 : }
997 884 : flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
998 884 : goto addToInt;
999 :
1000 : case 'A': case 'B': case 'C':
1001 : case 'D': case 'E': case 'F':
1002 : case 'a': case 'b': case 'c':
1003 : case 'd': case 'e': case 'f':
1004 711 : if (base <= 10) {
1005 481 : break;
1006 : }
1007 230 : flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
1008 230 : goto addToInt;
1009 :
1010 : case '+': case '-':
1011 534 : if (flags & SCAN_SIGNOK) {
1012 534 : flags &= ~SCAN_SIGNOK;
1013 534 : goto addToInt;
1014 : }
1015 0 : break;
1016 :
1017 : case 'x': case 'X':
1018 0 : if ((flags & SCAN_XOK) && (end == buf+1)) {
1019 0 : base = 16;
1020 0 : flags &= ~SCAN_XOK;
1021 0 : goto addToInt;
1022 : }
1023 : break;
1024 : }
1025 :
1026 : /*
1027 : * We got an illegal character so we are done accumulating.
1028 : */
1029 2969 : break;
1030 :
1031 9746 : addToInt:
1032 : /*
1033 : * Add the character to the temporary buffer.
1034 : */
1035 9746 : *end++ = *string++;
1036 9746 : if (*string == '\0') {
1037 2 : break;
1038 : }
1039 : }
1040 :
1041 : /*
1042 : * Check to see if we need to back up because we only got a
1043 : * sign or a trailing x after a 0.
1044 : */
1045 3061 : if (flags & SCAN_NODIGITS) {
1046 1037 : if (*string == '\0') {
1047 0 : underflow = 1;
1048 : }
1049 1037 : goto done;
1050 2024 : } else if (end[-1] == 'x' || end[-1] == 'X') {
1051 0 : end--;
1052 0 : string--;
1053 : }
1054 :
1055 : /*
1056 : * Scan the value from the temporary buffer. If we are
1057 : * returning a large unsigned value, we have to convert it back
1058 : * to a string since PHP only supports signed values.
1059 : */
1060 2024 : if (!(flags & SCAN_SUPPRESS)) {
1061 1855 : *end = '\0';
1062 1855 : value = (int) (*fn)(buf, NULL, base);
1063 1968 : if ((flags & SCAN_UNSIGNED) && (value < 0)) {
1064 113 : snprintf(buf, sizeof(buf), "%u", value); /* INTL: ISO digit */
1065 113 : if (numVars && objIndex >= argCount) {
1066 : break;
1067 113 : } else if (numVars) {
1068 : /* change passed value type to string */
1069 2 : current = args[objIndex++];
1070 2 : zval_dtor(*current);
1071 2 : ZVAL_STRING( *current, buf, 1 );
1072 : } else {
1073 111 : add_index_string(*return_value, objIndex++, buf, 1);
1074 : }
1075 : } else {
1076 1742 : if (numVars && objIndex >= argCount) {
1077 : break;
1078 1742 : } else if (numVars) {
1079 21 : current = args[objIndex++];
1080 21 : zval_dtor(*current);
1081 21 : ZVAL_LONG(*current, value);
1082 : } else {
1083 1721 : add_index_long(*return_value, objIndex++, value);
1084 : }
1085 : }
1086 : }
1087 2024 : break;
1088 :
1089 : case 'f':
1090 : /*
1091 : * Scan a floating point number
1092 : */
1093 1694 : buf[0] = '\0'; /* call me pedantic */
1094 1694 : if ((width == 0) || (width > sizeof(buf) - 1)) {
1095 1404 : width = sizeof(buf) - 1;
1096 : }
1097 1694 : flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
1098 8189 : for (end = buf; width > 0; width--) {
1099 8125 : switch (*string) {
1100 : case '0': case '1': case '2': case '3':
1101 : case '4': case '5': case '6': case '7':
1102 : case '8': case '9':
1103 5759 : flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
1104 5759 : goto addToFloat;
1105 : case '+':
1106 : case '-':
1107 452 : if (flags & SCAN_SIGNOK) {
1108 452 : flags &= ~SCAN_SIGNOK;
1109 452 : goto addToFloat;
1110 : }
1111 0 : break;
1112 : case '.':
1113 174 : if (flags & SCAN_PTOK) {
1114 174 : flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
1115 174 : goto addToFloat;
1116 : }
1117 0 : break;
1118 : case 'e':
1119 : case 'E':
1120 : /*
1121 : * An exponent is not allowed until there has
1122 : * been at least one digit.
1123 : */
1124 117 : if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) == SCAN_EXPOK) {
1125 117 : flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
1126 : | SCAN_SIGNOK | SCAN_NODIGITS;
1127 117 : goto addToFloat;
1128 : }
1129 : break;
1130 : }
1131 :
1132 : /*
1133 : * We got an illegal character so we are done accumulating.
1134 : */
1135 1623 : break;
1136 :
1137 6502 : addToFloat:
1138 : /*
1139 : * Add the character to the temporary buffer.
1140 : */
1141 6502 : *end++ = *string++;
1142 6502 : if (*string == '\0') {
1143 7 : break;
1144 : }
1145 : }
1146 :
1147 : /*
1148 : * Check to see if we need to back up because we saw a
1149 : * trailing 'e' or sign.
1150 : */
1151 1694 : if (flags & SCAN_NODIGITS) {
1152 591 : if (flags & SCAN_EXPOK) {
1153 : /*
1154 : * There were no digits at all so scanning has
1155 : * failed and we are done.
1156 : */
1157 586 : if (*string == '\0') {
1158 0 : underflow = 1;
1159 : }
1160 586 : goto done;
1161 : }
1162 :
1163 : /*
1164 : * We got a bad exponent ('e' and maybe a sign).
1165 : */
1166 5 : end--;
1167 5 : string--;
1168 5 : if (*end != 'e' && *end != 'E') {
1169 0 : end--;
1170 0 : string--;
1171 : }
1172 : }
1173 :
1174 : /*
1175 : * Scan the value from the temporary buffer.
1176 : */
1177 1108 : if (!(flags & SCAN_SUPPRESS)) {
1178 : double dvalue;
1179 1015 : *end = '\0';
1180 1015 : dvalue = zend_strtod(buf, NULL);
1181 1015 : if (numVars && objIndex >= argCount) {
1182 : break;
1183 1015 : } else if (numVars) {
1184 14 : current = args[objIndex++];
1185 14 : zval_dtor(*current);
1186 14 : ZVAL_DOUBLE(*current, dvalue);
1187 : } else {
1188 1001 : add_index_double( *return_value, objIndex++, dvalue );
1189 : }
1190 : }
1191 : break;
1192 : } /* switch (op) */
1193 5033 : nconversions++;
1194 : } /* while (*format != '\0') */
1195 :
1196 7313 : done:
1197 7313 : result = SCAN_SUCCESS;
1198 :
1199 7706 : if (underflow && (0==nconversions)) {
1200 393 : scan_set_error_return( numVars, return_value );
1201 393 : result = SCAN_ERROR_EOF;
1202 6920 : } else if (numVars) {
1203 21 : convert_to_long( *return_value );
1204 21 : Z_LVAL_PP(return_value) = nconversions;
1205 6899 : } else if (nconversions < totalVars) {
1206 : /* TODO: not all elements converted. we need to prune the list - cc */
1207 : }
1208 7313 : return result;
1209 : }
1210 : /* }}} */
1211 :
1212 : /* the compiler choked when i tried to make this a macro */
1213 : static inline void scan_set_error_return(int numVars, zval **return_value) /* {{{ */
1214 981 : {
1215 981 : if (numVars) {
1216 6 : Z_TYPE_PP(return_value) = IS_LONG;
1217 6 : Z_LVAL_PP(return_value) = SCAN_ERROR_EOF; /* EOF marker */
1218 : } else {
1219 : /* convert_to_null calls destructor */
1220 975 : convert_to_null( *return_value );
1221 : }
1222 981 : }
1223 : /* }}} */
1224 :
1225 : /*
1226 : * Local variables:
1227 : * tab-width: 4
1228 : * c-basic-offset: 4
1229 : * End:
1230 : * vim600: sw=4 ts=4 fdm=marker
1231 : * vim<600: sw=4 ts=4
1232 : */
|