1 : /*
2 : +----------------------------------------------------------------------+
3 : | PHP Version 6 |
4 : +----------------------------------------------------------------------+
5 : | Copyright (c) 1997-2009 The PHP Group |
6 : +----------------------------------------------------------------------+
7 : | This source file is subject to version 3.01 of the PHP license, |
8 : | that is bundled with this package in the file LICENSE, and is |
9 : | available through the world-wide-web at the following url: |
10 : | http://www.php.net/license/3_01.txt |
11 : | If you did not receive a copy of the PHP license and are unable to |
12 : | obtain it through the world-wide-web, please send a note to |
13 : | license@php.net so we can mail you a copy immediately. |
14 : +----------------------------------------------------------------------+
15 : | Author: Clayton Collie <clcollie@mindspring.com> |
16 : +----------------------------------------------------------------------+
17 : */
18 :
19 : /* $Id: scanf.c 281342 2009-05-28 20:46:05Z kalle $ */
20 :
21 : /*
22 : scanf.c --
23 :
24 : This file contains the base code which implements sscanf and by extension
25 : fscanf. Original code is from TCL8.3.0 and bears the following copyright:
26 :
27 : This software is copyrighted by the Regents of the University of
28 : California, Sun Microsystems, Inc., Scriptics Corporation,
29 : and other parties. The following terms apply to all files associated
30 : with the software unless explicitly disclaimed in individual files.
31 :
32 : The authors hereby grant permission to use, copy, modify, distribute,
33 : and license this software and its documentation for any purpose, provided
34 : that existing copyright notices are retained in all copies and that this
35 : notice is included verbatim in any distributions. No written agreement,
36 : license, or royalty fee is required for any of the authorized uses.
37 : Modifications to this software may be copyrighted by their authors
38 : and need not follow the licensing terms described here, provided that
39 : the new terms are clearly indicated on the first page of each file where
40 : they apply.
41 :
42 : IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
43 : FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
44 : ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
45 : DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
46 : POSSIBILITY OF SUCH DAMAGE.
47 :
48 : THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
49 : INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
50 : FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE
51 : IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
52 : NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
53 : MODIFICATIONS.
54 :
55 : GOVERNMENT USE: If you are acquiring this software on behalf of the
56 : U.S. government, the Government shall have only "Restricted Rights"
57 : in the software and related documentation as defined in the Federal
58 : Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you
59 : are acquiring the software on behalf of the Department of Defense, the
60 : software shall be classified as "Commercial Computer Software" and the
61 : Government shall have only "Restricted Rights" as defined in Clause
62 : 252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the
63 : authors grant the U.S. Government and others acting in its behalf
64 : permission to use and distribute the software in accordance with the
65 : terms specified in this license.
66 : */
67 :
68 : #include <stdio.h>
69 : #include <limits.h>
70 : #include <ctype.h>
71 : #include "php.h"
72 : #include "php_variables.h"
73 : #ifdef HAVE_LOCALE_H
74 : #include <locale.h>
75 : #endif
76 : #include "zend_execute.h"
77 : #include "zend_operators.h"
78 : #include "zend_strtod.h"
79 : #include "php_globals.h"
80 : #include "basic_functions.h"
81 : #include "scanf.h"
82 :
/*
 * Bit flags used internally by [f|s]canf.
 */

/* Flags describing the conversion being processed. */
#define SCAN_NOSKIP		0x001	/* Blanks are not skipped. */
#define SCAN_SUPPRESS	0x002	/* Assignment is suppressed ("%*"). */
#define SCAN_UNSIGNED	0x004	/* The value is read as unsigned. */
#define SCAN_WIDTH		0x008	/* An explicit width was given. */

/* Flags describing what has been seen / is still permitted in a number. */
#define SCAN_SIGNOK		0x010	/* A '+' or '-' may still appear. */
#define SCAN_NODIGITS	0x020	/* No digit has been scanned yet. */
#define SCAN_NOZERO		0x040	/* No zero digit has been scanned yet. */
#define SCAN_XOK		0x080	/* An 'x' may still appear. */
#define SCAN_PTOK		0x100	/* A decimal point may still appear. */
#define SCAN_EXPOK		0x200	/* An exponent may still appear. */

/* Cast to zend_uchar, e.g. before handing a char to a <ctype.h> classifier. */
#define UCHAR(x) (zend_uchar)(x)
99 :
/*
 * Description of a "%[...]" character set for char (binary) strings:
 * a list of individual characters plus a list of inclusive ranges.
 */
typedef struct CharSet {
	int exclude;			/* 1 if this is an exclusion set ("[^...]"). */
	int nchars;				/* Number of entries used in chars. */
	char *chars;			/* Individually listed characters. */
	int nranges;			/* Number of entries used in ranges. */
	struct Range {
		char start;			/* First character of the range. */
		char end;			/* Last character of the range. */
	} *ranges;
} CharSet;
114 :
115 : typedef struct u_CharSet {
116 : int exclude; /* 1 if this is an exclusion set. */
117 : int nchars;
118 : UChar *chars;
119 : int nranges;
120 : struct u_Range {
121 : UChar start;
122 : UChar end;
123 : } *ranges;
124 : } u_CharSet;
125 :
126 : /*
127 : * Declarations for functions used only in this file.
128 : */
129 : static char *BuildCharSet(CharSet *cset, char *format);
130 : static int CharInSet(CharSet *cset, int ch);
131 : static void ReleaseCharSet(CharSet *cset);
132 : static UChar *u_BuildCharSet(u_CharSet *cset, UChar *format);
133 : static int u_CharInSet(u_CharSet *cset, UChar ch);
134 : static void u_ReleaseCharSet(u_CharSet *cset);
135 : static inline void scan_set_error_return(int numVars, zval **return_value);
136 :
137 :
138 : /* {{{ BuildCharSet
139 : *----------------------------------------------------------------------
140 : *
141 : * BuildCharSet --
142 : *
143 : * This function examines a character set format specification
144 : * and builds a CharSet containing the individual characters and
145 : * character ranges specified.
146 : *
147 : * Results:
148 : * Returns the next format position.
149 : *
150 : * Side effects:
151 : * Initializes the charset.
152 : *
153 : *----------------------------------------------------------------------
154 : */
155 : static char * BuildCharSet(CharSet *cset, char *format)
156 577 : {
157 : char *ch, start;
158 : int nranges;
159 : char *end;
160 :
161 577 : memset(cset, 0, sizeof(CharSet));
162 :
163 577 : ch = format;
164 577 : if (*ch == '^') {
165 0 : cset->exclude = 1;
166 0 : ch = ++format;
167 : }
168 577 : end = format + 1; /* verify this - cc */
169 :
170 : /*
171 : * Find the close bracket so we can overallocate the set.
172 : */
173 577 : if (*ch == ']') {
174 0 : ch = end++;
175 : }
176 577 : nranges = 0;
177 4213 : while (*ch != ']') {
178 3059 : if (*ch == '-') {
179 921 : nranges++;
180 : }
181 3059 : ch = end++;
182 : }
183 :
184 577 : cset->chars = (char *) safe_emalloc(sizeof(char), (end - format - 1), 0);
185 577 : if (nranges > 0) {
186 577 : cset->ranges = (struct Range *) safe_emalloc(sizeof(struct Range), nranges, 0);
187 : } else {
188 0 : cset->ranges = NULL;
189 : }
190 :
191 : /*
192 : * Now build the character set.
193 : */
194 577 : cset->nchars = cset->nranges = 0;
195 577 : ch = format++;
196 577 : start = *ch;
197 577 : if (*ch == ']' || *ch == '-') {
198 0 : cset->chars[cset->nchars++] = *ch;
199 0 : ch = format++;
200 : }
201 3292 : while (*ch != ']') {
202 2138 : if (*format == '-') {
203 : /*
204 : * This may be the first character of a range, so don't add
205 : * it yet.
206 : */
207 921 : start = *ch;
208 1217 : } else if (*ch == '-') {
209 : /*
210 : * Check to see if this is the last character in the set, in which
211 : * case it is not a range and we should add the previous character
212 : * as well as the dash.
213 : */
214 921 : if (*format == ']') {
215 0 : cset->chars[cset->nchars++] = start;
216 0 : cset->chars[cset->nchars++] = *ch;
217 : } else {
218 921 : ch = format++;
219 :
220 : /*
221 : * Check to see if the range is in reverse order.
222 : */
223 921 : if (start < *ch) {
224 921 : cset->ranges[cset->nranges].start = start;
225 921 : cset->ranges[cset->nranges].end = *ch;
226 : } else {
227 0 : cset->ranges[cset->nranges].start = *ch;
228 0 : cset->ranges[cset->nranges].end = start;
229 : }
230 921 : cset->nranges++;
231 : }
232 : } else {
233 296 : cset->chars[cset->nchars++] = *ch;
234 : }
235 2138 : ch = format++;
236 : }
237 577 : return format;
238 : }
239 : /* }}} */
240 :
241 : /* {{{ u_BuildCharSet
242 : *----------------------------------------------------------------------
243 : *
244 : * BuildCharSet --
245 : *
246 : * This function examines a character set format specification
247 : * and builds a CharSet containing the individual characters and
248 : * character ranges specified.
249 : *
250 : * Results:
251 : * Returns the next format position.
252 : *
253 : * Side effects:
254 : * Initializes the charset.
255 : *
256 : *----------------------------------------------------------------------
257 : */
258 : static UChar * u_BuildCharSet(u_CharSet *cset, UChar *format)
259 2 : {
260 : UChar *ch, start;
261 : int nranges;
262 : UChar *end;
263 :
264 2 : memset(cset, 0, sizeof(u_CharSet));
265 :
266 2 : ch = format;
267 2 : if (*ch == 0x5E /* '^' */) {
268 2 : cset->exclude = 1;
269 2 : ch = ++format;
270 : }
271 2 : end = format + 1; /* verify this - cc */
272 :
273 : /*
274 : * Find the close bracket so we can overallocate the set.
275 : */
276 2 : if (*ch == 0x5D /* ']' */) {
277 0 : ch = end++;
278 : }
279 2 : nranges = 0;
280 6 : while (*ch != 0x5D /* ']' */) {
281 2 : if (*ch == 0x2D /* '-' */) {
282 0 : nranges++;
283 : }
284 2 : ch = end++;
285 : }
286 :
287 2 : cset->chars = safe_emalloc(sizeof(UChar), (end - format - 1), 0);
288 2 : if (nranges > 0) {
289 0 : cset->ranges = (struct u_Range *) safe_emalloc(sizeof(struct u_Range), nranges, 0);
290 : } else {
291 2 : cset->ranges = NULL;
292 : }
293 :
294 : /*
295 : * Now build the character set.
296 : */
297 2 : cset->nchars = cset->nranges = 0;
298 2 : ch = format++;
299 2 : start = *ch;
300 2 : if (*ch == 0x5D /* ']' */ || *ch == 0x2D /* '-' */) {
301 0 : cset->chars[cset->nchars++] = *ch;
302 0 : ch = format++;
303 : }
304 6 : while (*ch != 0x5D /* ']' */) {
305 2 : if (*format == 0x2D /* '-' */) {
306 : /*
307 : * This may be the first character of a range, so don't add
308 : * it yet.
309 : */
310 0 : start = *ch;
311 2 : } else if (*ch == 0x2D /* '-' */) {
312 : /*
313 : * Check to see if this is the last character in the set, in which
314 : * case it is not a range and we should add the previous character
315 : * as well as the dash.
316 : */
317 0 : if (*format == 0x5D /* ']' */) {
318 0 : cset->chars[cset->nchars++] = start;
319 0 : cset->chars[cset->nchars++] = *ch;
320 : } else {
321 0 : ch = format++;
322 :
323 : /*
324 : * Check to see if the range is in reverse order.
325 : */
326 0 : if (start < *ch) {
327 0 : cset->ranges[cset->nranges].start = start;
328 0 : cset->ranges[cset->nranges].end = *ch;
329 : } else {
330 0 : cset->ranges[cset->nranges].start = *ch;
331 0 : cset->ranges[cset->nranges].end = start;
332 : }
333 0 : cset->nranges++;
334 : }
335 : } else {
336 2 : cset->chars[cset->nchars++] = *ch;
337 : }
338 2 : ch = format++;
339 : }
340 2 : return format;
341 : }
342 : /* }}} */
343 :
344 : /* {{{ CharInSet
345 : *----------------------------------------------------------------------
346 : *
347 : * CharInSet --
348 : *
349 : * Check to see if a character matches the given set.
350 : *
351 : * Results:
352 : * Returns non-zero if the character matches the given set.
353 : *
354 : * Side effects:
355 : * None.
356 : *
357 : *----------------------------------------------------------------------
358 : */
359 : static int CharInSet(CharSet *cset, int c)
360 1730 : {
361 1730 : char ch = (char) c;
362 1730 : int i, match = 0;
363 :
364 2758 : for (i = 0; i < cset->nchars; i++) {
365 1028 : if (cset->chars[i] == ch) {
366 0 : match = 1;
367 0 : break;
368 : }
369 : }
370 1730 : if (!match) {
371 3133 : for (i = 0; i < cset->nranges; i++) {
372 2556 : if ((cset->ranges[i].start <= ch)
373 : && (ch <= cset->ranges[i].end)) {
374 1153 : match = 1;
375 1153 : break;
376 : }
377 : }
378 : }
379 1730 : return (cset->exclude ? !match : match);
380 : }
381 : /* }}} */
382 :
383 : /* {{{ u_CharInSet
384 : *----------------------------------------------------------------------
385 : *
386 : * CharInSet --
387 : *
388 : * Check to see if a character matches the given set.
389 : *
390 : * Results:
391 : * Returns non-zero if the character matches the given set.
392 : *
393 : * Side effects:
394 : * None.
395 : *
396 : *----------------------------------------------------------------------
397 : */
398 : static int u_CharInSet(u_CharSet *cset, UChar c)
399 6 : {
400 6 : UChar ch = c;
401 6 : int i, match = 0;
402 :
403 12 : for (i = 0; i < cset->nchars; i++) {
404 6 : if (cset->chars[i] == ch) {
405 0 : match = 1;
406 0 : break;
407 : }
408 : }
409 6 : if (!match) {
410 6 : for (i = 0; i < cset->nranges; i++) {
411 0 : if ((cset->ranges[i].start <= ch)
412 : && (ch <= cset->ranges[i].end)) {
413 0 : match = 1;
414 0 : break;
415 : }
416 : }
417 : }
418 6 : return (cset->exclude ? !match : match);
419 : }
420 : /* }}} */
421 :
422 : /* {{{ ReleaseCharSet
423 : *----------------------------------------------------------------------
424 : *
425 : * ReleaseCharSet --
426 : *
427 : * Free the storage associated with a character set.
428 : *
429 : * Results:
430 : * None.
431 : *
432 : * Side effects:
433 : * None.
434 : *
435 : *----------------------------------------------------------------------
436 : */
437 : static void ReleaseCharSet(CharSet *cset)
438 577 : {
439 577 : efree((char *)cset->chars);
440 577 : if (cset->ranges) {
441 577 : efree((char *)cset->ranges);
442 : }
443 577 : }
444 : /* }}} */
445 :
446 : /* {{{ u_ReleaseCharSet
447 : *----------------------------------------------------------------------
448 : *
449 : * ReleaseCharSet --
450 : *
451 : * Free the storage associated with a character set.
452 : *
453 : * Results:
454 : * None.
455 : *
456 : * Side effects:
457 : * None.
458 : *
459 : *----------------------------------------------------------------------
460 : */
461 : static void u_ReleaseCharSet(u_CharSet *cset)
462 2 : {
463 2 : efree(cset->chars);
464 2 : if (cset->ranges) {
465 0 : efree(cset->ranges);
466 : }
467 2 : }
468 : /* }}} */
469 :
470 : /* {{{ ValidateFormat
471 : *----------------------------------------------------------------------
472 : *
473 : * ValidateFormat --
474 : *
475 : * Parse the format string and verify that it is properly formed
476 : * and that there are exactly enough variables on the command line.
477 : *
478 : * Results:
479 : * FAILURE or SUCCESS.
480 : *
481 : * Side effects:
482 : * May set php_error based on abnormal conditions.
483 : *
484 : * Parameters :
485 : * format The format string.
486 : * numVars The number of variables passed to the scan command.
487 : * totalSubs The number of variables that will be required.
488 : *
489 : *----------------------------------------------------------------------
490 : */
491 : PHPAPI int ValidateFormat(char *format, int numVars, int *totalSubs)
492 7722 : {
493 : #define STATIC_LIST_SIZE 16
494 : int gotXpg, gotSequential, value, i, flags;
495 7722 : char *end, *ch = NULL;
496 : int staticAssign[STATIC_LIST_SIZE];
497 7722 : int *nassign = staticAssign;
498 7722 : int objIndex, xpgSize, nspace = STATIC_LIST_SIZE;
499 : TSRMLS_FETCH();
500 :
501 : /*
502 : * Initialize an array that records the number of times a variable
503 : * is assigned to by the format string. We use this to detect if
504 : * a variable is multiply assigned or left unassigned.
505 : */
506 7722 : if (numVars > nspace) {
507 0 : nassign = (int*)safe_emalloc(sizeof(int), numVars, 0);
508 0 : nspace = numVars;
509 : }
510 131274 : for (i = 0; i < nspace; i++) {
511 123552 : nassign[i] = 0;
512 : }
513 :
514 7722 : xpgSize = objIndex = gotXpg = gotSequential = 0;
515 :
516 24933 : while (*format != '\0') {
517 10072 : ch = format++;
518 10072 : flags = 0;
519 :
520 10072 : if (*ch != '%') {
521 2349 : continue;
522 : }
523 7723 : ch = format++;
524 7723 : if (*ch == '%') {
525 0 : continue;
526 : }
527 7723 : if (*ch == '*') {
528 578 : flags |= SCAN_SUPPRESS;
529 578 : ch = format++;
530 578 : goto xpgCheckDone;
531 : }
532 :
533 7145 : if ( isdigit( (int)*ch ) ) {
534 : /*
535 : * Check for an XPG3-style %n$ specification. Note: there
536 : * must not be a mixture of XPG3 specs and non-XPG3 specs
537 : * in the same format string.
538 : */
539 1156 : value = strtoul(format-1, &end, 10);
540 1156 : if (*end != '$') {
541 1156 : goto notXpg;
542 : }
543 0 : format = end+1;
544 0 : ch = format++;
545 0 : gotXpg = 1;
546 0 : if (gotSequential) {
547 0 : goto mixedXPG;
548 : }
549 0 : objIndex = value - 1;
550 0 : if ((objIndex < 0) || (numVars && (objIndex >= numVars))) {
551 : goto badIndex;
552 0 : } else if (numVars == 0) {
553 : /*
554 : * In the case where no vars are specified, the user can
555 : * specify %9999$ legally, so we have to consider special
556 : * rules for growing the assign array. 'value' is
557 : * guaranteed to be > 0.
558 : */
559 :
560 : /* set a lower artificial limit on this
561 : * in the interest of security and resource friendliness
562 : * 255 arguments should be more than enough. - cc
563 : */
564 0 : if (value > SCAN_MAX_ARGS) {
565 0 : goto badIndex;
566 : }
567 :
568 0 : xpgSize = (xpgSize > value) ? xpgSize : value;
569 : }
570 0 : goto xpgCheckDone;
571 : }
572 :
573 7145 : notXpg:
574 7145 : gotSequential = 1;
575 7145 : if (gotXpg) {
576 0 : mixedXPG:
577 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "cannot mix \"%\" and \"%n$\" conversion specifiers");
578 0 : goto error;
579 : }
580 :
581 7723 : xpgCheckDone:
582 : /*
583 : * Parse any width specifier.
584 : */
585 7723 : if (isdigit(UCHAR(*ch))) {
586 1156 : value = strtoul(format-1, &format, 10);
587 1156 : flags |= SCAN_WIDTH;
588 1156 : ch = format++;
589 : }
590 :
591 : /*
592 : * Ignore size specifier.
593 : */
594 7723 : if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
595 1717 : ch = format++;
596 : }
597 :
598 7723 : if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
599 1 : goto badIndex;
600 : }
601 :
602 : /*
603 : * Handle the various field types.
604 : */
605 7722 : switch (*ch) {
606 : case 'n':
607 : case 'd':
608 : case 'D':
609 : case 'i':
610 : case 'o':
611 : case 'x':
612 : case 'X':
613 : case 'u':
614 : case 'f':
615 : case 'e':
616 : case 'E':
617 : case 'g':
618 : case 's':
619 5724 : break;
620 :
621 : case 'c':
622 : /* we differ here with the TCL implementation in allowing for */
623 : /* a character width specification, to be more consistent with */
624 : /* ANSI. since Zend auto allocates space for vars, this is no */
625 : /* problem - cc */
626 : /*
627 : if (flags & SCAN_WIDTH) {
628 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Field width may not be specified in %c conversion");
629 : goto error;
630 : }
631 : */
632 838 : break;
633 :
634 : case '[':
635 578 : if (*format == '\0') {
636 0 : goto badSet;
637 : }
638 578 : ch = format++;
639 578 : if (*ch == '^') {
640 0 : if (*format == '\0') {
641 0 : goto badSet;
642 : }
643 0 : ch = format++;
644 : }
645 578 : if (*ch == ']') {
646 0 : if (*format == '\0') {
647 0 : goto badSet;
648 : }
649 0 : ch = format++;
650 : }
651 4224 : while (*ch != ']') {
652 3068 : if (*format == '\0') {
653 0 : goto badSet;
654 : }
655 3068 : ch = format++;
656 : }
657 578 : break;
658 0 : badSet:
659 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unmatched [ in format string");
660 0 : goto error;
661 :
662 : default: {
663 582 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Bad scan conversion character \"%c\"", *ch);
664 582 : goto error;
665 : }
666 : }
667 :
668 7140 : if (!(flags & SCAN_SUPPRESS)) {
669 6562 : if (objIndex >= nspace) {
670 : /*
671 : * Expand the nassign buffer. If we are using XPG specifiers,
672 : * make sure that we grow to a large enough size. xpgSize is
673 : * guaranteed to be at least one larger than objIndex.
674 : */
675 0 : value = nspace;
676 0 : if (xpgSize) {
677 0 : nspace = xpgSize;
678 : } else {
679 0 : nspace += STATIC_LIST_SIZE;
680 : }
681 0 : if (nassign == staticAssign) {
682 0 : nassign = (void *)safe_emalloc(nspace, sizeof(int), 0);
683 0 : for (i = 0; i < STATIC_LIST_SIZE; ++i) {
684 0 : nassign[i] = staticAssign[i];
685 : }
686 : } else {
687 0 : nassign = (void *)erealloc((void *)nassign, nspace * sizeof(int));
688 : }
689 0 : for (i = value; i < nspace; i++) {
690 0 : nassign[i] = 0;
691 : }
692 : }
693 6562 : nassign[objIndex]++;
694 6562 : objIndex++;
695 : }
696 : } /* while (*format != '\0') */
697 :
698 : /*
699 : * Verify that all of the variable were assigned exactly once.
700 : */
701 7139 : if (numVars == 0) {
702 7139 : if (xpgSize) {
703 0 : numVars = xpgSize;
704 : } else {
705 7139 : numVars = objIndex;
706 : }
707 : }
708 7139 : if (totalSubs) {
709 7139 : *totalSubs = numVars;
710 : }
711 13698 : for (i = 0; i < numVars; i++) {
712 6559 : if (nassign[i] > 1) {
713 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "Variable is assigned by multiple \"%n$\" conversion specifiers");
714 0 : goto error;
715 6559 : } else if (!xpgSize && (nassign[i] == 0)) {
716 : /*
717 : * If the space is empty, and xpgSize is 0 (means XPG wasn't
718 : * used, and/or numVars != 0), then too many vars were given
719 : */
720 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Variable is not assigned by any conversion specifiers");
721 0 : goto error;
722 : }
723 : }
724 :
725 7139 : if (nassign != staticAssign) {
726 0 : efree((char *)nassign);
727 : }
728 7139 : return SCAN_SUCCESS;
729 :
730 1 : badIndex:
731 1 : if (gotXpg) {
732 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "\"%n$\" argument index out of range");
733 : } else {
734 1 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Different numbers of variable names and field specifiers");
735 : }
736 :
737 583 : error:
738 583 : if (nassign != staticAssign) {
739 0 : efree((char *)nassign);
740 : }
741 583 : return SCAN_ERROR_INVALID_FORMAT;
742 : #undef STATIC_LIST_SIZE
743 : }
744 : /* }}} */
745 :
746 : /* {{{ u_ValidateFormat
747 : *----------------------------------------------------------------------
748 : *
749 : * ValidateFormat --
750 : *
751 : * Parse the format string and verify that it is properly formed
752 : * and that there are exactly enough variables on the command line.
753 : *
754 : * Results:
755 : * FAILURE or SUCCESS.
756 : *
757 : * Side effects:
758 : * May set php_error based on abnormal conditions.
759 : *
760 : * Parameters :
761 : * format The format string.
762 : * numVars The number of variables passed to the scan command.
763 : * totalSubs The number of variables that will be required.
764 : *
765 : *----------------------------------------------------------------------
766 : */
767 : PHPAPI int u_ValidateFormat(UChar *format, int numVars, int *totalSubs)
768 173 : {
769 : #define STATIC_LIST_SIZE 16
770 : int gotXpg, gotSequential, value, i, flags;
771 173 : UChar *end, *ch = NULL;
772 : int staticAssign[STATIC_LIST_SIZE];
773 173 : int *nassign = staticAssign;
774 173 : int objIndex, xpgSize, nspace = STATIC_LIST_SIZE;
775 : TSRMLS_FETCH();
776 :
777 : /*
778 : * Initialize an array that records the number of times a variable
779 : * is assigned to by the format string. We use this to detect if
780 : * a variable is multiply assigned or left unassigned.
781 : */
782 173 : if (numVars > nspace) {
783 0 : nassign = (int*)safe_emalloc(sizeof(int), numVars, 0);
784 0 : nspace = numVars;
785 : }
786 2941 : for (i = 0; i < nspace; i++) {
787 2768 : nassign[i] = 0;
788 : }
789 :
790 173 : xpgSize = objIndex = gotXpg = gotSequential = 0;
791 :
792 1029 : while (*format != 0x00) {
793 685 : ch = format++;
794 685 : flags = 0;
795 :
796 685 : if (*ch != 0x25 /* '%' */) {
797 439 : continue;
798 : }
799 246 : ch = format++;
800 246 : if (*ch == 0x25 /* '%' */) {
801 0 : continue;
802 : }
803 246 : if (*ch == 0x2A /* '*' */) {
804 0 : flags |= SCAN_SUPPRESS;
805 0 : ch = format++;
806 0 : goto xpgCheckDone;
807 : }
808 :
809 246 : if ( u_isdigit( *ch ) ) {
810 : /*
811 : * Check for an XPG3-style %n$ specification. Note: there
812 : * must not be a mixture of XPG3 specs and non-XPG3 specs
813 : * in the same format string.
814 : */
815 12 : value = zend_u_strtoul(format-1, &end, 10);
816 12 : if (*end != '$') {
817 0 : goto notXpg;
818 : }
819 12 : format = end+1;
820 12 : ch = format++;
821 12 : gotXpg = 1;
822 12 : if (gotSequential) {
823 0 : goto mixedXPG;
824 : }
825 12 : objIndex = value - 1;
826 12 : if ((objIndex < 0) || (numVars && (objIndex >= numVars))) {
827 : goto badIndex;
828 12 : } else if (numVars == 0) {
829 : /*
830 : * In the case where no vars are specified, the user can
831 : * specify %9999$ legally, so we have to consider special
832 : * rules for growing the assign array. 'value' is
833 : * guaranteed to be > 0.
834 : */
835 :
836 : /* set a lower artificial limit on this
837 : * in the interest of security and resource friendliness
838 : * 255 arguments should be more than enough. - cc
839 : */
840 7 : if (value > SCAN_MAX_ARGS) {
841 0 : goto badIndex;
842 : }
843 :
844 7 : xpgSize = (xpgSize > value) ? xpgSize : value;
845 : }
846 12 : goto xpgCheckDone;
847 : }
848 :
849 234 : notXpg:
850 234 : gotSequential = 1;
851 234 : if (gotXpg) {
852 0 : mixedXPG:
853 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "cannot mix \"%\" and \"%n$\" conversion specifiers");
854 0 : goto error;
855 : }
856 :
857 246 : xpgCheckDone:
858 : /*
859 : * Parse any width specifier.
860 : */
861 246 : if (u_isdigit(*ch)) {
862 0 : value = zend_u_strtoul(format-1, &format, 10);
863 0 : flags |= SCAN_WIDTH;
864 0 : ch = format++;
865 : }
866 :
867 : /*
868 : * Ignore size specifier.
869 : */
870 246 : if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
871 0 : ch = format++;
872 : }
873 :
874 246 : if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
875 1 : goto badIndex;
876 : }
877 :
878 : /*
879 : * Handle the various field types.
880 : */
881 245 : switch (*ch) {
882 : case 'n':
883 : case 'd':
884 : case 'D':
885 : case 'i':
886 : case 'o':
887 : case 'x':
888 : case 'X':
889 : case 'u':
890 : case 'f':
891 : case 'e':
892 : case 'E':
893 : case 'g':
894 : case 's':
895 222 : break;
896 :
897 : case 'c':
898 : /* we differ here with the TCL implementation in allowing for */
899 : /* a character width specification, to be more consistent with */
900 : /* ANSI. since Zend auto allocates space for vars, this is no */
901 : /* problem - cc */
902 : /*
903 : if (flags & SCAN_WIDTH) {
904 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Field width may not be specified in %c conversion");
905 : goto error;
906 : }
907 : */
908 20 : break;
909 :
910 : case '[':
911 2 : if (*format == '\0') {
912 0 : goto badSet;
913 : }
914 2 : ch = format++;
915 2 : if (*ch == '^') {
916 2 : if (*format == '\0') {
917 0 : goto badSet;
918 : }
919 2 : ch = format++;
920 : }
921 2 : if (*ch == ']') {
922 0 : if (*format == '\0') {
923 0 : goto badSet;
924 : }
925 0 : ch = format++;
926 : }
927 6 : while (*ch != ']') {
928 2 : if (*format == '\0') {
929 0 : goto badSet;
930 : }
931 2 : ch = format++;
932 : }
933 2 : break;
934 0 : badSet:
935 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unmatched [ in format string");
936 0 : goto error;
937 :
938 : default: {
939 1 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Bad scan conversion character \"%c\"", *ch);
940 1 : goto error;
941 : }
942 : }
943 :
944 244 : if (!(flags & SCAN_SUPPRESS)) {
945 244 : if (objIndex >= nspace) {
946 : /*
947 : * Expand the nassign buffer. If we are using XPG specifiers,
948 : * make sure that we grow to a large enough size. xpgSize is
949 : * guaranteed to be at least one larger than objIndex.
950 : */
951 0 : value = nspace;
952 0 : if (xpgSize) {
953 0 : nspace = xpgSize;
954 : } else {
955 0 : nspace += STATIC_LIST_SIZE;
956 : }
957 0 : if (nassign == staticAssign) {
958 0 : nassign = (void *)safe_emalloc(nspace, sizeof(int), 0);
959 0 : for (i = 0; i < STATIC_LIST_SIZE; ++i) {
960 0 : nassign[i] = staticAssign[i];
961 : }
962 : } else {
963 0 : nassign = (void *)erealloc((void *)nassign, nspace * sizeof(int));
964 : }
965 0 : for (i = value; i < nspace; i++) {
966 0 : nassign[i] = 0;
967 : }
968 : }
969 244 : nassign[objIndex]++;
970 244 : objIndex++;
971 : }
972 : } /* while (*format != '\0') */
973 :
974 : /*
975 : * Verify that all of the variable were assigned exactly once.
976 : */
977 171 : if (numVars == 0) {
978 147 : if (xpgSize) {
979 4 : numVars = xpgSize;
980 : } else {
981 143 : numVars = objIndex;
982 : }
983 : }
984 171 : if (totalSubs) {
985 171 : *totalSubs = numVars;
986 : }
987 414 : for (i = 0; i < numVars; i++) {
988 246 : if (nassign[i] > 1) {
989 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "Variable is assigned by multiple \"%n$\" conversion specifiers");
990 0 : goto error;
991 246 : } else if (!xpgSize && (nassign[i] == 0)) {
992 : /*
993 : * If the space is empty, and xpgSize is 0 (means XPG wasn't
994 : * used, and/or numVars != 0), then too many vars were given
995 : */
996 3 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Variable is not assigned by any conversion specifiers");
997 3 : goto error;
998 : }
999 : }
1000 :
1001 168 : if (nassign != staticAssign) {
1002 0 : efree((char *)nassign);
1003 : }
1004 168 : return SCAN_SUCCESS;
1005 :
1006 1 : badIndex:
1007 1 : if (gotXpg) {
1008 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "\"%n$\" argument index out of range");
1009 : } else {
1010 1 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Different numbers of variable names and field specifiers");
1011 : }
1012 :
1013 5 : error:
1014 5 : if (nassign != staticAssign) {
1015 0 : efree((char *)nassign);
1016 : }
1017 5 : return SCAN_ERROR_INVALID_FORMAT;
1018 : #undef STATIC_LIST_SIZE
1019 : }
1020 : /* }}} */
1021 :
1022 : /* {{{ php_sscanf_internal
1023 : * This is the internal function which does processing on behalf of
1024 : * both sscanf() and fscanf()
1025 : *
1026 : * parameters :
1027 : * string literal string to be processed
1028 : * format format string
1029 : * argCount total number of elements in the args array
1030 : * args arguments passed in from user function (f|s)scanf
1031 : * varStart offset (in args) of 1st variable passed in to (f|s)scanf
1032 : * return_value set with the results of the scan
1033 : */
1034 :
1035 : PHPAPI int php_sscanf_internal( char *string, char *format,
1036 : int argCount, zval ***args,
1037 : int varStart, zval **return_value TSRMLS_DC)
1038 7722 : {
1039 7722 : int numVars, nconversions, totalVars = -1;
1040 : int i, value, result;
1041 : int objIndex;
1042 : char *end, *baseString;
1043 : zval **current;
1044 7722 : char op = 0;
1045 7722 : int base = 0;
1046 7722 : int underflow = 0;
1047 : size_t width;
1048 7722 : long (*fn)() = NULL;
1049 : char *ch, sch;
1050 : int flags;
1051 : char buf[64]; /* Temporary buffer to hold scanned number
1052 : * strings before they are passed to strtoul() */
1053 :
1054 : /* do some sanity checking */
1055 7722 : if ((varStart > argCount) || (varStart < 0)){
1056 0 : varStart = SCAN_MAX_ARGS + 1;
1057 : }
1058 7722 : numVars = argCount - varStart;
1059 7722 : if (numVars < 0) {
1060 0 : numVars = 0;
1061 : }
1062 :
1063 : #if 0
1064 : zend_printf("<br>in sscanf_internal : <br> string is \"%s\", format = \"%s\"<br> NumVars = %d. VarStart = %d<br>-------------------------<br>",
1065 : string, format, numVars, varStart);
1066 : #endif
1067 : /*
1068 : * Check for errors in the format string.
1069 : */
1070 7722 : if (ValidateFormat(format, numVars, &totalVars) != SCAN_SUCCESS) {
1071 583 : scan_set_error_return( numVars, return_value );
1072 583 : return SCAN_ERROR_INVALID_FORMAT;
1073 : }
1074 :
1075 7139 : objIndex = numVars ? varStart : 0;
1076 :
1077 : /*
1078 : * If any variables are passed, make sure they are all passed by reference
1079 : */
1080 7139 : if (numVars) {
1081 0 : for (i = varStart;i < argCount;i++){
1082 0 : if ( ! PZVAL_IS_REF( *args[ i ] ) ) {
1083 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter %d must be passed by reference", i);
1084 0 : scan_set_error_return(numVars, return_value);
1085 0 : return SCAN_ERROR_VAR_PASSED_BYVAL;
1086 : }
1087 : }
1088 : }
1089 :
1090 : /*
1091 : * Allocate space for the result objects. Only happens when no variables
1092 : * are specified
1093 : */
1094 7139 : if (!numVars) {
1095 : zval *tmp;
1096 :
1097 : /* allocate an array for return */
1098 7139 : array_init(*return_value);
1099 :
1100 13698 : for (i = 0; i < totalVars; i++) {
1101 6559 : MAKE_STD_ZVAL(tmp);
1102 6559 : ZVAL_NULL(tmp);
1103 6559 : if (add_next_index_zval(*return_value, tmp) == FAILURE) {
1104 0 : scan_set_error_return(0, return_value);
1105 0 : return FAILURE;
1106 : }
1107 : }
1108 7139 : varStart = 0; /* Array index starts from 0 */
1109 : }
1110 :
1111 7139 : baseString = string;
1112 :
1113 : /*
1114 : * Iterate over the format string filling in the result objects until
1115 : * we reach the end of input, the end of the format string, or there
1116 : * is a mismatch.
1117 : */
1118 7139 : nconversions = 0;
1119 : /* note ! - we need to limit the loop for objIndex to keep it in bounds */
1120 :
1121 21241 : while (*format != '\0') {
1122 9292 : ch = format++;
1123 9292 : flags = 0;
1124 :
1125 : /*
1126 : * If we see whitespace in the format, skip whitespace in the string.
1127 : */
1128 9292 : if ( isspace( (int)*ch ) ) {
1129 2136 : sch = *string;
1130 4703 : while ( isspace( (int)sch ) ) {
1131 431 : if (*string == '\0') {
1132 0 : goto done;
1133 : }
1134 431 : string++;
1135 431 : sch = *string;
1136 : }
1137 2136 : continue;
1138 : }
1139 :
1140 7156 : if (*ch != '%') {
1141 19 : literal:
1142 19 : if (*string == '\0') {
1143 0 : underflow = 1;
1144 0 : goto done;
1145 : }
1146 19 : sch = *string;
1147 19 : string++;
1148 19 : if (*ch != sch) {
1149 19 : goto done;
1150 : }
1151 0 : continue;
1152 : }
1153 :
1154 7137 : ch = format++;
1155 7137 : if (*ch == '%') {
1156 0 : goto literal;
1157 : }
1158 :
1159 : /*
1160 : * Check for assignment suppression ('*') or an XPG3-style
1161 : * assignment ('%n$').
1162 : */
1163 7137 : if (*ch == '*') {
1164 578 : flags |= SCAN_SUPPRESS;
1165 578 : ch = format++;
1166 6559 : } else if ( isdigit(UCHAR(*ch))) {
1167 1156 : value = strtoul(format-1, &end, 10);
1168 1156 : if (*end == '$') {
1169 0 : format = end+1;
1170 0 : ch = format++;
1171 0 : objIndex = varStart + value - 1;
1172 : }
1173 : }
1174 :
1175 : /*
1176 : * Parse any width specifier.
1177 : */
1178 7137 : if ( isdigit(UCHAR(*ch))) {
1179 1156 : width = strtoul(format-1, &format, 10);
1180 1156 : ch = format++;
1181 : } else {
1182 5981 : width = 0;
1183 : }
1184 :
1185 : /*
1186 : * Ignore size specifier.
1187 : */
1188 7137 : if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
1189 1716 : ch = format++;
1190 : }
1191 :
1192 : /*
1193 : * Handle the various field types.
1194 : */
1195 7137 : switch (*ch) {
1196 : case 'n':
1197 0 : if (!(flags & SCAN_SUPPRESS)) {
1198 0 : if (numVars && objIndex >= argCount) {
1199 : break;
1200 0 : } else if (numVars) {
1201 : zend_uint refcount;
1202 :
1203 0 : current = args[objIndex++];
1204 0 : refcount = Z_REFCOUNT_PP(current);
1205 0 : zval_dtor( *current );
1206 0 : ZVAL_LONG( *current, (long)(string - baseString) );
1207 0 : Z_SET_REFCOUNT_PP(current, refcount);
1208 0 : Z_SET_ISREF_PP(current);
1209 : } else {
1210 0 : add_index_long(*return_value, objIndex++, string - baseString);
1211 : }
1212 : }
1213 0 : nconversions++;
1214 0 : continue;
1215 :
1216 : case 'd':
1217 : case 'D':
1218 784 : op = 'i';
1219 784 : base = 10;
1220 784 : fn = (long (*)())strtol;
1221 784 : break;
1222 : case 'i':
1223 0 : op = 'i';
1224 0 : base = 0;
1225 0 : fn = (long (*)())strtol;
1226 0 : break;
1227 : case 'o':
1228 784 : op = 'i';
1229 784 : base = 8;
1230 784 : fn = (long (*)())strtol;
1231 784 : break;
1232 : case 'x':
1233 : case 'X':
1234 784 : op = 'i';
1235 784 : base = 16;
1236 784 : fn = (long (*)())strtol;
1237 784 : break;
1238 : case 'u':
1239 784 : op = 'i';
1240 784 : base = 10;
1241 784 : flags |= SCAN_UNSIGNED;
1242 784 : fn = (long (*)())strtoul;
1243 784 : break;
1244 :
1245 : case 'f':
1246 : case 'e':
1247 : case 'E':
1248 : case 'g':
1249 1733 : op = 'f';
1250 1733 : break;
1251 :
1252 : case 's':
1253 852 : op = 's';
1254 852 : break;
1255 :
1256 : case 'c':
1257 838 : op = 's';
1258 838 : flags |= SCAN_NOSKIP;
1259 : /*-cc-*/
1260 838 : if (0 == width) {
1261 690 : width = 1;
1262 : }
1263 : /*-cc-*/
1264 838 : break;
1265 : case '[':
1266 578 : op = '[';
1267 578 : flags |= SCAN_NOSKIP;
1268 : break;
1269 : } /* switch */
1270 :
1271 : /*
1272 : * At this point, we will need additional characters from the
1273 : * string to proceed.
1274 : */
1275 7137 : if (*string == '\0') {
1276 123 : underflow = 1;
1277 123 : goto done;
1278 : }
1279 :
1280 : /*
1281 : * Skip any leading whitespace at the beginning of a field unless
1282 : * the format suppresses this behavior.
1283 : */
1284 7014 : if (!(flags & SCAN_NOSKIP)) {
1285 11510 : while (*string != '\0') {
1286 5635 : sch = *string;
1287 5635 : if (! isspace((int)sch) ) {
1288 5347 : break;
1289 : }
1290 288 : string++;
1291 : }
1292 5611 : if (*string == '\0') {
1293 264 : underflow = 1;
1294 264 : goto done;
1295 : }
1296 : }
1297 :
1298 : /*
1299 : * Perform the requested scanning operation.
1300 : */
1301 6750 : switch (op) {
1302 : case 'c':
1303 : case 's':
1304 : /*
1305 : * Scan a string up to width characters or whitespace.
1306 : */
1307 1568 : if (width == 0) {
1308 612 : width = (size_t) ~0;
1309 : }
1310 1568 : end = string;
1311 7740 : while (*end != '\0') {
1312 6171 : sch = *end;
1313 6171 : if ( isspace( (int)sch ) ) {
1314 815 : break;
1315 : }
1316 5356 : end++;
1317 5356 : if (--width == 0) {
1318 752 : break;
1319 : }
1320 : }
1321 1568 : if (!(flags & SCAN_SUPPRESS)) {
1322 1429 : if (numVars && objIndex >= argCount) {
1323 : break;
1324 1429 : } else if (numVars) {
1325 : zend_uint refcount;
1326 :
1327 0 : current = args[objIndex++];
1328 0 : refcount = Z_REFCOUNT_PP(current);
1329 0 : zval_dtor( *current );
1330 0 : ZVAL_STRINGL( *current, string, end-string, 1);
1331 0 : Z_SET_REFCOUNT_PP(current, refcount);
1332 0 : Z_SET_ISREF_PP(current);
1333 : } else {
1334 1429 : add_index_stringl( *return_value, objIndex++, string, end-string, 1);
1335 : }
1336 : }
1337 1568 : string = end;
1338 1568 : break;
1339 :
1340 : case '[': {
1341 : CharSet cset;
1342 :
1343 577 : if (width == 0) {
1344 577 : width = (size_t) ~0;
1345 : }
1346 577 : end = string;
1347 :
1348 577 : format = BuildCharSet(&cset, format);
1349 2307 : while (*end != '\0') {
1350 1730 : sch = *end;
1351 1730 : if (!CharInSet(&cset, (int)sch)) {
1352 577 : break;
1353 : }
1354 1153 : end++;
1355 1153 : if (--width == 0) {
1356 0 : break;
1357 : }
1358 : }
1359 577 : ReleaseCharSet(&cset);
1360 :
1361 577 : if (string == end) {
1362 : /*
1363 : * Nothing matched the range, stop processing
1364 : */
1365 320 : goto done;
1366 : }
1367 257 : if (!(flags & SCAN_SUPPRESS)) {
1368 257 : if (numVars && objIndex >= argCount) {
1369 : break;
1370 257 : } else if (numVars) {
1371 0 : current = args[objIndex++];
1372 0 : zval_dtor( *current );
1373 0 : ZVAL_STRINGL( *current, string, end-string, 1);
1374 : } else {
1375 257 : add_index_stringl(*return_value, objIndex++, string, end-string, 1);
1376 : }
1377 : }
1378 257 : string = end;
1379 257 : break;
1380 : }
1381 : /*
1382 : case 'c':
1383 : / Scan a single character./
1384 :
1385 : sch = *string;
1386 : string++;
1387 : if (!(flags & SCAN_SUPPRESS)) {
1388 : if (numVars) {
1389 : char __buf[2];
1390 : __buf[0] = sch;
1391 : __buf[1] = '\0';;
1392 : current = args[objIndex++];
1393 : zval_dtor(*current);
1394 : ZVAL_STRINGL( *current, __buf, 1, 1);
1395 : } else {
1396 : add_index_stringl(*return_value, objIndex++, &sch, 1, 1);
1397 : }
1398 : }
1399 : break;
1400 : */
1401 : case 'i':
1402 : /*
1403 : * Scan an unsigned or signed integer.
1404 : */
1405 : /*-cc-*/
1406 2960 : buf[0] = '\0';
1407 : /*-cc-*/
1408 2960 : if ((width == 0) || (width > sizeof(buf) - 1)) {
1409 2440 : width = sizeof(buf) - 1;
1410 : }
1411 :
1412 2960 : flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
1413 12393 : for (end = buf; width > 0; width--) {
1414 12303 : switch (*string) {
1415 : /*
1416 : * The 0 digit has special meaning at the beginning of
1417 : * a number. If we are unsure of the base, it
1418 : * indicates that we are in base 8 or base 16 (if it is
1419 : * followed by an 'x').
1420 : */
1421 : case '0':
1422 : /*-cc-*/
1423 1790 : if (base == 16) {
1424 450 : flags |= SCAN_XOK;
1425 : }
1426 : /*-cc-*/
1427 1790 : if (base == 0) {
1428 0 : base = 8;
1429 0 : flags |= SCAN_XOK;
1430 : }
1431 1790 : if (flags & SCAN_NOZERO) {
1432 618 : flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO);
1433 : } else {
1434 1172 : flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
1435 : }
1436 1790 : goto addToInt;
1437 :
1438 : case '1': case '2': case '3': case '4':
1439 : case '5': case '6': case '7':
1440 6072 : if (base == 0) {
1441 0 : base = 10;
1442 : }
1443 6072 : flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
1444 6072 : goto addToInt;
1445 :
1446 : case '8': case '9':
1447 1056 : if (base == 0) {
1448 0 : base = 10;
1449 : }
1450 1056 : if (base <= 8) {
1451 189 : break;
1452 : }
1453 867 : flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
1454 867 : goto addToInt;
1455 :
1456 : case 'A': case 'B': case 'C':
1457 : case 'D': case 'E': case 'F':
1458 : case 'a': case 'b': case 'c':
1459 : case 'd': case 'e': case 'f':
1460 670 : if (base <= 10) {
1461 474 : break;
1462 : }
1463 196 : flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
1464 196 : goto addToInt;
1465 :
1466 : case '+': case '-':
1467 508 : if (flags & SCAN_SIGNOK) {
1468 508 : flags &= ~SCAN_SIGNOK;
1469 508 : goto addToInt;
1470 : }
1471 0 : break;
1472 :
1473 : case 'x': case 'X':
1474 0 : if ((flags & SCAN_XOK) && (end == buf+1)) {
1475 0 : base = 16;
1476 0 : flags &= ~SCAN_XOK;
1477 0 : goto addToInt;
1478 : }
1479 : break;
1480 : }
1481 :
1482 : /*
1483 : * We got an illegal character so we are done accumulating.
1484 : */
1485 2870 : break;
1486 :
1487 9433 : addToInt:
1488 : /*
1489 : * Add the character to the temporary buffer.
1490 : */
1491 9433 : *end++ = *string++;
1492 9433 : if (*string == '\0') {
1493 0 : break;
1494 : }
1495 : }
1496 :
1497 : /*
1498 : * Check to see if we need to back up because we only got a
1499 : * sign or a trailing x after a 0.
1500 : */
1501 2960 : if (flags & SCAN_NODIGITS) {
1502 1021 : if (*string == '\0') {
1503 0 : underflow = 1;
1504 : }
1505 1021 : goto done;
1506 1939 : } else if (end[-1] == 'x' || end[-1] == 'X') {
1507 0 : end--;
1508 0 : string--;
1509 : }
1510 :
1511 : /*
1512 : * Scan the value from the temporary buffer. If we are
1513 : * returning a large unsigned value, we have to convert it back
1514 : * to a string since PHP only supports signed values.
1515 : */
1516 1939 : if (!(flags & SCAN_SUPPRESS)) {
1517 1770 : *end = '\0';
1518 1770 : value = (int) (*fn)(buf, NULL, base);
1519 1876 : if ((flags & SCAN_UNSIGNED) && (value < 0)) {
1520 106 : snprintf(buf, sizeof(buf), "%u", value); /* INTL: ISO digit */
1521 106 : if (numVars && objIndex >= argCount) {
1522 : break;
1523 106 : } else if (numVars) {
1524 : /* change passed value type to string */
1525 0 : current = args[objIndex++];
1526 0 : zval_dtor(*current);
1527 0 : ZVAL_STRING( *current, buf, 1 );
1528 : } else {
1529 106 : add_index_string(*return_value, objIndex++, buf, 1);
1530 : }
1531 : } else {
1532 1664 : if (numVars && objIndex >= argCount) {
1533 : break;
1534 1664 : } else if (numVars) {
1535 0 : current = args[objIndex++];
1536 0 : zval_dtor(*current);
1537 0 : ZVAL_LONG(*current, value);
1538 : } else {
1539 1664 : add_index_long(*return_value, objIndex++, value);
1540 : }
1541 : }
1542 : }
1543 1939 : break;
1544 :
1545 : case 'f':
1546 : /*
1547 : * Scan a floating point number
1548 : */
1549 1645 : buf[0] = '\0'; /* call me pedantic */
1550 1645 : if ((width == 0) || (width > sizeof(buf) - 1)) {
1551 1355 : width = sizeof(buf) - 1;
1552 : }
1553 1645 : flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
1554 7854 : for (end = buf; width > 0; width--) {
1555 7790 : switch (*string) {
1556 : case '0': case '1': case '2': case '3':
1557 : case '4': case '5': case '6': case '7':
1558 : case '8': case '9':
1559 5518 : flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
1560 5518 : goto addToFloat;
1561 : case '+':
1562 : case '-':
1563 442 : if (flags & SCAN_SIGNOK) {
1564 442 : flags &= ~SCAN_SIGNOK;
1565 442 : goto addToFloat;
1566 : }
1567 0 : break;
1568 : case '.':
1569 144 : if (flags & SCAN_PTOK) {
1570 144 : flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
1571 144 : goto addToFloat;
1572 : }
1573 0 : break;
1574 : case 'e':
1575 : case 'E':
1576 : /*
1577 : * An exponent is not allowed until there has
1578 : * been at least one digit.
1579 : */
1580 105 : if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) == SCAN_EXPOK) {
1581 105 : flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
1582 : | SCAN_SIGNOK | SCAN_NODIGITS;
1583 105 : goto addToFloat;
1584 : }
1585 : break;
1586 : }
1587 :
1588 : /*
1589 : * We got an illegal character so we are done accumulating.
1590 : */
1591 1581 : break;
1592 :
1593 6209 : addToFloat:
1594 : /*
1595 : * Add the character to the temporary buffer.
1596 : */
1597 6209 : *end++ = *string++;
1598 6209 : if (*string == '\0') {
1599 0 : break;
1600 : }
1601 : }
1602 :
1603 : /*
1604 : * Check to see if we need to back up because we saw a
1605 : * trailing 'e' or sign.
1606 : */
1607 1645 : if (flags & SCAN_NODIGITS) {
1608 587 : if (flags & SCAN_EXPOK) {
1609 : /*
1610 : * There were no digits at all so scanning has
1611 : * failed and we are done.
1612 : */
1613 582 : if (*string == '\0') {
1614 0 : underflow = 1;
1615 : }
1616 582 : goto done;
1617 : }
1618 :
1619 : /*
1620 : * We got a bad exponent ('e' and maybe a sign).
1621 : */
1622 5 : end--;
1623 5 : string--;
1624 5 : if (*end != 'e' && *end != 'E') {
1625 0 : end--;
1626 0 : string--;
1627 : }
1628 : }
1629 :
1630 : /*
1631 : * Scan the value from the temporary buffer.
1632 : */
1633 1063 : if (!(flags & SCAN_SUPPRESS)) {
1634 : double dvalue;
1635 970 : *end = '\0';
1636 970 : dvalue = zend_strtod(buf, NULL);
1637 970 : if (numVars && objIndex >= argCount) {
1638 : break;
1639 970 : } else if (numVars) {
1640 0 : current = args[objIndex++];
1641 0 : zval_dtor(*current);
1642 0 : ZVAL_DOUBLE(*current, dvalue);
1643 : } else {
1644 970 : add_index_double( *return_value, objIndex++, dvalue );
1645 : }
1646 : }
1647 : break;
1648 : } /* switch (op) */
1649 4827 : nconversions++;
1650 : } /* while (*format != '\0') */
1651 :
1652 7139 : done:
1653 7139 : result = SCAN_SUCCESS;
1654 :
1655 7526 : if (underflow && (0==nconversions)) {
1656 387 : scan_set_error_return( numVars, return_value );
1657 387 : result = SCAN_ERROR_EOF;
1658 6752 : } else if (numVars) {
1659 0 : convert_to_long( *return_value );
1660 0 : Z_LVAL_PP(return_value) = nconversions;
1661 6752 : } else if (nconversions < totalVars) {
1662 : /* TODO: not all elements converted. we need to prune the list - cc */
1663 : }
1664 7139 : return result;
1665 : }
1666 : /* }}} */
1667 :
1668 : /* {{{ php_u_sscanf_internal
1669 : * This is the internal function which does processing on behalf of
1670 : * both sscanf() and fscanf()
1671 : *
1672 : * parameters :
1673 : * string literal string to be processed
1674 : * format format string
1675 : * argCount total number of elements in the args array
1676 : * args arguments passed in from user function (f|s)scanf
1677 : * varStart offset (in args) of 1st variable passed in to (f|s)scanf
1678 : * return_value set with the results of the scan
1679 : */
1680 :
1681 : PHPAPI int php_u_sscanf_internal( UChar *string, UChar *format,
1682 : int argCount, zval ***args,
1683 : int varStart, zval **return_value TSRMLS_DC)
1684 173 : {
1685 173 : int numVars, nconversions, totalVars = -1;
1686 : int i, value, result;
1687 : int objIndex;
1688 : UChar *end, *baseString;
1689 : zval **current;
1690 173 : char op = 0;
1691 173 : int base = 0;
1692 173 : int underflow = 0;
1693 : size_t width;
1694 173 : long (*fn)() = NULL;
1695 : UChar *ch, sch;
1696 : int flags;
1697 : UChar buf[64]; /* Temporary buffer to hold scanned number
1698 : * strings before they are passed to strtoul() */
1699 :
1700 : /* do some sanity checking */
1701 173 : if ((varStart > argCount) || (varStart < 0)){
1702 0 : varStart = SCAN_MAX_ARGS + 1;
1703 : }
1704 173 : numVars = argCount - varStart;
1705 173 : if (numVars < 0) {
1706 0 : numVars = 0;
1707 : }
1708 :
1709 : #if 0
1710 : zend_printf("<br>in sscanf_internal : <br> string is \"%s\", format = \"%s\"<br> NumVars = %d. VarStart = %d<br>-------------------------<br>",
1711 : string, format, numVars, varStart);
1712 : #endif
1713 : /*
1714 : * Check for errors in the format string.
1715 : */
1716 173 : if (u_ValidateFormat(format, numVars, &totalVars) != SCAN_SUCCESS) {
1717 5 : scan_set_error_return( numVars, return_value );
1718 5 : return SCAN_ERROR_INVALID_FORMAT;
1719 : }
1720 :
1721 168 : objIndex = numVars ? varStart : 0;
1722 :
1723 : /*
1724 : * If any variables are passed, make sure they are all passed by reference
1725 : */
1726 168 : if (numVars) {
1727 90 : for (i = varStart;i < argCount;i++){
1728 69 : if ( ! PZVAL_IS_REF( *args[ i ] ) ) {
1729 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter %d must be passed by reference", i);
1730 0 : scan_set_error_return(numVars, return_value);
1731 0 : return SCAN_ERROR_VAR_PASSED_BYVAL;
1732 : }
1733 : }
1734 : }
1735 :
1736 : /*
1737 : * Allocate space for the result objects. Only happens when no variables
1738 : * are specified
1739 : */
1740 168 : if (!numVars) {
1741 : zval *tmp;
1742 :
1743 : /* allocate an array for return */
1744 147 : array_init(*return_value);
1745 :
1746 318 : for (i = 0; i < totalVars; i++) {
1747 171 : MAKE_STD_ZVAL(tmp);
1748 171 : ZVAL_NULL(tmp);
1749 171 : if (add_next_index_zval(*return_value, tmp) == FAILURE) {
1750 0 : scan_set_error_return(0, return_value);
1751 0 : return FAILURE;
1752 : }
1753 : }
1754 147 : varStart = 0; /* Array index starts from 0 */
1755 : }
1756 :
1757 168 : baseString = string;
1758 :
1759 : /*
1760 : * Iterate over the format string filling in the result objects until
1761 : * we reach the end of input, the end of the format string, or there
1762 : * is a mismatch.
1763 : */
1764 168 : nconversions = 0;
1765 : /* note ! - we need to limit the loop for objIndex to keep it in bounds */
1766 :
1767 809 : while (*format != 0x00) {
1768 515 : ch = format++;
1769 515 : flags = 0;
1770 :
1771 : /*
1772 : * If we see whitespace in the format, skip whitespace in the string.
1773 : */
1774 515 : if ( u_isspace(*ch) ) {
1775 112 : sch = *string;
1776 330 : while ( u_isspace(sch) ) {
1777 106 : if (*string == 0x00) {
1778 0 : goto done;
1779 : }
1780 106 : string++;
1781 106 : sch = *string;
1782 : }
1783 112 : continue;
1784 : }
1785 :
1786 403 : if (*ch != 0x25 /* '%' */) {
1787 173 : literal:
1788 173 : if (*string == 0x00) {
1789 3 : underflow = 1;
1790 3 : goto done;
1791 : }
1792 170 : sch = *string;
1793 170 : string++;
1794 170 : if (*ch != sch) {
1795 13 : goto done;
1796 : }
1797 157 : continue;
1798 : }
1799 :
1800 230 : ch = format++;
1801 230 : if (*ch == 0x25 /* '%' */) {
1802 0 : goto literal;
1803 : }
1804 :
1805 : /*
1806 : * Check for assignment suppression ('*') or an XPG3-style
1807 : * assignment ('%n$').
1808 : */
1809 230 : if (*ch == 0x2A /* '*' */) {
1810 0 : flags |= SCAN_SUPPRESS;
1811 0 : ch = format++;
1812 230 : } else if ( u_isdigit(*ch)) {
1813 11 : value = zend_u_strtoul(format-1, &end, 10);
1814 11 : if (*end == 0x24 /* '$' */) {
1815 11 : format = end+1;
1816 11 : ch = format++;
1817 11 : objIndex = varStart + value - 1;
1818 : }
1819 : }
1820 :
1821 : /*
1822 : * Parse any width specifier.
1823 : */
1824 230 : if ( u_isdigit(*ch)) {
1825 0 : width = zend_u_strtoul(format-1, &format, 10);
1826 0 : ch = format++;
1827 : } else {
1828 230 : width = 0;
1829 : }
1830 :
1831 : /*
1832 : * Ignore size specifier.
1833 : */
1834 230 : if ((*ch == 0x6C /* 'l' */) || (*ch == 0x4C /* 'L' */) || (*ch == 0x68 /* 'h' */)) {
1835 0 : ch = format++;
1836 : }
1837 :
1838 : /*
1839 : * Handle the various field types.
1840 : */
1841 230 : switch (*ch) {
1842 : case 0x6E /* 'n' */:
1843 2 : if (!(flags & SCAN_SUPPRESS)) {
1844 2 : if (numVars && objIndex >= argCount) {
1845 : break;
1846 2 : } else if (numVars) {
1847 : zend_uint refcount;
1848 :
1849 1 : current = args[objIndex++];
1850 1 : refcount = Z_REFCOUNT_PP(current);
1851 1 : zval_dtor( *current );
1852 1 : ZVAL_LONG( *current, (long)(string - baseString) );
1853 1 : Z_SET_REFCOUNT_PP(current, refcount);
1854 1 : Z_SET_ISREF_PP(current);
1855 : } else {
1856 1 : add_index_long(*return_value, objIndex++, string - baseString);
1857 : }
1858 : }
1859 2 : nconversions++;
1860 2 : continue;
1861 :
1862 : case 0x64 /* 'd' */:
1863 : case 0x44 /* 'D' */:
1864 21 : op = 'i';
1865 21 : base = 10;
1866 21 : fn = (long (*)())zend_u_strtol;
1867 21 : break;
1868 : case 0x69 /* 'i' */:
1869 0 : op = 'i';
1870 0 : base = 0;
1871 0 : fn = (long (*)())zend_u_strtol;
1872 0 : break;
1873 : case 0x6F /* 'o' */:
1874 22 : op = 'i';
1875 22 : base = 8;
1876 22 : fn = (long (*)())zend_u_strtol;
1877 22 : break;
1878 : case 0x78 /* 'x' */:
1879 : case 0x58 /* 'X' */:
1880 36 : op = 'i';
1881 36 : base = 16;
1882 36 : fn = (long (*)())zend_u_strtol;
1883 36 : break;
1884 : case 0x75 /* 'u' */:
1885 22 : op = 'i';
1886 22 : base = 10;
1887 22 : flags |= SCAN_UNSIGNED;
1888 22 : fn = (long (*)())zend_u_strtoul;
1889 22 : break;
1890 :
1891 : case 0x66 /* 'f' */:
1892 : case 0x65 /* 'e' */:
1893 : case 0x45 /* 'E' */:
1894 : case 0x67 /* 'g' */:
1895 49 : op = 'f';
1896 49 : break;
1897 :
1898 : case 0x73 /* 's' */:
1899 56 : op = 's';
1900 56 : break;
1901 :
1902 : case 0x63 /* 'c' */:
1903 20 : op = 's';
1904 20 : flags |= SCAN_NOSKIP;
1905 : /*-cc-*/
1906 20 : if (0 == width) {
1907 20 : width = 1;
1908 : }
1909 : /*-cc-*/
1910 20 : break;
1911 : case 0x5B /* '[' */:
1912 2 : op = '[';
1913 2 : flags |= SCAN_NOSKIP;
1914 : break;
1915 : } /* switch */
1916 :
1917 : /*
1918 : * At this point, we will need additional characters from the
1919 : * string to proceed.
1920 : */
1921 228 : if (*string == 0x00) {
1922 6 : underflow = 1;
1923 6 : goto done;
1924 : }
1925 :
1926 : /*
1927 : * Skip any leading whitespace at the beginning of a field unless
1928 : * the format suppresses this behavior.
1929 : */
1930 222 : if (!(flags & SCAN_NOSKIP)) {
1931 400 : while (*string != 0x00) {
1932 200 : sch = *string;
1933 200 : if (! u_isspace(sch) ) {
1934 200 : break;
1935 : }
1936 0 : string++;
1937 : }
1938 200 : if (*string == 0x00) {
1939 0 : underflow = 1;
1940 0 : goto done;
1941 : }
1942 : }
1943 :
1944 : /*
1945 : * Perform the requested scanning operation.
1946 : */
1947 222 : switch (op) {
1948 : case 'c':
1949 : case 's':
1950 : /*
1951 : * Scan a string up to width characters or whitespace.
1952 : */
1953 70 : if (width == 0) {
1954 50 : width = (size_t) ~0;
1955 : }
1956 70 : end = string;
1957 357 : while (*end != 0x00) {
1958 267 : sch = *end;
1959 267 : if ( u_isspace( sch ) ) {
1960 30 : break;
1961 : }
1962 237 : end++;
1963 237 : if (--width == 0) {
1964 20 : break;
1965 : }
1966 : }
1967 70 : if (!(flags & SCAN_SUPPRESS)) {
1968 70 : if (numVars && objIndex >= argCount) {
1969 : break;
1970 70 : } else if (numVars) {
1971 : zend_uint refcount;
1972 :
1973 20 : current = args[objIndex++];
1974 20 : refcount = Z_REFCOUNT_PP(current);
1975 20 : zval_dtor( *current );
1976 20 : ZVAL_UNICODEL( *current, string, end-string, 1);
1977 20 : Z_SET_REFCOUNT_PP(current, refcount);
1978 20 : Z_SET_ISREF_PP(current);
1979 : } else {
1980 50 : add_index_unicodel( *return_value, objIndex++, string, end-string, 1);
1981 : }
1982 : }
1983 70 : string = end;
1984 70 : break;
1985 :
1986 : case '[': {
1987 : u_CharSet cset;
1988 :
1989 2 : if (width == 0) {
1990 2 : width = (size_t) ~0;
1991 : }
1992 2 : end = string;
1993 :
1994 2 : format = u_BuildCharSet(&cset, format);
1995 10 : while (*end != 0x00) {
1996 6 : sch = *end;
1997 6 : if (!u_CharInSet(&cset, sch)) {
1998 0 : break;
1999 : }
2000 6 : end++;
2001 6 : if (--width == 0) {
2002 0 : break;
2003 : }
2004 : }
2005 2 : u_ReleaseCharSet(&cset);
2006 :
2007 2 : if (string == end) {
2008 : /*
2009 : * Nothing matched the range, stop processing
2010 : */
2011 0 : goto done;
2012 : }
2013 2 : if (!(flags & SCAN_SUPPRESS)) {
2014 2 : if (numVars && objIndex >= argCount) {
2015 : break;
2016 2 : } else if (numVars) {
2017 2 : current = args[objIndex++];
2018 2 : zval_dtor( *current );
2019 2 : ZVAL_UNICODEL( *current, string, end-string, 1);
2020 : } else {
2021 0 : add_index_unicodel(*return_value, objIndex++, string, end-string, 1);
2022 : }
2023 : }
2024 2 : string = end;
2025 2 : break;
2026 : }
2027 : /*
2028 : case 'c':
2029 : / Scan a single character./
2030 :
2031 : sch = *string;
2032 : string++;
2033 : if (!(flags & SCAN_SUPPRESS)) {
2034 : if (numVars) {
2035 : char __buf[2];
2036 : __buf[0] = sch;
2037 : __buf[1] = '\0';;
2038 : current = args[objIndex++];
2039 : zval_dtor(*current);
2040 : ZVAL_STRINGL( *current, __buf, 1, 1);
2041 : } else {
2042 : add_index_stringl(*return_value, objIndex++, &sch, 1, 1);
2043 : }
2044 : }
2045 : break;
2046 : */
2047 : case 'i':
2048 : /*
2049 : * Scan an unsigned or signed integer.
2050 : */
2051 : /*-cc-*/
2052 101 : buf[0] = 0x00;
2053 : /*-cc-*/
2054 101 : if ((width == 0) || (width > sizeof(buf) - 1)) {
2055 101 : width = sizeof(buf) - 1;
2056 : }
2057 :
2058 101 : flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
2059 412 : for (end = buf; width > 0; width--) {
2060 412 : switch (*string) {
2061 : /*
2062 : * The 0 digit has special meaning at the beginning of
2063 : * a number. If we are unsure of the base, it
2064 : * indicates that we are in base 8 or base 16 (if it is
2065 : * followed by an 'x').
2066 : */
2067 : case 0x30 /* '0' */:
2068 : /*-cc-*/
2069 32 : if (base == 16) {
2070 9 : flags |= SCAN_XOK;
2071 : }
2072 : /*-cc-*/
2073 32 : if (base == 0) {
2074 0 : base = 8;
2075 0 : flags |= SCAN_XOK;
2076 : }
2077 32 : if (flags & SCAN_NOZERO) {
2078 20 : flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO);
2079 : } else {
2080 12 : flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
2081 : }
2082 32 : goto addToInt;
2083 :
2084 : case 0x31 /* '1' */: case 0x32 /* '2' */: case 0x33 /* '3' */: case 0x34 /* '4' */:
2085 : case 0x35 /* '5' */: case 0x36 /* '6' */: case 0x37 /* '7' */:
2086 204 : if (base == 0) {
2087 0 : base = 10;
2088 : }
2089 204 : flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
2090 204 : goto addToInt;
2091 :
2092 : case 0x38 /* '8' */: case 0x39 /* '9' */:
2093 24 : if (base == 0) {
2094 0 : base = 10;
2095 : }
2096 24 : if (base <= 8) {
2097 7 : break;
2098 : }
2099 17 : flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
2100 17 : goto addToInt;
2101 :
2102 : case 0x41 /* 'A' */: case 0x42 /* 'B' */: case 0x43 /* 'C' */:
2103 : case 0x44 /* 'D' */: case 0x45 /* 'E' */: case 0x46 /* 'F' */:
2104 : case 0x61 /* 'a' */: case 0x62 /* 'b' */: case 0x63 /* 'c' */:
2105 : case 0x64 /* 'd' */: case 0x65 /* 'e' */: case 0x66 /* 'f' */:
2106 41 : if (base <= 10) {
2107 7 : break;
2108 : }
2109 34 : flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
2110 34 : goto addToInt;
2111 :
2112 : case 0x2B /* '+' */: case 0x2D /* '-' */:
2113 26 : if (flags & SCAN_SIGNOK) {
2114 26 : flags &= ~SCAN_SIGNOK;
2115 26 : goto addToInt;
2116 : }
2117 0 : break;
2118 :
2119 : case 0x78 /* 'x' */: case 0x58 /* 'X' */:
2120 0 : if ((flags & SCAN_XOK) && (end == buf+1)) {
2121 0 : base = 16;
2122 0 : flags &= ~SCAN_XOK;
2123 0 : goto addToInt;
2124 : }
2125 : break;
2126 : }
2127 :
2128 : /*
2129 : * We got an illegal character so we are done accumulating.
2130 : */
2131 99 : break;
2132 :
2133 313 : addToInt:
2134 : /*
2135 : * Add the character to the temporary buffer.
2136 : */
2137 313 : *end++ = *string++;
2138 313 : if (*string == 0x00) {
2139 2 : break;
2140 : }
2141 : }
2142 :
2143 : /*
2144 : * Check to see if we need to back up because we only got a
2145 : * sign or a trailing x after a 0.
2146 : */
2147 101 : if (flags & SCAN_NODIGITS) {
2148 16 : if (*string == 0x00) {
2149 0 : underflow = 1;
2150 : }
2151 16 : goto done;
2152 85 : } else if (end[-1] == 0x78 /* 'x' */ || end[-1] == 0x58 /* 'X' */) {
2153 0 : end--;
2154 0 : string--;
2155 : }
2156 :
2157 : /*
2158 : * Scan the value from the temporary buffer. If we are
2159 : * returning a large unsigned value, we have to convert it back
2160 : * to a string since PHP only supports signed values.
2161 : */
2162 85 : if (!(flags & SCAN_SUPPRESS)) {
2163 85 : *end = 0x00;
2164 85 : value = (int) (*fn)(buf, NULL, base);
2165 92 : if ((flags & SCAN_UNSIGNED) && (value < 0)) {
2166 7 : u_sprintf(buf, "%u", value); /* INTL: ISO digit */
2167 7 : if (numVars && objIndex >= argCount) {
2168 : break;
2169 7 : } else if (numVars) {
2170 : /* change passed value type to string */
2171 2 : current = args[objIndex++];
2172 2 : zval_dtor(*current);
2173 2 : ZVAL_UNICODE( *current, buf, 1 );
2174 : } else {
2175 5 : add_index_unicode(*return_value, objIndex++, buf, 1);
2176 : }
2177 : } else {
2178 78 : if (numVars && objIndex >= argCount) {
2179 : break;
2180 78 : } else if (numVars) {
2181 21 : current = args[objIndex++];
2182 21 : zval_dtor(*current);
2183 21 : ZVAL_LONG(*current, value);
2184 : } else {
2185 57 : add_index_long(*return_value, objIndex++, value);
2186 : }
2187 : }
2188 : }
2189 85 : break;
2190 :
2191 : case 'f':
2192 : /*
2193 : * Scan a floating point number
2194 : */
2195 49 : buf[0] = 0x00; /* call me pedantic */
2196 49 : if ((width == 0) || (width > sizeof(buf) - 1)) {
2197 49 : width = sizeof(buf) - 1;
2198 : }
2199 49 : flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
2200 335 : for (end = buf; width > 0; width--) {
2201 335 : switch (*string) {
2202 : case 0x30 /* '0' */: case 0x31 /* '1' */: case 0x32 /* '2' */: case 0x33 /* '3' */:
2203 : case 0x34 /* '4' */: case 0x35 /* '5' */: case 0x36 /* '6' */: case 0x37 /* '7' */:
2204 : case 0x38 /* '8' */: case 0x39 /* '9' */:
2205 241 : flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
2206 241 : goto addToFloat;
2207 : case 0x2B /* '+' */:
2208 : case 0x2D /* '-' */:
2209 10 : if (flags & SCAN_SIGNOK) {
2210 10 : flags &= ~SCAN_SIGNOK;
2211 10 : goto addToFloat;
2212 : }
2213 0 : break;
2214 : case 0x2E /* '.' */:
2215 30 : if (flags & SCAN_PTOK) {
2216 30 : flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
2217 30 : goto addToFloat;
2218 : }
2219 0 : break;
2220 : case 0x65 /* 'e' */:
2221 : case 0x45 /* 'E' */:
2222 : /*
2223 : * An exponent is not allowed until there has
2224 : * been at least one digit.
2225 : */
2226 12 : if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) == SCAN_EXPOK) {
2227 12 : flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
2228 : | SCAN_SIGNOK | SCAN_NODIGITS;
2229 12 : goto addToFloat;
2230 : }
2231 : break;
2232 : }
2233 :
2234 : /*
2235 : * We got an illegal character so we are done accumulating.
2236 : */
2237 42 : break;
2238 :
2239 293 : addToFloat:
2240 : /*
2241 : * Add the character to the temporary buffer.
2242 : */
2243 293 : *end++ = *string++;
2244 293 : if (*string == 0x00) {
2245 7 : break;
2246 : }
2247 : }
2248 :
2249 : /*
2250 : * Check to see if we need to back up because we saw a
2251 : * trailing 'e' or sign.
2252 : */
2253 49 : if (flags & SCAN_NODIGITS) {
2254 4 : if (flags & SCAN_EXPOK) {
2255 : /*
2256 : * There were no digits at all so scanning has
2257 : * failed and we are done.
2258 : */
2259 4 : if (*string == 0x00) {
2260 0 : underflow = 1;
2261 : }
2262 4 : goto done;
2263 : }
2264 :
2265 : /*
2266 : * We got a bad exponent ('e' and maybe a sign).
2267 : */
2268 0 : end--;
2269 0 : string--;
2270 0 : if (*end != 0x65 /* 'e' */ && *end != 0x45 /* 'E' */) {
2271 0 : end--;
2272 0 : string--;
2273 : }
2274 : }
2275 :
2276 : /*
2277 : * Scan the value from the temporary buffer.
2278 : */
2279 45 : if (!(flags & SCAN_SUPPRESS)) {
2280 : double dvalue;
2281 45 : *end = 0x00;
2282 45 : dvalue = zend_u_strtod(buf, NULL);
2283 45 : if (numVars && objIndex >= argCount) {
2284 : break;
2285 45 : } else if (numVars) {
2286 14 : current = args[objIndex++];
2287 14 : zval_dtor(*current);
2288 14 : ZVAL_DOUBLE(*current, dvalue);
2289 : } else {
2290 31 : add_index_double( *return_value, objIndex++, dvalue );
2291 : }
2292 : }
2293 : break;
2294 : } /* switch (op) */
2295 202 : nconversions++;
2296 : } /* while (*format != '\0') */
2297 :
2298 168 : done:
2299 168 : result = SCAN_SUCCESS;
2300 :
2301 174 : if (underflow && (0==nconversions)) {
2302 6 : scan_set_error_return( numVars, return_value );
2303 6 : result = SCAN_ERROR_EOF;
2304 162 : } else if (numVars) {
2305 21 : convert_to_long( *return_value );
2306 21 : Z_LVAL_PP(return_value) = nconversions;
2307 141 : } else if (nconversions < totalVars) {
2308 : /* TODO: not all elements converted. we need to prune the list - cc */
2309 : }
2310 168 : return result;
2311 : }
2312 : /* }}} */
2313 :
2314 : /* the compiler choked when i tried to make this a macro */
2315 : static inline void scan_set_error_return(int numVars, zval **return_value) /* {{{ */
2316 981 : {
2317 981 : if (numVars) {
2318 6 : Z_TYPE_PP(return_value) = IS_LONG;
2319 6 : Z_LVAL_PP(return_value) = SCAN_ERROR_EOF; /* EOF marker */
2320 : } else {
2321 : /* convert_to_null calls destructor */
2322 975 : convert_to_null( *return_value );
2323 : }
2324 981 : }
2325 : /* }}} */
2326 :
2327 : /*
2328 : * Local variables:
2329 : * tab-width: 4
2330 : * c-basic-offset: 4
2331 : * End:
2332 : * vim600: sw=4 ts=4 fdm=marker
2333 : * vim<600: sw=4 ts=4
2334 : */
|