PHP  
 PHP: Test and Code Coverage Analysis
downloads | QA | documentation | faq | getting help | mailing lists | reporting bugs | php.net sites | links | my php.net 
 

LCOV - code coverage report
Current view: top level - ext/ereg/regex - regcomp.c (source / functions) Hit Total Coverage
Test: PHP Code Coverage Lines: 506 688 73.5 %
Date: 2015-05-23 Functions: 31 38 81.6 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : #include <sys/types.h>
       2             : #include <stdio.h>
       3             : #include <string.h>
       4             : #include <ctype.h>
       5             : #include <limits.h>
       6             : #include <stdlib.h>
       7             : 
       8             : #define POSIX_MISTAKE
       9             : 
      10             : #include "utils.h"
      11             : #include "regex.h"
      12             : #include "regex2.h"
      13             : 
      14             : #include "cclass.h"
      15             : #include "cname.h"
      16             : 
      17             : /*
      18             :  * parse structure, passed up and down to avoid global variables and
      19             :  * other clumsinesses
      20             :  */
      21             : struct parse {
      22             :         unsigned char *next;            /* next character in RE */
      23             :         unsigned char *end;             /* end of string (-> NUL normally) */
      24             :         int error;              /* has an error been seen? */
      25             :         sop *strip;             /* malloced strip */
      26             :         sopno ssize;            /* malloced strip size (allocated) */
      27             :         sopno slen;             /* malloced strip length (used) */
      28             :         int ncsalloc;           /* number of csets allocated */
      29             :         struct re_guts *g;
      30             : #       define  NPAREN  10      /* we need to remember () 1-9 for back refs */
      31             :         sopno pbegin[NPAREN];   /* -> ( ([0] unused) */
      32             :         sopno pend[NPAREN];     /* -> ) ([0] unused) */
      33             : };
      34             : 
      35             : #include "regcomp.ih"
      36             : 
      37             : static unsigned char nuls[10];          /* place to point scanner in event of error */
      38             : 
      39             : /*
      40             :  * macros for use with parse structure
      41             :  * BEWARE:  these know that the parse structure is named `p' !!!
      42             :  */
      43             : #define PEEK()  (*p->next)
      44             : #define PEEK2() (*(p->next+1))
      45             : #define MORE()  (p->next < p->end)
      46             : #define MORE2() (p->next+1 < p->end)
      47             : #define SEE(c)  (MORE() && PEEK() == (c))
      48             : #define SEETWO(a, b)    (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
      49             : #define EAT(c)  ((SEE(c)) ? (NEXT(), 1) : 0)
      50             : #define EATTWO(a, b)    ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
      51             : #define NEXT()  (p->next++)
      52             : #define NEXT2() (p->next += 2)
      53             : #define NEXTn(n)        (p->next += (n))
      54             : #define GETNEXT()       (*p->next++)
      55             : #define SETERROR(e)     seterr(p, (e))
      56             : #define REQUIRE(co, e)  (void) ((co) || SETERROR(e))
      57             : #define MUSTSEE(c, e)   (REQUIRE(MORE() && PEEK() == (c), e))
      58             : #define MUSTEAT(c, e)   (REQUIRE(MORE() && GETNEXT() == (c), e))
      59             : #define MUSTNOTSEE(c, e)        (REQUIRE(!MORE() || PEEK() != (c), e))
      60             : #define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
      61             : #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
      62             : #define AHEAD(pos)              dofwd(p, pos, HERE()-(pos))
      63             : #define ASTERN(sop, pos)        EMIT(sop, HERE()-pos)
      64             : #define HERE()          (p->slen)
      65             : #define THERE()         (p->slen - 1)
      66             : #define THERETHERE()    (p->slen - 2)
      67             : #define DROP(n) (p->slen -= (n))
      68             : 
      69             : #ifndef NDEBUG
      70             : static int never = 0;           /* for use in asserts; shuts lint up */
      71             : #else
      72             : #define never   0               /* some <assert.h>s have bugs too */
      73             : #endif
      74             : 
      75             : /*
      76             :  - regcomp - interface for parser and compilation
      77             :  = API_EXPORT(int) regcomp(regex_t *, const char *, int);
      78             :  = #define      REG_BASIC       0000
      79             :  = #define      REG_EXTENDED    0001
      80             :  = #define      REG_ICASE       0002
      81             :  = #define      REG_NOSUB       0004
      82             :  = #define      REG_NEWLINE     0010
      83             :  = #define      REG_NOSPEC      0020
      84             :  = #define      REG_PEND        0040
      85             :  = #define      REG_DUMP        0200
      86             :  */
      87             : API_EXPORT(int)                 /* 0 success, otherwise REG_something */
      88         403 : regcomp(preg, pattern, cflags)
      89             : regex_t *preg;
      90             : const char *pattern;
      91             : int cflags;
      92             : {
      93             :         struct parse pa;
      94             :         register struct re_guts *g;
      95         403 :         register struct parse *p = &pa;
      96             :         register int i;
      97             :         register size_t len;
      98             : #ifdef REDEBUG
      99             : #       define  GOODFLAGS(f)    (f)
     100             : #else
     101             : #       define  GOODFLAGS(f)    ((f)&~REG_DUMP)
     102             : #endif
     103             : 
     104         403 :         cflags = GOODFLAGS(cflags);
     105         403 :         if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
     106           0 :                 return(REG_INVARG);
     107             : 
     108         403 :         if (cflags&REG_PEND) {
     109           0 :                 if (preg->re_endp < pattern)
     110           0 :                         return(REG_INVARG);
     111           0 :                 len = preg->re_endp - pattern;
     112             :         } else
     113         403 :                 len = strlen((char *)pattern);
     114             : 
     115             :         /* do the mallocs early so failure handling is easy */
     116         403 :         g = (struct re_guts *)malloc(sizeof(struct re_guts) +
     117             :                                                         (NC-1)*sizeof(cat_t));
     118         403 :         if (g == NULL)
     119           0 :                 return(REG_ESPACE);
     120             :         {
     121             :                 /* Patched for CERT Vulnerability Note VU#695940, Feb 2015. */
     122         403 :                 size_t new_ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */
     123         403 :                 if (new_ssize < len || new_ssize > LONG_MAX / sizeof(sop)) {
     124           0 :                         free((char *) g);
     125           0 :                         return REG_INVARG;
     126             :                 }
     127         403 :                 p->ssize = new_ssize;
     128             :         }
     129         403 :         p->strip = (sop *)malloc(p->ssize * sizeof(sop));
     130         403 :         p->slen = 0;
     131         403 :         if (p->strip == NULL) {
     132           0 :                 free((char *)g);
     133           0 :                 return(REG_ESPACE);
     134             :         }
     135             : 
     136             :         /* set things up */
     137         403 :         p->g = g;
     138         403 :         p->next = (unsigned char *)pattern;  /* convenience; we do not modify it */
     139         403 :         p->end = p->next + len;
     140         403 :         p->error = 0;
     141         403 :         p->ncsalloc = 0;
     142        4433 :         for (i = 0; i < NPAREN; i++) {
     143        4030 :                 p->pbegin[i] = 0;
     144        4030 :                 p->pend[i] = 0;
     145             :         }
     146         403 :         g->csetsize = NC;
     147         403 :         g->sets = NULL;
     148         403 :         g->setbits = NULL;
     149         403 :         g->ncsets = 0;
     150         403 :         g->cflags = cflags;
     151         403 :         g->iflags = 0;
     152         403 :         g->nbol = 0;
     153         403 :         g->neol = 0;
     154         403 :         g->must = NULL;
     155         403 :         g->mlen = 0;
     156         403 :         g->nsub = 0;
     157         403 :         g->ncategories = 1;  /* category 0 is "everything else" */
     158         403 :         g->categories = &g->catspace[0];
     159         403 :         (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
     160         403 :         g->backrefs = 0;
     161             : 
     162             :         /* do it */
     163         403 :         EMIT(OEND, 0);
     164         403 :         g->firststate = THERE();
     165         403 :         if (cflags&REG_EXTENDED)
     166         373 :                 p_ere(p, OUT);
     167          30 :         else if (cflags&REG_NOSPEC)
     168           0 :                 p_str(p);
     169             :         else
     170          30 :                 p_bre(p, OUT, OUT);
     171         403 :         EMIT(OEND, 0);
     172         403 :         g->laststate = THERE();
     173             : 
     174             :         /* tidy up loose ends and fill things in */
     175         403 :         categorize(p, g);
     176         403 :         stripsnug(p, g);
     177         403 :         findmust(p, g);
     178         403 :         g->nplus = pluscount(p, g);
     179         403 :         g->magic = MAGIC2;
     180         403 :         preg->re_nsub = g->nsub;
     181         403 :         preg->re_g = g;
     182         403 :         preg->re_magic = MAGIC1;
     183             : #ifndef REDEBUG
     184             :         /* not debugging, so can't rely on the assert() in regexec() */
     185         403 :         if (g->iflags&BAD)
     186           0 :                 SETERROR(REG_ASSERT);
     187             : #endif
     188             : 
     189             :         /* win or lose, we're done */
     190         403 :         if (p->error != 0)   /* lose */
     191         142 :                 regfree(preg);
     192         403 :         return(p->error);
     193             : }
     194             : 
     195             : /*
     196             :  - p_ere - ERE parser top level, concatenation and alternation
     197             :  == static void p_ere(register struct parse *p, int stop);
     198             :  */
     199             : static void
     200        8682 : p_ere(p, stop)
     201             : register struct parse *p;
     202             : int stop;                       /* character this ERE should end at */
     203             : {
     204             :         register unsigned char c;
     205        8682 :         register sopno prevback = 0;
     206        8682 :         register sopno prevfwd = 0;
     207             :         register sopno conc;
     208        8682 :         register int first = 1;         /* is this the first alternative? */
     209             : 
     210             :         for (;;) {
     211             :                 /* do a bunch of concatenated expressions */
     212        8752 :                 conc = HERE();
     213       34951 :                 while (MORE() && (c = PEEK()) != '|' && c != stop)
     214       17447 :                         p_ere_exp(p);
     215        8752 :                 (void) REQUIRE(HERE() != conc, REG_EMPTY);      /* require nonempty */
     216             : 
     217        8752 :                 if (!EAT('|'))
     218        8682 :                         break;          /* NOTE BREAK OUT */
     219             : 
     220          70 :                 if (first) {
     221          48 :                         INSERT(OCH_, conc);     /* offset is wrong */
     222          48 :                         prevfwd = conc;
     223          48 :                         prevback = conc;
     224          48 :                         first = 0;
     225             :                 }
     226          70 :                 ASTERN(OOR1, prevback);
     227          70 :                 prevback = THERE();
     228          70 :                 AHEAD(prevfwd);                 /* fix previous offset */
     229          70 :                 prevfwd = HERE();
     230          70 :                 EMIT(OOR2, 0);                  /* offset is very wrong */
     231          70 :         }
     232             : 
     233        8682 :         if (!first) {           /* tail-end fixups */
     234          48 :                 AHEAD(prevfwd);
     235          48 :                 ASTERN(O_CH, prevback);
     236             :         }
     237             : 
     238             :         assert(!MORE() || SEE(stop));
     239        8682 : }
     240             : 
     241             : /*
     242             :  - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
     243             :  == static void p_ere_exp(register struct parse *p);
     244             :  */
     245             : static void
     246       17447 : p_ere_exp(p)
     247             : register struct parse *p;
     248             : {
     249             :         register unsigned char c;
     250             :         register sopno pos;
     251             :         register int count;
     252             :         register int count2;
     253             :         register sopno subno;
     254       17447 :         int wascaret = 0;
     255             : 
     256             :         assert(MORE());         /* caller should have ensured this */
     257       17447 :         c = GETNEXT();
     258             : 
     259       17447 :         pos = HERE();
     260       17447 :         switch (c) {
     261             :         case '(':
     262        8319 :                 REQUIRE(MORE(), REG_EPAREN);
     263        8319 :                 p->g->nsub++;
     264        8319 :                 subno = p->g->nsub;
     265        8319 :                 if (subno < NPAREN)
     266         163 :                         p->pbegin[subno] = HERE();
     267        8319 :                 EMIT(OLPAREN, subno);
     268        8319 :                 if (!SEE(')'))
     269        8309 :                         p_ere(p, ')');
     270        8319 :                 if (subno < NPAREN) {
     271         163 :                         p->pend[subno] = HERE();
     272             :                         assert(p->pend[subno] != 0);
     273             :                 }
     274        8319 :                 EMIT(ORPAREN, subno);
     275        8319 :                 MUSTEAT(')', REG_EPAREN);
     276        8319 :                 break;
     277             : #ifndef POSIX_MISTAKE
     278             :         case ')':               /* happens only if no current unmatched ( */
     279             :                 /*
     280             :                  * You may ask, why the ifndef?  Because I didn't notice
     281             :                  * this until slightly too late for 1003.2, and none of the
     282             :                  * other 1003.2 regular-expression reviewers noticed it at
     283             :                  * all.  So an unmatched ) is legal POSIX, at least until
     284             :                  * we can get it fixed.
     285             :                  */
     286             :                 SETERROR(REG_EPAREN);
     287             :                 break;
     288             : #endif
     289             :         case '^':
     290          39 :                 EMIT(OBOL, 0);
     291          39 :                 p->g->iflags |= USEBOL;
     292          39 :                 p->g->nbol++;
     293          39 :                 wascaret = 1;
     294          39 :                 break;
     295             :         case '$':
     296          39 :                 EMIT(OEOL, 0);
     297          39 :                 p->g->iflags |= USEEOL;
     298          39 :                 p->g->neol++;
     299          39 :                 break;
     300             :         case '|':
     301           0 :                 SETERROR(REG_EMPTY);
     302           0 :                 break;
     303             :         case '*':
     304             :         case '+':
     305             :         case '?':
     306          22 :                 SETERROR(REG_BADRPT);
     307          22 :                 break;
     308             :         case '.':
     309        8252 :                 if (p->g->cflags&REG_NEWLINE)
     310           0 :                         nonnewline(p);
     311             :                 else
     312        8252 :                         EMIT(OANY, 0);
     313        8252 :                 break;
     314             :         case '[':
     315         154 :                 p_bracket(p);
     316         154 :                 break;
     317             :         case '\\':
     318         158 :                 REQUIRE(MORE(), REG_EESCAPE);
     319         158 :                 c = GETNEXT();
     320         158 :                 ordinary(p, c);
     321         158 :                 break;
     322             :         case '{':               /* okay as ordinary except if digit follows */
     323           0 :                 REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT);
     324             :                 /* FALLTHROUGH */
     325             :         default:
     326         464 :                 ordinary(p, c);
     327             :                 break;
     328             :         }
     329             : 
     330       17447 :         if (!MORE())
     331         276 :                 return;
     332       17171 :         c = PEEK();
     333             :         /* we call { a repetition if followed by a digit */
     334       17403 :         if (!( c == '*' || c == '+' || c == '?' ||
     335         232 :                                 (c == '{' && MORE2() && isdigit(PEEK2())) ))
     336       17003 :                 return;         /* no repetition, we're done */
     337         168 :         NEXT();
     338             : 
     339         168 :         REQUIRE(!wascaret, REG_BADRPT);
     340         168 :         switch (c) {
     341             :         case '*':       /* implemented as +? */
     342             :                 /* this case does not require the (y|) trick, noKLUDGE */
     343          16 :                 INSERT(OPLUS_, pos);
     344          16 :                 ASTERN(O_PLUS, pos);
     345          16 :                 INSERT(OQUEST_, pos);
     346          16 :                 ASTERN(O_QUEST, pos);
     347          16 :                 break;
     348             :         case '+':
     349          25 :                 INSERT(OPLUS_, pos);
     350          25 :                 ASTERN(O_PLUS, pos);
     351          25 :                 break;
     352             :         case '?':
     353             :                 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
     354          11 :                 INSERT(OCH_, pos);              /* offset slightly wrong */
     355          11 :                 ASTERN(OOR1, pos);              /* this one's right */
     356          11 :                 AHEAD(pos);                     /* fix the OCH_ */
     357          11 :                 EMIT(OOR2, 0);                  /* offset very wrong... */
     358          11 :                 AHEAD(THERE());                 /* ...so fix it */
     359          11 :                 ASTERN(O_CH, THERETHERE());
     360          11 :                 break;
     361             :         case '{':
     362         116 :                 count = p_count(p);
     363         116 :                 if (EAT(',')) {
     364          56 :                         if (isdigit(PEEK())) {
     365          46 :                                 count2 = p_count(p);
     366          46 :                                 REQUIRE(count <= count2, REG_BADBR);
     367             :                         } else          /* single number with comma */
     368          10 :                                 count2 = INFINITY;
     369             :                 } else          /* just a single number */
     370          60 :                         count2 = count;
     371         116 :                 repeat(p, pos, count, count2);
     372         116 :                 if (!EAT('}')) {        /* error heuristics */
     373          24 :                         while (MORE() && PEEK() != '}')
     374           0 :                                 NEXT();
     375          12 :                         REQUIRE(MORE(), REG_EBRACE);
     376          12 :                         SETERROR(REG_BADBR);
     377             :                 }
     378             :                 break;
     379             :         }
     380             : 
     381         168 :         if (!MORE())
     382          61 :                 return;
     383         107 :         c = PEEK();
     384         107 :         if (!( c == '*' || c == '+' || c == '?' ||
     385           0 :                                 (c == '{' && MORE2() && isdigit(PEEK2())) ) )
     386         107 :                 return;
     387           0 :         SETERROR(REG_BADRPT);
     388             : }
     389             : 
     390             : /*
     391             :  - p_str - string (no metacharacters) "parser"
     392             :  == static void p_str(register struct parse *p);
     393             :  */
     394             : static void
     395           0 : p_str(p)
     396             : register struct parse *p;
     397             : {
     398           0 :         REQUIRE(MORE(), REG_EMPTY);
     399           0 :         while (MORE())
     400           0 :                 ordinary(p, GETNEXT());
     401           0 : }
     402             : 
     403             : /*
     404             :  - p_bre - BRE parser top level, anchoring and concatenation
     405             :  == static void p_bre(register struct parse *p, register int end1, \
     406             :  ==     register int end2);
     407             :  * Giving end1 as OUT essentially eliminates the end1/end2 check.
     408             :  *
     409             :  * This implementation is a bit of a kludge, in that a trailing $ is first
     410             :  * taken as an ordinary character and then revised to be an anchor.  The
     411             :  * only undesirable side effect is that '$' gets included as a character
     412             :  * category in such cases.  This is fairly harmless; not worth fixing.
     413             :  * The amount of lookahead needed to avoid this kludge is excessive.
     414             :  */
     415             : static void
     416          30 : p_bre(p, end1, end2)
     417             : register struct parse *p;
     418             : register int end1;              /* first terminating character */
     419             : register int end2;              /* second terminating character */
     420             : {
     421          30 :         register sopno start = HERE();
     422          30 :         register int first = 1;                 /* first subexpression? */
     423          30 :         register int wasdollar = 0;
     424             : 
     425          30 :         if (EAT('^')) {
     426           0 :                 EMIT(OBOL, 0);
     427           0 :                 p->g->iflags |= USEBOL;
     428           0 :                 p->g->nbol++;
     429             :         }
     430         140 :         while (MORE() && !SEETWO(end1, end2)) {
     431          80 :                 wasdollar = p_simp_re(p, first);
     432          80 :                 first = 0;
     433             :         }
     434          30 :         if (wasdollar) {        /* oops, that was a trailing anchor */
     435           0 :                 DROP(1);
     436           0 :                 EMIT(OEOL, 0);
     437           0 :                 p->g->iflags |= USEEOL;
     438           0 :                 p->g->neol++;
     439             :         }
     440             : 
     441          30 :         REQUIRE(HERE() != start, REG_EMPTY);    /* require nonempty */
     442          30 : }
     443             : 
     444             : /*
     445             :  - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
     446             :  == static int p_simp_re(register struct parse *p, int starordinary);
     447             :  */
     448             : static int                      /* was the simple RE an unbackslashed $? */
     449          80 : p_simp_re(p, starordinary)
     450             : register struct parse *p;
     451             : int starordinary;               /* is a leading * an ordinary character? */
     452             : {
     453             :         register int c;
     454             :         register int count;
     455             :         register int count2;
     456             :         register sopno pos;
     457             :         register int i;
     458             :         register sopno subno;
     459             : #       define  BACKSL  (1<<CHAR_BIT)
     460             : 
     461          80 :         pos = HERE();           /* repetion op, if any, covers from here */
     462             : 
     463             :         assert(MORE());         /* caller should have ensured this */
     464          80 :         c = GETNEXT();
     465          80 :         if (c == '\\') {
     466           0 :                 REQUIRE(MORE(), REG_EESCAPE);
     467           0 :                 c = BACKSL | (unsigned char)GETNEXT();
     468             :         }
     469          80 :         switch (c) {
     470             :         case '.':
     471           0 :                 if (p->g->cflags&REG_NEWLINE)
     472           0 :                         nonnewline(p);
     473             :                 else
     474           0 :                         EMIT(OANY, 0);
     475           0 :                 break;
     476             :         case '[':
     477           0 :                 p_bracket(p);
     478           0 :                 break;
     479             :         case BACKSL|'{':
     480           0 :                 SETERROR(REG_BADRPT);
     481           0 :                 break;
     482             :         case BACKSL|'(':
     483           0 :                 p->g->nsub++;
     484           0 :                 subno = p->g->nsub;
     485           0 :                 if (subno < NPAREN)
     486           0 :                         p->pbegin[subno] = HERE();
     487           0 :                 EMIT(OLPAREN, subno);
     488             :                 /* the MORE here is an error heuristic */
     489           0 :                 if (MORE() && !SEETWO('\\', ')'))
     490           0 :                         p_bre(p, '\\', ')');
     491           0 :                 if (subno < NPAREN) {
     492           0 :                         p->pend[subno] = HERE();
     493             :                         assert(p->pend[subno] != 0);
     494             :                 }
     495           0 :                 EMIT(ORPAREN, subno);
     496           0 :                 REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
     497           0 :                 break;
     498             :         case BACKSL|')':        /* should not get here -- must be user */
     499             :         case BACKSL|'}':
     500           0 :                 SETERROR(REG_EPAREN);
     501           0 :                 break;
     502             :         case BACKSL|'1':
     503             :         case BACKSL|'2':
     504             :         case BACKSL|'3':
     505             :         case BACKSL|'4':
     506             :         case BACKSL|'5':
     507             :         case BACKSL|'6':
     508             :         case BACKSL|'7':
     509             :         case BACKSL|'8':
     510             :         case BACKSL|'9':
     511           0 :                 i = (c&~BACKSL) - '0';
     512             :                 assert(i < NPAREN);
     513           0 :                 if (p->pend[i] != 0) {
     514             :                         assert(i <= p->g->nsub);
     515           0 :                         EMIT(OBACK_, i);
     516             :                         assert(p->pbegin[i] != 0);
     517             :                         assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
     518             :                         assert(OP(p->strip[p->pend[i]]) == ORPAREN);
     519           0 :                         (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
     520           0 :                         EMIT(O_BACK, i);
     521             :                 } else
     522           0 :                         SETERROR(REG_ESUBREG);
     523           0 :                 p->g->backrefs = 1;
     524           0 :                 break;
     525             :         case '*':
     526           0 :                 REQUIRE(starordinary, REG_BADRPT);
     527             :                 /* FALLTHROUGH */
     528             :         default:
     529          80 :                 ordinary(p, (unsigned char)c);  /* takes off BACKSL, if any */
     530             :                 break;
     531             :         }
     532             : 
     533          80 :         if (EAT('*')) {         /* implemented as +? */
     534             :                 /* this case does not require the (y|) trick, noKLUDGE */
     535           0 :                 INSERT(OPLUS_, pos);
     536           0 :                 ASTERN(O_PLUS, pos);
     537           0 :                 INSERT(OQUEST_, pos);
     538           0 :                 ASTERN(O_QUEST, pos);
     539          80 :         } else if (EATTWO('\\', '{')) {
     540           0 :                 count = p_count(p);
     541           0 :                 if (EAT(',')) {
     542           0 :                         if (MORE() && isdigit(PEEK())) {
     543           0 :                                 count2 = p_count(p);
     544           0 :                                 REQUIRE(count <= count2, REG_BADBR);
     545             :                         } else          /* single number with comma */
     546           0 :                                 count2 = INFINITY;
     547             :                 } else          /* just a single number */
     548           0 :                         count2 = count;
     549           0 :                 repeat(p, pos, count, count2);
     550           0 :                 if (!EATTWO('\\', '}')) {       /* error heuristics */
     551           0 :                         while (MORE() && !SEETWO('\\', '}'))
     552           0 :                                 NEXT();
     553           0 :                         REQUIRE(MORE(), REG_EBRACE);
     554           0 :                         SETERROR(REG_BADBR);
     555             :                 }
     556          80 :         } else if (c == (unsigned char)'$')     /* $ (but not \$) ends it */
     557           0 :                 return(1);
     558             : 
     559          80 :         return(0);
     560             : }
     561             : 
     562             : /*
     563             :  - p_count - parse a repetition count
     564             :  == static int p_count(register struct parse *p);
     565             :  */
     566             : static int                      /* the value */
     567         162 : p_count(p)
     568             : register struct parse *p;
     569             : {
     570         162 :         register int count = 0;
     571         162 :         register int ndigits = 0;
     572             : 
     573         528 :         while (MORE() && isdigit(PEEK()) && count <= DUPMAX) {
     574         204 :                 count = count*10 + (GETNEXT() - '0');
     575         204 :                 ndigits++;
     576             :         }
     577             : 
     578         162 :         REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
     579         162 :         return(count);
     580             : }
     581             : 
     582             : /*
     583             :  - p_bracket - parse a bracketed character list
     584             :  == static void p_bracket(register struct parse *p);
     585             :  *
     586             :  * Note a significant property of this code:  if the allocset() did SETERROR,
     587             :  * no set operations are done.
     588             :  */
     589             : static void
     590         306 : p_bracket(p)
     591             : register struct parse *p;
     592             : {
     593         306 :         register cset *cs = allocset(p);
     594         306 :         register int invert = 0;
     595             : 
     596             :         /* Dept of Truly Sickening Special-Case Kludges */
     597         306 :         if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
     598           0 :                 EMIT(OBOW, 0);
     599           0 :                 NEXTn(6);
     600           0 :                 return;
     601             :         }
     602         306 :         if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
     603           0 :                 EMIT(OEOW, 0);
     604           0 :                 NEXTn(6);
     605           0 :                 return;
     606             :         }
     607             : 
     608         306 :         if (EAT('^'))
     609          21 :                 invert++;       /* make note to invert set at end */
     610         306 :         if (EAT(']'))
     611           0 :                 CHadd(cs, ']');
     612         306 :         else if (EAT('-'))
     613           1 :                 CHadd(cs, '-');
     614         963 :         while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
     615         351 :                 p_b_term(p, cs);
     616         306 :         if (EAT('-'))
     617          10 :                 CHadd(cs, '-');
     618         306 :         MUSTEAT(']', REG_EBRACK);
     619             : 
     620         306 :         if (p->error != 0)   /* don't mess things up further */
     621          16 :                 return;
     622             : 
     623         290 :         if (p->g->cflags&REG_ICASE) {
     624             :                 register int i;
     625             :                 register int ci;
     626             : 
     627       54484 :                 for (i = p->g->csetsize - 1; i >= 0; i--)
     628       54272 :                         if (CHIN(cs, i) && isalpha(i)) {
     629        1201 :                                 ci = othercase(i);
     630        1201 :                                 if (ci != i)
     631        1201 :                                         CHadd(cs, ci);
     632             :                         }
     633         212 :                 if (cs->multis != NULL)
     634           0 :                         mccase(p, cs);
     635             :         }
     636         290 :         if (invert) {
     637             :                 register int i;
     638             : 
     639        5397 :                 for (i = p->g->csetsize - 1; i >= 0; i--)
     640        5376 :                         if (CHIN(cs, i))
     641         117 :                                 CHsub(cs, i);
     642             :                         else
     643        5259 :                                 CHadd(cs, i);
     644          21 :                 if (p->g->cflags&REG_NEWLINE)
     645           0 :                         CHsub(cs, '\n');
     646          21 :                 if (cs->multis != NULL)
     647           0 :                         mcinvert(p, cs);
     648             :         }
     649             : 
     650             :         assert(cs->multis == NULL);          /* xxx */
     651             : 
     652         290 :         if (nch(p, cs) == 1) {          /* optimize singleton sets */
     653           5 :                 ordinary(p, firstch(p, cs));
     654           5 :                 freeset(p, cs);
     655             :         } else
     656         285 :                 EMIT(OANYOF, freezeset(p, cs));
     657             : }
     658             : 
     659             : /*
     660             :  - p_b_term - parse one term of a bracketed character list
     661             :  == static void p_b_term(register struct parse *p, register cset *cs);
     662             :  */
     663             : static void
     664         351 : p_b_term(p, cs)
     665             : register struct parse *p;
     666             : register cset *cs;
     667             : {
     668             :         register unsigned char c;
     669             :         register unsigned char start, finish;
     670             :         register int i;
     671             : 
     672             :         /* classify what we've got */
     673         351 :         switch ((MORE()) ? PEEK() : '\0') {
     674             :         case '[':
     675          60 :                 c = (MORE2()) ? PEEK2() : '\0';
     676          60 :                 break;
     677             :         case '-':
     678           6 :                 SETERROR(REG_ERANGE);
     679           6 :                 return;                 /* NOTE RETURN */
     680             :                 break;
     681             :         default:
     682         285 :                 c = '\0';
     683             :                 break;
     684             :         }
     685             : 
     686         345 :         switch (c) {
     687             :         case ':':               /* character class */
     688          60 :                 NEXT2();
     689          60 :                 REQUIRE(MORE(), REG_EBRACK);
     690          60 :                 c = PEEK();
     691          60 :                 REQUIRE(c != '-' && c != ']', REG_ECTYPE);
     692          60 :                 p_b_cclass(p, cs);
     693          60 :                 REQUIRE(MORE(), REG_EBRACK);
     694          60 :                 REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
     695          60 :                 break;
     696             :         case '=':               /* equivalence class */
     697           0 :                 NEXT2();
     698           0 :                 REQUIRE(MORE(), REG_EBRACK);
     699           0 :                 c = PEEK();
     700           0 :                 REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
     701           0 :                 p_b_eclass(p, cs);
     702           0 :                 REQUIRE(MORE(), REG_EBRACK);
     703           0 :                 REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
     704           0 :                 break;
     705             :         default:                /* symbol, ordinary character, or range */
     706             : /* xxx revision needed for multichar stuff */
     707         285 :                 start = p_b_symbol(p);
     708         335 :                 if (SEE('-') && MORE2() && PEEK2() != ']') {
     709             :                         /* range */
     710          50 :                         NEXT();
     711          50 :                         if (EAT('-'))
     712           0 :                                 finish = '-';
     713             :                         else
     714          50 :                                 finish = p_b_symbol(p);
     715             :                 } else
     716         235 :                         finish = start;
     717             : /* xxx what about signed chars here... */
     718         285 :                 REQUIRE(start <= finish, REG_ERANGE);
     719        1082 :                 for (i = start; i <= finish; i++)
     720         797 :                         CHadd(cs, i);
     721             :                 break;
     722             :         }
     723             : }
     724             : 
     725             : /*
     726             :  - p_b_cclass - parse a character-class name and deal with it
     727             :  == static void p_b_cclass(register struct parse *p, register cset *cs);
     728             :  */
     729             : static void
     730          60 : p_b_cclass(p, cs)
     731             : register struct parse *p;
     732             : register cset *cs;
     733             : {
     734          60 :         register unsigned char *sp = p->next;
     735             :         register const struct cclass *cp;
     736             :         register size_t len;
     737             :         register const unsigned char *u;
     738             :         register unsigned char c;
     739             : 
     740         420 :         while (MORE() && isalpha(PEEK()))
     741         300 :                 NEXT();
     742          60 :         len = p->next - sp;
     743         312 :         for (cp = cclasses; cp->name != NULL; cp++)
     744         312 :                 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
     745          60 :                         break;
     746          60 :         if (cp->name == NULL) {
     747             :                 /* oops, didn't find it */
     748           0 :                 SETERROR(REG_ECTYPE);
     749           0 :                 return;
     750             :         }
     751             : 
     752          60 :         u = cp->chars;
     753        2050 :         while ((c = *u++) != '\0')
     754        1930 :                 CHadd(cs, c);
     755          60 :         for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
     756           0 :                 MCadd(p, cs, u);
     757             : }
     758             : 
     759             : /*
     760             :  - p_b_eclass - parse an equivalence-class name and deal with it
     761             :  == static void p_b_eclass(register struct parse *p, register cset *cs);
     762             :  *
     763             :  * This implementation is incomplete. xxx
     764             :  */
     765             : static void
     766           0 : p_b_eclass(p, cs)
     767             : register struct parse *p;
     768             : register cset *cs;
     769             : {
     770             :         register unsigned char c;
     771             : 
     772           0 :         c = p_b_coll_elem(p, '=');
     773           0 :         CHadd(cs, c);
     774           0 : }
     775             : 
     776             : /*
     777             :  - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
     778             :  == static char p_b_symbol(register struct parse *p);
     779             :  */
     780             : static unsigned char                    /* value of symbol */
     781         335 : p_b_symbol(p)
     782             : register struct parse *p;
     783             : {
     784             :         register unsigned char value;
     785             : 
     786         335 :         REQUIRE(MORE(), REG_EBRACK);
     787         335 :         if (!EATTWO('[', '.'))
     788         335 :                 return(GETNEXT());
     789             : 
     790             :         /* collating symbol */
     791           0 :         value = p_b_coll_elem(p, '.');
     792           0 :         REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
     793           0 :         return(value);
     794             : }
     795             : 
     796             : /*
     797             :  - p_b_coll_elem - parse a collating-element name and look it up
     798             :  == static char p_b_coll_elem(register struct parse *p, int endc);
     799             :  */
     800             : static unsigned char                    /* value of collating element */
     801           0 : p_b_coll_elem(p, endc)
     802             : register struct parse *p;
     803             : int endc;                       /* name ended by endc,']' */
     804             : {
     805           0 :         register unsigned char *sp = p->next;
     806             :         register const struct cname *cp;
     807             :         register int len;
     808             : 
     809           0 :         while (MORE() && !SEETWO(endc, ']'))
     810           0 :                 NEXT();
     811           0 :         if (!MORE()) {
     812           0 :                 SETERROR(REG_EBRACK);
     813           0 :                 return(0);
     814             :         }
     815           0 :         len = p->next - sp;
     816           0 :         for (cp = cnames; cp->name != NULL; cp++)
     817           0 :                 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
     818           0 :                         return(cp->code);    /* known name */
     819           0 :         if (len == 1)
     820           0 :                 return(*sp);    /* single character */
     821           0 :         SETERROR(REG_ECOLLATE);                 /* neither */
     822           0 :         return(0);
     823             : }
     824             : 
     825             : /*
     826             :  - othercase - return the case counterpart of an alphabetic
     827             :  == static char othercase(int ch);
     828             :  */
     829             : static unsigned char                    /* if no counterpart, return ch */
     830        1353 : othercase(ch)
     831             : int ch;
     832             : {
     833             :         assert(isalpha(ch));
     834        1353 :         if (isupper(ch))
     835         673 :                 return(tolower(ch));
     836         680 :         else if (islower(ch))
     837         680 :                 return(toupper(ch));
     838             :         else                    /* peculiar, but could happen */
     839           0 :                 return(ch);
     840             : }
     841             : 
     842             : /*
     843             :  - bothcases - emit a dualcase version of a two-case character
     844             :  == static void bothcases(register struct parse *p, int ch);
     845             :  *
     846             :  * Boy, is this implementation ever a kludge...
     847             :  */
     848             : static void
     849         152 : bothcases(p, ch)
     850             : register struct parse *p;
     851             : int ch;
     852             : {
     853         152 :         register unsigned char *oldnext = p->next;
     854         152 :         register unsigned char *oldend = p->end;
     855             :         unsigned char bracket[3];
     856             : 
     857             :         assert(othercase(ch) != ch);    /* p_bracket() would recurse */
     858         152 :         p->next = bracket;
     859         152 :         p->end = bracket+2;
     860         152 :         bracket[0] = ch;
     861         152 :         bracket[1] = ']';
     862         152 :         bracket[2] = '\0';
     863         152 :         p_bracket(p);
     864             :         assert(p->next == bracket+2);
     865         152 :         p->next = oldnext;
     866         152 :         p->end = oldend;
     867         152 : }
     868             : 
     869             : /*
     870             :  - ordinary - emit an ordinary character
     871             :  == static void ordinary(register struct parse *p, register int ch);
     872             :  */
     873             : static void
     874         707 : ordinary(p, ch)
     875             : register struct parse *p;
     876             : register int ch;
     877             : {
     878         707 :         register cat_t *cap = p->g->categories;
     879             : 
     880         859 :         if ((p->g->cflags&REG_ICASE) && isalpha(ch) && othercase(ch) != ch)
     881         152 :                 bothcases(p, ch);
     882             :         else {
     883         555 :                 EMIT(OCHAR, (unsigned char)ch);
     884         555 :                 if (cap[ch] == 0)
     885         470 :                         cap[ch] = p->g->ncategories++;
     886             :         }
     887         707 : }
     888             : 
     889             : /*
     890             :  - nonnewline - emit REG_NEWLINE version of OANY
     891             :  == static void nonnewline(register struct parse *p);
     892             :  *
     893             :  * Boy, is this implementation ever a kludge...
     894             :  */
     895             : static void
     896           0 : nonnewline(p)
     897             : register struct parse *p;
     898             : {
     899           0 :         register unsigned char *oldnext = p->next;
     900           0 :         register unsigned char *oldend = p->end;
     901             :         unsigned char bracket[4];
     902             : 
     903           0 :         p->next = bracket;
     904           0 :         p->end = bracket+3;
     905           0 :         bracket[0] = '^';
     906           0 :         bracket[1] = '\n';
     907           0 :         bracket[2] = ']';
     908           0 :         bracket[3] = '\0';
     909           0 :         p_bracket(p);
     910             :         assert(p->next == bracket+3);
     911           0 :         p->next = oldnext;
     912           0 :         p->end = oldend;
     913           0 : }
     914             : 
     915             : /*
     916             :  - repeat - generate code for a bounded repetition, recursively if needed
     917             :  == static void repeat(register struct parse *p, sopno start, int from, int to);
     918             :  */
     919             : static void
     920         966 : repeat(p, start, from, to)
     921             : register struct parse *p;
     922             : sopno start;                    /* operand from here to end of strip */
     923             : int from;                       /* repeated from this number */
     924             : int to;                         /* to this number of times (maybe INFINITY) */
     925             : {
     926         966 :         register sopno finish = HERE();
     927             : #       define  N       2
     928             : #       define  INF     3
     929             : #       define  REP(f, t)       ((f)*8 + (t))
     930             : #       define  MAP(n)  (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
     931             :         register sopno copy;
     932             : 
     933         966 :         if (p->error != 0)   /* head off possible runaway recursion */
     934          12 :                 return;
     935             : 
     936             :         assert(from <= to);
     937             : 
     938         954 :         switch (REP(MAP(from), MAP(to))) {
     939             :         case REP(0, 0):                 /* must be user doing this */
     940           6 :                 DROP(finish-start);     /* drop the operand */
     941           6 :                 break;
     942             :         case REP(0, 1):                 /* as x{1,1}? */
     943             :         case REP(0, N):                 /* as x{1,n}? */
     944             :         case REP(0, INF):               /* as x{1,}? */
     945             :                 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
     946          10 :                 INSERT(OCH_, start);            /* offset is wrong... */
     947          10 :                 repeat(p, start+1, 1, to);
     948          10 :                 ASTERN(OOR1, start);
     949          10 :                 AHEAD(start);                   /* ... fix it */
     950          10 :                 EMIT(OOR2, 0);
     951          10 :                 AHEAD(THERE());
     952          10 :                 ASTERN(O_CH, THERETHERE());
     953          10 :                 break;
     954             :         case REP(1, 1):                 /* trivial case */
     955             :                 /* done */
     956          88 :                 break;
     957             :         case REP(1, N):                 /* as x?x{1,n-1} */
     958             :                 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
     959         110 :                 INSERT(OCH_, start);
     960         110 :                 ASTERN(OOR1, start);
     961         110 :                 AHEAD(start);
     962         110 :                 EMIT(OOR2, 0);                  /* offset very wrong... */
     963         110 :                 AHEAD(THERE());                 /* ...so fix it */
     964         110 :                 ASTERN(O_CH, THERETHERE());
     965         110 :                 copy = dupl(p, start+1, finish+1);
     966             :                 assert(copy == finish+4);
     967         110 :                 repeat(p, copy, 1, to-1);
     968         110 :                 break;
     969             :         case REP(1, INF):               /* as x+ */
     970          10 :                 INSERT(OPLUS_, start);
     971          10 :                 ASTERN(O_PLUS, start);
     972          10 :                 break;
     973             :         case REP(N, N):                 /* as xx{m-1,n-1} */
     974         730 :                 copy = dupl(p, start, finish);
     975         730 :                 repeat(p, copy, from-1, to-1);
     976         730 :                 break;
     977             :         case REP(N, INF):               /* as xx{n-1,INF} */
     978           0 :                 copy = dupl(p, start, finish);
     979           0 :                 repeat(p, copy, from-1, to);
     980           0 :                 break;
     981             :         default:                        /* "can't happen" */
     982           0 :                 SETERROR(REG_ASSERT);   /* just in case */
     983             :                 break;
     984             :         }
     985             : }
     986             : 
     987             : /*
     988             :  - seterr - set an error condition
     989             :  == static int seterr(register struct parse *p, int e);
     990             :  */
     991             : static int                      /* useless but makes type checking happy */
     992         222 : seterr(p, e)
     993             : register struct parse *p;
     994             : int e;
     995             : {
     996         222 :         if (p->error == 0)   /* keep earliest error condition */
     997         142 :                 p->error = e;
     998         222 :         p->next = nuls;              /* try to bring things to a halt */
     999         222 :         p->end = nuls;
    1000         222 :         return(0);              /* make the return value well-defined */
    1001             : }
    1002             : 
    1003             : /*
    1004             :  - allocset - allocate a set of characters for []
    1005             :  == static cset *allocset(register struct parse *p);
    1006             :  */
    1007             : static cset *
    1008         306 : allocset(p)
    1009             : register struct parse *p;
    1010             : {
    1011         306 :         register int no = p->g->ncsets++;
    1012             :         register size_t nc;
    1013             :         register size_t nbytes;
    1014             :         register cset *cs;
    1015         306 :         register size_t css = (size_t)p->g->csetsize;
    1016             :         register int i;
    1017             : 
    1018         306 :         if (no >= p->ncsalloc) {  /* need another column of space */
    1019         172 :                 p->ncsalloc += CHAR_BIT;
    1020         172 :                 nc = p->ncsalloc;
    1021             :                 assert(nc % CHAR_BIT == 0);
    1022         172 :                 nbytes = nc / CHAR_BIT * css;
    1023         172 :                 if (p->g->sets == NULL)
    1024         172 :                         p->g->sets = (cset *)malloc(nc * sizeof(cset));
    1025             :                 else
    1026           0 :                         p->g->sets = (cset *)realloc((unsigned char *)p->g->sets,
    1027             :                                                         nc * sizeof(cset));
    1028         172 :                 if (p->g->setbits == NULL)
    1029         172 :                         p->g->setbits = (uch *)malloc(nbytes);
    1030             :                 else {
    1031           0 :                         p->g->setbits = (uch *)realloc((unsigned char *)p->g->setbits,
    1032             :                                                                 nbytes);
    1033             :                         /* xxx this isn't right if setbits is now NULL */
    1034           0 :                         for (i = 0; i < no; i++)
    1035           0 :                                 p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
    1036             :                 }
    1037         344 :                 if (p->g->sets != NULL && p->g->setbits != NULL)
    1038         172 :                         (void) memset((unsigned char *)p->g->setbits + (nbytes - css),
    1039             :                                                                 0, css);
    1040             :                 else {
    1041           0 :                         no = 0;
    1042           0 :                         SETERROR(REG_ESPACE);
    1043             :                         /* caller's responsibility not to do set ops */
    1044             :                 }
    1045             :         }
    1046             : 
    1047             :         assert(p->g->sets != NULL);       /* xxx */
    1048         306 :         cs = &p->g->sets[no];
    1049         306 :         cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
    1050         306 :         cs->mask = 1 << ((no) % CHAR_BIT);
    1051         306 :         cs->hash = 0;
    1052         306 :         cs->smultis = 0;
    1053         306 :         cs->multis = NULL;
    1054             : 
    1055         306 :         return(cs);
    1056             : }
    1057             : 
    1058             : /*
    1059             :  - freeset - free a now-unused set
    1060             :  == static void freeset(register struct parse *p, register cset *cs);
    1061             :  */
    1062             : static void
    1063          42 : freeset(p, cs)
    1064             : register struct parse *p;
    1065             : register cset *cs;
    1066             : {
    1067             :         register size_t i;
    1068          42 :         register cset *top = &p->g->sets[p->g->ncsets];
    1069          42 :         register size_t css = (size_t)p->g->csetsize;
    1070             : 
    1071       10794 :         for (i = 0; i < css; i++)
    1072       10752 :                 CHsub(cs, i);
    1073          42 :         if (cs == top-1)        /* recover only the easy case */
    1074          42 :                 p->g->ncsets--;
    1075          42 : }
    1076             : 
    1077             : /*
    1078             :  - freezeset - final processing on a set of characters
    1079             :  == static int freezeset(register struct parse *p, register cset *cs);
    1080             :  *
    1081             :  * The main task here is merging identical sets.  This is usually a waste
    1082             :  * of time (although the hash code minimizes the overhead), but can win
    1083             :  * big if REG_ICASE is being used.  REG_ICASE, by the way, is why the hash
    1084             :  * is done using addition rather than xor -- all ASCII [aA] sets xor to
    1085             :  * the same value!
    1086             :  */
    1087             : static int                      /* set number */
    1088         285 : freezeset(p, cs)
    1089             : register struct parse *p;
    1090             : register cset *cs;
    1091             : {
    1092         285 :         register uch h = cs->hash;
    1093             :         register size_t i;
    1094         285 :         register cset *top = &p->g->sets[p->g->ncsets];
    1095             :         register cset *cs2;
    1096         285 :         register size_t css = (size_t)p->g->csetsize;
    1097             : 
    1098             :         /* look for an earlier one which is the same */
    1099         757 :         for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
    1100         509 :                 if (cs2->hash == h && cs2 != cs) {
    1101             :                         /* maybe */
    1102        9509 :                         for (i = 0; i < css; i++)
    1103        9472 :                                 if (!!CHIN(cs2, i) != !!CHIN(cs, i))
    1104           0 :                                         break;          /* no */
    1105          37 :                         if (i == css)
    1106          37 :                                 break;                  /* yes */
    1107             :                 }
    1108             : 
    1109         285 :         if (cs2 < top) {     /* found one */
    1110          37 :                 freeset(p, cs);
    1111          37 :                 cs = cs2;
    1112             :         }
    1113             : 
    1114         285 :         return((int)(cs - p->g->sets));
    1115             : }
    1116             : 
    1117             : /*
    1118             :  - firstch - return first character in a set (which must have at least one)
    1119             :  == static int firstch(register struct parse *p, register cset *cs);
    1120             :  */
    1121             : static int                      /* character; there is no "none" value */
    1122           5 : firstch(p, cs)
    1123             : register struct parse *p;
    1124             : register cset *cs;
    1125             : {
    1126             :         register size_t i;
    1127           5 :         register size_t css = (size_t)p->g->csetsize;
    1128             : 
    1129         605 :         for (i = 0; i < css; i++)
    1130         605 :                 if (CHIN(cs, i))
    1131           5 :                         return((unsigned char)i);
    1132             :         assert(never);
    1133           0 :         return(0);              /* arbitrary */
    1134             : }
    1135             : 
    1136             : /*
    1137             :  - nch - number of characters in a set
    1138             :  == static int nch(register struct parse *p, register cset *cs);
    1139             :  */
    1140             : static int
    1141         290 : nch(p, cs)
    1142             : register struct parse *p;
    1143             : register cset *cs;
    1144             : {
    1145             :         register size_t i;
    1146         290 :         register size_t css = (size_t)p->g->csetsize;
    1147         290 :         register int n = 0;
    1148             : 
    1149       74530 :         for (i = 0; i < css; i++)
    1150       74240 :                 if (CHIN(cs, i))
    1151        8252 :                         n++;
    1152         290 :         return(n);
    1153             : }
    1154             : 
    1155             : /*
    1156             :  - mcadd - add a collating element to a cset
    1157             :  == static void mcadd(register struct parse *p, register cset *cs, \
    1158             :  ==     register char *cp);
    1159             :  */
    1160             : static void
    1161           0 : mcadd(p, cs, cp)
    1162             : register struct parse *p;
    1163             : register cset *cs;
    1164             : register const unsigned char *cp;
    1165             : {
    1166           0 :         register size_t oldend = cs->smultis;
    1167             : 
    1168           0 :         cs->smultis += strlen(cp) + 1;
    1169           0 :         if (cs->multis == NULL)
    1170           0 :                 cs->multis = malloc(cs->smultis);
    1171             :         else
    1172           0 :                 cs->multis = realloc(cs->multis, cs->smultis);
    1173           0 :         if (cs->multis == NULL) {
    1174           0 :                 SETERROR(REG_ESPACE);
    1175           0 :                 return;
    1176             :         }
    1177             : 
    1178           0 :         (void) strcpy(cs->multis + oldend - 1, cp);
    1179           0 :         cs->multis[cs->smultis - 1] = '\0';
    1180             : }
    1181             : 
    1182             : #if 0
    1183             : /*
    1184             :  - mcsub - subtract a collating element from a cset
    1185             :  == static void mcsub(register cset *cs, register unsigned char *cp);
    1186             :  */
    1187             : static void
    1188             : mcsub(cs, cp)
    1189             : register unsigned cset *cs;
    1190             : register unsigned char *cp;
    1191             : {
    1192             :         register unsigned char *fp = mcfind(cs, cp);
    1193             :         register size_t len = strlen(fp);
    1194             : 
    1195             :         assert(fp != NULL);
    1196             :         (void) memmove(fp, fp + len + 1,
    1197             :                                 cs->smultis - (fp + len + 1 - cs->multis));
    1198             :         cs->smultis -= len;
    1199             : 
    1200             :         if (cs->smultis == 0) {
    1201             :                 free(cs->multis);
    1202             :                 cs->multis = NULL;
    1203             :                 return;
    1204             :         }
    1205             : 
    1206             :         cs->multis = realloc(cs->multis, cs->smultis);
    1207             :         assert(cs->multis != NULL);
    1208             : }
    1209             : 
    1210             : /*
    1211             :  - mcin - is a collating element in a cset?
    1212             :  == static int mcin(register cset *cs, register unsigned char *cp);
    1213             :  */
    1214             : static int
    1215             : mcin(cs, cp)
    1216             : register cset *cs;
    1217             : register unsigned char *cp;
    1218             : {
    1219             :         return(mcfind(cs, cp) != NULL);
    1220             : }
    1221             : 
    1222             : 
    1223             : /*
    1224             :  - mcfind - find a collating element in a cset
    1225             :  == static unsigned char *mcfind(register cset *cs, register unsigned char *cp);
    1226             :  */
    1227             : static unsigned char *
    1228             : mcfind(cs, cp)
    1229             : register cset *cs;
    1230             : register unsigned char *cp;
    1231             : {
    1232             :         register unsigned char *p;
    1233             : 
    1234             :         if (cs->multis == NULL)
    1235             :                 return(NULL);
    1236             :         for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
    1237             :                 if (strcmp(cp, p) == 0)
    1238             :                         return(p);
    1239             :         return(NULL);
    1240             : }
    1241             : #endif
    1242             : 
    1243             : /*
    1244             :  - mcinvert - invert the list of collating elements in a cset
    1245             :  == static void mcinvert(register struct parse *p, register cset *cs);
    1246             :  *
    1247             :  * This would have to know the set of possibilities.  Implementation
    1248             :  * is deferred.
    1249             :  */
    1250             : static void
    1251           0 : mcinvert(p, cs)
    1252             : register struct parse *p;
    1253             : register cset *cs;
    1254             : {
    1255             :         assert(cs->multis == NULL);  /* xxx */
    1256           0 : }
    1257             : 
    1258             : /*
    1259             :  - mccase - add case counterparts of the list of collating elements in a cset
    1260             :  == static void mccase(register struct parse *p, register cset *cs);
    1261             :  *
    1262             :  * This would have to know the set of possibilities.  Implementation
    1263             :  * is deferred.
    1264             :  */
    1265             : static void
    1266           0 : mccase(p, cs)
    1267             : register struct parse *p;
    1268             : register cset *cs;
    1269             : {
    1270             :         assert(cs->multis == NULL);  /* xxx */
    1271           0 : }
    1272             : 
    1273             : /*
    1274             :  - isinsets - is this character in any sets?
    1275             :  == static int isinsets(register struct re_guts *g, int c);
    1276             :  */
    1277             : static int                      /* predicate */
    1278       58498 : isinsets(g, c)
    1279             : register struct re_guts *g;
    1280             : int c;
    1281             : {
    1282             :         register uch *col;
    1283             :         register int i;
    1284       58498 :         register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
    1285       58498 :         register unsigned uc = (unsigned char)c;
    1286             : 
    1287       58498 :         if (!g->setbits) {
    1288       31073 :                 return(0);
    1289             :         }
    1290             : 
    1291       54630 :         for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
    1292       27425 :                 if (col[uc] != 0)
    1293         220 :                         return(1);
    1294       27205 :         return(0);
    1295             : }
    1296             : 
    1297             : /*
    1298             :  - samesets - are these two characters in exactly the same sets?
    1299             :  == static int samesets(register struct re_guts *g, int c1, int c2);
    1300             :  */
    1301             : static int                      /* predicate */
    1302       41796 : samesets(g, c1, c2)
    1303             : register struct re_guts *g;
    1304             : int c1;
    1305             : int c2;
    1306             : {
    1307             :         register uch *col;
    1308             :         register int i;
    1309       41796 :         register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
    1310       41796 :         register unsigned uc1 = (unsigned char)c1;
    1311       41796 :         register unsigned uc2 = (unsigned char)c2;
    1312             : 
    1313       49677 :         for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
    1314       41796 :                 if (col[uc1] != col[uc2])
    1315       33915 :                         return(0);
    1316        7881 :         return(1);
    1317             : }
    1318             : 
    1319             : /*
    1320             :  - categorize - sort out character categories
    1321             :  == static void categorize(struct parse *p, register struct re_guts *g);
    1322             :  */
    1323             : static void
    1324         403 : categorize(p, g)
    1325             : struct parse *p;
    1326             : register struct re_guts *g;
    1327             : {
    1328         403 :         register cat_t *cats = g->categories;
    1329             :         register int c;
    1330             :         register int c2;
    1331             :         register cat_t cat;
    1332             : 
    1333             :         /* avoid making error situations worse */
    1334         403 :         if (p->error != 0)
    1335         142 :                 return;
    1336             : 
    1337       67077 :         for (c = 0; c <= UCHAR_MAX; c++)
    1338       66816 :                 if (cats[c] == 0 && isinsets(g, c)) {
    1339         220 :                         cat = g->ncategories++;
    1340         220 :                         cats[c] = cat;
    1341       45187 :                         for (c2 = c+1; c2 <= UCHAR_MAX; c2++)
    1342       44967 :                                 if (cats[c2] == 0 && samesets(g, c, c2))
    1343        7881 :                                         cats[c2] = cat;
    1344             :                 }
    1345             : }
    1346             : 
    1347             : /*
    1348             :  - dupl - emit a duplicate of a bunch of sops
    1349             :  == static sopno dupl(register struct parse *p, sopno start, sopno finish);
    1350             :  */
    1351             : static sopno                    /* start of duplicate */
    1352         840 : dupl(p, start, finish)
    1353             : register struct parse *p;
    1354             : sopno start;                    /* from here */
    1355             : sopno finish;                   /* to this less one */
    1356             : {
    1357         840 :         register sopno ret = HERE();
    1358         840 :         register sopno len = finish - start;
    1359             : 
    1360             :         assert(finish >= start);
    1361         840 :         if (len == 0)
    1362           0 :                 return(ret);
    1363         840 :         enlarge(p, p->ssize + len);  /* this many unexpected additions */
    1364             :         assert(p->ssize >= p->slen + len);
    1365        1680 :         (void) memcpy((char *)(p->strip + p->slen),
    1366         840 :                 (char *)(p->strip + start), (size_t)len*sizeof(sop));
    1367         840 :         p->slen += len;
    1368         840 :         return(ret);
    1369             : }
    1370             : 
    1371             : /*
    1372             :  - doemit - emit a strip operator
    1373             :  == static void doemit(register struct parse *p, sop op, size_t opnd);
    1374             :  *
    1375             :  * It might seem better to implement this as a macro with a function as
    1376             :  * hard-case backup, but it's just too big and messy unless there are
    1377             :  * some changes to the data structures.  Maybe later.
    1378             :  */
    1379             : static void
    1380       27508 : doemit(p, op, opnd)
    1381             : register struct parse *p;
    1382             : sop op;
    1383             : size_t opnd;
    1384             : {
    1385             :         /* avoid making error situations worse */
    1386       27508 :         if (p->error != 0)
    1387         162 :                 return;
    1388             : 
    1389             :         /* deal with oversize operands ("can't happen", more or less) */
    1390             :         assert(opnd < 1<<OPSHIFT);
    1391             : 
    1392             :         /* deal with undersized strip */
    1393       27346 :         if (p->slen >= p->ssize)
    1394          80 :                 enlarge(p, (p->ssize+1) / 2 * 3);    /* +50% */
    1395             :         assert(p->slen < p->ssize);
    1396             : 
    1397             :         /* finally, it's all reduced to the easy case */
    1398       27346 :         p->strip[p->slen++] = SOP(op, opnd);
    1399             : }
    1400             : 
    1401             : /*
    1402             :  - doinsert - insert a sop into the strip
    1403             :  == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos);
    1404             :  */
    1405             : static void
    1406         246 : doinsert(p, op, opnd, pos)
    1407             : register struct parse *p;
    1408             : sop op;
    1409             : size_t opnd;
    1410             : sopno pos;
    1411             : {
    1412             :         register sopno sn;
    1413             :         register sop s;
    1414             :         register int i;
    1415             : 
    1416             :         /* avoid making error situations worse */
    1417         246 :         if (p->error != 0)
    1418           0 :                 return;
    1419             : 
    1420         246 :         sn = HERE();
    1421         246 :         EMIT(op, opnd);         /* do checks, ensure space */
    1422             :         assert(HERE() == sn+1);
    1423         246 :         s = p->strip[sn];
    1424             : 
    1425             :         /* adjust paren pointers */
    1426             :         assert(pos > 0);
    1427        2460 :         for (i = 1; i < NPAREN; i++) {
    1428        2214 :                 if (p->pbegin[i] >= pos) {
    1429           0 :                         p->pbegin[i]++;
    1430             :                 }
    1431        2214 :                 if (p->pend[i] >= pos) {
    1432           0 :                         p->pend[i]++;
    1433             :                 }
    1434             :         }
    1435             : 
    1436         246 :         memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
    1437         246 :                                                 (HERE()-pos-1)*sizeof(sop));
    1438         246 :         p->strip[pos] = s;
    1439             : }
    1440             : 
    1441             : /*
    1442             :  - dofwd - complete a forward reference
    1443             :  == static void dofwd(register struct parse *p, sopno pos, sop value);
    1444             :  */
    1445             : static void
    1446         380 : dofwd(p, pos, value)
    1447             : register struct parse *p;
    1448             : register sopno pos;
    1449             : sop value;
    1450             : {
    1451             :         /* avoid making error situations worse */
    1452         380 :         if (p->error != 0)
    1453           6 :                 return;
    1454             : 
    1455             :         assert(value < 1<<OPSHIFT);
    1456         374 :         p->strip[pos] = OP(p->strip[pos]) | value;
    1457             : }
    1458             : 
    1459             : /*
    1460             :  - enlarge - enlarge the strip
    1461             :  == static void enlarge(register struct parse *p, sopno size);
    1462             :  */
    1463             : static void
    1464         920 : enlarge(p, size)
    1465             : register struct parse *p;
    1466             : register sopno size;
    1467             : {
    1468             :         register sop *sp;
    1469             : 
    1470         920 :         if (p->ssize >= size)
    1471           0 :                 return;
    1472             : 
    1473         920 :         sp = (sop *)realloc(p->strip, size*sizeof(sop));
    1474         920 :         if (sp == NULL) {
    1475           0 :                 SETERROR(REG_ESPACE);
    1476           0 :                 return;
    1477             :         }
    1478         920 :         p->strip = sp;
    1479         920 :         p->ssize = size;
    1480             : }
    1481             : 
    1482             : /*
    1483             :  - stripsnug - compact the strip
    1484             :  == static void stripsnug(register struct parse *p, register struct re_guts *g);
    1485             :  */
    1486             : static void
    1487         403 : stripsnug(p, g)
    1488             : register struct parse *p;
    1489             : register struct re_guts *g;
    1490             : {
    1491         403 :         g->nstates = p->slen;
    1492         403 :         g->strip = (sop *)realloc((unsigned char *)p->strip, p->slen * sizeof(sop));
    1493         403 :         if (g->strip == NULL) {
    1494           0 :                 SETERROR(REG_ESPACE);
    1495           0 :                 g->strip = p->strip;
    1496             :         }
    1497         403 : }
    1498             : 
    1499             : /*
    1500             :  - findmust - fill in must and mlen with longest mandatory literal string
    1501             :  == static void findmust(register struct parse *p, register struct re_guts *g);
    1502             :  *
    1503             :  * This algorithm could do fancy things like analyzing the operands of |
    1504             :  * for common subsequences.  Someday.  This code is simple and finds most
    1505             :  * of the interesting cases.
    1506             :  *
    1507             :  * Note that must and mlen got initialized during setup.
    1508             :  */
    1509             : static void
    1510         403 : findmust(p, g)
    1511             : struct parse *p;
    1512             : register struct re_guts *g;
    1513             : {
    1514             :         register sop *scan;
    1515         403 :         sop *start = NULL;
    1516         403 :         register sop *newstart = NULL;
    1517             :         register sopno newlen;
    1518             :         register sop s;
    1519             :         register unsigned char *cp;
    1520             :         register sopno i;
    1521             : 
    1522             :         /* avoid making error situations worse */
    1523         403 :         if (p->error != 0)
    1524         142 :                 return;
    1525             : 
    1526             :         /* find the longest OCHAR sequence in strip */
    1527         261 :         newlen = 0;
    1528         261 :         scan = g->strip + 1;
    1529             :         do {
    1530       27000 :                 s = *scan++;
    1531       27000 :                 switch (OP(s)) {
    1532             :                 case OCHAR:             /* sequence member */
    1533         466 :                         if (newlen == 0)                /* new sequence */
    1534         146 :                                 newstart = scan - 1;
    1535         466 :                         newlen++;
    1536         466 :                         break;
    1537             :                 case OPLUS_:            /* things that don't break one */
    1538             :                 case OLPAREN:
    1539             :                 case ORPAREN:
    1540       16669 :                         break;
    1541             :                 case OQUEST_:           /* things that must be skipped */
    1542             :                 case OCH_:
    1543         179 :                         scan--;
    1544             :                         do {
    1545         364 :                                 scan += OPND(s);
    1546         364 :                                 s = *scan;
    1547             :                                 /* assert() interferes w debug printouts */
    1548         549 :                                 if (OP(s) != O_QUEST && OP(s) != O_CH &&
    1549         185 :                                                         OP(s) != OOR2) {
    1550           0 :                                         g->iflags |= BAD;
    1551           0 :                                         return;
    1552             :                                 }
    1553         364 :                         } while (OP(s) != O_QUEST && OP(s) != O_CH);
    1554             :                         /* fallthrough */
    1555             :                 default:                /* things that break a sequence */
    1556        9865 :                         if (newlen > g->mlen) {           /* ends one */
    1557         115 :                                 start = newstart;
    1558         115 :                                 g->mlen = newlen;
    1559             :                         }
    1560        9865 :                         newlen = 0;
    1561             :                         break;
    1562             :                 }
    1563       27000 :         } while (OP(s) != OEND);
    1564             : 
    1565         261 :         if (g->mlen == 0)            /* there isn't one */
    1566         154 :                 return;
    1567             : 
    1568         107 :         if (!start) {
    1569           0 :                 g->mlen = 0;
    1570           0 :                 return;
    1571             :         }
    1572             : 
    1573             :         /* turn it into a character string */
    1574         107 :         g->must = malloc((size_t)g->mlen + 1);
    1575         107 :         if (g->must == NULL) {               /* argh; just forget it */
    1576           0 :                 g->mlen = 0;
    1577           0 :                 return;
    1578             :         }
    1579         107 :         cp = g->must;
    1580         107 :         scan = start;
    1581         527 :         for (i = g->mlen; i > 0; i--) {
    1582         874 :                 while (OP(s = *scan++) != OCHAR)
    1583          34 :                         continue;
    1584             :                 assert(cp < g->must + g->mlen);
    1585         420 :                 *cp++ = (unsigned char)OPND(s);
    1586             :         }
    1587             :         assert(cp == g->must + g->mlen);
    1588         107 :         *cp++ = '\0';           /* just on general principles */
    1589             : }
    1590             : 
    1591             : /*
    1592             :  - pluscount - count + nesting
    1593             :  == static sopno pluscount(register struct parse *p, register struct re_guts *g);
    1594             :  */
    1595             : static sopno                    /* nesting depth */
    1596         403 : pluscount(p, g)
    1597             : struct parse *p;
    1598             : register struct re_guts *g;
    1599             : {
    1600             :         register sop *scan;
    1601             :         register sop s;
    1602         403 :         register sopno plusnest = 0;
    1603         403 :         register sopno maxnest = 0;
    1604             : 
    1605         403 :         if (p->error != 0)
    1606         142 :                 return(0);      /* there may not be an OEND */
    1607             : 
    1608         261 :         scan = g->strip + 1;
    1609             :         do {
    1610       27715 :                 s = *scan++;
    1611       27715 :                 switch (OP(s)) {
    1612             :                 case OPLUS_:
    1613          51 :                         plusnest++;
    1614          51 :                         break;
    1615             :                 case O_PLUS:
    1616          51 :                         if (plusnest > maxnest)
    1617          34 :                                 maxnest = plusnest;
    1618          51 :                         plusnest--;
    1619             :                         break;
    1620             :                 }
    1621       27715 :         } while (OP(s) != OEND);
    1622         261 :         if (plusnest != 0)
    1623           0 :                 g->iflags |= BAD;
    1624         261 :         return(maxnest);
    1625             : }

Generated by: LCOV version 1.10

Generated at Sat, 23 May 2015 19:26:28 +0000 (6 days ago)

Copyright © 2005-2015 The PHP Group
All rights reserved.