PHP  
 PHP: Test and Code Coverage Analysis
downloads | QA | documentation | faq | getting help | mailing lists | reporting bugs | php.net sites | links | my php.net 
 

LCOV - code coverage report
Current view: top level - ext/ereg/regex - regcomp.c (source / functions) Hit Total Coverage
Test: PHP Code Coverage Lines: 493 682 72.3 %
Date: 2014-10-22 Functions: 31 38 81.6 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : #include <sys/types.h>
       2             : #include <stdio.h>
       3             : #include <string.h>
       4             : #include <ctype.h>
       5             : #include <limits.h>
       6             : #include <stdlib.h>
       7             : 
       8             : #define POSIX_MISTAKE
       9             : 
      10             : #include "utils.h"
      11             : #include "regex.h"
      12             : #include "regex2.h"
      13             : 
      14             : #include "cclass.h"
      15             : #include "cname.h"
      16             : 
      17             : /*
      18             :  * parse structure, passed up and down to avoid global variables and
      19             :  * other clumsinesses
      20             :  */
      21             : struct parse {
      22             :         unsigned char *next;            /* next character in RE */
      23             :         unsigned char *end;             /* end of string (-> NUL normally) */
      24             :         int error;              /* has an error been seen? */
      25             :         sop *strip;             /* malloced strip */
      26             :         sopno ssize;            /* malloced strip size (allocated) */
      27             :         sopno slen;             /* malloced strip length (used) */
      28             :         int ncsalloc;           /* number of csets allocated */
      29             :         struct re_guts *g;
      30             : #       define  NPAREN  10      /* we need to remember () 1-9 for back refs */
      31             :         sopno pbegin[NPAREN];   /* -> ( ([0] unused) */
      32             :         sopno pend[NPAREN];     /* -> ) ([0] unused) */
      33             : };
      34             : 
      35             : #include "regcomp.ih"
      36             : 
      37             : static unsigned char nuls[10];          /* place to point scanner in event of error */
      38             : 
      39             : /*
      40             :  * macros for use with parse structure
      41             :  * BEWARE:  these know that the parse structure is named `p' !!!
      42             :  */
      43             : #define PEEK()  (*p->next)
      44             : #define PEEK2() (*(p->next+1))
      45             : #define MORE()  (p->next < p->end)
      46             : #define MORE2() (p->next+1 < p->end)
      47             : #define SEE(c)  (MORE() && PEEK() == (c))
      48             : #define SEETWO(a, b)    (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
      49             : #define EAT(c)  ((SEE(c)) ? (NEXT(), 1) : 0)
      50             : #define EATTWO(a, b)    ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
      51             : #define NEXT()  (p->next++)
      52             : #define NEXT2() (p->next += 2)
      53             : #define NEXTn(n)        (p->next += (n))
      54             : #define GETNEXT()       (*p->next++)
      55             : #define SETERROR(e)     seterr(p, (e))
      56             : #define REQUIRE(co, e)  (void) ((co) || SETERROR(e))
      57             : #define MUSTSEE(c, e)   (REQUIRE(MORE() && PEEK() == (c), e))
      58             : #define MUSTEAT(c, e)   (REQUIRE(MORE() && GETNEXT() == (c), e))
      59             : #define MUSTNOTSEE(c, e)        (REQUIRE(!MORE() || PEEK() != (c), e))
      60             : #define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
      61             : #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
      62             : #define AHEAD(pos)              dofwd(p, pos, HERE()-(pos))
      63             : #define ASTERN(sop, pos)        EMIT(sop, HERE()-pos)
      64             : #define HERE()          (p->slen)
      65             : #define THERE()         (p->slen - 1)
      66             : #define THERETHERE()    (p->slen - 2)
      67             : #define DROP(n) (p->slen -= (n))
      68             : 
      69             : #ifndef NDEBUG
      70             : static int never = 0;           /* for use in asserts; shuts lint up */
      71             : #else
      72             : #define never   0               /* some <assert.h>s have bugs too */
      73             : #endif
      74             : 
      75             : /*
      76             :  - regcomp - interface for parser and compilation
      77             :  = API_EXPORT(int) regcomp(regex_t *, const char *, int);
      78             :  = #define      REG_BASIC       0000
      79             :  = #define      REG_EXTENDED    0001
      80             :  = #define      REG_ICASE       0002
      81             :  = #define      REG_NOSUB       0004
      82             :  = #define      REG_NEWLINE     0010
      83             :  = #define      REG_NOSPEC      0020
      84             :  = #define      REG_PEND        0040
      85             :  = #define      REG_DUMP        0200
      86             :  */
      87             : API_EXPORT(int)                 /* 0 success, otherwise REG_something */
      88         393 : regcomp(preg, pattern, cflags)
      89             : regex_t *preg;
      90             : const char *pattern;
      91             : int cflags;
      92             : {
      93             :         struct parse pa;
      94             :         register struct re_guts *g;
      95         393 :         register struct parse *p = &pa;
      96             :         register int i;
      97             :         register size_t len;
      98             : #ifdef REDEBUG
      99             : #       define  GOODFLAGS(f)    (f)
     100             : #else
     101             : #       define  GOODFLAGS(f)    ((f)&~REG_DUMP)
     102             : #endif
     103             : 
     104         393 :         cflags = GOODFLAGS(cflags);
     105         393 :         if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
     106           0 :                 return(REG_INVARG);
     107             : 
     108         393 :         if (cflags&REG_PEND) {
     109           0 :                 if (preg->re_endp < pattern)
     110           0 :                         return(REG_INVARG);
     111           0 :                 len = preg->re_endp - pattern;
     112             :         } else
     113         393 :                 len = strlen((char *)pattern);
     114             : 
     115             :         /* do the mallocs early so failure handling is easy */
     116         393 :         g = (struct re_guts *)malloc(sizeof(struct re_guts) +
     117             :                                                         (NC-1)*sizeof(cat_t));
     118         393 :         if (g == NULL)
     119           0 :                 return(REG_ESPACE);
     120         393 :         p->ssize = len/(size_t)2*(size_t)3 + (size_t)1;      /* ugh */
     121         393 :         p->strip = (sop *)malloc(p->ssize * sizeof(sop));
     122         393 :         p->slen = 0;
     123         393 :         if (p->strip == NULL) {
     124           0 :                 free((char *)g);
     125           0 :                 return(REG_ESPACE);
     126             :         }
     127             : 
     128             :         /* set things up */
     129         393 :         p->g = g;
     130         393 :         p->next = (unsigned char *)pattern;  /* convenience; we do not modify it */
     131         393 :         p->end = p->next + len;
     132         393 :         p->error = 0;
     133         393 :         p->ncsalloc = 0;
     134        4323 :         for (i = 0; i < NPAREN; i++) {
     135        3930 :                 p->pbegin[i] = 0;
     136        3930 :                 p->pend[i] = 0;
     137             :         }
     138         393 :         g->csetsize = NC;
     139         393 :         g->sets = NULL;
     140         393 :         g->setbits = NULL;
     141         393 :         g->ncsets = 0;
     142         393 :         g->cflags = cflags;
     143         393 :         g->iflags = 0;
     144         393 :         g->nbol = 0;
     145         393 :         g->neol = 0;
     146         393 :         g->must = NULL;
     147         393 :         g->mlen = 0;
     148         393 :         g->nsub = 0;
     149         393 :         g->ncategories = 1;  /* category 0 is "everything else" */
     150         393 :         g->categories = &g->catspace[0];
     151         393 :         (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
     152         393 :         g->backrefs = 0;
     153             : 
     154             :         /* do it */
     155         393 :         EMIT(OEND, 0);
     156         393 :         g->firststate = THERE();
     157         393 :         if (cflags&REG_EXTENDED)
     158         363 :                 p_ere(p, OUT);
     159          30 :         else if (cflags&REG_NOSPEC)
     160           0 :                 p_str(p);
     161             :         else
     162          30 :                 p_bre(p, OUT, OUT);
     163         393 :         EMIT(OEND, 0);
     164         393 :         g->laststate = THERE();
     165             : 
     166             :         /* tidy up loose ends and fill things in */
     167         393 :         categorize(p, g);
     168         393 :         stripsnug(p, g);
     169         393 :         findmust(p, g);
     170         393 :         g->nplus = pluscount(p, g);
     171         393 :         g->magic = MAGIC2;
     172         393 :         preg->re_nsub = g->nsub;
     173         393 :         preg->re_g = g;
     174         393 :         preg->re_magic = MAGIC1;
     175             : #ifndef REDEBUG
     176             :         /* not debugging, so can't rely on the assert() in regexec() */
     177         393 :         if (g->iflags&BAD)
     178           0 :                 SETERROR(REG_ASSERT);
     179             : #endif
     180             : 
     181             :         /* win or lose, we're done */
     182         393 :         if (p->error != 0)   /* lose */
     183         142 :                 regfree(preg);
     184         393 :         return(p->error);
     185             : }
     186             : 
     187             : /*
     188             :  - p_ere - ERE parser top level, concatenation and alternation
     189             :  == static void p_ere(register struct parse *p, int stop);
     190             :  */
     191             : static void
     192        8662 : p_ere(p, stop)
     193             : register struct parse *p;
     194             : int stop;                       /* character this ERE should end at */
     195             : {
     196             :         register unsigned char c;
     197        8662 :         register sopno prevback = 0;
     198        8662 :         register sopno prevfwd = 0;
     199             :         register sopno conc;
     200        8662 :         register int first = 1;         /* is this the first alternative? */
     201             : 
     202             :         for (;;) {
     203             :                 /* do a bunch of concatenated expressions */
     204        8732 :                 conc = HERE();
     205       34861 :                 while (MORE() && (c = PEEK()) != '|' && c != stop)
     206       17397 :                         p_ere_exp(p);
     207        8732 :                 (void) REQUIRE(HERE() != conc, REG_EMPTY);      /* require nonempty */
     208             : 
     209        8732 :                 if (!EAT('|'))
     210        8662 :                         break;          /* NOTE BREAK OUT */
     211             : 
     212          70 :                 if (first) {
     213          48 :                         INSERT(OCH_, conc);     /* offset is wrong */
     214          48 :                         prevfwd = conc;
     215          48 :                         prevback = conc;
     216          48 :                         first = 0;
     217             :                 }
     218          70 :                 ASTERN(OOR1, prevback);
     219          70 :                 prevback = THERE();
     220          70 :                 AHEAD(prevfwd);                 /* fix previous offset */
     221          70 :                 prevfwd = HERE();
     222          70 :                 EMIT(OOR2, 0);                  /* offset is very wrong */
     223          70 :         }
     224             : 
     225        8662 :         if (!first) {           /* tail-end fixups */
     226          48 :                 AHEAD(prevfwd);
     227          48 :                 ASTERN(O_CH, prevback);
     228             :         }
     229             : 
     230             :         assert(!MORE() || SEE(stop));
     231        8662 : }
     232             : 
     233             : /*
     234             :  - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
     235             :  == static void p_ere_exp(register struct parse *p);
     236             :  */
     237             : static void
     238       17397 : p_ere_exp(p)
     239             : register struct parse *p;
     240             : {
     241             :         register unsigned char c;
     242             :         register sopno pos;
     243             :         register int count;
     244             :         register int count2;
     245             :         register sopno subno;
     246       17397 :         int wascaret = 0;
     247             : 
     248             :         assert(MORE());         /* caller should have ensured this */
     249       17397 :         c = GETNEXT();
     250             : 
     251       17397 :         pos = HERE();
     252       17397 :         switch (c) {
     253             :         case '(':
     254        8309 :                 REQUIRE(MORE(), REG_EPAREN);
     255        8309 :                 p->g->nsub++;
     256        8309 :                 subno = p->g->nsub;
     257        8309 :                 if (subno < NPAREN)
     258         153 :                         p->pbegin[subno] = HERE();
     259        8309 :                 EMIT(OLPAREN, subno);
     260        8309 :                 if (!SEE(')'))
     261        8299 :                         p_ere(p, ')');
     262        8309 :                 if (subno < NPAREN) {
     263         153 :                         p->pend[subno] = HERE();
     264             :                         assert(p->pend[subno] != 0);
     265             :                 }
     266        8309 :                 EMIT(ORPAREN, subno);
     267        8309 :                 MUSTEAT(')', REG_EPAREN);
     268        8309 :                 break;
     269             : #ifndef POSIX_MISTAKE
     270             :         case ')':               /* happens only if no current unmatched ( */
     271             :                 /*
     272             :                  * You may ask, why the ifndef?  Because I didn't notice
     273             :                  * this until slightly too late for 1003.2, and none of the
     274             :                  * other 1003.2 regular-expression reviewers noticed it at
     275             :                  * all.  So an unmatched ) is legal POSIX, at least until
     276             :                  * we can get it fixed.
     277             :                  */
     278             :                 SETERROR(REG_EPAREN);
     279             :                 break;
     280             : #endif
     281             :         case '^':
     282          29 :                 EMIT(OBOL, 0);
     283          29 :                 p->g->iflags |= USEBOL;
     284          29 :                 p->g->nbol++;
     285          29 :                 wascaret = 1;
     286          29 :                 break;
     287             :         case '$':
     288          29 :                 EMIT(OEOL, 0);
     289          29 :                 p->g->iflags |= USEEOL;
     290          29 :                 p->g->neol++;
     291          29 :                 break;
     292             :         case '|':
     293           0 :                 SETERROR(REG_EMPTY);
     294           0 :                 break;
     295             :         case '*':
     296             :         case '+':
     297             :         case '?':
     298          22 :                 SETERROR(REG_BADRPT);
     299          22 :                 break;
     300             :         case '.':
     301        8252 :                 if (p->g->cflags&REG_NEWLINE)
     302           0 :                         nonnewline(p);
     303             :                 else
     304        8252 :                         EMIT(OANY, 0);
     305        8252 :                 break;
     306             :         case '[':
     307         134 :                 p_bracket(p);
     308         134 :                 break;
     309             :         case '\\':
     310         158 :                 REQUIRE(MORE(), REG_EESCAPE);
     311         158 :                 c = GETNEXT();
     312         158 :                 ordinary(p, c);
     313         158 :                 break;
     314             :         case '{':               /* okay as ordinary except if digit follows */
     315           0 :                 REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT);
     316             :                 /* FALLTHROUGH */
     317             :         default:
     318         464 :                 ordinary(p, c);
     319             :                 break;
     320             :         }
     321             : 
     322       17397 :         if (!MORE())
     323         266 :                 return;
     324       17131 :         c = PEEK();
     325             :         /* we call { a repetition if followed by a digit */
     326       17343 :         if (!( c == '*' || c == '+' || c == '?' ||
     327         212 :                                 (c == '{' && MORE2() && isdigit(PEEK2())) ))
     328       16983 :                 return;         /* no repetition, we're done */
     329         148 :         NEXT();
     330             : 
     331         148 :         REQUIRE(!wascaret, REG_BADRPT);
     332         148 :         switch (c) {
     333             :         case '*':       /* implemented as +? */
     334             :                 /* this case does not require the (y|) trick, noKLUDGE */
     335          16 :                 INSERT(OPLUS_, pos);
     336          16 :                 ASTERN(O_PLUS, pos);
     337          16 :                 INSERT(OQUEST_, pos);
     338          16 :                 ASTERN(O_QUEST, pos);
     339          16 :                 break;
     340             :         case '+':
     341          15 :                 INSERT(OPLUS_, pos);
     342          15 :                 ASTERN(O_PLUS, pos);
     343          15 :                 break;
     344             :         case '?':
     345             :                 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
     346          11 :                 INSERT(OCH_, pos);              /* offset slightly wrong */
     347          11 :                 ASTERN(OOR1, pos);              /* this one's right */
     348          11 :                 AHEAD(pos);                     /* fix the OCH_ */
     349          11 :                 EMIT(OOR2, 0);                  /* offset very wrong... */
     350          11 :                 AHEAD(THERE());                 /* ...so fix it */
     351          11 :                 ASTERN(O_CH, THERETHERE());
     352          11 :                 break;
     353             :         case '{':
     354         106 :                 count = p_count(p);
     355         106 :                 if (EAT(',')) {
     356          46 :                         if (isdigit(PEEK())) {
     357          36 :                                 count2 = p_count(p);
     358          36 :                                 REQUIRE(count <= count2, REG_BADBR);
     359             :                         } else          /* single number with comma */
     360          10 :                                 count2 = INFINITY;
     361             :                 } else          /* just a single number */
     362          60 :                         count2 = count;
     363         106 :                 repeat(p, pos, count, count2);
     364         106 :                 if (!EAT('}')) {        /* error heuristics */
     365          24 :                         while (MORE() && PEEK() != '}')
     366           0 :                                 NEXT();
     367          12 :                         REQUIRE(MORE(), REG_EBRACE);
     368          12 :                         SETERROR(REG_BADBR);
     369             :                 }
     370             :                 break;
     371             :         }
     372             : 
     373         148 :         if (!MORE())
     374          61 :                 return;
     375          87 :         c = PEEK();
     376          87 :         if (!( c == '*' || c == '+' || c == '?' ||
     377           0 :                                 (c == '{' && MORE2() && isdigit(PEEK2())) ) )
     378          87 :                 return;
     379           0 :         SETERROR(REG_BADRPT);
     380             : }
     381             : 
     382             : /*
     383             :  - p_str - string (no metacharacters) "parser"
     384             :  == static void p_str(register struct parse *p);
     385             :  */
     386             : static void
     387           0 : p_str(p)
     388             : register struct parse *p;
     389             : {
     390           0 :         REQUIRE(MORE(), REG_EMPTY);
     391           0 :         while (MORE())
     392           0 :                 ordinary(p, GETNEXT());
     393           0 : }
     394             : 
     395             : /*
     396             :  - p_bre - BRE parser top level, anchoring and concatenation
     397             :  == static void p_bre(register struct parse *p, register int end1, \
     398             :  ==     register int end2);
     399             :  * Giving end1 as OUT essentially eliminates the end1/end2 check.
     400             :  *
     401             :  * This implementation is a bit of a kludge, in that a trailing $ is first
     402             :  * taken as an ordinary character and then revised to be an anchor.  The
     403             :  * only undesirable side effect is that '$' gets included as a character
     404             :  * category in such cases.  This is fairly harmless; not worth fixing.
     405             :  * The amount of lookahead needed to avoid this kludge is excessive.
     406             :  */
     407             : static void
     408          30 : p_bre(p, end1, end2)
     409             : register struct parse *p;
     410             : register int end1;              /* first terminating character */
     411             : register int end2;              /* second terminating character */
     412             : {
     413          30 :         register sopno start = HERE();
     414          30 :         register int first = 1;                 /* first subexpression? */
     415          30 :         register int wasdollar = 0;
     416             : 
     417          30 :         if (EAT('^')) {
     418           0 :                 EMIT(OBOL, 0);
     419           0 :                 p->g->iflags |= USEBOL;
     420           0 :                 p->g->nbol++;
     421             :         }
     422         140 :         while (MORE() && !SEETWO(end1, end2)) {
     423          80 :                 wasdollar = p_simp_re(p, first);
     424          80 :                 first = 0;
     425             :         }
     426          30 :         if (wasdollar) {        /* oops, that was a trailing anchor */
     427           0 :                 DROP(1);
     428           0 :                 EMIT(OEOL, 0);
     429           0 :                 p->g->iflags |= USEEOL;
     430           0 :                 p->g->neol++;
     431             :         }
     432             : 
     433          30 :         REQUIRE(HERE() != start, REG_EMPTY);    /* require nonempty */
     434          30 : }
     435             : 
     436             : /*
     437             :  - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
     438             :  == static int p_simp_re(register struct parse *p, int starordinary);
     439             :  */
     440             : static int                      /* was the simple RE an unbackslashed $? */
     441          80 : p_simp_re(p, starordinary)
     442             : register struct parse *p;
     443             : int starordinary;               /* is a leading * an ordinary character? */
     444             : {
     445             :         register int c;
     446             :         register int count;
     447             :         register int count2;
     448             :         register sopno pos;
     449             :         register int i;
     450             :         register sopno subno;
     451             : #       define  BACKSL  (1<<CHAR_BIT)
     452             : 
     453          80 :         pos = HERE();           /* repetion op, if any, covers from here */
     454             : 
     455             :         assert(MORE());         /* caller should have ensured this */
     456          80 :         c = GETNEXT();
     457          80 :         if (c == '\\') {
     458           0 :                 REQUIRE(MORE(), REG_EESCAPE);
     459           0 :                 c = BACKSL | (unsigned char)GETNEXT();
     460             :         }
     461          80 :         switch (c) {
     462             :         case '.':
     463           0 :                 if (p->g->cflags&REG_NEWLINE)
     464           0 :                         nonnewline(p);
     465             :                 else
     466           0 :                         EMIT(OANY, 0);
     467           0 :                 break;
     468             :         case '[':
     469           0 :                 p_bracket(p);
     470           0 :                 break;
     471             :         case BACKSL|'{':
     472           0 :                 SETERROR(REG_BADRPT);
     473           0 :                 break;
     474             :         case BACKSL|'(':
     475           0 :                 p->g->nsub++;
     476           0 :                 subno = p->g->nsub;
     477           0 :                 if (subno < NPAREN)
     478           0 :                         p->pbegin[subno] = HERE();
     479           0 :                 EMIT(OLPAREN, subno);
     480             :                 /* the MORE here is an error heuristic */
     481           0 :                 if (MORE() && !SEETWO('\\', ')'))
     482           0 :                         p_bre(p, '\\', ')');
     483           0 :                 if (subno < NPAREN) {
     484           0 :                         p->pend[subno] = HERE();
     485             :                         assert(p->pend[subno] != 0);
     486             :                 }
     487           0 :                 EMIT(ORPAREN, subno);
     488           0 :                 REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
     489           0 :                 break;
     490             :         case BACKSL|')':        /* should not get here -- must be user */
     491             :         case BACKSL|'}':
     492           0 :                 SETERROR(REG_EPAREN);
     493           0 :                 break;
     494             :         case BACKSL|'1':
     495             :         case BACKSL|'2':
     496             :         case BACKSL|'3':
     497             :         case BACKSL|'4':
     498             :         case BACKSL|'5':
     499             :         case BACKSL|'6':
     500             :         case BACKSL|'7':
     501             :         case BACKSL|'8':
     502             :         case BACKSL|'9':
     503           0 :                 i = (c&~BACKSL) - '0';
     504             :                 assert(i < NPAREN);
     505           0 :                 if (p->pend[i] != 0) {
     506             :                         assert(i <= p->g->nsub);
     507           0 :                         EMIT(OBACK_, i);
     508             :                         assert(p->pbegin[i] != 0);
     509             :                         assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
     510             :                         assert(OP(p->strip[p->pend[i]]) == ORPAREN);
     511           0 :                         (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
     512           0 :                         EMIT(O_BACK, i);
     513             :                 } else
     514           0 :                         SETERROR(REG_ESUBREG);
     515           0 :                 p->g->backrefs = 1;
     516           0 :                 break;
     517             :         case '*':
     518           0 :                 REQUIRE(starordinary, REG_BADRPT);
     519             :                 /* FALLTHROUGH */
     520             :         default:
     521          80 :                 ordinary(p, (unsigned char)c);  /* takes off BACKSL, if any */
     522             :                 break;
     523             :         }
     524             : 
     525          80 :         if (EAT('*')) {         /* implemented as +? */
     526             :                 /* this case does not require the (y|) trick, noKLUDGE */
     527           0 :                 INSERT(OPLUS_, pos);
     528           0 :                 ASTERN(O_PLUS, pos);
     529           0 :                 INSERT(OQUEST_, pos);
     530           0 :                 ASTERN(O_QUEST, pos);
     531          80 :         } else if (EATTWO('\\', '{')) {
     532           0 :                 count = p_count(p);
     533           0 :                 if (EAT(',')) {
     534           0 :                         if (MORE() && isdigit(PEEK())) {
     535           0 :                                 count2 = p_count(p);
     536           0 :                                 REQUIRE(count <= count2, REG_BADBR);
     537             :                         } else          /* single number with comma */
     538           0 :                                 count2 = INFINITY;
     539             :                 } else          /* just a single number */
     540           0 :                         count2 = count;
     541           0 :                 repeat(p, pos, count, count2);
     542           0 :                 if (!EATTWO('\\', '}')) {       /* error heuristics */
     543           0 :                         while (MORE() && !SEETWO('\\', '}'))
     544           0 :                                 NEXT();
     545           0 :                         REQUIRE(MORE(), REG_EBRACE);
     546           0 :                         SETERROR(REG_BADBR);
     547             :                 }
     548          80 :         } else if (c == (unsigned char)'$')     /* $ (but not \$) ends it */
     549           0 :                 return(1);
     550             : 
     551          80 :         return(0);
     552             : }
     553             : 
     554             : /*
     555             :  - p_count - parse a repetition count
     556             :  == static int p_count(register struct parse *p);
     557             :  */
     558             : static int                      /* the value */
     559         142 : p_count(p)
     560             : register struct parse *p;
     561             : {
     562         142 :         register int count = 0;
     563         142 :         register int ndigits = 0;
     564             : 
     565         468 :         while (MORE() && isdigit(PEEK()) && count <= DUPMAX) {
     566         184 :                 count = count*10 + (GETNEXT() - '0');
     567         184 :                 ndigits++;
     568             :         }
     569             : 
     570         142 :         REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
     571         142 :         return(count);
     572             : }
     573             : 
     574             : /*
     575             :  - p_bracket - parse a bracketed character list
     576             :  == static void p_bracket(register struct parse *p);
     577             :  *
     578             :  * Note a significant property of this code:  if the allocset() did SETERROR,
     579             :  * no set operations are done.
     580             :  */
     581             : static void
     582         286 : p_bracket(p)
     583             : register struct parse *p;
     584             : {
     585         286 :         register cset *cs = allocset(p);
     586         286 :         register int invert = 0;
     587             : 
     588             :         /* Dept of Truly Sickening Special-Case Kludges */
     589         286 :         if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
     590           0 :                 EMIT(OBOW, 0);
     591           0 :                 NEXTn(6);
     592           0 :                 return;
     593             :         }
     594         286 :         if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
     595           0 :                 EMIT(OEOW, 0);
     596           0 :                 NEXTn(6);
     597           0 :                 return;
     598             :         }
     599             : 
     600         286 :         if (EAT('^'))
     601          21 :                 invert++;       /* make note to invert set at end */
     602         286 :         if (EAT(']'))
     603           0 :                 CHadd(cs, ']');
     604         286 :         else if (EAT('-'))
     605           1 :                 CHadd(cs, '-');
     606         903 :         while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
     607         331 :                 p_b_term(p, cs);
     608         286 :         if (EAT('-'))
     609           0 :                 CHadd(cs, '-');
     610         286 :         MUSTEAT(']', REG_EBRACK);
     611             : 
     612         286 :         if (p->error != 0)   /* don't mess things up further */
     613          16 :                 return;
     614             : 
     615         270 :         if (p->g->cflags&REG_ICASE) {
     616             :                 register int i;
     617             :                 register int ci;
     618             : 
     619       54484 :                 for (i = p->g->csetsize - 1; i >= 0; i--)
     620       54272 :                         if (CHIN(cs, i) && isalpha(i)) {
     621        1201 :                                 ci = othercase(i);
     622        1201 :                                 if (ci != i)
     623        1201 :                                         CHadd(cs, ci);
     624             :                         }
     625         212 :                 if (cs->multis != NULL)
     626           0 :                         mccase(p, cs);
     627             :         }
     628         270 :         if (invert) {
     629             :                 register int i;
     630             : 
     631        5397 :                 for (i = p->g->csetsize - 1; i >= 0; i--)
     632        5376 :                         if (CHIN(cs, i))
     633         117 :                                 CHsub(cs, i);
     634             :                         else
     635        5259 :                                 CHadd(cs, i);
     636          21 :                 if (p->g->cflags&REG_NEWLINE)
     637           0 :                         CHsub(cs, '\n');
     638          21 :                 if (cs->multis != NULL)
     639           0 :                         mcinvert(p, cs);
     640             :         }
     641             : 
     642             :         assert(cs->multis == NULL);          /* xxx */
     643             : 
     644         270 :         if (nch(p, cs) == 1) {          /* optimize singleton sets */
     645           5 :                 ordinary(p, firstch(p, cs));
     646           5 :                 freeset(p, cs);
     647             :         } else
     648         265 :                 EMIT(OANYOF, freezeset(p, cs));
     649             : }
     650             : 
     651             : /*
     652             :  - p_b_term - parse one term of a bracketed character list
     653             :  == static void p_b_term(register struct parse *p, register cset *cs);
     654             :  */
     655             : static void
     656         331 : p_b_term(p, cs)
     657             : register struct parse *p;
     658             : register cset *cs;
     659             : {
     660             :         register unsigned char c;
     661             :         register unsigned char start, finish;
     662             :         register int i;
     663             : 
     664             :         /* classify what we've got */
     665         331 :         switch ((MORE()) ? PEEK() : '\0') {
     666             :         case '[':
     667          60 :                 c = (MORE2()) ? PEEK2() : '\0';
     668          60 :                 break;
     669             :         case '-':
     670           6 :                 SETERROR(REG_ERANGE);
     671           6 :                 return;                 /* NOTE RETURN */
     672             :                 break;
     673             :         default:
     674         265 :                 c = '\0';
     675             :                 break;
     676             :         }
     677             : 
     678         325 :         switch (c) {
     679             :         case ':':               /* character class */
     680          60 :                 NEXT2();
     681          60 :                 REQUIRE(MORE(), REG_EBRACK);
     682          60 :                 c = PEEK();
     683          60 :                 REQUIRE(c != '-' && c != ']', REG_ECTYPE);
     684          60 :                 p_b_cclass(p, cs);
     685          60 :                 REQUIRE(MORE(), REG_EBRACK);
     686          60 :                 REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
     687          60 :                 break;
     688             :         case '=':               /* equivalence class */
     689           0 :                 NEXT2();
     690           0 :                 REQUIRE(MORE(), REG_EBRACK);
     691           0 :                 c = PEEK();
     692           0 :                 REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
     693           0 :                 p_b_eclass(p, cs);
     694           0 :                 REQUIRE(MORE(), REG_EBRACK);
     695           0 :                 REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
     696           0 :                 break;
     697             :         default:                /* symbol, ordinary character, or range */
     698             : /* xxx revision needed for multichar stuff */
     699         265 :                 start = p_b_symbol(p);
     700         305 :                 if (SEE('-') && MORE2() && PEEK2() != ']') {
     701             :                         /* range */
     702          40 :                         NEXT();
     703          40 :                         if (EAT('-'))
     704           0 :                                 finish = '-';
     705             :                         else
     706          40 :                                 finish = p_b_symbol(p);
     707             :                 } else
     708         225 :                         finish = start;
     709             : /* xxx what about signed chars here... */
     710         265 :                 REQUIRE(start <= finish, REG_ERANGE);
     711         952 :                 for (i = start; i <= finish; i++)
     712         687 :                         CHadd(cs, i);
     713             :                 break;
     714             :         }
     715             : }
     716             : 
     717             : /*
     718             :  - p_b_cclass - parse a character-class name and deal with it
     719             :  == static void p_b_cclass(register struct parse *p, register cset *cs);
     720             :  */
     721             : static void
     722          60 : p_b_cclass(p, cs)
     723             : register struct parse *p;
     724             : register cset *cs;
     725             : {
     726          60 :         register unsigned char *sp = p->next;
     727             :         register const struct cclass *cp;
     728             :         register size_t len;
     729             :         register const unsigned char *u;
     730             :         register unsigned char c;
     731             : 
     732         420 :         while (MORE() && isalpha(PEEK()))
     733         300 :                 NEXT();
     734          60 :         len = p->next - sp;
     735         312 :         for (cp = cclasses; cp->name != NULL; cp++)
     736         312 :                 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
     737          60 :                         break;
     738          60 :         if (cp->name == NULL) {
     739             :                 /* oops, didn't find it */
     740           0 :                 SETERROR(REG_ECTYPE);
     741           0 :                 return;
     742             :         }
     743             : 
     744          60 :         u = cp->chars;
     745        2050 :         while ((c = *u++) != '\0')
     746        1930 :                 CHadd(cs, c);
     747          60 :         for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
     748           0 :                 MCadd(p, cs, u);
     749             : }
     750             : 
     751             : /*
     752             :  - p_b_eclass - parse an equivalence-class name and deal with it
     753             :  == static void p_b_eclass(register struct parse *p, register cset *cs);
     754             :  *
     755             :  * This implementation is incomplete. xxx
     756             :  */
     757             : static void
     758           0 : p_b_eclass(p, cs)
     759             : register struct parse *p;
     760             : register cset *cs;
     761             : {
     762             :         register unsigned char c;
     763             : 
     764           0 :         c = p_b_coll_elem(p, '=');
     765           0 :         CHadd(cs, c);
     766           0 : }
     767             : 
     768             : /*
     769             :  - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
     770             :  == static char p_b_symbol(register struct parse *p);
     771             :  */
     772             : static unsigned char                    /* value of symbol */
     773         305 : p_b_symbol(p)
     774             : register struct parse *p;
     775             : {
     776             :         register unsigned char value;
     777             : 
     778         305 :         REQUIRE(MORE(), REG_EBRACK);
     779         305 :         if (!EATTWO('[', '.'))
     780         305 :                 return(GETNEXT());
     781             : 
     782             :         /* collating symbol */
     783           0 :         value = p_b_coll_elem(p, '.');
     784           0 :         REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
     785           0 :         return(value);
     786             : }
     787             : 
     788             : /*
     789             :  - p_b_coll_elem - parse a collating-element name and look it up
     790             :  == static char p_b_coll_elem(register struct parse *p, int endc);
     791             :  */
     792             : static unsigned char                    /* value of collating element */
     793           0 : p_b_coll_elem(p, endc)
     794             : register struct parse *p;
     795             : int endc;                       /* name ended by endc,']' */
     796             : {
     797           0 :         register unsigned char *sp = p->next;
     798             :         register const struct cname *cp;
     799             :         register int len;
     800             : 
     801           0 :         while (MORE() && !SEETWO(endc, ']'))
     802           0 :                 NEXT();
     803           0 :         if (!MORE()) {
     804           0 :                 SETERROR(REG_EBRACK);
     805           0 :                 return(0);
     806             :         }
     807           0 :         len = p->next - sp;
     808           0 :         for (cp = cnames; cp->name != NULL; cp++)
     809           0 :                 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
     810           0 :                         return(cp->code);    /* known name */
     811           0 :         if (len == 1)
     812           0 :                 return(*sp);    /* single character */
     813           0 :         SETERROR(REG_ECOLLATE);                 /* neither */
     814           0 :         return(0);
     815             : }
     816             : 
     817             : /*
     818             :  - othercase - return the case counterpart of an alphabetic
     819             :  == static char othercase(int ch);
     820             :  */
     821             : static unsigned char                    /* if no counterpart, return ch */
     822        1353 : othercase(ch)
     823             : int ch;
     824             : {
     825             :         assert(isalpha(ch));
     826        1353 :         if (isupper(ch))
     827         673 :                 return(tolower(ch));
     828         680 :         else if (islower(ch))
     829         680 :                 return(toupper(ch));
     830             :         else                    /* peculiar, but could happen */
     831           0 :                 return(ch);
     832             : }
     833             : 
     834             : /*
     835             :  - bothcases - emit a dualcase version of a two-case character
     836             :  == static void bothcases(register struct parse *p, int ch);
     837             :  *
     838             :  * Boy, is this implementation ever a kludge...
     839             :  */
     840             : static void
     841         152 : bothcases(p, ch)
     842             : register struct parse *p;
     843             : int ch;
     844             : {
     845         152 :         register unsigned char *oldnext = p->next;
     846         152 :         register unsigned char *oldend = p->end;
     847             :         unsigned char bracket[3];
     848             : 
     849             :         assert(othercase(ch) != ch);    /* p_bracket() would recurse */
     850         152 :         p->next = bracket;
     851         152 :         p->end = bracket+2;
     852         152 :         bracket[0] = ch;
     853         152 :         bracket[1] = ']';
     854         152 :         bracket[2] = '\0';
     855         152 :         p_bracket(p);
     856             :         assert(p->next == bracket+2);
     857         152 :         p->next = oldnext;
     858         152 :         p->end = oldend;
     859         152 : }
     860             : 
     861             : /*
     862             :  - ordinary - emit an ordinary character
     863             :  == static void ordinary(register struct parse *p, register int ch);
     864             :  */
     865             : static void
     866         707 : ordinary(p, ch)
     867             : register struct parse *p;
     868             : register int ch;
     869             : {
     870         707 :         register cat_t *cap = p->g->categories;
     871             : 
     872         859 :         if ((p->g->cflags&REG_ICASE) && isalpha(ch) && othercase(ch) != ch)
     873         152 :                 bothcases(p, ch);
     874             :         else {
     875         555 :                 EMIT(OCHAR, (unsigned char)ch);
     876         555 :                 if (cap[ch] == 0)
     877         470 :                         cap[ch] = p->g->ncategories++;
     878             :         }
     879         707 : }
     880             : 
     881             : /*
     882             :  - nonnewline - emit REG_NEWLINE version of OANY
     883             :  == static void nonnewline(register struct parse *p);
     884             :  *
     885             :  * Boy, is this implementation ever a kludge...
     886             :  */
     887             : static void
     888           0 : nonnewline(p)
     889             : register struct parse *p;
     890             : {
     891           0 :         register unsigned char *oldnext = p->next;
     892           0 :         register unsigned char *oldend = p->end;
     893             :         unsigned char bracket[4];
     894             : 
     895           0 :         p->next = bracket;
     896           0 :         p->end = bracket+3;
     897           0 :         bracket[0] = '^';
     898           0 :         bracket[1] = '\n';
     899           0 :         bracket[2] = ']';
     900           0 :         bracket[3] = '\0';
     901           0 :         p_bracket(p);
     902             :         assert(p->next == bracket+3);
     903           0 :         p->next = oldnext;
     904           0 :         p->end = oldend;
     905           0 : }
     906             : 
     907             : /*
     908             :  - repeat - generate code for a bounded repetition, recursively if needed
     909             :  == static void repeat(register struct parse *p, sopno start, int from, int to);
     910             :  */
     911             : static void
     912         946 : repeat(p, start, from, to)
     913             : register struct parse *p;
     914             : sopno start;                    /* operand from here to end of strip */
     915             : int from;                       /* repeated from this number */
     916             : int to;                         /* to this number of times (maybe INFINITY) */
     917             : {
     918         946 :         register sopno finish = HERE();
     919             : #       define  N       2
     920             : #       define  INF     3
     921             : #       define  REP(f, t)       ((f)*8 + (t))
     922             : #       define  MAP(n)  (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
     923             :         register sopno copy;
     924             : 
     925         946 :         if (p->error != 0)   /* head off possible runaway recursion */
     926          12 :                 return;
     927             : 
     928             :         assert(from <= to);
     929             : 
     930         934 :         switch (REP(MAP(from), MAP(to))) {
     931             :         case REP(0, 0):                 /* must be user doing this */
     932           6 :                 DROP(finish-start);     /* drop the operand */
     933           6 :                 break;
     934             :         case REP(0, 1):                 /* as x{1,1}? */
     935             :         case REP(0, N):                 /* as x{1,n}? */
     936             :         case REP(0, INF):               /* as x{1,}? */
     937             :                 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
     938           0 :                 INSERT(OCH_, start);            /* offset is wrong... */
     939           0 :                 repeat(p, start+1, 1, to);
     940           0 :                 ASTERN(OOR1, start);
     941           0 :                 AHEAD(start);                   /* ... fix it */
     942           0 :                 EMIT(OOR2, 0);
     943           0 :                 AHEAD(THERE());
     944           0 :                 ASTERN(O_CH, THERETHERE());
     945           0 :                 break;
     946             :         case REP(1, 1):                 /* trivial case */
     947             :                 /* done */
     948          78 :                 break;
     949             :         case REP(1, N):                 /* as x?x{1,n-1} */
     950             :                 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
     951         110 :                 INSERT(OCH_, start);
     952         110 :                 ASTERN(OOR1, start);
     953         110 :                 AHEAD(start);
     954         110 :                 EMIT(OOR2, 0);                  /* offset very wrong... */
     955         110 :                 AHEAD(THERE());                 /* ...so fix it */
     956         110 :                 ASTERN(O_CH, THERETHERE());
     957         110 :                 copy = dupl(p, start+1, finish+1);
     958             :                 assert(copy == finish+4);
     959         110 :                 repeat(p, copy, 1, to-1);
     960         110 :                 break;
     961             :         case REP(1, INF):               /* as x+ */
     962          10 :                 INSERT(OPLUS_, start);
     963          10 :                 ASTERN(O_PLUS, start);
     964          10 :                 break;
     965             :         case REP(N, N):                 /* as xx{m-1,n-1} */
     966         730 :                 copy = dupl(p, start, finish);
     967         730 :                 repeat(p, copy, from-1, to-1);
     968         730 :                 break;
     969             :         case REP(N, INF):               /* as xx{n-1,INF} */
     970           0 :                 copy = dupl(p, start, finish);
     971           0 :                 repeat(p, copy, from-1, to);
     972           0 :                 break;
     973             :         default:                        /* "can't happen" */
     974           0 :                 SETERROR(REG_ASSERT);   /* just in case */
     975             :                 break;
     976             :         }
     977             : }
     978             : 
     979             : /*
     980             :  - seterr - set an error condition
     981             :  == static int seterr(register struct parse *p, int e);
     982             :  */
     983             : static int                      /* useless but makes type checking happy */
     984         222 : seterr(p, e)
     985             : register struct parse *p;
     986             : int e;
     987             : {
     988         222 :         if (p->error == 0)   /* keep earliest error condition */
     989         142 :                 p->error = e;
     990         222 :         p->next = nuls;              /* try to bring things to a halt */
     991         222 :         p->end = nuls;
     992         222 :         return(0);              /* make the return value well-defined */
     993             : }
     994             : 
     995             : /*
     996             :  - allocset - allocate a set of characters for []
     997             :  == static cset *allocset(register struct parse *p);
     998             :  */
     999             : static cset *
    1000         286 : allocset(p)
    1001             : register struct parse *p;
    1002             : {
    1003         286 :         register int no = p->g->ncsets++;
    1004             :         register size_t nc;
    1005             :         register size_t nbytes;
    1006             :         register cset *cs;
    1007         286 :         register size_t css = (size_t)p->g->csetsize;
    1008             :         register int i;
    1009             : 
    1010         286 :         if (no >= p->ncsalloc) {  /* need another column of space */
    1011         162 :                 p->ncsalloc += CHAR_BIT;
    1012         162 :                 nc = p->ncsalloc;
    1013             :                 assert(nc % CHAR_BIT == 0);
    1014         162 :                 nbytes = nc / CHAR_BIT * css;
    1015         162 :                 if (p->g->sets == NULL)
    1016         162 :                         p->g->sets = (cset *)malloc(nc * sizeof(cset));
    1017             :                 else
    1018           0 :                         p->g->sets = (cset *)realloc((unsigned char *)p->g->sets,
    1019             :                                                         nc * sizeof(cset));
    1020         162 :                 if (p->g->setbits == NULL)
    1021         162 :                         p->g->setbits = (uch *)malloc(nbytes);
    1022             :                 else {
    1023           0 :                         p->g->setbits = (uch *)realloc((unsigned char *)p->g->setbits,
    1024             :                                                                 nbytes);
    1025             :                         /* xxx this isn't right if setbits is now NULL */
    1026           0 :                         for (i = 0; i < no; i++)
    1027           0 :                                 p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
    1028             :                 }
    1029         324 :                 if (p->g->sets != NULL && p->g->setbits != NULL)
    1030         162 :                         (void) memset((unsigned char *)p->g->setbits + (nbytes - css),
    1031             :                                                                 0, css);
    1032             :                 else {
    1033           0 :                         no = 0;
    1034           0 :                         SETERROR(REG_ESPACE);
    1035             :                         /* caller's responsibility not to do set ops */
    1036             :                 }
    1037             :         }
    1038             : 
    1039             :         assert(p->g->sets != NULL);       /* xxx */
    1040         286 :         cs = &p->g->sets[no];
    1041         286 :         cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
    1042         286 :         cs->mask = 1 << ((no) % CHAR_BIT);
    1043         286 :         cs->hash = 0;
    1044         286 :         cs->smultis = 0;
    1045         286 :         cs->multis = NULL;
    1046             : 
    1047         286 :         return(cs);
    1048             : }
    1049             : 
    1050             : /*
    1051             :  - freeset - free a now-unused set
    1052             :  == static void freeset(register struct parse *p, register cset *cs);
    1053             :  */
    1054             : static void
    1055          42 : freeset(p, cs)
    1056             : register struct parse *p;
    1057             : register cset *cs;
    1058             : {
    1059             :         register size_t i;
    1060          42 :         register cset *top = &p->g->sets[p->g->ncsets];
    1061          42 :         register size_t css = (size_t)p->g->csetsize;
    1062             : 
    1063       10794 :         for (i = 0; i < css; i++)
    1064       10752 :                 CHsub(cs, i);
    1065          42 :         if (cs == top-1)        /* recover only the easy case */
    1066          42 :                 p->g->ncsets--;
    1067          42 : }
    1068             : 
    1069             : /*
    1070             :  - freezeset - final processing on a set of characters
    1071             :  == static int freezeset(register struct parse *p, register cset *cs);
    1072             :  *
    1073             :  * The main task here is merging identical sets.  This is usually a waste
    1074             :  * of time (although the hash code minimizes the overhead), but can win
    1075             :  * big if REG_ICASE is being used.  REG_ICASE, by the way, is why the hash
    1076             :  * is done using addition rather than xor -- all ASCII [aA] sets xor to
    1077             :  * the same value!
    1078             :  */
    1079             : static int                      /* set number */
    1080         265 : freezeset(p, cs)
    1081             : register struct parse *p;
    1082             : register cset *cs;
    1083             : {
    1084         265 :         register uch h = cs->hash;
    1085             :         register size_t i;
    1086         265 :         register cset *top = &p->g->sets[p->g->ncsets];
    1087             :         register cset *cs2;
    1088         265 :         register size_t css = (size_t)p->g->csetsize;
    1089             : 
    1090             :         /* look for an earlier one which is the same */
    1091         707 :         for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
    1092         479 :                 if (cs2->hash == h && cs2 != cs) {
    1093             :                         /* maybe */
    1094        9509 :                         for (i = 0; i < css; i++)
    1095        9472 :                                 if (!!CHIN(cs2, i) != !!CHIN(cs, i))
    1096           0 :                                         break;          /* no */
    1097          37 :                         if (i == css)
    1098          37 :                                 break;                  /* yes */
    1099             :                 }
    1100             : 
    1101         265 :         if (cs2 < top) {     /* found one */
    1102          37 :                 freeset(p, cs);
    1103          37 :                 cs = cs2;
    1104             :         }
    1105             : 
    1106         265 :         return((int)(cs - p->g->sets));
    1107             : }
    1108             : 
    1109             : /*
    1110             :  - firstch - return first character in a set (which must have at least one)
    1111             :  == static int firstch(register struct parse *p, register cset *cs);
    1112             :  */
    1113             : static int                      /* character; there is no "none" value */
    1114           5 : firstch(p, cs)
    1115             : register struct parse *p;
    1116             : register cset *cs;
    1117             : {
    1118             :         register size_t i;
    1119           5 :         register size_t css = (size_t)p->g->csetsize;
    1120             : 
    1121         605 :         for (i = 0; i < css; i++)
    1122         605 :                 if (CHIN(cs, i))
    1123           5 :                         return((unsigned char)i);
    1124             :         assert(never);
    1125           0 :         return(0);              /* arbitrary */
    1126             : }
    1127             : 
    1128             : /*
    1129             :  - nch - number of characters in a set
    1130             :  == static int nch(register struct parse *p, register cset *cs);
    1131             :  */
    1132             : static int
    1133         270 : nch(p, cs)
    1134             : register struct parse *p;
    1135             : register cset *cs;
    1136             : {
    1137             :         register size_t i;
    1138         270 :         register size_t css = (size_t)p->g->csetsize;
    1139         270 :         register int n = 0;
    1140             : 
    1141       69390 :         for (i = 0; i < css; i++)
    1142       69120 :                 if (CHIN(cs, i))
    1143        8132 :                         n++;
    1144         270 :         return(n);
    1145             : }
    1146             : 
    1147             : /*
    1148             :  - mcadd - add a collating element to a cset
    1149             :  == static void mcadd(register struct parse *p, register cset *cs, \
    1150             :  ==     register char *cp);
    1151             :  */
    1152             : static void
    1153           0 : mcadd(p, cs, cp)
    1154             : register struct parse *p;
    1155             : register cset *cs;
    1156             : register const unsigned char *cp;
    1157             : {
    1158           0 :         register size_t oldend = cs->smultis;
    1159             : 
    1160           0 :         cs->smultis += strlen(cp) + 1;
    1161           0 :         if (cs->multis == NULL)
    1162           0 :                 cs->multis = malloc(cs->smultis);
    1163             :         else
    1164           0 :                 cs->multis = realloc(cs->multis, cs->smultis);
    1165           0 :         if (cs->multis == NULL) {
    1166           0 :                 SETERROR(REG_ESPACE);
    1167           0 :                 return;
    1168             :         }
    1169             : 
    1170           0 :         (void) strcpy(cs->multis + oldend - 1, cp);
    1171           0 :         cs->multis[cs->smultis - 1] = '\0';
    1172             : }
    1173             : 
    1174             : #if 0
    1175             : /*
    1176             :  - mcsub - subtract a collating element from a cset
    1177             :  == static void mcsub(register cset *cs, register unsigned char *cp);
    1178             :  */
    1179             : static void
    1180             : mcsub(cs, cp)
    1181             : register unsigned cset *cs;
    1182             : register unsigned char *cp;
    1183             : {
    1184             :         register unsigned char *fp = mcfind(cs, cp);
    1185             :         register size_t len = strlen(fp);
    1186             : 
    1187             :         assert(fp != NULL);
    1188             :         (void) memmove(fp, fp + len + 1,
    1189             :                                 cs->smultis - (fp + len + 1 - cs->multis));
    1190             :         cs->smultis -= len;
    1191             : 
    1192             :         if (cs->smultis == 0) {
    1193             :                 free(cs->multis);
    1194             :                 cs->multis = NULL;
    1195             :                 return;
    1196             :         }
    1197             : 
    1198             :         cs->multis = realloc(cs->multis, cs->smultis);
    1199             :         assert(cs->multis != NULL);
    1200             : }
    1201             : 
    1202             : /*
    1203             :  - mcin - is a collating element in a cset?
    1204             :  == static int mcin(register cset *cs, register unsigned char *cp);
    1205             :  */
    1206             : static int
    1207             : mcin(cs, cp)
    1208             : register cset *cs;
    1209             : register unsigned char *cp;
    1210             : {
    1211             :         return(mcfind(cs, cp) != NULL);
    1212             : }
    1213             : 
    1214             : 
    1215             : /*
    1216             :  - mcfind - find a collating element in a cset
    1217             :  == static unsigned char *mcfind(register cset *cs, register unsigned char *cp);
    1218             :  */
    1219             : static unsigned char *
    1220             : mcfind(cs, cp)
    1221             : register cset *cs;
    1222             : register unsigned char *cp;
    1223             : {
    1224             :         register unsigned char *p;
    1225             : 
    1226             :         if (cs->multis == NULL)
    1227             :                 return(NULL);
    1228             :         for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
    1229             :                 if (strcmp(cp, p) == 0)
    1230             :                         return(p);
    1231             :         return(NULL);
    1232             : }
    1233             : #endif
    1234             : 
    1235             : /*
    1236             :  - mcinvert - invert the list of collating elements in a cset
    1237             :  == static void mcinvert(register struct parse *p, register cset *cs);
    1238             :  *
    1239             :  * This would have to know the set of possibilities.  Implementation
    1240             :  * is deferred.
    1241             :  */
    1242             : static void
    1243           0 : mcinvert(p, cs)
    1244             : register struct parse *p;
    1245             : register cset *cs;
    1246             : {
    1247             :         assert(cs->multis == NULL);  /* xxx */
    1248           0 : }
    1249             : 
    1250             : /*
    1251             :  - mccase - add case counterparts of the list of collating elements in a cset
    1252             :  == static void mccase(register struct parse *p, register cset *cs);
    1253             :  *
    1254             :  * This would have to know the set of possibilities.  Implementation
    1255             :  * is deferred.
    1256             :  */
    1257             : static void
    1258           0 : mccase(p, cs)
    1259             : register struct parse *p;
    1260             : register cset *cs;
    1261             : {
    1262             :         assert(cs->multis == NULL);  /* xxx */
    1263           0 : }
    1264             : 
    1265             : /*
    1266             :  - isinsets - is this character in any sets?
    1267             :  == static int isinsets(register struct re_guts *g, int c);
    1268             :  */
    1269             : static int                      /* predicate */
    1270       56038 : isinsets(g, c)
    1271             : register struct re_guts *g;
    1272             : int c;
    1273             : {
    1274             :         register uch *col;
    1275             :         register int i;
    1276       56038 :         register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
    1277       56038 :         register unsigned uc = (unsigned char)c;
    1278             : 
    1279       80803 :         for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
    1280       24965 :                 if (col[uc] != 0)
    1281         200 :                         return(1);
    1282       55838 :         return(0);
    1283             : }
    1284             : 
    1285             : /*
    1286             :  - samesets - are these two characters in exactly the same sets?
    1287             :  == static int samesets(register struct re_guts *g, int c1, int c2);
    1288             :  */
    1289             : static int                      /* predicate */
    1290       37606 : samesets(g, c1, c2)
    1291             : register struct re_guts *g;
    1292             : int c1;
    1293             : int c2;
    1294             : {
    1295             :         register uch *col;
    1296             :         register int i;
    1297       37606 :         register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
    1298       37606 :         register unsigned uc1 = (unsigned char)c1;
    1299       37606 :         register unsigned uc2 = (unsigned char)c2;
    1300             : 
    1301       45387 :         for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
    1302       37606 :                 if (col[uc1] != col[uc2])
    1303       29825 :                         return(0);
    1304        7781 :         return(1);
    1305             : }
    1306             : 
    1307             : /*
    1308             :  - categorize - sort out character categories
    1309             :  == static void categorize(struct parse *p, register struct re_guts *g);
    1310             :  */
    1311             : static void
    1312         393 : categorize(p, g)
    1313             : struct parse *p;
    1314             : register struct re_guts *g;
    1315             : {
    1316         393 :         register cat_t *cats = g->categories;
    1317             :         register int c;
    1318             :         register int c2;
    1319             :         register cat_t cat;
    1320             : 
    1321             :         /* avoid making error situations worse */
    1322         393 :         if (p->error != 0)
    1323         142 :                 return;
    1324             : 
    1325       64507 :         for (c = 0; c <= UCHAR_MAX; c++)
    1326       64256 :                 if (cats[c] == 0 && isinsets(g, c)) {
    1327         200 :                         cat = g->ncategories++;
    1328         200 :                         cats[c] = cat;
    1329       40977 :                         for (c2 = c+1; c2 <= UCHAR_MAX; c2++)
    1330       40777 :                                 if (cats[c2] == 0 && samesets(g, c, c2))
    1331        7781 :                                         cats[c2] = cat;
    1332             :                 }
    1333             : }
    1334             : 
    1335             : /*
    1336             :  - dupl - emit a duplicate of a bunch of sops
    1337             :  == static sopno dupl(register struct parse *p, sopno start, sopno finish);
    1338             :  */
    1339             : static sopno                    /* start of duplicate */
    1340         840 : dupl(p, start, finish)
    1341             : register struct parse *p;
    1342             : sopno start;                    /* from here */
    1343             : sopno finish;                   /* to this less one */
    1344             : {
    1345         840 :         register sopno ret = HERE();
    1346         840 :         register sopno len = finish - start;
    1347             : 
    1348             :         assert(finish >= start);
    1349         840 :         if (len == 0)
    1350           0 :                 return(ret);
    1351         840 :         enlarge(p, p->ssize + len);  /* this many unexpected additions */
    1352             :         assert(p->ssize >= p->slen + len);
    1353        1680 :         (void) memcpy((char *)(p->strip + p->slen),
    1354         840 :                 (char *)(p->strip + start), (size_t)len*sizeof(sop));
    1355         840 :         p->slen += len;
    1356         840 :         return(ret);
    1357             : }
    1358             : 
    1359             : /*
    1360             :  - doemit - emit a strip operator
    1361             :  == static void doemit(register struct parse *p, sop op, size_t opnd);
    1362             :  *
    1363             :  * It might seem better to implement this as a macro with a function as
    1364             :  * hard-case backup, but it's just too big and messy unless there are
    1365             :  * some changes to the data structures.  Maybe later.
    1366             :  */
    1367             : static void
    1368       27368 : doemit(p, op, opnd)
    1369             : register struct parse *p;
    1370             : sop op;
    1371             : size_t opnd;
    1372             : {
    1373             :         /* avoid making error situations worse */
    1374       27368 :         if (p->error != 0)
    1375         162 :                 return;
    1376             : 
    1377             :         /* deal with oversize operands ("can't happen", more or less) */
    1378             :         assert(opnd < 1<<OPSHIFT);
    1379             : 
    1380             :         /* deal with undersized strip */
    1381       27206 :         if (p->slen >= p->ssize)
    1382          80 :                 enlarge(p, (p->ssize+1) / 2 * 3);    /* +50% */
    1383             :         assert(p->slen < p->ssize);
    1384             : 
    1385             :         /* finally, it's all reduced to the easy case */
    1386       27206 :         p->strip[p->slen++] = SOP(op, opnd);
    1387             : }
    1388             : 
    1389             : /*
    1390             :  - doinsert - insert a sop into the strip
    1391             :  == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos);
    1392             :  */
    1393             : static void
    1394         226 : doinsert(p, op, opnd, pos)
    1395             : register struct parse *p;
    1396             : sop op;
    1397             : size_t opnd;
    1398             : sopno pos;
    1399             : {
    1400             :         register sopno sn;
    1401             :         register sop s;
    1402             :         register int i;
    1403             : 
    1404             :         /* avoid making error situations worse */
    1405         226 :         if (p->error != 0)
    1406           0 :                 return;
    1407             : 
    1408         226 :         sn = HERE();
    1409         226 :         EMIT(op, opnd);         /* do checks, ensure space */
    1410             :         assert(HERE() == sn+1);
    1411         226 :         s = p->strip[sn];
    1412             : 
    1413             :         /* adjust paren pointers */
    1414             :         assert(pos > 0);
    1415        2260 :         for (i = 1; i < NPAREN; i++) {
    1416        2034 :                 if (p->pbegin[i] >= pos) {
    1417           0 :                         p->pbegin[i]++;
    1418             :                 }
    1419        2034 :                 if (p->pend[i] >= pos) {
    1420           0 :                         p->pend[i]++;
    1421             :                 }
    1422             :         }
    1423             : 
    1424         226 :         memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
    1425         226 :                                                 (HERE()-pos-1)*sizeof(sop));
    1426         226 :         p->strip[pos] = s;
    1427             : }
    1428             : 
    1429             : /*
    1430             :  - dofwd - complete a forward reference
    1431             :  == static void dofwd(register struct parse *p, sopno pos, sop value);
    1432             :  */
    1433             : static void
    1434         360 : dofwd(p, pos, value)
    1435             : register struct parse *p;
    1436             : register sopno pos;
    1437             : sop value;
    1438             : {
    1439             :         /* avoid making error situations worse */
    1440         360 :         if (p->error != 0)
    1441           6 :                 return;
    1442             : 
    1443             :         assert(value < 1<<OPSHIFT);
    1444         354 :         p->strip[pos] = OP(p->strip[pos]) | value;
    1445             : }
    1446             : 
    1447             : /*
    1448             :  - enlarge - enlarge the strip
    1449             :  == static void enlarge(register struct parse *p, sopno size);
    1450             :  */
    1451             : static void
    1452         920 : enlarge(p, size)
    1453             : register struct parse *p;
    1454             : register sopno size;
    1455             : {
    1456             :         register sop *sp;
    1457             : 
    1458         920 :         if (p->ssize >= size)
    1459           0 :                 return;
    1460             : 
    1461         920 :         sp = (sop *)realloc(p->strip, size*sizeof(sop));
    1462         920 :         if (sp == NULL) {
    1463           0 :                 SETERROR(REG_ESPACE);
    1464           0 :                 return;
    1465             :         }
    1466         920 :         p->strip = sp;
    1467         920 :         p->ssize = size;
    1468             : }
    1469             : 
    1470             : /*
    1471             :  - stripsnug - compact the strip
    1472             :  == static void stripsnug(register struct parse *p, register struct re_guts *g);
    1473             :  */
    1474             : static void
    1475         393 : stripsnug(p, g)
    1476             : register struct parse *p;
    1477             : register struct re_guts *g;
    1478             : {
    1479         393 :         g->nstates = p->slen;
    1480         393 :         g->strip = (sop *)realloc((unsigned char *)p->strip, p->slen * sizeof(sop));
    1481         393 :         if (g->strip == NULL) {
    1482           0 :                 SETERROR(REG_ESPACE);
    1483           0 :                 g->strip = p->strip;
    1484             :         }
    1485         393 : }
    1486             : 
    1487             : /*
    1488             :  - findmust - fill in must and mlen with longest mandatory literal string
    1489             :  == static void findmust(register struct parse *p, register struct re_guts *g);
    1490             :  *
    1491             :  * This algorithm could do fancy things like analyzing the operands of |
    1492             :  * for common subsequences.  Someday.  This code is simple and finds most
    1493             :  * of the interesting cases.
    1494             :  *
    1495             :  * Note that must and mlen got initialized during setup.
    1496             :  */
    1497             : static void
    1498         393 : findmust(p, g)
    1499             : struct parse *p;
    1500             : register struct re_guts *g;
    1501             : {
    1502             :         register sop *scan;
    1503         393 :         sop *start = NULL;
    1504         393 :         register sop *newstart = NULL;
    1505             :         register sopno newlen;
    1506             :         register sop s;
    1507             :         register unsigned char *cp;
    1508             :         register sopno i;
    1509             : 
    1510             :         /* avoid making error situations worse */
    1511         393 :         if (p->error != 0)
    1512         142 :                 return;
    1513             : 
    1514             :         /* find the longest OCHAR sequence in strip */
    1515         251 :         newlen = 0;
    1516         251 :         scan = g->strip + 1;
    1517             :         do {
    1518       26900 :                 s = *scan++;
    1519       26900 :                 switch (OP(s)) {
    1520             :                 case OCHAR:             /* sequence member */
    1521         466 :                         if (newlen == 0)                /* new sequence */
    1522         146 :                                 newstart = scan - 1;
    1523         466 :                         newlen++;
    1524         466 :                         break;
    1525             :                 case OPLUS_:            /* things that don't break one */
    1526             :                 case OLPAREN:
    1527             :                 case ORPAREN:
    1528       16639 :                         break;
    1529             :                 case OQUEST_:           /* things that must be skipped */
    1530             :                 case OCH_:
    1531         169 :                         scan--;
    1532             :                         do {
    1533         344 :                                 scan += OPND(s);
    1534         344 :                                 s = *scan;
    1535             :                                 /* assert() interferes w debug printouts */
    1536         519 :                                 if (OP(s) != O_QUEST && OP(s) != O_CH &&
    1537         175 :                                                         OP(s) != OOR2) {
    1538           0 :                                         g->iflags |= BAD;
    1539           0 :                                         return;
    1540             :                                 }
    1541         344 :                         } while (OP(s) != O_QUEST && OP(s) != O_CH);
    1542             :                         /* fallthrough */
    1543             :                 default:                /* things that break a sequence */
    1544        9795 :                         if (newlen > g->mlen) {           /* ends one */
    1545         115 :                                 start = newstart;
    1546         115 :                                 g->mlen = newlen;
    1547             :                         }
    1548        9795 :                         newlen = 0;
    1549             :                         break;
    1550             :                 }
    1551       26900 :         } while (OP(s) != OEND);
    1552             : 
    1553         251 :         if (g->mlen == 0)            /* there isn't one */
    1554         144 :                 return;
    1555             : 
    1556         107 :         if (!start) {
    1557           0 :                 g->mlen = 0;
    1558           0 :                 return;
    1559             :         }
    1560             : 
    1561             :         /* turn it into a character string */
    1562         107 :         g->must = malloc((size_t)g->mlen + 1);
    1563         107 :         if (g->must == NULL) {               /* argh; just forget it */
    1564           0 :                 g->mlen = 0;
    1565           0 :                 return;
    1566             :         }
    1567         107 :         cp = g->must;
    1568         107 :         scan = start;
    1569         527 :         for (i = g->mlen; i > 0; i--) {
    1570         874 :                 while (OP(s = *scan++) != OCHAR)
    1571          34 :                         continue;
    1572             :                 assert(cp < g->must + g->mlen);
    1573         420 :                 *cp++ = (unsigned char)OPND(s);
    1574             :         }
    1575             :         assert(cp == g->must + g->mlen);
    1576         107 :         *cp++ = '\0';           /* just on general principles */
    1577             : }
    1578             : 
    1579             : /*
    1580             :  - pluscount - count + nesting
    1581             :  == static sopno pluscount(register struct parse *p, register struct re_guts *g);
    1582             :  */
    1583             : static sopno                    /* nesting depth */
    1584         393 : pluscount(p, g)
    1585             : struct parse *p;
    1586             : register struct re_guts *g;
    1587             : {
    1588             :         register sop *scan;
    1589             :         register sop s;
    1590         393 :         register sopno plusnest = 0;
    1591         393 :         register sopno maxnest = 0;
    1592             : 
    1593         393 :         if (p->error != 0)
    1594         142 :                 return(0);      /* there may not be an OEND */
    1595             : 
    1596         251 :         scan = g->strip + 1;
    1597             :         do {
    1598       27585 :                 s = *scan++;
    1599       27585 :                 switch (OP(s)) {
    1600             :                 case OPLUS_:
    1601          41 :                         plusnest++;
    1602          41 :                         break;
    1603             :                 case O_PLUS:
    1604          41 :                         if (plusnest > maxnest)
    1605          24 :                                 maxnest = plusnest;
    1606          41 :                         plusnest--;
    1607             :                         break;
    1608             :                 }
    1609       27585 :         } while (OP(s) != OEND);
    1610         251 :         if (plusnest != 0)
    1611           0 :                 g->iflags |= BAD;
    1612         251 :         return(maxnest);
    1613             : }

Generated by: LCOV version 1.10

Generated at Wed, 22 Oct 2014 07:24:48 +0000 (3 days ago)

Copyright © 2005-2014 The PHP Group
All rights reserved.