PHP  
 PHP: Test and Code Coverage Analysis
downloads | QA | documentation | faq | getting help | mailing lists | reporting bugs | php.net sites | links | my php.net 
 

LCOV - code coverage report
Current view: top level - ext/ereg/regex - regcomp.c (source / functions) Hit Total Coverage
Test: PHP Code Coverage Lines: 0 682 0.0 %
Date: 2014-04-16 Functions: 0 38 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : #include <sys/types.h>
       2             : #include <stdio.h>
       3             : #include <string.h>
       4             : #include <ctype.h>
       5             : #include <limits.h>
       6             : #include <stdlib.h>
       7             : 
       8             : #define POSIX_MISTAKE
       9             : 
      10             : #include "utils.h"
      11             : #include "regex.h"
      12             : #include "regex2.h"
      13             : 
      14             : #include "cclass.h"
      15             : #include "cname.h"
      16             : 
      17             : /*
      18             :  * parse structure, passed up and down to avoid global variables and
      19             :  * other clumsinesses
      20             :  */
      21             : struct parse {
      22             :         unsigned char *next;            /* next character in RE */
      23             :         unsigned char *end;             /* end of string (-> NUL normally) */
      24             :         int error;              /* has an error been seen? */
      25             :         sop *strip;             /* malloced strip */
      26             :         sopno ssize;            /* malloced strip size (allocated) */
      27             :         sopno slen;             /* malloced strip length (used) */
      28             :         int ncsalloc;           /* number of csets allocated */
      29             :         struct re_guts *g;
      30             : #       define  NPAREN  10      /* we need to remember () 1-9 for back refs */
      31             :         sopno pbegin[NPAREN];   /* -> ( ([0] unused) */
      32             :         sopno pend[NPAREN];     /* -> ) ([0] unused) */
      33             : };
      34             : 
      35             : #include "regcomp.ih"
      36             : 
      37             : static unsigned char nuls[10];          /* place to point scanner in event of error */
      38             : 
      39             : /*
      40             :  * macros for use with parse structure
      41             :  * BEWARE:  these know that the parse structure is named `p' !!!
      42             :  */
      43             : #define PEEK()  (*p->next)
      44             : #define PEEK2() (*(p->next+1))
      45             : #define MORE()  (p->next < p->end)
      46             : #define MORE2() (p->next+1 < p->end)
      47             : #define SEE(c)  (MORE() && PEEK() == (c))
      48             : #define SEETWO(a, b)    (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
      49             : #define EAT(c)  ((SEE(c)) ? (NEXT(), 1) : 0)
      50             : #define EATTWO(a, b)    ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
      51             : #define NEXT()  (p->next++)
      52             : #define NEXT2() (p->next += 2)
      53             : #define NEXTn(n)        (p->next += (n))
      54             : #define GETNEXT()       (*p->next++)
      55             : #define SETERROR(e)     seterr(p, (e))
      56             : #define REQUIRE(co, e)  (void) ((co) || SETERROR(e))
      57             : #define MUSTSEE(c, e)   (REQUIRE(MORE() && PEEK() == (c), e))
      58             : #define MUSTEAT(c, e)   (REQUIRE(MORE() && GETNEXT() == (c), e))
      59             : #define MUSTNOTSEE(c, e)        (REQUIRE(!MORE() || PEEK() != (c), e))
      60             : #define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
      61             : #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
      62             : #define AHEAD(pos)              dofwd(p, pos, HERE()-(pos))
      63             : #define ASTERN(sop, pos)        EMIT(sop, HERE()-pos)
      64             : #define HERE()          (p->slen)
      65             : #define THERE()         (p->slen - 1)
      66             : #define THERETHERE()    (p->slen - 2)
      67             : #define DROP(n) (p->slen -= (n))
      68             : 
      69             : #ifndef NDEBUG
      70             : static int never = 0;           /* for use in asserts; shuts lint up */
      71             : #else
      72             : #define never   0               /* some <assert.h>s have bugs too */
      73             : #endif
      74             : 
      75             : /*
      76             :  - regcomp - interface for parser and compilation
      77             :  = API_EXPORT(int) regcomp(regex_t *, const char *, int);
      78             :  = #define      REG_BASIC       0000
      79             :  = #define      REG_EXTENDED    0001
      80             :  = #define      REG_ICASE       0002
      81             :  = #define      REG_NOSUB       0004
      82             :  = #define      REG_NEWLINE     0010
      83             :  = #define      REG_NOSPEC      0020
      84             :  = #define      REG_PEND        0040
      85             :  = #define      REG_DUMP        0200
      86             :  */
      87             : API_EXPORT(int)                 /* 0 success, otherwise REG_something */
      88           0 : regcomp(preg, pattern, cflags)
      89             : regex_t *preg;
      90             : const char *pattern;
      91             : int cflags;
      92             : {
      93             :         struct parse pa;
      94             :         register struct re_guts *g;
      95           0 :         register struct parse *p = &pa;
      96             :         register int i;
      97             :         register size_t len;
      98             : #ifdef REDEBUG
      99             : #       define  GOODFLAGS(f)    (f)
     100             : #else
     101             : #       define  GOODFLAGS(f)    ((f)&~REG_DUMP)
     102             : #endif
     103             : 
     104           0 :         cflags = GOODFLAGS(cflags);
     105           0 :         if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
     106           0 :                 return(REG_INVARG);
     107             : 
     108           0 :         if (cflags&REG_PEND) {
     109           0 :                 if (preg->re_endp < pattern)
     110           0 :                         return(REG_INVARG);
     111           0 :                 len = preg->re_endp - pattern;
     112             :         } else
     113           0 :                 len = strlen((char *)pattern);
     114             : 
     115             :         /* do the mallocs early so failure handling is easy */
     116           0 :         g = (struct re_guts *)malloc(sizeof(struct re_guts) +
     117             :                                                         (NC-1)*sizeof(cat_t));
     118           0 :         if (g == NULL)
     119           0 :                 return(REG_ESPACE);
     120           0 :         p->ssize = len/(size_t)2*(size_t)3 + (size_t)1;      /* ugh */
     121           0 :         p->strip = (sop *)malloc(p->ssize * sizeof(sop));
     122           0 :         p->slen = 0;
     123           0 :         if (p->strip == NULL) {
     124           0 :                 free((char *)g);
     125           0 :                 return(REG_ESPACE);
     126             :         }
     127             : 
     128             :         /* set things up */
     129           0 :         p->g = g;
     130           0 :         p->next = (unsigned char *)pattern;  /* convenience; we do not modify it */
     131           0 :         p->end = p->next + len;
     132           0 :         p->error = 0;
     133           0 :         p->ncsalloc = 0;
     134           0 :         for (i = 0; i < NPAREN; i++) {
     135           0 :                 p->pbegin[i] = 0;
     136           0 :                 p->pend[i] = 0;
     137             :         }
     138           0 :         g->csetsize = NC;
     139           0 :         g->sets = NULL;
     140           0 :         g->setbits = NULL;
     141           0 :         g->ncsets = 0;
     142           0 :         g->cflags = cflags;
     143           0 :         g->iflags = 0;
     144           0 :         g->nbol = 0;
     145           0 :         g->neol = 0;
     146           0 :         g->must = NULL;
     147           0 :         g->mlen = 0;
     148           0 :         g->nsub = 0;
     149           0 :         g->ncategories = 1;  /* category 0 is "everything else" */
     150           0 :         g->categories = &g->catspace[0];
     151           0 :         (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
     152           0 :         g->backrefs = 0;
     153             : 
     154             :         /* do it */
     155           0 :         EMIT(OEND, 0);
     156           0 :         g->firststate = THERE();
     157           0 :         if (cflags&REG_EXTENDED)
     158           0 :                 p_ere(p, OUT);
     159           0 :         else if (cflags&REG_NOSPEC)
     160           0 :                 p_str(p);
     161             :         else
     162           0 :                 p_bre(p, OUT, OUT);
     163           0 :         EMIT(OEND, 0);
     164           0 :         g->laststate = THERE();
     165             : 
     166             :         /* tidy up loose ends and fill things in */
     167           0 :         categorize(p, g);
     168           0 :         stripsnug(p, g);
     169           0 :         findmust(p, g);
     170           0 :         g->nplus = pluscount(p, g);
     171           0 :         g->magic = MAGIC2;
     172           0 :         preg->re_nsub = g->nsub;
     173           0 :         preg->re_g = g;
     174           0 :         preg->re_magic = MAGIC1;
     175             : #ifndef REDEBUG
     176             :         /* not debugging, so can't rely on the assert() in regexec() */
     177           0 :         if (g->iflags&BAD)
     178           0 :                 SETERROR(REG_ASSERT);
     179             : #endif
     180             : 
     181             :         /* win or lose, we're done */
     182           0 :         if (p->error != 0)   /* lose */
     183           0 :                 regfree(preg);
     184           0 :         return(p->error);
     185             : }
     186             : 
     187             : /*
     188             :  - p_ere - ERE parser top level, concatenation and alternation
     189             :  == static void p_ere(register struct parse *p, int stop);
     190             :  */
     191             : static void
     192           0 : p_ere(p, stop)
     193             : register struct parse *p;
     194             : int stop;                       /* character this ERE should end at */
     195             : {
     196             :         register unsigned char c;
     197           0 :         register sopno prevback = 0;
     198           0 :         register sopno prevfwd = 0;
     199             :         register sopno conc;
     200           0 :         register int first = 1;         /* is this the first alternative? */
     201             : 
     202             :         for (;;) {
     203             :                 /* do a bunch of concatenated expressions */
     204           0 :                 conc = HERE();
     205           0 :                 while (MORE() && (c = PEEK()) != '|' && c != stop)
     206           0 :                         p_ere_exp(p);
     207           0 :                 (void) REQUIRE(HERE() != conc, REG_EMPTY);      /* require nonempty */
     208             : 
     209           0 :                 if (!EAT('|'))
     210           0 :                         break;          /* NOTE BREAK OUT */
     211             : 
     212           0 :                 if (first) {
     213           0 :                         INSERT(OCH_, conc);     /* offset is wrong */
     214           0 :                         prevfwd = conc;
     215           0 :                         prevback = conc;
     216           0 :                         first = 0;
     217             :                 }
     218           0 :                 ASTERN(OOR1, prevback);
     219           0 :                 prevback = THERE();
     220           0 :                 AHEAD(prevfwd);                 /* fix previous offset */
     221           0 :                 prevfwd = HERE();
     222           0 :                 EMIT(OOR2, 0);                  /* offset is very wrong */
     223           0 :         }
     224             : 
     225           0 :         if (!first) {           /* tail-end fixups */
     226           0 :                 AHEAD(prevfwd);
     227           0 :                 ASTERN(O_CH, prevback);
     228             :         }
     229             : 
     230             :         assert(!MORE() || SEE(stop));
     231           0 : }
     232             : 
     233             : /*
     234             :  - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
     235             :  == static void p_ere_exp(register struct parse *p);
     236             :  */
     237             : static void
     238           0 : p_ere_exp(p)
     239             : register struct parse *p;
     240             : {
     241             :         register unsigned char c;
     242             :         register sopno pos;
     243             :         register int count;
     244             :         register int count2;
     245             :         register sopno subno;
     246           0 :         int wascaret = 0;
     247             : 
     248             :         assert(MORE());         /* caller should have ensured this */
     249           0 :         c = GETNEXT();
     250             : 
     251           0 :         pos = HERE();
     252           0 :         switch (c) {
     253             :         case '(':
     254           0 :                 REQUIRE(MORE(), REG_EPAREN);
     255           0 :                 p->g->nsub++;
     256           0 :                 subno = p->g->nsub;
     257           0 :                 if (subno < NPAREN)
     258           0 :                         p->pbegin[subno] = HERE();
     259           0 :                 EMIT(OLPAREN, subno);
     260           0 :                 if (!SEE(')'))
     261           0 :                         p_ere(p, ')');
     262           0 :                 if (subno < NPAREN) {
     263           0 :                         p->pend[subno] = HERE();
     264             :                         assert(p->pend[subno] != 0);
     265             :                 }
     266           0 :                 EMIT(ORPAREN, subno);
     267           0 :                 MUSTEAT(')', REG_EPAREN);
     268           0 :                 break;
     269             : #ifndef POSIX_MISTAKE
     270             :         case ')':               /* happens only if no current unmatched ( */
     271             :                 /*
     272             :                  * You may ask, why the ifndef?  Because I didn't notice
     273             :                  * this until slightly too late for 1003.2, and none of the
     274             :                  * other 1003.2 regular-expression reviewers noticed it at
     275             :                  * all.  So an unmatched ) is legal POSIX, at least until
     276             :                  * we can get it fixed.
     277             :                  */
     278             :                 SETERROR(REG_EPAREN);
     279             :                 break;
     280             : #endif
     281             :         case '^':
     282           0 :                 EMIT(OBOL, 0);
     283           0 :                 p->g->iflags |= USEBOL;
     284           0 :                 p->g->nbol++;
     285           0 :                 wascaret = 1;
     286           0 :                 break;
     287             :         case '$':
     288           0 :                 EMIT(OEOL, 0);
     289           0 :                 p->g->iflags |= USEEOL;
     290           0 :                 p->g->neol++;
     291           0 :                 break;
     292             :         case '|':
     293           0 :                 SETERROR(REG_EMPTY);
     294           0 :                 break;
     295             :         case '*':
     296             :         case '+':
     297             :         case '?':
     298           0 :                 SETERROR(REG_BADRPT);
     299           0 :                 break;
     300             :         case '.':
     301           0 :                 if (p->g->cflags&REG_NEWLINE)
     302           0 :                         nonnewline(p);
     303             :                 else
     304           0 :                         EMIT(OANY, 0);
     305           0 :                 break;
     306             :         case '[':
     307           0 :                 p_bracket(p);
     308           0 :                 break;
     309             :         case '\\':
     310           0 :                 REQUIRE(MORE(), REG_EESCAPE);
     311           0 :                 c = GETNEXT();
     312           0 :                 ordinary(p, c);
     313           0 :                 break;
     314             :         case '{':               /* okay as ordinary except if digit follows */
     315           0 :                 REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT);
     316             :                 /* FALLTHROUGH */
     317             :         default:
     318           0 :                 ordinary(p, c);
     319             :                 break;
     320             :         }
     321             : 
     322           0 :         if (!MORE())
     323           0 :                 return;
     324           0 :         c = PEEK();
     325             :         /* we call { a repetition if followed by a digit */
     326           0 :         if (!( c == '*' || c == '+' || c == '?' ||
     327           0 :                                 (c == '{' && MORE2() && isdigit(PEEK2())) ))
     328           0 :                 return;         /* no repetition, we're done */
     329           0 :         NEXT();
     330             : 
     331           0 :         REQUIRE(!wascaret, REG_BADRPT);
     332           0 :         switch (c) {
     333             :         case '*':       /* implemented as +? */
     334             :                 /* this case does not require the (y|) trick, noKLUDGE */
     335           0 :                 INSERT(OPLUS_, pos);
     336           0 :                 ASTERN(O_PLUS, pos);
     337           0 :                 INSERT(OQUEST_, pos);
     338           0 :                 ASTERN(O_QUEST, pos);
     339           0 :                 break;
     340             :         case '+':
     341           0 :                 INSERT(OPLUS_, pos);
     342           0 :                 ASTERN(O_PLUS, pos);
     343           0 :                 break;
     344             :         case '?':
     345             :                 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
     346           0 :                 INSERT(OCH_, pos);              /* offset slightly wrong */
     347           0 :                 ASTERN(OOR1, pos);              /* this one's right */
     348           0 :                 AHEAD(pos);                     /* fix the OCH_ */
     349           0 :                 EMIT(OOR2, 0);                  /* offset very wrong... */
     350           0 :                 AHEAD(THERE());                 /* ...so fix it */
     351           0 :                 ASTERN(O_CH, THERETHERE());
     352           0 :                 break;
     353             :         case '{':
     354           0 :                 count = p_count(p);
     355           0 :                 if (EAT(',')) {
     356           0 :                         if (isdigit(PEEK())) {
     357           0 :                                 count2 = p_count(p);
     358           0 :                                 REQUIRE(count <= count2, REG_BADBR);
     359             :                         } else          /* single number with comma */
     360           0 :                                 count2 = INFINITY;
     361             :                 } else          /* just a single number */
     362           0 :                         count2 = count;
     363           0 :                 repeat(p, pos, count, count2);
     364           0 :                 if (!EAT('}')) {        /* error heuristics */
     365           0 :                         while (MORE() && PEEK() != '}')
     366           0 :                                 NEXT();
     367           0 :                         REQUIRE(MORE(), REG_EBRACE);
     368           0 :                         SETERROR(REG_BADBR);
     369             :                 }
     370             :                 break;
     371             :         }
     372             : 
     373           0 :         if (!MORE())
     374           0 :                 return;
     375           0 :         c = PEEK();
     376           0 :         if (!( c == '*' || c == '+' || c == '?' ||
     377           0 :                                 (c == '{' && MORE2() && isdigit(PEEK2())) ) )
     378           0 :                 return;
     379           0 :         SETERROR(REG_BADRPT);
     380             : }
     381             : 
     382             : /*
     383             :  - p_str - string (no metacharacters) "parser"
     384             :  == static void p_str(register struct parse *p);
     385             :  */
     386             : static void
     387           0 : p_str(p)
     388             : register struct parse *p;
     389             : {
     390           0 :         REQUIRE(MORE(), REG_EMPTY);
     391           0 :         while (MORE())
     392           0 :                 ordinary(p, GETNEXT());
     393           0 : }
     394             : 
     395             : /*
     396             :  - p_bre - BRE parser top level, anchoring and concatenation
     397             :  == static void p_bre(register struct parse *p, register int end1, \
     398             :  ==     register int end2);
     399             :  * Giving end1 as OUT essentially eliminates the end1/end2 check.
     400             :  *
     401             :  * This implementation is a bit of a kludge, in that a trailing $ is first
     402             :  * taken as an ordinary character and then revised to be an anchor.  The
     403             :  * only undesirable side effect is that '$' gets included as a character
     404             :  * category in such cases.  This is fairly harmless; not worth fixing.
     405             :  * The amount of lookahead needed to avoid this kludge is excessive.
     406             :  */
     407             : static void
     408           0 : p_bre(p, end1, end2)
     409             : register struct parse *p;
     410             : register int end1;              /* first terminating character */
     411             : register int end2;              /* second terminating character */
     412             : {
     413           0 :         register sopno start = HERE();
     414           0 :         register int first = 1;                 /* first subexpression? */
     415           0 :         register int wasdollar = 0;
     416             : 
     417           0 :         if (EAT('^')) {
     418           0 :                 EMIT(OBOL, 0);
     419           0 :                 p->g->iflags |= USEBOL;
     420           0 :                 p->g->nbol++;
     421             :         }
     422           0 :         while (MORE() && !SEETWO(end1, end2)) {
     423           0 :                 wasdollar = p_simp_re(p, first);
     424           0 :                 first = 0;
     425             :         }
     426           0 :         if (wasdollar) {        /* oops, that was a trailing anchor */
     427           0 :                 DROP(1);
     428           0 :                 EMIT(OEOL, 0);
     429           0 :                 p->g->iflags |= USEEOL;
     430           0 :                 p->g->neol++;
     431             :         }
     432             : 
     433           0 :         REQUIRE(HERE() != start, REG_EMPTY);    /* require nonempty */
     434           0 : }
     435             : 
     436             : /*
     437             :  - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
     438             :  == static int p_simp_re(register struct parse *p, int starordinary);
     439             :  */
     440             : static int                      /* was the simple RE an unbackslashed $? */
     441           0 : p_simp_re(p, starordinary)
     442             : register struct parse *p;
     443             : int starordinary;               /* is a leading * an ordinary character? */
     444             : {
     445             :         register int c;
     446             :         register int count;
     447             :         register int count2;
     448             :         register sopno pos;
     449             :         register int i;
     450             :         register sopno subno;
     451             : #       define  BACKSL  (1<<CHAR_BIT)
     452             : 
     453           0 :         pos = HERE();           /* repetion op, if any, covers from here */
     454             : 
     455             :         assert(MORE());         /* caller should have ensured this */
     456           0 :         c = GETNEXT();
     457           0 :         if (c == '\\') {
     458           0 :                 REQUIRE(MORE(), REG_EESCAPE);
     459           0 :                 c = BACKSL | (unsigned char)GETNEXT();
     460             :         }
     461           0 :         switch (c) {
     462             :         case '.':
     463           0 :                 if (p->g->cflags&REG_NEWLINE)
     464           0 :                         nonnewline(p);
     465             :                 else
     466           0 :                         EMIT(OANY, 0);
     467           0 :                 break;
     468             :         case '[':
     469           0 :                 p_bracket(p);
     470           0 :                 break;
     471             :         case BACKSL|'{':
     472           0 :                 SETERROR(REG_BADRPT);
     473           0 :                 break;
     474             :         case BACKSL|'(':
     475           0 :                 p->g->nsub++;
     476           0 :                 subno = p->g->nsub;
     477           0 :                 if (subno < NPAREN)
     478           0 :                         p->pbegin[subno] = HERE();
     479           0 :                 EMIT(OLPAREN, subno);
     480             :                 /* the MORE here is an error heuristic */
     481           0 :                 if (MORE() && !SEETWO('\\', ')'))
     482           0 :                         p_bre(p, '\\', ')');
     483           0 :                 if (subno < NPAREN) {
     484           0 :                         p->pend[subno] = HERE();
     485             :                         assert(p->pend[subno] != 0);
     486             :                 }
     487           0 :                 EMIT(ORPAREN, subno);
     488           0 :                 REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
     489           0 :                 break;
     490             :         case BACKSL|')':        /* should not get here -- must be user */
     491             :         case BACKSL|'}':
     492           0 :                 SETERROR(REG_EPAREN);
     493           0 :                 break;
     494             :         case BACKSL|'1':
     495             :         case BACKSL|'2':
     496             :         case BACKSL|'3':
     497             :         case BACKSL|'4':
     498             :         case BACKSL|'5':
     499             :         case BACKSL|'6':
     500             :         case BACKSL|'7':
     501             :         case BACKSL|'8':
     502             :         case BACKSL|'9':
     503           0 :                 i = (c&~BACKSL) - '0';
     504             :                 assert(i < NPAREN);
     505           0 :                 if (p->pend[i] != 0) {
     506             :                         assert(i <= p->g->nsub);
     507           0 :                         EMIT(OBACK_, i);
     508             :                         assert(p->pbegin[i] != 0);
     509             :                         assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
     510             :                         assert(OP(p->strip[p->pend[i]]) == ORPAREN);
     511           0 :                         (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
     512           0 :                         EMIT(O_BACK, i);
     513             :                 } else
     514           0 :                         SETERROR(REG_ESUBREG);
     515           0 :                 p->g->backrefs = 1;
     516           0 :                 break;
     517             :         case '*':
     518           0 :                 REQUIRE(starordinary, REG_BADRPT);
     519             :                 /* FALLTHROUGH */
     520             :         default:
     521           0 :                 ordinary(p, (unsigned char)c);  /* takes off BACKSL, if any */
     522             :                 break;
     523             :         }
     524             : 
     525           0 :         if (EAT('*')) {         /* implemented as +? */
     526             :                 /* this case does not require the (y|) trick, noKLUDGE */
     527           0 :                 INSERT(OPLUS_, pos);
     528           0 :                 ASTERN(O_PLUS, pos);
     529           0 :                 INSERT(OQUEST_, pos);
     530           0 :                 ASTERN(O_QUEST, pos);
     531           0 :         } else if (EATTWO('\\', '{')) {
     532           0 :                 count = p_count(p);
     533           0 :                 if (EAT(',')) {
     534           0 :                         if (MORE() && isdigit(PEEK())) {
     535           0 :                                 count2 = p_count(p);
     536           0 :                                 REQUIRE(count <= count2, REG_BADBR);
     537             :                         } else          /* single number with comma */
     538           0 :                                 count2 = INFINITY;
     539             :                 } else          /* just a single number */
     540           0 :                         count2 = count;
     541           0 :                 repeat(p, pos, count, count2);
     542           0 :                 if (!EATTWO('\\', '}')) {       /* error heuristics */
     543           0 :                         while (MORE() && !SEETWO('\\', '}'))
     544           0 :                                 NEXT();
     545           0 :                         REQUIRE(MORE(), REG_EBRACE);
     546           0 :                         SETERROR(REG_BADBR);
     547             :                 }
     548           0 :         } else if (c == (unsigned char)'$')     /* $ (but not \$) ends it */
     549           0 :                 return(1);
     550             : 
     551           0 :         return(0);
     552             : }
     553             : 
     554             : /*
     555             :  - p_count - parse a repetition count
     556             :  == static int p_count(register struct parse *p);
     557             :  */
     558             : static int                      /* the value */
     559           0 : p_count(p)
     560             : register struct parse *p;
     561             : {
     562           0 :         register int count = 0;
     563           0 :         register int ndigits = 0;
     564             : 
     565           0 :         while (MORE() && isdigit(PEEK()) && count <= DUPMAX) {
     566           0 :                 count = count*10 + (GETNEXT() - '0');
     567           0 :                 ndigits++;
     568             :         }
     569             : 
     570           0 :         REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
     571           0 :         return(count);
     572             : }
     573             : 
     574             : /*
     575             :  - p_bracket - parse a bracketed character list
     576             :  == static void p_bracket(register struct parse *p);
     577             :  *
     578             :  * Note a significant property of this code:  if the allocset() did SETERROR,
     579             :  * no set operations are done.
     580             :  */
     581             : static void
     582           0 : p_bracket(p)
     583             : register struct parse *p;
     584             : {
     585           0 :         register cset *cs = allocset(p);
     586           0 :         register int invert = 0;
     587             : 
     588             :         /* Dept of Truly Sickening Special-Case Kludges */
     589           0 :         if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
     590           0 :                 EMIT(OBOW, 0);
     591           0 :                 NEXTn(6);
     592           0 :                 return;
     593             :         }
     594           0 :         if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
     595           0 :                 EMIT(OEOW, 0);
     596           0 :                 NEXTn(6);
     597           0 :                 return;
     598             :         }
     599             : 
     600           0 :         if (EAT('^'))
     601           0 :                 invert++;       /* make note to invert set at end */
     602           0 :         if (EAT(']'))
     603           0 :                 CHadd(cs, ']');
     604           0 :         else if (EAT('-'))
     605           0 :                 CHadd(cs, '-');
     606           0 :         while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
     607           0 :                 p_b_term(p, cs);
     608           0 :         if (EAT('-'))
     609           0 :                 CHadd(cs, '-');
     610           0 :         MUSTEAT(']', REG_EBRACK);
     611             : 
     612           0 :         if (p->error != 0)   /* don't mess things up further */
     613           0 :                 return;
     614             : 
     615           0 :         if (p->g->cflags&REG_ICASE) {
     616             :                 register int i;
     617             :                 register int ci;
     618             : 
     619           0 :                 for (i = p->g->csetsize - 1; i >= 0; i--)
     620           0 :                         if (CHIN(cs, i) && isalpha(i)) {
     621           0 :                                 ci = othercase(i);
     622           0 :                                 if (ci != i)
     623           0 :                                         CHadd(cs, ci);
     624             :                         }
     625           0 :                 if (cs->multis != NULL)
     626           0 :                         mccase(p, cs);
     627             :         }
     628           0 :         if (invert) {
     629             :                 register int i;
     630             : 
     631           0 :                 for (i = p->g->csetsize - 1; i >= 0; i--)
     632           0 :                         if (CHIN(cs, i))
     633           0 :                                 CHsub(cs, i);
     634             :                         else
     635           0 :                                 CHadd(cs, i);
     636           0 :                 if (p->g->cflags&REG_NEWLINE)
     637           0 :                         CHsub(cs, '\n');
     638           0 :                 if (cs->multis != NULL)
     639           0 :                         mcinvert(p, cs);
     640             :         }
     641             : 
     642             :         assert(cs->multis == NULL);          /* xxx */
     643             : 
     644           0 :         if (nch(p, cs) == 1) {          /* optimize singleton sets */
     645           0 :                 ordinary(p, firstch(p, cs));
     646           0 :                 freeset(p, cs);
     647             :         } else
     648           0 :                 EMIT(OANYOF, freezeset(p, cs));
     649             : }
     650             : 
     651             : /*
     652             :  - p_b_term - parse one term of a bracketed character list
     653             :  == static void p_b_term(register struct parse *p, register cset *cs);
     654             :  */
     655             : static void
     656           0 : p_b_term(p, cs)
     657             : register struct parse *p;
     658             : register cset *cs;
     659             : {
     660             :         register unsigned char c;
     661             :         register unsigned char start, finish;
     662             :         register int i;
     663             : 
     664             :         /* classify what we've got */
     665           0 :         switch ((MORE()) ? PEEK() : '\0') {
     666             :         case '[':
     667           0 :                 c = (MORE2()) ? PEEK2() : '\0';
     668           0 :                 break;
     669             :         case '-':
     670           0 :                 SETERROR(REG_ERANGE);
     671           0 :                 return;                 /* NOTE RETURN */
     672             :                 break;
     673             :         default:
     674           0 :                 c = '\0';
     675             :                 break;
     676             :         }
     677             : 
     678           0 :         switch (c) {
     679             :         case ':':               /* character class */
     680           0 :                 NEXT2();
     681           0 :                 REQUIRE(MORE(), REG_EBRACK);
     682           0 :                 c = PEEK();
     683           0 :                 REQUIRE(c != '-' && c != ']', REG_ECTYPE);
     684           0 :                 p_b_cclass(p, cs);
     685           0 :                 REQUIRE(MORE(), REG_EBRACK);
     686           0 :                 REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
     687           0 :                 break;
     688             :         case '=':               /* equivalence class */
     689           0 :                 NEXT2();
     690           0 :                 REQUIRE(MORE(), REG_EBRACK);
     691           0 :                 c = PEEK();
     692           0 :                 REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
     693           0 :                 p_b_eclass(p, cs);
     694           0 :                 REQUIRE(MORE(), REG_EBRACK);
     695           0 :                 REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
     696           0 :                 break;
     697             :         default:                /* symbol, ordinary character, or range */
     698             : /* xxx revision needed for multichar stuff */
     699           0 :                 start = p_b_symbol(p);
     700           0 :                 if (SEE('-') && MORE2() && PEEK2() != ']') {
     701             :                         /* range */
     702           0 :                         NEXT();
     703           0 :                         if (EAT('-'))
     704           0 :                                 finish = '-';
     705             :                         else
     706           0 :                                 finish = p_b_symbol(p);
     707             :                 } else
     708           0 :                         finish = start;
     709             : /* xxx what about signed chars here... */
     710           0 :                 REQUIRE(start <= finish, REG_ERANGE);
     711           0 :                 for (i = start; i <= finish; i++)
     712           0 :                         CHadd(cs, i);
     713             :                 break;
     714             :         }
     715             : }
     716             : 
     717             : /*
     718             :  - p_b_cclass - parse a character-class name and deal with it
     719             :  == static void p_b_cclass(register struct parse *p, register cset *cs);
     720             :  */
     721             : static void
     722           0 : p_b_cclass(p, cs)
     723             : register struct parse *p;
     724             : register cset *cs;
     725             : {
     726           0 :         register unsigned char *sp = p->next;
     727             :         register const struct cclass *cp;
     728             :         register size_t len;
     729             :         register const unsigned char *u;
     730             :         register unsigned char c;
     731             : 
     732           0 :         while (MORE() && isalpha(PEEK()))
     733           0 :                 NEXT();
     734           0 :         len = p->next - sp;
     735           0 :         for (cp = cclasses; cp->name != NULL; cp++)
     736           0 :                 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
     737           0 :                         break;
     738           0 :         if (cp->name == NULL) {
     739             :                 /* oops, didn't find it */
     740           0 :                 SETERROR(REG_ECTYPE);
     741           0 :                 return;
     742             :         }
     743             : 
     744           0 :         u = cp->chars;
     745           0 :         while ((c = *u++) != '\0')
     746           0 :                 CHadd(cs, c);
     747           0 :         for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
     748           0 :                 MCadd(p, cs, u);
     749             : }
     750             : 
     751             : /*
     752             :  - p_b_eclass - parse an equivalence-class name and deal with it
     753             :  == static void p_b_eclass(register struct parse *p, register cset *cs);
     754             :  *
     755             :  * This implementation is incomplete. xxx
     756             :  */
     757             : static void
     758           0 : p_b_eclass(p, cs)
     759             : register struct parse *p;
     760             : register cset *cs;
     761             : {
     762             :         register unsigned char c;
     763             : 
     764           0 :         c = p_b_coll_elem(p, '=');
     765           0 :         CHadd(cs, c);
     766           0 : }
     767             : 
     768             : /*
     769             :  - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
     770             :  == static char p_b_symbol(register struct parse *p);
     771             :  */
     772             : static unsigned char                    /* value of symbol */
     773           0 : p_b_symbol(p)
     774             : register struct parse *p;
     775             : {
     776             :         register unsigned char value;
     777             : 
     778           0 :         REQUIRE(MORE(), REG_EBRACK);
     779           0 :         if (!EATTWO('[', '.'))
     780           0 :                 return(GETNEXT());
     781             : 
     782             :         /* collating symbol */
     783           0 :         value = p_b_coll_elem(p, '.');
     784           0 :         REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
     785           0 :         return(value);
     786             : }
     787             : 
     788             : /*
     789             :  - p_b_coll_elem - parse a collating-element name and look it up
     790             :  == static char p_b_coll_elem(register struct parse *p, int endc);
     791             :  */
     792             : static unsigned char                    /* value of collating element */
     793           0 : p_b_coll_elem(p, endc)
     794             : register struct parse *p;
     795             : int endc;                       /* name ended by endc,']' */
     796             : {
     797           0 :         register unsigned char *sp = p->next;
     798             :         register const struct cname *cp;
     799             :         register int len;
     800             : 
     801           0 :         while (MORE() && !SEETWO(endc, ']'))
     802           0 :                 NEXT();
     803           0 :         if (!MORE()) {
     804           0 :                 SETERROR(REG_EBRACK);
     805           0 :                 return(0);
     806             :         }
     807           0 :         len = p->next - sp;
     808           0 :         for (cp = cnames; cp->name != NULL; cp++)
     809           0 :                 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
     810           0 :                         return(cp->code);    /* known name */
     811           0 :         if (len == 1)
     812           0 :                 return(*sp);    /* single character */
     813           0 :         SETERROR(REG_ECOLLATE);                 /* neither */
     814           0 :         return(0);
     815             : }
     816             : 
     817             : /*
     818             :  - othercase - return the case counterpart of an alphabetic
     819             :  == static char othercase(int ch);
     820             :  */
     821             : static unsigned char                    /* if no counterpart, return ch */
     822           0 : othercase(ch)
     823             : int ch;
     824             : {
     825             :         assert(isalpha(ch));
     826           0 :         if (isupper(ch))
     827           0 :                 return(tolower(ch));
     828           0 :         else if (islower(ch))
     829           0 :                 return(toupper(ch));
     830             :         else                    /* peculiar, but could happen */
     831           0 :                 return(ch);
     832             : }
     833             : 
     834             : /*
     835             :  - bothcases - emit a dualcase version of a two-case character
     836             :  == static void bothcases(register struct parse *p, int ch);
     837             :  *
     838             :  * Boy, is this implementation ever a kludge...
     839             :  */
     840             : static void
     841           0 : bothcases(p, ch)
     842             : register struct parse *p;
     843             : int ch;
     844             : {
     845           0 :         register unsigned char *oldnext = p->next;
     846           0 :         register unsigned char *oldend = p->end;
     847             :         unsigned char bracket[3];
     848             : 
     849             :         assert(othercase(ch) != ch);    /* p_bracket() would recurse */
     850           0 :         p->next = bracket;
     851           0 :         p->end = bracket+2;
     852           0 :         bracket[0] = ch;
     853           0 :         bracket[1] = ']';
     854           0 :         bracket[2] = '\0';
     855           0 :         p_bracket(p);
     856             :         assert(p->next == bracket+2);
     857           0 :         p->next = oldnext;
     858           0 :         p->end = oldend;
     859           0 : }
     860             : 
     861             : /*
     862             :  - ordinary - emit an ordinary character
     863             :  == static void ordinary(register struct parse *p, register int ch);
     864             :  */
     865             : static void
     866           0 : ordinary(p, ch)
     867             : register struct parse *p;
     868             : register int ch;
     869             : {
     870           0 :         register cat_t *cap = p->g->categories;
     871             : 
     872           0 :         if ((p->g->cflags&REG_ICASE) && isalpha(ch) && othercase(ch) != ch)
     873           0 :                 bothcases(p, ch);
     874             :         else {
     875           0 :                 EMIT(OCHAR, (unsigned char)ch);
     876           0 :                 if (cap[ch] == 0)
     877           0 :                         cap[ch] = p->g->ncategories++;
     878             :         }
     879           0 : }
     880             : 
     881             : /*
     882             :  - nonnewline - emit REG_NEWLINE version of OANY
     883             :  == static void nonnewline(register struct parse *p);
     884             :  *
     885             :  * Boy, is this implementation ever a kludge...
     886             :  */
     887             : static void
     888           0 : nonnewline(p)
     889             : register struct parse *p;
     890             : {
     891           0 :         register unsigned char *oldnext = p->next;
     892           0 :         register unsigned char *oldend = p->end;
     893             :         unsigned char bracket[4];
     894             : 
     895           0 :         p->next = bracket;
     896           0 :         p->end = bracket+3;
     897           0 :         bracket[0] = '^';
     898           0 :         bracket[1] = '\n';
     899           0 :         bracket[2] = ']';
     900           0 :         bracket[3] = '\0';
     901           0 :         p_bracket(p);
     902             :         assert(p->next == bracket+3);
     903           0 :         p->next = oldnext;
     904           0 :         p->end = oldend;
     905           0 : }
     906             : 
     907             : /*
     908             :  - repeat - generate code for a bounded repetition, recursively if needed
     909             :  == static void repeat(register struct parse *p, sopno start, int from, int to);
     910             :  */
     911             : static void
     912           0 : repeat(p, start, from, to)
     913             : register struct parse *p;
     914             : sopno start;                    /* operand from here to end of strip */
     915             : int from;                       /* repeated from this number */
     916             : int to;                         /* to this number of times (maybe INFINITY) */
     917             : {
     918           0 :         register sopno finish = HERE();
     919             : #       define  N       2
     920             : #       define  INF     3
     921             : #       define  REP(f, t)       ((f)*8 + (t))
     922             : #       define  MAP(n)  (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
     923             :         register sopno copy;
     924             : 
     925           0 :         if (p->error != 0)   /* head off possible runaway recursion */
     926           0 :                 return;
     927             : 
     928             :         assert(from <= to);
     929             : 
     930           0 :         switch (REP(MAP(from), MAP(to))) {
     931             :         case REP(0, 0):                 /* must be user doing this */
     932           0 :                 DROP(finish-start);     /* drop the operand */
     933           0 :                 break;
     934             :         case REP(0, 1):                 /* as x{1,1}? */
     935             :         case REP(0, N):                 /* as x{1,n}? */
     936             :         case REP(0, INF):               /* as x{1,}? */
     937             :                 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
     938           0 :                 INSERT(OCH_, start);            /* offset is wrong... */
     939           0 :                 repeat(p, start+1, 1, to);
     940           0 :                 ASTERN(OOR1, start);
     941           0 :                 AHEAD(start);                   /* ... fix it */
     942           0 :                 EMIT(OOR2, 0);
     943           0 :                 AHEAD(THERE());
     944           0 :                 ASTERN(O_CH, THERETHERE());
     945           0 :                 break;
     946             :         case REP(1, 1):                 /* trivial case */
     947             :                 /* done */
     948           0 :                 break;
     949             :         case REP(1, N):                 /* as x?x{1,n-1} */
     950             :                 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
     951           0 :                 INSERT(OCH_, start);
     952           0 :                 ASTERN(OOR1, start);
     953           0 :                 AHEAD(start);
     954           0 :                 EMIT(OOR2, 0);                  /* offset very wrong... */
     955           0 :                 AHEAD(THERE());                 /* ...so fix it */
     956           0 :                 ASTERN(O_CH, THERETHERE());
     957           0 :                 copy = dupl(p, start+1, finish+1);
     958             :                 assert(copy == finish+4);
     959           0 :                 repeat(p, copy, 1, to-1);
     960           0 :                 break;
     961             :         case REP(1, INF):               /* as x+ */
     962           0 :                 INSERT(OPLUS_, start);
     963           0 :                 ASTERN(O_PLUS, start);
     964           0 :                 break;
     965             :         case REP(N, N):                 /* as xx{m-1,n-1} */
     966           0 :                 copy = dupl(p, start, finish);
     967           0 :                 repeat(p, copy, from-1, to-1);
     968           0 :                 break;
     969             :         case REP(N, INF):               /* as xx{n-1,INF} */
     970           0 :                 copy = dupl(p, start, finish);
     971           0 :                 repeat(p, copy, from-1, to);
     972           0 :                 break;
     973             :         default:                        /* "can't happen" */
     974           0 :                 SETERROR(REG_ASSERT);   /* just in case */
     975             :                 break;
     976             :         }
     977             : }
     978             : 
     979             : /*
     980             :  - seterr - set an error condition
     981             :  == static int seterr(register struct parse *p, int e);
     982             :  */
     983             : static int                      /* useless but makes type checking happy */
     984           0 : seterr(p, e)
     985             : register struct parse *p;
     986             : int e;
     987             : {
     988           0 :         if (p->error == 0)   /* keep earliest error condition */
     989           0 :                 p->error = e;
     990           0 :         p->next = nuls;              /* try to bring things to a halt */
     991           0 :         p->end = nuls;
     992           0 :         return(0);              /* make the return value well-defined */
     993             : }
     994             : 
     995             : /*
     996             :  - allocset - allocate a set of characters for []
     997             :  == static cset *allocset(register struct parse *p);
     998             :  */
     999             : static cset *
    1000           0 : allocset(p)
    1001             : register struct parse *p;
    1002             : {
    1003           0 :         register int no = p->g->ncsets++;
    1004             :         register size_t nc;
    1005             :         register size_t nbytes;
    1006             :         register cset *cs;
    1007           0 :         register size_t css = (size_t)p->g->csetsize;
    1008             :         register int i;
    1009             : 
    1010           0 :         if (no >= p->ncsalloc) {  /* need another column of space */
    1011           0 :                 p->ncsalloc += CHAR_BIT;
    1012           0 :                 nc = p->ncsalloc;
    1013             :                 assert(nc % CHAR_BIT == 0);
    1014           0 :                 nbytes = nc / CHAR_BIT * css;
    1015           0 :                 if (p->g->sets == NULL)
    1016           0 :                         p->g->sets = (cset *)malloc(nc * sizeof(cset));
    1017             :                 else
    1018           0 :                         p->g->sets = (cset *)realloc((unsigned char *)p->g->sets,
    1019             :                                                         nc * sizeof(cset));
    1020           0 :                 if (p->g->setbits == NULL)
    1021           0 :                         p->g->setbits = (uch *)malloc(nbytes);
    1022             :                 else {
    1023           0 :                         p->g->setbits = (uch *)realloc((unsigned char *)p->g->setbits,
    1024             :                                                                 nbytes);
    1025             :                         /* xxx this isn't right if setbits is now NULL */
    1026           0 :                         for (i = 0; i < no; i++)
    1027           0 :                                 p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
    1028             :                 }
    1029           0 :                 if (p->g->sets != NULL && p->g->setbits != NULL)
    1030           0 :                         (void) memset((unsigned char *)p->g->setbits + (nbytes - css),
    1031             :                                                                 0, css);
    1032             :                 else {
    1033           0 :                         no = 0;
    1034           0 :                         SETERROR(REG_ESPACE);
    1035             :                         /* caller's responsibility not to do set ops */
    1036             :                 }
    1037             :         }
    1038             : 
    1039             :         assert(p->g->sets != NULL);       /* xxx */
    1040           0 :         cs = &p->g->sets[no];
    1041           0 :         cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
    1042           0 :         cs->mask = 1 << ((no) % CHAR_BIT);
    1043           0 :         cs->hash = 0;
    1044           0 :         cs->smultis = 0;
    1045           0 :         cs->multis = NULL;
    1046             : 
    1047           0 :         return(cs);
    1048             : }
    1049             : 
    1050             : /*
    1051             :  - freeset - free a now-unused set
    1052             :  == static void freeset(register struct parse *p, register cset *cs);
    1053             :  */
    1054             : static void
    1055           0 : freeset(p, cs)
    1056             : register struct parse *p;
    1057             : register cset *cs;
    1058             : {
    1059             :         register size_t i;
    1060           0 :         register cset *top = &p->g->sets[p->g->ncsets];
    1061           0 :         register size_t css = (size_t)p->g->csetsize;
    1062             : 
    1063           0 :         for (i = 0; i < css; i++)
    1064           0 :                 CHsub(cs, i);
    1065           0 :         if (cs == top-1)        /* recover only the easy case */
    1066           0 :                 p->g->ncsets--;
    1067           0 : }
    1068             : 
    1069             : /*
    1070             :  - freezeset - final processing on a set of characters
    1071             :  == static int freezeset(register struct parse *p, register cset *cs);
    1072             :  *
    1073             :  * The main task here is merging identical sets.  This is usually a waste
    1074             :  * of time (although the hash code minimizes the overhead), but can win
    1075             :  * big if REG_ICASE is being used.  REG_ICASE, by the way, is why the hash
    1076             :  * is done using addition rather than xor -- all ASCII [aA] sets xor to
    1077             :  * the same value!
    1078             :  */
    1079             : static int                      /* set number */
    1080           0 : freezeset(p, cs)
    1081             : register struct parse *p;
    1082             : register cset *cs;
    1083             : {
    1084           0 :         register uch h = cs->hash;
    1085             :         register size_t i;
    1086           0 :         register cset *top = &p->g->sets[p->g->ncsets];
    1087             :         register cset *cs2;
    1088           0 :         register size_t css = (size_t)p->g->csetsize;
    1089             : 
    1090             :         /* look for an earlier one which is the same */
    1091           0 :         for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
    1092           0 :                 if (cs2->hash == h && cs2 != cs) {
    1093             :                         /* maybe */
    1094           0 :                         for (i = 0; i < css; i++)
    1095           0 :                                 if (!!CHIN(cs2, i) != !!CHIN(cs, i))
    1096           0 :                                         break;          /* no */
    1097           0 :                         if (i == css)
    1098           0 :                                 break;                  /* yes */
    1099             :                 }
    1100             : 
    1101           0 :         if (cs2 < top) {     /* found one */
    1102           0 :                 freeset(p, cs);
    1103           0 :                 cs = cs2;
    1104             :         }
    1105             : 
    1106           0 :         return((int)(cs - p->g->sets));
    1107             : }
    1108             : 
    1109             : /*
    1110             :  - firstch - return first character in a set (which must have at least one)
    1111             :  == static int firstch(register struct parse *p, register cset *cs);
    1112             :  */
    1113             : static int                      /* character; there is no "none" value */
    1114           0 : firstch(p, cs)
    1115             : register struct parse *p;
    1116             : register cset *cs;
    1117             : {
    1118             :         register size_t i;
    1119           0 :         register size_t css = (size_t)p->g->csetsize;
    1120             : 
    1121           0 :         for (i = 0; i < css; i++)
    1122           0 :                 if (CHIN(cs, i))
    1123           0 :                         return((unsigned char)i);
    1124             :         assert(never);
    1125           0 :         return(0);              /* arbitrary */
    1126             : }
    1127             : 
    1128             : /*
    1129             :  - nch - number of characters in a set
    1130             :  == static int nch(register struct parse *p, register cset *cs);
    1131             :  */
    1132             : static int
    1133           0 : nch(p, cs)
    1134             : register struct parse *p;
    1135             : register cset *cs;
    1136             : {
    1137             :         register size_t i;
    1138           0 :         register size_t css = (size_t)p->g->csetsize;
    1139           0 :         register int n = 0;
    1140             : 
    1141           0 :         for (i = 0; i < css; i++)
    1142           0 :                 if (CHIN(cs, i))
    1143           0 :                         n++;
    1144           0 :         return(n);
    1145             : }
    1146             : 
    1147             : /*
    1148             :  - mcadd - add a collating element to a cset
    1149             :  == static void mcadd(register struct parse *p, register cset *cs, \
    1150             :  ==     register char *cp);
    1151             :  */
    1152             : static void
    1153           0 : mcadd(p, cs, cp)
    1154             : register struct parse *p;
    1155             : register cset *cs;
    1156             : register const unsigned char *cp;
    1157             : {
    1158           0 :         register size_t oldend = cs->smultis;
    1159             : 
    1160           0 :         cs->smultis += strlen(cp) + 1;
    1161           0 :         if (cs->multis == NULL)
    1162           0 :                 cs->multis = malloc(cs->smultis);
    1163             :         else
    1164           0 :                 cs->multis = realloc(cs->multis, cs->smultis);
    1165           0 :         if (cs->multis == NULL) {
    1166           0 :                 SETERROR(REG_ESPACE);
    1167           0 :                 return;
    1168             :         }
    1169             : 
    1170           0 :         (void) strcpy(cs->multis + oldend - 1, cp);
    1171           0 :         cs->multis[cs->smultis - 1] = '\0';
    1172             : }
    1173             : 
    1174             : #if 0
    1175             : /*
    1176             :  - mcsub - subtract a collating element from a cset
    1177             :  == static void mcsub(register cset *cs, register unsigned char *cp);
    1178             :  */
    1179             : static void
    1180             : mcsub(cs, cp)
    1181             : register unsigned cset *cs;
    1182             : register unsigned char *cp;
    1183             : {
    1184             :         register unsigned char *fp = mcfind(cs, cp);
    1185             :         register size_t len = strlen(fp);
    1186             : 
    1187             :         assert(fp != NULL);
    1188             :         (void) memmove(fp, fp + len + 1,
    1189             :                                 cs->smultis - (fp + len + 1 - cs->multis));
    1190             :         cs->smultis -= len;
    1191             : 
    1192             :         if (cs->smultis == 0) {
    1193             :                 free(cs->multis);
    1194             :                 cs->multis = NULL;
    1195             :                 return;
    1196             :         }
    1197             : 
    1198             :         cs->multis = realloc(cs->multis, cs->smultis);
    1199             :         assert(cs->multis != NULL);
    1200             : }
    1201             : 
    1202             : /*
    1203             :  - mcin - is a collating element in a cset?
    1204             :  == static int mcin(register cset *cs, register unsigned char *cp);
    1205             :  */
    1206             : static int
    1207             : mcin(cs, cp)
    1208             : register cset *cs;
    1209             : register unsigned char *cp;
    1210             : {
    1211             :         return(mcfind(cs, cp) != NULL);
    1212             : }
    1213             : 
    1214             : 
    1215             : /*
    1216             :  - mcfind - find a collating element in a cset
    1217             :  == static unsigned char *mcfind(register cset *cs, register unsigned char *cp);
    1218             :  */
    1219             : static unsigned char *
    1220             : mcfind(cs, cp)
    1221             : register cset *cs;
    1222             : register unsigned char *cp;
    1223             : {
    1224             :         register unsigned char *p;
    1225             : 
    1226             :         if (cs->multis == NULL)
    1227             :                 return(NULL);
    1228             :         for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
    1229             :                 if (strcmp(cp, p) == 0)
    1230             :                         return(p);
    1231             :         return(NULL);
    1232             : }
    1233             : #endif
    1234             : 
    1235             : /*
    1236             :  - mcinvert - invert the list of collating elements in a cset
    1237             :  == static void mcinvert(register struct parse *p, register cset *cs);
    1238             :  *
    1239             :  * This would have to know the set of possibilities.  Implementation
    1240             :  * is deferred.
    1241             :  */
    1242             : static void
    1243           0 : mcinvert(p, cs)
    1244             : register struct parse *p;
    1245             : register cset *cs;
    1246             : {
    1247             :         assert(cs->multis == NULL);  /* xxx */
    1248           0 : }
    1249             : 
    1250             : /*
    1251             :  - mccase - add case counterparts of the list of collating elements in a cset
    1252             :  == static void mccase(register struct parse *p, register cset *cs);
    1253             :  *
    1254             :  * This would have to know the set of possibilities.  Implementation
    1255             :  * is deferred.
    1256             :  */
    1257             : static void
    1258           0 : mccase(p, cs)
    1259             : register struct parse *p;
    1260             : register cset *cs;
    1261             : {
    1262             :         assert(cs->multis == NULL);  /* xxx */
    1263           0 : }
    1264             : 
    1265             : /*
    1266             :  - isinsets - is this character in any sets?
    1267             :  == static int isinsets(register struct re_guts *g, int c);
    1268             :  */
    1269             : static int                      /* predicate */
    1270           0 : isinsets(g, c)
    1271             : register struct re_guts *g;
    1272             : int c;
    1273             : {
    1274             :         register uch *col;
    1275             :         register int i;
    1276           0 :         register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
    1277           0 :         register unsigned uc = (unsigned char)c;
    1278             : 
    1279           0 :         for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
    1280           0 :                 if (col[uc] != 0)
    1281           0 :                         return(1);
    1282           0 :         return(0);
    1283             : }
    1284             : 
    1285             : /*
    1286             :  - samesets - are these two characters in exactly the same sets?
    1287             :  == static int samesets(register struct re_guts *g, int c1, int c2);
    1288             :  */
    1289             : static int                      /* predicate */
    1290           0 : samesets(g, c1, c2)
    1291             : register struct re_guts *g;
    1292             : int c1;
    1293             : int c2;
    1294             : {
    1295             :         register uch *col;
    1296             :         register int i;
    1297           0 :         register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
    1298           0 :         register unsigned uc1 = (unsigned char)c1;
    1299           0 :         register unsigned uc2 = (unsigned char)c2;
    1300             : 
    1301           0 :         for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
    1302           0 :                 if (col[uc1] != col[uc2])
    1303           0 :                         return(0);
    1304           0 :         return(1);
    1305             : }
    1306             : 
    1307             : /*
    1308             :  - categorize - sort out character categories
    1309             :  == static void categorize(struct parse *p, register struct re_guts *g);
    1310             :  */
    1311             : static void
    1312           0 : categorize(p, g)
    1313             : struct parse *p;
    1314             : register struct re_guts *g;
    1315             : {
    1316           0 :         register cat_t *cats = g->categories;
    1317             :         register int c;
    1318             :         register int c2;
    1319             :         register cat_t cat;
    1320             : 
    1321             :         /* avoid making error situations worse */
    1322           0 :         if (p->error != 0)
    1323           0 :                 return;
    1324             : 
    1325           0 :         for (c = 0; c <= UCHAR_MAX; c++)
    1326           0 :                 if (cats[c] == 0 && isinsets(g, c)) {
    1327           0 :                         cat = g->ncategories++;
    1328           0 :                         cats[c] = cat;
    1329           0 :                         for (c2 = c+1; c2 <= UCHAR_MAX; c2++)
    1330           0 :                                 if (cats[c2] == 0 && samesets(g, c, c2))
    1331           0 :                                         cats[c2] = cat;
    1332             :                 }
    1333             : }
    1334             : 
    1335             : /*
    1336             :  - dupl - emit a duplicate of a bunch of sops
    1337             :  == static sopno dupl(register struct parse *p, sopno start, sopno finish);
    1338             :  */
    1339             : static sopno                    /* start of duplicate */
    1340           0 : dupl(p, start, finish)
    1341             : register struct parse *p;
    1342             : sopno start;                    /* from here */
    1343             : sopno finish;                   /* to this less one */
    1344             : {
    1345           0 :         register sopno ret = HERE();
    1346           0 :         register sopno len = finish - start;
    1347             : 
    1348             :         assert(finish >= start);
    1349           0 :         if (len == 0)
    1350           0 :                 return(ret);
    1351           0 :         enlarge(p, p->ssize + len);  /* this many unexpected additions */
    1352             :         assert(p->ssize >= p->slen + len);
    1353           0 :         (void) memcpy((char *)(p->strip + p->slen),
    1354           0 :                 (char *)(p->strip + start), (size_t)len*sizeof(sop));
    1355           0 :         p->slen += len;
    1356           0 :         return(ret);
    1357             : }
    1358             : 
    1359             : /*
    1360             :  - doemit - emit a strip operator
    1361             :  == static void doemit(register struct parse *p, sop op, size_t opnd);
    1362             :  *
    1363             :  * It might seem better to implement this as a macro with a function as
    1364             :  * hard-case backup, but it's just too big and messy unless there are
    1365             :  * some changes to the data structures.  Maybe later.
    1366             :  */
    1367             : static void
    1368           0 : doemit(p, op, opnd)
    1369             : register struct parse *p;
    1370             : sop op;
    1371             : size_t opnd;
    1372             : {
    1373             :         /* avoid making error situations worse */
    1374           0 :         if (p->error != 0)
    1375           0 :                 return;
    1376             : 
    1377             :         /* deal with oversize operands ("can't happen", more or less) */
    1378             :         assert(opnd < 1<<OPSHIFT);
    1379             : 
    1380             :         /* deal with undersized strip */
    1381           0 :         if (p->slen >= p->ssize)
    1382           0 :                 enlarge(p, (p->ssize+1) / 2 * 3);    /* +50% */
    1383             :         assert(p->slen < p->ssize);
    1384             : 
    1385             :         /* finally, it's all reduced to the easy case */
    1386           0 :         p->strip[p->slen++] = SOP(op, opnd);
    1387             : }
    1388             : 
    1389             : /*
    1390             :  - doinsert - insert a sop into the strip
    1391             :  == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos);
    1392             :  */
    1393             : static void
    1394           0 : doinsert(p, op, opnd, pos)
    1395             : register struct parse *p;
    1396             : sop op;
    1397             : size_t opnd;
    1398             : sopno pos;
    1399             : {
    1400             :         register sopno sn;
    1401             :         register sop s;
    1402             :         register int i;
    1403             : 
    1404             :         /* avoid making error situations worse */
    1405           0 :         if (p->error != 0)
    1406           0 :                 return;
    1407             : 
    1408           0 :         sn = HERE();
    1409           0 :         EMIT(op, opnd);         /* do checks, ensure space */
    1410             :         assert(HERE() == sn+1);
    1411           0 :         s = p->strip[sn];
    1412             : 
    1413             :         /* adjust paren pointers */
    1414             :         assert(pos > 0);
    1415           0 :         for (i = 1; i < NPAREN; i++) {
    1416           0 :                 if (p->pbegin[i] >= pos) {
    1417           0 :                         p->pbegin[i]++;
    1418             :                 }
    1419           0 :                 if (p->pend[i] >= pos) {
    1420           0 :                         p->pend[i]++;
    1421             :                 }
    1422             :         }
    1423             : 
    1424           0 :         memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
    1425           0 :                                                 (HERE()-pos-1)*sizeof(sop));
    1426           0 :         p->strip[pos] = s;
    1427             : }
    1428             : 
    1429             : /*
    1430             :  - dofwd - complete a forward reference
    1431             :  == static void dofwd(register struct parse *p, sopno pos, sop value);
    1432             :  */
    1433             : static void
    1434           0 : dofwd(p, pos, value)
    1435             : register struct parse *p;
    1436             : register sopno pos;
    1437             : sop value;
    1438             : {
    1439             :         /* avoid making error situations worse */
    1440           0 :         if (p->error != 0)
    1441           0 :                 return;
    1442             : 
    1443             :         assert(value < 1<<OPSHIFT);
    1444           0 :         p->strip[pos] = OP(p->strip[pos]) | value;
    1445             : }
    1446             : 
    1447             : /*
    1448             :  - enlarge - enlarge the strip
    1449             :  == static void enlarge(register struct parse *p, sopno size);
    1450             :  */
    1451             : static void
    1452           0 : enlarge(p, size)
    1453             : register struct parse *p;
    1454             : register sopno size;
    1455             : {
    1456             :         register sop *sp;
    1457             : 
    1458           0 :         if (p->ssize >= size)
    1459           0 :                 return;
    1460             : 
    1461           0 :         sp = (sop *)realloc(p->strip, size*sizeof(sop));
    1462           0 :         if (sp == NULL) {
    1463           0 :                 SETERROR(REG_ESPACE);
    1464           0 :                 return;
    1465             :         }
    1466           0 :         p->strip = sp;
    1467           0 :         p->ssize = size;
    1468             : }
    1469             : 
    1470             : /*
    1471             :  - stripsnug - compact the strip
    1472             :  == static void stripsnug(register struct parse *p, register struct re_guts *g);
    1473             :  */
    1474             : static void
    1475           0 : stripsnug(p, g)
    1476             : register struct parse *p;
    1477             : register struct re_guts *g;
    1478             : {
    1479           0 :         g->nstates = p->slen;
    1480           0 :         g->strip = (sop *)realloc((unsigned char *)p->strip, p->slen * sizeof(sop));
    1481           0 :         if (g->strip == NULL) {
    1482           0 :                 SETERROR(REG_ESPACE);
    1483           0 :                 g->strip = p->strip;
    1484             :         }
    1485           0 : }
    1486             : 
    1487             : /*
    1488             :  - findmust - fill in must and mlen with longest mandatory literal string
    1489             :  == static void findmust(register struct parse *p, register struct re_guts *g);
    1490             :  *
    1491             :  * This algorithm could do fancy things like analyzing the operands of |
    1492             :  * for common subsequences.  Someday.  This code is simple and finds most
    1493             :  * of the interesting cases.
    1494             :  *
    1495             :  * Note that must and mlen got initialized during setup.
    1496             :  */
    1497             : static void
    1498           0 : findmust(p, g)
    1499             : struct parse *p;
    1500             : register struct re_guts *g;
    1501             : {
    1502             :         register sop *scan;
    1503           0 :         sop *start = NULL;
    1504           0 :         register sop *newstart = NULL;
    1505             :         register sopno newlen;
    1506             :         register sop s;
    1507             :         register unsigned char *cp;
    1508             :         register sopno i;
    1509             : 
    1510             :         /* avoid making error situations worse */
    1511           0 :         if (p->error != 0)
    1512           0 :                 return;
    1513             : 
    1514             :         /* find the longest OCHAR sequence in strip */
    1515           0 :         newlen = 0;
    1516           0 :         scan = g->strip + 1;
    1517             :         do {
    1518           0 :                 s = *scan++;
    1519           0 :                 switch (OP(s)) {
    1520             :                 case OCHAR:             /* sequence member */
    1521           0 :                         if (newlen == 0)                /* new sequence */
    1522           0 :                                 newstart = scan - 1;
    1523           0 :                         newlen++;
    1524           0 :                         break;
    1525             :                 case OPLUS_:            /* things that don't break one */
    1526             :                 case OLPAREN:
    1527             :                 case ORPAREN:
    1528           0 :                         break;
    1529             :                 case OQUEST_:           /* things that must be skipped */
    1530             :                 case OCH_:
    1531           0 :                         scan--;
    1532             :                         do {
    1533           0 :                                 scan += OPND(s);
    1534           0 :                                 s = *scan;
    1535             :                                 /* assert() interferes w debug printouts */
    1536           0 :                                 if (OP(s) != O_QUEST && OP(s) != O_CH &&
    1537           0 :                                                         OP(s) != OOR2) {
    1538           0 :                                         g->iflags |= BAD;
    1539           0 :                                         return;
    1540             :                                 }
    1541           0 :                         } while (OP(s) != O_QUEST && OP(s) != O_CH);
    1542             :                         /* fallthrough */
    1543             :                 default:                /* things that break a sequence */
    1544           0 :                         if (newlen > g->mlen) {           /* ends one */
    1545           0 :                                 start = newstart;
    1546           0 :                                 g->mlen = newlen;
    1547             :                         }
    1548           0 :                         newlen = 0;
    1549             :                         break;
    1550             :                 }
    1551           0 :         } while (OP(s) != OEND);
    1552             : 
    1553           0 :         if (g->mlen == 0)            /* there isn't one */
    1554           0 :                 return;
    1555             : 
    1556           0 :         if (!start) {
    1557           0 :                 g->mlen = 0;
    1558           0 :                 return;
    1559             :         }
    1560             : 
    1561             :         /* turn it into a character string */
    1562           0 :         g->must = malloc((size_t)g->mlen + 1);
    1563           0 :         if (g->must == NULL) {               /* argh; just forget it */
    1564           0 :                 g->mlen = 0;
    1565           0 :                 return;
    1566             :         }
    1567           0 :         cp = g->must;
    1568           0 :         scan = start;
    1569           0 :         for (i = g->mlen; i > 0; i--) {
    1570           0 :                 while (OP(s = *scan++) != OCHAR)
    1571           0 :                         continue;
    1572             :                 assert(cp < g->must + g->mlen);
    1573           0 :                 *cp++ = (unsigned char)OPND(s);
    1574             :         }
    1575             :         assert(cp == g->must + g->mlen);
    1576           0 :         *cp++ = '\0';           /* just on general principles */
    1577             : }
    1578             : 
    1579             : /*
    1580             :  - pluscount - count + nesting
    1581             :  == static sopno pluscount(register struct parse *p, register struct re_guts *g);
    1582             :  */
    1583             : static sopno                    /* nesting depth */
    1584           0 : pluscount(p, g)
    1585             : struct parse *p;
    1586             : register struct re_guts *g;
    1587             : {
    1588             :         register sop *scan;
    1589             :         register sop s;
    1590           0 :         register sopno plusnest = 0;
    1591           0 :         register sopno maxnest = 0;
    1592             : 
    1593           0 :         if (p->error != 0)
    1594           0 :                 return(0);      /* there may not be an OEND */
    1595             : 
    1596           0 :         scan = g->strip + 1;
    1597             :         do {
    1598           0 :                 s = *scan++;
    1599           0 :                 switch (OP(s)) {
    1600             :                 case OPLUS_:
    1601           0 :                         plusnest++;
    1602           0 :                         break;
    1603             :                 case O_PLUS:
    1604           0 :                         if (plusnest > maxnest)
    1605           0 :                                 maxnest = plusnest;
    1606           0 :                         plusnest--;
    1607             :                         break;
    1608             :                 }
    1609           0 :         } while (OP(s) != OEND);
    1610           0 :         if (plusnest != 0)
    1611           0 :                 g->iflags |= BAD;
    1612           0 :         return(maxnest);
    1613             : }

Generated by: LCOV version 1.10

Generated at Wed, 16 Apr 2014 12:47:47 +0000 (3 days ago)

Copyright © 2005-2014 The PHP Group
All rights reserved.