PHP  
 PHP: Test and Code Coverage Analysis
downloads | QA | documentation | faq | getting help | mailing lists | reporting bugs | php.net sites | links | my php.net 
 

LTP GCOV extension - code coverage report
Current view: directory - var/php_gcov/PHP_5_2/regex - regcomp.c
Test: PHP Code Coverage
Date: 2009-11-19 Instrumented lines: 676
Code covered: 73.5 % Executed lines: 497
Legend: not executed executed

       1                 : #include <sys/types.h>
       2                 : #include <stdio.h>
       3                 : #include <string.h>
       4                 : #include <ctype.h>
       5                 : #include <limits.h>
       6                 : #include <stdlib.h>
       7                 : 
       8                 : #define POSIX_MISTAKE
       9                 : 
      10                 : #include "utils.h"
      11                 : #include "regex.h"
      12                 : #include "regex2.h"
      13                 : 
      14                 : #include "cclass.h"
      15                 : #include "cname.h"
      16                 : 
      17                 : /*
      18                 :  * parse structure, passed up and down to avoid global variables and
      19                 :  * other clumsinesses
      20                 :  */
      21                 : struct parse {
      22                 :         unsigned char *next;            /* next character in RE */
      23                 :         unsigned char *end;             /* end of string (-> NUL normally) */
      24                 :         int error;              /* has an error been seen? */
      25                 :         sop *strip;             /* malloced strip */
      26                 :         sopno ssize;            /* malloced strip size (allocated) */
      27                 :         sopno slen;             /* malloced strip length (used) */
      28                 :         int ncsalloc;           /* number of csets allocated */
      29                 :         struct re_guts *g;
      30                 : #       define  NPAREN  10      /* we need to remember () 1-9 for back refs */
      31                 :         sopno pbegin[NPAREN];   /* -> ( ([0] unused) */
      32                 :         sopno pend[NPAREN];     /* -> ) ([0] unused) */
      33                 : };
      34                 : 
      35                 : #include "regcomp.ih"
      36                 : 
      37                 : static unsigned char nuls[10];          /* place to point scanner in event of error */
      38                 : 
      39                 : /*
      40                 :  * macros for use with parse structure
      41                 :  * BEWARE:  these know that the parse structure is named `p' !!!
      42                 :  */
      43                 : #define PEEK()  (*p->next)
      44                 : #define PEEK2() (*(p->next+1))
      45                 : #define MORE()  (p->next < p->end)
      46                 : #define MORE2() (p->next+1 < p->end)
      47                 : #define SEE(c)  (MORE() && PEEK() == (c))
      48                 : #define SEETWO(a, b)    (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
      49                 : #define EAT(c)  ((SEE(c)) ? (NEXT(), 1) : 0)
      50                 : #define EATTWO(a, b)    ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
      51                 : #define NEXT()  (p->next++)
      52                 : #define NEXT2() (p->next += 2)
      53                 : #define NEXTn(n)        (p->next += (n))
      54                 : #define GETNEXT()       (*p->next++)
      55                 : #define SETERROR(e)     seterr(p, (e))
      56                 : #define REQUIRE(co, e)  (void) ((co) || SETERROR(e))
      57                 : #define MUSTSEE(c, e)   (REQUIRE(MORE() && PEEK() == (c), e))
      58                 : #define MUSTEAT(c, e)   (REQUIRE(MORE() && GETNEXT() == (c), e))
      59                 : #define MUSTNOTSEE(c, e)        (REQUIRE(!MORE() || PEEK() != (c), e))
      60                 : #define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
      61                 : #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
      62                 : #define AHEAD(pos)              dofwd(p, pos, HERE()-(pos))
      63                 : #define ASTERN(sop, pos)        EMIT(sop, HERE()-pos)
      64                 : #define HERE()          (p->slen)
      65                 : #define THERE()         (p->slen - 1)
      66                 : #define THERETHERE()    (p->slen - 2)
      67                 : #define DROP(n) (p->slen -= (n))
      68                 : 
      69                 : #ifndef NDEBUG
      70                 : static int never = 0;           /* for use in asserts; shuts lint up */
      71                 : #else
      72                 : #define never   0               /* some <assert.h>s have bugs too */
      73                 : #endif
      74                 : 
      75                 : /*
      76                 :  - regcomp - interface for parser and compilation
      77                 :  = API_EXPORT(int) regcomp(regex_t *, const char *, int);
      78                 :  = #define      REG_BASIC       0000
      79                 :  = #define      REG_EXTENDED    0001
      80                 :  = #define      REG_ICASE       0002
      81                 :  = #define      REG_NOSUB       0004
      82                 :  = #define      REG_NEWLINE     0010
      83                 :  = #define      REG_NOSPEC      0020
      84                 :  = #define      REG_PEND        0040
      85                 :  = #define      REG_DUMP        0200
      86                 :  */
      87                 : API_EXPORT(int)                 /* 0 success, otherwise REG_something */
      88                 : regcomp(preg, pattern, cflags)
      89                 : regex_t *preg;
      90                 : const char *pattern;
      91                 : int cflags;
      92             415 : {
      93                 :         struct parse pa;
      94                 :         register struct re_guts *g;
      95             415 :         register struct parse *p = &pa;
      96                 :         register int i;
      97                 :         register size_t len;
      98                 : #ifdef REDEBUG
      99                 : #       define  GOODFLAGS(f)    (f)
     100                 : #else
     101                 : #       define  GOODFLAGS(f)    ((f)&~REG_DUMP)
     102                 : #endif
     103                 : 
     104             415 :         cflags = GOODFLAGS(cflags);
     105             415 :         if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
     106               0 :                 return(REG_INVARG);
     107                 : 
     108             415 :         if (cflags&REG_PEND) {
     109               0 :                 if (preg->re_endp < pattern)
     110               0 :                         return(REG_INVARG);
     111               0 :                 len = preg->re_endp - pattern;
     112                 :         } else
     113             415 :                 len = strlen((char *)pattern);
     114                 : 
     115                 :         /* do the mallocs early so failure handling is easy */
     116             415 :         g = (struct re_guts *)malloc(sizeof(struct re_guts) +
     117                 :                                                         (NC-1)*sizeof(cat_t));
     118             415 :         if (g == NULL)
     119               0 :                 return(REG_ESPACE);
     120             415 :         p->ssize = len/(size_t)2*(size_t)3 + (size_t)1;      /* ugh */
     121             415 :         p->strip = (sop *)malloc(p->ssize * sizeof(sop));
     122             415 :         p->slen = 0;
     123             415 :         if (p->strip == NULL) {
     124               0 :                 free((char *)g);
     125               0 :                 return(REG_ESPACE);
     126                 :         }
     127                 : 
     128                 :         /* set things up */
     129             415 :         p->g = g;
     130             415 :         p->next = (unsigned char *)pattern;  /* convenience; we do not modify it */
     131             415 :         p->end = p->next + len;
     132             415 :         p->error = 0;
     133             415 :         p->ncsalloc = 0;
     134            4565 :         for (i = 0; i < NPAREN; i++) {
     135            4150 :                 p->pbegin[i] = 0;
     136            4150 :                 p->pend[i] = 0;
     137                 :         }
     138             415 :         g->csetsize = NC;
     139             415 :         g->sets = NULL;
     140             415 :         g->setbits = NULL;
     141             415 :         g->ncsets = 0;
     142             415 :         g->cflags = cflags;
     143             415 :         g->iflags = 0;
     144             415 :         g->nbol = 0;
     145             415 :         g->neol = 0;
     146             415 :         g->must = NULL;
     147             415 :         g->mlen = 0;
     148             415 :         g->nsub = 0;
     149             415 :         g->ncategories = 1;  /* category 0 is "everything else" */
     150             415 :         g->categories = &g->catspace[0];
     151             415 :         (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
     152             415 :         g->backrefs = 0;
     153                 : 
     154                 :         /* do it */
     155             415 :         EMIT(OEND, 0);
     156             415 :         g->firststate = THERE();
     157             415 :         if (cflags&REG_EXTENDED)
     158             385 :                 p_ere(p, OUT);
     159              30 :         else if (cflags&REG_NOSPEC)
     160               0 :                 p_str(p);
     161                 :         else
     162              30 :                 p_bre(p, OUT, OUT);
     163             415 :         EMIT(OEND, 0);
     164             415 :         g->laststate = THERE();
     165                 : 
     166                 :         /* tidy up loose ends and fill things in */
     167             415 :         categorize(p, g);
     168             415 :         stripsnug(p, g);
     169             415 :         findmust(p, g);
     170             415 :         g->nplus = pluscount(p, g);
     171             415 :         g->magic = MAGIC2;
     172             415 :         preg->re_nsub = g->nsub;
     173             415 :         preg->re_g = g;
     174             415 :         preg->re_magic = MAGIC1;
     175                 : #ifndef REDEBUG
     176                 :         /* not debugging, so can't rely on the assert() in regexec() */
     177             415 :         if (g->iflags&BAD)
     178               0 :                 SETERROR(REG_ASSERT);
     179                 : #endif
     180                 : 
     181                 :         /* win or lose, we're done */
     182             415 :         if (p->error != 0)   /* lose */
     183             146 :                 regfree(preg);
     184             415 :         return(p->error);
     185                 : }
     186                 : 
     187                 : /*
     188                 :  - p_ere - ERE parser top level, concatenation and alternation
     189                 :  == static void p_ere(register struct parse *p, int stop);
     190                 :  */
     191                 : static void
     192                 : p_ere(p, stop)
     193                 : register struct parse *p;
     194                 : int stop;                       /* character this ERE should end at */
     195            8698 : {
     196                 :         register unsigned char c;
     197            8698 :         register sopno prevback = 0;
     198            8698 :         register sopno prevfwd = 0;
     199                 :         register sopno conc;
     200            8698 :         register int first = 1;         /* is this the first alternative? */
     201                 : 
     202                 :         for (;;) {
     203                 :                 /* do a bunch of concatenated expressions */
     204            8768 :                 conc = HERE();
     205           35017 :                 while (MORE() && (c = PEEK()) != '|' && c != stop)
     206           17481 :                         p_ere_exp(p);
     207            8768 :                 (void) REQUIRE(HERE() != conc, REG_EMPTY);      /* require nonempty */
     208                 : 
     209            8768 :                 if (!EAT('|'))
     210            8698 :                         break;          /* NOTE BREAK OUT */
     211                 : 
     212              70 :                 if (first) {
     213              48 :                         INSERT(OCH_, conc);     /* offset is wrong */
     214              48 :                         prevfwd = conc;
     215              48 :                         prevback = conc;
     216              48 :                         first = 0;
     217                 :                 }
     218              70 :                 ASTERN(OOR1, prevback);
     219              70 :                 prevback = THERE();
     220              70 :                 AHEAD(prevfwd);                 /* fix previous offset */
     221              70 :                 prevfwd = HERE();
     222              70 :                 EMIT(OOR2, 0);                  /* offset is very wrong */
     223              70 :         }
     224                 : 
     225            8698 :         if (!first) {           /* tail-end fixups */
     226              48 :                 AHEAD(prevfwd);
     227              48 :                 ASTERN(O_CH, prevback);
     228                 :         }
     229                 : 
     230                 :         assert(!MORE() || SEE(stop));
     231            8698 : }
     232                 : 
     233                 : /*
     234                 :  - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
     235                 :  == static void p_ere_exp(register struct parse *p);
     236                 :  */
     237                 : static void
     238                 : p_ere_exp(p)
     239                 : register struct parse *p;
     240           17481 : {
     241                 :         register unsigned char c;
     242                 :         register sopno pos;
     243                 :         register int count;
     244                 :         register int count2;
     245                 :         register sopno subno;
     246           17481 :         int wascaret = 0;
     247                 : 
     248                 :         assert(MORE());         /* caller should have ensured this */
     249           17481 :         c = GETNEXT();
     250                 : 
     251           17481 :         pos = HERE();
     252           17481 :         switch (c) {
     253                 :         case '(':
     254            8323 :                 REQUIRE(MORE(), REG_EPAREN);
     255            8323 :                 p->g->nsub++;
     256            8323 :                 subno = p->g->nsub;
     257            8323 :                 if (subno < NPAREN)
     258             167 :                         p->pbegin[subno] = HERE();
     259            8323 :                 EMIT(OLPAREN, subno);
     260            8323 :                 if (!SEE(')'))
     261            8313 :                         p_ere(p, ')');
     262            8323 :                 if (subno < NPAREN) {
     263             167 :                         p->pend[subno] = HERE();
     264                 :                         assert(p->pend[subno] != 0);
     265                 :                 }
     266            8323 :                 EMIT(ORPAREN, subno);
     267            8323 :                 MUSTEAT(')', REG_EPAREN);
     268            8323 :                 break;
     269                 : #ifndef POSIX_MISTAKE
     270                 :         case ')':               /* happens only if no current unmatched ( */
     271                 :                 /*
     272                 :                  * You may ask, why the ifndef?  Because I didn't notice
     273                 :                  * this until slightly too late for 1003.2, and none of the
     274                 :                  * other 1003.2 regular-expression reviewers noticed it at
     275                 :                  * all.  So an unmatched ) is legal POSIX, at least until
     276                 :                  * we can get it fixed.
     277                 :                  */
     278                 :                 SETERROR(REG_EPAREN);
     279                 :                 break;
     280                 : #endif
     281                 :         case '^':
     282              39 :                 EMIT(OBOL, 0);
     283              39 :                 p->g->iflags |= USEBOL;
     284              39 :                 p->g->nbol++;
     285              39 :                 wascaret = 1;
     286              39 :                 break;
     287                 :         case '$':
     288              39 :                 EMIT(OEOL, 0);
     289              39 :                 p->g->iflags |= USEEOL;
     290              39 :                 p->g->neol++;
     291              39 :                 break;
     292                 :         case '|':
     293               0 :                 SETERROR(REG_EMPTY);
     294               0 :                 break;
     295                 :         case '*':
     296                 :         case '+':
     297                 :         case '?':
     298              24 :                 SETERROR(REG_BADRPT);
     299              24 :                 break;
     300                 :         case '.':
     301            8252 :                 if (p->g->cflags&REG_NEWLINE)
     302               0 :                         nonnewline(p);
     303                 :                 else
     304            8252 :                         EMIT(OANY, 0);
     305            8252 :                 break;
     306                 :         case '[':
     307             157 :                 p_bracket(p);
     308             157 :                 break;
     309                 :         case '\\':
     310             159 :                 REQUIRE(MORE(), REG_EESCAPE);
     311             159 :                 c = GETNEXT();
     312             159 :                 ordinary(p, c);
     313             159 :                 break;
     314                 :         case '{':               /* okay as ordinary except if digit follows */
     315               0 :                 REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT);
     316                 :                 /* FALLTHROUGH */
     317                 :         default:
     318             488 :                 ordinary(p, c);
     319                 :                 break;
     320                 :         }
     321                 : 
     322           17481 :         if (!MORE())
     323             291 :                 return;
     324           17190 :         c = PEEK();
     325                 :         /* we call { a repetition if followed by a digit */
     326           17190 :         if (!( c == '*' || c == '+' || c == '?' ||
     327                 :                                 (c == '{' && MORE2() && isdigit(PEEK2())) ))
     328           17021 :                 return;         /* no repetition, we're done */
     329             169 :         NEXT();
     330                 : 
     331             169 :         REQUIRE(!wascaret, REG_BADRPT);
     332             169 :         switch (c) {
     333                 :         case '*':       /* implemented as +? */
     334                 :                 /* this case does not require the (y|) trick, noKLUDGE */
     335              16 :                 INSERT(OPLUS_, pos);
     336              16 :                 ASTERN(O_PLUS, pos);
     337              16 :                 INSERT(OQUEST_, pos);
     338              16 :                 ASTERN(O_QUEST, pos);
     339              16 :                 break;
     340                 :         case '+':
     341              26 :                 INSERT(OPLUS_, pos);
     342              26 :                 ASTERN(O_PLUS, pos);
     343              26 :                 break;
     344                 :         case '?':
     345                 :                 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
     346              11 :                 INSERT(OCH_, pos);              /* offset slightly wrong */
     347              11 :                 ASTERN(OOR1, pos);              /* this one's right */
     348              11 :                 AHEAD(pos);                     /* fix the OCH_ */
     349              11 :                 EMIT(OOR2, 0);                  /* offset very wrong... */
     350              11 :                 AHEAD(THERE());                 /* ...so fix it */
     351              11 :                 ASTERN(O_CH, THERETHERE());
     352              11 :                 break;
     353                 :         case '{':
     354             116 :                 count = p_count(p);
     355             116 :                 if (EAT(',')) {
     356              56 :                         if (isdigit(PEEK())) {
     357              46 :                                 count2 = p_count(p);
     358              46 :                                 REQUIRE(count <= count2, REG_BADBR);
     359                 :                         } else          /* single number with comma */
     360              10 :                                 count2 = INFINITY;
     361                 :                 } else          /* just a single number */
     362              60 :                         count2 = count;
     363             116 :                 repeat(p, pos, count, count2);
     364             116 :                 if (!EAT('}')) {        /* error heuristics */
     365              24 :                         while (MORE() && PEEK() != '}')
     366               0 :                                 NEXT();
     367              12 :                         REQUIRE(MORE(), REG_EBRACE);
     368              12 :                         SETERROR(REG_BADBR);
     369                 :                 }
     370                 :                 break;
     371                 :         }
     372                 : 
     373             169 :         if (!MORE())
     374              62 :                 return;
     375             107 :         c = PEEK();
     376             107 :         if (!( c == '*' || c == '+' || c == '?' ||
     377                 :                                 (c == '{' && MORE2() && isdigit(PEEK2())) ) )
     378             107 :                 return;
     379               0 :         SETERROR(REG_BADRPT);
     380                 : }
     381                 : 
     382                 : /*
     383                 :  - p_str - string (no metacharacters) "parser"
     384                 :  == static void p_str(register struct parse *p);
     385                 :  */
     386                 : static void
     387                 : p_str(p)
     388                 : register struct parse *p;
     389               0 : {
     390               0 :         REQUIRE(MORE(), REG_EMPTY);
     391               0 :         while (MORE())
     392               0 :                 ordinary(p, GETNEXT());
     393               0 : }
     394                 : 
     395                 : /*
     396                 :  - p_bre - BRE parser top level, anchoring and concatenation
     397                 :  == static void p_bre(register struct parse *p, register int end1, \
     398                 :  ==     register int end2);
     399                 :  * Giving end1 as OUT essentially eliminates the end1/end2 check.
     400                 :  *
     401                 :  * This implementation is a bit of a kludge, in that a trailing $ is first
     402                 :  * taken as an ordinary character and then revised to be an anchor.  The
     403                 :  * only undesirable side effect is that '$' gets included as a character
     404                 :  * category in such cases.  This is fairly harmless; not worth fixing.
     405                 :  * The amount of lookahead needed to avoid this kludge is excessive.
     406                 :  */
     407                 : static void
     408                 : p_bre(p, end1, end2)
     409                 : register struct parse *p;
     410                 : register int end1;              /* first terminating character */
     411                 : register int end2;              /* second terminating character */
     412              30 : {
     413              30 :         register sopno start = HERE();
     414              30 :         register int first = 1;                 /* first subexpression? */
     415              30 :         register int wasdollar = 0;
     416                 : 
     417              30 :         if (EAT('^')) {
     418               0 :                 EMIT(OBOL, 0);
     419               0 :                 p->g->iflags |= USEBOL;
     420               0 :                 p->g->nbol++;
     421                 :         }
     422             138 :         while (MORE() && !SEETWO(end1, end2)) {
     423              78 :                 wasdollar = p_simp_re(p, first);
     424              78 :                 first = 0;
     425                 :         }
     426              30 :         if (wasdollar) {        /* oops, that was a trailing anchor */
     427               0 :                 DROP(1);
     428               0 :                 EMIT(OEOL, 0);
     429               0 :                 p->g->iflags |= USEEOL;
     430               0 :                 p->g->neol++;
     431                 :         }
     432                 : 
     433              30 :         REQUIRE(HERE() != start, REG_EMPTY);    /* require nonempty */
     434              30 : }
     435                 : 
     436                 : /*
     437                 :  - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
     438                 :  == static int p_simp_re(register struct parse *p, int starordinary);
     439                 :  */
     440                 : static int                      /* was the simple RE an unbackslashed $? */
     441                 : p_simp_re(p, starordinary)
     442                 : register struct parse *p;
     443                 : int starordinary;               /* is a leading * an ordinary character? */
     444              78 : {
     445                 :         register int c;
     446                 :         register int count;
     447                 :         register int count2;
     448                 :         register sopno pos;
     449                 :         register int i;
     450                 :         register sopno subno;
     451                 : #       define  BACKSL  (1<<CHAR_BIT)
     452                 : 
     453              78 :         pos = HERE();           /* repetion op, if any, covers from here */
     454                 : 
     455                 :         assert(MORE());         /* caller should have ensured this */
     456              78 :         c = GETNEXT();
     457              78 :         if (c == '\\') {
     458               0 :                 REQUIRE(MORE(), REG_EESCAPE);
     459               0 :                 c = BACKSL | (unsigned char)GETNEXT();
     460                 :         }
     461              78 :         switch (c) {
     462                 :         case '.':
     463               0 :                 if (p->g->cflags&REG_NEWLINE)
     464               0 :                         nonnewline(p);
     465                 :                 else
     466               0 :                         EMIT(OANY, 0);
     467               0 :                 break;
     468                 :         case '[':
     469               0 :                 p_bracket(p);
     470               0 :                 break;
     471                 :         case BACKSL|'{':
     472               0 :                 SETERROR(REG_BADRPT);
     473               0 :                 break;
     474                 :         case BACKSL|'(':
     475               0 :                 p->g->nsub++;
     476               0 :                 subno = p->g->nsub;
     477               0 :                 if (subno < NPAREN)
     478               0 :                         p->pbegin[subno] = HERE();
     479               0 :                 EMIT(OLPAREN, subno);
     480                 :                 /* the MORE here is an error heuristic */
     481               0 :                 if (MORE() && !SEETWO('\\', ')'))
     482               0 :                         p_bre(p, '\\', ')');
     483               0 :                 if (subno < NPAREN) {
     484               0 :                         p->pend[subno] = HERE();
     485                 :                         assert(p->pend[subno] != 0);
     486                 :                 }
     487               0 :                 EMIT(ORPAREN, subno);
     488               0 :                 REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
     489               0 :                 break;
     490                 :         case BACKSL|')':        /* should not get here -- must be user */
     491                 :         case BACKSL|'}':
     492               0 :                 SETERROR(REG_EPAREN);
     493               0 :                 break;
     494                 :         case BACKSL|'1':
     495                 :         case BACKSL|'2':
     496                 :         case BACKSL|'3':
     497                 :         case BACKSL|'4':
     498                 :         case BACKSL|'5':
     499                 :         case BACKSL|'6':
     500                 :         case BACKSL|'7':
     501                 :         case BACKSL|'8':
     502                 :         case BACKSL|'9':
     503               0 :                 i = (c&~BACKSL) - '0';
     504                 :                 assert(i < NPAREN);
     505               0 :                 if (p->pend[i] != 0) {
     506                 :                         assert(i <= p->g->nsub);
     507               0 :                         EMIT(OBACK_, i);
     508                 :                         assert(p->pbegin[i] != 0);
     509                 :                         assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
     510                 :                         assert(OP(p->strip[p->pend[i]]) == ORPAREN);
     511               0 :                         (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
     512               0 :                         EMIT(O_BACK, i);
     513                 :                 } else
     514               0 :                         SETERROR(REG_ESUBREG);
     515               0 :                 p->g->backrefs = 1;
     516               0 :                 break;
     517                 :         case '*':
     518               0 :                 REQUIRE(starordinary, REG_BADRPT);
     519                 :                 /* FALLTHROUGH */
     520                 :         default:
     521              78 :                 ordinary(p, (unsigned char)c);  /* takes off BACKSL, if any */
     522                 :                 break;
     523                 :         }
     524                 : 
     525              78 :         if (EAT('*')) {         /* implemented as +? */
     526                 :                 /* this case does not require the (y|) trick, noKLUDGE */
     527               0 :                 INSERT(OPLUS_, pos);
     528               0 :                 ASTERN(O_PLUS, pos);
     529               0 :                 INSERT(OQUEST_, pos);
     530               0 :                 ASTERN(O_QUEST, pos);
     531              78 :         } else if (EATTWO('\\', '{')) {
     532               0 :                 count = p_count(p);
     533               0 :                 if (EAT(',')) {
     534               0 :                         if (MORE() && isdigit(PEEK())) {
     535               0 :                                 count2 = p_count(p);
     536               0 :                                 REQUIRE(count <= count2, REG_BADBR);
     537                 :                         } else          /* single number with comma */
     538               0 :                                 count2 = INFINITY;
     539                 :                 } else          /* just a single number */
     540               0 :                         count2 = count;
     541               0 :                 repeat(p, pos, count, count2);
     542               0 :                 if (!EATTWO('\\', '}')) {       /* error heuristics */
     543               0 :                         while (MORE() && !SEETWO('\\', '}'))
     544               0 :                                 NEXT();
     545               0 :                         REQUIRE(MORE(), REG_EBRACE);
     546               0 :                         SETERROR(REG_BADBR);
     547                 :                 }
     548              78 :         } else if (c == (unsigned char)'$')     /* $ (but not \$) ends it */
     549               0 :                 return(1);
     550                 : 
     551              78 :         return(0);
     552                 : }
     553                 : 
     554                 : /*
     555                 :  - p_count - parse a repetition count
     556                 :  == static int p_count(register struct parse *p);
     557                 :  */
     558                 : static int                      /* the value */
     559                 : p_count(p)
     560                 : register struct parse *p;
     561             162 : {
     562             162 :         register int count = 0;
     563             162 :         register int ndigits = 0;
     564                 : 
     565             528 :         while (MORE() && isdigit(PEEK()) && count <= DUPMAX) {
     566             204 :                 count = count*10 + (GETNEXT() - '0');
     567             204 :                 ndigits++;
     568                 :         }
     569                 : 
     570             162 :         REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
     571             162 :         return(count);
     572                 : }
     573                 : 
     574                 : /*
     575                 :  - p_bracket - parse a bracketed character list
     576                 :  == static void p_bracket(register struct parse *p);
     577                 :  *
     578                 :  * Note a significant property of this code:  if the allocset() did SETERROR,
     579                 :  * no set operations are done.
     580                 :  */
     581                 : static void
     582                 : p_bracket(p)
     583                 : register struct parse *p;
     584             320 : {
     585             320 :         register cset *cs = allocset(p);
     586             320 :         register int invert = 0;
     587                 : 
     588                 :         /* Dept of Truly Sickening Special-Case Kludges */
     589             320 :         if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
     590               0 :                 EMIT(OBOW, 0);
     591               0 :                 NEXTn(6);
     592               0 :                 return;
     593                 :         }
     594             320 :         if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
     595               0 :                 EMIT(OEOW, 0);
     596               0 :                 NEXTn(6);
     597               0 :                 return;
     598                 :         }
     599                 : 
     600             320 :         if (EAT('^'))
     601              21 :                 invert++;       /* make note to invert set at end */
     602             320 :         if (EAT(']'))
     603               0 :                 CHadd(cs, ']');
     604             320 :         else if (EAT('-'))
     605               1 :                 CHadd(cs, '-');
     606            1005 :         while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
     607             365 :                 p_b_term(p, cs);
     608             320 :         if (EAT('-'))
     609              10 :                 CHadd(cs, '-');
     610             320 :         MUSTEAT(']', REG_EBRACK);
     611                 : 
     612             320 :         if (p->error != 0)   /* don't mess things up further */
     613              18 :                 return;
     614                 : 
     615             302 :         if (p->g->cflags&REG_ICASE) {
     616                 :                 register int i;
     617                 :                 register int ci;
     618                 : 
     619           57311 :                 for (i = p->g->csetsize - 1; i >= 0; i--)
     620           57088 :                         if (CHIN(cs, i) && isalpha(i)) {
     621            1221 :                                 ci = othercase(i);
     622            1221 :                                 if (ci != i)
     623            1221 :                                         CHadd(cs, ci);
     624                 :                         }
     625             223 :                 if (cs->multis != NULL)
     626               0 :                         mccase(p, cs);
     627                 :         }
     628             302 :         if (invert) {
     629                 :                 register int i;
     630                 : 
     631            5397 :                 for (i = p->g->csetsize - 1; i >= 0; i--)
     632            5376 :                         if (CHIN(cs, i))
     633             117 :                                 CHsub(cs, i);
     634                 :                         else
     635            5259 :                                 CHadd(cs, i);
     636              21 :                 if (p->g->cflags&REG_NEWLINE)
     637               0 :                         CHsub(cs, '\n');
     638              21 :                 if (cs->multis != NULL)
     639               0 :                         mcinvert(p, cs);
     640                 :         }
     641                 : 
     642                 :         assert(cs->multis == NULL);          /* xxx */
     643                 : 
     644             302 :         if (nch(p, cs) == 1) {          /* optimize singleton sets */
     645               5 :                 ordinary(p, firstch(p, cs));
     646               5 :                 freeset(p, cs);
     647                 :         } else
     648             297 :                 EMIT(OANYOF, freezeset(p, cs));
     649                 : }
     650                 : 
     651                 : /*
     652                 :  - p_b_term - parse one term of a bracketed character list
     653                 :  == static void p_b_term(register struct parse *p, register cset *cs);
     654                 :  */
     655                 : static void
     656                 : p_b_term(p, cs)
     657                 : register struct parse *p;
     658                 : register cset *cs;
     659             365 : {
     660                 :         register unsigned char c;
     661                 :         register unsigned char start, finish;
     662                 :         register int i;
     663                 : 
     664                 :         /* classify what we've got */
     665             365 :         switch ((MORE()) ? PEEK() : '\0') {
     666                 :         case '[':
     667              61 :                 c = (MORE2()) ? PEEK2() : '\0';
     668              61 :                 break;
     669                 :         case '-':
     670               6 :                 SETERROR(REG_ERANGE);
     671               6 :                 return;                 /* NOTE RETURN */
     672                 :                 break;
     673                 :         default:
     674             298 :                 c = '\0';
     675                 :                 break;
     676                 :         }
     677                 : 
     678             359 :         switch (c) {
     679                 :         case ':':               /* character class */
     680              61 :                 NEXT2();
     681              61 :                 REQUIRE(MORE(), REG_EBRACK);
     682              61 :                 c = PEEK();
     683              61 :                 REQUIRE(c != '-' && c != ']', REG_ECTYPE);
     684              61 :                 p_b_cclass(p, cs);
     685              61 :                 REQUIRE(MORE(), REG_EBRACK);
     686              61 :                 REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
     687              61 :                 break;
     688                 :         case '=':               /* equivalence class */
     689               0 :                 NEXT2();
     690               0 :                 REQUIRE(MORE(), REG_EBRACK);
     691               0 :                 c = PEEK();
     692               0 :                 REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
     693               0 :                 p_b_eclass(p, cs);
     694               0 :                 REQUIRE(MORE(), REG_EBRACK);
     695               0 :                 REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
     696               0 :                 break;
     697                 :         default:                /* symbol, ordinary character, or range */
     698                 : /* xxx revision needed for multichar stuff */
     699             298 :                 start = p_b_symbol(p);
     700             350 :                 if (SEE('-') && MORE2() && PEEK2() != ']') {
     701                 :                         /* range */
     702              52 :                         NEXT();
     703              52 :                         if (EAT('-'))
     704               0 :                                 finish = '-';
     705                 :                         else
     706              52 :                                 finish = p_b_symbol(p);
     707                 :                 } else
     708             246 :                         finish = start;
     709                 : /* xxx what about signed chars here... */
     710             298 :                 REQUIRE(start <= finish, REG_ERANGE);
     711            1106 :                 for (i = start; i <= finish; i++)
     712             808 :                         CHadd(cs, i);
     713                 :                 break;
     714                 :         }
     715                 : }
     716                 : 
     717                 : /*
     718                 :  - p_b_cclass - parse a character-class name and deal with it
     719                 :  == static void p_b_cclass(register struct parse *p, register cset *cs);
     720                 :  */
     721                 : static void
     722                 : p_b_cclass(p, cs)
     723                 : register struct parse *p;
     724                 : register cset *cs;
     725              61 : {
     726              61 :         register unsigned char *sp = p->next;
     727                 :         register struct cclass *cp;
     728                 :         register size_t len;
     729                 :         register unsigned char *u;
     730                 :         register unsigned char c;
     731                 : 
     732             427 :         while (MORE() && isalpha(PEEK()))
     733             305 :                 NEXT();
     734              61 :         len = p->next - sp;
     735             322 :         for (cp = cclasses; cp->name != NULL; cp++)
     736             322 :                 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
     737              61 :                         break;
     738              61 :         if (cp->name == NULL) {
     739                 :                 /* oops, didn't find it */
     740               0 :                 SETERROR(REG_ECTYPE);
     741               0 :                 return;
     742                 :         }
     743                 : 
     744              61 :         u = cp->chars;
     745            2058 :         while ((c = *u++) != '\0')
     746            1936 :                 CHadd(cs, c);
     747              61 :         for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
     748               0 :                 MCadd(p, cs, u);
     749                 : }
     750                 : 
     751                 : /*
     752                 :  - p_b_eclass - parse an equivalence-class name and deal with it
     753                 :  == static void p_b_eclass(register struct parse *p, register cset *cs);
     754                 :  *
     755                 :  * This implementation is incomplete. xxx
     756                 :  */
     757                 : static void
     758                 : p_b_eclass(p, cs)
     759                 : register struct parse *p;
     760                 : register cset *cs;
     761               0 : {
     762                 :         register unsigned char c;
     763                 : 
     764               0 :         c = p_b_coll_elem(p, '=');
     765               0 :         CHadd(cs, c);
     766               0 : }
     767                 : 
     768                 : /*
     769                 :  - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
     770                 :  == static char p_b_symbol(register struct parse *p);
     771                 :  */
     772                 : static unsigned char                    /* value of symbol */
     773                 : p_b_symbol(p)
     774                 : register struct parse *p;
     775             350 : {
     776                 :         register unsigned char value;
     777                 : 
     778             350 :         REQUIRE(MORE(), REG_EBRACK);
     779             350 :         if (!EATTWO('[', '.'))
     780             350 :                 return(GETNEXT());
     781                 : 
     782                 :         /* collating symbol */
     783               0 :         value = p_b_coll_elem(p, '.');
     784               0 :         REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
     785               0 :         return(value);
     786                 : }
     787                 : 
     788                 : /*
     789                 :  - p_b_coll_elem - parse a collating-element name and look it up
     790                 :  == static char p_b_coll_elem(register struct parse *p, int endc);
     791                 :  */
     792                 : static unsigned char                    /* value of collating element */
     793                 : p_b_coll_elem(p, endc)
     794                 : register struct parse *p;
     795                 : int endc;                       /* name ended by endc,']' */
     796               0 : {
     797               0 :         register unsigned char *sp = p->next;
     798                 :         register struct cname *cp;
     799                 :         register int len;
     800                 : 
     801               0 :         while (MORE() && !SEETWO(endc, ']'))
     802               0 :                 NEXT();
     803               0 :         if (!MORE()) {
     804               0 :                 SETERROR(REG_EBRACK);
     805               0 :                 return(0);
     806                 :         }
     807               0 :         len = p->next - sp;
     808               0 :         for (cp = cnames; cp->name != NULL; cp++)
     809               0 :                 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
     810               0 :                         return(cp->code);    /* known name */
     811               0 :         if (len == 1)
     812               0 :                 return(*sp);    /* single character */
     813               0 :         SETERROR(REG_ECOLLATE);                 /* neither */
     814               0 :         return(0);
     815                 : }
     816                 : 
     817                 : /*
     818                 :  - othercase - return the case counterpart of an alphabetic
     819                 :  == static char othercase(int ch);
     820                 :  */
     821                 : static unsigned char                    /* if no counterpart, return ch */
     822                 : othercase(ch)
     823                 : int ch;
     824            1384 : {
     825                 :         assert(isalpha(ch));
     826            1384 :         if (isupper(ch))
     827             686 :                 return(tolower(ch));
     828             698 :         else if (islower(ch))
     829             698 :                 return(toupper(ch));
     830                 :         else                    /* peculiar, but could happen */
     831               0 :                 return(ch);
     832                 : }
     833                 : 
     834                 : /*
     835                 :  - bothcases - emit a dualcase version of a two-case character
     836                 :  == static void bothcases(register struct parse *p, int ch);
     837                 :  *
     838                 :  * Boy, is this implementation ever a kludge...
     839                 :  */
     840                 : static void
     841                 : bothcases(p, ch)
     842                 : register struct parse *p;
     843                 : int ch;
     844             163 : {
     845             163 :         register unsigned char *oldnext = p->next;
     846             163 :         register unsigned char *oldend = p->end;
     847                 :         unsigned char bracket[3];
     848                 : 
     849                 :         assert(othercase(ch) != ch);    /* p_bracket() would recurse */
     850             163 :         p->next = bracket;
     851             163 :         p->end = bracket+2;
     852             163 :         bracket[0] = ch;
     853             163 :         bracket[1] = ']';
     854             163 :         bracket[2] = '\0';
     855             163 :         p_bracket(p);
     856                 :         assert(p->next == bracket+2);
     857             163 :         p->next = oldnext;
     858             163 :         p->end = oldend;
     859             163 : }
     860                 : 
     861                 : /*
     862                 :  - ordinary - emit an ordinary character
     863                 :  == static void ordinary(register struct parse *p, register int ch);
     864                 :  */
     865                 : static void
     866                 : ordinary(p, ch)
     867                 : register struct parse *p;
     868                 : register int ch;
     869             730 : {
     870             730 :         register cat_t *cap = p->g->categories;
     871                 : 
     872             893 :         if ((p->g->cflags&REG_ICASE) && isalpha(ch) && othercase(ch) != ch)
     873             163 :                 bothcases(p, ch);
     874                 :         else {
     875             567 :                 EMIT(OCHAR, (unsigned char)ch);
     876             567 :                 if (cap[ch] == 0)
     877             479 :                         cap[ch] = p->g->ncategories++;
     878                 :         }
     879             730 : }
     880                 : 
     881                 : /*
     882                 :  - nonnewline - emit REG_NEWLINE version of OANY
     883                 :  == static void nonnewline(register struct parse *p);
     884                 :  *
     885                 :  * Boy, is this implementation ever a kludge...
     886                 :  */
     887                 : static void
     888                 : nonnewline(p)
     889                 : register struct parse *p;
     890               0 : {
     891               0 :         register unsigned char *oldnext = p->next;
     892               0 :         register unsigned char *oldend = p->end;
     893                 :         unsigned char bracket[4];
     894                 : 
     895               0 :         p->next = bracket;
     896               0 :         p->end = bracket+3;
     897               0 :         bracket[0] = '^';
     898               0 :         bracket[1] = '\n';
     899               0 :         bracket[2] = ']';
     900               0 :         bracket[3] = '\0';
     901               0 :         p_bracket(p);
     902                 :         assert(p->next == bracket+3);
     903               0 :         p->next = oldnext;
     904               0 :         p->end = oldend;
     905               0 : }
     906                 : 
     907                 : /*
     908                 :  - repeat - generate code for a bounded repetition, recursively if needed
     909                 :  == static void repeat(register struct parse *p, sopno start, int from, int to);
     910                 :  */
     911                 : static void
     912                 : repeat(p, start, from, to)
     913                 : register struct parse *p;
     914                 : sopno start;                    /* operand from here to end of strip */
     915                 : int from;                       /* repeated from this number */
     916                 : int to;                         /* to this number of times (maybe INFINITY) */
     917             966 : {
     918             966 :         register sopno finish = HERE();
     919                 : #       define  N       2
     920                 : #       define  INF     3
     921                 : #       define  REP(f, t)       ((f)*8 + (t))
     922                 : #       define  MAP(n)  (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
     923                 :         register sopno copy;
     924                 : 
     925             966 :         if (p->error != 0)   /* head off possible runaway recursion */
     926              12 :                 return;
     927                 : 
     928                 :         assert(from <= to);
     929                 : 
     930             954 :         switch (REP(MAP(from), MAP(to))) {
     931                 :         case REP(0, 0):                 /* must be user doing this */
     932               6 :                 DROP(finish-start);     /* drop the operand */
     933               6 :                 break;
     934                 :         case REP(0, 1):                 /* as x{1,1}? */
     935                 :         case REP(0, N):                 /* as x{1,n}? */
     936                 :         case REP(0, INF):               /* as x{1,}? */
     937                 :                 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
     938              10 :                 INSERT(OCH_, start);            /* offset is wrong... */
     939              10 :                 repeat(p, start+1, 1, to);
     940              10 :                 ASTERN(OOR1, start);
     941              10 :                 AHEAD(start);                   /* ... fix it */
     942              10 :                 EMIT(OOR2, 0);
     943              10 :                 AHEAD(THERE());
     944              10 :                 ASTERN(O_CH, THERETHERE());
     945              10 :                 break;
     946                 :         case REP(1, 1):                 /* trivial case */
     947                 :                 /* done */
     948              88 :                 break;
     949                 :         case REP(1, N):                 /* as x?x{1,n-1} */
     950                 :                 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
     951             110 :                 INSERT(OCH_, start);
     952             110 :                 ASTERN(OOR1, start);
     953             110 :                 AHEAD(start);
     954             110 :                 EMIT(OOR2, 0);                  /* offset very wrong... */
     955             110 :                 AHEAD(THERE());                 /* ...so fix it */
     956             110 :                 ASTERN(O_CH, THERETHERE());
     957             110 :                 copy = dupl(p, start+1, finish+1);
     958                 :                 assert(copy == finish+4);
     959             110 :                 repeat(p, copy, 1, to-1);
     960             110 :                 break;
     961                 :         case REP(1, INF):               /* as x+ */
     962              10 :                 INSERT(OPLUS_, start);
     963              10 :                 ASTERN(O_PLUS, start);
     964              10 :                 break;
     965                 :         case REP(N, N):                 /* as xx{m-1,n-1} */
     966             730 :                 copy = dupl(p, start, finish);
     967             730 :                 repeat(p, copy, from-1, to-1);
     968             730 :                 break;
     969                 :         case REP(N, INF):               /* as xx{n-1,INF} */
     970               0 :                 copy = dupl(p, start, finish);
     971               0 :                 repeat(p, copy, from-1, to);
     972               0 :                 break;
     973                 :         default:                        /* "can't happen" */
     974               0 :                 SETERROR(REG_ASSERT);   /* just in case */
     975                 :                 break;
     976                 :         }
     977                 : }
     978                 : 
     979                 : /*
     980                 :  - seterr - set an error condition
     981                 :  == static int seterr(register struct parse *p, int e);
     982                 :  */
     983                 : static int                      /* useless but makes type checking happy */
     984                 : seterr(p, e)
     985                 : register struct parse *p;
     986                 : int e;
     987             236 : {
     988             236 :         if (p->error == 0)   /* keep earliest error condition */
     989             146 :                 p->error = e;
     990             236 :         p->next = nuls;              /* try to bring things to a halt */
     991             236 :         p->end = nuls;
     992             236 :         return(0);              /* make the return value well-defined */
     993                 : }
     994                 : 
     995                 : /*
     996                 :  - allocset - allocate a set of characters for []
     997                 :  == static cset *allocset(register struct parse *p);
     998                 :  */
     999                 : static cset *
    1000                 : allocset(p)
    1001                 : register struct parse *p;
    1002             320 : {
    1003             320 :         register int no = p->g->ncsets++;
    1004                 :         register size_t nc;
    1005                 :         register size_t nbytes;
    1006                 :         register cset *cs;
    1007             320 :         register size_t css = (size_t)p->g->csetsize;
    1008                 :         register int i;
    1009                 : 
    1010             320 :         if (no >= p->ncsalloc) {  /* need another column of space */
    1011             177 :                 p->ncsalloc += CHAR_BIT;
    1012             177 :                 nc = p->ncsalloc;
    1013                 :                 assert(nc % CHAR_BIT == 0);
    1014             177 :                 nbytes = nc / CHAR_BIT * css;
    1015             177 :                 if (p->g->sets == NULL)
    1016             177 :                         p->g->sets = (cset *)malloc(nc * sizeof(cset));
    1017                 :                 else
    1018               0 :                         p->g->sets = (cset *)realloc((unsigned char *)p->g->sets,
    1019                 :                                                         nc * sizeof(cset));
    1020             177 :                 if (p->g->setbits == NULL)
    1021             177 :                         p->g->setbits = (uch *)malloc(nbytes);
    1022                 :                 else {
    1023               0 :                         p->g->setbits = (uch *)realloc((unsigned char *)p->g->setbits,
    1024                 :                                                                 nbytes);
    1025                 :                         /* xxx this isn't right if setbits is now NULL */
    1026               0 :                         for (i = 0; i < no; i++)
    1027               0 :                                 p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
    1028                 :                 }
    1029             354 :                 if (p->g->sets != NULL && p->g->setbits != NULL)
    1030             177 :                         (void) memset((unsigned char *)p->g->setbits + (nbytes - css),
    1031                 :                                                                 0, css);
    1032                 :                 else {
    1033               0 :                         no = 0;
    1034               0 :                         SETERROR(REG_ESPACE);
    1035                 :                         /* caller's responsibility not to do set ops */
    1036                 :                 }
    1037                 :         }
    1038                 : 
    1039                 :         assert(p->g->sets != NULL);       /* xxx */
    1040             320 :         cs = &p->g->sets[no];
    1041             320 :         cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
    1042             320 :         cs->mask = 1 << ((no) % CHAR_BIT);
    1043             320 :         cs->hash = 0;
    1044             320 :         cs->smultis = 0;
    1045             320 :         cs->multis = NULL;
    1046                 : 
    1047             320 :         return(cs);
    1048                 : }
    1049                 : 
    1050                 : /*
    1051                 :  - freeset - free a now-unused set
    1052                 :  == static void freeset(register struct parse *p, register cset *cs);
    1053                 :  */
    1054                 : static void
    1055                 : freeset(p, cs)
    1056                 : register struct parse *p;
    1057                 : register cset *cs;
    1058              43 : {
    1059                 :         register size_t i;
    1060              43 :         register cset *top = &p->g->sets[p->g->ncsets];
    1061              43 :         register size_t css = (size_t)p->g->csetsize;
    1062                 : 
    1063           11051 :         for (i = 0; i < css; i++)
    1064           11008 :                 CHsub(cs, i);
    1065              43 :         if (cs == top-1)        /* recover only the easy case */
    1066              43 :                 p->g->ncsets--;
    1067              43 : }
    1068                 : 
    1069                 : /*
    1070                 :  - freezeset - final processing on a set of characters
    1071                 :  == static int freezeset(register struct parse *p, register cset *cs);
    1072                 :  *
    1073                 :  * The main task here is merging identical sets.  This is usually a waste
    1074                 :  * of time (although the hash code minimizes the overhead), but can win
    1075                 :  * big if REG_ICASE is being used.  REG_ICASE, by the way, is why the hash
    1076                 :  * is done using addition rather than xor -- all ASCII [aA] sets xor to
    1077                 :  * the same value!
    1078                 :  */
    1079                 : static int                      /* set number */
    1080                 : freezeset(p, cs)
    1081                 : register struct parse *p;
    1082                 : register cset *cs;
    1083             297 : {
    1084             297 :         register uch h = cs->hash;
    1085                 :         register size_t i;
    1086             297 :         register cset *top = &p->g->sets[p->g->ncsets];
    1087                 :         register cset *cs2;
    1088             297 :         register size_t css = (size_t)p->g->csetsize;
    1089                 : 
    1090                 :         /* look for an earlier one which is the same */
    1091             802 :         for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
    1092             543 :                 if (cs2->hash == h && cs2 != cs) {
    1093                 :                         /* maybe */
    1094            9766 :                         for (i = 0; i < css; i++)
    1095            9728 :                                 if (!!CHIN(cs2, i) != !!CHIN(cs, i))
    1096               0 :                                         break;          /* no */
    1097              38 :                         if (i == css)
    1098              38 :                                 break;                  /* yes */
    1099                 :                 }
    1100                 : 
    1101             297 :         if (cs2 < top) {     /* found one */
    1102              38 :                 freeset(p, cs);
    1103              38 :                 cs = cs2;
    1104                 :         }
    1105                 : 
    1106             297 :         return((int)(cs - p->g->sets));
    1107                 : }
    1108                 : 
    1109                 : /*
    1110                 :  - firstch - return first character in a set (which must have at least one)
    1111                 :  == static int firstch(register struct parse *p, register cset *cs);
    1112                 :  */
    1113                 : static int                      /* character; there is no "none" value */
    1114                 : firstch(p, cs)
    1115                 : register struct parse *p;
    1116                 : register cset *cs;
    1117               5 : {
    1118                 :         register size_t i;
    1119               5 :         register size_t css = (size_t)p->g->csetsize;
    1120                 : 
    1121             605 :         for (i = 0; i < css; i++)
    1122             605 :                 if (CHIN(cs, i))
    1123               5 :                         return((unsigned char)i);
    1124                 :         assert(never);
    1125               0 :         return(0);              /* arbitrary */
    1126                 : }
    1127                 : 
    1128                 : /*
    1129                 :  - nch - number of characters in a set
    1130                 :  == static int nch(register struct parse *p, register cset *cs);
    1131                 :  */
    1132                 : static int
    1133                 : nch(p, cs)
    1134                 : register struct parse *p;
    1135                 : register cset *cs;
    1136             302 : {
    1137                 :         register size_t i;
    1138             302 :         register size_t css = (size_t)p->g->csetsize;
    1139             302 :         register int n = 0;
    1140                 : 
    1141           77614 :         for (i = 0; i < css; i++)
    1142           77312 :                 if (CHIN(cs, i))
    1143            8280 :                         n++;
    1144             302 :         return(n);
    1145                 : }
    1146                 : 
    1147                 : /*
    1148                 :  - mcadd - add a collating element to a cset
    1149                 :  == static void mcadd(register struct parse *p, register cset *cs, \
    1150                 :  ==     register char *cp);
    1151                 :  */
    1152                 : static void
    1153                 : mcadd(p, cs, cp)
    1154                 : register struct parse *p;
    1155                 : register cset *cs;
    1156                 : register unsigned char *cp;
    1157               0 : {
    1158               0 :         register size_t oldend = cs->smultis;
    1159                 : 
    1160               0 :         cs->smultis += strlen(cp) + 1;
    1161               0 :         if (cs->multis == NULL)
    1162               0 :                 cs->multis = malloc(cs->smultis);
    1163                 :         else
    1164               0 :                 cs->multis = realloc(cs->multis, cs->smultis);
    1165               0 :         if (cs->multis == NULL) {
    1166               0 :                 SETERROR(REG_ESPACE);
    1167               0 :                 return;
    1168                 :         }
    1169                 : 
    1170               0 :         (void) strcpy(cs->multis + oldend - 1, cp);
    1171               0 :         cs->multis[cs->smultis - 1] = '\0';
    1172                 : }
    1173                 : 
    1174                 : #if 0
    1175                 : /*
    1176                 :  - mcsub - subtract a collating element from a cset
    1177                 :  == static void mcsub(register cset *cs, register unsigned char *cp);
    1178                 :  */
    1179                 : static void
    1180                 : mcsub(cs, cp)
    1181                 : register unsigned cset *cs;
    1182                 : register unsigned char *cp;
    1183                 : {
    1184                 :         register unsigned char *fp = mcfind(cs, cp);
    1185                 :         register size_t len = strlen(fp);
    1186                 : 
    1187                 :         assert(fp != NULL);
    1188                 :         (void) memmove(fp, fp + len + 1,
    1189                 :                                 cs->smultis - (fp + len + 1 - cs->multis));
    1190                 :         cs->smultis -= len;
    1191                 : 
    1192                 :         if (cs->smultis == 0) {
    1193                 :                 free(cs->multis);
    1194                 :                 cs->multis = NULL;
    1195                 :                 return;
    1196                 :         }
    1197                 : 
    1198                 :         cs->multis = realloc(cs->multis, cs->smultis);
    1199                 :         assert(cs->multis != NULL);
    1200                 : }
    1201                 : 
    1202                 : /*
    1203                 :  - mcin - is a collating element in a cset?
    1204                 :  == static int mcin(register cset *cs, register unsigned char *cp);
    1205                 :  */
    1206                 : static int
    1207                 : mcin(cs, cp)
    1208                 : register cset *cs;
    1209                 : register unsigned char *cp;
    1210                 : {
    1211                 :         return(mcfind(cs, cp) != NULL);
    1212                 : }
    1213                 : 
    1214                 : 
    1215                 : /*
    1216                 :  - mcfind - find a collating element in a cset
    1217                 :  == static unsigned char *mcfind(register cset *cs, register unsigned char *cp);
    1218                 :  */
    1219                 : static unsigned char *
    1220                 : mcfind(cs, cp)
    1221                 : register cset *cs;
    1222                 : register unsigned char *cp;
    1223                 : {
    1224                 :         register unsigned char *p;
    1225                 : 
    1226                 :         if (cs->multis == NULL)
    1227                 :                 return(NULL);
    1228                 :         for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
    1229                 :                 if (strcmp(cp, p) == 0)
    1230                 :                         return(p);
    1231                 :         return(NULL);
    1232                 : }
    1233                 : #endif
    1234                 : 
    1235                 : /*
    1236                 :  - mcinvert - invert the list of collating elements in a cset
    1237                 :  == static void mcinvert(register struct parse *p, register cset *cs);
    1238                 :  *
    1239                 :  * This would have to know the set of possibilities.  Implementation
    1240                 :  * is deferred.
    1241                 :  */
    1242                 : static void
    1243                 : mcinvert(p, cs)
    1244                 : register struct parse *p;
    1245                 : register cset *cs;
    1246               0 : {
    1247                 :         assert(cs->multis == NULL);  /* xxx */
    1248               0 : }
    1249                 : 
    1250                 : /*
    1251                 :  - mccase - add case counterparts of the list of collating elements in a cset
    1252                 :  == static void mccase(register struct parse *p, register cset *cs);
    1253                 :  *
    1254                 :  * This would have to know the set of possibilities.  Implementation
    1255                 :  * is deferred.
    1256                 :  */
    1257                 : static void
    1258                 : mccase(p, cs)
    1259                 : register struct parse *p;
    1260                 : register cset *cs;
    1261               0 : {
    1262                 :         assert(cs->multis == NULL);  /* xxx */
    1263               0 : }
    1264                 : 
    1265                 : /*
    1266                 :  - isinsets - is this character in any sets?
    1267                 :  == static int isinsets(register struct re_guts *g, int c);
    1268                 :  */
    1269                 : static int                      /* predicate */
    1270                 : isinsets(g, c)
    1271                 : register struct re_guts *g;
    1272                 : int c;
    1273           60523 : {
    1274                 :         register uch *col;
    1275                 :         register int i;
    1276           60523 :         register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
    1277           60523 :         register unsigned uc = (unsigned char)c;
    1278                 : 
    1279           88472 :         for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
    1280           28179 :                 if (col[uc] != 0)
    1281             230 :                         return(1);
    1282           60293 :         return(0);
    1283                 : }
    1284                 : 
    1285                 : /*
    1286                 :  - samesets - are these two characters in exactly the same sets?
    1287                 :  == static int samesets(register struct re_guts *g, int c1, int c2);
    1288                 :  */
    1289                 : static int                      /* predicate */
    1290                 : samesets(g, c1, c2)
    1291                 : register struct re_guts *g;
    1292                 : int c1;
    1293                 : int c2;
    1294           43644 : {
    1295                 :         register uch *col;
    1296                 :         register int i;
    1297           43644 :         register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
    1298           43644 :         register unsigned uc1 = (unsigned char)c1;
    1299           43644 :         register unsigned uc2 = (unsigned char)c2;
    1300                 : 
    1301           51539 :         for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
    1302           43644 :                 if (col[uc1] != col[uc2])
    1303           35749 :                         return(0);
    1304            7895 :         return(1);
    1305                 : }
    1306                 : 
    1307                 : /*
    1308                 :  - categorize - sort out character categories
    1309                 :  == static void categorize(struct parse *p, register struct re_guts *g);
    1310                 :  */
    1311                 : static void
    1312                 : categorize(p, g)
    1313                 : struct parse *p;
    1314                 : register struct re_guts *g;
    1315             415 : {
    1316             415 :         register cat_t *cats = g->categories;
    1317                 :         register int c;
    1318                 :         register int c2;
    1319                 :         register cat_t cat;
    1320                 : 
    1321                 :         /* avoid making error situations worse */
    1322             415 :         if (p->error != 0)
    1323             146 :                 return;
    1324                 : 
    1325           69133 :         for (c = 0; c <= UCHAR_MAX; c++)
    1326           68864 :                 if (cats[c] == 0 && isinsets(g, c)) {
    1327             230 :                         cat = g->ncategories++;
    1328             230 :                         cats[c] = cat;
    1329           47063 :                         for (c2 = c+1; c2 <= UCHAR_MAX; c2++)
    1330           46833 :                                 if (cats[c2] == 0 && samesets(g, c, c2))
    1331            7895 :                                         cats[c2] = cat;
    1332                 :                 }
    1333                 : }
    1334                 : 
    1335                 : /*
    1336                 :  - dupl - emit a duplicate of a bunch of sops
    1337                 :  == static sopno dupl(register struct parse *p, sopno start, sopno finish);
    1338                 :  */
    1339                 : static sopno                    /* start of duplicate */
    1340                 : dupl(p, start, finish)
    1341                 : register struct parse *p;
    1342                 : sopno start;                    /* from here */
    1343                 : sopno finish;                   /* to this less one */
    1344             840 : {
    1345             840 :         register sopno ret = HERE();
    1346             840 :         register sopno len = finish - start;
    1347                 : 
    1348                 :         assert(finish >= start);
    1349             840 :         if (len == 0)
    1350               0 :                 return(ret);
    1351             840 :         enlarge(p, p->ssize + len);  /* this many unexpected additions */
    1352                 :         assert(p->ssize >= p->slen + len);
    1353             840 :         (void) memcpy((char *)(p->strip + p->slen),
    1354                 :                 (char *)(p->strip + start), (size_t)len*sizeof(sop));
    1355             840 :         p->slen += len;
    1356             840 :         return(ret);
    1357                 : }
    1358                 : 
    1359                 : /*
    1360                 :  - doemit - emit a strip operator
    1361                 :  == static void doemit(register struct parse *p, sop op, size_t opnd);
    1362                 :  *
    1363                 :  * It might seem better to implement this as a macro with a function as
    1364                 :  * hard-case backup, but it's just too big and messy unless there are
    1365                 :  * some changes to the data structures.  Maybe later.
    1366                 :  */
    1367                 : static void
    1368                 : doemit(p, op, opnd)
    1369                 : register struct parse *p;
    1370                 : sop op;
    1371                 : size_t opnd;
    1372           27566 : {
    1373                 :         /* avoid making error situations worse */
    1374           27566 :         if (p->error != 0)
    1375             170 :                 return;
    1376                 : 
    1377                 :         /* deal with oversize operands ("can't happen", more or less) */
    1378                 :         assert(opnd < 1<<OPSHIFT);
    1379                 : 
    1380                 :         /* deal with undersized strip */
    1381           27396 :         if (p->slen >= p->ssize)
    1382              82 :                 enlarge(p, (p->ssize+1) / 2 * 3);    /* +50% */
    1383                 :         assert(p->slen < p->ssize);
    1384                 : 
    1385                 :         /* finally, it's all reduced to the easy case */
    1386           27396 :         p->strip[p->slen++] = SOP(op, opnd);
    1387                 : }
    1388                 : 
    1389                 : /*
    1390                 :  - doinsert - insert a sop into the strip
    1391                 :  == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos);
    1392                 :  */
    1393                 : static void
    1394                 : doinsert(p, op, opnd, pos)
    1395                 : register struct parse *p;
    1396                 : sop op;
    1397                 : size_t opnd;
    1398                 : sopno pos;
    1399             247 : {
    1400                 :         register sopno sn;
    1401                 :         register sop s;
    1402                 :         register int i;
    1403                 : 
    1404                 :         /* avoid making error situations worse */
    1405             247 :         if (p->error != 0)
    1406               0 :                 return;
    1407                 : 
    1408             247 :         sn = HERE();
    1409             247 :         EMIT(op, opnd);         /* do checks, ensure space */
    1410                 :         assert(HERE() == sn+1);
    1411             247 :         s = p->strip[sn];
    1412                 : 
    1413                 :         /* adjust paren pointers */
    1414                 :         assert(pos > 0);
    1415            2470 :         for (i = 1; i < NPAREN; i++) {
    1416            2223 :                 if (p->pbegin[i] >= pos) {
    1417               0 :                         p->pbegin[i]++;
    1418                 :                 }
    1419            2223 :                 if (p->pend[i] >= pos) {
    1420               0 :                         p->pend[i]++;
    1421                 :                 }
    1422                 :         }
    1423                 : 
    1424             247 :         memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
    1425                 :                                                 (HERE()-pos-1)*sizeof(sop));
    1426             247 :         p->strip[pos] = s;
    1427                 : }
    1428                 : 
    1429                 : /*
    1430                 :  - dofwd - complete a forward reference
    1431                 :  == static void dofwd(register struct parse *p, sopno pos, sop value);
    1432                 :  */
    1433                 : static void
    1434                 : dofwd(p, pos, value)
    1435                 : register struct parse *p;
    1436                 : register sopno pos;
    1437                 : sop value;
    1438             380 : {
    1439                 :         /* avoid making error situations worse */
    1440             380 :         if (p->error != 0)
    1441               6 :                 return;
    1442                 : 
    1443                 :         assert(value < 1<<OPSHIFT);
    1444             374 :         p->strip[pos] = OP(p->strip[pos]) | value;
    1445                 : }
    1446                 : 
    1447                 : /*
    1448                 :  - enlarge - enlarge the strip
    1449                 :  == static void enlarge(register struct parse *p, sopno size);
    1450                 :  */
    1451                 : static void
    1452                 : enlarge(p, size)
    1453                 : register struct parse *p;
    1454                 : register sopno size;
    1455             922 : {
    1456                 :         register sop *sp;
    1457                 : 
    1458             922 :         if (p->ssize >= size)
    1459               0 :                 return;
    1460                 : 
    1461             922 :         sp = (sop *)realloc(p->strip, size*sizeof(sop));
    1462             922 :         if (sp == NULL) {
    1463               0 :                 SETERROR(REG_ESPACE);
    1464               0 :                 return;
    1465                 :         }
    1466             922 :         p->strip = sp;
    1467             922 :         p->ssize = size;
    1468                 : }
    1469                 : 
    1470                 : /*
    1471                 :  - stripsnug - compact the strip
    1472                 :  == static void stripsnug(register struct parse *p, register struct re_guts *g);
    1473                 :  */
    1474                 : static void
    1475                 : stripsnug(p, g)
    1476                 : register struct parse *p;
    1477                 : register struct re_guts *g;
    1478             415 : {
    1479             415 :         g->nstates = p->slen;
    1480             415 :         g->strip = (sop *)realloc((unsigned char *)p->strip, p->slen * sizeof(sop));
    1481             415 :         if (g->strip == NULL) {
    1482               0 :                 SETERROR(REG_ESPACE);
    1483               0 :                 g->strip = p->strip;
    1484                 :         }
    1485             415 : }
    1486                 : 
    1487                 : /*
    1488                 :  - findmust - fill in must and mlen with longest mandatory literal string
    1489                 :  == static void findmust(register struct parse *p, register struct re_guts *g);
    1490                 :  *
    1491                 :  * This algorithm could do fancy things like analyzing the operands of |
    1492                 :  * for common subsequences.  Someday.  This code is simple and finds most
    1493                 :  * of the interesting cases.
    1494                 :  *
    1495                 :  * Note that must and mlen got initialized during setup.
    1496                 :  */
    1497                 : static void
    1498                 : findmust(p, g)
    1499                 : struct parse *p;
    1500                 : register struct re_guts *g;
    1501             415 : {
    1502                 :         register sop *scan;
    1503             415 :         sop *start = NULL;
    1504             415 :         register sop *newstart = NULL;
    1505                 :         register sopno newlen;
    1506                 :         register sop s;
    1507                 :         register unsigned char *cp;
    1508                 :         register sopno i;
    1509                 : 
    1510                 :         /* avoid making error situations worse */
    1511             415 :         if (p->error != 0)
    1512             146 :                 return;
    1513                 : 
    1514                 :         /* find the longest OCHAR sequence in strip */
    1515             269 :         newlen = 0;
    1516             269 :         scan = g->strip + 1;
    1517                 :         do {
    1518           27034 :                 s = *scan++;
    1519           27034 :                 switch (OP(s)) {
    1520                 :                 case OCHAR:             /* sequence member */
    1521             478 :                         if (newlen == 0)                /* new sequence */
    1522             151 :                                 newstart = scan - 1;
    1523             478 :                         newlen++;
    1524             478 :                         break;
    1525                 :                 case OPLUS_:            /* things that don't break one */
    1526                 :                 case OLPAREN:
    1527                 :                 case ORPAREN:
    1528           16670 :                         break;
    1529                 :                 case OQUEST_:           /* things that must be skipped */
    1530                 :                 case OCH_:
    1531             179 :                         scan--;
    1532                 :                         do {
    1533             364 :                                 scan += OPND(s);
    1534             364 :                                 s = *scan;
    1535                 :                                 /* assert() interferes w debug printouts */
    1536             364 :                                 if (OP(s) != O_QUEST && OP(s) != O_CH &&
    1537                 :                                                         OP(s) != OOR2) {
    1538               0 :                                         g->iflags |= BAD;
    1539               0 :                                         return;
    1540                 :                                 }
    1541             364 :                         } while (OP(s) != O_QUEST && OP(s) != O_CH);
    1542                 :                         /* fallthrough */
    1543                 :                 default:                /* things that break a sequence */
    1544            9886 :                         if (newlen > g->mlen) {           /* ends one */
    1545             120 :                                 start = newstart;
    1546             120 :                                 g->mlen = newlen;
    1547                 :                         }
    1548            9886 :                         newlen = 0;
    1549                 :                         break;
    1550                 :                 }
    1551           27034 :         } while (OP(s) != OEND);
    1552                 : 
    1553             269 :         if (g->mlen == 0)            /* there isn't one */
    1554             157 :                 return;
    1555                 : 
    1556             112 :         if (!start) {
    1557               0 :                 g->mlen = 0;
    1558               0 :                 return;
    1559                 :         }
    1560                 : 
    1561                 :         /* turn it into a character string */
    1562             112 :         g->must = malloc((size_t)g->mlen + 1);
    1563             112 :         if (g->must == NULL) {               /* argh; just forget it */
    1564               0 :                 g->mlen = 0;
    1565               0 :                 return;
    1566                 :         }
    1567             112 :         cp = g->must;
    1568             112 :         scan = start;
    1569             544 :         for (i = g->mlen; i > 0; i--) {
    1570             466 :                 while (OP(s = *scan++) != OCHAR)
    1571                 :                         continue;
    1572                 :                 assert(cp < g->must + g->mlen);
    1573             432 :                 *cp++ = (unsigned char)OPND(s);
    1574                 :         }
    1575                 :         assert(cp == g->must + g->mlen);
    1576             112 :         *cp++ = '\0';           /* just on general principles */
    1577                 : }
    1578                 : 
    1579                 : /*
    1580                 :  - pluscount - count + nesting
    1581                 :  == static sopno pluscount(register struct parse *p, register struct re_guts *g);
    1582                 :  */
    1583                 : static sopno                    /* nesting depth */
    1584                 : pluscount(p, g)
    1585                 : struct parse *p;
    1586                 : register struct re_guts *g;
    1587             415 : {
    1588                 :         register sop *scan;
    1589                 :         register sop s;
    1590             415 :         register sopno plusnest = 0;
    1591             415 :         register sopno maxnest = 0;
    1592                 : 
    1593             415 :         if (p->error != 0)
    1594             146 :                 return(0);      /* there may not be an OEND */
    1595                 : 
    1596             269 :         scan = g->strip + 1;
    1597                 :         do {
    1598           27749 :                 s = *scan++;
    1599           27749 :                 switch (OP(s)) {
    1600                 :                 case OPLUS_:
    1601              52 :                         plusnest++;
    1602              52 :                         break;
    1603                 :                 case O_PLUS:
    1604              52 :                         if (plusnest > maxnest)
    1605              35 :                                 maxnest = plusnest;
    1606              52 :                         plusnest--;
    1607                 :                         break;
    1608                 :                 }
    1609           27749 :         } while (OP(s) != OEND);
    1610             269 :         if (plusnest != 0)
    1611               0 :                 g->iflags |= BAD;
    1612             269 :         return(maxnest);
    1613                 : }

Generated by: LTP GCOV extension version 1.5

Generated at Thu, 19 Nov 2009 08:20:29 +0000 (5 days ago)

Copyright © 2005-2009 The PHP Group
All rights reserved.