PHP  
 PHP: Test and Code Coverage Analysis
downloads | QA | documentation | faq | getting help | mailing lists | reporting bugs | php.net sites | links | my php.net 
 

LCOV - code coverage report
Current view: top level - ext/ereg/regex - engine.c (source / functions) Hit Total Coverage
Test: PHP Code Coverage Lines: 278 427 65.1 %
Date: 2014-08-04 Functions: 10 12 83.3 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * The matching engine and friends.  This file is #included by regexec.c
       3             :  * after suitable #defines of a variety of macros used herein, so that
       4             :  * different state representations can be used without duplicating masses
       5             :  * of code.
       6             :  */
       7             : 
       8             : #ifdef SNAMES
       9             : #define matcher smatcher
      10             : #define fast    sfast
      11             : #define slow    sslow
      12             : #define dissect sdissect
      13             : #define backref sbackref
      14             : #define step    sstep
      15             : #define print   sprint
      16             : #define at      sat
      17             : #define match   smat
      18             : #endif
      19             : #ifdef LNAMES
      20             : #define matcher lmatcher
      21             : #define fast    lfast
      22             : #define slow    lslow
      23             : #define dissect ldissect
      24             : #define backref lbackref
      25             : #define step    lstep
      26             : #define print   lprint
      27             : #define at      lat
      28             : #define match   lmat
      29             : #endif
      30             : 
      31             : /* another structure passed up and down to avoid zillions of parameters */
      32             : struct match {
      33             :         struct re_guts *g;
      34             :         int eflags;
      35             :         regmatch_t *pmatch;     /* [nsub+1] (0 element unused) */
      36             :         unsigned char *offp;            /* offsets work from here */
      37             :         unsigned char *beginp;          /* start of string -- virtual NUL precedes */
      38             :         unsigned char *endp;            /* end of string -- virtual NUL here */
      39             :         unsigned char *coldp;           /* can be no match starting before here */
      40             :         unsigned char **lastpos;                /* [nplus+1] */
      41             :         STATEVARS;
      42             :         states st;              /* current states */
      43             :         states fresh;           /* states for a fresh start */
      44             :         states tmp;             /* temporary */
      45             :         states empty;           /* empty set of states */
      46             : };
      47             : 
      48             : #include "engine.ih"
      49             : 
      50             : #ifdef REDEBUG
      51             : #define SP(t, s, c)     print(m, t, s, c, stdout)
      52             : #define AT(t, p1, p2, s1, s2)   at(m, t, p1, p2, s1, s2)
      53             : #define NOTE(str)       { if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
      54             : #else
      55             : #define SP(t, s, c)     /* nothing */
      56             : #define AT(t, p1, p2, s1, s2)   /* nothing */
      57             : #define NOTE(s) /* nothing */
      58             : #endif
      59             : 
      60             : /*
      61             :  - matcher - the actual matching engine
      62             :  == static int matcher(register struct re_guts *g, char *string, \
      63             :  ==     size_t nmatch, regmatch_t pmatch[], int eflags);
      64             :  */
      65             : static int                      /* 0 success, REG_NOMATCH failure */
      66        1033 : matcher(g, string, nmatch, pmatch, eflags)
      67             : register struct re_guts *g;
      68             : unsigned char *string;
      69             : size_t nmatch;
      70             : regmatch_t pmatch[];
      71             : int eflags;
      72             : {
      73             :         register unsigned char *endp;
      74             :         register size_t i;
      75             :         struct match mv;
      76        1033 :         register struct match *m = &mv;
      77             :         register unsigned char *dp;
      78        1033 :         const register sopno gf = g->firststate+1;   /* +1 for OEND */
      79        1033 :         const register sopno gl = g->laststate;
      80             :         unsigned char *start;
      81             :         unsigned char *stop;
      82             : 
      83             :         /* simplify the situation where possible */
      84        1033 :         if (g->cflags&REG_NOSUB)
      85          31 :                 nmatch = 0;
      86        1033 :         if (eflags&REG_STARTEND) {
      87           0 :                 start = string + pmatch[0].rm_so;
      88           0 :                 stop = string + pmatch[0].rm_eo;
      89             :         } else {
      90        1033 :                 start = string;
      91        1033 :                 stop = start + strlen(start);
      92             :         }
      93        1033 :         if (stop < start)
      94           0 :                 return(REG_INVARG);
      95             : 
      96             :         /* prescreening; this does wonders for this rather slow code */
      97        1033 :         if (g->must != NULL) {
      98        1741 :                 for (dp = start; dp < stop; dp++)
      99        1825 :                         if (*dp == g->must[0] && stop - dp >= g->mlen &&
     100         265 :                                 memcmp(dp, g->must, (size_t)g->mlen) == 0)
     101         209 :                                 break;
     102         390 :                 if (dp == stop)         /* we didn't find g->must */
     103         181 :                         return(REG_NOMATCH);
     104             :         }
     105             : 
     106             :         /* match struct setup */
     107         852 :         m->g = g;
     108         852 :         m->eflags = eflags;
     109         852 :         m->pmatch = NULL;
     110         852 :         m->lastpos = NULL;
     111         852 :         m->offp = string;
     112         852 :         m->beginp = start;
     113         852 :         m->endp = stop;
     114          64 :         STATESETUP(m, 4);
     115         852 :         SETUP(m->st);
     116         852 :         SETUP(m->fresh);
     117         852 :         SETUP(m->tmp);
     118         852 :         SETUP(m->empty);
     119         852 :         CLEAR(m->empty);
     120             : 
     121             :         /* this loop does only one repetition except for backrefs */
     122             :         for (;;) {
     123         852 :                 endp = fast(m, start, stop, gf, gl);
     124         852 :                 if (endp == NULL) {             /* a miss */
     125          12 :                         STATETEARDOWN(m);
     126         145 :                         return(REG_NOMATCH);
     127             :                 }
     128         707 :                 if (nmatch == 0 && !g->backrefs)
     129          29 :                         break;          /* no further info needed */
     130             : 
     131             :                 /* where? */
     132             :                 assert(m->coldp != NULL);
     133             :                 for (;;) {
     134             :                         NOTE("finding start");
     135         872 :                         endp = slow(m, m->coldp, stop, gf, gl);
     136         872 :                         if (endp != NULL)
     137         678 :                                 break;
     138             :                         assert(m->coldp < m->endp);
     139         194 :                         m->coldp++;
     140         194 :                 }
     141         678 :                 if (nmatch == 1 && !g->backrefs)
     142         458 :                         break;          /* no further info needed */
     143             : 
     144             :                 /* oh my, he wants the subexpressions... */
     145         220 :                 if (m->pmatch == NULL)
     146         220 :                         m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
     147             :                                                         sizeof(regmatch_t));
     148         220 :                 if (m->pmatch == NULL) {
     149           0 :                         STATETEARDOWN(m);
     150           0 :                         return(REG_ESPACE);
     151             :                 }
     152        4568 :                 for (i = 1; i <= m->g->nsub; i++)
     153        4348 :                         m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
     154         440 :                 if (!g->backrefs && !(m->eflags&REG_BACKR)) {
     155             :                         NOTE("dissecting");
     156         220 :                         dp = dissect(m, m->coldp, endp, gf, gl);
     157             :                 } else {
     158           0 :                         if (g->nplus > 0 && m->lastpos == NULL)
     159           0 :                                 m->lastpos = (unsigned char **)malloc((g->nplus+1) *
     160             :                                                         sizeof(unsigned char *));
     161           0 :                         if (g->nplus > 0 && m->lastpos == NULL) {
     162           0 :                                 free((char *)m->pmatch);
     163           0 :                                 STATETEARDOWN(m);
     164           0 :                                 return(REG_ESPACE);
     165             :                         }
     166             :                         NOTE("backref dissect");
     167           0 :                         dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
     168             :                 }
     169         220 :                 if (dp != NULL)
     170         220 :                         break;
     171             : 
     172             :                 /* uh-oh... we couldn't find a subexpression-level match */
     173             :                 assert(g->backrefs); /* must be back references doing it */
     174             :                 assert(g->nplus == 0 || m->lastpos != NULL);
     175             :                 for (;;) {
     176           0 :                         if (dp != NULL || endp <= m->coldp)
     177             :                                 break;          /* defeat */
     178             :                         NOTE("backoff");
     179           0 :                         endp = slow(m, m->coldp, endp-1, gf, gl);
     180           0 :                         if (endp == NULL)
     181           0 :                                 break;          /* defeat */
     182             :                         /* try it on a shorter possibility */
     183             : #ifndef NDEBUG
     184             :                         for (i = 1; i <= m->g->nsub; i++) {
     185             :                                 assert(m->pmatch[i].rm_so == -1);
     186             :                                 assert(m->pmatch[i].rm_eo == -1);
     187             :                         }
     188             : #endif
     189             :                         NOTE("backoff dissect");
     190           0 :                         dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
     191           0 :                 }
     192             :                 assert(dp == NULL || dp == endp);
     193           0 :                 if (dp != NULL)         /* found a shorter one */
     194           0 :                         break;
     195             : 
     196             :                 /* despite initial appearances, there is no match here */
     197             :                 NOTE("false alarm");
     198           0 :                 start = m->coldp + 1;        /* recycle starting later */
     199             :                 assert(start <= stop);
     200           0 :         }
     201             : 
     202             :         /* fill in the details if requested */
     203         707 :         if (nmatch > 0) {
     204         678 :                 pmatch[0].rm_so = m->coldp - m->offp;
     205         678 :                 pmatch[0].rm_eo = endp - m->offp;
     206             :         }
     207         707 :         if (nmatch > 1) {
     208             :                 assert(m->pmatch != NULL);
     209        4568 :                 for (i = 1; i < nmatch; i++)
     210        4348 :                         if (i <= m->g->nsub)
     211        4348 :                                 pmatch[i] = m->pmatch[i];
     212             :                         else {
     213           0 :                                 pmatch[i].rm_so = -1;
     214           0 :                                 pmatch[i].rm_eo = -1;
     215             :                         }
     216             :         }
     217             : 
     218         707 :         if (m->pmatch != NULL)
     219         220 :                 free((char *)m->pmatch);
     220         707 :         if (m->lastpos != NULL)
     221           0 :                 free((char *)m->lastpos);
     222          52 :         STATETEARDOWN(m);
     223         707 :         return(0);
     224             : }
     225             : 
     226             : /*
     227             :  - dissect - figure out what matched what, no back references
     228             :  == static unsigned char *dissect(register struct match *m, unsigned char *start, \
     229             :  ==     unsigned char *stop, sopno startst, sopno stopst);
     230             :  */
     231             : static unsigned char *                  /* == stop (success) always */
     232         398 : dissect(m, start, stop, startst, stopst)
     233             : register struct match *m;
     234             : unsigned char *start;
     235             : unsigned char *stop;
     236             : sopno startst;
     237             : sopno stopst;
     238             : {
     239             :         register int i;
     240             :         register sopno ss;      /* start sop of current subRE */
     241             :         register sopno es;      /* end sop of current subRE */
     242             :         register unsigned char *sp;     /* start of string matched by it */
     243             :         register unsigned char *stp;    /* string matched by it cannot pass here */
     244             :         register unsigned char *rest;   /* start of rest of string */
     245             :         register unsigned char *tail;   /* string unmatched by rest of RE */
     246             :         register sopno ssub;    /* start sop of subsubRE */
     247             :         register sopno esub;    /* end sop of subsubRE */
     248             :         register unsigned char *ssp;    /* start of string matched by subsubRE */
     249             :         register unsigned char *sep;    /* end of string matched by subsubRE */
     250             :         register unsigned char *oldssp; /* previous ssp */
     251             :         register unsigned char *dp;
     252             : 
     253             :         AT("diss", start, stop, startst, stopst);
     254         398 :         sp = start;
     255       13765 :         for (ss = startst; ss < stopst; ss = es) {
     256             :                 /* identify end of subRE */
     257       13367 :                 es = ss;
     258       13367 :                 switch (OP(m->g->strip[es])) {
     259             :                 case OPLUS_:
     260             :                 case OQUEST_:
     261         131 :                         es += OPND(m->g->strip[es]);
     262         131 :                         break;
     263             :                 case OCH_:
     264         200 :                         while (OP(m->g->strip[es]) != O_CH)
     265         106 :                                 es += OPND(m->g->strip[es]);
     266             :                         break;
     267             :                 }
     268       13367 :                 es++;
     269             : 
     270             :                 /* figure out what it matched */
     271       13367 :                 switch (OP(m->g->strip[ss])) {
     272             :                 case OEND:
     273             :                         assert(PHP_REGEX_NOPE);
     274           0 :                         break;
     275             :                 case OCHAR:
     276         174 :                         sp++;
     277         174 :                         break;
     278             :                 case OBOL:
     279             :                 case OEOL:
     280             :                 case OBOW:
     281             :                 case OEOW:
     282          22 :                         break;
     283             :                 case OANY:
     284             :                 case OANYOF:
     285        4297 :                         sp++;
     286        4297 :                         break;
     287             :                 case OBACK_:
     288             :                 case O_BACK:
     289             :                         assert(PHP_REGEX_NOPE);
     290           0 :                         break;
     291             :                 /* cases where length of match is hard to find */
     292             :                 case OQUEST_:
     293          52 :                         stp = stop;
     294             :                         for (;;) {
     295             :                                 /* how long could this one be? */
     296         245 :                                 rest = slow(m, sp, stp, ss, es);
     297             :                                 assert(rest != NULL);   /* it did match */
     298             :                                 /* could the rest match the rest? */
     299         245 :                                 tail = slow(m, rest, stop, es, stopst);
     300         245 :                                 if (tail == stop)
     301          52 :                                         break;          /* yes! */
     302             :                                 /* no -- try a shorter match for this one */
     303         193 :                                 stp = rest - 1;
     304             :                                 assert(stp >= sp);   /* it did work */
     305         193 :                         }
     306          52 :                         ssub = ss + 1;
     307          52 :                         esub = es - 1;
     308             :                         /* did innards match? */
     309          52 :                         if (slow(m, sp, rest, ssub, esub) != NULL) {
     310          52 :                                 dp = dissect(m, sp, rest, ssub, esub);
     311             :                                 assert(dp == rest);
     312             :                         } else          /* no */
     313             :                                 assert(sp == rest);
     314          52 :                         sp = rest;
     315          52 :                         break;
     316             :                 case OPLUS_:
     317          79 :                         stp = stop;
     318             :                         for (;;) {
     319             :                                 /* how long could this one be? */
     320          79 :                                 rest = slow(m, sp, stp, ss, es);
     321             :                                 assert(rest != NULL);   /* it did match */
     322             :                                 /* could the rest match the rest? */
     323          79 :                                 tail = slow(m, rest, stop, es, stopst);
     324          79 :                                 if (tail == stop)
     325          79 :                                         break;          /* yes! */
     326             :                                 /* no -- try a shorter match for this one */
     327           0 :                                 stp = rest - 1;
     328             :                                 assert(stp >= sp);   /* it did work */
     329           0 :                         }
     330          79 :                         ssub = ss + 1;
     331          79 :                         esub = es - 1;
     332          79 :                         ssp = sp;
     333          79 :                         oldssp = ssp;
     334             :                         for (;;) {      /* find last match of innards */
     335         314 :                                 sep = slow(m, ssp, rest, ssub, esub);
     336         314 :                                 if (sep == NULL || sep == ssp)
     337             :                                         break;  /* failed or matched null */
     338         235 :                                 oldssp = ssp;   /* on to next try */
     339         235 :                                 ssp = sep;
     340         235 :                         }
     341          79 :                         if (sep == NULL) {
     342             :                                 /* last successful match */
     343          79 :                                 sep = ssp;
     344          79 :                                 ssp = oldssp;
     345             :                         }
     346             :                         assert(sep == rest);    /* must exhaust substring */
     347             :                         assert(slow(m, ssp, sep, ssub, esub) == rest);
     348          79 :                         dp = dissect(m, ssp, sep, ssub, esub);
     349             :                         assert(dp == sep);
     350          79 :                         sp = rest;
     351          79 :                         break;
     352             :                 case OCH_:
     353          47 :                         stp = stop;
     354             :                         for (;;) {
     355             :                                 /* how long could this one be? */
     356          53 :                                 rest = slow(m, sp, stp, ss, es);
     357             :                                 assert(rest != NULL);   /* it did match */
     358             :                                 /* could the rest match the rest? */
     359          53 :                                 tail = slow(m, rest, stop, es, stopst);
     360          53 :                                 if (tail == stop)
     361          47 :                                         break;          /* yes! */
     362             :                                 /* no -- try a shorter match for this one */
     363           6 :                                 stp = rest - 1;
     364             :                                 assert(stp >= sp);   /* it did work */
     365           6 :                         }
     366          47 :                         ssub = ss + 1;
     367          47 :                         esub = ss + OPND(m->g->strip[ss]) - 1;
     368             :                         assert(OP(m->g->strip[esub]) == OOR1);
     369             :                         for (;;) {      /* find first matching branch */
     370          70 :                                 if (slow(m, sp, rest, ssub, esub) == rest)
     371          47 :                                         break;  /* it matched all of it */
     372             :                                 /* that one missed, try next one */
     373             :                                 assert(OP(m->g->strip[esub]) == OOR1);
     374          23 :                                 esub++;
     375             :                                 assert(OP(m->g->strip[esub]) == OOR2);
     376          23 :                                 ssub = esub + 1;
     377          23 :                                 esub += OPND(m->g->strip[esub]);
     378          23 :                                 if (OP(m->g->strip[esub]) == OOR2)
     379           6 :                                         esub--;
     380             :                                 else
     381             :                                         assert(OP(m->g->strip[esub]) == O_CH);
     382          23 :                         }
     383          47 :                         dp = dissect(m, sp, rest, ssub, esub);
     384             :                         assert(dp == rest);
     385          47 :                         sp = rest;
     386          47 :                         break;
     387             :                 case O_PLUS:
     388             :                 case O_QUEST:
     389             :                 case OOR1:
     390             :                 case OOR2:
     391             :                 case O_CH:
     392             :                         assert(PHP_REGEX_NOPE);
     393           0 :                         break;
     394             :                 case OLPAREN:
     395        4348 :                         i = OPND(m->g->strip[ss]);
     396             :                         assert(0 < i && i <= m->g->nsub);
     397        4348 :                         m->pmatch[i].rm_so = sp - m->offp;
     398        4348 :                         break;
     399             :                 case ORPAREN:
     400        4348 :                         i = OPND(m->g->strip[ss]);
     401             :                         assert(0 < i && i <= m->g->nsub);
     402        4348 :                         m->pmatch[i].rm_eo = sp - m->offp;
     403             :                         break;
     404             :                 default:                /* uh oh */
     405             :                         assert(PHP_REGEX_NOPE);
     406             :                         break;
     407             :                 }
     408             :         }
     409             : 
     410             :         assert(sp == stop);
     411         398 :         return(sp);
     412             : }
     413             : 
     414             : /*
     415             :  - backref - figure out what matched what, figuring in back references
     416             :  == static unsigned char *backref(register struct match *m, unsigned char *start, \
     417             :  ==     unsigned char *stop, sopno startst, sopno stopst, sopno lev);
     418             :  */
     419             : static unsigned char *                  /* == stop (success) or NULL (failure) */
     420           0 : backref(m, start, stop, startst, stopst, lev)
     421             : register struct match *m;
     422             : unsigned char *start;
     423             : unsigned char *stop;
     424             : sopno startst;
     425             : sopno stopst;
     426             : sopno lev;                      /* PLUS nesting level */
     427             : {
     428             :         register int i;
     429             :         register sopno ss;      /* start sop of current subRE */
     430             :         register unsigned char *sp;     /* start of string matched by it */
     431             :         register sopno ssub;    /* start sop of subsubRE */
     432             :         register sopno esub;    /* end sop of subsubRE */
     433             :         register unsigned char *ssp;    /* start of string matched by subsubRE */
     434             :         register unsigned char *dp;
     435             :         register size_t len;
     436             :         register int hard;
     437             :         register sop s;
     438             :         register regoff_t offsave;
     439             :         register cset *cs;
     440             : 
     441             :         AT("back", start, stop, startst, stopst);
     442           0 :         sp = start;
     443             : 
     444             :         /* get as far as we can with easy stuff */
     445           0 :         hard = 0;
     446           0 :         for (ss = startst; !hard && ss < stopst; ss++)
     447           0 :                 switch (OP(s = m->g->strip[ss])) {
     448             :                 case OCHAR:
     449           0 :                         if (sp == stop || *sp++ != (unsigned char)OPND(s))
     450           0 :                                 return(NULL);
     451           0 :                         break;
     452             :                 case OANY:
     453           0 :                         if (sp == stop)
     454           0 :                                 return(NULL);
     455           0 :                         sp++;
     456           0 :                         break;
     457             :                 case OANYOF:
     458           0 :                         cs = &m->g->sets[OPND(s)];
     459           0 :                         if (sp == stop || !CHIN(cs, *sp++))
     460           0 :                                 return(NULL);
     461           0 :                         break;
     462             :                 case OBOL:
     463           0 :                         if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
     464           0 :                                         (sp < m->endp && *(sp-1) == '\n' &&
     465           0 :                                                 (m->g->cflags&REG_NEWLINE)) )
     466             :                                 { /* yes */ }
     467             :                         else
     468           0 :                                 return(NULL);
     469           0 :                         break;
     470             :                 case OEOL:
     471           0 :                         if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
     472           0 :                                         (sp < m->endp && *sp == '\n' &&
     473           0 :                                                 (m->g->cflags&REG_NEWLINE)) )
     474             :                                 { /* yes */ }
     475             :                         else
     476           0 :                                 return(NULL);
     477           0 :                         break;
     478             :                 case OBOW:
     479           0 :                         if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
     480           0 :                                         (sp < m->endp && *(sp-1) == '\n' &&
     481           0 :                                                 (m->g->cflags&REG_NEWLINE)) ||
     482           0 :                                         (sp > m->beginp &&
     483           0 :                                                         !ISWORD(*(sp-1))) ) &&
     484           0 :                                         (sp < m->endp && ISWORD(*sp)) )
     485             :                                 { /* yes */ }
     486             :                         else
     487           0 :                                 return(NULL);
     488           0 :                         break;
     489             :                 case OEOW:
     490           0 :                         if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
     491           0 :                                         (sp < m->endp && *sp == '\n' &&
     492           0 :                                                 (m->g->cflags&REG_NEWLINE)) ||
     493           0 :                                         (sp < m->endp && !ISWORD(*sp)) ) &&
     494           0 :                                         (sp > m->beginp && ISWORD(*(sp-1))) )
     495             :                                 { /* yes */ }
     496             :                         else
     497           0 :                                 return(NULL);
     498           0 :                         break;
     499             :                 case O_QUEST:
     500           0 :                         break;
     501             :                 case OOR1:      /* matches null but needs to skip */
     502           0 :                         ss++;
     503           0 :                         s = m->g->strip[ss];
     504             :                         do {
     505             :                                 assert(OP(s) == OOR2);
     506           0 :                                 ss += OPND(s);
     507           0 :                         } while (OP(s = m->g->strip[ss]) != O_CH);
     508             :                         /* note that the ss++ gets us past the O_CH */
     509           0 :                         break;
     510             :                 default:        /* have to make a choice */
     511           0 :                         hard = 1;
     512             :                         break;
     513             :                 }
     514           0 :         if (!hard) {            /* that was it! */
     515           0 :                 if (sp != stop)
     516           0 :                         return(NULL);
     517           0 :                 return(sp);
     518             :         }
     519           0 :         ss--;                   /* adjust for the for's final increment */
     520             : 
     521             :         /* the hard stuff */
     522             :         AT("hard", sp, stop, ss, stopst);
     523           0 :         s = m->g->strip[ss];
     524           0 :         switch (OP(s)) {
     525             :         case OBACK_:            /* the vilest depths */
     526           0 :                 i = OPND(s);
     527             :                 assert(0 < i && i <= m->g->nsub);
     528           0 :                 if (m->pmatch[i].rm_eo == -1)
     529           0 :                         return(NULL);
     530             :                 assert(m->pmatch[i].rm_so != -1);
     531           0 :                 len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
     532             :                 assert(stop - m->beginp >= len);
     533           0 :                 if (sp > stop - len)
     534           0 :                         return(NULL);   /* not enough left to match */
     535           0 :                 ssp = m->offp + m->pmatch[i].rm_so;
     536           0 :                 if (memcmp(sp, ssp, len) != 0)
     537           0 :                         return(NULL);
     538           0 :                 while (m->g->strip[ss] != SOP(O_BACK, i))
     539           0 :                         ss++;
     540           0 :                 return(backref(m, sp+len, stop, ss+1, stopst, lev));
     541             :                 break;
     542             :         case OQUEST_:           /* to null or not */
     543           0 :                 dp = backref(m, sp, stop, ss+1, stopst, lev);
     544           0 :                 if (dp != NULL)
     545           0 :                         return(dp);     /* not */
     546           0 :                 return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
     547             :                 break;
     548             :         case OPLUS_:
     549             :                 assert(m->lastpos != NULL);
     550             :                 assert(lev+1 <= m->g->nplus);
     551           0 :                 m->lastpos[lev+1] = sp;
     552           0 :                 return(backref(m, sp, stop, ss+1, stopst, lev+1));
     553             :                 break;
     554             :         case O_PLUS:
     555           0 :                 if (sp == m->lastpos[lev])   /* last pass matched null */
     556           0 :                         return(backref(m, sp, stop, ss+1, stopst, lev-1));
     557             :                 /* try another pass */
     558           0 :                 m->lastpos[lev] = sp;
     559           0 :                 dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
     560           0 :                 if (dp == NULL)
     561           0 :                         return(backref(m, sp, stop, ss+1, stopst, lev-1));
     562             :                 else
     563           0 :                         return(dp);
     564             :                 break;
     565             :         case OCH_:              /* find the right one, if any */
     566           0 :                 ssub = ss + 1;
     567           0 :                 esub = ss + OPND(s) - 1;
     568             :                 assert(OP(m->g->strip[esub]) == OOR1);
     569             :                 for (;;) {      /* find first matching branch */
     570           0 :                         dp = backref(m, sp, stop, ssub, esub, lev);
     571           0 :                         if (dp != NULL)
     572           0 :                                 return(dp);
     573             :                         /* that one missed, try next one */
     574           0 :                         if (OP(m->g->strip[esub]) == O_CH)
     575           0 :                                 return(NULL);   /* there is none */
     576           0 :                         esub++;
     577             :                         assert(OP(m->g->strip[esub]) == OOR2);
     578           0 :                         ssub = esub + 1;
     579           0 :                         esub += OPND(m->g->strip[esub]);
     580           0 :                         if (OP(m->g->strip[esub]) == OOR2)
     581           0 :                                 esub--;
     582             :                         else
     583             :                                 assert(OP(m->g->strip[esub]) == O_CH);
     584           0 :                 }
     585             :                 break;
     586             :         case OLPAREN:           /* must undo assignment if rest fails */
     587           0 :                 i = OPND(s);
     588             :                 assert(0 < i && i <= m->g->nsub);
     589           0 :                 offsave = m->pmatch[i].rm_so;
     590           0 :                 m->pmatch[i].rm_so = sp - m->offp;
     591           0 :                 dp = backref(m, sp, stop, ss+1, stopst, lev);
     592           0 :                 if (dp != NULL)
     593           0 :                         return(dp);
     594           0 :                 m->pmatch[i].rm_so = offsave;
     595           0 :                 return(NULL);
     596             :                 break;
     597             :         case ORPAREN:           /* must undo assignment if rest fails */
     598           0 :                 i = OPND(s);
     599             :                 assert(0 < i && i <= m->g->nsub);
     600           0 :                 offsave = m->pmatch[i].rm_eo;
     601           0 :                 m->pmatch[i].rm_eo = sp - m->offp;
     602           0 :                 dp = backref(m, sp, stop, ss+1, stopst, lev);
     603           0 :                 if (dp != NULL)
     604           0 :                         return(dp);
     605           0 :                 m->pmatch[i].rm_eo = offsave;
     606           0 :                 return(NULL);
     607             :                 break;
     608             :         default:                /* uh oh */
     609             :                 assert(PHP_REGEX_NOPE);
     610             :                 break;
     611             :         }
     612             : 
     613             :         /* "can't happen" */
     614             :         assert(PHP_REGEX_NOPE);
     615             :         /* NOTREACHED */
     616           0 :         return((unsigned char *)NULL);  /* dummy */
     617             : }
     618             : 
     619             : /*
     620             :  - fast - step through the string at top speed
     621             :  == static unsigned char *fast(register struct match *m, unsigned char *start, \
     622             :  ==     unsigned char *stop, sopno startst, sopno stopst);
     623             :  */
     624             : static unsigned char *                  /* where tentative match ended, or NULL */
     625         852 : fast(m, start, stop, startst, stopst)
     626             : register struct match *m;
     627             : unsigned char *start;
     628             : unsigned char *stop;
     629             : sopno startst;
     630             : sopno stopst;
     631             : {
     632         852 :         register states st = m->st;
     633         852 :         register states fresh = m->fresh;
     634         852 :         register states tmp = m->tmp;
     635         852 :         register unsigned char *p = start;
     636         852 :         register int c = (start == m->beginp) ? OUT : *(start-1);
     637             :         register int lastc;     /* previous c */
     638             :         register int flagch;
     639             :         register int i;
     640             :         register unsigned char *coldp;  /* last p after which no match was underway */
     641             : 
     642         852 :         CLEAR(st);
     643         852 :         SET1(st, startst);
     644         852 :         st = step(m->g, startst, stopst, st, NOTHING, st);
     645         852 :         ASSIGN(fresh, st);
     646             :         SP("start", st, *p);
     647         852 :         coldp = NULL;
     648             :         for (;;) {
     649             :                 /* next character */
     650       13665 :                 lastc = c;
     651       13665 :                 c = (p == m->endp) ? OUT : *p;
     652       13665 :                 if (EQ(st, fresh))
     653        2784 :                         coldp = p;
     654             : 
     655             :                 /* is there an EOL and/or BOL between lastc and c? */
     656       13665 :                 flagch = '\0';
     657       13665 :                 i = 0;
     658       14517 :                 if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
     659         852 :                                 (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
     660         541 :                         flagch = BOL;
     661         541 :                         i = m->g->nbol;
     662             :                 }
     663       13955 :                 if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
     664         290 :                                 (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
     665         290 :                         flagch = (flagch == BOL) ? BOLEOL : EOL;
     666         290 :                         i += m->g->neol;
     667             :                 }
     668       13665 :                 if (i != 0) {
     669         180 :                         for (; i > 0; i--)
     670          90 :                                 st = step(m->g, startst, stopst, st, flagch, st);
     671             :                         SP("boleol", st, c);
     672             :                 }
     673             : 
     674             :                 /* how about a word boundary? */
     675       15220 :                 if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
     676        1555 :                                         (c != OUT && ISWORD(c)) ) {
     677         721 :                         flagch = BOW;
     678             :                 }
     679       25191 :                 if ( (lastc != OUT && ISWORD(lastc)) &&
     680       11526 :                                 (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
     681         598 :                         flagch = EOW;
     682             :                 }
     683       13665 :                 if (flagch == BOW || flagch == EOW) {
     684        1319 :                         st = step(m->g, startst, stopst, st, flagch, st);
     685             :                         SP("boweow", st, c);
     686             :                 }
     687             : 
     688             :                 /* are we done? */
     689       13665 :                 if (ISSET(st, stopst) || p == stop)
     690             :                         break;          /* NOTE BREAK OUT */
     691             : 
     692             :                 /* no, we must deal with this character */
     693       12813 :                 ASSIGN(tmp, st);
     694       12813 :                 ASSIGN(st, fresh);
     695             :                 assert(c != OUT);
     696       12813 :                 st = step(m->g, startst, stopst, tmp, c, st);
     697             :                 SP("aft", st, c);
     698             :                 assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
     699       12813 :                 p++;
     700       12813 :         }
     701             : 
     702             :         assert(coldp != NULL);
     703         852 :         m->coldp = coldp;
     704         852 :         if (ISSET(st, stopst))
     705         707 :                 return(p+1);
     706             :         else
     707         145 :                 return(NULL);
     708             : }
     709             : 
     710             : /*
     711             :  - slow - step through the string more deliberately
     712             :  == static unsigned char *slow(register struct match *m, unsigned char *start, \
     713             :  ==     unsigned char *stop, sopno startst, sopno stopst);
     714             :  */
     715             : static unsigned char *                  /* where it ended */
     716        2062 : slow(m, start, stop, startst, stopst)
     717             : register struct match *m;
     718             : unsigned char *start;
     719             : unsigned char *stop;
     720             : sopno startst;
     721             : sopno stopst;
     722             : {
     723        2062 :         register states st = m->st;
     724        2062 :         register states empty = m->empty;
     725        2062 :         register states tmp = m->tmp;
     726        2062 :         register unsigned char *p = start;
     727        2062 :         register int c = (start == m->beginp) ? OUT : *(start-1);
     728             :         register int lastc;     /* previous c */
     729             :         register int flagch;
     730             :         register int i;
     731             :         register unsigned char *matchp; /* last p at which a match ended */
     732             : 
     733             :         AT("slow", start, stop, startst, stopst);
     734        2062 :         CLEAR(st);
     735        2062 :         SET1(st, startst);
     736             :         SP("sstart", st, *p);
     737        2062 :         st = step(m->g, startst, stopst, st, NOTHING, st);
     738        2062 :         matchp = NULL;
     739             :         for (;;) {
     740             :                 /* next character */
     741       12505 :                 lastc = c;
     742       12505 :                 c = (p == m->endp) ? OUT : *p;
     743             : 
     744             :                 /* is there an EOL and/or BOL between lastc and c? */
     745       12505 :                 flagch = '\0';
     746       12505 :                 i = 0;
     747       13110 :                 if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
     748         605 :                                 (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
     749         366 :                         flagch = BOL;
     750         366 :                         i = m->g->nbol;
     751             :                 }
     752       12944 :                 if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
     753         439 :                                 (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
     754         439 :                         flagch = (flagch == BOL) ? BOLEOL : EOL;
     755         439 :                         i += m->g->neol;
     756             :                 }
     757       12505 :                 if (i != 0) {
     758         380 :                         for (; i > 0; i--)
     759         190 :                                 st = step(m->g, startst, stopst, st, flagch, st);
     760             :                         SP("sboleol", st, c);
     761             :                 }
     762             : 
     763             :                 /* how about a word boundary? */
     764       14422 :                 if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
     765        1917 :                                         (c != OUT && ISWORD(c)) ) {
     766        1151 :                         flagch = BOW;
     767             :                 }
     768       22422 :                 if ( (lastc != OUT && ISWORD(lastc)) &&
     769        9917 :                                 (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
     770        1386 :                         flagch = EOW;
     771             :                 }
     772       12505 :                 if (flagch == BOW || flagch == EOW) {
     773        2537 :                         st = step(m->g, startst, stopst, st, flagch, st);
     774             :                         SP("sboweow", st, c);
     775             :                 }
     776             : 
     777             :                 /* are we done? */
     778       12505 :                 if (ISSET(st, stopst))
     779        3549 :                         matchp = p;
     780       12505 :                 if (EQ(st, empty) || p == stop)
     781             :                         break;          /* NOTE BREAK OUT */
     782             : 
     783             :                 /* no, we must deal with this character */
     784       10443 :                 ASSIGN(tmp, st);
     785       10443 :                 ASSIGN(st, empty);
     786             :                 assert(c != OUT);
     787       10443 :                 st = step(m->g, startst, stopst, tmp, c, st);
     788             :                 SP("saft", st, c);
     789             :                 assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
     790       10443 :                 p++;
     791       10443 :         }
     792             : 
     793        2062 :         return(matchp);
     794             : }
     795             : 
     796             : 
     797             : /*
     798             :  - step - map set of states reachable before char to set reachable after
     799             :  == static states step(register struct re_guts *g, sopno start, sopno stop, \
     800             :  ==     register states bef, int ch, register states aft);
     801             :  == #define     BOL     (OUT+1)
     802             :  == #define     EOL     (BOL+1)
     803             :  == #define     BOLEOL  (BOL+2)
     804             :  == #define     NOTHING (BOL+3)
     805             :  == #define     BOW     (BOL+4)
     806             :  == #define     EOW     (BOL+5)
     807             :  == #define     CODEMAX (BOL+5)         // highest code used
     808             :  == #define     NONCHAR(c)      ((c) > UCHAR_MAX)
     809             :  == #define     NNONCHAR        (CODEMAX-UCHAR_MAX)
     810             :  */
     811             : static states
     812       30306 : step(g, start, stop, bef, ch, aft)
     813             : register struct re_guts *g;
     814             : sopno start;                    /* start state within strip */
     815             : sopno stop;                     /* state after stop state within strip */
     816             : register states bef;            /* states reachable before */
     817             : int ch;                         /* character or NONCHAR code */
     818             : register states aft;            /* states already known reachable after */
     819             : {
     820             :         register cset *cs;
     821             :         register sop s;
     822             :         register sopno pc;
     823             :         register onestate here;         /* note, macros know this name */
     824             :         register sopno look;
     825             :         register long i;
     826             : 
     827    75913214 :         for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
     828    75882908 :                 s = g->strip[pc];
     829    75882908 :                 switch (OP(s)) {
     830             :                 case OEND:
     831             :                         assert(pc == stop-1);
     832           0 :                         break;
     833             :                 case OCHAR:
     834             :                         /* only characters can match */
     835             :                         assert(!NONCHAR(ch) || ch != (unsigned char)OPND(s));
     836       29957 :                         if (ch == (unsigned char)OPND(s))
     837        3298 :                                 FWD(aft, bef, 1);
     838       29957 :                         break;
     839             :                 case OBOL:
     840        2245 :                         if (ch == BOL || ch == BOLEOL)
     841          69 :                                 FWD(aft, bef, 1);
     842        2245 :                         break;
     843             :                 case OEOL:
     844        2759 :                         if (ch == EOL || ch == BOLEOL)
     845         101 :                                 FWD(aft, bef, 1);
     846        2759 :                         break;
     847             :                 case OBOW:
     848           0 :                         if (ch == BOW)
     849           0 :                                 FWD(aft, bef, 1);
     850           0 :                         break;
     851             :                 case OEOW:
     852           0 :                         if (ch == EOW)
     853           0 :                                 FWD(aft, bef, 1);
     854           0 :                         break;
     855             :                 case OANY:
     856    25214132 :                         if (!NONCHAR(ch))
     857    25174046 :                                 FWD(aft, bef, 1);
     858    25214132 :                         break;
     859             :                 case OANYOF:
     860      128368 :                         cs = &g->sets[OPND(s)];
     861      128368 :                         if (!NONCHAR(ch) && CHIN(cs, ch))
     862       96417 :                                 FWD(aft, bef, 1);
     863      128368 :                         break;
     864             :                 case OBACK_:            /* ignored here */
     865             :                 case O_BACK:
     866           0 :                         FWD(aft, aft, 1);
     867           0 :                         break;
     868             :                 case OPLUS_:            /* forward, this is just an empty */
     869       11048 :                         FWD(aft, aft, 1);
     870       11048 :                         break;
     871             :                 case O_PLUS:            /* both forward and back */
     872       11048 :                         FWD(aft, aft, 1);
     873       11048 :                         i = ISSETBACK(aft, OPND(s));
     874       11048 :                         BACK(aft, aft, OPND(s));
     875       11048 :                         if (!i && ISSETBACK(aft, OPND(s))) {
     876             :                                 /* oho, must reconsider loop body */
     877        3154 :                                 pc -= OPND(s) + 1;
     878        3154 :                                 INIT(here, pc);
     879             :                         }
     880       11048 :                         break;
     881             :                 case OQUEST_:           /* two branches, both forward */
     882        4153 :                         FWD(aft, aft, 1);
     883        4153 :                         FWD(aft, aft, OPND(s));
     884        4153 :                         break;
     885             :                 case O_QUEST:           /* just an empty */
     886        4153 :                         FWD(aft, aft, 1);
     887        4153 :                         break;
     888             :                 case OLPAREN:           /* not significant here */
     889             :                 case ORPAREN:
     890    50426633 :                         FWD(aft, aft, 1);
     891    50426633 :                         break;
     892             :                 case OCH_:              /* mark the first two branches */
     893       10931 :                         FWD(aft, aft, 1);
     894             :                         assert(OP(g->strip[pc+OPND(s)]) == OOR2);
     895       10931 :                         FWD(aft, aft, OPND(s));
     896       10931 :                         break;
     897             :                 case OOR1:              /* done a branch, find the O_CH */
     898       13275 :                         if (ISSTATEIN(aft, here)) {
     899        6388 :                                 for (look = 1;
     900        5128 :                                                 OP(s = g->strip[pc+look]) != O_CH;
     901        1304 :                                                 look += OPND(s))
     902             :                                         assert(OP(s) == OOR2);
     903        1260 :                                 FWD(aft, aft, look);
     904             :                         }
     905       13275 :                         break;
     906             :                 case OOR2:              /* propagate OCH_'s marking */
     907       13275 :                         FWD(aft, aft, 1);
     908       13275 :                         if (OP(g->strip[pc+OPND(s)]) != O_CH) {
     909             :                                 assert(OP(g->strip[pc+OPND(s)]) == OOR2);
     910        2344 :                                 FWD(aft, aft, OPND(s));
     911             :                         }
     912       13275 :                         break;
     913             :                 case O_CH:              /* just empty */
     914       10931 :                         FWD(aft, aft, 1);
     915             :                         break;
     916             :                 default:                /* ooooops... */
     917             :                         assert(PHP_REGEX_NOPE);
     918             :                         break;
     919             :                 }
     920             :         }
     921             : 
     922       30306 :         return(aft);
     923             : }
     924             : 
     925             : #ifdef REDEBUG
     926             : /*
     927             :  - print - print a set of states
     928             :  == #ifdef REDEBUG
     929             :  == static void print(struct match *m, unsigned char *caption, states st, \
     930             :  ==     int ch, FILE *d);
     931             :  == #endif
     932             :  */
     933             : static void
     934             : print(m, caption, st, ch, d)
     935             : struct match *m;
     936             : unsigned char *caption;
     937             : states st;
     938             : int ch;
     939             : FILE *d;
     940             : {
     941             :         register struct re_guts *g = m->g;
     942             :         register int i;
     943             :         register int first = 1;
     944             : 
     945             :         if (!(m->eflags&REG_TRACE))
     946             :                 return;
     947             : 
     948             :         fprintf(d, "%s", caption);
     949             :         if (ch != '\0')
     950             :                 fprintf(d, " %s", pchar(ch));
     951             :         for (i = 0; i < g->nstates; i++)
     952             :                 if (ISSET(st, i)) {
     953             :                         fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
     954             :                         first = 0;
     955             :                 }
     956             :         fprintf(d, "\n");
     957             : }
     958             : 
     959             : /* 
     960             :  - at - print current situation
     961             :  == #ifdef REDEBUG
     962             :  == static void at(struct match *m, unsigned char *title, unsigned char *start, unsigned char *stop, \
     963             :  ==                                             sopno startst, sopno stopst);
     964             :  == #endif
     965             :  */
     966             : static void
     967             : at(m, title, start, stop, startst, stopst)
     968             : struct match *m;
     969             : unsigned char *title;
     970             : unsigned char *start;
     971             : unsigned char *stop;
     972             : sopno startst;
     973             : sopno stopst;
     974             : {
     975             :         if (!(m->eflags&REG_TRACE))
     976             :                 return;
     977             : 
     978             :         printf("%s %s-", title, pchar(*start));
     979             :         printf("%s ", pchar(*stop));
     980             :         printf("%ld-%ld\n", (long)startst, (long)stopst);
     981             : }
     982             : 
     983             : #ifndef PCHARDONE
     984             : #define PCHARDONE       /* never again */
     985             : /*
     986             :  - pchar - make a character printable
     987             :  == #ifdef REDEBUG
     988             :  == static unsigned char *pchar(int ch);
     989             :  == #endif
     990             :  *
     991             :  * Is this identical to regchar() over in debug.c?  Well, yes.  But a
     992             :  * duplicate here avoids having a debugging-capable regexec.o tied to
     993             :  * a matching debug.o, and this is convenient.  It all disappears in
     994             :  * the non-debug compilation anyway, so it doesn't matter much.
     995             :  */
     996             : static unsigned char *                  /* -> representation */
     997             : pchar(ch)
     998             : int ch;
     999             : {
    1000             :         static unsigned char pbuf[10];
    1001             : 
    1002             :         if (isprint(ch) || ch == ' ')
    1003             :                 sprintf(pbuf, "%c", ch);
    1004             :         else
    1005             :                 sprintf(pbuf, "\\%o", ch);
    1006             :         return(pbuf);
    1007             : }
    1008             : #endif
    1009             : #endif
    1010             : 
    1011             : #undef  matcher
    1012             : #undef  fast
    1013             : #undef  slow
    1014             : #undef  dissect
    1015             : #undef  backref
    1016             : #undef  step
    1017             : #undef  print
    1018             : #undef  at
    1019             : #undef  match

Generated by: LCOV version 1.10

Generated at Mon, 04 Aug 2014 15:49:03 +0000 (45 days ago)

Copyright © 2005-2014 The PHP Group
All rights reserved.