PHP  
 PHP: Test and Code Coverage Analysis
downloads | QA | documentation | faq | getting help | mailing lists | reporting bugs | php.net sites | links | my php.net 
 

LTP GCOV extension - code coverage report
Current view: directory - pcre/pcrelib - pcre_exec.c
Test: PHP Code Coverage
Date: 2009-11-19 Instrumented lines: 1752
Code covered: 41.6 % Executed lines: 728
Legend: not executed executed

       1                 : /*************************************************
       2                 : *      Perl-Compatible Regular Expressions       *
       3                 : *************************************************/
       4                 : 
       5                 : /* PCRE is a library of functions to support regular expressions whose syntax
       6                 : and semantics are as close as possible to those of the Perl 5 language.
       7                 : 
       8                 :                        Written by Philip Hazel
       9                 :            Copyright (c) 1997-2009 University of Cambridge
      10                 : 
      11                 : -----------------------------------------------------------------------------
      12                 : Redistribution and use in source and binary forms, with or without
      13                 : modification, are permitted provided that the following conditions are met:
      14                 : 
      15                 :     * Redistributions of source code must retain the above copyright notice,
      16                 :       this list of conditions and the following disclaimer.
      17                 : 
      18                 :     * Redistributions in binary form must reproduce the above copyright
      19                 :       notice, this list of conditions and the following disclaimer in the
      20                 :       documentation and/or other materials provided with the distribution.
      21                 : 
      22                 :     * Neither the name of the University of Cambridge nor the names of its
      23                 :       contributors may be used to endorse or promote products derived from
      24                 :       this software without specific prior written permission.
      25                 : 
      26                 : THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
      27                 : AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
      28                 : IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
      29                 : ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
      30                 : LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
      31                 : CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
      32                 : SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
      33                 : INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
      34                 : CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
      35                 : ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
      36                 : POSSIBILITY OF SUCH DAMAGE.
      37                 : -----------------------------------------------------------------------------
      38                 : */
      39                 : 
      40                 : 
      41                 : /* This module contains pcre_exec(), the externally visible function that does
      42                 : pattern matching using an NFA algorithm, trying to mimic Perl as closely as
      43                 : possible. There are also some static supporting functions. */
      44                 : 
      45                 : #include "config.h"
      46                 : 
      47                 : #define NLBLOCK md             /* Block containing newline information */
      48                 : #define PSSTART start_subject  /* Field containing processed string start */
      49                 : #define PSEND   end_subject    /* Field containing processed string end */
      50                 : 
      51                 : #include "pcre_internal.h"
      52                 : 
      53                 : /* Undefine some potentially clashing cpp symbols */
      54                 : 
      55                 : #undef min
      56                 : #undef max
      57                 : 
      58                 : /* Flag bits for the match() function */
      59                 : 
      60                 : #define match_condassert     0x01  /* Called to check a condition assertion */
      61                 : #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
      62                 : 
      63                 : /* Non-error returns from the match() function. Error returns are externally
      64                 : defined PCRE_ERROR_xxx codes, which are all negative. */
      65                 : 
      66                 : #define MATCH_MATCH        1
      67                 : #define MATCH_NOMATCH      0
      68                 : 
      69                 : /* Special internal returns from the match() function. Make them sufficiently
      70                 : negative to avoid the external error codes. */
      71                 : 
      72                 : #define MATCH_COMMIT       (-999)
      73                 : #define MATCH_PRUNE        (-998)
      74                 : #define MATCH_SKIP         (-997)
      75                 : #define MATCH_THEN         (-996)
      76                 : 
      77                 : /* Maximum number of ints of offset to save on the stack for recursive calls.
      78                 : If the offset vector is bigger, malloc is used. This should be a multiple of 3,
      79                 : because the offset vector is always a multiple of 3 long. */
      80                 : 
      81                 : #define REC_STACK_SAVE_MAX 30
      82                 : 
      83                 : /* Min and max values for the common repeats; for the maxima, 0 => infinity */
      84                 : 
      85                 : static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
      86                 : static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
      87                 : 
      88                 : 
      89                 : 
      90                 : #ifdef DEBUG
      91                 : /*************************************************
      92                 : *        Debugging function to print chars       *
      93                 : *************************************************/
      94                 : 
      95                 : /* Print a sequence of chars in printable format, stopping at the end of the
      96                 : subject if requested.
      97                 : 
      98                 : Arguments:
      99                 :   p           points to characters
     100                 :   length      number to print
     101                 :   is_subject  TRUE if printing from within md->start_subject
     102                 :   md          pointer to matching data block, if is_subject is TRUE
     103                 : 
     104                 : Returns:     nothing
     105                 : */
     106                 : 
     107                 : static void
     108                 : pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
     109                 : {
     110                 : unsigned int c;
     111                 : if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
     112                 : while (length-- > 0)
     113                 :   if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
     114                 : }
     115                 : #endif
     116                 : 
     117                 : 
     118                 : 
     119                 : /*************************************************
     120                 : *          Match a back-reference                *
     121                 : *************************************************/
     122                 : 
     123                 : /* If a back reference hasn't been set, the length that is passed is greater
     124                 : than the number of characters left in the string, so the match fails.
     125                 : 
     126                 : Arguments:
     127                 :   offset      index into the offset vector
     128                 :   eptr        points into the subject
     129                 :   length      length to be matched
     130                 :   md          points to match data block
     131                 :   ims         the ims flags
     132                 : 
     133                 : Returns:      TRUE if matched
     134                 : */
     135                 : 
     136                 : static BOOL
     137                 : match_ref(int offset, register USPTR eptr, int length, match_data *md,
     138                 :   unsigned long int ims)
     139            2796 : {
     140            2796 : USPTR p = md->start_subject + md->offset_vector[offset];
     141                 : 
     142                 : #ifdef DEBUG
     143                 : if (eptr >= md->end_subject)
     144                 :   printf("matching subject <null>");
     145                 : else
     146                 :   {
     147                 :   printf("matching subject ");
     148                 :   pchars(eptr, length, TRUE, md);
     149                 :   }
     150                 : printf(" against backref ");
     151                 : pchars(p, length, FALSE, md);
     152                 : printf("\n");
     153                 : #endif
     154                 : 
     155                 : /* Always fail if not enough characters left */
     156                 : 
     157            2796 : if (length > md->end_subject - eptr) return FALSE;
     158                 : 
     159                 : /* Separate the caseless case for speed. In UTF-8 mode we can only do this
     160                 : properly if Unicode properties are supported. Otherwise, we can check only
     161                 : ASCII characters. */
     162                 : 
     163            2697 : if ((ims & PCRE_CASELESS) != 0)
     164                 :   {
     165                 : #ifdef SUPPORT_UTF8
     166                 : #ifdef SUPPORT_UCP
     167             380 :   if (md->utf8)
     168                 :     {
     169               0 :     USPTR endptr = eptr + length;
     170               0 :     while (eptr < endptr)
     171                 :       {
     172                 :       int c, d;
     173               0 :       GETCHARINC(c, eptr);
     174               0 :       GETCHARINC(d, p);
     175               0 :       if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
     176                 :       }
     177                 :     }
     178                 :   else
     179                 : #endif
     180                 : #endif
     181                 : 
     182                 :   /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
     183                 :   is no UCP support. */
     184                 : 
     185             760 :   while (length-- > 0)
     186             380 :     { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
     187                 :   }
     188                 : 
     189                 : /* In the caseful case, we can just compare the bytes, whether or not we
     190                 : are in UTF-8 mode. */
     191                 : 
     192                 : else
     193            2317 :   { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
     194                 : 
     195             424 : return TRUE;
     196                 : }
     197                 : 
     198                 : 
     199                 : 
     200                 : /***************************************************************************
     201                 : ****************************************************************************
     202                 :                    RECURSION IN THE match() FUNCTION
     203                 : 
     204                 : The match() function is highly recursive, though not every recursive call
     205                 : increases the recursive depth. Nevertheless, some regular expressions can cause
     206                 : it to recurse to a great depth. I was writing for Unix, so I just let it call
     207                 : itself recursively. This uses the stack for saving everything that has to be
     208                 : saved for a recursive call. On Unix, the stack can be large, and this works
     209                 : fine.
     210                 : 
     211                 : It turns out that on some non-Unix-like systems there are problems with
     212                 : programs that use a lot of stack. (This despite the fact that every last chip
     213                 : has oodles of memory these days, and techniques for extending the stack have
     214                 : been known for decades.) So....
     215                 : 
     216                 : There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
     217                 : calls by keeping local variables that need to be preserved in blocks of memory
     218                 : obtained from malloc() instead of on the stack. Macros are used to
     219                 : achieve this so that the actual code doesn't look very different to what it
     220                 : always used to.
     221                 : 
     222                 : The original heap-recursive code used longjmp(). However, it seems that this
     223                 : can be very slow on some operating systems. Following a suggestion from Stan
     224                 : Switzer, the use of longjmp() has been abolished, at the cost of having to
     225                 : provide a unique number for each call to RMATCH. There is no way of generating
     226                 : a sequence of numbers at compile time in C. I have given them names, to make
     227                 : them stand out more clearly.
     228                 : 
     229                 : Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
     230                 : FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
     231                 : tests. Furthermore, not using longjmp() means that local dynamic variables
     232                 : don't have indeterminate values; this has meant that the frame size can be
     233                 : reduced because the result can be "passed back" by straight setting of the
     234                 : variable instead of being passed in the frame.
     235                 : ****************************************************************************
     236                 : ***************************************************************************/
     237                 : 
     238                 : /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
     239                 : below must be updated in sync.  */
     240                 : 
     241                 : enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
     242                 :        RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
     243                 :        RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
     244                 :        RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
     245                 :        RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
     246                 :        RM51,  RM52, RM53, RM54 };
     247                 : 
     248                 : /* These versions of the macros use the stack, as normal. There are debugging
     249                 : versions and production versions. Note that the "rw" argument of RMATCH isn't
     250                 : actually used in this definition. */
     251                 : 
     252                 : #ifndef NO_RECURSE
     253                 : #define REGISTER register
     254                 : 
     255                 : #ifdef DEBUG
     256                 : #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
     257                 :   { \
     258                 :   printf("match() called in line %d\n", __LINE__); \
     259                 :   rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
     260                 :   printf("to line %d\n", __LINE__); \
     261                 :   }
     262                 : #define RRETURN(ra) \
     263                 :   { \
     264                 :   printf("match() returned %d from line %d ", ra, __LINE__); \
     265                 :   return ra; \
     266                 :   }
     267                 : #else
     268                 : #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
     269                 :   rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
     270                 : #define RRETURN(ra) return ra
     271                 : #endif
     272                 : 
     273                 : #else
     274                 : 
     275                 : 
     276                 : /* These versions of the macros manage a private stack on the heap. Note that
     277                 : the "rd" argument of RMATCH isn't actually used in this definition. It's the md
     278                 : argument of match(), which never changes. */
     279                 : 
     280                 : #define REGISTER
     281                 : 
     282                 : #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
     283                 :   {\
     284                 :   heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
     285                 :   frame->Xwhere = rw; \
     286                 :   newframe->Xeptr = ra;\
     287                 :   newframe->Xecode = rb;\
     288                 :   newframe->Xmstart = mstart;\
     289                 :   newframe->Xoffset_top = rc;\
     290                 :   newframe->Xims = re;\
     291                 :   newframe->Xeptrb = rf;\
     292                 :   newframe->Xflags = rg;\
     293                 :   newframe->Xrdepth = frame->Xrdepth + 1;\
     294                 :   newframe->Xprevframe = frame;\
     295                 :   frame = newframe;\
     296                 :   DPRINTF(("restarting from line %d\n", __LINE__));\
     297                 :   goto HEAP_RECURSE;\
     298                 :   L_##rw:\
     299                 :   DPRINTF(("jumped back to line %d\n", __LINE__));\
     300                 :   }
     301                 : 
     302                 : #define RRETURN(ra)\
     303                 :   {\
     304                 :   heapframe *newframe = frame;\
     305                 :   frame = newframe->Xprevframe;\
     306                 :   (pcre_stack_free)(newframe);\
     307                 :   if (frame != NULL)\
     308                 :     {\
     309                 :     rrc = ra;\
     310                 :     goto HEAP_RETURN;\
     311                 :     }\
     312                 :   return ra;\
     313                 :   }
     314                 : 
     315                 : 
     316                 : /* Structure for remembering the local variables in a private frame */
     317                 : 
     318                 : typedef struct heapframe {
     319                 :   struct heapframe *Xprevframe;
     320                 : 
     321                 :   /* Function arguments that may change */
     322                 : 
     323                 :   USPTR Xeptr;
     324                 :   const uschar *Xecode;
     325                 :   USPTR Xmstart;
     326                 :   int Xoffset_top;
     327                 :   long int Xims;
     328                 :   eptrblock *Xeptrb;
     329                 :   int Xflags;
     330                 :   unsigned int Xrdepth;
     331                 : 
     332                 :   /* Function local variables */
     333                 : 
     334                 :   USPTR Xcallpat;
     335                 : #ifdef SUPPORT_UTF8
     336                 :   USPTR Xcharptr;
     337                 : #endif
     338                 :   USPTR Xdata;
     339                 :   USPTR Xnext;
     340                 :   USPTR Xpp;
     341                 :   USPTR Xprev;
     342                 :   USPTR Xsaved_eptr;
     343                 : 
     344                 :   recursion_info Xnew_recursive;
     345                 : 
     346                 :   BOOL Xcur_is_word;
     347                 :   BOOL Xcondition;
     348                 :   BOOL Xprev_is_word;
     349                 : 
     350                 :   unsigned long int Xoriginal_ims;
     351                 : 
     352                 : #ifdef SUPPORT_UCP
     353                 :   int Xprop_type;
     354                 :   int Xprop_value;
     355                 :   int Xprop_fail_result;
     356                 :   int Xprop_category;
     357                 :   int Xprop_chartype;
     358                 :   int Xprop_script;
     359                 :   int Xoclength;
     360                 :   uschar Xocchars[8];
     361                 : #endif
     362                 : 
     363                 :   int Xcodelink;
     364                 :   int Xctype;
     365                 :   unsigned int Xfc;
     366                 :   int Xfi;
     367                 :   int Xlength;
     368                 :   int Xmax;
     369                 :   int Xmin;
     370                 :   int Xnumber;
     371                 :   int Xoffset;
     372                 :   int Xop;
     373                 :   int Xsave_capture_last;
     374                 :   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
     375                 :   int Xstacksave[REC_STACK_SAVE_MAX];
     376                 : 
     377                 :   eptrblock Xnewptrb;
     378                 : 
     379                 :   /* Where to jump back to */
     380                 : 
     381                 :   int Xwhere;
     382                 : 
     383                 : } heapframe;
     384                 : 
     385                 : #endif
     386                 : 
     387                 : 
     388                 : /***************************************************************************
     389                 : ***************************************************************************/
     390                 : 
     391                 : 
     392                 : 
     393                 : /*************************************************
     394                 : *         Match from current position            *
     395                 : *************************************************/
     396                 : 
     397                 : /* This function is called recursively in many circumstances. Whenever it
     398                 : returns a negative (error) response, the outer incarnation must also return the
     399                 : same response.
     400                 : 
     401                 : Performance note: It might be tempting to extract commonly used fields from the
     402                 : md structure (e.g. utf8, end_subject) into individual variables to improve
     403                 : performance. Tests using gcc on a SPARC disproved this; in the first case, it
     404                 : made performance worse.
     405                 : 
     406                 : Arguments:
     407                 :    eptr        pointer to current character in subject
     408                 :    ecode       pointer to current position in compiled code
     409                 :    mstart      pointer to the current match start position (can be modified
     410                 :                  by encountering \K)
     411                 :    offset_top  current top pointer
     412                 :    md          pointer to "static" info for the match
     413                 :    ims         current /i, /m, and /s options
     414                 :    eptrb       pointer to chain of blocks containing eptr at start of
     415                 :                  brackets - for testing for empty matches
     416                 :    flags       can contain
     417                 :                  match_condassert - this is an assertion condition
     418                 :                  match_cbegroup - this is the start of an unlimited repeat
     419                 :                    group that can match an empty string
     420                 :    rdepth      the recursion depth
     421                 : 
     422                 : Returns:       MATCH_MATCH if matched            )  these values are >= 0
     423                 :                MATCH_NOMATCH if failed to match  )
     424                 :                a negative PCRE_ERROR_xxx value if aborted by an error condition
     425                 :                  (e.g. stopped by repeated call or recursion limit)
     426                 : */
     427                 : 
     428                 : static int
     429                 : match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
     430                 :   int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
     431                 :   int flags, unsigned int rdepth)
     432         1509979 : {
     433                 : /* These variables do not need to be preserved over recursion in this function,
     434                 : so they can be ordinary variables in all cases. Mark some of them with
     435                 : "register" because they are used a lot in loops. */
     436                 : 
     437                 : register int  rrc;         /* Returns from recursive calls */
     438                 : register int  i;           /* Used for loops not involving calls to RMATCH() */
     439                 : register unsigned int c;   /* Character values not kept over RMATCH() calls */
     440                 : register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
     441                 : 
     442                 : BOOL minimize, possessive; /* Quantifier options */
     443                 : int condcode;
     444                 : 
     445                 : /* When recursion is not being used, all "local" variables that have to be
     446                 : preserved over calls to RMATCH() are part of a "frame" which is obtained from
     447                 : heap storage. Set up the top-level frame here; others are obtained from the
     448                 : heap whenever RMATCH() does a "recursion". See the macro definitions above. */
     449                 : 
     450                 : #ifdef NO_RECURSE
     451                 : heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
     452                 : frame->Xprevframe = NULL;            /* Marks the top level */
     453                 : 
     454                 : /* Copy in the original argument variables */
     455                 : 
     456                 : frame->Xeptr = eptr;
     457                 : frame->Xecode = ecode;
     458                 : frame->Xmstart = mstart;
     459                 : frame->Xoffset_top = offset_top;
     460                 : frame->Xims = ims;
     461                 : frame->Xeptrb = eptrb;
     462                 : frame->Xflags = flags;
     463                 : frame->Xrdepth = rdepth;
     464                 : 
     465                 : /* This is where control jumps back to in order to effect "recursion" */
     466                 : 
     467                 : HEAP_RECURSE:
     468                 : 
     469                 : /* Macros make the argument variables come from the current frame */
     470                 : 
     471                 : #define eptr               frame->Xeptr
     472                 : #define ecode              frame->Xecode
     473                 : #define mstart             frame->Xmstart
     474                 : #define offset_top         frame->Xoffset_top
     475                 : #define ims                frame->Xims
     476                 : #define eptrb              frame->Xeptrb
     477                 : #define flags              frame->Xflags
     478                 : #define rdepth             frame->Xrdepth
     479                 : 
     480                 : /* Ditto for the local variables */
     481                 : 
     482                 : #ifdef SUPPORT_UTF8
     483                 : #define charptr            frame->Xcharptr
     484                 : #endif
     485                 : #define callpat            frame->Xcallpat
     486                 : #define codelink           frame->Xcodelink
     487                 : #define data               frame->Xdata
     488                 : #define next               frame->Xnext
     489                 : #define pp                 frame->Xpp
     490                 : #define prev               frame->Xprev
     491                 : #define saved_eptr         frame->Xsaved_eptr
     492                 : 
     493                 : #define new_recursive      frame->Xnew_recursive
     494                 : 
     495                 : #define cur_is_word        frame->Xcur_is_word
     496                 : #define condition          frame->Xcondition
     497                 : #define prev_is_word       frame->Xprev_is_word
     498                 : 
     499                 : #define original_ims       frame->Xoriginal_ims
     500                 : 
     501                 : #ifdef SUPPORT_UCP
     502                 : #define prop_type          frame->Xprop_type
     503                 : #define prop_value         frame->Xprop_value
     504                 : #define prop_fail_result   frame->Xprop_fail_result
     505                 : #define prop_category      frame->Xprop_category
     506                 : #define prop_chartype      frame->Xprop_chartype
     507                 : #define prop_script        frame->Xprop_script
     508                 : #define oclength           frame->Xoclength
     509                 : #define occhars            frame->Xocchars
     510                 : #endif
     511                 : 
     512                 : #define ctype              frame->Xctype
     513                 : #define fc                 frame->Xfc
     514                 : #define fi                 frame->Xfi
     515                 : #define length             frame->Xlength
     516                 : #define max                frame->Xmax
     517                 : #define min                frame->Xmin
     518                 : #define number             frame->Xnumber
     519                 : #define offset             frame->Xoffset
     520                 : #define op                 frame->Xop
     521                 : #define save_capture_last  frame->Xsave_capture_last
     522                 : #define save_offset1       frame->Xsave_offset1
     523                 : #define save_offset2       frame->Xsave_offset2
     524                 : #define save_offset3       frame->Xsave_offset3
     525                 : #define stacksave          frame->Xstacksave
     526                 : 
     527                 : #define newptrb            frame->Xnewptrb
     528                 : 
     529                 : /* When recursion is being used, local variables are allocated on the stack and
     530                 : get preserved during recursion in the normal way. In this environment, fi and
     531                 : i, and fc and c, can be the same variables. */
     532                 : 
     533                 : #else         /* NO_RECURSE not defined */
     534                 : #define fi i
     535                 : #define fc c
     536                 : 
     537                 : 
     538                 : #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
     539                 : const uschar *charptr;             /* in small blocks of the code. My normal */
     540                 : #endif                             /* style of coding would have declared    */
     541                 : const uschar *callpat;             /* them within each of those blocks.      */
     542                 : const uschar *data;                /* However, in order to accommodate the   */
     543                 : const uschar *next;                /* version of this code that uses an      */
     544                 : USPTR         pp;                  /* external "stack" implemented on the    */
     545                 : const uschar *prev;                /* heap, it is easier to declare them all */
     546                 : USPTR         saved_eptr;          /* here, so the declarations can be cut   */
     547                 :                                    /* out in a block. The only declarations  */
     548                 : recursion_info new_recursive;      /* within blocks below are for variables  */
     549                 :                                    /* that do not have to be preserved over  */
     550                 : BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
     551                 : BOOL condition;
     552                 : BOOL prev_is_word;
     553                 : 
     554                 : unsigned long int original_ims;
     555                 : 
     556                 : #ifdef SUPPORT_UCP
     557                 : int prop_type;
     558                 : int prop_value;
     559                 : int prop_fail_result;
     560                 : int prop_category;
     561                 : int prop_chartype;
     562                 : int prop_script;
     563                 : int oclength;
     564                 : uschar occhars[8];
     565                 : #endif
     566                 : 
     567                 : int codelink;
     568                 : int ctype;
     569                 : int length;
     570                 : int max;
     571                 : int min;
     572                 : int number;
     573                 : int offset;
     574                 : int op;
     575                 : int save_capture_last;
     576                 : int save_offset1, save_offset2, save_offset3;
     577                 : int stacksave[REC_STACK_SAVE_MAX];
     578                 : 
     579                 : eptrblock newptrb;
     580                 : #endif     /* NO_RECURSE */
     581                 : 
     582                 : /* These statements are here to stop the compiler complaining about uninitialized
     583                 : variables. */
     584                 : 
     585                 : #ifdef SUPPORT_UCP
     586         1509979 : prop_value = 0;
     587         1509979 : prop_fail_result = 0;
     588                 : #endif
     589                 : 
     590                 : 
     591                 : /* This label is used for tail recursion, which is used in a few cases even
     592                 : when NO_RECURSE is not defined, in order to reduce the amount of stack that is
     593                 : used. Thanks to Ian Taylor for noticing this possibility and sending the
     594                 : original patch. */
     595                 : 
     596         2088044 : TAIL_RECURSE:
     597                 : 
     598                 : /* OK, now we can get on with the real code of the function. Recursive calls
     599                 : are specified by the macro RMATCH and RRETURN is used to return. When
     600                 : NO_RECURSE is *not* defined, these just turn into a recursive call to match()
     601                 : and a "return", respectively (possibly with some debugging if DEBUG is
     602                 : defined). However, RMATCH isn't like a function call because it's quite a
     603                 : complicated macro. It has to be used in one particular way. This shouldn't,
     604                 : however, impact performance when true recursion is being used. */
     605                 : 
     606                 : #ifdef SUPPORT_UTF8
     607         2088044 : utf8 = md->utf8;       /* Local copy of the flag */
     608                 : #else
     609                 : utf8 = FALSE;
     610                 : #endif
     611                 : 
     612                 : /* First check that we haven't called match() too many times, or that we
     613                 : haven't exceeded the recursive call limit. */
     614                 : 
     615         2088044 : if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
     616         2088041 : if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
     617                 : 
     618         2088038 : original_ims = ims;    /* Save for resetting on ')' */
     619                 : 
     620                 : /* At the start of a group with an unlimited repeat that may match an empty
     621                 : string, the match_cbegroup flag is set. When this is the case, add the current
     622                 : subject pointer to the chain of such remembered pointers, to be checked when we
     623                 : hit the closing ket, in order to break infinite loops that match no characters.
     624                 : When match() is called in other circumstances, don't add to the chain. The
     625                 : match_cbegroup flag must NOT be used with tail recursion, because the memory
     626                 : block that is used is on the stack, so a new one may be required for each
     627                 : match(). */
     628                 : 
     629         2088038 : if ((flags & match_cbegroup) != 0)
     630                 :   {
     631          100167 :   newptrb.epb_saved_eptr = eptr;
     632          100167 :   newptrb.epb_prev = eptrb;
     633          100167 :   eptrb = &newptrb;
     634                 :   }
     635                 : 
     636                 : /* Now start processing the opcodes. */
     637                 : 
     638                 : for (;;)
     639                 :   {
     640        18231589 :   minimize = possessive = FALSE;
     641        18231589 :   op = *ecode;
     642                 : 
     643                 :   /* For partial matching, remember if we ever hit the end of the subject after
     644                 :   matching at least one subject character. */
     645                 : 
     646        18231589 :   if (md->partial &&
     647                 :       eptr >= md->end_subject &&
     648                 :       eptr > mstart)
     649               0 :     md->hitend = TRUE;
     650                 : 
     651        18231589 :   switch(op)
     652                 :     {
     653                 :     case OP_FAIL:
     654               0 :     RRETURN(MATCH_NOMATCH);
     655                 : 
     656                 :     case OP_PRUNE:
     657               0 :     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
     658                 :       ims, eptrb, flags, RM51);
     659               0 :     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
     660               0 :     RRETURN(MATCH_PRUNE);
     661                 : 
     662                 :     case OP_COMMIT:
     663               0 :     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
     664                 :       ims, eptrb, flags, RM52);
     665               0 :     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
     666               0 :     RRETURN(MATCH_COMMIT);
     667                 : 
     668                 :     case OP_SKIP:
     669               0 :     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
     670                 :       ims, eptrb, flags, RM53);
     671               0 :     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
     672               0 :     md->start_match_ptr = eptr;   /* Pass back current position */
     673               0 :     RRETURN(MATCH_SKIP);
     674                 : 
     675                 :     case OP_THEN:
     676               0 :     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
     677                 :       ims, eptrb, flags, RM54);
     678               0 :     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
     679               0 :     RRETURN(MATCH_THEN);
     680                 : 
     681                 :     /* Handle a capturing bracket. If there is space in the offset vector, save
     682                 :     the current subject position in the working slot at the top of the vector.
     683                 :     We mustn't change the current values of the data slot, because they may be
     684                 :     set from a previous iteration of this group, and be referred to by a
     685                 :     reference inside the group.
     686                 : 
     687                 :     If the bracket fails to match, we need to restore this value and also the
     688                 :     values of the final offsets, in case they were set by a previous iteration
     689                 :     of the same bracket.
     690                 : 
     691                 :     If there isn't enough space in the offset vector, treat this as if it were
     692                 :     a non-capturing bracket. Don't worry about setting the flag for the error
     693                 :     case here; that is handled in the code for KET. */
     694                 : 
     695                 :     case OP_CBRA:
     696                 :     case OP_SCBRA:
     697          119435 :     number = GET2(ecode, 1+LINK_SIZE);
     698          119435 :     offset = number << 1;
     699                 : 
     700                 : #ifdef DEBUG
     701                 :     printf("start bracket %d\n", number);
     702                 :     printf("subject=");
     703                 :     pchars(eptr, 16, TRUE, md);
     704                 :     printf("\n");
     705                 : #endif
     706                 : 
     707          119435 :     if (offset < md->offset_max)
     708                 :       {
     709          115141 :       save_offset1 = md->offset_vector[offset];
     710          115141 :       save_offset2 = md->offset_vector[offset+1];
     711          115141 :       save_offset3 = md->offset_vector[md->offset_end - number];
     712          115141 :       save_capture_last = md->capture_last;
     713                 : 
     714                 :       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
     715          115141 :       md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
     716                 : 
     717          115141 :       flags = (op == OP_SCBRA)? match_cbegroup : 0;
     718                 :       do
     719                 :         {
     720          165370 :         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
     721                 :           ims, eptrb, flags, RM1);
     722          165370 :         if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
     723          130974 :         md->capture_last = save_capture_last;
     724          130974 :         ecode += GET(ecode, 1);
     725                 :         }
     726          130974 :       while (*ecode == OP_ALT);
     727                 : 
     728                 :       DPRINTF(("bracket %d failed\n", number));
     729                 : 
     730           80745 :       md->offset_vector[offset] = save_offset1;
     731           80745 :       md->offset_vector[offset+1] = save_offset2;
     732           80745 :       md->offset_vector[md->offset_end - number] = save_offset3;
     733                 : 
     734           80745 :       RRETURN(MATCH_NOMATCH);
     735                 :       }
     736                 : 
     737                 :     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
     738                 :     as a non-capturing bracket. */
     739                 : 
     740                 :     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
     741                 :     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
     742                 : 
     743                 :     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
     744                 : 
     745                 :     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
     746                 :     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
     747                 : 
     748                 :     /* Non-capturing bracket. Loop for all the alternatives. When we get to the
     749                 :     final alternative within the brackets, we would return the result of a
     750                 :     recursive call to match() whatever happened. We can reduce stack usage by
     751                 :     turning this into a tail recursion, except in the case when match_cbegroup
     752                 :     is set. */
     753                 : 
     754                 :     case OP_BRA:
     755                 :     case OP_SBRA:
     756                 :     DPRINTF(("start non-capturing bracket\n"));
     757          577030 :     flags = (op >= OP_SBRA)? match_cbegroup : 0;
     758                 :     for (;;)
     759                 :       {
     760          577211 :       if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
     761                 :         {
     762          576948 :         if (flags == 0)    /* Not a possibly empty group */
     763                 :           {
     764          576943 :           ecode += _pcre_OP_lengths[*ecode];
     765                 :           DPRINTF(("bracket 0 tail recursion\n"));
     766          576943 :           goto TAIL_RECURSE;
     767                 :           }
     768                 : 
     769                 :         /* Possibly empty group; can't use tail recursion. */
     770                 : 
     771               5 :         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
     772                 :           eptrb, flags, RM48);
     773               5 :         RRETURN(rrc);
     774                 :         }
     775                 : 
     776                 :       /* For non-final alternatives, continue the loop for a NOMATCH result;
     777                 :       otherwise return. */
     778                 : 
     779             263 :       RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
     780                 :         eptrb, flags, RM2);
     781             263 :       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
     782             181 :       ecode += GET(ecode, 1);
     783             181 :       }
     784                 :     /* Control never reaches here. */
     785                 : 
     786                 :     /* Conditional group: compilation checked that there are no more than
     787                 :     two branches. If the condition is false, skipping the first branch takes us
     788                 :     past the end if there is only one branch, but that's OK because that is
     789                 :     exactly what going to the ket would do. As there is only one branch to be
     790                 :     obeyed, we can use tail recursion to avoid using another stack frame. */
     791                 : 
     792                 :     case OP_COND:
     793                 :     case OP_SCOND:
     794              52 :     codelink= GET(ecode, 1);
     795                 : 
     796                 :     /* Because of the way auto-callout works during compile, a callout item is
     797                 :     inserted between OP_COND and an assertion condition. */
     798                 : 
     799              52 :     if (ecode[LINK_SIZE+1] == OP_CALLOUT)
     800                 :       {
     801               0 :       if (pcre_callout != NULL)
     802                 :         {
     803                 :         pcre_callout_block cb;
     804               0 :         cb.version          = 1;   /* Version 1 of the callout block */
     805               0 :         cb.callout_number   = ecode[LINK_SIZE+2];
     806               0 :         cb.offset_vector    = md->offset_vector;
     807               0 :         cb.subject          = (PCRE_SPTR)md->start_subject;
     808               0 :         cb.subject_length   = md->end_subject - md->start_subject;
     809               0 :         cb.start_match      = mstart - md->start_subject;
     810               0 :         cb.current_position = eptr - md->start_subject;
     811               0 :         cb.pattern_position = GET(ecode, LINK_SIZE + 3);
     812               0 :         cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
     813               0 :         cb.capture_top      = offset_top/2;
     814               0 :         cb.capture_last     = md->capture_last;
     815               0 :         cb.callout_data     = md->callout_data;
     816               0 :         if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
     817               0 :         if (rrc < 0) RRETURN(rrc);
     818                 :         }
     819               0 :       ecode += _pcre_OP_lengths[OP_CALLOUT];
     820                 :       }
     821                 : 
     822              52 :     condcode = ecode[LINK_SIZE+1];
     823                 : 
     824                 :     /* Now see what the actual condition is */
     825                 : 
     826              52 :     if (condcode == OP_RREF)         /* Recursion test */
     827                 :       {
     828               0 :       offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
     829               0 :       condition = md->recursive != NULL &&
     830                 :         (offset == RREF_ANY || offset == md->recursive->group_num);
     831               0 :       ecode += condition? 3 : GET(ecode, 1);
     832                 :       }
     833                 : 
     834              52 :     else if (condcode == OP_CREF)    /* Group used test */
     835                 :       {
     836              52 :       offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
     837              52 :       condition = offset < offset_top && md->offset_vector[offset] >= 0;
     838              52 :       ecode += condition? 3 : GET(ecode, 1);
     839                 :       }
     840                 : 
     841               0 :     else if (condcode == OP_DEF)     /* DEFINE - always false */
     842                 :       {
     843               0 :       condition = FALSE;
     844               0 :       ecode += GET(ecode, 1);
     845                 :       }
     846                 : 
     847                 :     /* The condition is an assertion. Call match() to evaluate it - setting
     848                 :     the final argument match_condassert causes it to stop at the end of an
     849                 :     assertion. */
     850                 : 
     851                 :     else
     852                 :       {
     853               0 :       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
     854                 :           match_condassert, RM3);
     855               0 :       if (rrc == MATCH_MATCH)
     856                 :         {
     857               0 :         condition = TRUE;
     858               0 :         ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
     859               0 :         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
     860                 :         }
     861               0 :       else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
     862                 :         {
     863               0 :         RRETURN(rrc);         /* Need braces because of following else */
     864                 :         }
     865                 :       else
     866                 :         {
     867               0 :         condition = FALSE;
     868               0 :         ecode += codelink;
     869                 :         }
     870                 :       }
     871                 : 
     872                 :     /* We are now at the branch that is to be obeyed. As there is only one,
     873                 :     we can use tail recursion to avoid using another stack frame, except when
     874                 :     match_cbegroup is required for an unlimited repeat of a possibly empty
     875                 :     group. If the second alternative doesn't exist, we can just plough on. */
     876                 : 
     877              52 :     if (condition || *ecode == OP_ALT)
     878                 :       {
     879              52 :       ecode += 1 + LINK_SIZE;
     880              52 :       if (op == OP_SCOND)        /* Possibly empty group */
     881                 :         {
     882               0 :         RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
     883               0 :         RRETURN(rrc);
     884                 :         }
     885                 :       else                       /* Group must match something */
     886                 :         {
     887              52 :         flags = 0;
     888              52 :         goto TAIL_RECURSE;
     889                 :         }
     890                 :       }
     891                 :     else                         /* Condition false & no alternative */
     892                 :       {
     893               0 :       ecode += 1 + LINK_SIZE;
     894                 :       }
     895               0 :     break;
     896                 : 
     897                 : 
     898                 :     /* End of the pattern, either real or forced. If we are in a top-level
     899                 :     recursion, we should restore the offsets appropriately and continue from
     900                 :     after the call. */
     901                 : 
     902                 :     case OP_ACCEPT:
     903                 :     case OP_END:
     904           56550 :     if (md->recursive != NULL && md->recursive->group_num == 0)
     905                 :       {
     906               1 :       recursion_info *rec = md->recursive;
     907                 :       DPRINTF(("End of pattern in a (?0) recursion\n"));
     908               1 :       md->recursive = rec->prevrec;
     909               1 :       memmove(md->offset_vector, rec->offset_save,
     910                 :         rec->saved_max * sizeof(int));
     911               1 :       mstart = rec->save_start;
     912               1 :       ims = original_ims;
     913               1 :       ecode = rec->after_call;
     914               1 :       break;
     915                 :       }
     916                 : 
     917                 :     /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
     918                 :     string - backtracking will then try other alternatives, if any. */
     919                 : 
     920           56549 :     if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
     921           56478 :     md->end_match_ptr = eptr;           /* Record where we ended */
     922           56478 :     md->end_offset_top = offset_top;    /* and how many extracts were taken */
     923           56478 :     md->start_match_ptr = mstart;       /* and the start (\K can modify) */
     924           56478 :     RRETURN(MATCH_MATCH);
     925                 : 
     926                 :     /* Change option settings */
     927                 : 
     928                 :     case OP_OPT:
     929               0 :     ims = ecode[1];
     930               0 :     ecode += 2;
     931                 :     DPRINTF(("ims set to %02lx\n", ims));
     932               0 :     break;
     933                 : 
     934                 :     /* Assertion brackets. Check the alternative branches in turn - the
     935                 :     matching won't pass the KET for an assertion. If any one branch matches,
     936                 :     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
     937                 :     start of each branch to move the current point backwards, so the code at
     938                 :     this level is identical to the lookahead case. */
     939                 : 
     940                 :     case OP_ASSERT:
     941                 :     case OP_ASSERTBACK:
     942                 :     do
     943                 :       {
     944              83 :       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
     945                 :         RM4);
     946              83 :       if (rrc == MATCH_MATCH) break;
     947               0 :       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
     948               0 :       ecode += GET(ecode, 1);
     949                 :       }
     950               0 :     while (*ecode == OP_ALT);
     951              83 :     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
     952                 : 
     953                 :     /* If checking an assertion for a condition, return MATCH_MATCH. */
     954                 : 
     955              83 :     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
     956                 : 
     957                 :     /* Continue from after the assertion, updating the offsets high water
     958                 :     mark, since extracts may have been taken during the assertion. */
     959                 : 
     960              83 :     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
     961              83 :     ecode += 1 + LINK_SIZE;
     962              83 :     offset_top = md->end_offset_top;
     963              83 :     continue;
     964                 : 
     965                 :     /* Negative assertion: all branches must fail to match */
     966                 : 
     967                 :     case OP_ASSERT_NOT:
     968                 :     case OP_ASSERTBACK_NOT:
     969                 :     do
     970                 :       {
     971              26 :       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
     972                 :         RM5);
     973              26 :       if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
     974              13 :       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
     975              13 :       ecode += GET(ecode,1);
     976                 :       }
     977              13 :     while (*ecode == OP_ALT);
     978                 : 
     979              13 :     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
     980                 : 
     981              13 :     ecode += 1 + LINK_SIZE;
     982              13 :     continue;
     983                 : 
     984                 :     /* Move the subject pointer back. This occurs only at the start of
     985                 :     each branch of a lookbehind assertion. If we are too close to the start to
     986                 :     move back, this match function fails. When working with UTF-8 we move
     987                 :     back a number of characters, not bytes. */
     988                 : 
     989                 :     case OP_REVERSE:
     990                 : #ifdef SUPPORT_UTF8
     991              18 :     if (utf8)
     992                 :       {
     993               0 :       i = GET(ecode, 1);
     994               0 :       while (i-- > 0)
     995                 :         {
     996               0 :         eptr--;
     997               0 :         if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
     998               0 :         BACKCHAR(eptr);
     999                 :         }
    1000                 :       }
    1001                 :     else
    1002                 : #endif
    1003                 : 
    1004                 :     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
    1005                 : 
    1006                 :       {
    1007              18 :       eptr -= GET(ecode, 1);
    1008              18 :       if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
    1009                 :       }
    1010                 : 
    1011                 :     /* Skip to next op code */
    1012                 : 
    1013              17 :     ecode += 1 + LINK_SIZE;
    1014              17 :     break;
    1015                 : 
    1016                 :     /* The callout item calls an external function, if one is provided, passing
    1017                 :     details of the match so far. This is mainly for debugging, though the
    1018                 :     function is able to force a failure. */
    1019                 : 
    1020                 :     case OP_CALLOUT:
    1021               0 :     if (pcre_callout != NULL)
    1022                 :       {
    1023                 :       pcre_callout_block cb;
    1024               0 :       cb.version          = 1;   /* Version 1 of the callout block */
    1025               0 :       cb.callout_number   = ecode[1];
    1026               0 :       cb.offset_vector    = md->offset_vector;
    1027               0 :       cb.subject          = (PCRE_SPTR)md->start_subject;
    1028               0 :       cb.subject_length   = md->end_subject - md->start_subject;
    1029               0 :       cb.start_match      = mstart - md->start_subject;
    1030               0 :       cb.current_position = eptr - md->start_subject;
    1031               0 :       cb.pattern_position = GET(ecode, 2);
    1032               0 :       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
    1033               0 :       cb.capture_top      = offset_top/2;
    1034               0 :       cb.capture_last     = md->capture_last;
    1035               0 :       cb.callout_data     = md->callout_data;
    1036               0 :       if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
    1037               0 :       if (rrc < 0) RRETURN(rrc);
    1038                 :       }
    1039               0 :     ecode += 2 + 2*LINK_SIZE;
    1040               0 :     break;
    1041                 : 
    1042                 :     /* Recursion either matches the current regex, or some subexpression. The
    1043                 :     offset data is the offset to the starting bracket from the start of the
    1044                 :     whole pattern. (This is so that it works from duplicated subpatterns.)
    1045                 : 
    1046                 :     If there are any capturing brackets started but not finished, we have to
    1047                 :     save their starting points and reinstate them after the recursion. However,
    1048                 :     we don't know how many such there are (offset_top records the completed
    1049                 :     total) so we just have to save all the potential data. There may be up to
    1050                 :     65535 such values, which is too large to put on the stack, but using malloc
    1051                 :     for small numbers seems expensive. As a compromise, the stack is used when
    1052                 :     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
    1053                 :     is used. If the malloc fails, the match is abandoned at this point and
    1054                 :     PCRE_ERROR_NOMEMORY is returned. A malloc'd save area is released again
    1055                 :     on each return after the recursion (match, no match, or error).
    1056                 : 
    1057                 :     There are also other values that have to be saved. We use a chained
    1058                 :     sequence of blocks that actually live on the stack. Thanks to Robin Houston
    1059                 :     for the original version of this logic. */
    1060                 : 
    1061                 :     case OP_RECURSE:
    1062                 :       {
    1063           25006 :       callpat = md->start_code + GET(ecode, 1);
    1064           25006 :       new_recursive.group_num = (callpat == md->start_code)? 0 :
    1065                 :         GET2(callpat, 1 + LINK_SIZE);
    1066                 : 
    1067                 :       /* Add to "recursing stack" */
    1068                 : 
    1069           25006 :       new_recursive.prevrec = md->recursive;
    1070           25006 :       md->recursive = &new_recursive;
    1071                 : 
    1072                 :       /* Find where to continue from afterwards */
    1073                 : 
    1074           25006 :       ecode += 1 + LINK_SIZE;
    1075           25006 :       new_recursive.after_call = ecode;
    1076                 : 
    1077                 :       /* Now save the offset data. */
    1078                 : 
    1079           25006 :       new_recursive.saved_max = md->offset_end;
    1080           25006 :       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
    1081           25006 :         new_recursive.offset_save = stacksave;
    1082                 :       else
    1083                 :         {
    1084               0 :         new_recursive.offset_save =
    1085                 :           (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
    1086               0 :         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
    1087                 :         }
    1088                 : 
    1089           25006 :       memcpy(new_recursive.offset_save, md->offset_vector,
    1090                 :             new_recursive.saved_max * sizeof(int));
    1091           25006 :       new_recursive.save_start = mstart;
    1092           25006 :       mstart = eptr;
    1093                 : 
    1094                 :       /* OK, now we can do the recursion. For each top-level alternative we
    1095                 :       restore the offset and recursion data. */
    1096                 : 
    1097                 :       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
    1098           25006 :       flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
    1099                 :       do
    1100                 :         {
    1101           25006 :         RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
    1102                 :           md, ims, eptrb, flags, RM6);
    1103           25006 :         if (rrc == MATCH_MATCH)
    1104                 :           {
    1105                 :           DPRINTF(("Recursion matched\n"));
    1106               1 :           md->recursive = new_recursive.prevrec;
    1107               1 :           if (new_recursive.offset_save != stacksave)
    1108               0 :             (pcre_free)(new_recursive.offset_save);
    1109               1 :           RRETURN(MATCH_MATCH);
    1110                 :           }
    1111           25005 :         else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
    1112                 :           {
    1113                 :           DPRINTF(("Recursion gave error %d\n", rrc));
    1114               0 :           if (new_recursive.offset_save != stacksave)
    1115               0 :             (pcre_free)(new_recursive.offset_save);
    1116               0 :           RRETURN(rrc);
    1117                 :           }
    1118                 : 
    1119           25005 :         md->recursive = &new_recursive;
    1120           25005 :         memcpy(md->offset_vector, new_recursive.offset_save,
    1121                 :             new_recursive.saved_max * sizeof(int));
    1122           25005 :         callpat += GET(callpat, 1);
    1123                 :         }
    1124           25005 :       while (*callpat == OP_ALT);
    1125                 : 
    1126                 :       DPRINTF(("Recursion didn't match\n"));
    1127           25005 :       md->recursive = new_recursive.prevrec;
    1128           25005 :       if (new_recursive.offset_save != stacksave)
    1129               0 :         (pcre_free)(new_recursive.offset_save);
    1130           25005 :       RRETURN(MATCH_NOMATCH);
    1131                 :       }
    1132                 :     /* Control never reaches here */
    1133                 : 
    1134                 :     /* "Once" brackets are like assertion brackets except that after a match,
    1135                 :     the point in the subject string is not moved back. Thus there can never be
    1136                 :     a move back into the brackets. Friedl calls these "atomic" subpatterns.
    1137                 :     Check the alternative branches in turn - the matching won't pass the KET
    1138                 :     for this kind of subpattern. If any one branch matches, we carry on as at
    1139                 :     the end of a normal bracket, leaving the subject pointer. */
    1140                 : 
    1141                 :     case OP_ONCE:
    1142           25008 :     prev = ecode;
    1143           25008 :     saved_eptr = eptr;
    1144                 : 
    1145                 :     do
    1146                 :       {
    1147           25008 :       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
    1148           25008 :       if (rrc == MATCH_MATCH) break;
    1149           25005 :       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
    1150           25005 :       ecode += GET(ecode,1);
    1151                 :       }
    1152           25005 :     while (*ecode == OP_ALT);
    1153                 : 
    1154                 :     /* If we hit the end of the group (which could be repeated), fail */
    1155                 : 
    1156           25008 :     if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
    1157                 : 
    1158                 :     /* Continue as from after the assertion, updating the offsets high water
    1159                 :     mark, since extracts may have been taken. */
    1160                 : 
    1161               3 :     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
    1162                 : 
    1163               3 :     offset_top = md->end_offset_top;
    1164               3 :     eptr = md->end_match_ptr;
    1165                 : 
    1166                 :     /* For a non-repeating ket, just continue at this level. This also
    1167                 :     happens for a repeating ket if no characters were matched in the group.
    1168                 :     This is the forcible breaking of infinite loops as implemented in Perl
    1169                 :     5.005. If there is an options reset, it will get obeyed in the normal
    1170                 :     course of events. */
    1171                 : 
    1172               3 :     if (*ecode == OP_KET || eptr == saved_eptr)
    1173                 :       {
    1174               3 :       ecode += 1+LINK_SIZE;
    1175               3 :       break;
    1176                 :       }
    1177                 : 
    1178                 :     /* The repeating kets try the rest of the pattern or restart from the
    1179                 :     preceding bracket, in the appropriate order. The second "call" of match()
    1180                 :     uses tail recursion, to avoid using another stack frame. We need to reset
    1181                 :     any options that changed within the bracket before re-running it, so
    1182                 :     check the next opcode. */
    1183                 : 
    1184               0 :     if (ecode[1+LINK_SIZE] == OP_OPT)
    1185                 :       {
    1186               0 :       ims = (ims & ~PCRE_IMS) | ecode[4];
    1187                 :       DPRINTF(("ims set to %02lx at group repeat\n", ims));
    1188                 :       }
    1189                 : 
    1190               0 :     if (*ecode == OP_KETRMIN)
    1191                 :       {
    1192               0 :       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
    1193               0 :       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    1194               0 :       ecode = prev;
    1195               0 :       flags = 0;
    1196               0 :       goto TAIL_RECURSE;
    1197                 :       }
    1198                 :     else  /* OP_KETRMAX */
    1199                 :       {
    1200               0 :       RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
    1201               0 :       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    1202               0 :       ecode += 1 + LINK_SIZE;
    1203               0 :       flags = 0;
    1204               0 :       goto TAIL_RECURSE;
    1205                 :       }
    1206                 :     /* Control never gets here */
    1207                 : 
    1208                 :     /* An alternation is the end of a branch; scan along to find the end of the
    1209                 :     bracketed group and go to there. */
    1210                 : 
    1211                 :     case OP_ALT:
    1212           24908 :     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
    1213           24853 :     break;
    1214                 : 
    1215                 :     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
    1216                 :     indicating that it may occur zero times. It may repeat infinitely, or not
    1217                 :     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
    1218                 :     with fixed upper repeat limits are compiled as a number of copies, with the
    1219                 :     optional ones preceded by BRAZERO or BRAMINZERO. */
    1220                 : 
    1221                 :     case OP_BRAZERO:
    1222                 :       {
    1223            5491 :       next = ecode+1;
    1224            5491 :       RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
    1225            5491 :       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    1226            5348 :       do next += GET(next,1); while (*next == OP_ALT);
    1227            5347 :       ecode = next + 1 + LINK_SIZE;
    1228                 :       }
    1229            5347 :     break;
    1230                 : 
    1231                 :     case OP_BRAMINZERO:
    1232                 :       {
    1233              14 :       next = ecode+1;
    1234              19 :       do next += GET(next, 1); while (*next == OP_ALT);
    1235              14 :       RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
    1236              14 :       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    1237               9 :       ecode++;
    1238                 :       }
    1239               9 :     break;
    1240                 : 
    1241                 :     case OP_SKIPZERO:
    1242                 :       {
    1243               0 :       next = ecode+1;
    1244               0 :       do next += GET(next,1); while (*next == OP_ALT);
    1245               0 :       ecode = next + 1 + LINK_SIZE;
    1246                 :       }
    1247               0 :     break;
    1248                 : 
    1249                 :     /* End of a group, repeated or non-repeating. */
    1250                 : 
    1251                 :     case OP_KET:
    1252                 :     case OP_KETRMIN:
    1253                 :     case OP_KETRMAX:
    1254          150635 :     prev = ecode - GET(ecode, 1);
    1255                 : 
    1256                 :     /* If this was a group that remembered the subject start, in order to break
    1257                 :     infinite repeats of empty string matches, retrieve the subject start from
    1258                 :     the chain. Otherwise, set it NULL. */
    1259                 : 
    1260          150635 :     if (*prev >= OP_SBRA)
    1261                 :       {
    1262           25090 :       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
    1263           25090 :       eptrb = eptrb->epb_prev;              /* Backup to previous group */
    1264                 :       }
    1265          125545 :     else saved_eptr = NULL;
    1266                 : 
    1267                 :     /* If we are at the end of an assertion group, stop matching and return
    1268                 :     MATCH_MATCH, but record the current high water mark for use by positive
    1269                 :     assertions. Do this also for the "once" (atomic) groups. */
    1270                 : 
    1271          150635 :     if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
    1272                 :         *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
    1273                 :         *prev == OP_ONCE)
    1274                 :       {
    1275              99 :       md->end_match_ptr = eptr;      /* For ONCE */
    1276              99 :       md->end_offset_top = offset_top;
    1277              99 :       RRETURN(MATCH_MATCH);
    1278                 :       }
    1279                 : 
    1280                 :     /* For capturing groups we have to check the group number back at the start
    1281                 :     and if necessary complete handling an extraction by setting the offsets and
    1282                 :     bumping the high water mark. Note that whole-pattern recursion is coded as
    1283                 :     a recurse into group 0, so it won't be picked up here. Instead, we catch it
    1284                 :     when the OP_END is reached. Other recursion is handled here. */
    1285                 : 
    1286          150536 :     if (*prev == OP_CBRA || *prev == OP_SCBRA)
    1287                 :       {
    1288           93710 :       number = GET2(prev, 1+LINK_SIZE);
    1289           93710 :       offset = number << 1;
    1290                 : 
    1291                 : #ifdef DEBUG
    1292                 :       printf("end bracket %d", number);
    1293                 :       printf("\n");
    1294                 : #endif
    1295                 : 
    1296           93710 :       md->capture_last = number;
    1297           93710 :       if (offset >= md->offset_max) md->offset_overflow = TRUE; else
    1298                 :         {
    1299           90235 :         md->offset_vector[offset] =
    1300                 :           md->offset_vector[md->offset_end - number];
    1301           90235 :         md->offset_vector[offset+1] = eptr - md->start_subject;
    1302           90235 :         if (offset_top <= offset) offset_top = offset + 2;
    1303                 :         }
    1304                 : 
    1305                 :       /* Handle a recursively called group. Restore the offsets
    1306                 :       appropriately and continue from after the call. */
    1307                 : 
    1308           93710 :       if (md->recursive != NULL && md->recursive->group_num == number)
    1309                 :         {
    1310               0 :         recursion_info *rec = md->recursive;
    1311                 :         DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
    1312               0 :         md->recursive = rec->prevrec;
    1313               0 :         mstart = rec->save_start;
    1314               0 :         memcpy(md->offset_vector, rec->offset_save,
    1315                 :           rec->saved_max * sizeof(int));
    1316               0 :         ecode = rec->after_call;
    1317               0 :         ims = original_ims;
    1318               0 :         break;
    1319                 :         }
    1320                 :       }
    1321                 : 
    1322                 :     /* For both capturing and non-capturing groups, reset the value of the ims
    1323                 :     flags, in case they got changed during the group. */
    1324                 : 
    1325          150536 :     ims = original_ims;
    1326                 :     DPRINTF(("ims reset to %02lx\n", ims));
    1327                 : 
    1328                 :     /* For a non-repeating ket, just continue at this level. This also
    1329                 :     happens for a repeating ket if no characters were matched in the group.
    1330                 :     This is the forcible breaking of infinite loops as implemented in Perl
    1331                 :     5.005. If there is an options reset, it will get obeyed in the normal
    1332                 :     course of events. */
    1333                 : 
    1334          150536 :     if (*ecode == OP_KET || eptr == saved_eptr)
    1335                 :       {
    1336          124308 :       ecode += 1 + LINK_SIZE;
    1337          124308 :       break;
    1338                 :       }
    1339                 : 
    1340                 :     /* The repeating kets try the rest of the pattern or restart from the
    1341                 :     preceding bracket, in the appropriate order. In the second case, we can use
    1342                 :     tail recursion to avoid using another stack frame, unless we have an
    1343                 :     unlimited repeat of a group that can match an empty string. */
    1344                 : 
    1345           26228 :     flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
    1346                 : 
    1347           26228 :     if (*ecode == OP_KETRMIN)
    1348                 :       {
    1349           25010 :       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
    1350           25010 :       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    1351           25008 :       if (flags != 0)    /* Could match an empty string */
    1352                 :         {
    1353           25004 :         RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
    1354           25004 :         RRETURN(rrc);
    1355                 :         }
    1356               4 :       ecode = prev;
    1357               4 :       goto TAIL_RECURSE;
    1358                 :       }
    1359                 :     else  /* OP_KETRMAX */
    1360                 :       {
    1361            1218 :       RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
    1362            1218 :       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    1363            1066 :       ecode += 1 + LINK_SIZE;
    1364            1066 :       flags = 0;
    1365            1066 :       goto TAIL_RECURSE;
    1366                 :       }
    1367                 :     /* Control never gets here */
    1368                 : 
    1369                 :     /* Start of subject unless notbol, or after internal newline if multiline */
    1370                 : 
    1371                 :     case OP_CIRC:
    1372          520499 :     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
    1373          520499 :     if ((ims & PCRE_MULTILINE) != 0)
    1374                 :       {
    1375              61 :       if (eptr != md->start_subject &&
    1376                 :           (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
    1377              28 :         RRETURN(MATCH_NOMATCH);
    1378              33 :       ecode++;
    1379              33 :       break;
    1380                 :       }
    1381                 :     /* ... else fall through */
    1382                 : 
    1383                 :     /* Start of subject assertion */
    1384                 : 
    1385                 :     case OP_SOD:
    1386          520442 :     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
    1387          520413 :     ecode++;
    1388          520413 :     break;
    1389                 : 
    1390                 :     /* Start of match assertion */
    1391                 : 
    1392                 :     case OP_SOM:
    1393               0 :     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
    1394               0 :     ecode++;
    1395               0 :     break;
    1396                 : 
    1397                 :     /* Reset the start of match point */
    1398                 : 
    1399                 :     case OP_SET_SOM:
    1400               0 :     mstart = eptr;
    1401               0 :     ecode++;
    1402               0 :     break;
    1403                 : 
    1404                 :     /* Assert before internal newline if multiline, or before a terminating
    1405                 :     newline unless endonly is set, else end of subject unless noteol is set. */
    1406                 : 
    1407                 :     case OP_DOLL:
    1408           29536 :     if ((ims & PCRE_MULTILINE) != 0)
    1409                 :       {
    1410               4 :       if (eptr < md->end_subject)
    1411               3 :         { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
    1412                 :       else
    1413               1 :         { if (md->noteol) RRETURN(MATCH_NOMATCH); }
    1414               4 :       ecode++;
    1415               4 :       break;
    1416                 :       }
    1417                 :     else
    1418                 :       {
    1419           29532 :       if (md->noteol) RRETURN(MATCH_NOMATCH);
    1420           29532 :       if (!md->endonly)
    1421                 :         {
    1422           28930 :         if (eptr != md->end_subject &&
    1423                 :             (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
    1424            8529 :           RRETURN(MATCH_NOMATCH);
    1425           20401 :         ecode++;
    1426           20401 :         break;
    1427                 :         }
    1428                 :       }
    1429                 :     /* ... else fall through for endonly */
    1430                 : 
    1431                 :     /* End of subject assertion (\z) */
    1432                 : 
    1433                 :     case OP_EOD:
    1434             602 :     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
    1435              12 :     ecode++;
    1436              12 :     break;
    1437                 : 
    1438                 :     /* End of subject or ending \n assertion (\Z) */
    1439                 : 
    1440                 :     case OP_EODN:
    1441               0 :     if (eptr != md->end_subject &&
    1442                 :         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
    1443               0 :       RRETURN(MATCH_NOMATCH);
    1444               0 :     ecode++;
    1445               0 :     break;
    1446                 : 
    1447                 :     /* Word boundary assertions */
    1448                 : 
    1449                 :     case OP_NOT_WORD_BOUNDARY:
    1450                 :     case OP_WORD_BOUNDARY:
    1451                 :       {
    1452                 : 
    1453                 :       /* Find out if the previous and current characters are "word" characters.
    1454                 :       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
    1455                 :       be "non-word" characters. */
    1456                 : 
    1457                 : #ifdef SUPPORT_UTF8
    1458             209 :       if (utf8)
    1459                 :         {
    1460               0 :         if (eptr == md->start_subject) prev_is_word = FALSE; else
    1461                 :           {
    1462               0 :           USPTR lastptr = eptr - 1;
    1463               0 :           while((*lastptr & 0xc0) == 0x80) lastptr--;
    1464               0 :           GETCHAR(c, lastptr);
    1465               0 :           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
    1466                 :           }
    1467               0 :         if (eptr >= md->end_subject) cur_is_word = FALSE; else
    1468                 :           {
    1469               0 :           GETCHAR(c, eptr);
    1470               0 :           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
    1471                 :           }
    1472                 :         }
    1473                 :       else
    1474                 : #endif
    1475                 : 
    1476                 :       /* More streamlined when not in UTF-8 mode */
    1477                 : 
    1478                 :         {
    1479             209 :         prev_is_word = (eptr != md->start_subject) &&
    1480                 :           ((md->ctypes[eptr[-1]] & ctype_word) != 0);
    1481             209 :         cur_is_word = (eptr < md->end_subject) &&
    1482                 :           ((md->ctypes[*eptr] & ctype_word) != 0);
    1483                 :         }
    1484                 : 
    1485                 :       /* Now see if the situation is what we want */
    1486                 : 
    1487             209 :       if ((*ecode++ == OP_WORD_BOUNDARY)?
    1488                 :            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
    1489             114 :         RRETURN(MATCH_NOMATCH);
    1490                 :       }
    1491              95 :     break;
    1492                 : 
    1493                 :     /* Match a single character type; inline for speed */
    1494                 : 
    1495                 :     case OP_ANY:
    1496         8398926 :     if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
    1497                 :     /* Fall through */
    1498                 : 
    1499                 :     case OP_ALLANY:
    1500         8399051 :     if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
    1501         8399038 :     if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
    1502         8399038 :     ecode++;
    1503         8399038 :     break;
    1504                 : 
    1505                 :     /* Match a single byte, even in UTF-8 mode. This opcode really does match
    1506                 :     any byte, even newline, independent of the setting of PCRE_DOTALL. */
    1507                 : 
    1508                 :     case OP_ANYBYTE:
    1509               0 :     if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
    1510               0 :     ecode++;
    1511               0 :     break;
    1512                 : 
    1513                 :     case OP_NOT_DIGIT:
    1514               0 :     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    1515               0 :     GETCHARINCTEST(c, eptr);
    1516               0 :     if (
    1517                 : #ifdef SUPPORT_UTF8
    1518                 :        c < 256 &&
    1519                 : #endif
    1520                 :        (md->ctypes[c] & ctype_digit) != 0
    1521                 :        )
    1522               0 :       RRETURN(MATCH_NOMATCH);
    1523               0 :     ecode++;
    1524               0 :     break;
    1525                 : 
    1526                 :     case OP_DIGIT:
    1527             395 :     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    1528             346 :     GETCHARINCTEST(c, eptr);
    1529             346 :     if (
    1530                 : #ifdef SUPPORT_UTF8
    1531                 :        c >= 256 ||
    1532                 : #endif
    1533                 :        (md->ctypes[c] & ctype_digit) == 0
    1534                 :        )
    1535             241 :       RRETURN(MATCH_NOMATCH);
    1536             105 :     ecode++;
    1537             105 :     break;
    1538                 : 
    1539                 :     case OP_NOT_WHITESPACE:
    1540               5 :     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    1541               4 :     GETCHARINCTEST(c, eptr);
    1542               4 :     if (
    1543                 : #ifdef SUPPORT_UTF8
    1544                 :        c < 256 &&
    1545                 : #endif
    1546                 :        (md->ctypes[c] & ctype_space) != 0
    1547                 :        )
    1548               0 :       RRETURN(MATCH_NOMATCH);
    1549               4 :     ecode++;
    1550               4 :     break;
    1551                 : 
    1552                 :     case OP_WHITESPACE:
    1553              31 :     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    1554              30 :     GETCHARINCTEST(c, eptr);
    1555              30 :     if (
    1556                 : #ifdef SUPPORT_UTF8
    1557                 :        c >= 256 ||
    1558                 : #endif
    1559                 :        (md->ctypes[c] & ctype_space) == 0
    1560                 :        )
    1561              11 :       RRETURN(MATCH_NOMATCH);
    1562              19 :     ecode++;
    1563              19 :     break;
    1564                 : 
    1565                 :     case OP_NOT_WORDCHAR:
    1566               4 :     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    1567               3 :     GETCHARINCTEST(c, eptr);
    1568               3 :     if (
    1569                 : #ifdef SUPPORT_UTF8
    1570                 :        c < 256 &&
    1571                 : #endif
    1572                 :        (md->ctypes[c] & ctype_word) != 0
    1573                 :        )
    1574               1 :       RRETURN(MATCH_NOMATCH);
    1575               2 :     ecode++;
    1576               2 :     break;
    1577                 : 
    1578                 :     case OP_WORDCHAR:
    1579             148 :     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    1580             143 :     GETCHARINCTEST(c, eptr);
    1581             143 :     if (
    1582                 : #ifdef SUPPORT_UTF8
    1583                 :        c >= 256 ||
    1584                 : #endif
    1585                 :        (md->ctypes[c] & ctype_word) == 0
    1586                 :        )
    1587              38 :       RRETURN(MATCH_NOMATCH);
    1588             105 :     ecode++;
    1589             105 :     break;
    1590                 : 
    1591                 :     case OP_ANYNL:
    1592               0 :     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    1593               0 :     GETCHARINCTEST(c, eptr);
    1594               0 :     switch(c)
    1595                 :       {
    1596               0 :       default: RRETURN(MATCH_NOMATCH);
    1597                 :       case 0x000d:
    1598               0 :       if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
    1599               0 :       break;
    1600                 : 
    1601                 :       case 0x000a:
    1602               0 :       break;
    1603                 : 
    1604                 :       case 0x000b:
    1605                 :       case 0x000c:
    1606                 :       case 0x0085:
    1607                 :       case 0x2028:
    1608                 :       case 0x2029:
    1609               0 :       if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
    1610                 :       break;
    1611                 :       }
    1612               0 :     ecode++;
    1613               0 :     break;
    1614                 : 
    1615                 :     case OP_NOT_HSPACE:
    1616               0 :     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    1617               0 :     GETCHARINCTEST(c, eptr);
    1618               0 :     switch(c)
    1619                 :       {
    1620                 :       default: break;
    1621                 :       case 0x09:      /* HT */
    1622                 :       case 0x20:      /* SPACE */
    1623                 :       case 0xa0:      /* NBSP */
    1624                 :       case 0x1680:    /* OGHAM SPACE MARK */
    1625                 :       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
    1626                 :       case 0x2000:    /* EN QUAD */
    1627                 :       case 0x2001:    /* EM QUAD */
    1628                 :       case 0x2002:    /* EN SPACE */
    1629                 :       case 0x2003:    /* EM SPACE */
    1630                 :       case 0x2004:    /* THREE-PER-EM SPACE */
    1631                 :       case 0x2005:    /* FOUR-PER-EM SPACE */
    1632                 :       case 0x2006:    /* SIX-PER-EM SPACE */
    1633                 :       case 0x2007:    /* FIGURE SPACE */
    1634                 :       case 0x2008:    /* PUNCTUATION SPACE */
    1635                 :       case 0x2009:    /* THIN SPACE */
    1636                 :       case 0x200A:    /* HAIR SPACE */
    1637                 :       case 0x202f:    /* NARROW NO-BREAK SPACE */
    1638                 :       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
    1639                 :       case 0x3000:    /* IDEOGRAPHIC SPACE */
    1640               0 :       RRETURN(MATCH_NOMATCH);
    1641                 :       }
    1642               0 :     ecode++;
    1643               0 :     break;
    1644                 : 
    1645                 :     case OP_HSPACE:
    1646               0 :     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    1647               0 :     GETCHARINCTEST(c, eptr);
    1648               0 :     switch(c)
    1649                 :       {
    1650               0 :       default: RRETURN(MATCH_NOMATCH);
    1651                 :       case 0x09:      /* HT */
    1652                 :       case 0x20:      /* SPACE */
    1653                 :       case 0xa0:      /* NBSP */
    1654                 :       case 0x1680:    /* OGHAM SPACE MARK */
    1655                 :       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
    1656                 :       case 0x2000:    /* EN QUAD */
    1657                 :       case 0x2001:    /* EM QUAD */
    1658                 :       case 0x2002:    /* EN SPACE */
    1659                 :       case 0x2003:    /* EM SPACE */
    1660                 :       case 0x2004:    /* THREE-PER-EM SPACE */
    1661                 :       case 0x2005:    /* FOUR-PER-EM SPACE */
    1662                 :       case 0x2006:    /* SIX-PER-EM SPACE */
    1663                 :       case 0x2007:    /* FIGURE SPACE */
    1664                 :       case 0x2008:    /* PUNCTUATION SPACE */
    1665                 :       case 0x2009:    /* THIN SPACE */
    1666                 :       case 0x200A:    /* HAIR SPACE */
    1667                 :       case 0x202f:    /* NARROW NO-BREAK SPACE */
    1668                 :       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
    1669                 :       case 0x3000:    /* IDEOGRAPHIC SPACE */
    1670                 :       break;
    1671                 :       }
    1672               0 :     ecode++;
    1673               0 :     break;
    1674                 : 
    1675                 :     case OP_NOT_VSPACE:
    1676               0 :     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    1677               0 :     GETCHARINCTEST(c, eptr);
    1678               0 :     switch(c)
    1679                 :       {
    1680                 :       default: break;
    1681                 :       case 0x0a:      /* LF */
    1682                 :       case 0x0b:      /* VT */
    1683                 :       case 0x0c:      /* FF */
    1684                 :       case 0x0d:      /* CR */
    1685                 :       case 0x85:      /* NEL */
    1686                 :       case 0x2028:    /* LINE SEPARATOR */
    1687                 :       case 0x2029:    /* PARAGRAPH SEPARATOR */
    1688               0 :       RRETURN(MATCH_NOMATCH);
    1689                 :       }
    1690               0 :     ecode++;
    1691               0 :     break;
    1692                 : 
    1693                 :     case OP_VSPACE:
    1694               0 :     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    1695               0 :     GETCHARINCTEST(c, eptr);
    1696               0 :     switch(c)
    1697                 :       {
    1698               0 :       default: RRETURN(MATCH_NOMATCH);
    1699                 :       case 0x0a:      /* LF */
    1700                 :       case 0x0b:      /* VT */
    1701                 :       case 0x0c:      /* FF */
    1702                 :       case 0x0d:      /* CR */
    1703                 :       case 0x85:      /* NEL */
    1704                 :       case 0x2028:    /* LINE SEPARATOR */
    1705                 :       case 0x2029:    /* PARAGRAPH SEPARATOR */
    1706                 :       break;
    1707                 :       }
    1708               0 :     ecode++;
    1709               0 :     break;
    1710                 : 
    1711                 : #ifdef SUPPORT_UCP
    1712                 :     /* Check the next character by Unicode property. We will get here only
    1713                 :     if the support is in the binary; otherwise a compile-time error occurs. */
    1714                 : 
    1715                 :     case OP_PROP:
    1716                 :     case OP_NOTPROP:
    1717              53 :     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    1718              47 :     GETCHARINCTEST(c, eptr);
    1719                 :       {
    1720              47 :       const ucd_record *prop = GET_UCD(c);
    1721                 : 
    1722              47 :       switch(ecode[1])
    1723                 :         {
    1724                 :         case PT_ANY:
    1725               0 :         if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
    1726               0 :         break;
    1727                 : 
    1728                 :         case PT_LAMP:
    1729               0 :         if ((prop->chartype == ucp_Lu ||
    1730                 :              prop->chartype == ucp_Ll ||
    1731                 :              prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
    1732               0 :           RRETURN(MATCH_NOMATCH);
    1733               0 :          break;
    1734                 : 
    1735                 :         case PT_GC:
    1736              32 :         if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
    1737               0 :           RRETURN(MATCH_NOMATCH);
    1738              32 :         break;
    1739                 : 
    1740                 :         case PT_PC:
    1741              15 :         if ((ecode[2] != prop->chartype) == (op == OP_PROP))
    1742               0 :           RRETURN(MATCH_NOMATCH);
    1743              15 :         break;
    1744                 : 
    1745                 :         case PT_SC:
    1746               0 :         if ((ecode[2] != prop->script) == (op == OP_PROP))
    1747               0 :           RRETURN(MATCH_NOMATCH);
    1748               0 :         break;
    1749                 : 
    1750                 :         default:
    1751               0 :         RRETURN(PCRE_ERROR_INTERNAL);
    1752                 :         }
    1753                 : 
    1754              47 :       ecode += 3;
    1755                 :       }
    1756              47 :     break;
    1757                 : 
    1758                 :     /* Match an extended Unicode sequence. We will get here only if the support
    1759                 :     is in the binary; otherwise a compile-time error occurs. */
    1760                 : 
    1761                 :     case OP_EXTUNI:
    1762               0 :     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    1763               0 :     GETCHARINCTEST(c, eptr);
    1764                 :       {
    1765               0 :       int category = UCD_CATEGORY(c);
    1766               0 :       if (category == ucp_M) RRETURN(MATCH_NOMATCH);
    1767               0 :       while (eptr < md->end_subject)
    1768                 :         {
    1769               0 :         int len = 1;
    1770               0 :         if (!utf8) c = *eptr; else
    1771                 :           {
    1772               0 :           GETCHARLEN(c, eptr, len);
    1773                 :           }
    1774               0 :         category = UCD_CATEGORY(c);
    1775               0 :         if (category != ucp_M) break;
    1776               0 :         eptr += len;
    1777                 :         }
    1778                 :       }
    1779               0 :     ecode++;
    1780               0 :     break;
    1781                 : #endif
    1782                 : 
    1783                 : 
    1784                 :     /* Match a back reference, possibly repeatedly. Look past the end of the
    1785                 :     item to see if there is repeat information following. The code is similar
    1786                 :     to that for character classes, but repeated for efficiency. Then obey
    1787                 :     similar code to character type repeats - written out again for speed.
    1788                 :     However, if the referenced string is the empty string, always treat
    1789                 :     it as matched, any number of times (otherwise there could be infinite
    1790                 :     loops). */
    1791                 : 
    1792                 :     case OP_REF:
    1793                 :       {
    1794            2796 :       offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
    1795            2796 :       ecode += 3;
    1796                 : 
    1797                 :       /* If the reference is unset, there are two possibilities:
    1798                 : 
    1799                 :       (a) In the default, Perl-compatible state, set the length to be longer
    1800                 :       than the amount of subject left; this ensures that every attempt at a
    1801                 :       match fails. We can't just fail here, because of the possibility of
    1802                 :       quantifiers with zero minima.
    1803                 : 
    1804                 :       (b) If the JavaScript compatibility flag is set, set the length to zero
    1805                 :       so that the back reference matches an empty string.
    1806                 : 
    1807                 :       Otherwise, set the length to the length of what was matched by the
    1808                 :       referenced subpattern. */
    1809                 : 
    1810            2796 :       if (offset >= offset_top || md->offset_vector[offset] < 0)
    1811               0 :         length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
    1812                 :       else
    1813            2796 :         length = md->offset_vector[offset+1] - md->offset_vector[offset];
    1814                 : 
    1815                 :       /* Set up for repetition, or handle the non-repeated case */
    1816                 : 
    1817            2796 :       switch (*ecode)
    1818                 :         {
    1819                 :         case OP_CRSTAR:
    1820                 :         case OP_CRMINSTAR:
    1821                 :         case OP_CRPLUS:
    1822                 :         case OP_CRMINPLUS:
    1823                 :         case OP_CRQUERY:
    1824                 :         case OP_CRMINQUERY:
    1825               0 :         c = *ecode++ - OP_CRSTAR;
    1826               0 :         minimize = (c & 1) != 0;
    1827               0 :         min = rep_min[c];                 /* Pick up values from tables; */
    1828               0 :         max = rep_max[c];                 /* zero for max => infinity */
    1829               0 :         if (max == 0) max = INT_MAX;
    1830               0 :         break;
    1831                 : 
    1832                 :         case OP_CRRANGE:
    1833                 :         case OP_CRMINRANGE:
    1834               0 :         minimize = (*ecode == OP_CRMINRANGE);
    1835               0 :         min = GET2(ecode, 1);
    1836               0 :         max = GET2(ecode, 3);
    1837               0 :         if (max == 0) max = INT_MAX;
    1838               0 :         ecode += 5;
    1839               0 :         break;
    1840                 : 
    1841                 :         default:               /* No repeat follows */
    1842            2796 :         if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
    1843             424 :         eptr += length;
    1844             424 :         continue;              /* With the main loop */
    1845                 :         }
    1846                 : 
    1847                 :       /* If the length of the reference is zero, just continue with the
    1848                 :       main loop. */
    1849                 : 
    1850               0 :       if (length == 0) continue;
    1851                 : 
    1852                 :       /* First, ensure the minimum number of matches are present. We get back
    1853                 :       the length of the reference string explicitly rather than passing the
    1854                 :       address of eptr, so that eptr can be a register variable. */
    1855                 : 
    1856               0 :       for (i = 1; i <= min; i++)
    1857                 :         {
    1858               0 :         if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
    1859               0 :         eptr += length;
    1860                 :         }
    1861                 : 
    1862                 :       /* If min = max, continue at the same level without recursion.
    1863                 :       They are not both allowed to be zero. */
    1864                 : 
    1865               0 :       if (min == max) continue;
    1866                 : 
    1867                 :       /* If minimizing, keep trying and advancing the pointer */
    1868                 : 
    1869               0 :       if (minimize)
    1870                 :         {
    1871               0 :         for (fi = min;; fi++)
    1872                 :           {
    1873               0 :           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
    1874               0 :           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    1875               0 :           if (fi >= max || !match_ref(offset, eptr, length, md, ims))
    1876               0 :             RRETURN(MATCH_NOMATCH);
    1877               0 :           eptr += length;
    1878               0 :           }
    1879                 :         /* Control never gets here */
    1880                 :         }
    1881                 : 
    1882                 :       /* If maximizing, find the longest string and work backwards */
    1883                 : 
    1884                 :       else
    1885                 :         {
    1886               0 :         pp = eptr;
    1887               0 :         for (i = min; i < max; i++)
    1888                 :           {
    1889               0 :           if (!match_ref(offset, eptr, length, md, ims)) break;
    1890               0 :           eptr += length;
    1891                 :           }
    1892               0 :         while (eptr >= pp)
    1893                 :           {
    1894               0 :           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
    1895               0 :           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    1896               0 :           eptr -= length;
    1897                 :           }
    1898               0 :         RRETURN(MATCH_NOMATCH);
    1899                 :         }
    1900                 :       }
    1901                 :     /* Control never gets here */
    1902                 : 
    1903                 : 
    1904                 : 
    1905                 :     /* Match a bit-mapped character class, possibly repeatedly. This op code is
    1906                 :     used when all the characters in the class have values in the range 0-255,
    1907                 :     and either the matching is caseful, or the characters are in the range
    1908                 :     0-127 when UTF-8 processing is enabled. The only difference between
    1909                 :     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
    1910                 :     encountered.
    1911                 : 
    1912                 :     First, look past the end of the item to see if there is repeat information
    1913                 :     following. Then obey similar code to character type repeats - written out
    1914                 :     again for speed. */
    1915                 : 
    1916                 :     case OP_NCLASS:
    1917                 :     case OP_CLASS:
    1918                 :       {
    1919          143972 :       data = ecode + 1;                /* Save for matching */
    1920          143972 :       ecode += 33;                     /* Advance past the item */
    1921                 : 
    1922          143972 :       switch (*ecode)
    1923                 :         {
    1924                 :         case OP_CRSTAR:
    1925                 :         case OP_CRMINSTAR:
    1926                 :         case OP_CRPLUS:
    1927                 :         case OP_CRMINPLUS:
    1928                 :         case OP_CRQUERY:
    1929                 :         case OP_CRMINQUERY:
    1930          140189 :         c = *ecode++ - OP_CRSTAR;
    1931          140189 :         minimize = (c & 1) != 0;
    1932          140189 :         min = rep_min[c];                 /* Pick up values from tables; */
    1933          140189 :         max = rep_max[c];                 /* zero for max => infinity */
    1934          140189 :         if (max == 0) max = INT_MAX;
    1935          140189 :         break;
    1936                 : 
    1937                 :         case OP_CRRANGE:
    1938                 :         case OP_CRMINRANGE:
    1939               4 :         minimize = (*ecode == OP_CRMINRANGE);
    1940               4 :         min = GET2(ecode, 1);
    1941               4 :         max = GET2(ecode, 3);
    1942               4 :         if (max == 0) max = INT_MAX;
    1943               4 :         ecode += 5;
    1944               4 :         break;
    1945                 : 
    1946                 :         default:               /* No repeat follows */
    1947            3779 :         min = max = 1;
    1948                 :         break;
    1949                 :         }
    1950                 : 
    1951                 :       /* First, ensure the minimum number of matches are present. */
    1952                 : 
    1953                 : #ifdef SUPPORT_UTF8
    1954                 :       /* UTF-8 mode */
    1955          143972 :       if (utf8)
    1956                 :         {
    1957               0 :         for (i = 1; i <= min; i++)
    1958                 :           {
    1959               0 :           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    1960               0 :           GETCHARINC(c, eptr);
    1961               0 :           if (c > 255)
    1962                 :             {
    1963               0 :             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
    1964                 :             }
    1965                 :           else
    1966                 :             {
    1967               0 :             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
    1968                 :             }
    1969                 :           }
    1970                 :         }
    1971                 :       else
    1972                 : #endif
    1973                 :       /* Not UTF-8 mode */
    1974                 :         {
    1975          195710 :         for (i = 1; i <= min; i++)
    1976                 :           {
    1977          143006 :           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    1978          141712 :           c = *eptr++;
    1979          141712 :           if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
    1980                 :           }
    1981                 :         }
    1982                 : 
    1983                 :       /* If max == min we can continue with the main loop without the
    1984                 :       need to recurse. */
    1985                 : 
    1986           52704 :       if (min == max) continue;
    1987                 : 
    1988                 :       /* If minimizing, keep testing the rest of the expression and advancing
    1989                 :       the pointer while it matches the class. */
    1990                 : 
    1991           50999 :       if (minimize)
    1992                 :         {
    1993                 : #ifdef SUPPORT_UTF8
    1994                 :         /* UTF-8 mode */
    1995               0 :         if (utf8)
    1996                 :           {
    1997               0 :           for (fi = min;; fi++)
    1998                 :             {
    1999               0 :             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
    2000               0 :             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    2001               0 :             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    2002               0 :             GETCHARINC(c, eptr);
    2003               0 :             if (c > 255)
    2004                 :               {
    2005               0 :               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
    2006                 :               }
    2007                 :             else
    2008                 :               {
    2009               0 :               if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
    2010                 :               }
    2011               0 :             }
    2012                 :           }
    2013                 :         else
    2014                 : #endif
    2015                 :         /* Not UTF-8 mode */
    2016                 :           {
    2017               0 :           for (fi = min;; fi++)
    2018                 :             {
    2019               0 :             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
    2020               0 :             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    2021               0 :             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    2022               0 :             c = *eptr++;
    2023               0 :             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
    2024               0 :             }
    2025                 :           }
    2026                 :         /* Control never gets here */
    2027                 :         }
    2028                 : 
    2029                 :       /* If maximizing, find the longest possible run, then work backwards. */
    2030                 : 
    2031                 :       else
    2032                 :         {
    2033           50999 :         pp = eptr;
    2034                 : 
    2035                 : #ifdef SUPPORT_UTF8
    2036                 :         /* UTF-8 mode */
    2037           50999 :         if (utf8)
    2038                 :           {
    2039               0 :           for (i = min; i < max; i++)
    2040                 :             {
    2041               0 :             int len = 1;
    2042               0 :             if (eptr >= md->end_subject) break;
    2043               0 :             GETCHARLEN(c, eptr, len);
    2044               0 :             if (c > 255)
    2045                 :               {
    2046               0 :               if (op == OP_CLASS) break;
    2047                 :               }
    2048                 :             else
    2049                 :               {
    2050               0 :               if ((data[c/8] & (1 << (c&7))) == 0) break;
    2051                 :               }
    2052               0 :             eptr += len;
    2053                 :             }
    2054                 :           for (;;)
    2055                 :             {
    2056               0 :             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
    2057               0 :             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    2058               0 :             if (eptr-- == pp) break;        /* Stop if tried at original pos */
    2059               0 :             BACKCHAR(eptr);
    2060               0 :             }
    2061                 :           }
    2062                 :         else
    2063                 : #endif
    2064                 :           /* Not UTF-8 mode */
    2065                 :           {
    2066         1822273 :           for (i = min; i < max; i++)
    2067                 :             {
    2068         1822204 :             if (eptr >= md->end_subject) break;
    2069         1820409 :             c = *eptr;
    2070         1820409 :             if ((data[c/8] & (1 << (c&7))) == 0) break;
    2071         1771274 :             eptr++;
    2072                 :             }
    2073          427125 :           while (eptr >= pp)
    2074                 :             {
    2075          374602 :             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
    2076          374602 :             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    2077          325127 :             eptr--;
    2078                 :             }
    2079                 :           }
    2080                 : 
    2081            1524 :         RRETURN(MATCH_NOMATCH);
    2082                 :         }
    2083                 :       }
    2084                 :     /* Control never gets here */
    2085                 : 
    2086                 : 
    2087                 :     /* Match an extended character class. This opcode is encountered only
    2088                 :     when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
    2089                 :     mode, because Unicode properties are supported in non-UTF-8 mode. */
    2090                 : 
    2091                 : #ifdef SUPPORT_UTF8
    2092                 :     case OP_XCLASS:
    2093                 :       {
    2094               3 :       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
    2095               3 :       ecode += GET(ecode, 1);                      /* Advance past the item */
    2096                 : 
    2097               3 :       switch (*ecode)
    2098                 :         {
    2099                 :         case OP_CRSTAR:
    2100                 :         case OP_CRMINSTAR:
    2101                 :         case OP_CRPLUS:
    2102                 :         case OP_CRMINPLUS:
    2103                 :         case OP_CRQUERY:
    2104                 :         case OP_CRMINQUERY:
    2105               3 :         c = *ecode++ - OP_CRSTAR;
    2106               3 :         minimize = (c & 1) != 0;
    2107               3 :         min = rep_min[c];                 /* Pick up values from tables; */
    2108               3 :         max = rep_max[c];                 /* zero for max => infinity */
    2109               3 :         if (max == 0) max = INT_MAX;
    2110               3 :         break;
    2111                 : 
    2112                 :         case OP_CRRANGE:
    2113                 :         case OP_CRMINRANGE:
    2114               0 :         minimize = (*ecode == OP_CRMINRANGE);
    2115               0 :         min = GET2(ecode, 1);
    2116               0 :         max = GET2(ecode, 3);
    2117               0 :         if (max == 0) max = INT_MAX;
    2118               0 :         ecode += 5;
    2119               0 :         break;
    2120                 : 
    2121                 :         default:               /* No repeat follows */
    2122               0 :         min = max = 1;
    2123                 :         break;
    2124                 :         }
    2125                 : 
    2126                 :       /* First, ensure the minimum number of matches are present. */
    2127                 : 
    2128               3 :       for (i = 1; i <= min; i++)
    2129                 :         {
    2130               3 :         if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    2131               3 :         GETCHARINCTEST(c, eptr);
    2132               3 :         if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
    2133                 :         }
    2134                 : 
    2135                 :       /* If max == min we can continue with the main loop without the
    2136                 :       need to recurse. */
    2137                 : 
    2138               0 :       if (min == max) continue;
    2139                 : 
    2140                 :       /* If minimizing, keep testing the rest of the expression and advancing
    2141                 :       the pointer while it matches the class. */
    2142                 : 
    2143               0 :       if (minimize)
    2144                 :         {
    2145               0 :         for (fi = min;; fi++)
    2146                 :           {
    2147               0 :           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
    2148               0 :           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    2149               0 :           if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    2150               0 :           GETCHARINCTEST(c, eptr);
    2151               0 :           if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
    2152               0 :           }
    2153                 :         /* Control never gets here */
    2154                 :         }
    2155                 : 
    2156                 :       /* If maximizing, find the longest possible run, then work backwards. */
    2157                 : 
    2158                 :       else
    2159                 :         {
    2160               0 :         pp = eptr;
    2161               0 :         for (i = min; i < max; i++)
    2162                 :           {
    2163               0 :           int len = 1;
    2164               0 :           if (eptr >= md->end_subject) break;
    2165               0 :           GETCHARLENTEST(c, eptr, len);
    2166               0 :           if (!_pcre_xclass(c, data)) break;
    2167               0 :           eptr += len;
    2168                 :           }
    2169                 :         for(;;)
    2170                 :           {
    2171               0 :           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
    2172               0 :           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    2173               0 :           if (eptr-- == pp) break;        /* Stop if tried at original pos */
    2174               0 :           if (utf8) BACKCHAR(eptr);
    2175               0 :           }
    2176               0 :         RRETURN(MATCH_NOMATCH);
    2177                 :         }
    2178                 : 
    2179                 :       /* Control never gets here */
    2180                 :       }
    2181                 : #endif    /* End of XCLASS */
    2182                 : 
    2183                 :     /* Match a single character, casefully */
    2184                 : 
    2185                 :     case OP_CHAR:
    2186                 : #ifdef SUPPORT_UTF8
    2187         8012400 :     if (utf8)
    2188                 :       {
    2189               0 :       length = 1;
    2190               0 :       ecode++;
    2191               0 :       GETCHARLEN(fc, ecode, length);
    2192               0 :       if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
    2193               0 :       while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
    2194                 :       }
    2195                 :     else
    2196                 : #endif
    2197                 : 
    2198                 :     /* Non-UTF-8 mode */
    2199                 :       {
    2200         8012400 :       if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
    2201         8005945 :       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
    2202         7015741 :       ecode += 2;
    2203                 :       }
    2204         7015741 :     break;
    2205                 : 
    2206                 :     /* Match a single character, caselessly */
    2207                 : 
    2208                 :     case OP_CHARNC:
    2209                 : #ifdef SUPPORT_UTF8
    2210           77923 :     if (utf8)
    2211                 :       {
    2212               5 :       length = 1;
    2213               5 :       ecode++;
    2214               5 :       GETCHARLEN(fc, ecode, length);
    2215                 : 
    2216               5 :       if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
    2217                 : 
    2218                 :       /* If the pattern character's value is < 128, we have only one byte, and
    2219                 :       can use the fast lookup table. */
    2220                 : 
    2221               5 :       if (fc < 128)
    2222                 :         {
    2223               5 :         if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
    2224                 :         }
    2225                 : 
    2226                 :       /* Otherwise we must pick up the subject character */
    2227                 : 
    2228                 :       else
    2229                 :         {
    2230                 :         unsigned int dc;
    2231               0 :         GETCHARINC(dc, eptr);
    2232               0 :         ecode += length;
    2233                 : 
    2234                 :         /* If we have Unicode property support, we can use it to test the other
    2235                 :         case of the character, if there is one. */
    2236                 : 
    2237               0 :         if (fc != dc)
    2238                 :           {
    2239                 : #ifdef SUPPORT_UCP
    2240               0 :           if (dc != UCD_OTHERCASE(fc))
    2241                 : #endif
    2242               0 :             RRETURN(MATCH_NOMATCH);
    2243                 :           }
    2244                 :         }
    2245                 :       }
    2246                 :     else
    2247                 : #endif   /* SUPPORT_UTF8 */
    2248                 : 
    2249                 :     /* Non-UTF-8 mode */
    2250                 :       {
    2251           77918 :       if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
    2252           41550 :       if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
    2253            3497 :       ecode += 2;
    2254                 :       }
    2255            3501 :     break;
    2256                 : 
    2257                 :     /* Match a single character repeatedly. */
    2258                 : 
    2259                 :     case OP_EXACT:
    2260               0 :     min = max = GET2(ecode, 1);
    2261               0 :     ecode += 3;
    2262               0 :     goto REPEATCHAR;
    2263                 : 
    2264                 :     case OP_POSUPTO:
    2265               0 :     possessive = TRUE;
    2266                 :     /* Fall through */
    2267                 : 
    2268                 :     case OP_UPTO:
    2269                 :     case OP_MINUPTO:
    2270               0 :     min = 0;
    2271               0 :     max = GET2(ecode, 1);
    2272               0 :     minimize = *ecode == OP_MINUPTO;
    2273               0 :     ecode += 3;
    2274               0 :     goto REPEATCHAR;
    2275                 : 
    2276                 :     case OP_POSSTAR:
    2277              28 :     possessive = TRUE;
    2278              28 :     min = 0;
    2279              28 :     max = INT_MAX;
    2280              28 :     ecode++;
    2281              28 :     goto REPEATCHAR;
    2282                 : 
    2283                 :     case OP_POSPLUS:
    2284               1 :     possessive = TRUE;
    2285               1 :     min = 1;
    2286               1 :     max = INT_MAX;
    2287               1 :     ecode++;
    2288               1 :     goto REPEATCHAR;
    2289                 : 
    2290                 :     case OP_POSQUERY:
    2291               0 :     possessive = TRUE;
    2292               0 :     min = 0;
    2293               0 :     max = 1;
    2294               0 :     ecode++;
    2295               0 :     goto REPEATCHAR;
    2296                 : 
    2297                 :     case OP_STAR:
    2298                 :     case OP_MINSTAR:
    2299                 :     case OP_PLUS:
    2300                 :     case OP_MINPLUS:
    2301                 :     case OP_QUERY:
    2302                 :     case OP_MINQUERY:
    2303            3788 :     c = *ecode++ - OP_STAR;
    2304            3788 :     minimize = (c & 1) != 0;
    2305            3788 :     min = rep_min[c];                 /* Pick up values from tables; */
    2306            3788 :     max = rep_max[c];                 /* zero for max => infinity */
    2307            3788 :     if (max == 0) max = INT_MAX;
    2308                 : 
    2309                 :     /* Common code for all repeated single-character matches. We can give
    2310                 :     up quickly if there are fewer than the minimum number of characters left in
    2311                 :     the subject. */
    2312                 : 
    2313            3817 :     REPEATCHAR:
    2314                 : #ifdef SUPPORT_UTF8
    2315            3817 :     if (utf8)
    2316                 :       {
    2317               0 :       length = 1;
    2318               0 :       charptr = ecode;
    2319               0 :       GETCHARLEN(fc, ecode, length);
    2320               0 :       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
    2321               0 :       ecode += length;
    2322                 : 
    2323                 :       /* Handle multibyte character matching specially here. There is
    2324                 :       support for caseless matching if UCP support is present. */
    2325                 : 
    2326               0 :       if (length > 1)
    2327                 :         {
    2328                 : #ifdef SUPPORT_UCP
    2329                 :         unsigned int othercase;
    2330               0 :         if ((ims & PCRE_CASELESS) != 0 &&
    2331                 :             (othercase = UCD_OTHERCASE(fc)) != fc)
    2332               0 :           oclength = _pcre_ord2utf8(othercase, occhars);
    2333               0 :         else oclength = 0;
    2334                 : #endif  /* SUPPORT_UCP */
    2335                 : 
    2336               0 :         for (i = 1; i <= min; i++)
    2337                 :           {
    2338               0 :           if (memcmp(eptr, charptr, length) == 0) eptr += length;
    2339                 : #ifdef SUPPORT_UCP
    2340                 :           /* Need braces because of following else */
    2341               0 :           else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
    2342                 :           else
    2343                 :             {
    2344               0 :             if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
    2345               0 :             eptr += oclength;
    2346                 :             }
    2347                 : #else   /* without SUPPORT_UCP */
    2348                 :           else { RRETURN(MATCH_NOMATCH); }
    2349                 : #endif  /* SUPPORT_UCP */
    2350                 :           }
    2351                 : 
    2352               0 :         if (min == max) continue;
    2353                 : 
    2354               0 :         if (minimize)
    2355                 :           {
    2356               0 :           for (fi = min;; fi++)
    2357                 :             {
    2358               0 :             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
    2359               0 :             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    2360               0 :             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    2361               0 :             if (memcmp(eptr, charptr, length) == 0) eptr += length;
    2362                 : #ifdef SUPPORT_UCP
    2363                 :             /* Need braces because of following else */
    2364               0 :             else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
    2365                 :             else
    2366                 :               {
    2367               0 :               if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
    2368               0 :               eptr += oclength;
    2369                 :               }
    2370                 : #else   /* without SUPPORT_UCP */
    2371                 :             else { RRETURN (MATCH_NOMATCH); }
    2372                 : #endif  /* SUPPORT_UCP */
    2373               0 :             }
    2374                 :           /* Control never gets here */
    2375                 :           }
    2376                 : 
    2377                 :         else  /* Maximize */
    2378                 :           {
    2379               0 :           pp = eptr;
    2380               0 :           for (i = min; i < max; i++)
    2381                 :             {
    2382               0 :             if (eptr > md->end_subject - length) break;
    2383               0 :             if (memcmp(eptr, charptr, length) == 0) eptr += length;
    2384                 : #ifdef SUPPORT_UCP
    2385               0 :             else if (oclength == 0) break;
    2386                 :             else
    2387                 :               {
    2388               0 :               if (memcmp(eptr, occhars, oclength) != 0) break;
    2389               0 :               eptr += oclength;
    2390                 :               }
    2391                 : #else   /* without SUPPORT_UCP */
    2392                 :             else break;
    2393                 : #endif  /* SUPPORT_UCP */
    2394                 :             }
    2395                 : 
    2396               0 :           if (possessive) continue;
    2397                 :           for(;;)
    2398                 :            {
    2399               0 :            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
    2400               0 :            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    2401               0 :            if (eptr == pp) RRETURN(MATCH_NOMATCH);
    2402                 : #ifdef SUPPORT_UCP
    2403               0 :            eptr--;
    2404               0 :            BACKCHAR(eptr);
    2405                 : #else   /* without SUPPORT_UCP */
    2406                 :            eptr -= length;
    2407                 : #endif  /* SUPPORT_UCP */
    2408               0 :            }
    2409                 :           }
    2410                 :         /* Control never gets here */
    2411                 :         }
    2412                 : 
    2413                 :       /* If the length of a UTF-8 character is 1, we fall through here, and
    2414                 :       obey the code as for non-UTF-8 characters below, though in this case the
    2415                 :       value of fc will always be < 128. */
    2416                 :       }
    2417                 :     else
    2418                 : #endif  /* SUPPORT_UTF8 */
    2419                 : 
    2420                 :     /* When not in UTF-8 mode, load a single-byte character. */
    2421                 :       {
    2422            3817 :       if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
    2423            3817 :       fc = *ecode++;
    2424                 :       }
    2425                 : 
    2426                 :     /* The value of fc at this point is always less than 256, though we may or
    2427                 :     may not be in UTF-8 mode. The code is duplicated for the caseless and
    2428                 :     caseful cases, for speed, since matching characters is likely to be quite
    2429                 :     common. First, ensure the minimum number of matches are present. If min =
    2430                 :     max, continue at the same level without recursing. Otherwise, if
    2431                 :     minimizing, keep trying the rest of the expression and advancing one
    2432                 :     matching character if failing, up to the maximum. Alternatively, if
    2433                 :     maximizing, find the maximum number of characters and work backwards. */
    2434                 : 
    2435                 :     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
    2436                 :       max, eptr));
    2437                 : 
    2438            3817 :     if ((ims & PCRE_CASELESS) != 0)
    2439                 :       {
    2440               0 :       fc = md->lcc[fc];
    2441               0 :       for (i = 1; i <= min; i++)
    2442               0 :         if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
    2443               0 :       if (min == max) continue;
    2444               0 :       if (minimize)
    2445                 :         {
    2446               0 :         for (fi = min;; fi++)
    2447                 :           {
    2448               0 :           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
    2449               0 :           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    2450               0 :           if (fi >= max || eptr >= md->end_subject ||
    2451                 :               fc != md->lcc[*eptr++])
    2452               0 :             RRETURN(MATCH_NOMATCH);
    2453               0 :           }
    2454                 :         /* Control never gets here */
    2455                 :         }
    2456                 :       else  /* Maximize */
    2457                 :         {
    2458               0 :         pp = eptr;
    2459               0 :         for (i = min; i < max; i++)
    2460                 :           {
    2461               0 :           if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
    2462               0 :           eptr++;
    2463                 :           }
    2464               0 :         if (possessive) continue;
    2465               0 :         while (eptr >= pp)
    2466                 :           {
    2467               0 :           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
    2468               0 :           eptr--;
    2469               0 :           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    2470                 :           }
    2471               0 :         RRETURN(MATCH_NOMATCH);
    2472                 :         }
    2473                 :       /* Control never gets here */
    2474                 :       }
    2475                 : 
    2476                 :     /* Caseful comparisons (fc is a single byte here; multibyte characters
    2476                 :     were fully handled in the UTF-8 branch above) */
    2477                 : 
    2478                 :     else
    2479                 :       {
    2480            3817 :       for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
    2481            3813 :       if (min == max) continue;
    2482            3813 :       if (minimize)
    2483                 :         {
    2484               0 :         for (fi = min;; fi++)
    2485                 :           {
    2486               0 :           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
    2487               0 :           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    2488               0 :           if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
    2489               0 :             RRETURN(MATCH_NOMATCH);
    2490               0 :           }
    2491                 :         /* Control never gets here */
    2492                 :         }
    2493                 :       else  /* Maximize */
    2494                 :         {
    2495            3813 :         pp = eptr;
    2496            4114 :         for (i = min; i < max; i++)
    2497                 :           {
    2498            3843 :           if (eptr >= md->end_subject || fc != *eptr) break;
    2499             301 :           eptr++;
    2500                 :           }
    2501            3813 :         if (possessive) continue;
    2502           10964 :         while (eptr >= pp)
    2503                 :           {
    2504            3845 :           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
    2505            3845 :           eptr--;
    2506            3845 :           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    2507                 :           }
    2508            3335 :         RRETURN(MATCH_NOMATCH);
    2509                 :         }
    2510                 :       }
    2511                 :     /* Control never gets here */
    2512                 : 
    2513                 :     /* Match a negated single one-byte character. The character we are
    2514                 :     checking can be multibyte. */
    2515                 : 
    2516                 :     case OP_NOT:
    2517              58 :     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    2518              58 :     ecode++;
    2519              58 :     GETCHARINCTEST(c, eptr);
    2520              58 :     if ((ims & PCRE_CASELESS) != 0)
    2521                 :       {
    2522                 : #ifdef SUPPORT_UTF8
    2523               0 :       if (c < 256)
    2524                 : #endif
    2525               0 :       c = md->lcc[c];
    2526               0 :       if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
    2527                 :       }
    2528                 :     else
    2529                 :       {
    2530              58 :       if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
    2531                 :       }
    2532              50 :     break;
    2533                 : 
    2534                 :     /* Match a negated single one-byte character repeatedly. This is almost a
    2535                 :     repeat of the code for a repeated single character, but I haven't found a
    2536                 :     nice way of commoning these up that doesn't require a test of the
    2537                 :     positive/negative option for each character match. Maybe that wouldn't add
    2538                 :     very much to the time taken, but character matching *is* what this is all
    2539                 :     about... */
    2540                 : 
    2541                 :     case OP_NOTEXACT:
    2542               2 :     min = max = GET2(ecode, 1);
    2543               2 :     ecode += 3;
    2544               2 :     goto REPEATNOTCHAR;
    2545                 : 
    2546                 :     case OP_NOTUPTO:
    2547                 :     case OP_NOTMINUPTO:
    2548               0 :     min = 0;
    2549               0 :     max = GET2(ecode, 1);
    2550               0 :     minimize = *ecode == OP_NOTMINUPTO;
    2551               0 :     ecode += 3;
    2552               0 :     goto REPEATNOTCHAR;
    2553                 : 
    2554                 :     case OP_NOTPOSSTAR:
    2555               0 :     possessive = TRUE;
    2556               0 :     min = 0;
    2557               0 :     max = INT_MAX;
    2558               0 :     ecode++;
    2559               0 :     goto REPEATNOTCHAR;
    2560                 : 
    2561                 :     case OP_NOTPOSPLUS:
    2562               8 :     possessive = TRUE;
    2563               8 :     min = 1;
    2564               8 :     max = INT_MAX;
    2565               8 :     ecode++;
    2566               8 :     goto REPEATNOTCHAR;
    2567                 : 
    2568                 :     case OP_NOTPOSQUERY:
    2569               0 :     possessive = TRUE;
    2570               0 :     min = 0;
    2571               0 :     max = 1;
    2572               0 :     ecode++;
    2573               0 :     goto REPEATNOTCHAR;
    2574                 : 
    2575                 :     case OP_NOTPOSUPTO:
    2576               0 :     possessive = TRUE;
    2577               0 :     min = 0;
    2578               0 :     max = GET2(ecode, 1);
    2579               0 :     ecode += 3;
    2580               0 :     goto REPEATNOTCHAR;
    2581                 : 
    2582                 :     case OP_NOTSTAR:
    2583                 :     case OP_NOTMINSTAR:
    2584                 :     case OP_NOTPLUS:
    2585                 :     case OP_NOTMINPLUS:
    2586                 :     case OP_NOTQUERY:
    2587                 :     case OP_NOTMINQUERY:
    2588           25004 :     c = *ecode++ - OP_NOTSTAR;
    2589           25004 :     minimize = (c & 1) != 0;
    2590           25004 :     min = rep_min[c];                 /* Pick up values from tables; */
    2591           25004 :     max = rep_max[c];                 /* zero for max => infinity */
    2592           25004 :     if (max == 0) max = INT_MAX;
    2593                 : 
    2594                 :     /* Common code for all repeated single-byte matches. We can give up quickly
    2595                 :     if there are fewer than the minimum number of bytes left in the
    2596                 :     subject. */
    2597                 : 
    2598           25014 :     REPEATNOTCHAR:
    2599           25014 :     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
    2600           12894 :     fc = *ecode++;
    2601                 : 
    2602                 :     /* The code is duplicated for the caseless and caseful cases, for speed,
    2603                 :     since matching characters is likely to be quite common. First, ensure the
    2604                 :     minimum number of matches are present. If min = max, continue at the same
    2605                 :     level without recursing. Otherwise, if minimizing, keep trying the rest of
    2606                 :     the expression and advancing one matching character if failing, up to the
    2607                 :     maximum. Alternatively, if maximizing, find the maximum number of
    2608                 :     characters and work backwards. */
    2609                 : 
    2610                 :     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
    2611                 :       max, eptr));
    2612                 : 
    2613           12894 :     if ((ims & PCRE_CASELESS) != 0)
    2614                 :       {
    2615           12884 :       fc = md->lcc[fc];
    2616                 : 
    2617                 : #ifdef SUPPORT_UTF8
    2618                 :       /* UTF-8 mode */
    2619           12884 :       if (utf8)
    2620                 :         {
    2621                 :         register unsigned int d;
    2622               0 :         for (i = 1; i <= min; i++)
    2623                 :           {
    2624               0 :           GETCHARINC(d, eptr);
    2625               0 :           if (d < 256) d = md->lcc[d];
    2626               0 :           if (fc == d) RRETURN(MATCH_NOMATCH);
    2627                 :           }
    2628                 :         }
    2629                 :       else
    2630                 : #endif
    2631                 : 
    2632                 :       /* Not UTF-8 mode */
    2633                 :         {
    2634           25389 :         for (i = 1; i <= min; i++)
    2635           12886 :           if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
    2636                 :         }
    2637                 : 
    2638           12503 :       if (min == max) continue;
    2639                 : 
    2640           12502 :       if (minimize)
    2641                 :         {
    2642                 : #ifdef SUPPORT_UTF8
    2643                 :         /* UTF-8 mode */
    2644               0 :         if (utf8)
    2645                 :           {
    2646                 :           register unsigned int d;
    2647               0 :           for (fi = min;; fi++)
    2648                 :             {
    2649               0 :             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
    2650               0 :             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    2651               0 :             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    2652               0 :             GETCHARINC(d, eptr);
    2653               0 :             if (d < 256) d = md->lcc[d];
    2654               0 :             if (fc == d) RRETURN(MATCH_NOMATCH);
    2655                 : 
    2656               0 :             }
    2657                 :           }
    2658                 :         else
    2659                 : #endif
    2660                 :         /* Not UTF-8 mode */
    2661                 :           {
    2662               0 :           for (fi = min;; fi++)
    2663                 :             {
    2664               0 :             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
    2665               0 :             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    2666               0 :             if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
    2667               0 :               RRETURN(MATCH_NOMATCH);
    2668               0 :             }
    2669                 :           }
    2670                 :         /* Control never gets here */
    2671                 :         }
    2672                 : 
    2673                 :       /* Maximize case */
    2674                 : 
    2675                 :       else
    2676                 :         {
    2677           12502 :         pp = eptr;
    2678                 : 
    2679                 : #ifdef SUPPORT_UTF8
    2680                 :         /* UTF-8 mode */
    2681           12502 :         if (utf8)
    2682                 :           {
    2683                 :           register unsigned int d;
    2684               0 :           for (i = min; i < max; i++)
    2685                 :             {
    2686               0 :             int len = 1;
    2687               0 :             if (eptr >= md->end_subject) break;
    2688               0 :             GETCHARLEN(d, eptr, len);
    2689               0 :             if (d < 256) d = md->lcc[d];
    2690               0 :             if (fc == d) break;
    2691               0 :             eptr += len;
    2692                 :             }
    2693               0 :         if (possessive) continue;
    2694                 :         for(;;)
    2695                 :             {
    2696               0 :             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
    2697               0 :             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    2698               0 :             if (eptr-- == pp) break;        /* Stop if tried at original pos */
    2699               0 :             BACKCHAR(eptr);
    2700               0 :             }
    2701                 :           }
    2702                 :         else
    2703                 : #endif
    2704                 :         /* Not UTF-8 mode */
    2705                 :           {
    2706           24634 :           for (i = min; i < max; i++)
    2707                 :             {
    2708           24634 :             if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
    2709           12132 :             eptr++;
    2710                 :             }
    2711           12502 :           if (possessive) continue;
    2712           49606 :           while (eptr >= pp)
    2713                 :             {
    2714           24622 :             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
    2715           24622 :             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    2716           24602 :             eptr--;
    2717                 :             }
    2718                 :           }
    2719                 : 
    2720           12482 :         RRETURN(MATCH_NOMATCH);
    2721                 :         }
    2722                 :       /* Control never gets here */
    2723                 :       }
    2724                 : 
    2725                 :     /* Caseful comparisons */
    2726                 : 
    2727                 :     else
    2728                 :       {
    2729                 : #ifdef SUPPORT_UTF8
    2730                 :       /* UTF-8 mode */
    2731              10 :       if (utf8)
    2732                 :         {
    2733                 :         register unsigned int d;
    2734               0 :         for (i = 1; i <= min; i++)
    2735                 :           {
    2736               0 :           GETCHARINC(d, eptr);
    2737               0 :           if (fc == d) RRETURN(MATCH_NOMATCH);
    2738                 :           }
    2739                 :         }
    2740                 :       else
    2741                 : #endif
    2742                 :       /* Not UTF-8 mode */
    2743                 :         {
    2744              18 :         for (i = 1; i <= min; i++)
    2745               9 :           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
    2746                 :         }
    2747                 : 
    2748               9 :       if (min == max) continue;
    2749                 : 
    2750               9 :       if (minimize)
    2751                 :         {
    2752                 : #ifdef SUPPORT_UTF8
    2753                 :         /* UTF-8 mode */
    2754               0 :         if (utf8)
    2755                 :           {
    2756                 :           register unsigned int d;
    2757               0 :           for (fi = min;; fi++)
    2758                 :             {
    2759               0 :             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
    2760               0 :             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    2761               0 :             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    2762               0 :             GETCHARINC(d, eptr);
    2763               0 :             if (fc == d) RRETURN(MATCH_NOMATCH);
    2764               0 :             }
    2765                 :           }
    2766                 :         else
    2767                 : #endif
    2768                 :         /* Not UTF-8 mode */
    2769                 :           {
    2770               0 :           for (fi = min;; fi++)
    2771                 :             {
    2772               0 :             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
    2773               0 :             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    2774               0 :             if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
    2775               0 :               RRETURN(MATCH_NOMATCH);
    2776               0 :             }
    2777                 :           }
    2778                 :         /* Control never gets here */
    2779                 :         }
    2780                 : 
    2781                 :       /* Maximize case */
    2782                 : 
    2783                 :       else
    2784                 :         {
    2785               9 :         pp = eptr;
    2786                 : 
    2787                 : #ifdef SUPPORT_UTF8
    2788                 :         /* UTF-8 mode */
    2789               9 :         if (utf8)
    2790                 :           {
    2791                 :           register unsigned int d;
    2792               0 :           for (i = min; i < max; i++)
    2793                 :             {
    2794               0 :             int len = 1;
    2795               0 :             if (eptr >= md->end_subject) break;
    2796               0 :             GETCHARLEN(d, eptr, len);
    2797               0 :             if (fc == d) break;
    2798               0 :             eptr += len;
    2799                 :             }
    2800               0 :           if (possessive) continue;
    2801                 :           for(;;)
    2802                 :             {
    2803               0 :             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
    2804               0 :             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    2805               0 :             if (eptr-- == pp) break;        /* Stop if tried at original pos */
    2806               0 :             BACKCHAR(eptr);
    2807               0 :             }
    2808                 :           }
    2809                 :         else
    2810                 : #endif
    2811                 :         /* Not UTF-8 mode */
    2812                 :           {
    2813              33 :           for (i = min; i < max; i++)
    2814                 :             {
    2815              33 :             if (eptr >= md->end_subject || fc == *eptr) break;
    2816              24 :             eptr++;
    2817                 :             }
    2818               9 :           if (possessive) continue;
    2819               4 :           while (eptr >= pp)
    2820                 :             {
    2821               2 :             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
    2822               2 :             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    2823               0 :             eptr--;
    2824                 :             }
    2825                 :           }
    2826                 : 
    2827               0 :         RRETURN(MATCH_NOMATCH);
    2828                 :         }
    2829                 :       }
    2830                 :     /* Control never gets here */
    2831                 : 
    2832                 :     /* Match a single character type repeatedly; several different opcodes
    2833                 :     share code. This is very similar to the code for single characters, but we
    2834                 :     repeat it in the interests of efficiency. */
    2835                 : 
    2836                 :     case OP_TYPEEXACT:
    2837             218 :     min = max = GET2(ecode, 1);
    2838             218 :     minimize = TRUE;
    2839             218 :     ecode += 3;
    2840             218 :     goto REPEATTYPE;
    2841                 : 
    2842                 :     case OP_TYPEUPTO:
    2843                 :     case OP_TYPEMINUPTO:
    2844               7 :     min = 0;
    2845               7 :     max = GET2(ecode, 1);
    2846               7 :     minimize = *ecode == OP_TYPEMINUPTO;
    2847               7 :     ecode += 3;
    2848               7 :     goto REPEATTYPE;
    2849                 : 
    2850                 :     case OP_TYPEPOSSTAR:
    2851             507 :     possessive = TRUE;
    2852             507 :     min = 0;
    2853             507 :     max = INT_MAX;
    2854             507 :     ecode++;
    2855             507 :     goto REPEATTYPE;
    2856                 : 
    2857                 :     case OP_TYPEPOSPLUS:
    2858           26731 :     possessive = TRUE;
    2859           26731 :     min = 1;
    2860           26731 :     max = INT_MAX;
    2861           26731 :     ecode++;
    2862           26731 :     goto REPEATTYPE;
    2863                 : 
    2864                 :     case OP_TYPEPOSQUERY:
    2865               0 :     possessive = TRUE;
    2866               0 :     min = 0;
    2867               0 :     max = 1;
    2868               0 :     ecode++;
    2869               0 :     goto REPEATTYPE;
    2870                 : 
    2871                 :     case OP_TYPEPOSUPTO:
    2872               0 :     possessive = TRUE;
    2873               0 :     min = 0;
    2874               0 :     max = GET2(ecode, 1);
    2875               0 :     ecode += 3;
    2876               0 :     goto REPEATTYPE;
    2877                 : 
    2878                 :     case OP_TYPESTAR:
    2879                 :     case OP_TYPEMINSTAR:
    2880                 :     case OP_TYPEPLUS:
    2881                 :     case OP_TYPEMINPLUS:
    2882                 :     case OP_TYPEQUERY:
    2883                 :     case OP_TYPEMINQUERY:
    2884            8301 :     c = *ecode++ - OP_TYPESTAR;
    2885            8301 :     minimize = (c & 1) != 0;
    2886            8301 :     min = rep_min[c];                 /* Pick up values from tables; */
    2887            8301 :     max = rep_max[c];                 /* zero for max => infinity */
    2888            8301 :     if (max == 0) max = INT_MAX;
    2889                 : 
    2890                 :     /* Common code for all repeated single character type matches. Note that
    2891                 :     in UTF-8 mode, '.' matches a character of any length, but for the other
    2892                 :     character types, the valid characters are all one-byte long. */
    2893                 : 
    2894           35764 :     REPEATTYPE:
    2895           35764 :     ctype = *ecode++;      /* Code for the character type */
    2896                 : 
    2897                 : #ifdef SUPPORT_UCP
    2898           35771 :     if (ctype == OP_PROP || ctype == OP_NOTPROP)
    2899                 :       {
    2900               7 :       prop_fail_result = ctype == OP_NOTPROP;
    2901               7 :       prop_type = *ecode++;
    2902               7 :       prop_value = *ecode++;
    2903                 :       }
    2904           35757 :     else prop_type = -1;
    2905                 : #endif
    2906                 : 
    2907                 :     /* First, ensure the minimum number of matches are present. Use inline
    2908                 :     code for maximizing the speed, and do the type test once at the start
    2909                 :     (i.e. keep it out of the loop). Also we can test that there are at least
    2910                 :     the minimum number of bytes before we start. This isn't as effective in
    2911                 :     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that
    2912                 :     is tidier. Also separate the UCP code, which can be the same for both UTF-8
    2913                 :     and single-bytes. */
    2914                 : 
    2915           35764 :     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
    2916           35753 :     if (min > 0)
    2917                 :       {
    2918                 : #ifdef SUPPORT_UCP
    2919           31920 :       if (prop_type >= 0)
    2920                 :         {
    2921               7 :         switch(prop_type)
    2922                 :           {
    2923                 :           case PT_ANY:
    2924               0 :           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
    2925               0 :           for (i = 1; i <= min; i++)
    2926                 :             {
    2927               0 :             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    2928               0 :             GETCHARINCTEST(c, eptr);
    2929                 :             }
    2930               0 :           break;
    2931                 : 
    2932                 :           case PT_LAMP:
    2933               0 :           for (i = 1; i <= min; i++)
    2934                 :             {
    2935               0 :             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    2936               0 :             GETCHARINCTEST(c, eptr);
    2937               0 :             prop_chartype = UCD_CHARTYPE(c);
    2938               0 :             if ((prop_chartype == ucp_Lu ||
    2939                 :                  prop_chartype == ucp_Ll ||
    2940                 :                  prop_chartype == ucp_Lt) == prop_fail_result)
    2941               0 :               RRETURN(MATCH_NOMATCH);
    2942                 :             }
    2943               0 :           break;
    2944                 : 
    2945                 :           case PT_GC:
    2946               8 :           for (i = 1; i <= min; i++)
    2947                 :             {
    2948               7 :             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    2949               7 :             GETCHARINCTEST(c, eptr);
    2950               7 :             prop_category = UCD_CATEGORY(c);
    2951               7 :             if ((prop_category == prop_value) == prop_fail_result)
    2952               6 :               RRETURN(MATCH_NOMATCH);
    2953                 :             }
    2954               1 :           break;
    2955                 : 
    2956                 :           case PT_PC:
    2957               0 :           for (i = 1; i <= min; i++)
    2958                 :             {
    2959               0 :             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    2960               0 :             GETCHARINCTEST(c, eptr);
    2961               0 :             prop_chartype = UCD_CHARTYPE(c);
    2962               0 :             if ((prop_chartype == prop_value) == prop_fail_result)
    2963               0 :               RRETURN(MATCH_NOMATCH);
    2964                 :             }
    2965               0 :           break;
    2966                 : 
    2967                 :           case PT_SC:
    2968               0 :           for (i = 1; i <= min; i++)
    2969                 :             {
    2970               0 :             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    2971               0 :             GETCHARINCTEST(c, eptr);
    2972               0 :             prop_script = UCD_SCRIPT(c);
    2973               0 :             if ((prop_script == prop_value) == prop_fail_result)
    2974               0 :               RRETURN(MATCH_NOMATCH);
    2975                 :             }
    2976               0 :           break;
    2977                 : 
    2978                 :           default:
    2979               0 :           RRETURN(PCRE_ERROR_INTERNAL);
    2980                 :           }
    2981                 :         }
    2982                 : 
    2983                 :       /* Match extended Unicode sequences. We will get here only if the
    2984                 :       support is in the binary; otherwise a compile-time error occurs. */
    2985                 : 
    2986           31913 :       else if (ctype == OP_EXTUNI)
    2987                 :         {
    2988               0 :         for (i = 1; i <= min; i++)
    2989                 :           {
    2990               0 :           GETCHARINCTEST(c, eptr);
    2991               0 :           prop_category = UCD_CATEGORY(c);
    2992               0 :           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
    2993               0 :           while (eptr < md->end_subject)
    2994                 :             {
    2995               0 :             int len = 1;
    2996               0 :             if (!utf8) c = *eptr; else
    2997                 :               {
    2998               0 :               GETCHARLEN(c, eptr, len);
    2999                 :               }
    3000               0 :             prop_category = UCD_CATEGORY(c);
    3001               0 :             if (prop_category != ucp_M) break;
    3002               0 :             eptr += len;
    3003                 :             }
    3004                 :           }
    3005                 :         }
    3006                 : 
    3007                 :       else
    3008                 : #endif     /* SUPPORT_UCP */
    3009                 : 
    3010                 : /* Handle all other cases when the coding is UTF-8 */
    3011                 : 
    3012                 : #ifdef SUPPORT_UTF8
    3013           31913 :       if (utf8) switch(ctype)
    3014                 :         {
    3015                 :         case OP_ANY:
    3016               0 :         for (i = 1; i <= min; i++)
    3017                 :           {
    3018               0 :           if (eptr >= md->end_subject || IS_NEWLINE(eptr))
    3019               0 :             RRETURN(MATCH_NOMATCH);
    3020               0 :           eptr++;
    3021               0 :           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
    3022                 :           }
    3023               0 :         break;
    3024                 : 
    3025                 :         case OP_ALLANY:
    3026               0 :         for (i = 1; i <= min; i++)
    3027                 :           {
    3028               0 :           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    3029               0 :           eptr++;
    3030               0 :           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
    3031                 :           }
    3032               0 :         break;
    3033                 : 
    3034                 :         case OP_ANYBYTE:
    3035               0 :         eptr += min;
    3036               0 :         break;
    3037                 : 
    3038                 :         case OP_ANYNL:
    3039               0 :         for (i = 1; i <= min; i++)
    3040                 :           {
    3041               0 :           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    3042               0 :           GETCHARINC(c, eptr);
    3043               0 :           switch(c)
    3044                 :             {
    3045               0 :             default: RRETURN(MATCH_NOMATCH);
    3046                 :             case 0x000d:
    3047               0 :             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
    3048               0 :             break;
    3049                 : 
    3050                 :             case 0x000a:
    3051               0 :             break;
    3052                 : 
    3053                 :             case 0x000b:
    3054                 :             case 0x000c:
    3055                 :             case 0x0085:
    3056                 :             case 0x2028:
    3057                 :             case 0x2029:
    3058               0 :             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
    3059                 :             break;
    3060                 :             }
    3061                 :           }
    3062               0 :         break;
    3063                 : 
    3064                 :         case OP_NOT_HSPACE:
    3065               0 :         for (i = 1; i <= min; i++)
    3066                 :           {
    3067               0 :           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    3068               0 :           GETCHARINC(c, eptr);
    3069               0 :           switch(c)
    3070                 :             {
    3071                 :             default: break;
    3072                 :             case 0x09:      /* HT */
    3073                 :             case 0x20:      /* SPACE */
    3074                 :             case 0xa0:      /* NBSP */
    3075                 :             case 0x1680:    /* OGHAM SPACE MARK */
    3076                 :             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
    3077                 :             case 0x2000:    /* EN QUAD */
    3078                 :             case 0x2001:    /* EM QUAD */
    3079                 :             case 0x2002:    /* EN SPACE */
    3080                 :             case 0x2003:    /* EM SPACE */
    3081                 :             case 0x2004:    /* THREE-PER-EM SPACE */
    3082                 :             case 0x2005:    /* FOUR-PER-EM SPACE */
    3083                 :             case 0x2006:    /* SIX-PER-EM SPACE */
    3084                 :             case 0x2007:    /* FIGURE SPACE */
    3085                 :             case 0x2008:    /* PUNCTUATION SPACE */
    3086                 :             case 0x2009:    /* THIN SPACE */
    3087                 :             case 0x200A:    /* HAIR SPACE */
    3088                 :             case 0x202f:    /* NARROW NO-BREAK SPACE */
    3089                 :             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
    3090                 :             case 0x3000:    /* IDEOGRAPHIC SPACE */
    3091               0 :             RRETURN(MATCH_NOMATCH);
    3092                 :             }
    3093                 :           }
    3094               0 :         break;
    3095                 : 
    3096                 :         case OP_HSPACE:
    3097               0 :         for (i = 1; i <= min; i++)
    3098                 :           {
    3099               0 :           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    3100               0 :           GETCHARINC(c, eptr);
    3101               0 :           switch(c)
    3102                 :             {
    3103               0 :             default: RRETURN(MATCH_NOMATCH);
    3104                 :             case 0x09:      /* HT */
    3105                 :             case 0x20:      /* SPACE */
    3106                 :             case 0xa0:      /* NBSP */
    3107                 :             case 0x1680:    /* OGHAM SPACE MARK */
    3108                 :             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
    3109                 :             case 0x2000:    /* EN QUAD */
    3110                 :             case 0x2001:    /* EM QUAD */
    3111                 :             case 0x2002:    /* EN SPACE */
    3112                 :             case 0x2003:    /* EM SPACE */
    3113                 :             case 0x2004:    /* THREE-PER-EM SPACE */
    3114                 :             case 0x2005:    /* FOUR-PER-EM SPACE */
    3115                 :             case 0x2006:    /* SIX-PER-EM SPACE */
    3116                 :             case 0x2007:    /* FIGURE SPACE */
    3117                 :             case 0x2008:    /* PUNCTUATION SPACE */
    3118                 :             case 0x2009:    /* THIN SPACE */
    3119                 :             case 0x200A:    /* HAIR SPACE */
    3120                 :             case 0x202f:    /* NARROW NO-BREAK SPACE */
    3121                 :             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
    3122                 :             case 0x3000:    /* IDEOGRAPHIC SPACE */
    3123                 :             break;
    3124                 :             }
    3125                 :           }
    3126               0 :         break;
    3127                 : 
    3128                 :         case OP_NOT_VSPACE:
    3129               0 :         for (i = 1; i <= min; i++)
    3130                 :           {
    3131               0 :           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    3132               0 :           GETCHARINC(c, eptr);
    3133               0 :           switch(c)
    3134                 :             {
    3135                 :             default: break;
    3136                 :             case 0x0a:      /* LF */
    3137                 :             case 0x0b:      /* VT */
    3138                 :             case 0x0c:      /* FF */
    3139                 :             case 0x0d:      /* CR */
    3140                 :             case 0x85:      /* NEL */
    3141                 :             case 0x2028:    /* LINE SEPARATOR */
    3142                 :             case 0x2029:    /* PARAGRAPH SEPARATOR */
    3143               0 :             RRETURN(MATCH_NOMATCH);
    3144                 :             }
    3145                 :           }
    3146               0 :         break;
    3147                 : 
    3148                 :         case OP_VSPACE:
    3149               0 :         for (i = 1; i <= min; i++)
    3150                 :           {
    3151               0 :           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    3152               0 :           GETCHARINC(c, eptr);
    3153               0 :           switch(c)
    3154                 :             {
    3155               0 :             default: RRETURN(MATCH_NOMATCH);
    3156                 :             case 0x0a:      /* LF */
    3157                 :             case 0x0b:      /* VT */
    3158                 :             case 0x0c:      /* FF */
    3159                 :             case 0x0d:      /* CR */
    3160                 :             case 0x85:      /* NEL */
    3161                 :             case 0x2028:    /* LINE SEPARATOR */
    3162                 :             case 0x2029:    /* PARAGRAPH SEPARATOR */
    3163                 :             break;
    3164                 :             }
    3165                 :           }
    3166               0 :         break;
    3167                 : 
    3168                 :         case OP_NOT_DIGIT:
    3169               0 :         for (i = 1; i <= min; i++)
    3170                 :           {
    3171               0 :           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    3172               0 :           GETCHARINC(c, eptr);
    3173               0 :           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
    3174               0 :             RRETURN(MATCH_NOMATCH);
    3175                 :           }
    3176               0 :         break;
    3177                 : 
    3178                 :         case OP_DIGIT:
    3179               0 :         for (i = 1; i <= min; i++)
    3180                 :           {
    3181               0 :           if (eptr >= md->end_subject ||
    3182                 :              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
    3183               0 :             RRETURN(MATCH_NOMATCH);
    3184                 :           /* No need to skip more bytes - we know it's a 1-byte character */
    3185                 :           }
    3186               0 :         break;
    3187                 : 
    3188                 :         case OP_NOT_WHITESPACE:
    3189               6 :         for (i = 1; i <= min; i++)
    3190                 :           {
    3191               4 :           if (eptr >= md->end_subject ||
    3192                 :              (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0))
    3193               0 :             RRETURN(MATCH_NOMATCH);
    3194               5 :           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
    3195                 :           }
    3196               2 :         break;
    3197                 : 
    3198                 :         case OP_WHITESPACE:
    3199               0 :         for (i = 1; i <= min; i++)
    3200                 :           {
    3201               0 :           if (eptr >= md->end_subject ||
    3202                 :              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
    3203               0 :             RRETURN(MATCH_NOMATCH);
    3204                 :           /* No need to skip more bytes - we know it's a 1-byte character */
    3205                 :           }
    3206               0 :         break;
    3207                 : 
    3208                 :         case OP_NOT_WORDCHAR:
    3209               4 :         for (i = 1; i <= min; i++)
    3210                 :           {
    3211               3 :           if (eptr >= md->end_subject ||
    3212                 :              (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0))
    3213               1 :             RRETURN(MATCH_NOMATCH);
    3214               4 :           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
    3215                 :           }
    3216               1 :         break;
    3217                 : 
    3218                 :         case OP_WORDCHAR:
    3219               0 :         for (i = 1; i <= min; i++)
    3220                 :           {
    3221               0 :           if (eptr >= md->end_subject ||
    3222                 :              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
    3223               0 :             RRETURN(MATCH_NOMATCH);
    3224                 :           /* No need to skip more bytes - we know it's a 1-byte character */
    3225                 :           }
    3226               0 :         break;
    3227                 : 
    3228                 :         default:
    3229               0 :         RRETURN(PCRE_ERROR_INTERNAL);
    3230                 :         }  /* End switch(ctype) */
    3231                 : 
    3232                 :       else
    3233                 : #endif     /* SUPPORT_UTF8 */
    3234                 : 
    3235                 :       /* Code for the non-UTF-8 case for minimum matching of operators other
    3236                 :       than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
    3237                 :       number of bytes present, as this was tested above. */
    3238                 : 
    3239           31909 :       switch(ctype)
    3240                 :         {
    3241                 :         case OP_ANY:
    3242            1119 :         for (i = 1; i <= min; i++)
    3243                 :           {
    3244             564 :           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
    3245             561 :           eptr++;
    3246                 :           }
    3247             555 :         break;
    3248                 : 
    3249                 :         case OP_ALLANY:
    3250             629 :         eptr += min;
    3251             629 :         break;
    3252                 : 
    3253                 :         case OP_ANYBYTE:
    3254               0 :         eptr += min;
    3255               0 :         break;
    3256                 : 
    3257                 :         /* Because of the CRLF case, we can't assume the minimum number of
    3258                 :         bytes are present in this case. */
    3259                 : 
    3260                 :         case OP_ANYNL:
    3261               0 :         for (i = 1; i <= min; i++)
    3262                 :           {
    3263               0 :           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    3264               0 :           switch(*eptr++)
    3265                 :             {
    3266               0 :             default: RRETURN(MATCH_NOMATCH);
    3267                 :             case 0x000d:
    3268               0 :             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
    3269               0 :             break;
    3270                 :             case 0x000a:
    3271               0 :             break;
    3272                 : 
    3273                 :             case 0x000b:
    3274                 :             case 0x000c:
    3275                 :             case 0x0085:
    3276               0 :             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
    3277                 :             break;
    3278                 :             }
    3279                 :           }
    3280               0 :         break;
    3281                 : 
    3282                 :         case OP_NOT_HSPACE:
    3283               0 :         for (i = 1; i <= min; i++)
    3284                 :           {
    3285               0 :           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    3286               0 :           switch(*eptr++)
    3287                 :             {
    3288                 :             default: break;
    3289                 :             case 0x09:      /* HT */
    3290                 :             case 0x20:      /* SPACE */
    3291                 :             case 0xa0:      /* NBSP */
    3292               0 :             RRETURN(MATCH_NOMATCH);
    3293                 :             }
    3294                 :           }
    3295               0 :         break;
    3296                 : 
    3297                 :         case OP_HSPACE:
    3298               0 :         for (i = 1; i <= min; i++)
    3299                 :           {
    3300               0 :           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    3301               0 :           switch(*eptr++)
    3302                 :             {
    3303               0 :             default: RRETURN(MATCH_NOMATCH);
    3304                 :             case 0x09:      /* HT */
    3305                 :             case 0x20:      /* SPACE */
    3306                 :             case 0xa0:      /* NBSP */
    3307                 :             break;
    3308                 :             }
    3309                 :           }
    3310               0 :         break;
    3311                 : 
    3312                 :         case OP_NOT_VSPACE:
    3313               0 :         for (i = 1; i <= min; i++)
    3314                 :           {
    3315               0 :           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    3316               0 :           switch(*eptr++)
    3317                 :             {
    3318                 :             default: break;
    3319                 :             case 0x0a:      /* LF */
    3320                 :             case 0x0b:      /* VT */
    3321                 :             case 0x0c:      /* FF */
    3322                 :             case 0x0d:      /* CR */
    3323                 :             case 0x85:      /* NEL */
    3324               0 :             RRETURN(MATCH_NOMATCH);
    3325                 :             }
    3326                 :           }
    3327               0 :         break;
    3328                 : 
    3329                 :         case OP_VSPACE:
    3330               0 :         for (i = 1; i <= min; i++)
    3331                 :           {
    3332               0 :           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    3333               0 :           switch(*eptr++)
    3334                 :             {
    3335               0 :             default: RRETURN(MATCH_NOMATCH);
    3336                 :             case 0x0a:      /* LF */
    3337                 :             case 0x0b:      /* VT */
    3338                 :             case 0x0c:      /* FF */
    3339                 :             case 0x0d:      /* CR */
    3340                 :             case 0x85:      /* NEL */
    3341                 :             break;
    3342                 :             }
    3343                 :           }
    3344               0 :         break;
    3345                 : 
    3346                 :         case OP_NOT_DIGIT:
    3347               2 :         for (i = 1; i <= min; i++)
    3348               1 :           if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
    3349               1 :         break;
    3350                 : 
    3351                 :         case OP_DIGIT:
    3352           55531 :         for (i = 1; i <= min; i++)
    3353           28101 :           if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
    3354           27430 :         break;
    3355                 : 
    3356                 :         case OP_NOT_WHITESPACE:
    3357               8 :         for (i = 1; i <= min; i++)
    3358               4 :           if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
    3359               4 :         break;
    3360                 : 
    3361                 :         case OP_WHITESPACE:
    3362            4887 :         for (i = 1; i <= min; i++)
    3363            2447 :           if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
    3364            2440 :         break;
    3365                 : 
    3366                 :         case OP_NOT_WORDCHAR:
    3367               0 :         for (i = 1; i <= min; i++)
    3368               0 :           if ((md->ctypes[*eptr++] & ctype_word) != 0)
    3369               0 :             RRETURN(MATCH_NOMATCH);
    3370               0 :         break;
    3371                 : 
    3372                 :         case OP_WORDCHAR:
    3373             388 :         for (i = 1; i <= min; i++)
    3374             251 :           if ((md->ctypes[*eptr++] & ctype_word) == 0)
    3375              45 :             RRETURN(MATCH_NOMATCH);
    3376             137 :         break;
    3377                 : 
    3378                 :         default:
    3379               0 :         RRETURN(PCRE_ERROR_INTERNAL);
    3380                 :         }
    3381                 :       }
    3382                 : 
    3383                 :     /* If min = max, continue at the same level without recursing */
    3384                 : 
    3385           35033 :     if (min == max) continue;
    3386                 : 
    3387                 :     /* If minimizing, we have to test the rest of the pattern before each
    3388                 :     subsequent match. Again, separate the UTF-8 case for speed, and also
    3389                 :     separate the UCP cases. */
    3390                 : 
    3391           34968 :     if (minimize)
    3392                 :       {
    3393                 : #ifdef SUPPORT_UCP
    3394              94 :       if (prop_type >= 0)
    3395                 :         {
    3396               0 :         switch(prop_type)
    3397                 :           {
    3398                 :           case PT_ANY:
    3399               0 :           for (fi = min;; fi++)
    3400                 :             {
    3401               0 :             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
    3402               0 :             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    3403               0 :             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    3404               0 :             GETCHARINC(c, eptr);
    3405               0 :             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
    3406               0 :             }
    3407                 :           /* Control never gets here */
    3408                 : 
    3409                 :           case PT_LAMP:
    3410               0 :           for (fi = min;; fi++)
    3411                 :             {
    3412               0 :             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
    3413               0 :             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    3414               0 :             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    3415               0 :             GETCHARINC(c, eptr);
    3416               0 :             prop_chartype = UCD_CHARTYPE(c);
    3417               0 :             if ((prop_chartype == ucp_Lu ||
    3418                 :                  prop_chartype == ucp_Ll ||
    3419                 :                  prop_chartype == ucp_Lt) == prop_fail_result)
    3420               0 :               RRETURN(MATCH_NOMATCH);
    3421               0 :             }
    3422                 :           /* Control never gets here */
    3423                 : 
    3424                 :           case PT_GC:
    3425               0 :           for (fi = min;; fi++)
    3426                 :             {
    3427               0 :             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
    3428               0 :             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    3429               0 :             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    3430               0 :             GETCHARINC(c, eptr);
    3431               0 :             prop_category = UCD_CATEGORY(c);
    3432               0 :             if ((prop_category == prop_value) == prop_fail_result)
    3433               0 :               RRETURN(MATCH_NOMATCH);
    3434               0 :             }
    3435                 :           /* Control never gets here */
    3436                 : 
    3437                 :           case PT_PC:
    3438               0 :           for (fi = min;; fi++)
    3439                 :             {
    3440               0 :             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
    3441               0 :             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    3442               0 :             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    3443               0 :             GETCHARINC(c, eptr);
    3444               0 :             prop_chartype = UCD_CHARTYPE(c);
    3445               0 :             if ((prop_chartype == prop_value) == prop_fail_result)
    3446               0 :               RRETURN(MATCH_NOMATCH);
    3447               0 :             }
    3448                 :           /* Control never gets here */
    3449                 : 
    3450                 :           case PT_SC:
    3451               0 :           for (fi = min;; fi++)
    3452                 :             {
    3453               0 :             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
    3454               0 :             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    3455               0 :             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    3456               0 :             GETCHARINC(c, eptr);
    3457               0 :             prop_script = UCD_SCRIPT(c);
    3458               0 :             if ((prop_script == prop_value) == prop_fail_result)
    3459               0 :               RRETURN(MATCH_NOMATCH);
    3460               0 :             }
    3461                 :           /* Control never gets here */
    3462                 : 
    3463                 :           default:
    3464               0 :           RRETURN(PCRE_ERROR_INTERNAL);
    3465                 :           }
    3466                 :         }
    3467                 : 
    3468                 :       /* Match extended Unicode sequences. We will get here only if the
    3469                 :       support is in the binary; otherwise a compile-time error occurs. */
    3470                 : 
    3471              94 :       else if (ctype == OP_EXTUNI)
    3472                 :         {
    3473               0 :         for (fi = min;; fi++)
    3474                 :           {
    3475               0 :           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
    3476               0 :           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    3477               0 :           if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    3478               0 :           GETCHARINCTEST(c, eptr);
    3479               0 :           prop_category = UCD_CATEGORY(c);
    3480               0 :           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
    3481               0 :           while (eptr < md->end_subject)
    3482                 :             {
    3483               0 :             int len = 1;
    3484               0 :             if (!utf8) c = *eptr; else
    3485                 :               {
    3486               0 :               GETCHARLEN(c, eptr, len);
    3487                 :               }
    3488               0 :             prop_category = UCD_CATEGORY(c);
    3489               0 :             if (prop_category != ucp_M) break;
    3490               0 :             eptr += len;
    3491                 :             }
    3492               0 :           }
    3493                 :         }
    3494                 : 
    3495                 :       else
    3496                 : #endif     /* SUPPORT_UCP */
    3497                 : 
    3498                 : #ifdef SUPPORT_UTF8
    3499                 :       /* UTF-8 mode */
    3500              94 :       if (utf8)
    3501                 :         {
    3502               0 :         for (fi = min;; fi++)
    3503                 :           {
    3504               0 :           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
    3505               0 :           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    3506               0 :           if (fi >= max || eptr >= md->end_subject ||
    3507                 :                (ctype == OP_ANY && IS_NEWLINE(eptr)))
    3508               0 :             RRETURN(MATCH_NOMATCH);
    3509                 : 
    3510               0 :           GETCHARINC(c, eptr);
    3511               0 :           switch(ctype)
    3512                 :             {
    3513                 :             case OP_ANY:        /* This is the non-NL case */
    3514                 :             case OP_ALLANY:
    3515                 :             case OP_ANYBYTE:
    3516               0 :             break;
    3517                 : 
    3518                 :             case OP_ANYNL:
    3519               0 :             switch(c)
    3520                 :               {
    3521               0 :               default: RRETURN(MATCH_NOMATCH);
    3522                 :               case 0x000d:
    3523               0 :               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
    3524               0 :               break;
    3525                 :               case 0x000a:
    3526               0 :               break;
    3527                 : 
    3528                 :               case 0x000b:
    3529                 :               case 0x000c:
    3530                 :               case 0x0085:
    3531                 :               case 0x2028:
    3532                 :               case 0x2029:
    3533               0 :               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
    3534                 :               break;
    3535                 :               }
    3536               0 :             break;
    3537                 : 
    3538                 :             case OP_NOT_HSPACE:
    3539               0 :             switch(c)
    3540                 :               {
    3541                 :               default: break;
    3542                 :               case 0x09:      /* HT */
    3543                 :               case 0x20:      /* SPACE */
    3544                 :               case 0xa0:      /* NBSP */
    3545                 :               case 0x1680:    /* OGHAM SPACE MARK */
    3546                 :               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
    3547                 :               case 0x2000:    /* EN QUAD */
    3548                 :               case 0x2001:    /* EM QUAD */
    3549                 :               case 0x2002:    /* EN SPACE */
    3550                 :               case 0x2003:    /* EM SPACE */
    3551                 :               case 0x2004:    /* THREE-PER-EM SPACE */
    3552                 :               case 0x2005:    /* FOUR-PER-EM SPACE */
    3553                 :               case 0x2006:    /* SIX-PER-EM SPACE */
    3554                 :               case 0x2007:    /* FIGURE SPACE */
    3555                 :               case 0x2008:    /* PUNCTUATION SPACE */
    3556                 :               case 0x2009:    /* THIN SPACE */
    3557                 :               case 0x200A:    /* HAIR SPACE */
    3558                 :               case 0x202f:    /* NARROW NO-BREAK SPACE */
    3559                 :               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
    3560                 :               case 0x3000:    /* IDEOGRAPHIC SPACE */
    3561               0 :               RRETURN(MATCH_NOMATCH);
    3562                 :               }
    3563               0 :             break;
    3564                 : 
    3565                 :             case OP_HSPACE:
    3566               0 :             switch(c)
    3567                 :               {
    3568               0 :               default: RRETURN(MATCH_NOMATCH);
    3569                 :               case 0x09:      /* HT */
    3570                 :               case 0x20:      /* SPACE */
    3571                 :               case 0xa0:      /* NBSP */
    3572                 :               case 0x1680:    /* OGHAM SPACE MARK */
    3573                 :               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
    3574                 :               case 0x2000:    /* EN QUAD */
    3575                 :               case 0x2001:    /* EM QUAD */
    3576                 :               case 0x2002:    /* EN SPACE */
    3577                 :               case 0x2003:    /* EM SPACE */
    3578                 :               case 0x2004:    /* THREE-PER-EM SPACE */
    3579                 :               case 0x2005:    /* FOUR-PER-EM SPACE */
    3580                 :               case 0x2006:    /* SIX-PER-EM SPACE */
    3581                 :               case 0x2007:    /* FIGURE SPACE */
    3582                 :               case 0x2008:    /* PUNCTUATION SPACE */
    3583                 :               case 0x2009:    /* THIN SPACE */
    3584                 :               case 0x200A:    /* HAIR SPACE */
    3585                 :               case 0x202f:    /* NARROW NO-BREAK SPACE */
    3586                 :               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
    3587                 :               case 0x3000:    /* IDEOGRAPHIC SPACE */
    3588                 :               break;
    3589                 :               }
    3590               0 :             break;
    3591                 : 
    3592                 :             case OP_NOT_VSPACE:
    3593               0 :             switch(c)
    3594                 :               {
    3595                 :               default: break;
    3596                 :               case 0x0a:      /* LF */
    3597                 :               case 0x0b:      /* VT */
    3598                 :               case 0x0c:      /* FF */
    3599                 :               case 0x0d:      /* CR */
    3600                 :               case 0x85:      /* NEL */
    3601                 :               case 0x2028:    /* LINE SEPARATOR */
    3602                 :               case 0x2029:    /* PARAGRAPH SEPARATOR */
    3603               0 :               RRETURN(MATCH_NOMATCH);
    3604                 :               }
    3605               0 :             break;
    3606                 : 
    3607                 :             case OP_VSPACE:
    3608               0 :             switch(c)
    3609                 :               {
    3610               0 :               default: RRETURN(MATCH_NOMATCH);
    3611                 :               case 0x0a:      /* LF */
    3612                 :               case 0x0b:      /* VT */
    3613                 :               case 0x0c:      /* FF */
    3614                 :               case 0x0d:      /* CR */
    3615                 :               case 0x85:      /* NEL */
    3616                 :               case 0x2028:    /* LINE SEPARATOR */
    3617                 :               case 0x2029:    /* PARAGRAPH SEPARATOR */
    3618                 :               break;
    3619                 :               }
    3620               0 :             break;
    3621                 : 
    3622                 :             case OP_NOT_DIGIT:
    3623               0 :             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
    3624               0 :               RRETURN(MATCH_NOMATCH);
    3625               0 :             break;
    3626                 : 
    3627                 :             case OP_DIGIT:
    3628               0 :             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
    3629               0 :               RRETURN(MATCH_NOMATCH);
    3630               0 :             break;
    3631                 : 
    3632                 :             case OP_NOT_WHITESPACE:
    3633               0 :             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
    3634               0 :               RRETURN(MATCH_NOMATCH);
    3635               0 :             break;
    3636                 : 
    3637                 :             case OP_WHITESPACE:
    3638               0 :             if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
    3639               0 :               RRETURN(MATCH_NOMATCH);
    3640               0 :             break;
    3641                 : 
    3642                 :             case OP_NOT_WORDCHAR:
    3643               0 :             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
    3644               0 :               RRETURN(MATCH_NOMATCH);
    3645               0 :             break;
    3646                 : 
    3647                 :             case OP_WORDCHAR:
    3648               0 :             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
    3649               0 :               RRETURN(MATCH_NOMATCH);
    3650               0 :             break;
    3651                 : 
    3652                 :             default:
    3653               0 :             RRETURN(PCRE_ERROR_INTERNAL);
    3654                 :             }
    3655               0 :           }
    3656                 :         }
    3657                 :       else
    3658                 : #endif
    3659                 :       /* Not UTF-8 mode */
    3660                 :         {
    3661            3775 :         for (fi = min;; fi++)
    3662                 :           {
    3663            3775 :           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
    3664            3775 :           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    3665            3682 :           if (fi >= max || eptr >= md->end_subject ||
    3666                 :                (ctype == OP_ANY && IS_NEWLINE(eptr)))
    3667               1 :             RRETURN(MATCH_NOMATCH);
    3668                 : 
    3669            3681 :           c = *eptr++;
    3670            3681 :           switch(ctype)
    3671                 :             {
    3672                 :             case OP_ANY:     /* This is the non-NL case */
    3673                 :             case OP_ALLANY:
    3674                 :             case OP_ANYBYTE:
    3675            3681 :             break;
    3676                 : 
    3677                 :             case OP_ANYNL:
    3678               0 :             switch(c)
    3679                 :               {
    3680               0 :               default: RRETURN(MATCH_NOMATCH);
    3681                 :               case 0x000d:
    3682               0 :               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
    3683               0 :               break;
    3684                 : 
    3685                 :               case 0x000a:
    3686               0 :               break;
    3687                 : 
    3688                 :               case 0x000b:
    3689                 :               case 0x000c:
    3690                 :               case 0x0085:
    3691               0 :               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
    3692                 :               break;
    3693                 :               }
    3694               0 :             break;
    3695                 : 
    3696                 :             case OP_NOT_HSPACE:
    3697               0 :             switch(c)
    3698                 :               {
    3699                 :               default: break;
    3700                 :               case 0x09:      /* HT */
    3701                 :               case 0x20:      /* SPACE */
    3702                 :               case 0xa0:      /* NBSP */
    3703               0 :               RRETURN(MATCH_NOMATCH);
    3704                 :               }
    3705               0 :             break;
    3706                 : 
    3707                 :             case OP_HSPACE:
    3708               0 :             switch(c)
    3709                 :               {
    3710               0 :               default: RRETURN(MATCH_NOMATCH);
    3711                 :               case 0x09:      /* HT */
    3712                 :               case 0x20:      /* SPACE */
    3713                 :               case 0xa0:      /* NBSP */
    3714                 :               break;
    3715                 :               }
    3716               0 :             break;
    3717                 : 
    3718                 :             case OP_NOT_VSPACE:
    3719               0 :             switch(c)
    3720                 :               {
    3721                 :               default: break;
    3722                 :               case 0x0a:      /* LF */
    3723                 :               case 0x0b:      /* VT */
    3724                 :               case 0x0c:      /* FF */
    3725                 :               case 0x0d:      /* CR */
    3726                 :               case 0x85:      /* NEL */
    3727               0 :               RRETURN(MATCH_NOMATCH);
    3728                 :               }
    3729               0 :             break;
    3730                 : 
    3731                 :             case OP_VSPACE:
    3732               0 :             switch(c)
    3733                 :               {
    3734               0 :               default: RRETURN(MATCH_NOMATCH);
    3735                 :               case 0x0a:      /* LF */
    3736                 :               case 0x0b:      /* VT */
    3737                 :               case 0x0c:      /* FF */
    3738                 :               case 0x0d:      /* CR */
    3739                 :               case 0x85:      /* NEL */
    3740                 :               break;
    3741                 :               }
    3742               0 :             break;
    3743                 : 
    3744                 :             case OP_NOT_DIGIT:
    3745               0 :             if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
    3746               0 :             break;
    3747                 : 
    3748                 :             case OP_DIGIT:
    3749               0 :             if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
    3750               0 :             break;
    3751                 : 
    3752                 :             case OP_NOT_WHITESPACE:
    3753               0 :             if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
    3754               0 :             break;
    3755                 : 
    3756                 :             case OP_WHITESPACE:
    3757               0 :             if  ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
    3758               0 :             break;
    3759                 : 
    3760                 :             case OP_NOT_WORDCHAR:
    3761               0 :             if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
    3762               0 :             break;
    3763                 : 
    3764                 :             case OP_WORDCHAR:
    3765               0 :             if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
    3766               0 :             break;
    3767                 : 
    3768                 :             default:
    3769               0 :             RRETURN(PCRE_ERROR_INTERNAL);
    3770                 :             }
    3771            3681 :           }
    3772                 :         }
    3773                 :       /* Control never gets here */
    3774                 :       }
    3775                 : 
    3776                 :     /* If maximizing, it is worth using inline code for speed, doing the type
    3777                 :     test once at the start (i.e. keep it out of the loop). Again, keep the
    3778                 :     UTF-8 and UCP stuff separate. */
    3779                 : 
    3780                 :     else
    3781                 :       {
    3782           34874 :       pp = eptr;  /* Remember where we started */
    3783                 : 
    3784                 : #ifdef SUPPORT_UCP
    3785           34874 :       if (prop_type >= 0)
    3786                 :         {
    3787               1 :         switch(prop_type)
    3788                 :           {
    3789                 :           case PT_ANY:
    3790               0 :           for (i = min; i < max; i++)
    3791                 :             {
    3792               0 :             int len = 1;
    3793               0 :             if (eptr >= md->end_subject) break;
    3794               0 :             GETCHARLEN(c, eptr, len);
    3795               0 :             if (prop_fail_result) break;
    3796               0 :             eptr+= len;
    3797                 :             }
    3798               0 :           break;
    3799                 : 
    3800                 :           case PT_LAMP:
    3801               0 :           for (i = min; i < max; i++)
    3802                 :             {
    3803               0 :             int len = 1;
    3804               0 :             if (eptr >= md->end_subject) break;
    3805               0 :             GETCHARLEN(c, eptr, len);
    3806               0 :             prop_chartype = UCD_CHARTYPE(c);
    3807               0 :             if ((prop_chartype == ucp_Lu ||
    3808                 :                  prop_chartype == ucp_Ll ||
    3809                 :                  prop_chartype == ucp_Lt) == prop_fail_result)
    3810               0 :               break;
    3811               0 :             eptr+= len;
    3812                 :             }
    3813               0 :           break;
    3814                 : 
    3815                 :           case PT_GC:
    3816               3 :           for (i = min; i < max; i++)
    3817                 :             {
    3818               3 :             int len = 1;
    3819               3 :             if (eptr >= md->end_subject) break;
    3820               2 :             GETCHARLEN(c, eptr, len);
    3821               2 :             prop_category = UCD_CATEGORY(c);
    3822               2 :             if ((prop_category == prop_value) == prop_fail_result)
    3823               0 :               break;
    3824               2 :             eptr+= len;
    3825                 :             }
    3826               1 :           break;
    3827                 : 
    3828                 :           case PT_PC:
    3829               0 :           for (i = min; i < max; i++)
    3830                 :             {
    3831               0 :             int len = 1;
    3832               0 :             if (eptr >= md->end_subject) break;
    3833               0 :             GETCHARLEN(c, eptr, len);
    3834               0 :             prop_chartype = UCD_CHARTYPE(c);
    3835               0 :             if ((prop_chartype == prop_value) == prop_fail_result)
    3836               0 :               break;
    3837               0 :             eptr+= len;
    3838                 :             }
    3839               0 :           break;
    3840                 : 
    3841                 :           case PT_SC:
    3842               0 :           for (i = min; i < max; i++)
    3843                 :             {
    3844               0 :             int len = 1;
    3845               0 :             if (eptr >= md->end_subject) break;
    3846               0 :             GETCHARLEN(c, eptr, len);
    3847               0 :             prop_script = UCD_SCRIPT(c);
    3848               0 :             if ((prop_script == prop_value) == prop_fail_result)
    3849               0 :               break;
    3850               0 :             eptr+= len;
    3851                 :             }
    3852                 :           break;
    3853                 :           }
    3854                 : 
    3855                 :         /* eptr is now past the end of the maximum run */
    3856                 : 
    3857               1 :         if (possessive) continue;
    3858                 :         for(;;)
    3859                 :           {
    3860               1 :           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
    3861               1 :           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    3862               0 :           if (eptr-- == pp) break;        /* Stop if tried at original pos */
    3863               0 :           if (utf8) BACKCHAR(eptr);
    3864               0 :           }
    3865                 :         }
    3866                 : 
    3867                 :       /* Match extended Unicode sequences. We will get here only if the
    3868                 :       support is in the binary; otherwise a compile-time error occurs. */
    3869                 : 
    3870           34873 :       else if (ctype == OP_EXTUNI)
    3871                 :         {
    3872               0 :         for (i = min; i < max; i++)
    3873                 :           {
    3874               0 :           if (eptr >= md->end_subject) break;
    3875               0 :           GETCHARINCTEST(c, eptr);
    3876               0 :           prop_category = UCD_CATEGORY(c);
    3877               0 :           if (prop_category == ucp_M) break;
    3878               0 :           while (eptr < md->end_subject)
    3879                 :             {
    3880               0 :             int len = 1;
    3881               0 :             if (!utf8) c = *eptr; else
    3882                 :               {
    3883               0 :               GETCHARLEN(c, eptr, len);
    3884                 :               }
    3885               0 :             prop_category = UCD_CATEGORY(c);
    3886               0 :             if (prop_category != ucp_M) break;
    3887               0 :             eptr += len;
    3888                 :             }
    3889                 :           }
    3890                 : 
    3891                 :         /* eptr is now past the end of the maximum run */
    3892                 : 
    3893               0 :         if (possessive) continue;
    3894                 :         for(;;)
    3895                 :           {
    3896               0 :           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
    3897               0 :           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    3898               0 :           if (eptr-- == pp) break;        /* Stop if tried at original pos */
    3899                 :           for (;;)                        /* Move back over one extended */
    3900                 :             {
    3901               0 :             int len = 1;
    3902               0 :             if (!utf8) c = *eptr; else
    3903                 :               {
    3904               0 :               BACKCHAR(eptr);
    3905               0 :               GETCHARLEN(c, eptr, len);
    3906                 :               }
    3907               0 :             prop_category = UCD_CATEGORY(c);
    3908               0 :             if (prop_category != ucp_M) break;
    3909               0 :             eptr--;
    3910               0 :             }
    3911               0 :           }
    3912                 :         }
    3913                 : 
    3914                 :       else
    3915                 : #endif   /* SUPPORT_UCP */
    3916                 : 
    3917                 : #ifdef SUPPORT_UTF8
    3918                 :       /* UTF-8 mode */
    3919                 : 
    3920           34873 :       if (utf8)
    3921                 :         {
    3922               0 :         switch(ctype)
    3923                 :           {
    3924                 :           case OP_ANY:
    3925               0 :           if (max < INT_MAX)
    3926                 :             {
    3927               0 :             for (i = min; i < max; i++)
    3928                 :               {
    3929               0 :               if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
    3930               0 :               eptr++;
    3931               0 :               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
    3932                 :               }
    3933                 :             }
    3934                 : 
    3935                 :           /* Handle unlimited UTF-8 repeat */
    3936                 : 
    3937                 :           else
    3938                 :             {
    3939               0 :             for (i = min; i < max; i++)
    3940                 :               {
    3941               0 :               if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
    3942               0 :               eptr++;
    3943               0 :               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
    3944                 :               }
    3945                 :             }
    3946               0 :           break;
    3947                 : 
    3948                 :           case OP_ALLANY:
    3949               0 :           if (max < INT_MAX)
    3950                 :             {
    3951               0 :             for (i = min; i < max; i++)
    3952                 :               {
    3953               0 :               if (eptr >= md->end_subject) break;
    3954               0 :               eptr++;
    3955               0 :               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
    3956                 :               }
    3957                 :             }
    3958               0 :           else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
    3959               0 :           break;
    3960                 : 
    3961                 :           /* The byte case is the same as non-UTF8 */
    3962                 : 
    3963                 :           case OP_ANYBYTE:
    3964               0 :           c = max - min;
    3965               0 :           if (c > (unsigned int)(md->end_subject - eptr))
    3966               0 :             c = md->end_subject - eptr;
    3967               0 :           eptr += c;
    3968               0 :           break;
    3969                 : 
    3970                 :           case OP_ANYNL:
    3971               0 :           for (i = min; i < max; i++)
    3972                 :             {
    3973               0 :             int len = 1;
    3974               0 :             if (eptr >= md->end_subject) break;
    3975               0 :             GETCHARLEN(c, eptr, len);
    3976               0 :             if (c == 0x000d)
    3977                 :               {
    3978               0 :               if (++eptr >= md->end_subject) break;
    3979               0 :               if (*eptr == 0x000a) eptr++;
    3980                 :               }
    3981                 :             else
    3982                 :               {
    3983               0 :               if (c != 0x000a &&
    3984                 :                   (md->bsr_anycrlf ||
    3985                 :                    (c != 0x000b && c != 0x000c &&
    3986                 :                     c != 0x0085 && c != 0x2028 && c != 0x2029)))
    3987                 :                 break;
    3988               0 :               eptr += len;
    3989                 :               }
    3990                 :             }
    3991               0 :           break;
    3992                 : 
    3993                 :           case OP_NOT_HSPACE:
    3994                 :           case OP_HSPACE:
    3995               0 :           for (i = min; i < max; i++)
    3996                 :             {
    3997                 :             BOOL gotspace;
    3998               0 :             int len = 1;
    3999               0 :             if (eptr >= md->end_subject) break;
    4000               0 :             GETCHARLEN(c, eptr, len);
    4001               0 :             switch(c)
    4002                 :               {
    4003               0 :               default: gotspace = FALSE; break;
    4004                 :               case 0x09:      /* HT */
    4005                 :               case 0x20:      /* SPACE */
    4006                 :               case 0xa0:      /* NBSP */
    4007                 :               case 0x1680:    /* OGHAM SPACE MARK */
    4008                 :               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
    4009                 :               case 0x2000:    /* EN QUAD */
    4010                 :               case 0x2001:    /* EM QUAD */
    4011                 :               case 0x2002:    /* EN SPACE */
    4012                 :               case 0x2003:    /* EM SPACE */
    4013                 :               case 0x2004:    /* THREE-PER-EM SPACE */
    4014                 :               case 0x2005:    /* FOUR-PER-EM SPACE */
    4015                 :               case 0x2006:    /* SIX-PER-EM SPACE */
    4016                 :               case 0x2007:    /* FIGURE SPACE */
    4017                 :               case 0x2008:    /* PUNCTUATION SPACE */
    4018                 :               case 0x2009:    /* THIN SPACE */
    4019                 :               case 0x200A:    /* HAIR SPACE */
    4020                 :               case 0x202f:    /* NARROW NO-BREAK SPACE */
    4021                 :               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
    4022                 :               case 0x3000:    /* IDEOGRAPHIC SPACE */
    4023               0 :               gotspace = TRUE;
    4024                 :               break;
    4025                 :               }
    4026               0 :             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
    4027               0 :             eptr += len;
    4028                 :             }
    4029               0 :           break;
    4030                 : 
    4031                 :           case OP_NOT_VSPACE:
    4032                 :           case OP_VSPACE:
    4033               0 :           for (i = min; i < max; i++)
    4034                 :             {
    4035                 :             BOOL gotspace;
    4036               0 :             int len = 1;
    4037               0 :             if (eptr >= md->end_subject) break;
    4038               0 :             GETCHARLEN(c, eptr, len);
    4039               0 :             switch(c)
    4040                 :               {
    4041               0 :               default: gotspace = FALSE; break;
    4042                 :               case 0x0a:      /* LF */
    4043                 :               case 0x0b:      /* VT */
    4044                 :               case 0x0c:      /* FF */
    4045                 :               case 0x0d:      /* CR */
    4046                 :               case 0x85:      /* NEL */
    4047                 :               case 0x2028:    /* LINE SEPARATOR */
    4048                 :               case 0x2029:    /* PARAGRAPH SEPARATOR */
    4049               0 :               gotspace = TRUE;
    4050                 :               break;
    4051                 :               }
    4052               0 :             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
    4053               0 :             eptr += len;
    4054                 :             }
    4055               0 :           break;
    4056                 : 
    4057                 :           case OP_NOT_DIGIT:
    4058               0 :           for (i = min; i < max; i++)
    4059                 :             {
    4060               0 :             int len = 1;
    4061               0 :             if (eptr >= md->end_subject) break;
    4062               0 :             GETCHARLEN(c, eptr, len);
    4063               0 :             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
    4064               0 :             eptr+= len;
    4065                 :             }
    4066               0 :           break;
    4067                 : 
    4068                 :           case OP_DIGIT:
    4069               0 :           for (i = min; i < max; i++)
    4070                 :             {
    4071               0 :             int len = 1;
    4072               0 :             if (eptr >= md->end_subject) break;
    4073               0 :             GETCHARLEN(c, eptr, len);
    4074               0 :             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
    4075               0 :             eptr+= len;
    4076                 :             }
    4077               0 :           break;
    4078                 : 
    4079                 :           case OP_NOT_WHITESPACE:
    4080               0 :           for (i = min; i < max; i++)
    4081                 :             {
    4082               0 :             int len = 1;
    4083               0 :             if (eptr >= md->end_subject) break;
    4084               0 :             GETCHARLEN(c, eptr, len);
    4085               0 :             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
    4086               0 :             eptr+= len;
    4087                 :             }
    4088               0 :           break;
    4089                 : 
    4090                 :           case OP_WHITESPACE:
    4091               0 :           for (i = min; i < max; i++)
    4092                 :             {
    4093               0 :             int len = 1;
    4094               0 :             if (eptr >= md->end_subject) break;
    4095               0 :             GETCHARLEN(c, eptr, len);
    4096               0 :             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
    4097               0 :             eptr+= len;
    4098                 :             }
    4099               0 :           break;
    4100                 : 
    4101                 :           case OP_NOT_WORDCHAR:
    4102               0 :           for (i = min; i < max; i++)
    4103                 :             {
    4104               0 :             int len = 1;
    4105               0 :             if (eptr >= md->end_subject) break;
    4106               0 :             GETCHARLEN(c, eptr, len);
    4107               0 :             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
    4108               0 :             eptr+= len;
    4109                 :             }
    4110               0 :           break;
    4111                 : 
    4112                 :           case OP_WORDCHAR:
    4113               0 :           for (i = min; i < max; i++)
    4114                 :             {
    4115               0 :             int len = 1;
    4116               0 :             if (eptr >= md->end_subject) break;
    4117               0 :             GETCHARLEN(c, eptr, len);
    4118               0 :             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
    4119               0 :             eptr+= len;
    4120                 :             }
    4121               0 :           break;
    4122                 : 
    4123                 :           default:
    4124               0 :           RRETURN(PCRE_ERROR_INTERNAL);
    4125                 :           }
    4126                 : 
    4127                 :         /* eptr is now past the end of the maximum run */
    4128                 : 
    4129               0 :         if (possessive) continue;
    4130                 :         for(;;)
    4131                 :           {
    4132               0 :           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
    4133               0 :           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    4134               0 :           if (eptr-- == pp) break;        /* Stop if tried at original pos */
    4135               0 :           BACKCHAR(eptr);
    4136               0 :           }
    4137                 :         }
    4138                 :       else
    4139                 : #endif  /* SUPPORT_UTF8 */
    4140                 : 
    4141                 :       /* Not UTF-8 mode */
    4142                 :         {
    4143           34873 :         switch(ctype)
    4144                 :           {
    4145                 :           case OP_ANY:
    4146           15797 :           for (i = min; i < max; i++)
    4147                 :             {
    4148           15793 :             if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
    4149           15222 :             eptr++;
    4150                 :             }
    4151             575 :           break;
    4152                 : 
    4153                 :           case OP_ALLANY:
    4154                 :           case OP_ANYBYTE:
    4155             728 :           c = max - min;
    4156             728 :           if (c > (unsigned int)(md->end_subject - eptr))
    4157             728 :             c = md->end_subject - eptr;
    4158             728 :           eptr += c;
    4159             728 :           break;
    4160                 : 
    4161                 :           case OP_ANYNL:
    4162               0 :           for (i = min; i < max; i++)
    4163                 :             {
    4164               0 :             if (eptr >= md->end_subject) break;
    4165               0 :             c = *eptr;
    4166               0 :             if (c == 0x000d)
    4167                 :               {
    4168               0 :               if (++eptr >= md->end_subject) break;
    4169               0 :               if (*eptr == 0x000a) eptr++;
    4170                 :               }
    4171                 :             else
    4172                 :               {
    4173               0 :               if (c != 0x000a &&
    4174                 :                   (md->bsr_anycrlf ||
    4175                 :                     (c != 0x000b && c != 0x000c && c != 0x0085)))
    4176                 :                 break;
    4177               0 :               eptr++;
    4178                 :               }
    4179                 :             }
    4180               0 :           break;
    4181                 : 
    4182                 :           case OP_NOT_HSPACE:
    4183               0 :           for (i = min; i < max; i++)
    4184                 :             {
    4185               0 :             if (eptr >= md->end_subject) break;
    4186               0 :             c = *eptr;
    4187               0 :             if (c == 0x09 || c == 0x20 || c == 0xa0) break;
    4188               0 :             eptr++;
    4189                 :             }
    4190               0 :           break;
    4191                 : 
    4192                 :           case OP_HSPACE:
    4193               0 :           for (i = min; i < max; i++)
    4194                 :             {
    4195               0 :             if (eptr >= md->end_subject) break;
    4196               0 :             c = *eptr;
    4197               0 :             if (c != 0x09 && c != 0x20 && c != 0xa0) break;
    4198               0 :             eptr++;
    4199                 :             }
    4200               0 :           break;
    4201                 : 
    4202                 :           case OP_NOT_VSPACE:
    4203               0 :           for (i = min; i < max; i++)
    4204                 :             {
    4205               0 :             if (eptr >= md->end_subject) break;
    4206               0 :             c = *eptr;
    4207               0 :             if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
    4208                 :               break;
    4209               0 :             eptr++;
    4210                 :             }
    4211               0 :           break;
    4212                 : 
    4213                 :           case OP_VSPACE:
    4214               0 :           for (i = min; i < max; i++)
    4215                 :             {
    4216               0 :             if (eptr >= md->end_subject) break;
    4217               0 :             c = *eptr;
    4218               0 :             if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
    4219               0 :               break;
    4220               0 :             eptr++;
    4221                 :             }
    4222               0 :           break;
    4223                 : 
    4224                 :           case OP_NOT_DIGIT:
    4225               5 :           for (i = min; i < max; i++)
    4226                 :             {
    4227               5 :             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
    4228                 :               break;
    4229               4 :             eptr++;
    4230                 :             }
    4231               1 :           break;
    4232                 : 
    4233                 :           case OP_DIGIT:
    4234           60257 :           for (i = min; i < max; i++)
    4235                 :             {
    4236           60257 :             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
    4237                 :               break;
    4238           32606 :             eptr++;
    4239                 :             }
    4240           27651 :           break;
    4241                 : 
    4242                 :           case OP_NOT_WHITESPACE:
    4243              72 :           for (i = min; i < max; i++)
    4244                 :             {
    4245              72 :             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
    4246                 :               break;
    4247              67 :             eptr++;
    4248                 :             }
    4249               5 :           break;
    4250                 : 
    4251                 :           case OP_WHITESPACE:
    4252            8607 :           for (i = min; i < max; i++)
    4253                 :             {
    4254            8607 :             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
    4255                 :               break;
    4256            2837 :             eptr++;
    4257                 :             }
    4258            5770 :           break;
    4259                 : 
    4260                 :           case OP_NOT_WORDCHAR:
    4261               0 :           for (i = min; i < max; i++)
    4262                 :             {
    4263               0 :             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
    4264                 :               break;
    4265               0 :             eptr++;
    4266                 :             }
    4267               0 :           break;
    4268                 : 
    4269                 :           case OP_WORDCHAR:
    4270             788 :           for (i = min; i < max; i++)
    4271                 :             {
    4272             782 :             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
    4273                 :               break;
    4274             645 :             eptr++;
    4275                 :             }
    4276             143 :           break;
    4277                 : 
    4278                 :           default:
    4279               0 :           RRETURN(PCRE_ERROR_INTERNAL);
    4280                 :           }
    4281                 : 
    4282                 :         /* eptr is now past the end of the maximum run */
    4283                 : 
    4284           34873 :         if (possessive) continue;
    4285          271358 :         while (eptr >= pp)
    4286                 :           {
    4287          260585 :           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
    4288          260585 :           eptr--;
    4289          260585 :           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    4290                 :           }
    4291                 :         }
    4292                 : 
    4293                 :       /* Get here if we can't make it match with any permitted repetitions */
    4294                 : 
    4295            3016 :       RRETURN(MATCH_NOMATCH);
    4296                 :       }
    4297                 :     /* Control never gets here */
    4298                 : 
    4299                 :     /* There's been some horrible disaster. Arrival here can only mean there is
    4300                 :     something seriously wrong in the code above or the OP_xxx definitions. */
    4301                 : 
    4302                 :     default:
    4303                 :     DPRINTF(("Unknown opcode %d\n", *ecode));
    4304               0 :     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
    4305                 :     }
    4306                 : 
    4307                 :   /* Do not stick any code in here without much thought; it is assumed
    4308                 :   that "continue" in the code above comes out to here to repeat the main
    4309                 :   loop. */
    4310                 : 
    4311        16143551 :   }             /* End of main loop */
    4312                 : /* Control never reaches here */
    4313                 : 
    4314                 : 
    4315                 : /* When compiling to use the heap rather than the stack for recursive calls to
    4316                 : match(), the RRETURN() macro jumps here. The number that is saved in
    4317                 : frame->Xwhere indicates which label we actually want to return to. */
    4318                 : 
    4319                 : #ifdef NO_RECURSE
    4320                 : #define LBL(val) case val: goto L_RM##val;
    4321                 : HEAP_RETURN:
    4322                 : switch (frame->Xwhere)
    4323                 :   {
    4324                 :   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
    4325                 :   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
    4326                 :   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
    4327                 :   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
    4328                 :   LBL(53) LBL(54)
    4329                 : #ifdef SUPPORT_UTF8
    4330                 :   LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
    4331                 :   LBL(32) LBL(34) LBL(42) LBL(46)
    4332                 : #ifdef SUPPORT_UCP
    4333                 :   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
    4334                 : #endif  /* SUPPORT_UCP */
    4335                 : #endif  /* SUPPORT_UTF8 */
    4336                 :   default:
    4337                 :   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
    4338                 :   return PCRE_ERROR_INTERNAL;
    4339                 :   }
    4340                 : #undef LBL
    4341                 : #endif  /* NO_RECURSE */
    4342                 : }
    4343                 : 
    4344                 : 
    4345                 : /***************************************************************************
    4346                 : ****************************************************************************
    4347                 :                    RECURSION IN THE match() FUNCTION
    4348                 : 
    4349                 : Undefine all the macros that were defined above to handle this. */
    4350                 : 
    4351                 : #ifdef NO_RECURSE   /* The names in this group are undefined only in the NO_RECURSE build; removing them lets later code (pcre_exec declares its own "length" and "ims") use them as ordinary identifiers. */
    4352                 : #undef eptr
    4353                 : #undef ecode
    4354                 : #undef mstart
    4355                 : #undef offset_top
    4356                 : #undef ims
    4357                 : #undef eptrb
    4358                 : #undef flags
    4359                 : 
    4360                 : #undef callpat
    4361                 : #undef charptr
    4362                 : #undef data
    4363                 : #undef next
    4364                 : #undef pp
    4365                 : #undef prev
    4366                 : #undef saved_eptr
    4367                 : 
    4368                 : #undef new_recursive
    4369                 : 
    4370                 : #undef cur_is_word
    4371                 : #undef condition
    4372                 : #undef prev_is_word
    4373                 : 
    4374                 : #undef original_ims
    4375                 : 
    4376                 : #undef ctype
    4377                 : #undef length
    4378                 : #undef max
    4379                 : #undef min
    4380                 : #undef number
    4381                 : #undef offset
    4382                 : #undef op
    4383                 : #undef save_capture_last
    4384                 : #undef save_offset1
    4385                 : #undef save_offset2
    4386                 : #undef save_offset3
    4387                 : #undef stacksave
    4388                 : 
    4389                 : #undef newptrb
    4390                 : 
    4391                 : #endif  /* NO_RECURSE */
    4392                 : 
    4393                 : /* These two (fc and fi) are defined as macros in both cases, i.e. with or without NO_RECURSE, so they are undefined unconditionally, outside the #ifdef above. */
    4394                 : 
    4395                 : #undef fc
    4396                 : #undef fi
    4397                 : 
    4398                 : /***************************************************************************
    4399                 : ***************************************************************************/
    4400                 : 
    4401                 : 
    4402                 : 
    4403                 : /*************************************************
    4404                 : *         Execute a Regular Expression           *
    4405                 : *************************************************/
    4406                 : 
    4407                 : /* This function applies a compiled re to a subject string and picks out
    4408                 : portions of the string if it matches. Two elements in the vector are set for
    4409                 : each substring: the offsets to the start and end of the substring.
    4410                 : 
    4411                 : Arguments:
    4412                 :   argument_re     points to the compiled expression
    4413                 :   extra_data      points to extra data or is NULL
    4414                 :   subject         points to the subject string
    4415                 :   length          length of subject string (may contain binary zeros)
    4416                 :   start_offset    where to start in the subject string
    4417                 :   options         option bits
    4418                 :   offsets         points to a vector of ints to be filled in with offsets
    4419                 :   offsetcount     the number of elements in the vector
    4420                 : 
    4421                 : Returns:          > 0 => success; value is the number of elements filled in
    4422                 :                   = 0 => success, but offsets is not big enough
    4423                 :                    -1 => failed to match
    4424                 :                  < -1 => some kind of unexpected problem
    4425                 : */
    4426                 : 
    4427                 : PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
    4428                 : pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
    4429                 :   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
    4430                 :   int offsetcount)
    4431         1290903 : {
    4432                 : int rc, resetcount, ocount;
    4433         1290903 : int first_byte = -1;
    4434         1290903 : int req_byte = -1;
    4435         1290903 : int req_byte2 = -1;
    4436                 : int newline;
    4437                 : unsigned long int ims;
    4438         1290903 : BOOL using_temporary_offsets = FALSE;
    4439                 : BOOL anchored;
    4440                 : BOOL startline;
    4441                 : BOOL firstline;
    4442         1290903 : BOOL first_byte_caseless = FALSE;
    4443         1290903 : BOOL req_byte_caseless = FALSE;
    4444                 : BOOL utf8;
    4445                 : match_data match_block;
    4446         1290903 : match_data *md = &match_block;
    4447                 : const uschar *tables;
    4448         1290903 : const uschar *start_bits = NULL;
    4449         1290903 : USPTR start_match = (USPTR)subject + start_offset;
    4450                 : USPTR end_subject;
    4451         1290903 : USPTR req_byte_ptr = start_match - 1;
    4452                 : 
    4453                 : pcre_study_data internal_study;
    4454                 : const pcre_study_data *study;
    4455                 : 
    4456                 : real_pcre internal_re;
    4457         1290903 : const real_pcre *external_re = (const real_pcre *)argument_re;
    4458         1290903 : const real_pcre *re = external_re;
    4459                 : 
    4460                 : /* Plausibility checks */
    4461                 : 
    4462         1290903 : if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
    4463         1290903 : if (re == NULL || subject == NULL ||
    4464               0 :    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
    4465         1290903 : if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
    4466                 : 
    4467                 : /* Fish out the optional data from the extra_data structure, first setting
    4468                 : the default values. */
    4469                 : 
    4470         1290903 : study = NULL;
    4471         1290903 : md->match_limit = MATCH_LIMIT;
    4472         1290903 : md->match_limit_recursion = MATCH_LIMIT_RECURSION;
    4473         1290903 : md->callout_data = NULL;
    4474                 : 
    4475                 : /* The table pointer is always in native byte order. */
    4476                 : 
    4477         1290903 : tables = external_re->tables;
    4478                 : 
    4479         1290903 : if (extra_data != NULL)
    4480                 :   {
    4481         1290825 :   register unsigned int flags = extra_data->flags;
    4482         1290825 :   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
    4483              12 :     study = (const pcre_study_data *)extra_data->study_data;
    4484         1290825 :   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
    4485         1290825 :     md->match_limit = extra_data->match_limit;
    4486         1290825 :   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
    4487         1290825 :     md->match_limit_recursion = extra_data->match_limit_recursion;
    4488         1290825 :   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
    4489               0 :     md->callout_data = extra_data->callout_data;
    4490         1290825 :   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
    4491                 :   }
    4492                 : 
    4493                 : /* If the exec call supplied NULL for tables, use the inbuilt ones. This
    4494                 : is a feature that makes it possible to save compiled regex and re-use them
    4495                 : in other programs later. */
    4496                 : 
    4497         1290903 : if (tables == NULL) tables = _pcre_default_tables;
    4498                 : 
    4499                 : /* Check that the first field in the block is the magic number. If it is not,
    4500                 : test for a regex that was compiled on a host of opposite endianness. If this is
    4501                 : the case, flipped values are put in internal_re and internal_study if there was
    4502                 : study data too. */
    4503                 : 
    4504         1290903 : if (re->magic_number != MAGIC_NUMBER)
    4505                 :   {
    4506               0 :   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
    4507               0 :   if (re == NULL) return PCRE_ERROR_BADMAGIC;
    4508               0 :   if (study != NULL) study = &internal_study;
    4509                 :   }
    4510                 : 
    4511                 : /* Set up other data */
    4512                 : 
    4513         1290903 : anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
    4514         1290903 : startline = (re->flags & PCRE_STARTLINE) != 0;
    4515         1290903 : firstline = (re->options & PCRE_FIRSTLINE) != 0;
    4516                 : 
    4517                 : /* The code starts after the real_pcre block and the capture name table. */
    4518                 : 
    4519         1290903 : md->start_code = (const uschar *)external_re + re->name_table_offset +
    4520                 :   re->name_count * re->name_entry_size;
    4521                 : 
    4522         1290903 : md->start_subject = (USPTR)subject;
    4523         1290903 : md->start_offset = start_offset;
    4524         1290903 : md->end_subject = md->start_subject + length;
    4525         1290903 : end_subject = md->end_subject;
    4526                 : 
    4527         1290903 : md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
    4528         1290903 : utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
    4529         1290903 : md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
    4530                 : 
    4531         1290903 : md->notbol = (options & PCRE_NOTBOL) != 0;
    4532         1290903 : md->noteol = (options & PCRE_NOTEOL) != 0;
    4533         1290903 : md->notempty = (options & PCRE_NOTEMPTY) != 0;
    4534         1290903 : md->partial = (options & PCRE_PARTIAL) != 0;
    4535         1290903 : md->hitend = FALSE;
    4536                 : 
    4537         1290903 : md->recursive = NULL;                   /* No recursion at top level */
    4538                 : 
    4539         1290903 : md->lcc = tables + lcc_offset;
    4540         1290903 : md->ctypes = tables + ctypes_offset;
    4541                 : 
    4542                 : /* Handle different \R options. */
    4543                 : 
    4544         1290903 : switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
    4545                 :   {
    4546                 :   case 0:
    4547         1290903 :   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
    4548               0 :     md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
    4549                 :   else
    4550                 : #ifdef BSR_ANYCRLF
    4551                 :   md->bsr_anycrlf = TRUE;
    4552                 : #else
    4553         1290903 :   md->bsr_anycrlf = FALSE;
    4554                 : #endif
    4555         1290903 :   break;
    4556                 : 
    4557                 :   case PCRE_BSR_ANYCRLF:
    4558               0 :   md->bsr_anycrlf = TRUE;
    4559               0 :   break;
    4560                 : 
    4561                 :   case PCRE_BSR_UNICODE:
    4562               0 :   md->bsr_anycrlf = FALSE;
    4563               0 :   break;
    4564                 : 
    4565               0 :   default: return PCRE_ERROR_BADNEWLINE;
    4566                 :   }
    4567                 : 
    4568                 : /* Handle different types of newline. The three bits give eight cases. If
    4569                 : nothing is set at run time, whatever was used at compile time applies. */
    4570                 : 
    4571                 : switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
    4572         1290903 :         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
    4573                 :   {
    4574         1290903 :   case 0: newline = NEWLINE; break;   /* Compile-time default */
    4575               0 :   case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
    4576               0 :   case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
    4577                 :   case PCRE_NEWLINE_CR+
    4578               0 :        PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
    4579               0 :   case PCRE_NEWLINE_ANY: newline = -1; break;
    4580               0 :   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
    4581               0 :   default: return PCRE_ERROR_BADNEWLINE;
    4582                 :   }
    4583                 : 
    4584         1290903 : if (newline == -2)
    4585                 :   {
    4586               0 :   md->nltype = NLTYPE_ANYCRLF;
    4587                 :   }
    4588         1290903 : else if (newline < 0)
    4589                 :   {
    4590               0 :   md->nltype = NLTYPE_ANY;
    4591                 :   }
    4592                 : else
    4593                 :   {
    4594         1290903 :   md->nltype = NLTYPE_FIXED;
    4595         1290903 :   if (newline > 255)
    4596                 :     {
    4597               0 :     md->nllen = 2;
    4598               0 :     md->nl[0] = (newline >> 8) & 255;
    4599               0 :     md->nl[1] = newline & 255;
    4600                 :     }
    4601                 :   else
    4602                 :     {
    4603         1290903 :     md->nllen = 1;
    4604         1290903 :     md->nl[0] = newline;
    4605                 :     }
    4606                 :   }
    4607                 : 
    4608                 : /* Partial matching is supported only for a restricted set of regexes at the
    4609                 : moment. */
    4610                 : 
    4611         1290903 : if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
    4612               0 :   return PCRE_ERROR_BADPARTIAL;
    4613                 : 
    4614                 : /* Check a UTF-8 string if required. Unfortunately there's no way of passing
    4615                 : back the character offset. */
    4616                 : 
    4617                 : #ifdef SUPPORT_UTF8
    4618         1290903 : if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
    4619                 :   {
    4620              15 :   if (_pcre_valid_utf8((USPTR)subject, length) >= 0)
    4621               1 :     return PCRE_ERROR_BADUTF8;
    4622              14 :   if (start_offset > 0 && start_offset < length)
    4623                 :     {
    4624               0 :     int tb = ((USPTR)subject)[start_offset];
    4625               0 :     if (tb > 127)
    4626                 :       {
    4627               0 :       tb &= 0xc0;
    4628               0 :       if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
    4629                 :       }
    4630                 :     }
    4631                 :   }
    4632                 : #endif
    4633                 : 
    4634                 : /* The ims options can vary during the matching as a result of the presence
    4635                 : of (?ims) items in the pattern. They are kept in a local variable so that
    4636                 : restoring at the exit of a group is easy. */
    4637                 : 
    4638         1290902 : ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
    4639                 : 
    4640                 : /* If the expression has got more back references than the offsets supplied can
    4641                 : hold, we get a temporary chunk of working store to use during the matching.
    4642                 : Otherwise, we can use the vector supplied, rounding down its size to a multiple
    4643                 : of 3. */
    4644                 : 
    4645         1290902 : ocount = offsetcount - (offsetcount % 3);
    4646                 : 
    4647         1290902 : if (re->top_backref > 0 && re->top_backref >= ocount/3)
    4648                 :   {
    4649               0 :   ocount = re->top_backref * 3 + 3;
    4650               0 :   md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
    4651               0 :   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
    4652               0 :   using_temporary_offsets = TRUE;
    4653                 :   DPRINTF(("Got memory to hold back references\n"));
    4654                 :   }
    4655         1290902 : else md->offset_vector = offsets;
    4656                 : 
    4657         1290902 : md->offset_end = ocount;
    4658         1290902 : md->offset_max = (2*ocount)/3;
    4659         1290902 : md->offset_overflow = FALSE;
    4660         1290902 : md->capture_last = -1;
    4661                 : 
    4662                 : /* Compute the minimum number of offsets that we need to reset each time. Doing
    4663                 : this makes a huge difference to execution time when there aren't many brackets
    4664                 : in the pattern. */
    4665                 : 
    4666         1290902 : resetcount = 2 + re->top_bracket * 2;
    4667         1290902 : if (resetcount > offsetcount) resetcount = ocount;
    4668                 : 
    4669                 : /* Reset the working variable associated with each extraction. These should
    4670                 : never be used unless previously set, but they get saved and restored, and so we
    4671                 : initialize them to avoid reading uninitialized locations. */
    4672                 : 
    4673         1290902 : if (md->offset_vector != NULL)
    4674                 :   {
    4675         1290881 :   register int *iptr = md->offset_vector + ocount;
    4676         1290881 :   register int *iend = iptr - resetcount/2 + 1;
    4677         1290881 :   while (--iptr >= iend) *iptr = -1;
    4678                 :   }
    4679                 : 
    4680                 : /* Set up the first character to match, if available. The first_byte value is
    4681                 : never set for an anchored regular expression, but the anchoring may be forced
    4682                 : at run time, so we have to test for anchoring. The first char may be unset for
    4683                 : an unanchored pattern, of course. If there's no first char and the pattern was
    4684                 : studied, there may be a bitmap of possible first characters. */
    4685                 : 
    4686         1290902 : if (!anchored)
    4687                 :   {
    4688           28956 :   if ((re->flags & PCRE_FIRSTSET) != 0)
    4689                 :     {
    4690           21377 :     first_byte = re->first_byte & 255;
    4691           21377 :     if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
    4692              14 :       first_byte = md->lcc[first_byte];
    4693                 :     }
    4694                 :   else
    4695            7579 :     if (!startline && study != NULL &&
    4696                 :       (study->options & PCRE_STUDY_MAPPED) != 0)
    4697              12 :         start_bits = study->start_bits;
    4698                 :   }
    4699                 : 
    4700                 : /* For anchored or unanchored matches, there may be a "last known required
    4701                 : character" set. */
    4702                 : 
    4703         1290902 : if ((re->flags & PCRE_REQCHSET) != 0)
    4704                 :   {
    4705          901209 :   req_byte = re->req_byte & 255;
    4706          901209 :   req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
    4707          901209 :   req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
    4708                 :   }
    4709                 : 
    4710                 : 
    4711                 : /* ==========================================================================*/
    4712                 : 
    4713                 : /* Loop for handling unanchored repeated matching attempts; for anchored regexs
    4714                 : the loop runs just once. */
    4715                 : 
    4716                 : for(;;)
    4717                 :   {
    4718         1328590 :   USPTR save_end_subject = end_subject;
    4719                 :   USPTR new_start_match;
    4720                 : 
    4721                 :   /* Reset the maximum number of extractions we might see. */
    4722                 : 
    4723         1328590 :   if (md->offset_vector != NULL)
    4724                 :     {
    4725         1328569 :     register int *iptr = md->offset_vector;
    4726         1328569 :     register int *iend = iptr + resetcount;
    4727         1328569 :     while (iptr < iend) *iptr++ = -1;
    4728                 :     }
    4729                 : 
    4730                 :   /* If firstline is TRUE, the start of the match is constrained to the first
    4731                 :   line of a multiline string. That is, the match must be before or at the first
    4732                 :   newline. Implement this by temporarily adjusting end_subject so that we stop
    4733                 :   scanning at a newline. If the match fails at the newline, later code breaks
    4734                 :   this loop. */
    4735                 : 
    4736         1328590 :   if (firstline)
    4737                 :     {
    4738               0 :     USPTR t = start_match;
    4739                 : #ifdef SUPPORT_UTF8
    4740               0 :     if (utf8)
    4741                 :       {
    4742               0 :       while (t < md->end_subject && !IS_NEWLINE(t))
    4743                 :         {
    4744               0 :         t++;
    4745               0 :         while (t < end_subject && (*t & 0xc0) == 0x80) t++;
    4746                 :         }
    4747                 :       }
    4748                 :     else
    4749                 : #endif
    4750               0 :     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
    4751               0 :     end_subject = t;
    4752                 :     }
    4753                 : 
    4754                 :   /* There are some optimizations that avoid running the match if a known
    4755                 :   starting point is not found, or if a known later character is not present.
    4756                 :   However, there is an option that disables these, for testing and for ensuring
    4757                 :   that all callouts do actually occur. */
    4758                 : 
    4759         1328590 :   if ((options & PCRE_NO_START_OPTIMIZE) == 0)
    4760                 :     {
    4761                 :     /* Advance to a unique first byte if there is one. */
    4762                 : 
    4763         1328590 :     if (first_byte >= 0)
    4764                 :       {
    4765           21973 :       if (first_byte_caseless)
    4766             143 :         while (start_match < end_subject && md->lcc[*start_match] != first_byte)
    4767             107 :           start_match++;
    4768                 :       else
    4769        18283954 :         while (start_match < end_subject && *start_match != first_byte)
    4770        18240044 :           start_match++;
    4771                 :       }
    4772                 : 
    4773                 :     /* Or to just after a linebreak for a multiline match */
    4774                 : 
    4775         1306617 :     else if (startline)
    4776                 :       {
    4777              73 :       if (start_match > md->start_subject + start_offset)
    4778                 :         {
    4779                 : #ifdef SUPPORT_UTF8
    4780              30 :         if (utf8)
    4781                 :           {
    4782               0 :           while (start_match < end_subject && !WAS_NEWLINE(start_match))
    4783                 :             {
    4784               0 :             start_match++;
    4785               0 :             while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
    4786               0 :               start_match++;
    4787                 :             }
    4788                 :           }
    4789                 :         else
    4790                 : #endif
    4791             177 :         while (start_match < end_subject && !WAS_NEWLINE(start_match))
    4792             117 :           start_match++;
    4793                 : 
    4794                 :         /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
    4795                 :         and we are now at a LF, advance the match position by one more character.
    4796                 :         */
    4797                 : 
    4798              30 :         if (start_match[-1] == CHAR_CR &&
    4799                 :              (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
    4800                 :              start_match < end_subject &&
    4801                 :              *start_match == CHAR_NL)
    4802               0 :           start_match++;
    4803                 :         }
    4804                 :       }
    4805                 : 
    4806                 :     /* Or to a non-unique first byte after study */
    4807                 : 
    4808         1306544 :     else if (start_bits != NULL)
    4809                 :       {
    4810              37 :       while (start_match < end_subject)
    4811                 :         {
    4812              21 :         register unsigned int c = *start_match;
    4813              34 :         if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
    4814               8 :           else break;
    4815                 :         }
    4816                 :       }
    4817                 :     }   /* Starting optimizations */
    4818                 : 
    4819                 :   /* Restore fudged end_subject */
    4820                 : 
    4821         1328590 :   end_subject = save_end_subject;
    4822                 : 
    4823                 : #ifdef DEBUG  /* Sigh. Some compilers never learn. */
    4824                 :   printf(">>>> Match against: ");
    4825                 :   pchars(start_match, end_subject - start_match, TRUE, md);
    4826                 :   printf("\n");
    4827                 : #endif
    4828                 : 
    4829                 :   /* If req_byte is set, we know that that character must appear in the
    4830                 :   subject for the match to succeed. If the first character is set, req_byte
    4831                 :   must be later in the subject; otherwise the test starts at the match point.
    4832                 :   This optimization can save a huge amount of backtracking in patterns with
    4833                 :   nested unlimited repeats that aren't going to match. Writing separate code
    4834                 :   for cased/caseless versions makes it go faster, as does using an
    4835                 :   autoincrement and backing off on a match.
    4836                 : 
    4837                 :   HOWEVER: when the subject string is very, very long, searching to its end
    4838                 :   can take a long time, and give bad performance on quite ordinary patterns.
    4839                 :   This showed up when somebody was matching something like /^\d+C/ on a
    4840                 :   32-megabyte string... so we don't do this when the string is sufficiently
    4841                 :   long.
    4842                 : 
    4843                 :   ALSO: this processing is disabled when partial matching is requested, or if
    4844                 :   disabling is explicitly requested. */
    4845                 : 
    4846         1328590 :   if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&
    4847                 :       req_byte >= 0 &&
    4848                 :       end_subject - start_match < REQ_BYTE_MAX &&
    4849                 :       !md->partial)
    4850                 :     {
    4851          897759 :     register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
    4852                 : 
    4853                 :     /* We don't need to repeat the search if we haven't yet reached the
    4854                 :     place we found it at last time. */
    4855                 : 
    4856          897759 :     if (p > req_byte_ptr)
    4857                 :       {
    4858          897419 :       if (req_byte_caseless)
    4859                 :         {
    4860            2425 :         while (p < end_subject)
    4861                 :           {
    4862            1928 :           register int pp = *p++;
    4863            1928 :           if (pp == req_byte || pp == req_byte2) { p--; break; }
    4864                 :           }
    4865                 :         }
    4866                 :       else
    4867                 :         {
    4868        16529982 :         while (p < end_subject)
    4869                 :           {
    4870        14874519 :           if (*p++ == req_byte) { p--; break; }
    4871                 :           }
    4872                 :         }
    4873                 : 
    4874                 :       /* If we can't find the required character, break the matching loop,
    4875                 :       forcing a match failure. */
    4876                 : 
    4877          897419 :       if (p >= end_subject)
    4878                 :         {
    4879          758541 :         rc = MATCH_NOMATCH;
    4880          758541 :         break;
    4881                 :         }
    4882                 : 
    4883                 :       /* If we have found the required character, save the point where we
    4884                 :       found it, so that we don't search again next time round the loop if
    4885                 :       the start hasn't passed this character yet. */
    4886                 : 
    4887          138878 :       req_byte_ptr = p;
    4888                 :       }
    4889                 :     }
    4890                 : 
    4891                 :   /* OK, we can now run the match. */
    4892                 : 
    4893          570049 :   md->start_match_ptr = start_match;
    4894          570049 :   md->match_call_count = 0;
    4895          570049 :   rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
    4896                 : 
    4897          570049 :   switch(rc)
    4898                 :     {
    4899                 :     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
    4900                 :     exactly like PRUNE. */
    4901                 : 
    4902                 :     case MATCH_NOMATCH:
    4903                 :     case MATCH_PRUNE:
    4904                 :     case MATCH_THEN:
    4905          513565 :     new_start_match = start_match + 1;
    4906                 : #ifdef SUPPORT_UTF8
    4907          513565 :     if (utf8)
    4908              54 :       while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
    4909               2 :         new_start_match++;
    4910                 : #endif
    4911          513565 :     break;
    4912                 : 
    4913                 :     /* SKIP passes back the next starting point explicitly. */
    4914                 : 
    4915                 :     case MATCH_SKIP:
    4916               0 :     new_start_match = md->start_match_ptr;
    4917               0 :     break;
    4918                 : 
    4919                 :     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
    4920                 : 
    4921                 :     case MATCH_COMMIT:
    4922               0 :     rc = MATCH_NOMATCH;
    4923               0 :     goto ENDLOOP;
    4924                 : 
    4925                 :     /* Any other return is some kind of error. */
    4926                 : 
    4927                 :     default:
    4928           56484 :     goto ENDLOOP;
    4929                 :     }
    4930                 : 
    4931                 :   /* Control reaches here for the various types of "no match at this point"
    4932                 :   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
    4933                 : 
    4934          513565 :   rc = MATCH_NOMATCH;
    4935                 : 
    4936                 :   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
    4937                 :   newline in the subject (though it may continue over the newline). Therefore,
    4938                 :   if we have just failed to match, starting at a newline, do not continue. */
    4939                 : 
    4940          513565 :   if (firstline && IS_NEWLINE(start_match)) break;
    4941                 : 
    4942                 :   /* Advance to new matching position */
    4943                 : 
    4944          513565 :   start_match = new_start_match;
    4945                 : 
    4946                 :   /* Break the loop if the pattern is anchored or if we have passed the end of
    4947                 :   the subject. */
    4948                 : 
    4949          513565 :   if (anchored || start_match > end_subject) break;
    4950                 : 
    4951                 :   /* If we have just passed a CR and we are now at a LF, and the pattern does
    4952                 :   not contain any explicit matches for \r or \n, and the newline option is CRLF
    4953                 :   or ANY or ANYCRLF, advance the match position by one more character. */
    4954                 : 
    4955           37688 :   if (start_match[-1] == CHAR_CR &&
    4956                 :       start_match < end_subject &&
    4957                 :       *start_match == CHAR_NL &&
    4958                 :       (re->flags & PCRE_HASCRORLF) == 0 &&
    4959                 :         (md->nltype == NLTYPE_ANY ||
    4960                 :          md->nltype == NLTYPE_ANYCRLF ||
    4961                 :          md->nllen == 2))
    4962               0 :     start_match++;
    4963                 : 
    4964           37688 :   }   /* End of for(;;) "bumpalong" loop */
    4965                 : 
    4966                 : /* ==========================================================================*/
    4967                 : 
    4968                 : /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
    4969                 : conditions is true:
    4970                 : 
    4971                 : (1) The pattern is anchored or the match was failed by (*COMMIT);
    4972                 : 
    4973                 : (2) We are past the end of the subject;
    4974                 : 
    4975                 : (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
    4976                 :     this option requests that a match occur at or before the first newline in
    4977                 :     the subject.
    4978                 : 
    4979                 : When we have a match and the offset vector is big enough to deal with any
    4980                 : backreferences, captured substring offsets will already be set up. In the case
    4981                 : where we had to get some local store to hold offsets for backreference
    4982                 : processing, copy those that we can. In this case there need not be overflow if
    4983                 : certain parts of the pattern were not used, even though there are more
    4984                 : capturing parentheses than vector slots. */
    4985                 : 
    4986         1290902 : ENDLOOP:
    4987                 : 
    4988         1290902 : if (rc == MATCH_MATCH)
    4989                 :   {
    4990           56478 :   if (using_temporary_offsets)
    4991                 :     {
    4992               0 :     if (offsetcount >= 4)
    4993                 :       {
    4994               0 :       memcpy(offsets + 2, md->offset_vector + 2,
    4995                 :         (offsetcount - 2) * sizeof(int));
    4996                 :       DPRINTF(("Copied offsets from temporary memory\n"));
    4997                 :       }
    4998               0 :     if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
    4999                 :     DPRINTF(("Freeing temporary memory\n"));
    5000               0 :     (pcre_free)(md->offset_vector);
    5001                 :     }
    5002                 : 
    5003                 :   /* Set the return code to the number of captured strings, or 0 if there are
    5004                 :   too many to fit into the vector. */
    5005                 : 
    5006           56478 :   rc = md->offset_overflow? 0 : md->end_offset_top/2;
    5007                 : 
    5008                 :   /* If there is space, set up the whole thing as substring 0. The value of
    5009                 :   md->start_match_ptr might be modified if \K was encountered on the success
    5010                 :   matching path. */
    5011                 : 
    5012           56478 :   if (offsetcount < 2) rc = 0; else
    5013                 :     {
    5014           56465 :     offsets[0] = md->start_match_ptr - md->start_subject;
    5015           56465 :     offsets[1] = md->end_match_ptr - md->start_subject;
    5016                 :     }
    5017                 : 
    5018                 :   DPRINTF((">>>> returning %d\n", rc));
    5019           56478 :   return rc;
    5020                 :   }
    5021                 : 
    5022                 : /* Control gets here if there has been an error, or if the overall match
    5023                 : attempt has failed at all permitted starting positions. */
    5024                 : 
    5025         1234424 : if (using_temporary_offsets)
    5026                 :   {
    5027                 :   DPRINTF(("Freeing temporary memory\n"));
    5028               0 :   (pcre_free)(md->offset_vector);
    5029                 :   }
    5030                 : 
    5031         1234424 : if (rc != MATCH_NOMATCH)
    5032                 :   {
    5033                 :   DPRINTF((">>>> error: returning %d\n", rc));
    5034               6 :   return rc;
    5035                 :   }
    5036         1234418 : else if (md->partial && md->hitend)
    5037                 :   {
    5038                 :   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
    5039               0 :   return PCRE_ERROR_PARTIAL;
    5040                 :   }
    5041                 : else
    5042                 :   {
    5043                 :   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
    5044         1234418 :   return PCRE_ERROR_NOMATCH;
    5045                 :   }
    5046                 : }
    5047                 : 
    5048                 : /* End of pcre_exec.c */

Generated by: LTP GCOV extension version 1.5

Generated at Thu, 19 Nov 2009 08:20:14 +0000 (5 days ago)

Copyright © 2005-2009 The PHP Group
All rights reserved.