1 : /*************************************************
2 : * Perl-Compatible Regular Expressions *
3 : *************************************************/
4 :
5 : /* PCRE is a library of functions to support regular expressions whose syntax
6 : and semantics are as close as possible to those of the Perl 5 language.
7 :
8 : Written by Philip Hazel
9 : Copyright (c) 1997-2009 University of Cambridge
10 :
11 : -----------------------------------------------------------------------------
12 : Redistribution and use in source and binary forms, with or without
13 : modification, are permitted provided that the following conditions are met:
14 :
15 : * Redistributions of source code must retain the above copyright notice,
16 : this list of conditions and the following disclaimer.
17 :
18 : * Redistributions in binary form must reproduce the above copyright
19 : notice, this list of conditions and the following disclaimer in the
20 : documentation and/or other materials provided with the distribution.
21 :
22 : * Neither the name of the University of Cambridge nor the names of its
23 : contributors may be used to endorse or promote products derived from
24 : this software without specific prior written permission.
25 :
26 : THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 : AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 : IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 : ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 : LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 : CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 : SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 : INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 : CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 : ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 : POSSIBILITY OF SUCH DAMAGE.
37 : -----------------------------------------------------------------------------
38 : */
39 :
40 :
41 : /* This module contains internal functions for testing newlines when more than
42 : one kind of newline is to be recognized. When a newline is found, its length is
43 : returned. In principle, we could implement several newline "types", each
44 : referring to a different set of newline characters. At present, PCRE supports
45 : only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
46 : and NLTYPE_ANY. The full list of Unicode newline characters is taken from
47 : http://unicode.org/unicode/reports/tr18/. */
48 :
49 :
50 : #include "config.h"
51 :
52 : #include "pcre_internal.h"
53 :
54 :
55 :
56 : /*************************************************
57 : * Check for newline at given position *
58 : *************************************************/
59 :
60 : /* It is guaranteed that the initial value of ptr is less than the end of the
61 : string that is being processed.
62 :
63 : Arguments:
64 : ptr pointer to possible newline
65 : type the newline type
66 : endptr pointer to the end of the string
67 : lenptr where to return the length
68 : utf8 TRUE if in utf8 mode
69 :
70 : Returns: TRUE or FALSE
71 : */
72 :
73 : BOOL
74 : _pcre_is_newline(USPTR ptr, int type, USPTR endptr, int *lenptr, BOOL utf8)
75 0 : {
76 : int c;
77 0 : if (utf8) { GETCHAR(c, ptr); } else c = *ptr;
78 :
79 0 : if (type == NLTYPE_ANYCRLF) switch(c)
80 : {
81 0 : case 0x000a: *lenptr = 1; return TRUE; /* LF */
82 0 : case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
83 0 : return TRUE; /* CR */
84 0 : default: return FALSE;
85 : }
86 :
87 : /* NLTYPE_ANY */
88 :
89 0 : else switch(c)
90 : {
91 : case 0x000a: /* LF */
92 : case 0x000b: /* VT */
93 0 : case 0x000c: *lenptr = 1; return TRUE; /* FF */
94 0 : case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
95 0 : return TRUE; /* CR */
96 0 : case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */
97 : case 0x2028: /* LS */
98 0 : case 0x2029: *lenptr = 3; return TRUE; /* PS */
99 0 : default: return FALSE;
100 : }
101 : }
102 :
103 :
104 :
105 : /*************************************************
106 : * Check for newline at previous position *
107 : *************************************************/
108 :
109 : /* It is guaranteed that the initial value of ptr is greater than the start of
110 : the string that is being processed.
111 :
112 : Arguments:
113 : ptr pointer to possible newline
114 : type the newline type
115 : startptr pointer to the start of the string
116 : lenptr where to return the length
117 : utf8 TRUE if in utf8 mode
118 :
119 : Returns: TRUE or FALSE
120 : */
121 :
122 : BOOL
123 : _pcre_was_newline(USPTR ptr, int type, USPTR startptr, int *lenptr, BOOL utf8)
124 0 : {
125 : int c;
126 0 : ptr--;
127 : #ifdef SUPPORT_UTF8
128 0 : if (utf8)
129 : {
130 0 : BACKCHAR(ptr);
131 0 : GETCHAR(c, ptr);
132 : }
133 0 : else c = *ptr;
134 : #else /* no UTF-8 support */
135 : c = *ptr;
136 : #endif /* SUPPORT_UTF8 */
137 :
138 0 : if (type == NLTYPE_ANYCRLF) switch(c)
139 : {
140 0 : case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
141 0 : return TRUE; /* LF */
142 0 : case 0x000d: *lenptr = 1; return TRUE; /* CR */
143 0 : default: return FALSE;
144 : }
145 :
146 0 : else switch(c)
147 : {
148 0 : case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
149 0 : return TRUE; /* LF */
150 : case 0x000b: /* VT */
151 : case 0x000c: /* FF */
152 0 : case 0x000d: *lenptr = 1; return TRUE; /* CR */
153 0 : case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */
154 : case 0x2028: /* LS */
155 0 : case 0x2029: *lenptr = 3; return TRUE; /* PS */
156 0 : default: return FALSE;
157 : }
158 : }
159 :
160 : /* End of pcre_newline.c */
|