1 : /*
2 : +----------------------------------------------------------------------+
3 : | PHP Version 6 |
4 : +----------------------------------------------------------------------+
5 : | Copyright (c) 1997-2009 The PHP Group |
6 : +----------------------------------------------------------------------+
7 : | This source file is subject to version 3.01 of the PHP license, |
8 : | that is bundled with this package in the file LICENSE, and is |
9 : | available through the world-wide-web at the following url: |
10 : | http://www.php.net/license/3_01.txt |
11 : | If you did not receive a copy of the PHP license and are unable to |
12 : | obtain it through the world-wide-web, please send a note to |
13 : | license@php.net so we can mail you a copy immediately. |
14 : +----------------------------------------------------------------------+
15 : | Authors: Derick Rethans <derick@php.net> |
16 : +----------------------------------------------------------------------+
17 : */
18 :
19 : /* $Id: sanitizing_filters.c 288117 2009-09-07 02:35:25Z iliaa $ */
20 :
21 : #include "php_filter.h"
22 : #include "filter_private.h"
23 : #include "ext/standard/php_smart_str.h"
24 :
25 : /* {{{ STRUCTS */
/* Lookup table with one slot per possible byte value (0..255).
 * filter_map_update() stores a flag in the slots of allowed bytes and
 * filter_map_apply() keeps only the bytes whose slot is non-zero. */
typedef unsigned long filter_map[256];
27 : /* }}} */
28 :
29 : /* {{{ HELPER FUNCTIONS */
30 : static void php_filter_encode_html(zval *value, const unsigned char *chars)
31 66850 : {
32 66850 : smart_str str = {0};
33 66850 : int len = Z_STRLEN_P(value);
34 66850 : unsigned char *s = (unsigned char *)Z_STRVAL_P(value);
35 66850 : unsigned char *e = s + len;
36 :
37 66850 : if (Z_STRLEN_P(value) == 0) {
38 7 : return;
39 : }
40 :
41 4389602 : while (s < e) {
42 4255916 : if (chars[*s]) {
43 43 : smart_str_appendl(&str, "&#", 2);
44 43 : smart_str_append_unsigned(&str, (unsigned long)*s);
45 43 : smart_str_appendc(&str, ';');
46 : } else {
47 : /* XXX: this needs to be optimized to work with blocks of 'safe' chars */
48 4255873 : smart_str_appendc(&str, *s);
49 : }
50 4255916 : s++;
51 : }
52 :
53 66843 : smart_str_0(&str);
54 66843 : efree(Z_STRVAL_P(value));
55 66843 : Z_STRVAL_P(value) = str.c;
56 66843 : Z_STRLEN_P(value) = str.len;
57 : }
58 :
59 : static const unsigned char hexchars[] = "0123456789ABCDEF";
60 :
61 : #define LOWALPHA "abcdefghijklmnopqrstuvwxyz"
62 : #define HIALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
63 : #define DIGIT "0123456789"
64 :
65 : #define DEFAULT_URL_ENCODE LOWALPHA HIALPHA DIGIT "-._"
66 :
67 : static void php_filter_encode_url(zval *value, const unsigned char* chars, const int char_len, int high, int low, int encode_nul)
68 22 : {
69 : unsigned char *str, *p;
70 : unsigned char tmp[256];
71 22 : unsigned char *s = (unsigned char *)chars;
72 22 : unsigned char *e = s + char_len;
73 :
74 22 : memset(tmp, 1, sizeof(tmp)-1);
75 :
76 1474 : while (s < e) {
77 1430 : tmp[*s++] = 0;
78 : }
79 : /* XXX: This is not needed since these chars in the allowed list never include the high/low/null value
80 : if (encode_nul) {
81 : tmp[0] = 1;
82 : }
83 : if (high) {
84 : memset(tmp + 127, 1, sizeof(tmp) - 127);
85 : }
86 : if (low) {
87 : memset(tmp, 1, 32);
88 : }
89 : */
90 22 : p = str = (unsigned char *) safe_emalloc(3, Z_STRLEN_P(value), 1);
91 22 : s = (unsigned char *)Z_STRVAL_P(value);
92 22 : e = s + Z_STRLEN_P(value);
93 :
94 331 : while (s < e) {
95 287 : if (tmp[*s]) {
96 95 : *p++ = '%';
97 95 : *p++ = hexchars[(unsigned char) *s >> 4];
98 95 : *p++ = hexchars[(unsigned char) *s & 15];
99 : } else {
100 192 : *p++ = *s;
101 : }
102 287 : s++;
103 : }
104 22 : *p = '\0';
105 22 : efree(Z_STRVAL_P(value));
106 22 : Z_STRVAL_P(value) = (char *)str;
107 22 : Z_STRLEN_P(value) = p - str;
108 22 : }
109 :
110 : static void php_filter_strip(zval *value, long flags)
111 66872 : {
112 : unsigned char *buf, *str;
113 : int i, c;
114 :
115 : /* Optimization for if no strip flags are set */
116 66872 : if (! ((flags & FILTER_FLAG_STRIP_LOW) || (flags & FILTER_FLAG_STRIP_HIGH)) ) {
117 66866 : return;
118 : }
119 :
120 6 : str = (unsigned char *)Z_STRVAL_P(value);
121 6 : buf = safe_emalloc(1, Z_STRLEN_P(value) + 1, 1);
122 6 : c = 0;
123 250 : for (i = 0; i < Z_STRLEN_P(value); i++) {
124 244 : if ((str[i] > 127) && (flags & FILTER_FLAG_STRIP_HIGH)) {
125 244 : } else if ((str[i] < 32) && (flags & FILTER_FLAG_STRIP_LOW)) {
126 : } else {
127 244 : buf[c] = str[i];
128 244 : ++c;
129 : }
130 : }
131 : /* update zval string data */
132 6 : buf[c] = '\0';
133 6 : efree(Z_STRVAL_P(value));
134 6 : Z_STRVAL_P(value) = (char *)buf;
135 6 : Z_STRLEN_P(value) = c;
136 : }
137 : /* }}} */
138 :
139 : /* {{{ FILTER MAP HELPERS */
140 : static void filter_map_init(filter_map *map)
141 97 : {
142 97 : memset(map, 0, sizeof(filter_map));
143 97 : }
144 :
145 : static void filter_map_update(filter_map *map, int flag, const unsigned char *allowed_list)
146 107 : {
147 : int l, i;
148 :
149 107 : l = strlen((const char*)allowed_list);
150 6227 : for (i = 0; i < l; ++i) {
151 6120 : (*map)[allowed_list[i]] = flag;
152 : }
153 107 : }
154 :
155 : static void filter_map_apply(zval *value, filter_map *map)
156 97 : {
157 : unsigned char *buf, *str;
158 : int i, c;
159 :
160 97 : str = (unsigned char *)Z_STRVAL_P(value);
161 97 : buf = safe_emalloc(1, Z_STRLEN_P(value) + 1, 1);
162 97 : c = 0;
163 1113 : for (i = 0; i < Z_STRLEN_P(value); i++) {
164 1016 : if ((*map)[str[i]]) {
165 845 : buf[c] = str[i];
166 845 : ++c;
167 : }
168 : }
169 : /* update zval string data */
170 97 : buf[c] = '\0';
171 97 : efree(Z_STRVAL_P(value));
172 97 : Z_STRVAL_P(value) = (char *)buf;
173 97 : Z_STRLEN_P(value) = c;
174 97 : }
175 : /* }}} */
176 :
177 : /* {{{ php_filter_string */
178 : void php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)
179 297 : {
180 : size_t new_len;
181 297 : unsigned char enc[256] = {0};
182 :
183 : /* strip high/strip low ( see flags )*/
184 297 : php_filter_strip(value, flags);
185 :
186 297 : if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
187 296 : enc['\''] = enc['"'] = 1;
188 : }
189 297 : if (flags & FILTER_FLAG_ENCODE_AMP) {
190 1 : enc['&'] = 1;
191 : }
192 297 : if (flags & FILTER_FLAG_ENCODE_LOW) {
193 0 : memset(enc, 1, 32);
194 : }
195 297 : if (flags & FILTER_FLAG_ENCODE_HIGH) {
196 0 : memset(enc + 127, 1, sizeof(enc) - 127);
197 : }
198 :
199 297 : php_filter_encode_html(value, enc);
200 :
201 : /* strip tags, implicitly also removes \0 chars */
202 297 : new_len = php_strip_tags_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), NULL, NULL, 0, 1);
203 297 : Z_STRLEN_P(value) = new_len;
204 :
205 297 : if (new_len == 0) {
206 7 : zval_dtor(value);
207 7 : ZVAL_EMPTY_STRING(value);
208 7 : return;
209 : }
210 : }
211 : /* }}} */
212 :
213 : /* {{{ php_filter_encoded */
214 : void php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)
215 22 : {
216 : /* apply strip_high and strip_low filters */
217 22 : php_filter_strip(value, flags);
218 : /* urlencode */
219 22 : php_filter_encode_url(value, (unsigned char *)DEFAULT_URL_ENCODE, sizeof(DEFAULT_URL_ENCODE)-1, flags & FILTER_FLAG_ENCODE_HIGH, flags & FILTER_FLAG_ENCODE_LOW, 1);
220 22 : }
221 : /* }}} */
222 :
223 : /* {{{ php_filter_special_chars */
224 : void php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
225 20 : {
226 20 : unsigned char enc[256] = {0};
227 :
228 20 : php_filter_strip(value, flags);
229 :
230 : /* encodes ' " < > & \0 to numerical entities */
231 20 : enc['\''] = enc['"'] = enc['<'] = enc['>'] = enc['&'] = enc[0] = 1;
232 :
233 : /* if strip low is not set, then we encode them as &#xx; */
234 20 : memset(enc, 1, 32);
235 :
236 20 : if (flags & FILTER_FLAG_ENCODE_HIGH) {
237 4 : memset(enc + 127, 1, sizeof(enc) - 127);
238 : }
239 :
240 20 : php_filter_encode_html(value, enc);
241 20 : }
242 : /* }}} */
243 :
244 : /* {{{ php_filter_unsafe_raw */
245 : void php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)
246 66534 : {
247 : /* Only if no flags are set (optimization) */
248 66534 : if (flags != 0 && Z_STRLEN_P(value) > 0) {
249 66533 : unsigned char enc[256] = {0};
250 :
251 66533 : php_filter_strip(value, flags);
252 :
253 66533 : if (flags & FILTER_FLAG_ENCODE_AMP) {
254 4 : enc['&'] = 1;
255 : }
256 66533 : if (flags & FILTER_FLAG_ENCODE_LOW) {
257 0 : memset(enc, 1, 32);
258 : }
259 66533 : if (flags & FILTER_FLAG_ENCODE_HIGH) {
260 0 : memset(enc + 127, 1, sizeof(enc) - 127);
261 : }
262 :
263 66533 : php_filter_encode_html(value, enc);
264 : }
265 66534 : }
266 : /* }}} */
267 :
268 : /* {{{ php_filter_email */
269 : #define SAFE "$-_.+"
270 : #define EXTRA "!*'(),"
271 : #define NATIONAL "{}|\\^~[]`"
272 : #define PUNCTUATION "<>#%\""
273 : #define RESERVED ";/?:@&="
274 :
275 : void php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)
276 14 : {
277 : /* Check section 6 of rfc 822 http://www.faqs.org/rfcs/rfc822.html */
278 14 : const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT "!#$%&'*+-=?^_`{|}~@.[]";
279 : filter_map map;
280 :
281 14 : filter_map_init(&map);
282 14 : filter_map_update(&map, 1, allowed_list);
283 14 : filter_map_apply(value, &map);
284 14 : }
285 : /* }}} */
286 :
287 : /* {{{ php_filter_url */
288 : void php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)
289 48 : {
290 : /* Strip all chars not part of section 5 of
291 : * http://www.faqs.org/rfcs/rfc1738.html */
292 48 : const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT SAFE EXTRA NATIONAL PUNCTUATION RESERVED;
293 : filter_map map;
294 :
295 48 : filter_map_init(&map);
296 48 : filter_map_update(&map, 1, allowed_list);
297 48 : filter_map_apply(value, &map);
298 48 : }
299 : /* }}} */
300 :
301 : /* {{{ php_filter_number_int */
302 : void php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)
303 16 : {
304 : /* strip everything [^0-9+-] */
305 16 : const unsigned char allowed_list[] = "+-" DIGIT;
306 : filter_map map;
307 :
308 16 : filter_map_init(&map);
309 16 : filter_map_update(&map, 1, allowed_list);
310 16 : filter_map_apply(value, &map);
311 16 : }
312 : /* }}} */
313 :
314 : /* {{{ php_filter_number_float */
315 : void php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)
316 19 : {
317 : /* strip everything [^0-9+-] */
318 19 : const unsigned char allowed_list[] = "+-" DIGIT;
319 : filter_map map;
320 :
321 19 : filter_map_init(&map);
322 19 : filter_map_update(&map, 1, allowed_list);
323 :
324 : /* depending on flags, strip '.', 'e', ",", "'" */
325 19 : if (flags & FILTER_FLAG_ALLOW_FRACTION) {
326 6 : filter_map_update(&map, 2, (const unsigned char *) ".");
327 : }
328 19 : if (flags & FILTER_FLAG_ALLOW_THOUSAND) {
329 2 : filter_map_update(&map, 3, (const unsigned char *) ",");
330 : }
331 19 : if (flags & FILTER_FLAG_ALLOW_SCIENTIFIC) {
332 2 : filter_map_update(&map, 4, (const unsigned char *) "eE");
333 : }
334 19 : filter_map_apply(value, &map);
335 19 : }
336 : /* }}} */
337 :
338 : /* {{{ php_filter_magic_quotes */
339 : void php_filter_magic_quotes(PHP_INPUT_FILTER_PARAM_DECL)
340 13 : {
341 : char *buf;
342 : int len;
343 :
344 : /* just call php_addslashes quotes */
345 13 : buf = php_addslashes(Z_STRVAL_P(value), Z_STRLEN_P(value), &len, 0 TSRMLS_CC);
346 :
347 13 : efree(Z_STRVAL_P(value));
348 13 : Z_STRVAL_P(value) = buf;
349 13 : Z_STRLEN_P(value) = len;
350 13 : }
351 : /* }}} */
352 :
353 : /*
354 : * Local variables:
355 : * tab-width: 4
356 : * c-basic-offset: 4
357 : * End:
358 : * vim600: noet sw=4 ts=4 fdm=marker
359 : * vim<600: noet sw=4 ts=4
360 : */
|