1 : /*
2 : +----------------------------------------------------------------------+
3 : | PHP Version 6 |
4 : +----------------------------------------------------------------------+
5 : | This source file is subject to version 3.01 of the PHP license, |
6 : | that is bundled with this package in the file LICENSE, and is |
7 : | available through the world-wide-web at the following url: |
8 : | http://www.php.net/license/3_01.txt |
9 : | If you did not receive a copy of the PHP license and are unable to |
10 : | obtain it through the world-wide-web, please send a note to |
11 : | license@php.net so we can mail you a copy immediately. |
12 : +----------------------------------------------------------------------+
13 : | Authors: Sara Golemon (pollita@php.net) |
14 : +----------------------------------------------------------------------+
15 : */
16 :
17 : /* $Id: unicode_filter.c 260032 2008-05-20 07:48:04Z tony2001 $ */
18 :
19 :
20 : #include "php.h"
21 : #include <unicode/ucnv.h>
22 :
23 : /* {{{ data structure */
24 : typedef struct _php_unicode_filter_data {
25 : char is_persistent;
26 : UConverter *conv;
27 :
28 : char to_unicode;
29 : } php_unicode_filter_data;
30 : /* }}} */
31 :
32 : /* {{{ unicode.* filter implementation */
33 :
34 : /* unicode.to.* -- Expects String -- Returns Unicode */
35 : static php_stream_filter_status_t php_unicode_to_string_filter(
36 : php_stream *stream,
37 : php_stream_filter *thisfilter,
38 : php_stream_bucket_brigade *buckets_in,
39 : php_stream_bucket_brigade *buckets_out,
40 : size_t *bytes_consumed,
41 : int flags
42 : TSRMLS_DC)
43 10982 : {
44 : php_unicode_filter_data *data;
45 10982 : php_stream_filter_status_t exit_status = PSFS_FEED_ME;
46 10982 : size_t consumed = 0;
47 :
48 10982 : if (!thisfilter || !thisfilter->abstract) {
49 : /* Should never happen */
50 0 : return PSFS_ERR_FATAL;
51 : }
52 :
53 10982 : data = (php_unicode_filter_data *)(thisfilter->abstract);
54 25248 : while (buckets_in->head) {
55 3284 : php_stream_bucket *bucket = buckets_in->head;
56 3284 : UChar *src = bucket->buf.u;
57 :
58 3284 : php_stream_bucket_unlink(bucket TSRMLS_CC);
59 3284 : if (bucket->buf_type != IS_UNICODE) {
60 : /* Already ASCII, can't really do anything with it */
61 618 : consumed += bucket->buflen;
62 618 : php_stream_bucket_append(buckets_out, bucket TSRMLS_CC);
63 618 : exit_status = PSFS_PASS_ON;
64 618 : continue;
65 : }
66 :
67 7998 : while (src < (bucket->buf.u + bucket->buflen)) {
68 2666 : int remaining = bucket->buflen - (src - bucket->buf.u);
69 : char *destp, *destbuf;
70 2666 : int32_t destlen = UCNV_GET_MAX_BYTES_FOR_STRING(remaining, ucnv_getMaxCharSize(data->conv));
71 2666 : UErrorCode errCode = U_ZERO_ERROR;
72 : php_stream_bucket *new_bucket;
73 :
74 2666 : destp = destbuf = (char *)pemalloc(destlen, data->is_persistent);
75 :
76 2666 : ucnv_fromUnicode(data->conv, &destp, destbuf + destlen, (const UChar**)&src, src + remaining, NULL, FALSE, &errCode);
77 : /* UTODO: Error catching */
78 2666 : new_bucket = php_stream_bucket_new(stream, destbuf, destp - destbuf, 1, data->is_persistent TSRMLS_CC);
79 2666 : php_stream_bucket_append(buckets_out, new_bucket TSRMLS_CC);
80 2666 : exit_status = PSFS_PASS_ON;
81 : }
82 2666 : consumed += bucket->buflen;
83 2666 : php_stream_bucket_delref(bucket TSRMLS_CC);
84 : }
85 :
86 10982 : if (flags & PSFS_FLAG_FLUSH_CLOSE) {
87 5981 : UErrorCode errCode = U_ZERO_ERROR;
88 5981 : char d[64], *dest = d, *destp = d + 64;
89 : /* Spit it out! */
90 :
91 5981 : ucnv_fromUnicode(data->conv, &dest, destp, NULL, NULL, NULL, TRUE, &errCode);
92 : /* UTODO: Error catching */
93 5981 : if (dest > d) {
94 0 : php_stream_bucket *bucket = php_stream_bucket_new(stream, d, dest - d, 0, 0 TSRMLS_CC);
95 0 : php_stream_bucket_append(buckets_out, bucket TSRMLS_CC);
96 0 : exit_status = PSFS_PASS_ON;
97 : }
98 : }
99 :
100 10982 : if (bytes_consumed) {
101 10975 : *bytes_consumed = consumed;
102 : }
103 :
104 10982 : return exit_status;
105 : }
106 :
107 : /* unicode.from.* -- Expects Unicode -- Returns String */
108 : static php_stream_filter_status_t php_unicode_from_string_filter(
109 : php_stream *stream,
110 : php_stream_filter *thisfilter,
111 : php_stream_bucket_brigade *buckets_in,
112 : php_stream_bucket_brigade *buckets_out,
113 : size_t *bytes_consumed,
114 : int flags
115 : TSRMLS_DC)
116 24298 : {
117 : php_unicode_filter_data *data;
118 24298 : php_stream_filter_status_t exit_status = PSFS_FEED_ME;
119 24298 : size_t consumed = 0;
120 :
121 24298 : if (!thisfilter || !thisfilter->abstract) {
122 : /* Should never happen */
123 0 : return PSFS_ERR_FATAL;
124 : }
125 :
126 24298 : data = (php_unicode_filter_data *)(thisfilter->abstract);
127 60978 : while (buckets_in->head) {
128 12382 : php_stream_bucket *bucket = buckets_in->head;
129 12382 : char *src = bucket->buf.s;
130 :
131 12382 : php_stream_bucket_unlink(bucket TSRMLS_CC);
132 12382 : if (bucket->buf_type == IS_UNICODE) {
133 : /* already in unicode, nothing to do */
134 0 : consumed += bucket->buflen;
135 0 : php_stream_bucket_append(buckets_out, bucket TSRMLS_CC);
136 0 : exit_status = PSFS_PASS_ON;
137 0 : continue;
138 : }
139 :
140 37031 : while (src < (bucket->buf.s + bucket->buflen)) {
141 12382 : int remaining = bucket->buflen - (src - bucket->buf.s);
142 : UChar *destp, *destbuf;
143 12382 : int32_t destlen = UCNV_GET_MAX_BYTES_FOR_STRING(remaining, ucnv_getMaxCharSize(data->conv));
144 12382 : UErrorCode errCode = U_ZERO_ERROR;
145 : php_stream_bucket *new_bucket;
146 :
147 12382 : if ((destlen & 1) != 0) {
148 5967 : destlen++;
149 : }
150 :
151 12382 : destp = destbuf = (UChar *)pemalloc(destlen, data->is_persistent);
152 :
153 12382 : ucnv_toUnicode(data->conv, &destp, (UChar*)((char*)destbuf + destlen), (const char**)&src, src + remaining, NULL, FALSE, &errCode);
154 :
155 12382 : if (errCode != U_ZERO_ERROR) {
156 115 : pefree(destbuf, data->is_persistent);
157 115 : break;
158 : }
159 :
160 12267 : new_bucket = php_stream_bucket_new_unicode(stream, destbuf, destp - destbuf, 1, data->is_persistent TSRMLS_CC);
161 12267 : php_stream_bucket_append(buckets_out, new_bucket TSRMLS_CC);
162 12267 : exit_status = PSFS_PASS_ON;
163 : }
164 12382 : consumed += bucket->buflen;
165 12382 : php_stream_bucket_delref(bucket TSRMLS_CC);
166 : }
167 :
168 24298 : if (flags & PSFS_FLAG_FLUSH_CLOSE) {
169 11916 : UErrorCode errCode = U_ZERO_ERROR;
170 11916 : UChar d[64], *dest = d, *destp = d + 64;
171 : /* Spit it out! */
172 :
173 11916 : ucnv_toUnicode(data->conv, &dest, destp, NULL, NULL, NULL, TRUE, &errCode);
174 : /* UTODO: Error catching */
175 11916 : if (dest > d) {
176 0 : php_stream_bucket *bucket = php_stream_bucket_new_unicode(stream, d, dest - d, 0, 0 TSRMLS_CC);
177 0 : php_stream_bucket_append(buckets_out, bucket TSRMLS_CC);
178 0 : exit_status = PSFS_PASS_ON;
179 : }
180 : }
181 :
182 24298 : if (bytes_consumed) {
183 0 : *bytes_consumed = consumed;
184 : }
185 :
186 24298 : return exit_status;
187 : }
188 :
189 : /* unicode.tidy.* -- Expects anything -- Returns whatever is preferred by subsequent filters
190 : Can be used to "magically" fix-up bucket messes */
191 : static php_stream_filter_status_t php_unicode_tidy_filter(
192 : php_stream *stream,
193 : php_stream_filter *thisfilter,
194 : php_stream_bucket_brigade *buckets_in,
195 : php_stream_bucket_brigade *buckets_out,
196 : size_t *bytes_consumed,
197 : int flags
198 : TSRMLS_DC)
199 0 : {
200 : php_unicode_filter_data *data;
201 : int prefer_unicode;
202 :
203 0 : if (!thisfilter || !thisfilter->abstract) {
204 : /* Should never happen */
205 0 : return PSFS_ERR_FATAL;
206 : }
207 :
208 0 : prefer_unicode = php_stream_filter_output_prefer_unicode(thisfilter);
209 0 : data = (php_unicode_filter_data *)(thisfilter->abstract);
210 :
211 0 : if (prefer_unicode) {
212 0 : if (!data->to_unicode) {
213 0 : ucnv_resetToUnicode(data->conv);
214 0 : data->to_unicode = prefer_unicode;
215 : }
216 0 : return php_unicode_from_string_filter(stream, thisfilter, buckets_in, buckets_out, bytes_consumed, flags TSRMLS_CC);
217 : } else {
218 0 : if (data->to_unicode) {
219 0 : ucnv_resetFromUnicode(data->conv);
220 0 : data->to_unicode = prefer_unicode;
221 : }
222 0 : return php_unicode_to_string_filter(stream, thisfilter, buckets_in, buckets_out, bytes_consumed, flags TSRMLS_CC);
223 : }
224 : }
225 :
226 : static void php_unicode_filter_dtor(php_stream_filter *thisfilter TSRMLS_DC)
227 18723 : {
228 18723 : if (thisfilter && thisfilter->abstract) {
229 18723 : php_unicode_filter_data *data = (php_unicode_filter_data *)thisfilter->abstract;
230 18723 : ucnv_close(data->conv);
231 18723 : pefree(data, data->is_persistent);
232 : }
233 18723 : }
234 :
235 : php_stream_filter_ops php_unicode_to_string_filter_ops = {
236 : php_unicode_to_string_filter,
237 : php_unicode_filter_dtor,
238 : "unicode.to.*",
239 : PSFO_FLAG_ACCEPTS_UNICODE | PSFO_FLAG_OUTPUTS_STRING
240 : };
241 :
242 : php_stream_filter_ops php_unicode_from_string_filter_ops = {
243 : php_unicode_from_string_filter,
244 : php_unicode_filter_dtor,
245 : "unicode.from.*",
246 : PSFO_FLAG_ACCEPTS_STRING | PSFO_FLAG_OUTPUTS_UNICODE
247 : };
248 :
249 : php_stream_filter_ops php_unicode_tidy_filter_ops = {
250 : php_unicode_tidy_filter,
251 : php_unicode_filter_dtor,
252 : "unicode.tidy.*",
253 : PSFO_FLAG_ACCEPTS_ANY | PSFO_FLAG_OUTPUTS_ANY
254 : };
255 : /* }}} */
256 :
257 : /* {{{ unicode.* factory */
258 :
259 : static php_stream_filter *php_unicode_filter_create(const char *filtername, zval *filterparams, int persistent TSRMLS_DC)
260 18723 : {
261 : php_unicode_filter_data *data;
262 : const char *charset, *direction;
263 : php_stream_filter_ops *fops;
264 18723 : UErrorCode ucnvError = U_ZERO_ERROR;
265 : /* Note: from_error_mode means from unicode to charset. from filter means from charset to unicode */
266 18723 : uint16_t err_mode = UG(from_error_mode);
267 18723 : char to_unicode = 0;
268 : zval **tmpzval;
269 :
270 18723 : if (strncasecmp(filtername, "unicode.", sizeof("unicode.") - 1)) {
271 : /* Never happens */
272 0 : return NULL;
273 : }
274 :
275 18723 : direction = filtername + sizeof("unicode.") - 1;
276 18723 : if (strncmp(direction, "to.", sizeof("to.") - 1) == 0) {
277 5989 : fops = &php_unicode_to_string_filter_ops;
278 5989 : charset = direction + sizeof("to.") - 1;
279 12734 : } else if (strncmp(direction, "from.", sizeof("from.") - 1) == 0) {
280 12734 : fops = &php_unicode_from_string_filter_ops;
281 12734 : charset = direction + sizeof("from.") - 1;
282 12734 : to_unicode = 1;
283 12734 : err_mode = UG(to_error_mode);
284 0 : } else if (strncmp(direction, "tidy.", sizeof("tidy.") - 1) == 0) {
285 0 : fops = &php_unicode_tidy_filter_ops;
286 0 : charset = direction + sizeof("tidy.") - 1;
287 0 : } else if (strcmp(direction, "tidy") == 0) {
288 0 : fops = &php_unicode_tidy_filter_ops;
289 0 : charset = "utf8";
290 : } else {
291 : /* Shouldn't happen */
292 0 : return NULL;
293 : }
294 :
295 : /* Create this filter */
296 18723 : data = (php_unicode_filter_data *)pecalloc(1, sizeof(php_unicode_filter_data), persistent);
297 18723 : if (!data) {
298 0 : php_error_docref(NULL TSRMLS_CC, E_ERROR, "Failed allocating %d bytes", sizeof(php_unicode_filter_data));
299 0 : return NULL;
300 : }
301 :
302 18723 : data->conv = ucnv_open(charset, &ucnvError);
303 18723 : data->to_unicode = to_unicode;
304 18723 : if (!data->conv) {
305 0 : char *reason = "Unknown Error";
306 0 : pefree(data, persistent);
307 0 : switch (ucnvError) {
308 : case U_MEMORY_ALLOCATION_ERROR:
309 0 : reason = "unable to allocate memory";
310 0 : break;
311 : case U_FILE_ACCESS_ERROR:
312 0 : reason = "file access error";
313 : break;
314 : default:
315 : ;
316 : }
317 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to open charset converter, %s", reason);
318 0 : return NULL;
319 : }
320 :
321 18723 : if (filterparams &&
322 : Z_TYPE_P(filterparams) == IS_ARRAY &&
323 : zend_ascii_hash_find(Z_ARRVAL_P(filterparams), "error_mode", sizeof("error_mode"), (void**)&tmpzval) == SUCCESS &&
324 : tmpzval && *tmpzval) {
325 18723 : if (Z_TYPE_PP(tmpzval) == IS_LONG) {
326 18723 : err_mode = Z_LVAL_PP(tmpzval);
327 : } else {
328 0 : zval copyval = **tmpzval;
329 0 : zval_copy_ctor(©val);
330 0 : convert_to_long(©val);
331 0 : err_mode = Z_LVAL(copyval);
332 : }
333 : }
334 :
335 18723 : zend_set_converter_error_mode(data->conv, to_unicode ? ZEND_TO_UNICODE : ZEND_FROM_UNICODE, err_mode);
336 18723 : if (!to_unicode) {
337 5989 : UChar *freeme = NULL;
338 5989 : UChar *subst_char = UG(from_subst_char);
339 :
340 5989 : if (filterparams &&
341 : Z_TYPE_P(filterparams) == IS_ARRAY &&
342 : zend_ascii_hash_find(Z_ARRVAL_P(filterparams), "subst_char", sizeof("subst_char"), (void**)&tmpzval) == SUCCESS &&
343 : tmpzval && *tmpzval) {
344 5989 : if (Z_TYPE_PP(tmpzval) == IS_UNICODE) {
345 5989 : subst_char = Z_USTRVAL_PP(tmpzval);
346 : } else {
347 0 : zval copyval = **tmpzval;
348 0 : zval_copy_ctor(©val);
349 0 : convert_to_unicode(©val);
350 0 : subst_char = freeme = Z_USTRVAL(copyval);
351 : }
352 : }
353 :
354 5989 : zend_set_converter_subst_char(data->conv, subst_char);
355 :
356 5989 : if (freeme) {
357 0 : efree(freeme);
358 : }
359 : }
360 :
361 18723 : return php_stream_filter_alloc(fops, data, persistent);
362 : }
363 :
364 : php_stream_filter_factory php_unicode_filter_factory = {
365 : php_unicode_filter_create
366 : };
367 : /* }}} */
368 :
369 : /*
370 : * Local variables:
371 : * tab-width: 4
372 : * c-basic-offset: 4
373 : * End:
374 : * vim600: sw=4 ts=4 fdm=marker
375 : * vim<600: sw=4 ts=4
376 : */
377 :
|