1 : /*
2 : * "streamable kanji code filter and converter"
3 : * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4 : *
5 : * LICENSE NOTICES
6 : *
7 : * This file is part of "streamable kanji code filter and converter",
8 : * which is distributed under the terms of GNU Lesser General Public
9 : * License (version 2) as published by the Free Software Foundation.
10 : *
11 : * This software is distributed in the hope that it will be useful,
12 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : * GNU Lesser General Public License for more details.
15 : *
16 : * You should have received a copy of the GNU Lesser General Public
17 : * License along with "streamable kanji code filter and converter";
18 : * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 : * Suite 330, Boston, MA 02111-1307 USA
20 : *
21 : * The author of this file:
22 : *
23 : */
/*
 * The source code included in this file was separated from mbfilter.c
 * by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002. The file
 * mbfilter.c is included in this package.
 *
 */
30 :
31 : #ifdef HAVE_CONFIG_H
32 : #include "config.h"
33 : #endif
34 :
35 : #ifdef HAVE_STDDEF_H
36 : #include <stddef.h>
37 : #endif
38 :
39 : #include "mbfl_encoding.h"
40 : #include "mbfl_allocators.h"
41 : #include "mbfl_filter_output.h"
42 : #include "mbfilter_pass.h"
43 : #include "mbfilter_8bit.h"
44 : #include "mbfilter_wchar.h"
45 :
46 : #include "filters/mbfilter_euc_cn.h"
47 : #include "filters/mbfilter_hz.h"
48 : #include "filters/mbfilter_euc_tw.h"
49 : #include "filters/mbfilter_big5.h"
50 : #include "filters/mbfilter_uhc.h"
51 : #include "filters/mbfilter_euc_kr.h"
52 : #include "filters/mbfilter_iso2022_kr.h"
53 : #include "filters/mbfilter_sjis.h"
54 : #include "filters/mbfilter_cp51932.h"
55 : #include "filters/mbfilter_jis.h"
56 : #include "filters/mbfilter_iso2022_jp_ms.h"
57 : #include "filters/mbfilter_euc_jp.h"
58 : #include "filters/mbfilter_euc_jp_win.h"
59 : #include "filters/mbfilter_ascii.h"
60 : #include "filters/mbfilter_koi8r.h"
61 : #include "filters/mbfilter_cp866.h"
62 : #include "filters/mbfilter_cp932.h"
63 : #include "filters/mbfilter_cp936.h"
64 : #include "filters/mbfilter_cp1251.h"
65 : #include "filters/mbfilter_cp1252.h"
66 : #include "filters/mbfilter_iso8859_1.h"
67 : #include "filters/mbfilter_iso8859_2.h"
68 : #include "filters/mbfilter_iso8859_3.h"
69 : #include "filters/mbfilter_iso8859_4.h"
70 : #include "filters/mbfilter_iso8859_5.h"
71 : #include "filters/mbfilter_iso8859_6.h"
72 : #include "filters/mbfilter_iso8859_7.h"
73 : #include "filters/mbfilter_iso8859_8.h"
74 : #include "filters/mbfilter_iso8859_9.h"
75 : #include "filters/mbfilter_iso8859_10.h"
76 : #include "filters/mbfilter_iso8859_13.h"
77 : #include "filters/mbfilter_iso8859_14.h"
78 : #include "filters/mbfilter_iso8859_15.h"
79 : #include "filters/mbfilter_base64.h"
80 : #include "filters/mbfilter_qprint.h"
81 : #include "filters/mbfilter_uuencode.h"
82 : #include "filters/mbfilter_7bit.h"
83 : #include "filters/mbfilter_utf7.h"
84 : #include "filters/mbfilter_utf7imap.h"
85 : #include "filters/mbfilter_utf8.h"
86 : #include "filters/mbfilter_utf16.h"
87 : #include "filters/mbfilter_utf32.h"
88 : #include "filters/mbfilter_byte2.h"
89 : #include "filters/mbfilter_byte4.h"
90 : #include "filters/mbfilter_ucs4.h"
91 : #include "filters/mbfilter_ucs2.h"
92 : #include "filters/mbfilter_htmlent.h"
93 : #include "filters/mbfilter_armscii8.h"
94 :
95 : static void mbfl_convert_filter_reset_vtbl(mbfl_convert_filter *filter);
96 :
/*
 * Hex digit lookup table: index 0..15 yields the ASCII code of the
 * corresponding upper-case hexadecimal digit ("0123456789ABCDEF").
 * The table is only ever read, so it is declared const.
 */
static const char mbfl_hexchar_table[] = {
	0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x41,0x42,0x43,0x44,0x45,0x46
};
101 :
102 : const struct mbfl_convert_vtbl *mbfl_convert_filter_list[] = {
103 : &vtbl_utf8_wchar,
104 : &vtbl_wchar_utf8,
105 : &vtbl_eucjp_wchar,
106 : &vtbl_wchar_eucjp,
107 : &vtbl_sjis_wchar,
108 : &vtbl_wchar_sjis,
109 : &vtbl_cp51932_wchar,
110 : &vtbl_wchar_cp51932,
111 : &vtbl_jis_wchar,
112 : &vtbl_wchar_jis,
113 : &vtbl_2022jp_wchar,
114 : &vtbl_wchar_2022jp,
115 : &vtbl_2022jpms_wchar,
116 : &vtbl_wchar_2022jpms,
117 : &vtbl_eucjpwin_wchar,
118 : &vtbl_wchar_eucjpwin,
119 : &vtbl_sjiswin_wchar,
120 : &vtbl_wchar_sjiswin,
121 : &vtbl_euccn_wchar,
122 : &vtbl_wchar_euccn,
123 : &vtbl_cp936_wchar,
124 : &vtbl_wchar_cp936,
125 : &vtbl_hz_wchar,
126 : &vtbl_wchar_hz,
127 : &vtbl_euctw_wchar,
128 : &vtbl_wchar_euctw,
129 : &vtbl_big5_wchar,
130 : &vtbl_wchar_big5,
131 : &vtbl_euckr_wchar,
132 : &vtbl_wchar_euckr,
133 : &vtbl_uhc_wchar,
134 : &vtbl_wchar_uhc,
135 : &vtbl_2022kr_wchar,
136 : &vtbl_wchar_2022kr,
137 : &vtbl_cp1251_wchar,
138 : &vtbl_wchar_cp1251,
139 : &vtbl_cp866_wchar,
140 : &vtbl_wchar_cp866,
141 : &vtbl_koi8r_wchar,
142 : &vtbl_wchar_koi8r,
143 : &vtbl_cp1252_wchar,
144 : &vtbl_wchar_cp1252,
145 : &vtbl_ascii_wchar,
146 : &vtbl_wchar_ascii,
147 : &vtbl_8859_1_wchar,
148 : &vtbl_wchar_8859_1,
149 : &vtbl_8859_2_wchar,
150 : &vtbl_wchar_8859_2,
151 : &vtbl_8859_3_wchar,
152 : &vtbl_wchar_8859_3,
153 : &vtbl_8859_4_wchar,
154 : &vtbl_wchar_8859_4,
155 : &vtbl_8859_5_wchar,
156 : &vtbl_wchar_8859_5,
157 : &vtbl_8859_6_wchar,
158 : &vtbl_wchar_8859_6,
159 : &vtbl_8859_7_wchar,
160 : &vtbl_wchar_8859_7,
161 : &vtbl_8859_8_wchar,
162 : &vtbl_wchar_8859_8,
163 : &vtbl_8859_9_wchar,
164 : &vtbl_wchar_8859_9,
165 : &vtbl_8859_10_wchar,
166 : &vtbl_wchar_8859_10,
167 : &vtbl_8859_13_wchar,
168 : &vtbl_wchar_8859_13,
169 : &vtbl_8859_14_wchar,
170 : &vtbl_wchar_8859_14,
171 : &vtbl_8859_15_wchar,
172 : &vtbl_wchar_8859_15,
173 : &vtbl_8bit_b64,
174 : &vtbl_b64_8bit,
175 : &vtbl_uuencode_8bit,
176 : &vtbl_wchar_html,
177 : &vtbl_html_wchar,
178 : &vtbl_8bit_qprint,
179 : &vtbl_qprint_8bit,
180 : &vtbl_8bit_7bit,
181 : &vtbl_7bit_8bit,
182 : &vtbl_utf7_wchar,
183 : &vtbl_wchar_utf7,
184 : &vtbl_utf7imap_wchar,
185 : &vtbl_wchar_utf7imap,
186 : &vtbl_utf16_wchar,
187 : &vtbl_wchar_utf16,
188 : &vtbl_utf16be_wchar,
189 : &vtbl_wchar_utf16be,
190 : &vtbl_utf16le_wchar,
191 : &vtbl_wchar_utf16le,
192 : &vtbl_utf32_wchar,
193 : &vtbl_wchar_utf32,
194 : &vtbl_utf32be_wchar,
195 : &vtbl_wchar_utf32be,
196 : &vtbl_utf32le_wchar,
197 : &vtbl_wchar_utf32le,
198 : &vtbl_ucs4_wchar,
199 : &vtbl_wchar_ucs4,
200 : &vtbl_ucs4be_wchar,
201 : &vtbl_wchar_ucs4be,
202 : &vtbl_ucs4le_wchar,
203 : &vtbl_wchar_ucs4le,
204 : &vtbl_ucs2_wchar,
205 : &vtbl_wchar_ucs2,
206 : &vtbl_ucs2be_wchar,
207 : &vtbl_wchar_ucs2be,
208 : &vtbl_ucs2le_wchar,
209 : &vtbl_wchar_ucs2le,
210 : &vtbl_byte4be_wchar,
211 : &vtbl_wchar_byte4be,
212 : &vtbl_byte4le_wchar,
213 : &vtbl_wchar_byte4le,
214 : &vtbl_byte2be_wchar,
215 : &vtbl_wchar_byte2be,
216 : &vtbl_byte2le_wchar,
217 : &vtbl_wchar_byte2le,
218 : &vtbl_armscii8_wchar,
219 : &vtbl_wchar_armscii8,
220 : &vtbl_pass,
221 : NULL
222 : };
223 :
224 : mbfl_convert_filter *
225 : mbfl_convert_filter_new(
226 : enum mbfl_no_encoding from,
227 : enum mbfl_no_encoding to,
228 : int (*output_function)(int, void* ),
229 : int (*flush_function)(void*),
230 : void* data)
231 17168 : {
232 : mbfl_convert_filter * filter;
233 :
234 : /* allocate */
235 17168 : filter = (mbfl_convert_filter *)mbfl_malloc(sizeof(mbfl_convert_filter));
236 17168 : if (filter == NULL) {
237 0 : return NULL;
238 : }
239 :
240 : /* encoding structure */
241 17168 : filter->from = mbfl_no2encoding(from);
242 17168 : filter->to = mbfl_no2encoding(to);
243 17168 : if (filter->from == NULL) {
244 0 : filter->from = &mbfl_encoding_pass;
245 : }
246 17168 : if (filter->to == NULL) {
247 0 : filter->to = &mbfl_encoding_pass;
248 : }
249 :
250 17168 : if (output_function != NULL) {
251 17168 : filter->output_function = output_function;
252 : } else {
253 0 : filter->output_function = mbfl_filter_output_null;
254 : }
255 17168 : filter->flush_function = flush_function;
256 17168 : filter->data = data;
257 17168 : filter->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
258 17168 : filter->illegal_substchar = 0x3f; /* '?' */
259 17168 : filter->num_illegalchar = 0;
260 :
261 : /* setup the function table */
262 17168 : mbfl_convert_filter_reset_vtbl(filter);
263 :
264 : /* constructor */
265 17168 : (*filter->filter_ctor)(filter);
266 :
267 17168 : return filter;
268 : }
269 :
270 : void
271 : mbfl_convert_filter_delete(mbfl_convert_filter *filter)
272 17168 : {
273 17168 : if (filter) {
274 17168 : (*filter->filter_dtor)(filter);
275 17168 : mbfl_free((void*)filter);
276 : }
277 17168 : }
278 :
279 : int
280 : mbfl_convert_filter_feed(int c, mbfl_convert_filter *filter)
281 0 : {
282 0 : return (*filter->filter_function)(c, filter);
283 : }
284 :
285 : int
286 : mbfl_convert_filter_flush(mbfl_convert_filter *filter)
287 15144 : {
288 15144 : (*filter->filter_flush)(filter);
289 15144 : return (filter->flush_function ? (*filter->flush_function)(filter->data) : 0);
290 : }
291 :
292 : void mbfl_convert_filter_reset(mbfl_convert_filter *filter,
293 : enum mbfl_no_encoding from, enum mbfl_no_encoding to)
294 53 : {
295 : /* destruct old filter */
296 53 : (*filter->filter_dtor)(filter);
297 :
298 : /* resset filter member */
299 53 : filter->from = mbfl_no2encoding(from);
300 53 : filter->to = mbfl_no2encoding(to);
301 :
302 : /* set the vtbl */
303 53 : mbfl_convert_filter_reset_vtbl(filter);
304 :
305 : /* construct new filter */
306 53 : (*filter->filter_ctor)(filter);
307 53 : }
308 :
309 : void
310 : mbfl_convert_filter_copy(
311 : mbfl_convert_filter *src,
312 : mbfl_convert_filter *dist)
313 40525 : {
314 40525 : dist->filter_ctor = src->filter_ctor;
315 40525 : dist->filter_dtor = src->filter_dtor;
316 40525 : dist->filter_function = src->filter_function;
317 40525 : dist->filter_flush = src->filter_flush;
318 40525 : dist->output_function = src->output_function;
319 40525 : dist->flush_function = src->flush_function;
320 40525 : dist->data = src->data;
321 40525 : dist->status = src->status;
322 40525 : dist->cache = src->cache;
323 40525 : dist->from = src->from;
324 40525 : dist->to = src->to;
325 40525 : dist->illegal_mode = src->illegal_mode;
326 40525 : dist->illegal_substchar = src->illegal_substchar;
327 40525 : dist->num_illegalchar = src->num_illegalchar;
328 40525 : }
329 :
330 : int mbfl_convert_filter_devcat(mbfl_convert_filter *filter, mbfl_memory_device *src)
331 302 : {
332 : int n;
333 : unsigned char *p;
334 :
335 302 : p = src->buffer;
336 302 : n = src->pos;
337 606 : while (n > 0) {
338 2 : if ((*filter->filter_function)(*p++, filter) < 0) {
339 0 : return -1;
340 : }
341 2 : n--;
342 : }
343 :
344 302 : return n;
345 : }
346 :
347 : int mbfl_convert_filter_strcat(mbfl_convert_filter *filter, const unsigned char *p)
348 3 : {
349 : int c;
350 :
351 12 : while ((c = *p++) != '\0') {
352 6 : if ((*filter->filter_function)(c, filter) < 0) {
353 0 : return -1;
354 : }
355 : }
356 :
357 3 : return 0;
358 : }
359 :
360 : /* illegal character output function for conv-filter */
361 : int
362 : mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter)
363 73 : {
364 : int mode_backup, ret, n, m, r;
365 :
366 73 : ret = 0;
367 73 : mode_backup = filter->illegal_mode;
368 73 : filter->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
369 73 : switch (mode_backup) {
370 : case MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR:
371 71 : ret = (*filter->filter_function)(filter->illegal_substchar, filter);
372 71 : break;
373 : case MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG:
374 1 : if (c >= 0) {
375 1 : if (c < MBFL_WCSGROUP_UCS4MAX) { /* unicode */
376 1 : ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"U+");
377 : } else {
378 0 : if (c < MBFL_WCSGROUP_WCHARMAX) {
379 0 : m = c & ~MBFL_WCSPLANE_MASK;
380 0 : switch (m) {
381 : case MBFL_WCSPLANE_JIS0208:
382 0 : ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"JIS+");
383 0 : break;
384 : case MBFL_WCSPLANE_JIS0212:
385 0 : ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"JIS2+");
386 0 : break;
387 : case MBFL_WCSPLANE_WINCP932:
388 0 : ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"W932+");
389 0 : break;
390 : case MBFL_WCSPLANE_8859_1:
391 0 : ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"I8859_1+");
392 0 : break;
393 : default:
394 0 : ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"?+");
395 : break;
396 : }
397 0 : c &= MBFL_WCSPLANE_MASK;
398 : } else {
399 0 : ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"BAD+");
400 0 : c &= MBFL_WCSGROUP_MASK;
401 : }
402 : }
403 1 : if (ret >= 0) {
404 1 : m = 0;
405 1 : r = 28;
406 10 : while (r >= 0) {
407 8 : n = (c >> r) & 0xf;
408 8 : if (n || m) {
409 4 : m = 1;
410 4 : ret = (*filter->filter_function)(mbfl_hexchar_table[n], filter);
411 4 : if (ret < 0) {
412 0 : break;
413 : }
414 : }
415 8 : r -= 4;
416 : }
417 1 : if (m == 0 && ret >= 0) {
418 0 : ret = (*filter->filter_function)(mbfl_hexchar_table[0], filter);
419 : }
420 : }
421 : }
422 1 : break;
423 : case MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY:
424 1 : if (c >= 0) {
425 1 : if (c < MBFL_WCSGROUP_UCS4MAX) { /* unicode */
426 1 : ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"&#x");
427 1 : if (ret < 0)
428 0 : break;
429 :
430 1 : m = 0;
431 1 : r = 28;
432 10 : while (r >= 0) {
433 8 : n = (c >> r) & 0xf;
434 8 : if (n || m) {
435 4 : m = 1;
436 4 : ret = (*filter->filter_function)(mbfl_hexchar_table[n], filter);
437 4 : if (ret < 0) {
438 0 : break;
439 : }
440 : }
441 8 : r -= 4;
442 : }
443 1 : if (ret < 0) {
444 0 : break;
445 : }
446 1 : if (m == 0) {
447 0 : ret = (*filter->filter_function)(mbfl_hexchar_table[0], filter);
448 : }
449 1 : ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)";");
450 : } else {
451 0 : ret = (*filter->filter_function)(filter->illegal_substchar, filter);
452 : }
453 : }
454 : break;
455 : default:
456 : break;
457 : }
458 73 : filter->illegal_mode = mode_backup;
459 73 : filter->num_illegalchar++;
460 73 : return ret;
461 : }
462 :
463 : const struct mbfl_convert_vtbl * mbfl_convert_filter_get_vtbl(enum mbfl_no_encoding from, enum mbfl_no_encoding to)
464 20555 : {
465 : const struct mbfl_convert_vtbl *vtbl;
466 : int i;
467 :
468 21227 : if (to == mbfl_no_encoding_base64 ||
469 : to == mbfl_no_encoding_qprint ||
470 : to == mbfl_no_encoding_7bit) {
471 672 : from = mbfl_no_encoding_8bit;
472 19883 : } else if (from == mbfl_no_encoding_base64 ||
473 : from == mbfl_no_encoding_qprint ||
474 : from == mbfl_no_encoding_uuencode) {
475 17 : to = mbfl_no_encoding_8bit;
476 : }
477 :
478 20555 : i = 0;
479 1600468 : while ((vtbl = mbfl_convert_filter_list[i++]) != NULL){
480 1576228 : if (vtbl->from == from && vtbl->to == to) {
481 16870 : return vtbl;
482 : }
483 : }
484 :
485 3685 : return NULL;
486 : }
487 :
488 :
489 : static void mbfl_convert_filter_reset_vtbl(mbfl_convert_filter *filter)
490 17221 : {
491 : const struct mbfl_convert_vtbl *vtbl;
492 :
493 17221 : vtbl = mbfl_convert_filter_get_vtbl(filter->from->no_encoding, filter->to->no_encoding);
494 17221 : if (vtbl == NULL) {
495 362 : vtbl = &vtbl_pass;
496 : }
497 :
498 17221 : filter->filter_ctor = vtbl->filter_ctor;
499 17221 : filter->filter_dtor = vtbl->filter_dtor;
500 17221 : filter->filter_function = vtbl->filter_function;
501 17221 : filter->filter_flush = vtbl->filter_flush;
502 17221 : }
503 :
504 : /*
505 : * commonly used constructor and destructor
506 : */
507 : void mbfl_filt_conv_common_ctor(mbfl_convert_filter *filter)
508 17138 : {
509 17138 : filter->status = 0;
510 17138 : filter->cache = 0;
511 17138 : }
512 :
513 : int mbfl_filt_conv_common_flush(mbfl_convert_filter *filter)
514 25883 : {
515 25883 : filter->status = 0;
516 25883 : filter->cache = 0;
517 25883 : return 0;
518 : }
519 :
520 : void mbfl_filt_conv_common_dtor(mbfl_convert_filter *filter)
521 17138 : {
522 17138 : filter->status = 0;
523 17138 : filter->cache = 0;
524 17138 : }
525 :
526 :
|