1 : /*
2 : * "streamable kanji code filter and converter"
3 : * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4 : *
5 : * LICENSE NOTICES
6 : *
7 : * This file is part of "streamable kanji code filter and converter",
8 : * which is distributed under the terms of GNU Lesser General Public
9 : * License (version 2) as published by the Free Software Foundation.
10 : *
11 : * This software is distributed in the hope that it will be useful,
12 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : * GNU Lesser General Public License for more details.
15 : *
16 : * You should have received a copy of the GNU Lesser General Public
17 : * License along with "streamable kanji code filter and converter";
18 : * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 : * Suite 330, Boston, MA 02111-1307 USA
20 : *
21 : * The author of this file:
22 : *
23 : */
24 : /*
25 : * The source code included in this file was separated from mbfilter.c
26 : * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
27 : *
28 : */
29 :
30 : #ifdef HAVE_CONFIG_H
31 : #include "config.h"
32 : #endif
33 :
34 : #include "mbfilter.h"
35 : #include "mbfilter_utf7.h"
36 :
37 : static int mbfl_filt_ident_utf7(int c, mbfl_identify_filter *filter);
38 :
/*
 * Base64 alphabet used by the UTF-7 encoder, indexed by 6-bit value.
 * The entries are spelled as ASCII byte values rather than character
 * literals; a terminating 0x00 follows the 64 alphabet entries.
 */
static const unsigned char mbfl_base64_table[] = {
	/*  0..15: 'A'..'P' */
	0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
	0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,
	/* 16..31: 'Q'..'Z', 'a'..'f' */
	0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
	0x59, 0x5a, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66,
	/* 32..47: 'g'..'v' */
	0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e,
	0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76,
	/* 48..63: 'w'..'z', '0'..'9', '+', '/' */
	0x77, 0x78, 0x79, 0x7a, 0x30, 0x31, 0x32, 0x33,
	0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x2b, 0x2f,
	/* 64: terminator */
	0x00
};
51 :
/*
 * Alias list and encoding descriptor for UTF-7.
 *
 * The alias list holds the single alias "utf7" and is NULL-terminated.
 * The descriptor is initialized positionally with: the encoding id
 * mbfl_no_encoding_utf7, the string "UTF-7" twice, a pointer to the alias
 * list, NULL, and the flags MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE.
 * NOTE(review): the meaning of each position is fixed by the mbfl_encoding
 * struct declared elsewhere -- confirm against that declaration before
 * reordering or adding entries.
 */
52 : static const char *mbfl_encoding_utf7_aliases[] = {"utf7", NULL};
53 :
54 : const mbfl_encoding mbfl_encoding_utf7 = {
55 : mbfl_no_encoding_utf7,
56 : "UTF-7",
57 : "UTF-7",
58 : (const char *(*)[])&mbfl_encoding_utf7_aliases,
59 : NULL,
60 : MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE
61 : };
62 :
/*
 * Identify-filter table for UTF-7: the encoding id, the shared
 * mbfl_filt_ident_common_ctor / mbfl_filt_ident_common_dtor, and this
 * file's per-character checker mbfl_filt_ident_utf7.
 * NOTE(review): entries are positional; their order must match
 * struct mbfl_identify_vtbl, which is declared elsewhere.
 */
63 : const struct mbfl_identify_vtbl vtbl_identify_utf7 = {
64 : mbfl_no_encoding_utf7,
65 : mbfl_filt_ident_common_ctor,
66 : mbfl_filt_ident_common_dtor,
67 : mbfl_filt_ident_utf7
68 : };
69 :
/*
 * Conversion table UTF-7 -> wchar: source id mbfl_no_encoding_utf7,
 * destination id mbfl_no_encoding_wchar, the shared common ctor/dtor,
 * the decoder mbfl_filt_conv_utf7_wchar, and the shared
 * mbfl_filt_conv_common_flush as the flush routine.
 * NOTE(review): entries are positional; their order must match
 * struct mbfl_convert_vtbl, which is declared elsewhere.
 */
70 : const struct mbfl_convert_vtbl vtbl_utf7_wchar = {
71 : mbfl_no_encoding_utf7,
72 : mbfl_no_encoding_wchar,
73 : mbfl_filt_conv_common_ctor,
74 : mbfl_filt_conv_common_dtor,
75 : mbfl_filt_conv_utf7_wchar,
76 : mbfl_filt_conv_common_flush
77 : };
78 :
/*
 * Conversion table wchar -> UTF-7: source id mbfl_no_encoding_wchar,
 * destination id mbfl_no_encoding_utf7, the shared common ctor/dtor,
 * the encoder mbfl_filt_conv_wchar_utf7, and the encoder-specific
 * mbfl_filt_conv_wchar_utf7_flush, which writes out any Base64 bits
 * still held in the filter.
 * NOTE(review): entries are positional; their order must match
 * struct mbfl_convert_vtbl, which is declared elsewhere.
 */
79 : const struct mbfl_convert_vtbl vtbl_wchar_utf7 = {
80 : mbfl_no_encoding_wchar,
81 : mbfl_no_encoding_utf7,
82 : mbfl_filt_conv_common_ctor,
83 : mbfl_filt_conv_common_dtor,
84 : mbfl_filt_conv_wchar_utf7,
85 : mbfl_filt_conv_wchar_utf7_flush
86 : };
87 :
88 :
/*
 * Evaluate `expr` exactly once; when the result is negative, make the
 * enclosing function return -1.  Wrapped in do/while(0) so it behaves as a
 * single statement.
 */
#define CK(expr) \
	do { \
		if ((expr) < 0) { \
			return (-1); \
		} \
	} while (0)
90 :
91 : /*
92 : * UTF-7 => wchar
93 : */
94 : int mbfl_filt_conv_utf7_wchar(int c, mbfl_convert_filter *filter)
95 81 : {
96 : int s, n;
97 :
98 81 : n = -1;
99 81 : if (filter->status != 0) { /* Modified Base64 */
100 0 : if (c >= 0x41 && c <= 0x5a) { /* A - Z */
101 0 : n = c - 65;
102 0 : } else if (c >= 0x61 && c <= 0x7a) { /* a - z */
103 0 : n = c - 71;
104 0 : } else if (c >= 0x30 && c <= 0x39) { /* 0 - 9 */
105 0 : n = c + 4;
106 0 : } else if (c == 0x2b) { /* '+' */
107 0 : n = 62;
108 0 : } else if (c == 0x2f) { /* '/' */
109 0 : n = 63;
110 : }
111 0 : if (n < 0 || n > 63) {
112 0 : if (c == 0x2d) {
113 0 : if (filter->status == 1) { /* "+-" -> "+" */
114 0 : CK((*filter->output_function)(0x2b, filter->data));
115 : }
116 0 : } else if (c >= 0 && c < 0x80) { /* ASCII exclude '-' */
117 0 : CK((*filter->output_function)(c, filter->data));
118 : } else { /* illegal character */
119 0 : s = c & MBFL_WCSGROUP_MASK;
120 0 : s |= MBFL_WCSGROUP_THROUGH;
121 0 : CK((*filter->output_function)(s, filter->data));
122 : }
123 0 : filter->cache = 0;
124 0 : filter->status = 0;
125 0 : return c;
126 : }
127 : }
128 :
129 81 : switch (filter->status) {
130 : /* directly encoded characters */
131 : case 0:
132 81 : if (c == 0x2b) { /* '+' shift character */
133 0 : filter->status = 1;
134 94 : } else if (c >= 0 && c < 0x80) { /* ASCII */
135 14 : CK((*filter->output_function)(c, filter->data));
136 : } else { /* illegal character */
137 67 : s = c & MBFL_WCSGROUP_MASK;
138 67 : s |= MBFL_WCSGROUP_THROUGH;
139 67 : CK((*filter->output_function)(s, filter->data));
140 : }
141 79 : break;
142 :
143 : /* decode Modified Base64 */
144 : case 1:
145 : case 2:
146 0 : filter->cache |= n << 10;
147 0 : filter->status = 3;
148 0 : break;
149 : case 3:
150 0 : filter->cache |= n << 4;
151 0 : filter->status = 4;
152 0 : break;
153 : case 4:
154 0 : s = ((n >> 2) & 0xf) | (filter->cache & 0xffff);
155 0 : n = (n & 0x3) << 14;
156 0 : filter->status = 5;
157 0 : if (s >= 0xd800 && s < 0xdc00) {
158 0 : s = (((s & 0x3ff) << 16) + 0x400000) | n;
159 0 : filter->cache = s;
160 0 : } else if (s >= 0xdc00 && s < 0xe000) {
161 0 : s &= 0x3ff;
162 0 : s |= (filter->cache & 0xfff0000) >> 6;
163 0 : filter->cache = n;
164 0 : if (s >= MBFL_WCSPLANE_SUPMIN && s < MBFL_WCSPLANE_SUPMAX) {
165 0 : CK((*filter->output_function)(s, filter->data));
166 : } else { /* illegal character */
167 0 : s &= MBFL_WCSGROUP_MASK;
168 0 : s |= MBFL_WCSGROUP_THROUGH;
169 0 : CK((*filter->output_function)(s, filter->data));
170 : }
171 : } else {
172 0 : filter->cache = n;
173 0 : CK((*filter->output_function)(s, filter->data));
174 : }
175 0 : break;
176 :
177 : case 5:
178 0 : filter->cache |= n << 8;
179 0 : filter->status = 6;
180 0 : break;
181 : case 6:
182 0 : filter->cache |= n << 2;
183 0 : filter->status = 7;
184 0 : break;
185 : case 7:
186 0 : s = ((n >> 4) & 0x3) | (filter->cache & 0xffff);
187 0 : n = (n & 0xf) << 12;
188 0 : filter->status = 8;
189 0 : if (s >= 0xd800 && s < 0xdc00) {
190 0 : s = (((s & 0x3ff) << 16) + 0x400000) | n;
191 0 : filter->cache = s;
192 0 : } else if (s >= 0xdc00 && s < 0xe000) {
193 0 : s &= 0x3ff;
194 0 : s |= (filter->cache & 0xfff0000) >> 6;
195 0 : filter->cache = n;
196 0 : if (s >= MBFL_WCSPLANE_SUPMIN && s < MBFL_WCSPLANE_SUPMAX) {
197 0 : CK((*filter->output_function)(s, filter->data));
198 : } else { /* illegal character */
199 0 : s &= MBFL_WCSGROUP_MASK;
200 0 : s |= MBFL_WCSGROUP_THROUGH;
201 0 : CK((*filter->output_function)(s, filter->data));
202 : }
203 : } else {
204 0 : filter->cache = n;
205 0 : CK((*filter->output_function)(s, filter->data));
206 : }
207 0 : break;
208 :
209 : case 8:
210 0 : filter->cache |= n << 6;
211 0 : filter->status = 9;
212 0 : break;
213 : case 9:
214 0 : s = n | (filter->cache & 0xffff);
215 0 : filter->status = 2;
216 0 : if (s >= 0xd800 && s < 0xdc00) {
217 0 : s = (((s & 0x3ff) << 16) + 0x400000);
218 0 : filter->cache = s;
219 0 : } else if (s >= 0xdc00 && s < 0xe000) {
220 0 : s &= 0x3ff;
221 0 : s |= (filter->cache & 0xfff0000) >> 6;
222 0 : filter->cache = 0;
223 0 : if (s >= MBFL_WCSPLANE_SUPMIN && s < MBFL_WCSPLANE_SUPMAX) {
224 0 : CK((*filter->output_function)(s, filter->data));
225 : } else { /* illegal character */
226 0 : s &= MBFL_WCSGROUP_MASK;
227 0 : s |= MBFL_WCSGROUP_THROUGH;
228 0 : CK((*filter->output_function)(s, filter->data));
229 : }
230 : } else {
231 0 : filter->cache = 0;
232 0 : CK((*filter->output_function)(s, filter->data));
233 : }
234 0 : break;
235 :
236 : default:
237 0 : filter->status = 0;
238 : break;
239 : }
240 :
241 79 : return c;
242 : }
243 :
244 : /*
245 : * wchar => UTF-7
246 : */
247 : int mbfl_filt_conv_wchar_utf7(int c, mbfl_convert_filter *filter)
248 15 : {
249 : int s, n;
250 :
251 15 : n = 0;
252 25 : if (c >= 0 && c < 0x80) { /* ASCII */
253 10 : if (c >= 0x41 && c <= 0x5a) { /* A - Z */
254 0 : n = 1;
255 14 : } else if (c >= 0x61 && c <= 0x7a) { /* a - z */
256 4 : n = 1;
257 6 : } else if (c >= 0x30 && c <= 0x39) { /* 0 - 9 */
258 0 : n = 1;
259 6 : } else if (c == '\0') { /* '\0' */
260 0 : n = 1;
261 6 : } else if (c == 0x2f) { /* '/' */
262 0 : n = 1;
263 6 : } else if (c == 0x2d) { /* '-' */
264 0 : n = 1;
265 6 : } else if (c == 0x20) { /* SPACE */
266 1 : n = 2;
267 5 : } else if (c == 0x09) { /* HTAB */
268 0 : n = 2;
269 5 : } else if (c == 0x0d) { /* CR */
270 0 : n = 2;
271 5 : } else if (c == 0x0a) { /* LF */
272 0 : n = 2;
273 5 : } else if (c == 0x27) { /* "'" */
274 0 : n = 2;
275 5 : } else if (c == 0x28) { /* '(' */
276 0 : n = 2;
277 5 : } else if (c == 0x29) { /* ')' */
278 0 : n = 2;
279 5 : } else if (c == 0x2c) { /* ',' */
280 0 : n = 2;
281 5 : } else if (c == 0x2e) { /* '.' */
282 0 : n = 2;
283 5 : } else if (c == 0x3a) { /* ':' */
284 0 : n = 2;
285 5 : } else if (c == 0x3f) { /* '?' */
286 5 : n = 2;
287 : }
288 5 : } else if (c >= 0 && c < MBFL_WCSPLANE_UCS2MAX) {
289 : ;
290 5 : } else if (c >= MBFL_WCSPLANE_SUPMIN && c < MBFL_WCSPLANE_SUPMAX) {
291 0 : s = ((c >> 10) - 0x40) | 0xd800;
292 0 : CK((*filter->filter_function)(s, filter));
293 0 : s = (c & 0x3ff) | 0xdc00;
294 0 : CK((*filter->filter_function)(s, filter));
295 0 : return c;
296 : } else {
297 5 : if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
298 5 : CK(mbfl_filt_conv_illegal_output(c, filter));
299 : }
300 5 : return c;
301 : }
302 :
303 10 : switch (filter->status) {
304 : case 0:
305 10 : if (n != 0) { /* directly encode characters */
306 10 : CK((*filter->output_function)(c, filter->data));
307 : } else { /* Modified Base64 */
308 0 : CK((*filter->output_function)(0x2b, filter->data)); /* '+' */
309 0 : filter->status++;
310 0 : filter->cache = c;
311 : }
312 10 : break;
313 :
314 : /* encode Modified Base64 */
315 : case 1:
316 0 : s = filter->cache;
317 0 : CK((*filter->output_function)(mbfl_base64_table[(s >> 10) & 0x3f], filter->data));
318 0 : CK((*filter->output_function)(mbfl_base64_table[(s >> 4) & 0x3f], filter->data));
319 0 : if (n != 0) {
320 0 : CK((*filter->output_function)(mbfl_base64_table[(s << 2) & 0x3c], filter->data));
321 0 : if (n == 1) {
322 0 : CK((*filter->output_function)(0x2d, filter->data)); /* '-' */
323 : }
324 0 : CK((*filter->output_function)(c, filter->data));
325 0 : filter->status = 0;
326 : } else {
327 0 : filter->status++;
328 0 : filter->cache = ((s & 0xf) << 16) | c;
329 : }
330 0 : break;
331 :
332 : case 2:
333 0 : s = filter->cache;
334 0 : CK((*filter->output_function)(mbfl_base64_table[(s >> 14) & 0x3f], filter->data));
335 0 : CK((*filter->output_function)(mbfl_base64_table[(s >> 8) & 0x3f], filter->data));
336 0 : CK((*filter->output_function)(mbfl_base64_table[(s >> 2) & 0x3f], filter->data));
337 0 : if (n != 0) {
338 0 : CK((*filter->output_function)(mbfl_base64_table[(s << 4) & 0x30], filter->data));
339 0 : if (n == 1) {
340 0 : CK((*filter->output_function)(0x2d, filter->data)); /* '-' */
341 : }
342 0 : CK((*filter->output_function)(c, filter->data));
343 0 : filter->status = 0;
344 : } else {
345 0 : filter->status++;
346 0 : filter->cache = ((s & 0x3) << 16) | c;
347 : }
348 0 : break;
349 :
350 : case 3:
351 0 : s = filter->cache;
352 0 : CK((*filter->output_function)(mbfl_base64_table[(s >> 12) & 0x3f], filter->data));
353 0 : CK((*filter->output_function)(mbfl_base64_table[(s >> 6) & 0x3f], filter->data));
354 0 : CK((*filter->output_function)(mbfl_base64_table[s & 0x3f], filter->data));
355 0 : if (n != 0) {
356 0 : if (n == 1) {
357 0 : CK((*filter->output_function)(0x2d, filter->data)); /* '-' */
358 : }
359 0 : CK((*filter->output_function)(c, filter->data));
360 0 : filter->status = 0;
361 : } else {
362 0 : filter->status = 1;
363 0 : filter->cache = c;
364 : }
365 0 : break;
366 :
367 : default:
368 0 : filter->status = 0;
369 : break;
370 : }
371 :
372 10 : return c;
373 :
374 : }
375 :
376 : int mbfl_filt_conv_wchar_utf7_flush(mbfl_convert_filter *filter)
377 2 : {
378 : int status, cache;
379 :
380 2 : status = filter->status;
381 2 : cache = filter->cache;
382 2 : filter->status = 0;
383 2 : filter->cache = 0;
384 : /* flush fragments */
385 2 : switch (status) {
386 : case 1:
387 0 : CK((*filter->output_function)(mbfl_base64_table[(cache >> 10) & 0x3f], filter->data));
388 0 : CK((*filter->output_function)(mbfl_base64_table[(cache >> 4) & 0x3f], filter->data));
389 0 : CK((*filter->output_function)(mbfl_base64_table[(cache << 2) & 0x3c], filter->data));
390 0 : CK((*filter->output_function)(0x2d, filter->data)); /* '-' */
391 0 : break;
392 :
393 : case 2:
394 0 : CK((*filter->output_function)(mbfl_base64_table[(cache >> 14) & 0x3f], filter->data));
395 0 : CK((*filter->output_function)(mbfl_base64_table[(cache >> 8) & 0x3f], filter->data));
396 0 : CK((*filter->output_function)(mbfl_base64_table[(cache >> 2) & 0x3f], filter->data));
397 0 : CK((*filter->output_function)(mbfl_base64_table[(cache << 4) & 0x30], filter->data));
398 0 : CK((*filter->output_function)(0x2d, filter->data)); /* '-' */
399 0 : break;
400 :
401 : case 3:
402 0 : CK((*filter->output_function)(mbfl_base64_table[(cache >> 12) & 0x3f], filter->data));
403 0 : CK((*filter->output_function)(mbfl_base64_table[(cache >> 6) & 0x3f], filter->data));
404 0 : CK((*filter->output_function)(mbfl_base64_table[cache & 0x3f], filter->data));
405 0 : CK((*filter->output_function)(0x2d, filter->data)); /* '-' */
406 : break;
407 : }
408 2 : return 0;
409 : }
410 :
411 : static int mbfl_filt_ident_utf7(int c, mbfl_identify_filter *filter)
412 0 : {
413 : int n;
414 :
415 0 : switch (filter->status) {
416 : /* directly encoded characters */
417 : case 0:
418 0 : if (c == 0x2b) { /* '+' shift character */
419 0 : filter->status++;
420 0 : } else if (c == 0x5c || c == 0x7e || c < 0 || c > 0x7f) { /* illegal character */
421 0 : filter->flag = 1; /* bad */
422 : }
423 0 : break;
424 :
425 : /* Modified Base64 */
426 : case 1:
427 : case 2:
428 0 : n = 0;
429 0 : if (c >= 0x41 && c <= 0x5a) { /* A - Z */
430 0 : n = 1;
431 0 : } else if (c >= 0x61 && c <= 0x7a) { /* a - z */
432 0 : n = 1;
433 0 : } else if (c >= 0x30 && c <= 0x39) { /* 0 - 9 */
434 0 : n = 1;
435 0 : } else if (c == 0x2b) { /* '+' */
436 0 : n = 1;
437 0 : } else if (c == 0x2f) { /* '/' */
438 0 : n = 1;
439 : }
440 0 : if (n <= 0) {
441 0 : if (filter->status == 1 && c != 0x2d) {
442 0 : filter->flag = 1; /* bad */
443 0 : } else if (c < 0 || c > 0x7f) {
444 0 : filter->flag = 1; /* bad */
445 : }
446 0 : filter->status = 0;
447 : } else {
448 0 : filter->status = 2;
449 : }
450 0 : break;
451 :
452 : default:
453 0 : filter->status = 0;
454 : break;
455 : }
456 :
457 0 : return c;
458 : }
459 :
460 :
|