1 : /*
2 : * "streamable kanji code filter and converter"
3 : * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4 : *
5 : * LICENSE NOTICES
6 : *
7 : * This file is part of "streamable kanji code filter and converter",
8 : * which is distributed under the terms of GNU Lesser General Public
9 : * License (version 2) as published by the Free Software Foundation.
10 : *
11 : * This software is distributed in the hope that it will be useful,
12 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : * GNU Lesser General Public License for more details.
15 : *
16 : * You should have received a copy of the GNU Lesser General Public
17 : * License along with "streamable kanji code filter and converter";
18 : * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 : * Suite 330, Boston, MA 02111-1307 USA
20 : *
21 : * The author of this file:
22 : *
23 : */
24 : /*
25 : * The source code included in this file was separated from mbfilter.c
26 : * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
27 : *
28 : */
29 :
30 : #ifdef HAVE_CONFIG_H
31 : #include "config.h"
32 : #endif
33 :
34 : #include "mbfilter.h"
35 : #include "mbfilter_utf16.h"
36 :
37 : static const char *mbfl_encoding_utf16_aliases[] = {"utf16", NULL};
38 :
39 : const mbfl_encoding mbfl_encoding_utf16 = {
40 : mbfl_no_encoding_utf16,
41 : "UTF-16",
42 : "UTF-16",
43 : (const char *(*)[])&mbfl_encoding_utf16_aliases,
44 : NULL,
45 : MBFL_ENCTYPE_MWC2BE
46 : };
47 :
48 : const mbfl_encoding mbfl_encoding_utf16be = {
49 : mbfl_no_encoding_utf16be,
50 : "UTF-16BE",
51 : "UTF-16BE",
52 : NULL,
53 : NULL,
54 : MBFL_ENCTYPE_MWC2BE
55 : };
56 :
57 : const mbfl_encoding mbfl_encoding_utf16le = {
58 : mbfl_no_encoding_utf16le,
59 : "UTF-16LE",
60 : "UTF-16LE",
61 : NULL,
62 : NULL,
63 : MBFL_ENCTYPE_MWC2LE
64 : };
65 :
66 : const struct mbfl_convert_vtbl vtbl_utf16_wchar = {
67 : mbfl_no_encoding_utf16,
68 : mbfl_no_encoding_wchar,
69 : mbfl_filt_conv_common_ctor,
70 : mbfl_filt_conv_common_dtor,
71 : mbfl_filt_conv_utf16_wchar,
72 : mbfl_filt_conv_common_flush
73 : };
74 :
75 : const struct mbfl_convert_vtbl vtbl_wchar_utf16 = {
76 : mbfl_no_encoding_wchar,
77 : mbfl_no_encoding_utf16,
78 : mbfl_filt_conv_common_ctor,
79 : mbfl_filt_conv_common_dtor,
80 : mbfl_filt_conv_wchar_utf16be,
81 : mbfl_filt_conv_common_flush
82 : };
83 :
84 : const struct mbfl_convert_vtbl vtbl_utf16be_wchar = {
85 : mbfl_no_encoding_utf16be,
86 : mbfl_no_encoding_wchar,
87 : mbfl_filt_conv_common_ctor,
88 : mbfl_filt_conv_common_dtor,
89 : mbfl_filt_conv_utf16be_wchar,
90 : mbfl_filt_conv_common_flush
91 : };
92 :
93 : const struct mbfl_convert_vtbl vtbl_wchar_utf16be = {
94 : mbfl_no_encoding_wchar,
95 : mbfl_no_encoding_utf16be,
96 : mbfl_filt_conv_common_ctor,
97 : mbfl_filt_conv_common_dtor,
98 : mbfl_filt_conv_wchar_utf16be,
99 : mbfl_filt_conv_common_flush
100 : };
101 :
102 : const struct mbfl_convert_vtbl vtbl_utf16le_wchar = {
103 : mbfl_no_encoding_utf16le,
104 : mbfl_no_encoding_wchar,
105 : mbfl_filt_conv_common_ctor,
106 : mbfl_filt_conv_common_dtor,
107 : mbfl_filt_conv_utf16le_wchar,
108 : mbfl_filt_conv_common_flush
109 : };
110 :
111 : const struct mbfl_convert_vtbl vtbl_wchar_utf16le = {
112 : mbfl_no_encoding_wchar,
113 : mbfl_no_encoding_utf16le,
114 : mbfl_filt_conv_common_ctor,
115 : mbfl_filt_conv_common_dtor,
116 : mbfl_filt_conv_wchar_utf16le,
117 : mbfl_filt_conv_common_flush
118 : };
119 :
/*
 * Evaluate `statement` once; if the result is negative, make the enclosing
 * function return -1 immediately.  Wrapped in do/while(0) so that it behaves
 * as a single statement after `if`/`else`.
 */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return (-1); \
		} \
	} while (0)
121 :
122 : /*
123 : * UTF-16 => wchar
124 : */
125 : int mbfl_filt_conv_utf16_wchar(int c, mbfl_convert_filter *filter)
126 132 : {
127 : int n, endian;
128 :
129 132 : endian = filter->status & 0xff00;
130 132 : switch (filter->status & 0x0f) {
131 : case 0:
132 67 : if (endian) {
133 8 : n = c & 0xff;
134 : } else {
135 59 : n = (c & 0xff) << 8;
136 : }
137 67 : filter->cache |= n;
138 67 : filter->status++;
139 67 : break;
140 : default:
141 65 : if (endian) {
142 8 : n = (c & 0xff) << 8;
143 : } else {
144 57 : n = c & 0xff;
145 : }
146 65 : n |= filter->cache & 0xffff;
147 65 : filter->status &= ~0x0f;
148 65 : if (n >= 0xd800 && n < 0xdc00) {
149 0 : filter->cache = ((n & 0x3ff) << 16) + 0x400000;
150 65 : } else if (n >= 0xdc00 && n < 0xe000) {
151 0 : n &= 0x3ff;
152 0 : n |= (filter->cache & 0xfff0000) >> 6;
153 0 : filter->cache = 0;
154 0 : if (n >= MBFL_WCSPLANE_SUPMIN && n < MBFL_WCSPLANE_SUPMAX) {
155 0 : CK((*filter->output_function)(n, filter->data));
156 : } else { /* illegal character */
157 0 : n &= MBFL_WCSGROUP_MASK;
158 0 : n |= MBFL_WCSGROUP_THROUGH;
159 0 : CK((*filter->output_function)(n, filter->data));
160 : }
161 : } else {
162 65 : int is_first = filter->status & 0x10;
163 65 : filter->cache = 0;
164 65 : filter->status |= 0x10;
165 65 : if (!is_first) {
166 10 : if (n == 0xfffe) {
167 3 : if (endian) {
168 0 : filter->status &= ~0x100; /* big-endian */
169 : } else {
170 3 : filter->status |= 0x100; /* little-endian */
171 : }
172 3 : break;
173 7 : } else if (n == 0xfeff) {
174 3 : break;
175 : }
176 : }
177 59 : CK((*filter->output_function)(n, filter->data));
178 : }
179 : break;
180 : }
181 :
182 131 : return c;
183 : }
184 :
185 : /*
186 : * UTF-16BE => wchar
187 : */
188 : int mbfl_filt_conv_utf16be_wchar(int c, mbfl_convert_filter *filter)
189 88 : {
190 : int n;
191 :
192 88 : switch (filter->status) {
193 : case 0:
194 45 : filter->status = 1;
195 45 : n = (c & 0xff) << 8;
196 45 : filter->cache |= n;
197 45 : break;
198 : default:
199 43 : filter->status = 0;
200 43 : n = (filter->cache & 0xff00) | (c & 0xff);
201 43 : if (n >= 0xd800 && n < 0xdc00) {
202 0 : filter->cache = ((n & 0x3ff) << 16) + 0x400000;
203 43 : } else if (n >= 0xdc00 && n < 0xe000) {
204 0 : n &= 0x3ff;
205 0 : n |= (filter->cache & 0xfff0000) >> 6;
206 0 : filter->cache = 0;
207 0 : if (n >= MBFL_WCSPLANE_SUPMIN && n < MBFL_WCSPLANE_SUPMAX) {
208 0 : CK((*filter->output_function)(n, filter->data));
209 : } else { /* illegal character */
210 0 : n &= MBFL_WCSGROUP_MASK;
211 0 : n |= MBFL_WCSGROUP_THROUGH;
212 0 : CK((*filter->output_function)(n, filter->data));
213 : }
214 : } else {
215 43 : filter->cache = 0;
216 43 : CK((*filter->output_function)(n, filter->data));
217 : }
218 : break;
219 : }
220 :
221 87 : return c;
222 : }
223 :
224 : /*
225 : * wchar => UTF-16BE
226 : */
227 : int mbfl_filt_conv_wchar_utf16be(int c, mbfl_convert_filter *filter)
228 14 : {
229 : int n;
230 :
231 28 : if (c >= 0 && c < MBFL_WCSPLANE_UCS2MAX) {
232 14 : CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
233 14 : CK((*filter->output_function)(c & 0xff, filter->data));
234 0 : } else if (c >= MBFL_WCSPLANE_SUPMIN && c < MBFL_WCSPLANE_SUPMAX) {
235 0 : n = ((c >> 10) - 0x40) | 0xd800;
236 0 : CK((*filter->output_function)((n >> 8) & 0xff, filter->data));
237 0 : CK((*filter->output_function)(n & 0xff, filter->data));
238 0 : n = (c & 0x3ff) | 0xdc00;
239 0 : CK((*filter->output_function)((n >> 8) & 0xff, filter->data));
240 0 : CK((*filter->output_function)(n & 0xff, filter->data));
241 : } else {
242 0 : if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
243 0 : CK(mbfl_filt_conv_illegal_output(c, filter));
244 : }
245 : }
246 :
247 14 : return c;
248 : }
249 :
250 : /*
251 : * UTF-16LE => wchar
252 : */
253 : int mbfl_filt_conv_utf16le_wchar(int c, mbfl_convert_filter *filter)
254 88 : {
255 : int n;
256 :
257 88 : switch (filter->status) {
258 : case 0:
259 45 : filter->status = 1;
260 45 : n = c & 0xff;
261 45 : filter->cache |= n;
262 45 : break;
263 : default:
264 43 : filter->status = 0;
265 43 : n = (filter->cache & 0xff) | ((c & 0xff) << 8);
266 43 : if (n >= 0xd800 && n < 0xdc00) {
267 0 : filter->cache = ((n & 0x3ff) << 16) + 0x400000;
268 43 : } else if (n >= 0xdc00 && n < 0xe000) {
269 0 : n &= 0x3ff;
270 0 : n |= (filter->cache & 0xfff0000) >> 6;
271 0 : filter->cache = 0;
272 0 : if (n >= MBFL_WCSPLANE_SUPMIN && n < MBFL_WCSPLANE_SUPMAX) {
273 0 : CK((*filter->output_function)(n, filter->data));
274 : } else { /* illegal character */
275 0 : n &= MBFL_WCSGROUP_MASK;
276 0 : n |= MBFL_WCSGROUP_THROUGH;
277 0 : CK((*filter->output_function)(n, filter->data));
278 : }
279 : } else {
280 43 : filter->cache = 0;
281 43 : CK((*filter->output_function)(n, filter->data));
282 : }
283 : break;
284 : }
285 :
286 87 : return c;
287 : }
288 :
289 : /*
290 : * wchar => UTF-16LE
291 : */
292 : int mbfl_filt_conv_wchar_utf16le(int c, mbfl_convert_filter *filter)
293 7 : {
294 : int n;
295 :
296 14 : if (c >= 0 && c < MBFL_WCSPLANE_UCS2MAX) {
297 7 : CK((*filter->output_function)(c & 0xff, filter->data));
298 7 : CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
299 0 : } else if (c >= MBFL_WCSPLANE_SUPMIN && c < MBFL_WCSPLANE_SUPMAX) {
300 0 : n = ((c >> 10) - 0x40) | 0xd800;
301 0 : CK((*filter->output_function)(n & 0xff, filter->data));
302 0 : CK((*filter->output_function)((n >> 8) & 0xff, filter->data));
303 0 : n = (c & 0x3ff) | 0xdc00;
304 0 : CK((*filter->output_function)(n & 0xff, filter->data));
305 0 : CK((*filter->output_function)((n >> 8) & 0xff, filter->data));
306 : } else {
307 0 : if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
308 0 : CK(mbfl_filt_conv_illegal_output(c, filter));
309 : }
310 : }
311 :
312 7 : return c;
313 : }
314 :
315 :
316 :
|