1 : /*
2 : * "streamable kanji code filter and converter"
3 : * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4 : *
5 : * LICENSE NOTICES
6 : *
7 : * This file is part of "streamable kanji code filter and converter",
8 : * which is distributed under the terms of GNU Lesser General Public
9 : * License (version 2) as published by the Free Software Foundation.
10 : *
11 : * This software is distributed in the hope that it will be useful,
12 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : * GNU Lesser General Public License for more details.
15 : *
16 : * You should have received a copy of the GNU Lesser General Public
17 : * License along with "streamable kanji code filter and converter";
18 : * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 : * Suite 330, Boston, MA 02111-1307 USA
20 : *
21 : * The author of this file:
22 : *
23 : */
24 : /*
25 : * The source code included in this file was separated from mbfilter.c
26 : * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
27 : *
28 : */
29 :
30 : #ifdef HAVE_CONFIG_H
31 : #include "config.h"
32 : #endif
33 :
34 : #include "mbfilter.h"
35 : #include "mbfilter_ucs4.h"
36 :
37 : static const char *mbfl_encoding_ucs4_aliases[] = {"ISO-10646-UCS-4", "UCS4", NULL};
38 :
39 : const mbfl_encoding mbfl_encoding_ucs4 = {
40 : mbfl_no_encoding_ucs4,
41 : "UCS-4",
42 : "UCS-4",
43 : (const char *(*)[])&mbfl_encoding_ucs4_aliases,
44 : NULL,
45 : MBFL_ENCTYPE_WCS4BE
46 : };
47 :
48 : const mbfl_encoding mbfl_encoding_ucs4be = {
49 : mbfl_no_encoding_ucs4be,
50 : "UCS-4BE",
51 : "UCS-4BE",
52 : NULL,
53 : NULL,
54 : MBFL_ENCTYPE_WCS4BE
55 : };
56 :
57 : const mbfl_encoding mbfl_encoding_ucs4le = {
58 : mbfl_no_encoding_ucs4le,
59 : "UCS-4LE",
60 : "UCS-4LE",
61 : NULL,
62 : NULL,
63 : MBFL_ENCTYPE_WCS4LE
64 : };
65 :
66 : const struct mbfl_convert_vtbl vtbl_ucs4_wchar = {
67 : mbfl_no_encoding_ucs4,
68 : mbfl_no_encoding_wchar,
69 : mbfl_filt_conv_common_ctor,
70 : mbfl_filt_conv_common_dtor,
71 : mbfl_filt_conv_ucs4_wchar,
72 : mbfl_filt_conv_common_flush
73 : };
74 :
75 : const struct mbfl_convert_vtbl vtbl_wchar_ucs4 = {
76 : mbfl_no_encoding_wchar,
77 : mbfl_no_encoding_ucs4,
78 : mbfl_filt_conv_common_ctor,
79 : mbfl_filt_conv_common_dtor,
80 : mbfl_filt_conv_wchar_ucs4be,
81 : mbfl_filt_conv_common_flush
82 : };
83 :
84 : const struct mbfl_convert_vtbl vtbl_ucs4be_wchar = {
85 : mbfl_no_encoding_ucs4be,
86 : mbfl_no_encoding_wchar,
87 : mbfl_filt_conv_common_ctor,
88 : mbfl_filt_conv_common_dtor,
89 : mbfl_filt_conv_ucs4be_wchar,
90 : mbfl_filt_conv_common_flush
91 : };
92 :
93 : const struct mbfl_convert_vtbl vtbl_wchar_ucs4be = {
94 : mbfl_no_encoding_wchar,
95 : mbfl_no_encoding_ucs4be,
96 : mbfl_filt_conv_common_ctor,
97 : mbfl_filt_conv_common_dtor,
98 : mbfl_filt_conv_wchar_ucs4be,
99 : mbfl_filt_conv_common_flush
100 : };
101 :
102 : const struct mbfl_convert_vtbl vtbl_ucs4le_wchar = {
103 : mbfl_no_encoding_ucs4le,
104 : mbfl_no_encoding_wchar,
105 : mbfl_filt_conv_common_ctor,
106 : mbfl_filt_conv_common_dtor,
107 : mbfl_filt_conv_ucs4le_wchar,
108 : mbfl_filt_conv_common_flush
109 : };
110 :
111 : const struct mbfl_convert_vtbl vtbl_wchar_ucs4le = {
112 : mbfl_no_encoding_wchar,
113 : mbfl_no_encoding_ucs4le,
114 : mbfl_filt_conv_common_ctor,
115 : mbfl_filt_conv_common_dtor,
116 : mbfl_filt_conv_wchar_ucs4le,
117 : mbfl_filt_conv_common_flush
118 : };
119 :
120 :
/*
 * CK(statement): evaluate `statement` exactly once and, when the result is
 * negative, make the *enclosing function* return -1.  Wrapped in
 * do { } while (0) so it behaves as a single statement after if/else.
 */
#define CK(statement)              \
	do {                           \
		if ((statement) < 0) {     \
			return -1;             \
		}                          \
	} while (0)
122 :
123 : /*
124 : * UCS-4 => wchar
125 : */
126 : int mbfl_filt_conv_ucs4_wchar(int c, mbfl_convert_filter *filter)
127 0 : {
128 : int n, endian;
129 :
130 0 : endian = filter->status & 0xff00;
131 0 : switch (filter->status & 0xff) {
132 : case 0:
133 0 : if (endian) {
134 0 : n = c & 0xff;
135 : } else {
136 0 : n = (c & 0xff) << 24;
137 : }
138 0 : filter->cache = n;
139 0 : filter->status++;
140 0 : break;
141 : case 1:
142 0 : if (endian) {
143 0 : n = (c & 0xff) << 8;
144 : } else {
145 0 : n = (c & 0xff) << 16;
146 : }
147 0 : filter->cache |= n;
148 0 : filter->status++;
149 0 : break;
150 : case 2:
151 0 : if (endian) {
152 0 : n = (c & 0xff) << 16;
153 : } else {
154 0 : n = (c & 0xff) << 8;
155 : }
156 0 : filter->cache |= n;
157 0 : filter->status++;
158 0 : break;
159 : default:
160 0 : if (endian) {
161 0 : n = (c & 0xff) << 24;
162 : } else {
163 0 : n = c & 0xff;
164 : }
165 0 : n |= filter->cache;
166 0 : if ((n & 0xffff) == 0 && ((n >> 16) & 0xffff) == 0xfffe) {
167 0 : if (endian) {
168 0 : filter->status = 0; /* big-endian */
169 : } else {
170 0 : filter->status = 0x100; /* little-endian */
171 : }
172 0 : CK((*filter->output_function)(0xfeff, filter->data));
173 : } else {
174 0 : filter->status &= ~0xff;
175 0 : CK((*filter->output_function)(n, filter->data));
176 : }
177 : break;
178 : }
179 :
180 0 : return c;
181 : }
182 :
183 : /*
184 : * UCS-4BE => wchar
185 : */
186 : int mbfl_filt_conv_ucs4be_wchar(int c, mbfl_convert_filter *filter)
187 76104 : {
188 : int n;
189 :
190 76104 : if (filter->status == 0) {
191 19026 : filter->status = 1;
192 19026 : n = (c & 0xff) << 24;
193 19026 : filter->cache = n;
194 57078 : } else if (filter->status == 1) {
195 19026 : filter->status = 2;
196 19026 : n = (c & 0xff) << 16;
197 19026 : filter->cache |= n;
198 38052 : } else if (filter->status == 2) {
199 19026 : filter->status = 3;
200 19026 : n = (c & 0xff) << 8;
201 19026 : filter->cache |= n;
202 : } else {
203 19026 : filter->status = 0;
204 19026 : n = (c & 0xff) | filter->cache;
205 19026 : CK((*filter->output_function)(n, filter->data));
206 : }
207 76104 : return c;
208 : }
209 :
210 : /*
211 : * wchar => UCS-4BE
212 : */
213 : int mbfl_filt_conv_wchar_ucs4be(int c, mbfl_convert_filter *filter)
214 10777 : {
215 21554 : if (c >= 0 && c < MBFL_WCSGROUP_UCS4MAX) {
216 10777 : CK((*filter->output_function)((c >> 24) & 0xff, filter->data));
217 10777 : CK((*filter->output_function)((c >> 16) & 0xff, filter->data));
218 10777 : CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
219 10777 : CK((*filter->output_function)(c & 0xff, filter->data));
220 : } else {
221 0 : if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
222 0 : CK(mbfl_filt_conv_illegal_output(c, filter));
223 : }
224 : }
225 :
226 10777 : return c;
227 : }
228 :
229 : /*
230 : * UCS-4LE => wchar
231 : */
232 : int mbfl_filt_conv_ucs4le_wchar(int c, mbfl_convert_filter *filter)
233 0 : {
234 : int n;
235 :
236 0 : if (filter->status == 0) {
237 0 : filter->status = 1;
238 0 : n = (c & 0xff);
239 0 : filter->cache = n;
240 0 : } else if (filter->status == 1) {
241 0 : filter->status = 2;
242 0 : n = (c & 0xff) << 8;
243 0 : filter->cache |= n;
244 0 : } else if (filter->status == 2) {
245 0 : filter->status = 3;
246 0 : n = (c & 0xff) << 16;
247 0 : filter->cache |= n;
248 : } else {
249 0 : filter->status = 0;
250 0 : n = ((c & 0xff) << 24) | filter->cache;
251 0 : CK((*filter->output_function)(n, filter->data));
252 : }
253 0 : return c;
254 : }
255 :
256 : /*
257 : * wchar => UCS-4LE
258 : */
259 : int mbfl_filt_conv_wchar_ucs4le(int c, mbfl_convert_filter *filter)
260 0 : {
261 0 : if (c >= 0 && c < MBFL_WCSGROUP_UCS4MAX) {
262 0 : CK((*filter->output_function)(c & 0xff, filter->data));
263 0 : CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
264 0 : CK((*filter->output_function)((c >> 16) & 0xff, filter->data));
265 0 : CK((*filter->output_function)((c >> 24) & 0xff, filter->data));
266 : } else {
267 0 : if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
268 0 : CK(mbfl_filt_conv_illegal_output(c, filter));
269 : }
270 : }
271 :
272 0 : return c;
273 : }
274 :
275 :
|