1 : /*
2 : * "streamable kanji code filter and converter"
3 : * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4 : *
5 : * LICENSE NOTICES
6 : *
7 : * This file is part of "streamable kanji code filter and converter",
8 : * which is distributed under the terms of GNU Lesser General Public
9 : * License (version 2) as published by the Free Software Foundation.
10 : *
11 : * This software is distributed in the hope that it will be useful,
12 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : * GNU Lesser General Public License for more details.
15 : *
16 : * You should have received a copy of the GNU Lesser General Public
17 : * License along with "streamable kanji code filter and converter";
18 : * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 : * Suite 330, Boston, MA 02111-1307 USA
20 : *
21 : * The author of this file:
22 : *
23 : */
24 : /*
25 : * The source code included in this file was separated from mbfilter.c
26 : * by moriyoshi koizumi <moriyoshi@php.net> on 20 dec 2002.
27 : *
28 : */
29 :
30 : #ifdef HAVE_CONFIG_H
31 : #include "config.h"
32 : #endif
33 :
34 : #include "mbfilter.h"
35 : #include "mbfilter_utf32.h"
36 :
/* NULL-terminated list of alternative names for the UTF-32 encoding. */
static const char *mbfl_encoding_utf32_aliases[] = {
	"utf32",
	NULL
};
38 :
39 : const mbfl_encoding mbfl_encoding_utf32 = {
40 : mbfl_no_encoding_utf32,
41 : "UTF-32",
42 : "UTF-32",
43 : (const char *(*)[])&mbfl_encoding_utf32_aliases,
44 : NULL,
45 : MBFL_ENCTYPE_WCS4BE
46 : };
47 :
48 : const mbfl_encoding mbfl_encoding_utf32be = {
49 : mbfl_no_encoding_utf32be,
50 : "UTF-32BE",
51 : "UTF-32BE",
52 : NULL,
53 : NULL,
54 : MBFL_ENCTYPE_WCS4BE
55 : };
56 :
57 : const mbfl_encoding mbfl_encoding_utf32le = {
58 : mbfl_no_encoding_utf32le,
59 : "UTF-32LE",
60 : "UTF-32LE",
61 : NULL,
62 : NULL,
63 : MBFL_ENCTYPE_WCS4LE
64 : };
65 :
66 : const struct mbfl_convert_vtbl vtbl_utf32_wchar = {
67 : mbfl_no_encoding_utf32,
68 : mbfl_no_encoding_wchar,
69 : mbfl_filt_conv_common_ctor,
70 : mbfl_filt_conv_common_dtor,
71 : mbfl_filt_conv_utf32_wchar,
72 : mbfl_filt_conv_common_flush
73 : };
74 :
75 : const struct mbfl_convert_vtbl vtbl_wchar_utf32 = {
76 : mbfl_no_encoding_wchar,
77 : mbfl_no_encoding_utf32,
78 : mbfl_filt_conv_common_ctor,
79 : mbfl_filt_conv_common_dtor,
80 : mbfl_filt_conv_wchar_utf32be,
81 : mbfl_filt_conv_common_flush
82 : };
83 :
84 : const struct mbfl_convert_vtbl vtbl_utf32be_wchar = {
85 : mbfl_no_encoding_utf32be,
86 : mbfl_no_encoding_wchar,
87 : mbfl_filt_conv_common_ctor,
88 : mbfl_filt_conv_common_dtor,
89 : mbfl_filt_conv_utf32be_wchar,
90 : mbfl_filt_conv_common_flush
91 : };
92 :
93 : const struct mbfl_convert_vtbl vtbl_wchar_utf32be = {
94 : mbfl_no_encoding_wchar,
95 : mbfl_no_encoding_utf32be,
96 : mbfl_filt_conv_common_ctor,
97 : mbfl_filt_conv_common_dtor,
98 : mbfl_filt_conv_wchar_utf32be,
99 : mbfl_filt_conv_common_flush
100 : };
101 :
102 : const struct mbfl_convert_vtbl vtbl_utf32le_wchar = {
103 : mbfl_no_encoding_utf32le,
104 : mbfl_no_encoding_wchar,
105 : mbfl_filt_conv_common_ctor,
106 : mbfl_filt_conv_common_dtor,
107 : mbfl_filt_conv_utf32le_wchar,
108 : mbfl_filt_conv_common_flush
109 : };
110 :
111 : const struct mbfl_convert_vtbl vtbl_wchar_utf32le = {
112 : mbfl_no_encoding_wchar,
113 : mbfl_no_encoding_utf32le,
114 : mbfl_filt_conv_common_ctor,
115 : mbfl_filt_conv_common_dtor,
116 : mbfl_filt_conv_wchar_utf32le,
117 : mbfl_filt_conv_common_flush
118 : };
119 :
/*
 * CK(statement): evaluate `statement` once; if its value is negative,
 * return -1 from the enclosing function.  The do/while(0) wrapper lets
 * the macro be used as a single statement.
 */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return (-1); \
		} \
	} while (0)
121 :
122 : /*
123 : * UTF-32 => wchar
124 : */
125 : int mbfl_filt_conv_utf32_wchar(int c, mbfl_convert_filter *filter)
126 0 : {
127 : int n, endian;
128 :
129 0 : endian = filter->status & 0xff00;
130 0 : switch (filter->status & 0xff) {
131 : case 0:
132 0 : if (endian) {
133 0 : n = c & 0xff;
134 : } else {
135 0 : n = (c & 0xff) << 24;
136 : }
137 0 : filter->cache = n;
138 0 : filter->status++;
139 0 : break;
140 : case 1:
141 0 : if (endian) {
142 0 : n = (c & 0xff) << 8;
143 : } else {
144 0 : n = (c & 0xff) << 16;
145 : }
146 0 : filter->cache |= n;
147 0 : filter->status++;
148 0 : break;
149 : case 2:
150 0 : if (endian) {
151 0 : n = (c & 0xff) << 16;
152 : } else {
153 0 : n = (c & 0xff) << 8;
154 : }
155 0 : filter->cache |= n;
156 0 : filter->status++;
157 0 : break;
158 : default:
159 0 : if (endian) {
160 0 : n = (c & 0xff) << 24;
161 : } else {
162 0 : n = c & 0xff;
163 : }
164 0 : n |= filter->cache;
165 0 : if ((n & 0xffff) == 0 && ((n >> 16) & 0xffff) == 0xfffe) {
166 0 : if (endian) {
167 0 : filter->status = 0; /* big-endian */
168 : } else {
169 0 : filter->status = 0x100; /* little-endian */
170 : }
171 0 : CK((*filter->output_function)(0xfeff, filter->data));
172 : } else {
173 0 : filter->status &= ~0xff;
174 0 : CK((*filter->output_function)(n, filter->data));
175 : }
176 : break;
177 : }
178 :
179 0 : return c;
180 : }
181 :
182 : /*
183 : * UTF-32BE => wchar
184 : */
185 : int mbfl_filt_conv_utf32be_wchar(int c, mbfl_convert_filter *filter)
186 0 : {
187 : int n;
188 :
189 0 : if (filter->status == 0) {
190 0 : filter->status = 1;
191 0 : n = (c & 0xff) << 24;
192 0 : filter->cache = n;
193 0 : } else if (filter->status == 1) {
194 0 : filter->status = 2;
195 0 : n = (c & 0xff) << 16;
196 0 : filter->cache |= n;
197 0 : } else if (filter->status == 2) {
198 0 : filter->status = 3;
199 0 : n = (c & 0xff) << 8;
200 0 : filter->cache |= n;
201 : } else {
202 0 : filter->status = 0;
203 0 : n = (c & 0xff) | filter->cache;
204 0 : CK((*filter->output_function)(n, filter->data));
205 : }
206 0 : return c;
207 : }
208 :
209 : /*
210 : * wchar => UTF-32BE
211 : */
212 : int mbfl_filt_conv_wchar_utf32be(int c, mbfl_convert_filter *filter)
213 0 : {
214 0 : if (c >= 0 && c < MBFL_WCSGROUP_UCS4MAX) {
215 0 : CK((*filter->output_function)((c >> 24) & 0xff, filter->data));
216 0 : CK((*filter->output_function)((c >> 16) & 0xff, filter->data));
217 0 : CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
218 0 : CK((*filter->output_function)(c & 0xff, filter->data));
219 : } else {
220 0 : if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
221 0 : CK(mbfl_filt_conv_illegal_output(c, filter));
222 : }
223 : }
224 :
225 0 : return c;
226 : }
227 :
228 : /*
229 : * UTF-32LE => wchar
230 : */
231 : int mbfl_filt_conv_utf32le_wchar(int c, mbfl_convert_filter *filter)
232 0 : {
233 : int n;
234 :
235 0 : if (filter->status == 0) {
236 0 : filter->status = 1;
237 0 : n = (c & 0xff);
238 0 : filter->cache = n;
239 0 : } else if (filter->status == 1) {
240 0 : filter->status = 2;
241 0 : n = (c & 0xff) << 8;
242 0 : filter->cache |= n;
243 0 : } else if (filter->status == 2) {
244 0 : filter->status = 3;
245 0 : n = (c & 0xff) << 16;
246 0 : filter->cache |= n;
247 : } else {
248 0 : filter->status = 0;
249 0 : n = ((c & 0xff) << 24) | filter->cache;
250 0 : CK((*filter->output_function)(n, filter->data));
251 : }
252 0 : return c;
253 : }
254 :
255 : /*
256 : * wchar => UTF-32LE
257 : */
258 : int mbfl_filt_conv_wchar_utf32le(int c, mbfl_convert_filter *filter)
259 0 : {
260 0 : if (c >= 0 && c < MBFL_WCSGROUP_UCS4MAX) {
261 0 : CK((*filter->output_function)(c & 0xff, filter->data));
262 0 : CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
263 0 : CK((*filter->output_function)((c >> 16) & 0xff, filter->data));
264 0 : CK((*filter->output_function)((c >> 24) & 0xff, filter->data));
265 : } else {
266 0 : if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
267 0 : CK(mbfl_filt_conv_illegal_output(c, filter));
268 : }
269 : }
270 :
271 0 : return c;
272 : }
|