1 : /*
2 : +----------------------------------------------------------------------+
3 : | Zend Engine |
4 : +----------------------------------------------------------------------+
5 : | Copyright (c) 1998-2009 Zend Technologies Ltd. (http://www.zend.com) |
6 : +----------------------------------------------------------------------+
7 : | This source file is subject to version 2.00 of the Zend license, |
8 : | that is bundled with this package in the file LICENSE, and is |
9 : | available through the world-wide-web at |
10 : | http://www.zend.com/license/2_00.txt. |
11 : | If you did not receive a copy of the Zend license and are unable to |
12 : | obtain it through the world-wide-web, please send a note to |
13 : | license@zend.com so we can mail you a copy immediately. |
14 : +----------------------------------------------------------------------+
15 : | Authors: Andrei Zmievski <andrei@php.net> |
16 : +----------------------------------------------------------------------+
17 : */
18 :
19 : #ifndef ZEND_UNICODE_H
20 : #define ZEND_UNICODE_H
21 :
22 : #include "zend.h"
23 : #include <unicode/utypes.h>
24 : #include <unicode/uclean.h>
25 : #include <unicode/ustring.h>
26 : #include <unicode/ucnv.h>
27 : #include <unicode/uchar.h>
28 : #include <unicode/uloc.h>
29 : #include <unicode/ucol.h>
30 :
/*
 * Conversion error modes.
 *
 * The first group is numbered sequentially from 0; ZEND_CONV_ERROR_LAST_ENUM
 * is one past the last sequential mode. ZEND_CONV_ERROR_EXCEPTION has the
 * value 0x100, outside the sequential range.
 * NOTE(review): presumably it is meant to be OR-ed with one of the sequential
 * modes -- confirm against the users of these constants.
 */
enum {
	ZEND_CONV_ERROR_STOP           = 0,
	ZEND_CONV_ERROR_SKIP           = 1,
	ZEND_CONV_ERROR_SUBST          = 2,
	ZEND_CONV_ERROR_ESCAPE_UNICODE = 3,
	ZEND_CONV_ERROR_ESCAPE_ICU     = 4,
	ZEND_CONV_ERROR_ESCAPE_JAVA    = 5,
	ZEND_CONV_ERROR_ESCAPE_XML_DEC = 6,
	ZEND_CONV_ERROR_ESCAPE_XML_HEX = 7,
	ZEND_CONV_ERROR_LAST_ENUM      = 8,

	ZEND_CONV_ERROR_EXCEPTION      = 0x100
};
44 :
/* Direction of a conversion relative to Unicode (UChar) text. */
typedef enum {
	ZEND_FROM_UNICODE = 0,
	ZEND_TO_UNICODE   = 1
} zend_conv_direction;
49 :
50 :
51 : typedef struct _zend_collator {
52 : UCollator *coll;
53 : int refcount;
54 : } zend_collator;
55 :
56 :
57 : extern ZEND_API zend_class_entry *unicodeConversionException;
58 :
59 :
60 : /* internal functions */
61 :
62 : int zend_set_converter_encoding(UConverter **converter, const char *encoding);
63 : void zend_set_converter_error_mode(UConverter *conv, zend_conv_direction dir, uint16_t error_mode);
64 : void zend_set_converter_subst_char(UConverter *conv, UChar *subst_char);
65 : void zend_register_unicode_exceptions(TSRMLS_D);
66 : void zend_update_converters_error_behavior(TSRMLS_D);
67 : zend_collator* zend_collator_create(UCollator *coll);
68 : void zend_collator_destroy(zend_collator *zcoll);
69 :
70 :
71 : /* API functions */
72 :
73 : ZEND_API void zend_convert_encodings(UConverter *target_conv, UConverter *source_conv, char **target, int *target_len, const char *source, int source_len, UErrorCode *status);
74 : ZEND_API char* zend_unicode_to_ascii(const UChar *us, int us_len TSRMLS_DC);
75 :
76 : ZEND_API int zend_string_to_unicode_ex(UConverter *conv, UChar **target, int *target_len, const char *source, int source_len, UErrorCode *status);
77 : ZEND_API int zend_string_to_unicode(UConverter *conv, UChar **u, int *u_len, char *s, int s_len TSRMLS_DC);
78 : ZEND_API int zend_unicode_to_string_ex(UConverter *conv, char **s, int *s_len, const UChar *u, int u_len, UErrorCode *status);
79 : ZEND_API int zend_unicode_to_string(UConverter *conv, char **s, int *s_len, const UChar *u, int u_len TSRMLS_DC);
80 :
81 : ZEND_API int zval_string_to_unicode_ex(zval *string, UConverter *conv TSRMLS_DC);
82 : ZEND_API int zval_string_to_unicode(zval *string TSRMLS_DC);
83 : ZEND_API int zval_unicode_to_string_ex(zval *string, UConverter *conv TSRMLS_DC);
84 : ZEND_API int zval_unicode_to_string(zval *string TSRMLS_DC);
85 :
86 : ZEND_API int zend_cmp_unicode_and_string(UChar *ustr, char* str, uint len);
87 : ZEND_API int zend_cmp_unicode_and_literal(UChar *ustr, int ulen, char* str, int slen);
88 :
89 : ZEND_API void zend_case_fold_string(UChar **dest, int *dest_len, UChar *src, int src_len, uint32_t options, UErrorCode *status);
90 :
91 : ZEND_API int zend_is_valid_identifier(UChar *ident, int ident_len);
92 : ZEND_API int zend_normalize_identifier(UChar **dest, int *dest_len, UChar *ident, int ident_len, zend_bool fold_case);
93 :
94 : ZEND_API void zend_raise_conversion_error_ex(char *message, UConverter *conv, zend_conv_direction dir, int error_char_offset TSRMLS_DC);
95 :
96 : /*
97 : * Function to get a codepoint at position n. Iterates over codepoints starting from the
98 : * beginning of the string. Does not check for n > length, this is left up to the caller.
99 : */
100 : static inline UChar32 zend_get_codepoint_at(UChar *str, int length, int n)
101 0 : {
102 0 : int32_t offset = 0;
103 0 : UChar32 c = 0;
104 :
105 0 : if (n > 0) {
106 0 : U16_FWD_N(str, offset, length, n);
107 : }
108 0 : U16_NEXT(str, offset, length, c);
109 :
110 0 : return c;
111 : }
112 :
113 : /*
114 : * Convert a single codepoint to UChar sequence (1 or 2).
115 : * The UChar buffer is assumed to be large enough.
116 : */
117 : static inline int zend_codepoint_to_uchar(UChar32 codepoint, UChar *buf)
118 0 : {
119 0 : if (U_IS_BMP(codepoint)) {
120 0 : *buf++ = (UChar) codepoint;
121 0 : return 1;
122 0 : } else if (codepoint <= UCHAR_MAX_VALUE) {
123 0 : *buf++ = (UChar) U16_LEAD(codepoint);
124 0 : *buf++ = (UChar) U16_TRAIL(codepoint);
125 0 : return 2;
126 : } else {
127 0 : return 0;
128 : }
129 : }
130 :
/*
 * Length of a string value whose active member is chosen by __type:
 * u_strlen() of the .u member for IS_UNICODE, strlen() of the .s member
 * otherwise. Both arguments are parenthesized so that expressions such as
 * *ptr can be passed.
 */
#define ZSTR_LEN(__type, __str) (((__type) == IS_UNICODE) ? u_strlen((__str).u) : strlen((__str).s))

/* Byte size of __len characters: UBYTES(__len) for IS_UNICODE, __len otherwise. */
#define ZBYTES(__type, __len) (((__type) == IS_UNICODE) ? UBYTES((__len)) : (__len))

/* Yields c when it is non-NULL, else UG(fallback_encoding_conv). Evaluates c twice. */
#define ZEND_U_CONVERTER(c) ((c)?(c):UG(fallback_encoding_conv))
136 :
/*
 * efree() ustr when it is non-NULL. Evaluates ustr twice.
 * There is deliberately no trailing semicolon in the expansion, so the macro
 * behaves like a single statement (e.g. inside if/else); the caller supplies
 * the ';'.
 */
#define USTR_FREE(ustr) do { if (ustr) { efree(ustr); } } while (0)
/* Number of bytes occupied by len UChars. */
#define UBYTES(len) ((len) * sizeof(UChar))
/* u_strlen() of the .u member of str. */
#define USTR_LEN(str) u_strlen((str).u)
/* The .u member of str. */
#define USTR_VAL(str) (str).u

/* Byte size of __length characters: UBYTES(__length) for IS_UNICODE, __length otherwise. */
#define USTR_BYTES(__type, __length) \
	(((__type) == IS_UNICODE) ? UBYTES(__length) : (__length))

/* Number of bytes occupied by __chars_len UChars. */
#define TEXT_BYTES(__chars_len) \
	UBYTES(__chars_len)

/* Number of whole UChars that fit into __bytes_len bytes. */
#define TEXT_CHARS(__bytes_len) \
	((__bytes_len) / sizeof(UChar))
150 :
151 : #define USTR_MAKE(cs) zend_ascii_to_unicode(cs, sizeof(cs) ZEND_FILE_LINE_CC)
152 : #define USTR_MAKE_REL(cs) zend_ascii_to_unicode(cs, sizeof(cs) ZEND_FILE_LINE_RELAY_CC)
153 : static inline UChar* zend_ascii_to_unicode(const char *cs, size_t cs_size ZEND_FILE_LINE_DC)
154 0 : {
155 : /* u_charsToUChars() takes care of the terminating NULL */
156 0 : UChar *us = eumalloc_rel(cs_size);
157 0 : u_charsToUChars(cs, us, cs_size);
158 0 : return us;
159 : }
160 :
161 : #endif /* ZEND_UNICODE_H */
|