1 : /*
2 : +----------------------------------------------------------------------+
3 : | PHP Version 5 |
4 : +----------------------------------------------------------------------+
5 : | Copyright (c) 2009 The PHP Group |
6 : +----------------------------------------------------------------------+
7 : | This source file is subject to version 3.01 of the PHP license, |
8 : | that is bundled with this package in the file LICENSE, and is |
9 : | available through the world-wide-web at the following url: |
10 : | http://www.php.net/license/3_01.txt |
11 : | If you did not receive a copy of the PHP license and are unable to |
12 : | obtain it through the world-wide-web, please send a note to |
13 : | license@php.net so we can mail you a copy immediately. |
14 : +----------------------------------------------------------------------+
15 : | Author: Pierre A. Joye <pierre@php.net> |
16 : +----------------------------------------------------------------------+
17 : */
18 : /* $Id: idn.c 283618 2009-07-06 23:48:27Z stas $ */
19 :
20 : /* {{{ includes */
21 : #ifdef HAVE_CONFIG_H
22 : #include "config.h"
23 : #endif
24 :
25 : #include <php.h>
26 :
27 : #include <unicode/uidna.h>
28 : #include <unicode/ustring.h>
29 : #include "ext/standard/php_string.h"
30 :
31 : #include "intl_error.h"
32 : #include "intl_convert.h"
33 : /* }}} */
34 :
35 : /* {{{ grapheme_register_constants
36 : * Register API constants
37 : */
38 : void idn_register_constants( INIT_FUNC_ARGS )
39 17633 : {
40 : /* Option to prohibit processing of unassigned codepoints in the input and
41 : do not check if the input conforms to STD-3 ASCII rules. */
42 17633 : REGISTER_LONG_CONSTANT("IDNA_DEFAULT", UIDNA_DEFAULT, CONST_CS | CONST_PERSISTENT);
43 :
44 : /* Option to allow processing of unassigned codepoints in the input */
45 17633 : REGISTER_LONG_CONSTANT("IDNA_ALLOW_UNASSIGNED", UIDNA_ALLOW_UNASSIGNED, CONST_CS | CONST_PERSISTENT);
46 :
47 : /* Option to check if input conforms to STD-3 ASCII rules */
48 17633 : REGISTER_LONG_CONSTANT("IDNA_USE_STD3_RULES", UIDNA_USE_STD3_RULES, CONST_CS | CONST_PERSISTENT);
49 17633 : }
50 : /* }}} */
51 :
/* Conversion direction passed as the `mode` argument of php_intl_idn_to(). */
enum {
	INTL_IDN_TO_ASCII = 0,	/* convert via uidna_IDNToASCII() */
	INTL_IDN_TO_UTF8  = 1	/* convert via uidna_IDNToUnicode(), result returned as UTF-8 */
};
56 :
57 : static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS, int mode)
58 2 : {
59 : unsigned char* domain;
60 : int domain_len;
61 2 : long option = 0;
62 2 : UChar* ustring = NULL;
63 2 : int ustring_len = 0;
64 : UErrorCode status;
65 : char *converted_utf8;
66 : int32_t converted_utf8_len;
67 : UChar converted[MAXPATHLEN];
68 : int32_t converted_ret_len;
69 :
70 2 : if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|l", (char **)&domain, &domain_len, &option) == FAILURE) {
71 0 : return;
72 : }
73 :
74 2 : if (domain_len < 1) {
75 0 : intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "idn_to_ascii: empty domain name", 0 TSRMLS_CC );
76 0 : RETURN_FALSE;
77 : }
78 :
79 : /* convert the string to UTF-16. */
80 2 : status = U_ZERO_ERROR;
81 2 : intl_convert_utf8_to_utf16(&ustring, &ustring_len, (char*) domain, domain_len, &status );
82 :
83 2 : if (U_FAILURE(status)) {
84 0 : intl_error_set_code(NULL, status TSRMLS_CC);
85 :
86 : /* Set error messages. */
87 0 : intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 1 TSRMLS_CC );
88 0 : efree(ustring);
89 0 : RETURN_FALSE;
90 : } else {
91 : UParseError parse_error;
92 :
93 2 : status = U_ZERO_ERROR;
94 2 : if (mode == INTL_IDN_TO_ASCII) {
95 1 : converted_ret_len = uidna_IDNToASCII(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
96 : } else {
97 1 : converted_ret_len = uidna_IDNToUnicode(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
98 : }
99 2 : efree(ustring);
100 :
101 2 : if (U_FAILURE(status)) {
102 0 : intl_error_set( NULL, status, "idn_to_ascii: cannot convert to ASCII", 0 TSRMLS_CC );
103 0 : RETURN_FALSE;
104 : }
105 :
106 2 : status = U_ZERO_ERROR;
107 2 : intl_convert_utf16_to_utf8(&converted_utf8, &converted_utf8_len, converted, converted_ret_len, &status);
108 :
109 2 : if (U_FAILURE(status)) {
110 : /* Set global error code. */
111 0 : intl_error_set_code(NULL, status TSRMLS_CC);
112 :
113 : /* Set error messages. */
114 0 : intl_error_set_custom_msg( NULL, "Error converting output string to UTF-8", 1 TSRMLS_CC );
115 0 : efree(converted_utf8);
116 0 : RETURN_FALSE;
117 : }
118 : }
119 :
120 : /* return the allocated string, not a duplicate */
121 2 : RETURN_STRINGL(((char *)converted_utf8), converted_utf8_len, 0);
122 : }
123 :
124 : /* {{{ proto string idn_to_ascii(string domain[, int options])
125 : Converts a Unicode domain to its ASCII representation, as defined in the IDNA RFC. Delegates to php_intl_idn_to() in INTL_IDN_TO_ASCII mode, which returns FALSE on failure. */
126 : PHP_FUNCTION(idn_to_ascii)
127 1 : {
128 1 : php_intl_idn_to(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_ASCII);
129 1 : }
130 : /* }}} */
131 :
132 :
133 : /* {{{ proto string idn_to_utf8(string domain[, int options])
134 : Converts an ASCII representation of the domain to Unicode (UTF-8), as defined in the IDNA RFC. Delegates to php_intl_idn_to() in INTL_IDN_TO_UTF8 mode, which returns FALSE on failure. */
135 : PHP_FUNCTION(idn_to_utf8)
136 1 : {
137 1 : php_intl_idn_to(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_UTF8);
138 1 : }
139 : /* }}} */
140 :
141 :
142 : /*
143 : * Local variables:
144 : * tab-width: 4
145 : * c-basic-offset: 4
146 : * End:
147 : * vim600: fdm=marker
148 : * vim: noet sw=4 ts=4
149 : */
|