1 : /*
2 : +----------------------------------------------------------------------+
3 : | PHP Version 5 |
4 : +----------------------------------------------------------------------+
5 : | Copyright (c) 1997-2009 The PHP Group |
6 : +----------------------------------------------------------------------+
7 : | This source file is subject to version 3.01 of the PHP license, |
8 : | that is bundled with this package in the file LICENSE, and is |
9 : | available through the world-wide-web at the following url: |
10 : | http://www.php.net/license/3_01.txt |
11 : | If you did not receive a copy of the PHP license and are unable to |
12 : | obtain it through the world-wide-web, please send a note to |
13 : | license@php.net so we can mail you a copy immediately. |
14 : +----------------------------------------------------------------------+
15 : | Author: Andrei Zmievski <andrei@php.net> |
16 : +----------------------------------------------------------------------+
17 : */
18 :
19 : /* $Id: tokenizer.c 272374 2008-12-31 11:17:49Z sebastian $ */
20 :
21 : #ifdef HAVE_CONFIG_H
22 : #include "config.h"
23 : #endif
24 :
25 : #include "php.h"
26 : #include "php_ini.h"
27 : #include "ext/standard/info.h"
28 : #include "php_tokenizer.h"
29 :
/*
 * Local declaration of the scanner's buffer-state record.
 * NOTE(review): presumably this has to stay layout-compatible with the
 * definition used by the generated language scanner -- confirm before
 * reordering or retyping any member.
 */
typedef struct yy_buffer_state *YY_BUFFER_STATE;
typedef unsigned int yy_size_t;

/*
 * Values held in yy_buffer_state.yy_buffer_status.
 *
 * YY_BUFFER_EOF_PENDING marks a buffer for which EOF has already been seen
 * while text is still waiting to be processed: no further reads from the
 * input source should be attempted, yet tokens may remain to be matched
 * because of backing-up. Once the EOF itself is processed the status goes
 * back to "new" (via yyrestart()), so scanning can continue simply by
 * pointing yyin at another input file.
 */
#define YY_BUFFER_NEW 0
#define YY_BUFFER_NORMAL 1
#define YY_BUFFER_EOF_PENDING 2

struct yy_buffer_state
{
	FILE *yy_input_file;

	char *yy_ch_buf;		/* start of the input buffer */
	char *yy_buf_pos;		/* read cursor inside the input buffer */

	/* Capacity of the input buffer in bytes; the room reserved for the
	 * EOB characters is not counted.
	 */
	yy_size_t yy_buf_size;

	/* How many characters have been read into yy_ch_buf, again without
	 * the EOB characters.
	 */
	int yy_n_chars;

	/* Non-zero when the buffer is "ours": we created it, so we may
	 * realloc() it to grow it and must free() it to delete it.
	 */
	int yy_is_our_buffer;

	/* Non-zero for an "interactive" input source. With stdio input this
	 * selects getc() over fread(), so fetching stops after each newline.
	 */
	int yy_is_interactive;

	/* Non-zero while positioned at the beginning of a line; '^' rules are
	 * only active on the next match when this is set.
	 */
	int yy_at_bol;

	/* Non-zero if the input buffer should be refilled when its end is
	 * reached.
	 */
	int yy_fill_buffer;

	/* One of the YY_BUFFER_* status values. */
	int yy_buffer_status;
};
88 :
89 : #include "zend.h"
90 : #include "zend_language_scanner.h"
91 : #include <zend_language_parser.h>
92 :
/* Short aliases for the yy_text / yy_leng values reached through LANG_SCNG(). */
#define zendtext (LANG_SCNG(yy_text))
#define zendleng (LANG_SCNG(yy_leng))
95 :
96 : /* {{{ tokenizer_functions[]
97 : *
98 : * Every user visible function must have an entry in tokenizer_functions[].
99 : */
100 : zend_function_entry tokenizer_functions[] = {
101 : PHP_FE(token_get_all, NULL)
102 : PHP_FE(token_name, NULL)
103 : {NULL, NULL, NULL} /* Must be the last line in tokenizer_functions[] */
104 : };
105 : /* }}} */
106 :
107 : /* {{{ tokenizer_module_entry
108 : */
109 : zend_module_entry tokenizer_module_entry = {
110 : #if ZEND_MODULE_API_NO >= 20010901
111 : STANDARD_MODULE_HEADER,
112 : #endif
113 : "tokenizer",
114 : tokenizer_functions,
115 : PHP_MINIT(tokenizer),
116 : NULL,
117 : NULL,
118 : NULL,
119 : PHP_MINFO(tokenizer),
120 : #if ZEND_MODULE_API_NO >= 20010901
121 : "0.1", /* Replace with version number for your extension */
122 : #endif
123 : STANDARD_MODULE_PROPERTIES
124 : };
125 : /* }}} */
126 :
127 : #ifdef COMPILE_DL_TOKENIZER
128 : ZEND_GET_MODULE(tokenizer)
129 : #endif
130 :
131 : /* {{{ PHP_MINIT_FUNCTION
132 : */
133 : PHP_MINIT_FUNCTION(tokenizer)
134 13565 : {
135 13565 : tokenizer_register_constants(INIT_FUNC_ARGS_PASSTHRU);
136 13565 : return SUCCESS;
137 : }
138 : /* }}} */
139 :
/* {{{ PHP_MINFO_FUNCTION
 *
 * Info hook: prints a one-row, two-column table stating that tokenizer
 * support is enabled.
 */
PHP_MINFO_FUNCTION(tokenizer)
{
	php_info_print_table_start();
	php_info_print_table_row(2, "Tokenizer Support", "enabled");
	php_info_print_table_end();
}
/* }}} */
149 :
150 : static void tokenize(zval *return_value TSRMLS_DC)
151 67 : {
152 : zval token;
153 : zval *keyword;
154 : int token_type;
155 : zend_bool destroy;
156 67 : int token_line = 1;
157 :
158 67 : array_init(return_value);
159 :
160 67 : ZVAL_NULL(&token);
161 1763 : while ((token_type = lex_scan(&token TSRMLS_CC))) {
162 1629 : destroy = 1;
163 1629 : switch (token_type) {
164 : case T_CLOSE_TAG:
165 40 : if (zendtext[zendleng - 1] != '>') {
166 0 : CG(zend_lineno)++;
167 : }
168 : case T_OPEN_TAG:
169 : case T_OPEN_TAG_WITH_ECHO:
170 : case T_WHITESPACE:
171 : case T_COMMENT:
172 : case T_DOC_COMMENT:
173 670 : destroy = 0;
174 : break;
175 : }
176 :
177 1629 : if (token_type >= 256) {
178 1207 : MAKE_STD_ZVAL(keyword);
179 1207 : array_init(keyword);
180 1207 : add_next_index_long(keyword, token_type);
181 1207 : if (token_type == T_END_HEREDOC) {
182 3 : if (CG(increment_lineno)) {
183 3 : token_line = ++CG(zend_lineno);
184 3 : CG(increment_lineno) = 0;
185 : }
186 3 : add_next_index_stringl(keyword, Z_STRVAL(token), Z_STRLEN(token), 1);
187 3 : efree(Z_STRVAL(token));
188 : } else {
189 1204 : add_next_index_stringl(keyword, zendtext, zendleng, 1);
190 : }
191 1207 : add_next_index_long(keyword, token_line);
192 1207 : add_next_index_zval(return_value, keyword);
193 : } else {
194 422 : add_next_index_stringl(return_value, zendtext, zendleng, 1);
195 : }
196 1629 : if (destroy && Z_TYPE(token) != IS_NULL) {
197 351 : zval_dtor(&token);
198 : }
199 1629 : ZVAL_NULL(&token);
200 :
201 1629 : token_line = CG(zend_lineno);
202 : }
203 67 : }
204 :
205 : /* {{{ proto array token_get_all(string source)
206 : */
207 : PHP_FUNCTION(token_get_all)
208 77 : {
209 77 : char *source = NULL;
210 77 : int argc = ZEND_NUM_ARGS();
211 : int source_len;
212 : zval source_z;
213 : zend_lex_state original_lex_state;
214 :
215 77 : if (zend_parse_parameters(argc TSRMLS_CC, "s", &source, &source_len) == FAILURE)
216 10 : return;
217 :
218 67 : ZVAL_STRINGL(&source_z, source, source_len, 1);
219 67 : zend_save_lexical_state(&original_lex_state TSRMLS_CC);
220 :
221 67 : if (zend_prepare_string_for_scanning(&source_z, "" TSRMLS_CC) == FAILURE) {
222 0 : RETURN_EMPTY_STRING();
223 : }
224 :
225 67 : LANG_SCNG(start) = 1;
226 :
227 67 : tokenize(return_value TSRMLS_CC);
228 :
229 67 : zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
230 67 : zval_dtor(&source_z);
231 : }
232 : /* }}} */
233 :
234 : /* {{{ proto string token_name(int type)
235 : */
236 : PHP_FUNCTION(token_name)
237 121 : {
238 121 : int argc = ZEND_NUM_ARGS();
239 : long type;
240 :
241 121 : if (zend_parse_parameters(argc TSRMLS_CC, "l", &type) == FAILURE) {
242 2 : return;
243 : }
244 119 : RETVAL_STRING(get_token_type_name(type), 1);
245 : }
246 : /* }}} */
247 :
248 : /*
249 : * Local variables:
250 : * tab-width: 4
251 : * c-basic-offset: 4
252 : * End:
253 : * vim600: noet sw=4 ts=4 fdm=marker
254 : * vim<600: noet sw=4 ts=4
255 : */
|