1 : /*
2 : ** 2001 September 15
3 : **
4 : ** The author disclaims copyright to this source code. In place of
5 : ** a legal notice, here is a blessing:
6 : **
7 : ** May you do good and not evil.
8 : ** May you find forgiveness for yourself and forgive others.
9 : ** May you share freely, never taking more than you give.
10 : **
11 : *************************************************************************
** A tokenizer for SQL
13 : **
14 : ** This file contains C code that splits an SQL input string up into
15 : ** individual tokens and sends those tokens one-by-one over to the
16 : ** parser for analysis.
17 : **
18 : ** $Id: tokenize.c 195361 2005-09-07 15:11:33Z iliaa $
19 : */
20 : #include "sqliteInt.h"
21 : #include "os.h"
22 : #include <ctype.h>
23 : #include <stdlib.h>
24 :
25 : /*
** All the keywords of the SQL language are stored in a hash
27 : ** table composed of instances of the following structure.
28 : */
/* One entry of the SQL keyword table (see aKeywordTable[]). */
typedef struct Keyword Keyword;
struct Keyword {
  char *zName; /* The keyword name */
  u8 tokenType; /* Token value for this keyword */
  u8 len; /* Length of this keyword; set with strlen() on the first lookup */
  u8 iNext; /* Index in aKeywordTable[] of next with same hash.  Stored
            ** 1-based: 0 terminates the chain. */
};
36 :
37 : /*
38 : ** These are the keywords
39 : */
/*
** Only zName and tokenType are given in the initializers.  The len and
** iNext fields start out zero and are filled in by sqliteKeywordCode()
** the first time it runs.  Several spellings may share one token code
** (for example the join keywords all map to TK_JOIN_KW, and both TEMP
** and TEMPORARY map to TK_TEMP).
*/
static Keyword aKeywordTable[] = {
  { "ABORT", TK_ABORT, },
  { "AFTER", TK_AFTER, },
  { "ALL", TK_ALL, },
  { "AND", TK_AND, },
  { "AS", TK_AS, },
  { "ASC", TK_ASC, },
  { "ATTACH", TK_ATTACH, },
  { "BEFORE", TK_BEFORE, },
  { "BEGIN", TK_BEGIN, },
  { "BETWEEN", TK_BETWEEN, },
  { "BY", TK_BY, },
  { "CASCADE", TK_CASCADE, },
  { "CASE", TK_CASE, },
  { "CHECK", TK_CHECK, },
  { "CLUSTER", TK_CLUSTER, },
  { "COLLATE", TK_COLLATE, },
  { "COMMIT", TK_COMMIT, },
  { "CONFLICT", TK_CONFLICT, },
  { "CONSTRAINT", TK_CONSTRAINT, },
  { "COPY", TK_COPY, },
  { "CREATE", TK_CREATE, },
  { "CROSS", TK_JOIN_KW, },
  { "DATABASE", TK_DATABASE, },
  { "DEFAULT", TK_DEFAULT, },
  { "DEFERRED", TK_DEFERRED, },
  { "DEFERRABLE", TK_DEFERRABLE, },
  { "DELETE", TK_DELETE, },
  { "DELIMITERS", TK_DELIMITERS, },
  { "DESC", TK_DESC, },
  { "DETACH", TK_DETACH, },
  { "DISTINCT", TK_DISTINCT, },
  { "DROP", TK_DROP, },
  { "END", TK_END, },
  { "EACH", TK_EACH, },
  { "ELSE", TK_ELSE, },
  { "EXCEPT", TK_EXCEPT, },
  { "EXPLAIN", TK_EXPLAIN, },
  { "FAIL", TK_FAIL, },
  { "FOR", TK_FOR, },
  { "FOREIGN", TK_FOREIGN, },
  { "FROM", TK_FROM, },
  { "FULL", TK_JOIN_KW, },
  { "GLOB", TK_GLOB, },
  { "GROUP", TK_GROUP, },
  { "HAVING", TK_HAVING, },
  { "IGNORE", TK_IGNORE, },
  { "IMMEDIATE", TK_IMMEDIATE, },
  { "IN", TK_IN, },
  { "INDEX", TK_INDEX, },
  { "INITIALLY", TK_INITIALLY, },
  { "INNER", TK_JOIN_KW, },
  { "INSERT", TK_INSERT, },
  { "INSTEAD", TK_INSTEAD, },
  { "INTERSECT", TK_INTERSECT, },
  { "INTO", TK_INTO, },
  { "IS", TK_IS, },
  { "ISNULL", TK_ISNULL, },
  { "JOIN", TK_JOIN, },
  { "KEY", TK_KEY, },
  { "LEFT", TK_JOIN_KW, },
  { "LIKE", TK_LIKE, },
  { "LIMIT", TK_LIMIT, },
  { "MATCH", TK_MATCH, },
  { "NATURAL", TK_JOIN_KW, },
  { "NOT", TK_NOT, },
  { "NOTNULL", TK_NOTNULL, },
  { "NULL", TK_NULL, },
  { "OF", TK_OF, },
  { "OFFSET", TK_OFFSET, },
  { "ON", TK_ON, },
  { "OR", TK_OR, },
  { "ORDER", TK_ORDER, },
  { "OUTER", TK_JOIN_KW, },
  { "PRAGMA", TK_PRAGMA, },
  { "PRIMARY", TK_PRIMARY, },
  { "RAISE", TK_RAISE, },
  { "REFERENCES", TK_REFERENCES, },
  { "REPLACE", TK_REPLACE, },
  { "RESTRICT", TK_RESTRICT, },
  { "RIGHT", TK_JOIN_KW, },
  { "ROLLBACK", TK_ROLLBACK, },
  { "ROW", TK_ROW, },
  { "SELECT", TK_SELECT, },
  { "SET", TK_SET, },
  { "STATEMENT", TK_STATEMENT, },
  { "TABLE", TK_TABLE, },
  { "TEMP", TK_TEMP, },
  { "TEMPORARY", TK_TEMP, },
  { "THEN", TK_THEN, },
  { "TRANSACTION", TK_TRANSACTION, },
  { "TRIGGER", TK_TRIGGER, },
  { "UNION", TK_UNION, },
  { "UNIQUE", TK_UNIQUE, },
  { "UPDATE", TK_UPDATE, },
  { "USING", TK_USING, },
  { "VACUUM", TK_VACUUM, },
  { "VALUES", TK_VALUES, },
  { "VIEW", TK_VIEW, },
  { "WHEN", TK_WHEN, },
  { "WHERE", TK_WHERE, },
};
142 :
143 : /*
144 : ** This is the hash table
145 : */
/* Number of buckets; keyword hashes are reduced modulo this value. */
#define KEY_HASH_SIZE 101
/*
** aiHashTable[h] holds (index+1) of the first aKeywordTable[] entry whose
** hash is h, or 0 when the bucket is empty.  Further entries in the same
** bucket are reached through Keyword.iNext.  Populated on the first call
** to sqliteKeywordCode().
*/
static u8 aiHashTable[KEY_HASH_SIZE];
148 :
149 :
150 : /*
151 : ** This function looks up an identifier to determine if it is a
152 : ** keyword. If it is a keyword, the token code of that keyword is
153 : ** returned. If the input is not a keyword, TK_ID is returned.
154 : */
155 10319 : int sqliteKeywordCode(const char *z, int n){
156 : int h, i;
157 : Keyword *p;
158 : static char needInit = 1;
159 10319 : if( needInit ){
160 : /* Initialize the keyword hash table */
161 148 : sqliteOsEnterMutex();
162 148 : if( needInit ){
163 : int nk;
164 148 : nk = sizeof(aKeywordTable)/sizeof(aKeywordTable[0]);
165 14948 : for(i=0; i<nk; i++){
166 14800 : aKeywordTable[i].len = strlen(aKeywordTable[i].zName);
167 14800 : h = sqliteHashNoCase(aKeywordTable[i].zName, aKeywordTable[i].len);
168 14800 : h %= KEY_HASH_SIZE;
169 14800 : aKeywordTable[i].iNext = aiHashTable[h];
170 14800 : aiHashTable[h] = i+1;
171 : }
172 148 : needInit = 0;
173 : }
174 148 : sqliteOsLeaveMutex();
175 : }
176 10319 : h = sqliteHashNoCase(z, n) % KEY_HASH_SIZE;
177 16945 : for(i=aiHashTable[h]; i; i=p->iNext){
178 10347 : p = &aKeywordTable[i-1];
179 10347 : if( p->len==n && sqliteStrNICmp(p->zName, z, n)==0 ){
180 3721 : return p->tokenType;
181 : }
182 : }
183 6598 : return TK_ID;
184 : }
185 :
186 :
187 : /*
188 : ** If X is a character that can be used in an identifier and
189 : ** X&0x80==0 then isIdChar[X] will be 1. If X&0x80==0x80 then
190 : ** X is always an identifier character. (Hence all UTF-8
191 : ** characters can be part of an identifier). isIdChar[X] will
192 : ** be 0 for every character in the lower 128 ASCII characters
193 : ** that cannot be used as part of an identifier.
194 : **
195 : ** In this implementation, an identifier can be a string of
196 : ** alphabetic characters, digits, and "_" plus any character
197 : ** with the high-order bit set. The latter rule means that
198 : ** any sequence of UTF-8 characters or characters taken from
199 : ** an extended ISO8859 character set can form an identifier.
200 : */
/*
** NOTE: the table has exactly 128 entries (rows 0x through 7x).  It must
** only be indexed with values below 0x80; bytes with the high bit set
** have to be tested separately before the lookup.
*/
static const char isIdChar[] = {
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
};
212 :
213 :
214 : /*
215 : ** Return the length of the token that begins at z[0].
216 : ** Store the token type in *tokenType before returning.
217 : */
218 26303 : static int sqliteGetToken(const unsigned char *z, int *tokenType){
219 : int i;
220 26303 : switch( *z ){
221 : case ' ': case '\t': case '\n': case '\f': case '\r': {
222 9637 : for(i=1; isspace(z[i]); i++){}
223 9637 : *tokenType = TK_SPACE;
224 9637 : return i;
225 : }
226 : case '-': {
227 0 : if( z[1]=='-' ){
228 0 : for(i=2; z[i] && z[i]!='\n'; i++){}
229 0 : *tokenType = TK_COMMENT;
230 0 : return i;
231 : }
232 0 : *tokenType = TK_MINUS;
233 0 : return 1;
234 : }
235 : case '(': {
236 815 : *tokenType = TK_LP;
237 815 : return 1;
238 : }
239 : case ')': {
240 815 : *tokenType = TK_RP;
241 815 : return 1;
242 : }
243 : case ';': {
244 4 : *tokenType = TK_SEMI;
245 4 : return 1;
246 : }
247 : case '+': {
248 0 : *tokenType = TK_PLUS;
249 0 : return 1;
250 : }
251 : case '*': {
252 74 : *tokenType = TK_STAR;
253 74 : return 1;
254 : }
255 : case '/': {
256 0 : if( z[1]!='*' || z[2]==0 ){
257 0 : *tokenType = TK_SLASH;
258 0 : return 1;
259 : }
260 0 : for(i=3; z[i] && (z[i]!='/' || z[i-1]!='*'); i++){}
261 0 : if( z[i] ) i++;
262 0 : *tokenType = TK_COMMENT;
263 0 : return i;
264 : }
265 : case '%': {
266 0 : *tokenType = TK_REM;
267 0 : return 1;
268 : }
269 : case '=': {
270 72 : *tokenType = TK_EQ;
271 72 : return 1 + (z[1]=='=');
272 : }
273 : case '<': {
274 4 : if( z[1]=='=' ){
275 0 : *tokenType = TK_LE;
276 0 : return 2;
277 4 : }else if( z[1]=='>' ){
278 0 : *tokenType = TK_NE;
279 0 : return 2;
280 4 : }else if( z[1]=='<' ){
281 0 : *tokenType = TK_LSHIFT;
282 0 : return 2;
283 : }else{
284 4 : *tokenType = TK_LT;
285 4 : return 1;
286 : }
287 : }
288 : case '>': {
289 3 : if( z[1]=='=' ){
290 0 : *tokenType = TK_GE;
291 0 : return 2;
292 3 : }else if( z[1]=='>' ){
293 0 : *tokenType = TK_RSHIFT;
294 0 : return 2;
295 : }else{
296 3 : *tokenType = TK_GT;
297 3 : return 1;
298 : }
299 : }
300 : case '!': {
301 1 : if( z[1]!='=' ){
302 0 : *tokenType = TK_ILLEGAL;
303 0 : return 2;
304 : }else{
305 1 : *tokenType = TK_NE;
306 1 : return 2;
307 : }
308 : }
309 : case '|': {
310 0 : if( z[1]!='|' ){
311 0 : *tokenType = TK_BITOR;
312 0 : return 1;
313 : }else{
314 0 : *tokenType = TK_CONCAT;
315 0 : return 2;
316 : }
317 : }
318 : case ',': {
319 2916 : *tokenType = TK_COMMA;
320 2916 : return 1;
321 : }
322 : case '&': {
323 0 : *tokenType = TK_BITAND;
324 0 : return 1;
325 : }
326 : case '~': {
327 0 : *tokenType = TK_BITNOT;
328 0 : return 1;
329 : }
330 : case '\'': case '"': {
331 743 : int delim = z[0];
332 3962 : for(i=1; z[i]; i++){
333 3962 : if( z[i]==delim ){
334 744 : if( z[i+1]==delim ){
335 1 : i++;
336 : }else{
337 743 : break;
338 : }
339 : }
340 : }
341 743 : if( z[i] ) i++;
342 743 : *tokenType = TK_STRING;
343 743 : return i;
344 : }
345 : case '.': {
346 345 : *tokenType = TK_DOT;
347 345 : return 1;
348 : }
349 : case '0': case '1': case '2': case '3': case '4':
350 : case '5': case '6': case '7': case '8': case '9': {
351 555 : *tokenType = TK_INTEGER;
352 555 : for(i=1; isdigit(z[i]); i++){}
353 555 : if( z[i]=='.' && isdigit(z[i+1]) ){
354 0 : i += 2;
355 0 : while( isdigit(z[i]) ){ i++; }
356 0 : *tokenType = TK_FLOAT;
357 : }
358 555 : if( (z[i]=='e' || z[i]=='E') &&
359 : ( isdigit(z[i+1])
360 : || ((z[i+1]=='+' || z[i+1]=='-') && isdigit(z[i+2]))
361 : )
362 : ){
363 0 : i += 2;
364 0 : while( isdigit(z[i]) ){ i++; }
365 0 : *tokenType = TK_FLOAT;
366 : }
367 555 : return i;
368 : }
369 : case '[': {
370 0 : for(i=1; z[i] && z[i-1]!=']'; i++){}
371 0 : *tokenType = TK_ID;
372 0 : return i;
373 : }
374 : case '?': {
375 0 : *tokenType = TK_VARIABLE;
376 0 : return 1;
377 : }
378 : default: {
379 10319 : if( (*z&0x80)==0 && !isIdChar[*z] ){
380 0 : break;
381 : }
382 10319 : for(i=1; (z[i]&0x80)!=0 || isIdChar[z[i]]; i++){}
383 10319 : *tokenType = sqliteKeywordCode((char*)z, i);
384 10319 : return i;
385 : }
386 : }
387 0 : *tokenType = TK_ILLEGAL;
388 0 : return 1;
389 : }
390 :
391 : /*
392 : ** Run the parser on the given SQL string. The parser structure is
393 : ** passed in. An SQLITE_ status code is returned. If an error occurs
394 : ** and pzErrMsg!=NULL then an error message might be written into
395 : ** memory obtained from malloc() and *pzErrMsg made to point to that
396 : ** error message. Or maybe not.
397 : */
398 1501 : int sqliteRunParser(Parse *pParse, const char *zSql, char **pzErrMsg){
399 1501 : int nErr = 0;
400 : int i;
401 : void *pEngine;
402 : int tokenType;
403 1501 : int lastTokenParsed = -1;
404 1501 : sqlite *db = pParse->db;
405 : extern void *sqliteParserAlloc(void*(*)(int));
406 : extern void sqliteParserFree(void*, void(*)(void*));
407 : extern int sqliteParser(void*, int, Token, Parse*);
408 :
409 1501 : db->flags &= ~SQLITE_Interrupt;
410 1501 : pParse->rc = SQLITE_OK;
411 1501 : i = 0;
412 1501 : pEngine = sqliteParserAlloc((void*(*)(int))malloc);
413 1501 : if( pEngine==0 ){
414 0 : sqliteSetString(pzErrMsg, "out of memory", (char*)0);
415 0 : return 1;
416 : }
417 1501 : pParse->sLastToken.dyn = 0;
418 1501 : pParse->zTail = zSql;
419 29301 : while( sqlite_malloc_failed==0 && zSql[i]!=0 ){
420 : assert( i>=0 );
421 26303 : pParse->sLastToken.z = &zSql[i];
422 : assert( pParse->sLastToken.dyn==0 );
423 26303 : pParse->sLastToken.n = sqliteGetToken((unsigned char*)&zSql[i], &tokenType);
424 26303 : i += pParse->sLastToken.n;
425 26303 : switch( tokenType ){
426 : case TK_SPACE:
427 : case TK_COMMENT: {
428 9637 : if( (db->flags & SQLITE_Interrupt)!=0 ){
429 0 : pParse->rc = SQLITE_INTERRUPT;
430 0 : sqliteSetString(pzErrMsg, "interrupt", (char*)0);
431 0 : goto abort_parse;
432 : }
433 9637 : break;
434 : }
435 : case TK_ILLEGAL: {
436 0 : sqliteSetNString(pzErrMsg, "unrecognized token: \"", -1,
437 : pParse->sLastToken.z, pParse->sLastToken.n, "\"", 1, 0);
438 0 : nErr++;
439 0 : goto abort_parse;
440 : }
441 : case TK_SEMI: {
442 4 : pParse->zTail = &zSql[i];
443 : /* Fall thru into the default case */
444 : }
445 : default: {
446 16666 : sqliteParser(pEngine, tokenType, pParse->sLastToken, pParse);
447 16666 : lastTokenParsed = tokenType;
448 16666 : if( pParse->rc!=SQLITE_OK ){
449 4 : goto abort_parse;
450 : }
451 : break;
452 : }
453 : }
454 : }
455 1501 : abort_parse:
456 1501 : if( zSql[i]==0 && nErr==0 && pParse->rc==SQLITE_OK ){
457 1497 : if( lastTokenParsed!=TK_SEMI ){
458 1497 : sqliteParser(pEngine, TK_SEMI, pParse->sLastToken, pParse);
459 1497 : pParse->zTail = &zSql[i];
460 : }
461 1497 : sqliteParser(pEngine, 0, pParse->sLastToken, pParse);
462 : }
463 1501 : sqliteParserFree(pEngine, free);
464 1501 : if( pParse->rc!=SQLITE_OK && pParse->rc!=SQLITE_DONE && pParse->zErrMsg==0 ){
465 0 : sqliteSetString(&pParse->zErrMsg, sqlite_error_string(pParse->rc),
466 : (char*)0);
467 : }
468 1501 : if( pParse->zErrMsg ){
469 606 : if( pzErrMsg && *pzErrMsg==0 ){
470 303 : *pzErrMsg = pParse->zErrMsg;
471 : }else{
472 0 : sqliteFree(pParse->zErrMsg);
473 : }
474 303 : pParse->zErrMsg = 0;
475 303 : if( !nErr ) nErr++;
476 : }
477 1501 : if( pParse->pVdbe && pParse->nErr>0 ){
478 300 : sqliteVdbeDelete(pParse->pVdbe);
479 300 : pParse->pVdbe = 0;
480 : }
481 1501 : if( pParse->pNewTable ){
482 0 : sqliteDeleteTable(pParse->db, pParse->pNewTable);
483 0 : pParse->pNewTable = 0;
484 : }
485 1501 : if( pParse->pNewTrigger ){
486 0 : sqliteDeleteTrigger(pParse->pNewTrigger);
487 0 : pParse->pNewTrigger = 0;
488 : }
489 1501 : if( nErr>0 && (pParse->rc==SQLITE_OK || pParse->rc==SQLITE_DONE) ){
490 3 : pParse->rc = SQLITE_ERROR;
491 : }
492 1501 : return nErr;
493 : }
494 :
495 : /*
496 : ** Token types used by the sqlite_complete() routine. See the header
497 : ** comments on that procedure for additional information.
498 : */
/* Each value is also the column index into trans[][] in sqlite_complete() */
#define tkEXPLAIN 0   /* The "explain" keyword */
#define tkCREATE 1    /* The "create" keyword */
#define tkTEMP 2      /* The "temp" or "temporary" keyword */
#define tkTRIGGER 3   /* The "trigger" keyword */
#define tkEND 4       /* The "end" keyword */
#define tkSEMI 5      /* A semicolon */
#define tkWS 6        /* Whitespace, including comments */
#define tkOTHER 7     /* Any other SQL token */
507 :
508 : /*
509 : ** Return TRUE if the given SQL string ends in a semicolon.
510 : **
511 : ** Special handling is require for CREATE TRIGGER statements.
512 : ** Whenever the CREATE TRIGGER keywords are seen, the statement
513 : ** must end with ";END;".
514 : **
515 : ** This implementation uses a state machine with 7 states:
516 : **
517 : ** (0) START At the beginning or end of an SQL statement. This routine
518 : ** returns 1 if it ends in the START state and 0 if it ends
519 : ** in any other state.
520 : **
521 : ** (1) EXPLAIN The keyword EXPLAIN has been seen at the beginning of
522 : ** a statement.
523 : **
524 : ** (2) CREATE The keyword CREATE has been seen at the beginning of a
525 : ** statement, possibly preceeded by EXPLAIN and/or followed by
526 : ** TEMP or TEMPORARY
527 : **
528 : ** (3) NORMAL We are in the middle of statement which ends with a single
529 : ** semicolon.
530 : **
531 : ** (4) TRIGGER We are in the middle of a trigger definition that must be
532 : ** ended by a semicolon, the keyword END, and another semicolon.
533 : **
534 : ** (5) SEMI We've seen the first semicolon in the ";END;" that occurs at
535 : ** the end of a trigger definition.
536 : **
537 : ** (6) END We've seen the ";END" of the ";END;" that occurs at the end
538 : ** of a trigger difinition.
539 : **
540 : ** Transitions between states above are determined by tokens extracted
541 : ** from the input. The following tokens are significant:
542 : **
543 : ** (0) tkEXPLAIN The "explain" keyword.
544 : ** (1) tkCREATE The "create" keyword.
545 : ** (2) tkTEMP The "temp" or "temporary" keyword.
546 : ** (3) tkTRIGGER The "trigger" keyword.
547 : ** (4) tkEND The "end" keyword.
548 : ** (5) tkSEMI A semicolon.
549 : ** (6) tkWS Whitespace
550 : ** (7) tkOTHER Any other SQL token.
551 : **
552 : ** Whitespace never causes a state transition and is always ignored.
553 : */
554 0 : int sqlite_complete(const char *zSql){
555 0 : u8 state = 0; /* Current state, using numbers defined in header comment */
556 : u8 token; /* Value of the next token */
557 :
558 : /* The following matrix defines the transition from one state to another
559 : ** according to what token is seen. trans[state][token] returns the
560 : ** next state.
561 : */
562 : static const u8 trans[7][8] = {
563 : /* Token: */
564 : /* State: ** EXPLAIN CREATE TEMP TRIGGER END SEMI WS OTHER */
565 : /* 0 START: */ { 1, 2, 3, 3, 3, 0, 0, 3, },
566 : /* 1 EXPLAIN: */ { 3, 2, 3, 3, 3, 0, 1, 3, },
567 : /* 2 CREATE: */ { 3, 3, 2, 4, 3, 0, 2, 3, },
568 : /* 3 NORMAL: */ { 3, 3, 3, 3, 3, 0, 3, 3, },
569 : /* 4 TRIGGER: */ { 4, 4, 4, 4, 4, 5, 4, 4, },
570 : /* 5 SEMI: */ { 4, 4, 4, 4, 6, 5, 5, 4, },
571 : /* 6 END: */ { 4, 4, 4, 4, 4, 0, 6, 4, },
572 : };
573 :
574 0 : while( *zSql ){
575 0 : switch( *zSql ){
576 : case ';': { /* A semicolon */
577 0 : token = tkSEMI;
578 0 : break;
579 : }
580 : case ' ':
581 : case '\r':
582 : case '\t':
583 : case '\n':
584 : case '\f': { /* White space is ignored */
585 0 : token = tkWS;
586 0 : break;
587 : }
588 : case '/': { /* C-style comments */
589 0 : if( zSql[1]!='*' ){
590 0 : token = tkOTHER;
591 0 : break;
592 : }
593 0 : zSql += 2;
594 0 : while( zSql[0] && (zSql[0]!='*' || zSql[1]!='/') ){ zSql++; }
595 0 : if( zSql[0]==0 ) return 0;
596 0 : zSql++;
597 0 : token = tkWS;
598 0 : break;
599 : }
600 : case '-': { /* SQL-style comments from "--" to end of line */
601 0 : if( zSql[1]!='-' ){
602 0 : token = tkOTHER;
603 0 : break;
604 : }
605 0 : while( *zSql && *zSql!='\n' ){ zSql++; }
606 0 : if( *zSql==0 ) return state==0;
607 0 : token = tkWS;
608 0 : break;
609 : }
610 : case '[': { /* Microsoft-style identifiers in [...] */
611 0 : zSql++;
612 0 : while( *zSql && *zSql!=']' ){ zSql++; }
613 0 : if( *zSql==0 ) return 0;
614 0 : token = tkOTHER;
615 0 : break;
616 : }
617 : case '"': /* single- and double-quoted strings */
618 : case '\'': {
619 0 : int c = *zSql;
620 0 : zSql++;
621 0 : while( *zSql && *zSql!=c ){ zSql++; }
622 0 : if( *zSql==0 ) return 0;
623 0 : token = tkOTHER;
624 0 : break;
625 : }
626 : default: {
627 0 : if( isIdChar[(u8)*zSql] ){
628 : /* Keywords and unquoted identifiers */
629 : int nId;
630 0 : for(nId=1; isIdChar[(u8)zSql[nId]]; nId++){}
631 0 : switch( *zSql ){
632 : case 'c': case 'C': {
633 0 : if( nId==6 && sqliteStrNICmp(zSql, "create", 6)==0 ){
634 0 : token = tkCREATE;
635 : }else{
636 0 : token = tkOTHER;
637 : }
638 0 : break;
639 : }
640 : case 't': case 'T': {
641 0 : if( nId==7 && sqliteStrNICmp(zSql, "trigger", 7)==0 ){
642 0 : token = tkTRIGGER;
643 0 : }else if( nId==4 && sqliteStrNICmp(zSql, "temp", 4)==0 ){
644 0 : token = tkTEMP;
645 0 : }else if( nId==9 && sqliteStrNICmp(zSql, "temporary", 9)==0 ){
646 0 : token = tkTEMP;
647 : }else{
648 0 : token = tkOTHER;
649 : }
650 0 : break;
651 : }
652 : case 'e': case 'E': {
653 0 : if( nId==3 && sqliteStrNICmp(zSql, "end", 3)==0 ){
654 0 : token = tkEND;
655 0 : }else if( nId==7 && sqliteStrNICmp(zSql, "explain", 7)==0 ){
656 0 : token = tkEXPLAIN;
657 : }else{
658 0 : token = tkOTHER;
659 : }
660 0 : break;
661 : }
662 : default: {
663 0 : token = tkOTHER;
664 : break;
665 : }
666 : }
667 0 : zSql += nId-1;
668 : }else{
669 : /* Operators and special symbols */
670 0 : token = tkOTHER;
671 : }
672 : break;
673 : }
674 : }
675 0 : state = trans[state][token];
676 0 : zSql++;
677 : }
678 0 : return state==0;
679 : }
|