1 : /*
2 : ** 2001 September 15
3 : **
4 : ** The author disclaims copyright to this source code. In place of
5 : ** a legal notice, here is a blessing:
6 : **
7 : ** May you do good and not evil.
8 : ** May you find forgiveness for yourself and forgive others.
9 : ** May you share freely, never taking more than you give.
10 : **
11 : *************************************************************************
12 : ** A tokenizer for SQL
13 : **
14 : ** This file contains C code that splits an SQL input string up into
15 : ** individual tokens and sends those tokens one-by-one over to the
16 : ** parser for analysis.
17 : **
18 : ** $Id$
19 : */
20 : #include "sqliteInt.h"
21 : #include "os.h"
22 : #include <ctype.h>
23 : #include <stdlib.h>
24 :
/*
** The charMap() macro maps alphabetic characters into their
** lower-case ASCII equivalent.  On ASCII machines, this is just
** an upper-to-lower case map.  On EBCDIC machines we also need
** to adjust the encoding.  Only alphabetic characters and underscores
** need to be translated.
**
** The macro argument is wrapped in parentheses before it is cast to
** unsigned char.  Without them an expression argument such as
** charMap(a+b) would expand to "(unsigned char)a+b", applying the cast
** to the first operand only and indexing the table with the wrong value.
*/
#ifdef SQLITE_ASCII
# define charMap(X) sqlite3UpperToLower[(unsigned char)(X)]
#endif
#ifdef SQLITE_EBCDIC
# define charMap(X) ebcdicToAscii[(unsigned char)(X)]
/*
** Indexed by EBCDIC code point.  Entries for the upper- and lower-case
** letters hold the lower-case ASCII letter (97..122), the entry at 0x6D
** holds 95 (ASCII underscore), and every other entry is 0.
*/
const unsigned char ebcdicToAscii[] = {
/* 0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 1x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 2x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 3x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 4x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 5x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 95,  0,  0,  /* 6x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 7x */
   0, 97, 98, 99,100,101,102,103,104,105,  0,  0,  0,  0,  0,  0,  /* 8x */
   0,106,107,108,109,110,111,112,113,114,  0,  0,  0,  0,  0,  0,  /* 9x */
   0,  0,115,116,117,118,119,120,121,122,  0,  0,  0,  0,  0,  0,  /* Ax */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Bx */
   0, 97, 98, 99,100,101,102,103,104,105,  0,  0,  0,  0,  0,  0,  /* Cx */
   0,106,107,108,109,110,111,112,113,114,  0,  0,  0,  0,  0,  0,  /* Dx */
   0,  0,115,116,117,118,119,120,121,122,  0,  0,  0,  0,  0,  0,  /* Ex */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Fx */
};
#endif
57 :
58 : /*
59 : ** The sqlite3KeywordCode function looks up an identifier to determine if
60 : ** it is a keyword. If it is a keyword, the token code of that keyword is
61 : ** returned. If the input is not a keyword, TK_ID is returned.
62 : **
63 : ** The implementation of this routine was generated by a program,
64 : ** mkkeywordhash.c, located in the tool subdirectory of the distribution.
65 : ** The output of the mkkeywordhash.c program is written into a file
66 : ** named keywordhash.h and then included into this source file by
67 : ** the #include below.
68 : */
69 : #include "keywordhash.h"
70 :
71 :
/*
** IdChar(X) is true if X is a character that may appear in an
** identifier and false otherwise.
**
** IdChar() stores its argument in a variable named "c", so an integer
** variable of that name must be in scope wherever the macro is used.
**
** For ASCII, any character with the high-order bit set is allowed in
** an identifier.  A 7-bit character is allowed if it is 0x20 or greater
** and its entry in sqlite3IsIdChar[] (indexed by X-0x20) is non-zero.
**
** For EBCDIC, the rules are more complex but have the same end result:
** the character must be 0x42 or greater and its entry in
** sqlite3IsIdChar[] (indexed by X-0x40) must be non-zero.
**
** Ticket #1066.  The SQL standard does not allow '$' in the
** middle of identifiers.  But many SQL implementations do.
** SQLite will allow '$' in identifiers for compatibility.
** But the feature is undocumented.
*/
#ifdef SQLITE_ASCII
const char sqlite3IsIdChar[] = {
  /* 0x20 - 0x2f */  0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x30 - 0x3f */  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
  /* 0x40 - 0x4f */  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50 - 0x5f */  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
  /* 0x60 - 0x6f */  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70 - 0x7f */  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
};
#define IdChar(C) \
  ((c=(C)), ((c&0x80)!=0 || (c>0x1f && sqlite3IsIdChar[c-0x20])))
#endif
#ifdef SQLITE_EBCDIC
const char sqlite3IsIdChar[] = {
  /* 0x40 - 0x4f */  0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
  /* 0x50 - 0x5f */  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0,
  /* 0x60 - 0x6f */  0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0,
  /* 0x70 - 0x7f */  0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
  /* 0x80 - 0x8f */  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0,
  /* 0x90 - 0x9f */  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0,
  /* 0xa0 - 0xaf */  1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,
  /* 0xb0 - 0xbf */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xc0 - 0xcf */  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
  /* 0xd0 - 0xdf */  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
  /* 0xe0 - 0xef */  0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
  /* 0xf0 - 0xff */  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0,
};
#define IdChar(C) \
  ((c=(C)), (c>=0x42 && sqlite3IsIdChar[c-0x40]))
#endif
118 :
119 :
120 : /*
121 : ** Return the length of the token that begins at z[0].
122 : ** Store the token type in *tokenType before returning.
123 : */
124 19156 : static int getToken(const unsigned char *z, int *tokenType){
125 : int i, c;
126 19156 : switch( *z ){
127 : case ' ': case '\t': case '\n': case '\f': case '\r': {
128 6688 : for(i=1; isspace(z[i]); i++){}
129 6688 : *tokenType = TK_SPACE;
130 6688 : return i;
131 : }
132 : case '-': {
133 0 : if( z[1]=='-' ){
134 0 : for(i=2; (c=z[i])!=0 && c!='\n'; i++){}
135 0 : *tokenType = TK_COMMENT;
136 0 : return i;
137 : }
138 0 : *tokenType = TK_MINUS;
139 0 : return 1;
140 : }
141 : case '(': {
142 673 : *tokenType = TK_LP;
143 673 : return 1;
144 : }
145 : case ')': {
146 673 : *tokenType = TK_RP;
147 673 : return 1;
148 : }
149 : case ';': {
150 39 : *tokenType = TK_SEMI;
151 39 : return 1;
152 : }
153 : case '+': {
154 0 : *tokenType = TK_PLUS;
155 0 : return 1;
156 : }
157 : case '*': {
158 37 : *tokenType = TK_STAR;
159 37 : return 1;
160 : }
161 : case '/': {
162 0 : if( z[1]!='*' || z[2]==0 ){
163 0 : *tokenType = TK_SLASH;
164 0 : return 1;
165 : }
166 0 : for(i=3, c=z[2]; (c!='*' || z[i]!='/') && (c=z[i])!=0; i++){}
167 0 : if( c ) i++;
168 0 : *tokenType = TK_COMMENT;
169 0 : return i;
170 : }
171 : case '%': {
172 0 : *tokenType = TK_REM;
173 0 : return 1;
174 : }
175 : case '=': {
176 431 : *tokenType = TK_EQ;
177 431 : return 1 + (z[1]=='=');
178 : }
179 : case '<': {
180 0 : if( (c=z[1])=='=' ){
181 0 : *tokenType = TK_LE;
182 0 : return 2;
183 0 : }else if( c=='>' ){
184 0 : *tokenType = TK_NE;
185 0 : return 2;
186 0 : }else if( c=='<' ){
187 0 : *tokenType = TK_LSHIFT;
188 0 : return 2;
189 : }else{
190 0 : *tokenType = TK_LT;
191 0 : return 1;
192 : }
193 : }
194 : case '>': {
195 2 : if( (c=z[1])=='=' ){
196 0 : *tokenType = TK_GE;
197 0 : return 2;
198 2 : }else if( c=='>' ){
199 0 : *tokenType = TK_RSHIFT;
200 0 : return 2;
201 : }else{
202 2 : *tokenType = TK_GT;
203 2 : return 1;
204 : }
205 : }
206 : case '!': {
207 5 : if( z[1]!='=' ){
208 0 : *tokenType = TK_ILLEGAL;
209 0 : return 2;
210 : }else{
211 5 : *tokenType = TK_NE;
212 5 : return 2;
213 : }
214 : }
215 : case '|': {
216 0 : if( z[1]!='|' ){
217 0 : *tokenType = TK_BITOR;
218 0 : return 1;
219 : }else{
220 0 : *tokenType = TK_CONCAT;
221 0 : return 2;
222 : }
223 : }
224 : case ',': {
225 1766 : *tokenType = TK_COMMA;
226 1766 : return 1;
227 : }
228 : case '&': {
229 0 : *tokenType = TK_BITAND;
230 0 : return 1;
231 : }
232 : case '~': {
233 0 : *tokenType = TK_BITNOT;
234 0 : return 1;
235 : }
236 : case '`':
237 : case '\'':
238 : case '"': {
239 738 : int delim = z[0];
240 8103 : for(i=1; (c=z[i])!=0; i++){
241 8103 : if( c==delim ){
242 739 : if( z[i+1]==delim ){
243 1 : i++;
244 : }else{
245 738 : break;
246 : }
247 : }
248 : }
249 738 : if( c ){
250 738 : *tokenType = TK_STRING;
251 738 : return i+1;
252 : }else{
253 0 : *tokenType = TK_ILLEGAL;
254 0 : return i;
255 : }
256 : }
257 : case '.': {
258 : #ifndef SQLITE_OMIT_FLOATING_POINT
259 193 : if( !isdigit(z[1]) )
260 : #endif
261 : {
262 193 : *tokenType = TK_DOT;
263 193 : return 1;
264 : }
265 : /* If the next character is a digit, this is a floating point
266 : ** number that begins with ".". Fall thru into the next case */
267 : }
268 : case '0': case '1': case '2': case '3': case '4':
269 : case '5': case '6': case '7': case '8': case '9': {
270 244 : *tokenType = TK_INTEGER;
271 244 : for(i=0; isdigit(z[i]); i++){}
272 : #ifndef SQLITE_OMIT_FLOATING_POINT
273 244 : if( z[i]=='.' ){
274 0 : i++;
275 0 : while( isdigit(z[i]) ){ i++; }
276 0 : *tokenType = TK_FLOAT;
277 : }
278 244 : if( (z[i]=='e' || z[i]=='E') &&
279 : ( isdigit(z[i+1])
280 : || ((z[i+1]=='+' || z[i+1]=='-') && isdigit(z[i+2]))
281 : )
282 : ){
283 0 : i += 2;
284 0 : while( isdigit(z[i]) ){ i++; }
285 0 : *tokenType = TK_FLOAT;
286 : }
287 : #endif
288 488 : while( IdChar(z[i]) ){
289 0 : *tokenType = TK_ILLEGAL;
290 0 : i++;
291 : }
292 244 : return i;
293 : }
294 : case '[': {
295 0 : for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){}
296 0 : *tokenType = TK_ID;
297 0 : return i;
298 : }
299 : case '?': {
300 25 : *tokenType = TK_VARIABLE;
301 25 : for(i=1; isdigit(z[i]); i++){}
302 25 : return i;
303 : }
304 : case '#': {
305 163 : for(i=1; isdigit(z[i]); i++){}
306 163 : if( i>1 ){
307 : /* Parameters of the form #NNN (where NNN is a number) are used
308 : ** internally by sqlite3NestedParse. */
309 163 : *tokenType = TK_REGISTER;
310 163 : return i;
311 : }
312 : /* Fall through into the next case if the '#' is not followed by
313 : ** a digit. Try to match #AAAA where AAAA is a parameter name. */
314 : }
315 : #ifndef SQLITE_OMIT_TCL_VARIABLE
316 : case '$':
317 : #endif
318 : case '@': /* For compatibility with MS SQL Server */
319 : case ':': {
320 25 : int n = 0;
321 25 : *tokenType = TK_VARIABLE;
322 120 : for(i=1; (c=z[i])!=0; i++){
323 209 : if( IdChar(c) ){
324 95 : n++;
325 : #ifndef SQLITE_OMIT_TCL_VARIABLE
326 19 : }else if( c=='(' && n>0 ){
327 : do{
328 0 : i++;
329 0 : }while( (c=z[i])!=0 && !isspace(c) && c!=')' );
330 0 : if( c==')' ){
331 0 : i++;
332 : }else{
333 0 : *tokenType = TK_ILLEGAL;
334 : }
335 0 : break;
336 19 : }else if( c==':' && z[i+1]==':' ){
337 0 : i++;
338 : #endif
339 : }else{
340 : break;
341 : }
342 : }
343 25 : if( n==0 ) *tokenType = TK_ILLEGAL;
344 25 : return i;
345 : }
346 : #ifndef SQLITE_OMIT_BLOB_LITERAL
347 : case 'x': case 'X': {
348 1 : if( (c=z[1])=='\'' || c=='"' ){
349 0 : int delim = c;
350 0 : *tokenType = TK_BLOB;
351 0 : for(i=2; (c=z[i])!=0; i++){
352 0 : if( c==delim ){
353 0 : if( i%2 ) *tokenType = TK_ILLEGAL;
354 0 : break;
355 : }
356 0 : if( !isxdigit(c) ){
357 0 : *tokenType = TK_ILLEGAL;
358 0 : return i;
359 : }
360 : }
361 0 : if( c ) i++;
362 0 : return i;
363 : }
364 : /* Otherwise fall through to the next case */
365 : }
366 : #endif
367 : default: {
368 7454 : if( !IdChar(*z) ){
369 : break;
370 : }
371 7454 : for(i=1; IdChar(z[i]); i++){}
372 7454 : *tokenType = keywordCode((char*)z, i);
373 7454 : return i;
374 : }
375 : }
376 0 : *tokenType = TK_ILLEGAL;
377 0 : return 1;
378 : }
/*
** Externally visible wrapper around the file-local getToken().  Stores
** the type of the token that begins at z[0] in *tokenType and returns
** the length of that token.
*/
int sqlite3GetToken(const unsigned char *z, int *tokenType){
  int nToken = getToken(z, tokenType);
  return nToken;
}
382 :
383 : /*
384 : ** Run the parser on the given SQL string. The parser structure is
385 : ** passed in. An SQLITE_ status code is returned. If an error occurs
386 : ** and pzErrMsg!=NULL then an error message might be written into
387 : ** memory obtained from malloc() and *pzErrMsg made to point to that
388 : ** error message. Or maybe not.
389 : */
390 1036 : int sqlite3RunParser(Parse *pParse, const char *zSql, char **pzErrMsg){
391 1036 : int nErr = 0;
392 : int i;
393 : void *pEngine;
394 : int tokenType;
395 1036 : int lastTokenParsed = -1;
396 1036 : sqlite3 *db = pParse->db;
397 : extern void *sqlite3ParserAlloc(void*(*)(size_t));
398 : extern void sqlite3ParserFree(void*, void(*)(void*));
399 : extern void sqlite3Parser(void*, int, Token, Parse*);
400 :
401 1036 : if( db->activeVdbeCnt==0 ){
402 917 : db->u1.isInterrupted = 0;
403 : }
404 1036 : pParse->rc = SQLITE_OK;
405 1036 : i = 0;
406 1036 : pEngine = sqlite3ParserAlloc((void*(*)(size_t))sqlite3MallocX);
407 1036 : if( pEngine==0 ){
408 0 : return SQLITE_NOMEM;
409 : }
410 : assert( pParse->sLastToken.dyn==0 );
411 : assert( pParse->pNewTable==0 );
412 : assert( pParse->pNewTrigger==0 );
413 : assert( pParse->nVar==0 );
414 : assert( pParse->nVarExpr==0 );
415 : assert( pParse->nVarExprAlloc==0 );
416 : assert( pParse->apVarExpr==0 );
417 1036 : pParse->zTail = pParse->zSql = zSql;
418 21228 : while( !sqlite3MallocFailed() && zSql[i]!=0 ){
419 : assert( i>=0 );
420 19156 : pParse->sLastToken.z = (u8*)&zSql[i];
421 : assert( pParse->sLastToken.dyn==0 );
422 19156 : pParse->sLastToken.n = getToken((unsigned char*)&zSql[i],&tokenType);
423 19156 : i += pParse->sLastToken.n;
424 19156 : switch( tokenType ){
425 : case TK_SPACE:
426 : case TK_COMMENT: {
427 6688 : if( db->u1.isInterrupted ){
428 0 : pParse->rc = SQLITE_INTERRUPT;
429 0 : sqlite3SetString(pzErrMsg, "interrupt", (char*)0);
430 0 : goto abort_parse;
431 : }
432 6688 : break;
433 : }
434 : case TK_ILLEGAL: {
435 0 : if( pzErrMsg ){
436 0 : sqliteFree(*pzErrMsg);
437 0 : *pzErrMsg = sqlite3MPrintf("unrecognized token: \"%T\"",
438 : &pParse->sLastToken);
439 : }
440 0 : nErr++;
441 0 : goto abort_parse;
442 : }
443 : case TK_SEMI: {
444 39 : pParse->zTail = &zSql[i];
445 : /* Fall thru into the default case */
446 : }
447 : default: {
448 12468 : sqlite3Parser(pEngine, tokenType, pParse->sLastToken, pParse);
449 12468 : lastTokenParsed = tokenType;
450 12468 : if( pParse->rc!=SQLITE_OK ){
451 0 : goto abort_parse;
452 : }
453 : break;
454 : }
455 : }
456 : }
457 1036 : abort_parse:
458 1036 : if( zSql[i]==0 && nErr==0 && pParse->rc==SQLITE_OK ){
459 1036 : if( lastTokenParsed!=TK_SEMI ){
460 997 : sqlite3Parser(pEngine, TK_SEMI, pParse->sLastToken, pParse);
461 997 : pParse->zTail = &zSql[i];
462 : }
463 1036 : sqlite3Parser(pEngine, 0, pParse->sLastToken, pParse);
464 : }
465 1036 : sqlite3ParserFree(pEngine, sqlite3FreeX);
466 1036 : if( sqlite3MallocFailed() ){
467 0 : pParse->rc = SQLITE_NOMEM;
468 : }
469 1036 : if( pParse->rc!=SQLITE_OK && pParse->rc!=SQLITE_DONE && pParse->zErrMsg==0 ){
470 0 : sqlite3SetString(&pParse->zErrMsg, sqlite3ErrStr(pParse->rc), (char*)0);
471 : }
472 1036 : if( pParse->zErrMsg ){
473 620 : if( pzErrMsg && *pzErrMsg==0 ){
474 310 : *pzErrMsg = pParse->zErrMsg;
475 : }else{
476 0 : sqliteFree(pParse->zErrMsg);
477 : }
478 310 : pParse->zErrMsg = 0;
479 310 : if( !nErr ) nErr++;
480 : }
481 1036 : if( pParse->pVdbe && pParse->nErr>0 && pParse->nested==0 ){
482 0 : sqlite3VdbeDelete(pParse->pVdbe);
483 0 : pParse->pVdbe = 0;
484 : }
485 : #ifndef SQLITE_OMIT_SHARED_CACHE
486 1036 : if( pParse->nested==0 ){
487 930 : sqliteFree(pParse->aTableLock);
488 930 : pParse->aTableLock = 0;
489 930 : pParse->nTableLock = 0;
490 : }
491 : #endif
492 :
493 1036 : if( !IN_DECLARE_VTAB ){
494 : /* If the pParse->declareVtab flag is set, do not delete any table
495 : ** structure built up in pParse->pNewTable. The calling code (see vtab.c)
496 : ** will take responsibility for freeing the Table structure.
497 : */
498 1036 : sqlite3DeleteTable(pParse->pNewTable);
499 : }
500 :
501 1036 : sqlite3DeleteTrigger(pParse->pNewTrigger);
502 1036 : sqliteFree(pParse->apVarExpr);
503 1036 : if( nErr>0 && (pParse->rc==SQLITE_OK || pParse->rc==SQLITE_DONE) ){
504 1 : pParse->rc = SQLITE_ERROR;
505 : }
506 1036 : return nErr;
507 : }
|