PHP  
 PHP: Test and Code Coverage Analysis
downloads | QA | documentation | faq | getting help | mailing lists | reporting bugs | php.net sites | links | my php.net 
 

LTP GCOV extension - code coverage report
Current view: directory - pdo_sqlite/sqlite/src - utf.c
Test: PHP Code Coverage
Date: 2009-11-19 Instrumented lines: 115
Code covered: 7.0 % Executed lines: 8
Legend: not executed executed

       1                 : /*
       2                 : ** 2004 April 13
       3                 : **
       4                 : ** The author disclaims copyright to this source code.  In place of
       5                 : ** a legal notice, here is a blessing:
       6                 : **
       7                 : **    May you do good and not evil.
       8                 : **    May you find forgiveness for yourself and forgive others.
       9                 : **    May you share freely, never taking more than you give.
      10                 : **
      11                 : *************************************************************************
      12                 : ** This file contains routines used to translate between UTF-8, 
      13                 : ** UTF-16, UTF-16BE, and UTF-16LE.
      14                 : **
      15                 : ** $Id$
      16                 : **
      17                 : ** Notes on UTF-8:
      18                 : **
      19                 : **   Byte-0    Byte-1    Byte-2    Byte-3    Value
      20                 : **  0xxxxxxx                                 00000000 00000000 0xxxxxxx
      21                 : **  110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
      22                 : **  1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx
      23                 : **  11110uuu  10uuzzzz  10yyyyyy  10xxxxxx   000uuuuu zzzzyyyy yyxxxxxx
      24                 : **
      25                 : **
      26                 : ** Notes on UTF-16:  (with wwww+1==uuuuu)
      27                 : **
      28                 : **      Word-0               Word-1          Value
      29                 : **  110110ww wwzzzzyy   110111yy yyxxxxxx    000uuuuu zzzzyyyy yyxxxxxx
      30                 : **  zzzzyyyy yyxxxxxx                        00000000 zzzzyyyy yyxxxxxx
      31                 : **
      32                 : **
      33                 : ** BOM or Byte Order Mark:
      34                 : **     0xff 0xfe   little-endian utf-16 follows
      35                 : **     0xfe 0xff   big-endian utf-16 follows
      36                 : **
      37                 : **
      38                 : ** Handling of malformed strings:
      39                 : **
      40                 : ** SQLite accepts and processes malformed strings without an error wherever
      41                 : ** possible. However this is not possible when converting between UTF-8 and
      42                 : ** UTF-16.
      43                 : **
      44                 : ** When converting malformed UTF-8 strings to UTF-16, one instance of the
      45                 : ** replacement character U+FFFD is emitted for each byte that cannot be
      46                 : ** interpreted as part of a valid unicode character.
      47                 : **
      48                 : ** When converting malformed UTF-16 strings to UTF-8, one instance of the
      49                 : ** replacement character U+FFFD is emitted for each pair of bytes that
      50                 : ** cannot be interpreted as part of a valid unicode character.
      51                 : **
      52                 : ** This file contains the following public routines:
      53                 : **
      54                 : ** sqlite3VdbeMemTranslate() - Translate the encoding used by a Mem* string.
      55                 : ** sqlite3VdbeMemHandleBom() - Handle byte-order-marks in UTF16 Mem* strings.
      56                 : ** sqlite3utf16ByteLen()     - Calculate byte-length of a void* UTF16 string.
      57                 : ** sqlite3utf8CharLen()      - Calculate char-length of a char* UTF8 string.
      58                 : ** sqlite3utf8LikeCompare()  - Do a LIKE match given two UTF8 char* strings.
      59                 : **
      60                 : */
      61                 : #include "sqliteInt.h"
      62                 : #include <assert.h>
      63                 : #include "vdbeInt.h"
      64                 : 
      65                 : /*
      66                 : ** Integer constant 1, referenced by the SQLITE_BIGENDIAN and
      67                 : ** SQLITE_LITTLEENDIAN macros (their definitions are not in this listing).
      68                 : */
      69                 : const int sqlite3one = 1;  /* not static: has external linkage */
      70                 : 
      71                 : /*
      72                 : ** Indexed by the first byte of a UTF-8 character; yields the number of
      73                 : ** trailing bytes expected (0-3). The value 4 marks a byte that is not a
      74                 : ** legal first byte; READ_UTF8 turns such a byte into U+FFFD.
      75                 : */
      76                 : static const u8 xtra_utf8_bytes[256]  = {
      77                 : /* 0xxxxxxx: 0x00-0x7F, no trailing bytes */
      78                 : 0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
      79                 : 0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
      80                 : 0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
      81                 : 0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
      82                 : 0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
      83                 : 0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
      84                 : 0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
      85                 : 0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
      86                 : 
      87                 : /* 10wwwwww: 0x80-0xBF, continuation bytes, illegal as a first byte */
      88                 : 4, 4, 4, 4, 4, 4, 4, 4,     4, 4, 4, 4, 4, 4, 4, 4,
      89                 : 4, 4, 4, 4, 4, 4, 4, 4,     4, 4, 4, 4, 4, 4, 4, 4,
      90                 : 4, 4, 4, 4, 4, 4, 4, 4,     4, 4, 4, 4, 4, 4, 4, 4,
      91                 : 4, 4, 4, 4, 4, 4, 4, 4,     4, 4, 4, 4, 4, 4, 4, 4,
      92                 : 
      93                 : /* 110yyyyy: 0xC0-0xDF, one trailing byte */
      94                 : 1, 1, 1, 1, 1, 1, 1, 1,     1, 1, 1, 1, 1, 1, 1, 1,
      95                 : 1, 1, 1, 1, 1, 1, 1, 1,     1, 1, 1, 1, 1, 1, 1, 1,
      96                 : 
      97                 : /* 1110zzzz: 0xE0-0xEF, two trailing bytes */
      98                 : 2, 2, 2, 2, 2, 2, 2, 2,     2, 2, 2, 2, 2, 2, 2, 2,
      99                 : 
     100                 : /* 11110uuu: 0xF0-0xF7, three trailing bytes; 0xF8-0xFF are illegal */
     101                 : 3, 3, 3, 3, 3, 3, 3, 3,     4, 4, 4, 4, 4, 4, 4, 4,
     102                 : };
     103                 : 
     104                 : /*
     105                 : ** Indexed by the number of trailing bytes (0-3). Each entry is the sum of
     106                 : ** the UTF-8 marker bits (the lead-byte prefix and the 0x80 of every trailing
     107                 : ** byte) at the positions they occupy once READ_UTF8 has shifted the raw
     108                 : ** bytes together; READ_UTF8 subtracts it to leave only the payload bits.
     109                 : */
     110                 : static const int xtra_utf8_bits[] =  {
     111                 :   0,              /* single-byte character: no marker bits */
     112                 :   12416,          /* (0xC0 << 6) + (0x80) */
     113                 :   925824,         /* (0xE0 << 12) + (0x80 << 6) + (0x80) */
     114                 :   63447168        /* (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
     115                 : };
     116                 : 
     117                 : /*
     118                 : ** If a UTF-8 character contains N extra bytes (N bytes follow the
     119                 : ** initial byte so that the total character length is N+1) then masking
     120                 : ** the decoded value with utf_mask[N] must produce a non-zero result.
     121                 : ** Otherwise, we have an (illegal) overlong encoding.
     122                 : */
     123                 : static const int utf_mask[] = {
     124                 :   0x00000000,  /* N=0: READ_UTF8 never applies the test to single bytes */
     125                 :   0xffffff80,  /* N=1: decoded value must be at least 0x80 */
     126                 :   0xfffff800,  /* N=2: decoded value must be at least 0x800 */
     127                 :   0xffff0000,  /* N=3: decoded value must be at least 0x10000 */
     128                 : };
     129                 : /* READ_UTF8(zIn, c): decode one UTF-8 character at zIn into c and advance zIn past the bytes consumed. Illegal first bytes (1 byte consumed), overlong forms, 0xD800-0xDFFF, 0xFFFE and 0xFFFF all become 0xFFFD. Trailing bytes are neither bounds-checked nor verified to be 10xxxxxx; zIn and c are expanded several times. */
     130                 : #define READ_UTF8(zIn, c) { \
     131                 :   int xtra;                                            \
     132                 :   c = *(zIn)++;                                        \
     133                 :   xtra = xtra_utf8_bytes[c];                           \
     134                 :   switch( xtra ){                                      \
     135                 :     case 4: c = (int)0xFFFD; break;                    \
     136                 :     case 3: c = (c<<6) + *(zIn)++;  /* fall through */ \
     137                 :     case 2: c = (c<<6) + *(zIn)++;  /* fall through */ \
     138                 :     case 1: c = (c<<6) + *(zIn)++;                     \
     139                 :     c -= xtra_utf8_bits[xtra];                         \
     140                 :     if( (utf_mask[xtra]&c)==0                          \
     141                 :         || (c&0xFFFFF800)==0xD800                      \
     142                 :         || (c&0xFFFFFFFE)==0xFFFE ){  c = 0xFFFD; }    \
     143                 :   }                                                    \
     144                 : }
     145               0 : int sqlite3ReadUtf8(const unsigned char *z){  /* Decode and return the single UTF-8 character that starts at z. */
     146                 :   int c;
     147               0 :   READ_UTF8(z, c);  /* advances only this function's copy of z; malformed input yields 0xFFFD */
     148               0 :   return c;
     149                 : }
     150                 : /* SKIP_UTF8(zIn): advance zIn past one UTF-8 character, looking only at its first byte. NOTE(review): illegal first bytes have table value 4, so they move zIn by 5 bytes, whereas READ_UTF8 consumes 1 byte for them - confirm this is intended. */
     151                 : #define SKIP_UTF8(zIn) {                               \
     152                 :   zIn += (xtra_utf8_bytes[*(u8 *)zIn] + 1);            \
     153                 : }
     154                 : /* WRITE_UTF8(zOut, c): store the UTF-8 encoding of c at zOut (1 byte below 0x80, 2 below 0x800, 3 below 0x10000, otherwise 4) and advance zOut past it. Both arguments are expanded several times and are not parenthesized. */
     155                 : #define WRITE_UTF8(zOut, c) {                          \
     156                 :   if( c<0x00080 ){                                     \
     157                 :     *zOut++ = (c&0xFF);                                \
     158                 :   }                                                    \
     159                 :   else if( c<0x00800 ){                                \
     160                 :     *zOut++ = 0xC0 + ((c>>6)&0x1F);                    \
     161                 :     *zOut++ = 0x80 + (c & 0x3F);                       \
     162                 :   }                                                    \
     163                 :   else if( c<0x10000 ){                                \
     164                 :     *zOut++ = 0xE0 + ((c>>12)&0x0F);                   \
     165                 :     *zOut++ = 0x80 + ((c>>6) & 0x3F);                  \
     166                 :     *zOut++ = 0x80 + (c & 0x3F);                       \
     167                 :   }else{                                               \
     168                 :     *zOut++ = 0xF0 + ((c>>18) & 0x07);                 \
     169                 :     *zOut++ = 0x80 + ((c>>12) & 0x3F);                 \
     170                 :     *zOut++ = 0x80 + ((c>>6) & 0x3F);                  \
     171                 :     *zOut++ = 0x80 + (c & 0x3F);                       \
     172                 :   }                                                    \
     173                 : }
     174                 : /* WRITE_UTF16LE(zOut, c): store c at zOut as little-endian UTF-16 and advance zOut: one 16-bit unit (2 bytes) when c<=0xFFFF, otherwise a 0xD8xx/0xDCxx surrogate pair (4 bytes); the low byte of each unit is written first. */
     175                 : #define WRITE_UTF16LE(zOut, c) {                                \
     176                 :   if( c<=0xFFFF ){                                              \
     177                 :     *zOut++ = (c&0x00FF);                                       \
     178                 :     *zOut++ = ((c>>8)&0x00FF);                                  \
     179                 :   }else{                                                        \
     180                 :     *zOut++ = (((c>>10)&0x003F) + (((c-0x10000)>>10)&0x00C0));  \
     181                 :     *zOut++ = (0x00D8 + (((c-0x10000)>>18)&0x03));              \
     182                 :     *zOut++ = (c&0x00FF);                                       \
     183                 :     *zOut++ = (0x00DC + ((c>>8)&0x03));                         \
     184                 :   }                                                             \
     185                 : }
     186                 : /* WRITE_UTF16BE(zOut, c): store c at zOut as big-endian UTF-16 and advance zOut: one 16-bit unit (2 bytes) when c<=0xFFFF, otherwise a 0xD8xx/0xDCxx surrogate pair (4 bytes); the high byte of each unit is written first. */
     187                 : #define WRITE_UTF16BE(zOut, c) {                                \
     188                 :   if( c<=0xFFFF ){                                              \
     189                 :     *zOut++ = ((c>>8)&0x00FF);                                  \
     190                 :     *zOut++ = (c&0x00FF);                                       \
     191                 :   }else{                                                        \
     192                 :     *zOut++ = (0x00D8 + (((c-0x10000)>>18)&0x03));              \
     193                 :     *zOut++ = (((c>>10)&0x003F) + (((c-0x10000)>>10)&0x00C0));  \
     194                 :     *zOut++ = (0x00DC + ((c>>8)&0x03));                         \
     195                 :     *zOut++ = (c&0x00FF);                                       \
     196                 :   }                                                             \
     197                 : }
     198                 : /* READ_UTF16LE(zIn, c): read one little-endian 16-bit unit at zIn into c; if it lies in 0xD800..0xE000 a second unit is read and the two are combined as a surrogate pair. zIn advances by 2 or 4 bytes with no bounds check. NOTE(review): the upper bound is inclusive, so 0xE000 (outside the 0xD800-0xDFFF block) also takes the pair path - confirm. */
     199                 : #define READ_UTF16LE(zIn, c){                                         \
     200                 :   c = (*zIn++);                                                       \
     201                 :   c += ((*zIn++)<<8);                                                 \
     202                 :   if( c>=0xD800 && c<=0xE000 ){                                       \
     203                 :     int c2 = (*zIn++);                                                \
     204                 :     c2 += ((*zIn++)<<8);                                              \
     205                 :     c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10);   \
     206                 :     if( (c & 0xFFFF0000)==0 ) c = 0xFFFD;                             \
     207                 :   }                                                                   \
     208                 : }
     209                 : /* READ_UTF16BE(zIn, c): read one big-endian 16-bit unit at zIn into c; if it lies in 0xD800..0xE000 a second unit is read and the two are combined as a surrogate pair. zIn advances by 2 or 4 bytes with no bounds check. NOTE(review): the upper bound is inclusive, so 0xE000 (outside the 0xD800-0xDFFF block) also takes the pair path - confirm. */
     210                 : #define READ_UTF16BE(zIn, c){                                         \
     211                 :   c = ((*zIn++)<<8);                                                  \
     212                 :   c += (*zIn++);                                                      \
     213                 :   if( c>=0xD800 && c<=0xE000 ){                                       \
     214                 :     int c2 = ((*zIn++)<<8);                                           \
     215                 :     c2 += (*zIn++);                                                   \
     216                 :     c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10);   \
     217                 :     if( (c & 0xFFFF0000)==0 ) c = 0xFFFD;                             \
     218                 :   }                                                                   \
     219                 : }
     220                 : /* SKIP_UTF16BE(zIn) / SKIP_UTF16LE(zIn): advance zIn past one UTF-16 character without decoding it - 4 bytes when the high byte of the first unit is 0xD8..0xDF or the unit is exactly 0xE000, otherwise 2 bytes. No bounds check is made. */
     221                 : #define SKIP_UTF16BE(zIn){                                            \
     222                 :   if( *zIn>=0xD8 && (*zIn<0xE0 || (*zIn==0xE0 && *(zIn+1)==0x00)) ){  \
     223                 :     zIn += 4;                                                         \
     224                 :   }else{                                                              \
     225                 :     zIn += 2;                                                         \
     226                 :   }                                                                   \
     227                 : }
     228                 : #define SKIP_UTF16LE(zIn){                                            \
     229                 :   zIn++;                                                              \
     230                 :   if( *zIn>=0xD8 && (*zIn<0xE0 || (*zIn==0xE0 && *(zIn-1)==0x00)) ){  \
     231                 :     zIn += 3;                                                         \
     232                 :   }else{                                                              \
     233                 :     zIn += 1;                                                         \
     234                 :   }                                                                   \
     235                 : }
     236                 : /* RSKIP_UTF16LE(zIn) / RSKIP_UTF16BE(zIn): move zIn backwards by 4 bytes when the unit whose second byte zIn addresses on entry is in 0xD800..0xE000, otherwise by 2 bytes (same range test as the SKIP_ macros). NOTE(review): the expected entry position relative to the character being skipped is not documented here - check the callers. */
     237                 : #define RSKIP_UTF16LE(zIn){                                            \
     238                 :   if( *zIn>=0xD8 && (*zIn<0xE0 || (*zIn==0xE0 && *(zIn-1)==0x00)) ){  \
     239                 :     zIn -= 4;                                                         \
     240                 :   }else{                                                              \
     241                 :     zIn -= 2;                                                         \
     242                 :   }                                                                   \
     243                 : }
     244                 : #define RSKIP_UTF16BE(zIn){                                            \
     245                 :   zIn--;                                                              \
     246                 :   if( *zIn>=0xD8 && (*zIn<0xE0 || (*zIn==0xE0 && *(zIn+1)==0x00)) ){  \
     247                 :     zIn -= 3;                                                         \
     248                 :   }else{                                                              \
     249                 :     zIn -= 1;                                                         \
     250                 :   }                                                                   \
     251                 : }
     252                 : 
     253                 : /*
     254                 : ** If the TRANSLATE_TRACE macro is defined, the value of each Mem is
     255                 : ** printed on stderr on the way into and out of sqlite3VdbeMemTranslate().
     256                 : */ 
     257                 : /* #define TRANSLATE_TRACE 1 */
     258                 : 
     259                 : #ifndef SQLITE_OMIT_UTF16
     260                 : /*
     261                 : ** Convert the string held in pMem to the desiredEnc text encoding. Returns
     262                 : ** SQLITE_OK, or SQLITE_NOMEM if memory cannot be obtained. The asserts below
     263                 : ** require a string value (MEM_Str) whose encoding differs from desiredEnc.
     264                 : */
     265               0 : int sqlite3VdbeMemTranslate(Mem *pMem, u8 desiredEnc){
     266                 :   unsigned char zShort[NBFS]; /* Temporary short output buffer */
     267                 :   int len;                    /* Maximum length of output string in bytes */
     268                 :   unsigned char *zOut;                  /* Output buffer */
     269                 :   unsigned char *zIn;                   /* Input iterator */
     270                 :   unsigned char *zTerm;                 /* End of input */
     271                 :   unsigned char *z;                     /* Output iterator */
     272                 :   unsigned int c;                       /* Character currently being converted */
     273                 : 
     274                 :   assert( pMem->flags&MEM_Str );
     275                 :   assert( pMem->enc!=desiredEnc );
     276                 :   assert( pMem->enc!=0 );
     277                 :   assert( pMem->n>=0 );
     278                 : 
     279                 : #if defined(TRANSLATE_TRACE) && defined(SQLITE_DEBUG)
     280                 :   {
     281                 :     char zBuf[100];
     282                 :     sqlite3VdbeMemPrettyPrint(pMem, zBuf);
     283                 :     fprintf(stderr, "INPUT:  %s\n", zBuf);
     284                 :   }
     285                 : #endif
     286                 : 
     287                 :   /* If the translation is between UTF-16 little and big endian, then 
     288                 :   ** all that is required is to swap the byte order. This case is handled
     289                 :   ** differently from the others.
     290                 :   */
     291               0 :   if( pMem->enc!=SQLITE_UTF8 && desiredEnc!=SQLITE_UTF8 ){
     292                 :     u8 temp;
     293                 :     int rc;
     294               0 :     rc = sqlite3VdbeMemMakeWriteable(pMem);  /* the swap below modifies pMem->z in place */
     295               0 :     if( rc!=SQLITE_OK ){
     296                 :       assert( rc==SQLITE_NOMEM );
     297               0 :       return SQLITE_NOMEM;
     298                 :     }
     299               0 :     zIn = (u8*)pMem->z;
     300               0 :     zTerm = &zIn[pMem->n];
     301               0 :     while( zIn<zTerm ){  /* NOTE(review): with an odd pMem->n the last swap reads and writes the byte at zTerm */
     302               0 :       temp = *zIn;
     303               0 :       *zIn = *(zIn+1);
     304               0 :       zIn++;
     305               0 :       *zIn++ = temp;
     306                 :     }
     307               0 :     pMem->enc = desiredEnc;
     308               0 :     goto translate_out;
     309                 :   }
     310                 : 
     311                 :   /* Set len to the maximum number of bytes required in the output buffer. */
     312               0 :   if( desiredEnc==SQLITE_UTF8 ){
     313                 :     /* When converting from UTF-16, the maximum growth results from
     314                 :     ** translating a 2-byte character to a 4-byte UTF-8 character.
     315                 :     ** A single byte is required for the output string
     316                 :     ** nul-terminator.
     317                 :     */
     318               0 :     len = pMem->n * 2 + 1;
     319                 :   }else{
     320                 :     /* When converting from UTF-8 to UTF-16 the maximum growth is caused
     321                 :     ** when a 1-byte UTF-8 character is translated into a 2-byte UTF-16
     322                 :     ** character. Two bytes are required in the output buffer for the
     323                 :     ** nul-terminator.
     324                 :     */
     325               0 :     len = pMem->n * 2 + 2;
     326                 :   }
     327                 : 
     328                 :   /* Set zIn to point at the start of the input buffer and zTerm to point 1
     329                 :   ** byte past the end.
     330                 :   **
     331                 :   ** Variable zOut is set to point at the output buffer: space obtained from
     332                 :   ** sqliteMallocRaw() when more than NBFS bytes may be needed, otherwise the
     333                 :   ** zShort array on the stack (see above).
     334                 :   */
     335               0 :   zIn = (u8*)pMem->z;
     336               0 :   zTerm = &zIn[pMem->n];
     337               0 :   if( len>NBFS ){
     338               0 :     zOut = sqliteMallocRaw(len);
     339               0 :     if( !zOut ) return SQLITE_NOMEM;
     340                 :   }else{
     341               0 :     zOut = zShort;
     342                 :   }
     343               0 :   z = zOut;
     344                 : 
     345               0 :   if( pMem->enc==SQLITE_UTF8 ){
     346               0 :     if( desiredEnc==SQLITE_UTF16LE ){
     347                 :       /* UTF-8 -> UTF-16 Little-endian */
     348               0 :       while( zIn<zTerm ){
     349               0 :         READ_UTF8(zIn, c); 
     350               0 :         WRITE_UTF16LE(z, c);
     351                 :       }
     352                 :     }else{
     353                 :       assert( desiredEnc==SQLITE_UTF16BE );
     354                 :       /* UTF-8 -> UTF-16 Big-endian */
     355               0 :       while( zIn<zTerm ){
     356               0 :         READ_UTF8(zIn, c); 
     357               0 :         WRITE_UTF16BE(z, c);
     358                 :       }
     359                 :     }
     360               0 :     pMem->n = z - zOut;  /* byte length, terminator excluded */
     361               0 :     *z++ = 0;  /* first of the two UTF-16 terminator bytes */
     362                 :   }else{
     363                 :     assert( desiredEnc==SQLITE_UTF8 );
     364               0 :     if( pMem->enc==SQLITE_UTF16LE ){
     365                 :       /* UTF-16 Little-endian -> UTF-8 */
     366               0 :       while( zIn<zTerm ){
     367               0 :         READ_UTF16LE(zIn, c); 
     368               0 :         WRITE_UTF8(z, c);
     369                 :       }
     370                 :     }else{
     371                 :       /* UTF-16 Big-endian -> UTF-8 */
     372               0 :       while( zIn<zTerm ){
     373               0 :         READ_UTF16BE(zIn, c); 
     374               0 :         WRITE_UTF8(z, c);
     375                 :       }
     376                 :     }
     377               0 :     pMem->n = z - zOut;  /* byte length, terminator excluded */
     378                 :   }
     379               0 :   *z = 0;  /* terminator byte (the second one when the output is UTF-16) */
     380                 :   assert( (pMem->n+(desiredEnc==SQLITE_UTF8?1:2))<=len );
     381                 : 
     382               0 :   sqlite3VdbeMemRelease(pMem);  /* called only after the loops above have finished reading pMem->z */
     383               0 :   pMem->flags &= ~(MEM_Static|MEM_Dyn|MEM_Ephem|MEM_Short);
     384               0 :   pMem->enc = desiredEnc;
     385               0 :   if( zOut==zShort ){
     386               0 :     memcpy(pMem->zShort, zOut, len);  /* the stack buffer dies with this call: keep the result in the Mem */
     387               0 :     zOut = (u8*)pMem->zShort;
     388               0 :     pMem->flags |= (MEM_Term|MEM_Short);
     389                 :   }else{
     390               0 :     pMem->flags |= (MEM_Term|MEM_Dyn);  /* result lives in the sqliteMallocRaw() buffer */
     391                 :   }
     392               0 :   pMem->z = (char*)zOut;
     393                 : 
     394               0 : translate_out:
     395                 : #if defined(TRANSLATE_TRACE) && defined(SQLITE_DEBUG)
     396                 :   {
     397                 :     char zBuf[100];
     398                 :     sqlite3VdbeMemPrettyPrint(pMem, zBuf);
     399                 :     fprintf(stderr, "OUTPUT: %s\n", zBuf);
     400                 :   }
     401                 : #endif
     402               0 :   return SQLITE_OK;
     403                 : }
     404                 : 
     405                 : /*
     406                 : ** This routine checks for a byte-order mark at the beginning of the 
     407                 : ** UTF-16 string stored in *pMem. If one is present, it is removed and
     408                 : ** the encoding of the Mem adjusted. This routine does not do any
     409                 : ** byte-swapping, it just sets Mem.enc appropriately.
     410                 : **
     411                 : ** The allocation (static, dynamic etc.) and encoding of the Mem may be
     412                 : ** changed by this function.
     413                 : */
     414               0 : int sqlite3VdbeMemHandleBom(Mem *pMem){
     415               0 :   int rc = SQLITE_OK;
     416               0 :   u8 bom = 0;  /* encoding named by the BOM; stays 0 when none is found */
     417                 : 
     418               0 :   if( pMem->n<0 || pMem->n>1 ){  /* the first two bytes are skipped only when n is 0 or 1 */
     419               0 :     u8 b1 = *(u8 *)pMem->z;
     420               0 :     u8 b2 = *(((u8 *)pMem->z) + 1);
     421               0 :     if( b1==0xFE && b2==0xFF ){
     422               0 :       bom = SQLITE_UTF16BE;
     423                 :     }
     424               0 :     if( b1==0xFF && b2==0xFE ){
     425               0 :       bom = SQLITE_UTF16LE;
     426                 :     }
     427                 :   }
     428                 :   
     429               0 :   if( bom ){
     430                 :     /* This function is called as soon as a string is stored in a Mem*,
     431                 :     ** from within sqlite3VdbeMemSetStr(). At that point it is not possible
     432                 :     ** for the string to be stored in Mem.zShort, or for it to be stored
     433                 :     ** in dynamic memory with no destructor.
     434                 :     */
     435                 :     assert( !(pMem->flags&MEM_Short) );
     436                 :     assert( !(pMem->flags&MEM_Dyn) || pMem->xDel );
     437               0 :     if( pMem->flags & MEM_Dyn ){
     438               0 :       void (*xDel)(void*) = pMem->xDel;  /* keep the destructor and buffer so the old text can be freed below */
     439               0 :       char *z = pMem->z;
     440               0 :       pMem->z = 0;
     441               0 :       pMem->xDel = 0;
     442               0 :       rc = sqlite3VdbeMemSetStr(pMem, &z[2], pMem->n-2, bom, SQLITE_TRANSIENT);  /* re-store the text minus the 2 BOM bytes; presumably SQLITE_TRANSIENT makes a private copy - verify */
     443               0 :       xDel(z);
     444                 :     }else{
     445               0 :       rc = sqlite3VdbeMemSetStr(pMem, &pMem->z[2], pMem->n-2, bom, 
     446                 :           SQLITE_TRANSIENT);
     447                 :     }
     448                 :   }
     449               0 :   return rc;  /* SQLITE_OK when no BOM was found, otherwise the sqlite3VdbeMemSetStr() result */
     450                 : }
     451                 : #endif /* SQLITE_OMIT_UTF16 */
     452                 : 
     453                 : /*
     454                 : ** z is a UTF-8 encoded unicode string. If nByte is less than zero,
     455                 : ** return the number of unicode characters in z up to (but not including)
     456                 : ** the first 0x00 byte. If nByte is not less than zero, return the
     457                 : ** number of unicode characters in the first nByte of z (or up to 
     458                 : ** the first 0x00, whichever comes first).
     459                 : */
     460             273 : int sqlite3utf8CharLen(const char *z, int nByte){
     461             273 :   int r = 0;  /* characters counted so far */
     462                 :   const char *zTerm;  /* scanning stops once z reaches this address */
     463             273 :   if( nByte>=0 ){
     464             273 :     zTerm = &z[nByte];
     465                 :   }else{
     466               0 :     zTerm = (const char *)(-1);  /* no byte limit: meant as an upper bound z never reaches */
     467                 :   }
     468                 :   assert( z<=zTerm );
     469           23187 :   while( *z!=0 && z<zTerm ){  /* NOTE(review): *z is read before the bound test, so the byte at zTerm is examined when no 0x00 precedes it */
     470           22641 :     SKIP_UTF8(z);  /* step size comes from the first byte alone and may carry z past zTerm */
     471           22641 :     r++;
     472                 :   }
     473             273 :   return r;
     474                 : }
     475                 : 
     476                 : #ifndef SQLITE_OMIT_UTF16
     477                 : /*
     478                 : ** Convert a UTF-16 string in the native encoding into a UTF-8 string.
     479                 : ** Memory to hold the UTF-8 string is obtained from malloc and must be
     480                 : ** freed by the calling function.
     481                 : **
     482                 : ** NULL is returned if there is an allocation error.
     483                 : */
     484               0 : char *sqlite3utf16to8(const void *z, int nByte){
     485                 :   Mem m;  /* temporary Mem used only to drive the conversion */
     486               0 :   memset(&m, 0, sizeof(m));
     487               0 :   sqlite3VdbeMemSetStr(&m, z, nByte, SQLITE_UTF16NATIVE, SQLITE_STATIC);  /* return code is not checked */
     488               0 :   sqlite3VdbeChangeEncoding(&m, SQLITE_UTF8);  /* return code is not checked */
     489                 :   assert( (m.flags & MEM_Term)!=0 || sqlite3MallocFailed() );
     490                 :   assert( (m.flags & MEM_Str)!=0 || sqlite3MallocFailed() );
     491               0 :   return (m.flags & MEM_Dyn)!=0 ? m.z : sqliteStrDup(m.z);  /* hand over a MEM_Dyn buffer as-is; duplicate any other result so the caller owns it */
     492                 : }
     493                 : 
     494                 : /*
     495                 : ** pZ is a UTF-16 encoded unicode string. If nChar is less than zero,
     496                 : ** return the number of bytes up to (but not including), the first pair
     497                 : ** of consecutive 0x00 bytes in pZ. If nChar is not less than zero,
     498                 : ** then return the number of bytes in the first nChar unicode characters
     499                 : ** in pZ (or up until the first pair of 0x00 bytes, whichever comes first).
     500                 : */
     501               0 : int sqlite3utf16ByteLen(const void *zIn, int nChar){  /* Return the byte length of the leading characters of zIn; nChar<0 means no character limit */
     502               0 :   unsigned int c = 1;  /* most recent character read; starts non-zero so the loop can be entered */
     503               0 :   char const *z = zIn;  /* read cursor; presumably advanced by the READ_UTF16 macros (defined outside this view) */
     504               0 :   int n = 0;  /* number of characters read so far */
     505                 :   if( SQLITE_UTF16NATIVE==SQLITE_UTF16BE ){
     506                 :     /* Using an "if (SQLITE_UTF16NATIVE==SQLITE_UTF16BE)" construct here
     507                 :     ** and in other parts of this file means that one branch will
     508                 :     ** not be covered by coverage testing on any single host. But coverage
     509                 :     ** will be complete if the tests are run on both a little-endian and
     510                 :     ** big-endian host. Because both the SQLITE_UTF16NATIVE and SQLITE_UTF16BE
     511                 :     ** macros are constant at compile time the compiler can determine
     512                 :     ** which branch will be followed. It is therefore assumed that no runtime
     513                 :     ** penalty is paid for this "if" statement.
     514                 :     */
     515                 :     while( c && ((nChar<0) || n<nChar) ){  /* stop on a zero character or once nChar characters have been read */
     516                 :       READ_UTF16BE(z, c);
     517                 :       n++;
     518                 :     }
     519                 :   }else{
     520               0 :     while( c && ((nChar<0) || n<nChar) ){  /* same loop as above, reading little-endian */
     521               0 :       READ_UTF16LE(z, c);
     522               0 :       n++;
     523                 :     }
     524                 :   }
     525               0 :   return (z-(char const *)zIn)-((c==0)?2:0);  /* bytes consumed; 2 is subtracted when the last character read was zero so it is not counted */
     526                 : }
     527                 : 
     528                 : /*
     529                 : ** UTF-16 implementation of the substr() SQL function.
     530                 : */
     531                 : void sqlite3utf16Substr(  /* Set the result to the part of UTF-16 text argv[0] selected by start argv[1] and length argv[2] */
     532                 :   sqlite3_context *context,
     533                 :   int argc,  /* not referenced in this function */
     534                 :   sqlite3_value **argv
     535               0 : ){
     536                 :   int y, z;  /* y: start position from argv[1]; z: length in characters from argv[2] */
     537                 :   unsigned char const *zStr;  /* first byte of the input text */
     538                 :   unsigned char const *zStrEnd;  /* one past the last byte of the input text */
     539                 :   unsigned char const *zStart;  /* first byte of the result */
     540                 :   unsigned char const *zEnd;  /* one past the last byte of the result */
     541                 :   int i;
     542                 : 
     543               0 :   zStr = (unsigned char const *)sqlite3_value_text16(argv[0]);  /* NOTE(review): used below without a NULL check - confirm it cannot be NULL here */
     544               0 :   zStrEnd = &zStr[sqlite3_value_bytes16(argv[0])];
     545               0 :   y = sqlite3_value_int(argv[1]);
     546               0 :   z = sqlite3_value_int(argv[2]);
     547                 : 
     548               0 :   if( y>0 ){  /* positive start: 1-based position counted from the beginning of the text */
     549               0 :     y = y-1;  /* number of characters to skip before the result begins */
     550               0 :     zStart = zStr;
     551                 :     if( SQLITE_UTF16BE==SQLITE_UTF16NATIVE ){
     552                 :       for(i=0; i<y && zStart<zStrEnd; i++) SKIP_UTF16BE(zStart);
     553                 :     }else{
     554               0 :       for(i=0; i<y && zStart<zStrEnd; i++) SKIP_UTF16LE(zStart);
     555                 :     }
     556                 :   }else{  /* zero or negative start: begin at the end and take up to -y RSKIP steps backwards */
     557               0 :     zStart = zStrEnd;
     558                 :     if( SQLITE_UTF16BE==SQLITE_UTF16NATIVE ){
     559                 :       for(i=y; i<0 && zStart>zStr; i++) RSKIP_UTF16BE(zStart);
     560                 :     }else{
     561               0 :       for(i=y; i<0 && zStart>zStr; i++) RSKIP_UTF16LE(zStart);
     562                 :     }
     563               0 :     for(; i<0; i++) z -= 1;  /* each backward step left over after reaching the start of the text shortens the length by one */
     564                 :   }
     565                 : 
     566               0 :   zEnd = zStart;  /* advance zEnd by up to z SKIP steps, never past the end of the text */
     567                 :   if( SQLITE_UTF16BE==SQLITE_UTF16NATIVE ){
     568                 :     for(i=0; i<z && zEnd<zStrEnd; i++) SKIP_UTF16BE(zEnd);
     569                 :   }else{
     570               0 :     for(i=0; i<z && zEnd<zStrEnd; i++) SKIP_UTF16LE(zEnd);
     571                 :   }
     572                 : 
     573               0 :   sqlite3_result_text16(context, zStart, zEnd-zStart, SQLITE_TRANSIENT);  /* the length passed is the byte count zEnd-zStart */
     574               0 : }
     575                 : 
     576                 : #if defined(SQLITE_TEST)
     577                 : /*
     578                 : ** This routine is called from the TCL test function "translate_selftest".
     579                 : ** It checks that the primitives for serializing and deserializing
     580                 : ** characters in each encoding are inverses of each other.
     581                 : */
     582                 : void sqlite3utfSelfTest(){  /* Write then read back every value below 0x110000 in UTF-8, UTF-16LE and UTF-16BE, asserting the round trip */
     583                 :   unsigned int i, t;  /* i: value under test; t: value the UTF-8 read is expected to return */
     584                 :   unsigned char zBuf[20];  /* scratch buffer holding one encoded character */
     585                 :   unsigned char *z;  /* cursor into zBuf, moved by the WRITE_ and READ_ macros (defined outside this view) */
     586                 :   int n;  /* number of bytes the write produced */
     587                 :   unsigned int c;  /* value read back */
     588                 : 
     589                 :   for(i=0; i<0x00110000; i++){  /* UTF-8 round trip */
     590                 :     z = zBuf;
     591                 :     WRITE_UTF8(z, i);
     592                 :     n = z-zBuf;
     593                 :     z = zBuf;
     594                 :     READ_UTF8(z, c);
     595                 :     t = i;
     596                 :     if( i>=0xD800 && i<=0xDFFF ) t = 0xFFFD;  /* 0xD800..0xDFFF are expected to read back as 0xFFFD */
     597                 :     if( (i&0xFFFFFFFE)==0xFFFE ) t = 0xFFFD;  /* so are 0xFFFE and 0xFFFF */
     598                 :     assert( c==t );
     599                 :     assert( (z-zBuf)==n );  /* the read consumed exactly as many bytes as were written */
     600                 :   }
     601                 :   for(i=0; i<0x00110000; i++){  /* UTF-16LE round trip */
     602                 :     if( i>=0xD800 && i<=0xE000 ) continue;  /* NOTE(review): the skipped range includes 0xE000, while the UTF-8 loop above ends its range at 0xDFFF - confirm intended */
     603                 :     z = zBuf;
     604                 :     WRITE_UTF16LE(z, i);
     605                 :     n = z-zBuf;
     606                 :     z = zBuf;
     607                 :     READ_UTF16LE(z, c);
     608                 :     assert( c==i );
     609                 :     assert( (z-zBuf)==n );
     610                 :   }
     611                 :   for(i=0; i<0x00110000; i++){  /* UTF-16BE round trip, same skipped range as the LE loop */
     612                 :     if( i>=0xD800 && i<=0xE000 ) continue;
     613                 :     z = zBuf;
     614                 :     WRITE_UTF16BE(z, i);
     615                 :     n = z-zBuf;
     616                 :     z = zBuf;
     617                 :     READ_UTF16BE(z, c);
     618                 :     assert( c==i );
     619                 :     assert( (z-zBuf)==n );
     620                 :   }
     621                 : }
     622                 : #endif /* SQLITE_TEST */
     623                 : #endif /* SQLITE_OMIT_UTF16 */

Generated by: LTP GCOV extension version 1.5

Generated at Thu, 19 Nov 2009 08:20:16 +0000 (5 days ago)

Copyright © 2005-2009 The PHP Group
All rights reserved.