PHP  
 PHP: Test and Code Coverage Analysis
downloads | QA | documentation | faq | getting help | mailing lists | reporting bugs | php.net sites | links | my php.net 
 

LTP GCOV extension - code coverage report
Current view: directory - json - utf8_decode.c
Test: PHP Code Coverage
Date: 2009-11-21 Instrumented lines: 50
Code covered: 86.0 % Executed lines: 43
Legend: not executed executed

       1                 : /* utf8_decode.c */
       2                 : 
       3                 : /* 2005-12-25 */
       4                 : 
       5                 : /*
       6                 : Copyright (c) 2005 JSON.org
       7                 : 
       8                 : Permission is hereby granted, free of charge, to any person obtaining a copy
       9                 : of this software and associated documentation files (the "Software"), to deal
      10                 : in the Software without restriction, including without limitation the rights
      11                 : to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      12                 : copies of the Software, and to permit persons to whom the Software is
      13                 : furnished to do so, subject to the following conditions:
      14                 : 
      15                 : The above copyright notice and this permission notice shall be included in all
      16                 : copies or substantial portions of the Software.
      17                 : 
      18                 : The Software shall be used for Good, not Evil.
      19                 : 
      20                 : THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      21                 : IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      22                 : FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
      23                 : AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
      24                 : LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
      25                 : OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
      26                 : SOFTWARE.
      27                 : */
      28                 : 
      29                 : #include "utf8_decode.h"
      30                 : 
      31                 : /*
      32                 :     Very Strict UTF-8 Decoder
      33                 : 
      34                 :     UTF-8 is a multibyte character encoding of Unicode. A character can be
      35                 :     represented by 1-4 bytes. The bit pattern of the first byte indicates the
      36                 :     number of continuation bytes.
      37                 : 
      38                 :     Most UTF-8 decoders tend to be lenient, attempting to recover as much
      39                 :     information as possible, even from badly encoded input. This UTF-8
      40                 :     decoder is not lenient. It will reject input which does not include
      41                 :     proper continuation bytes. It will reject aliases (or suboptimal
      42                 :     codings). It will reject surrogates. (Surrogate encoding should only be
      43                 :     used with UTF-16.)
      44                 : 
      45                 :     Code     Continuation Minimum Maximum
      46                 :     0xxxxxxx           0       0     127
      47                 :     10xxxxxx       error
      48                 :     110xxxxx           1     128    2047
      49                 :     1110xxxx           2    2048   65535 excluding 55296 - 57343
      50                 :     11110xxx           3   65536 1114111
      51                 :     11111xxx       error
      52                 : */
      53                 : 
      54                 : 
      55                 : /*
      56                 :     Get the next byte (as a value 0..255) and advance the index. It returns UTF8_END if there are no more bytes.
      57                 : */
      58                 : static int 
      59                 : get(json_utf8_decode *utf8)
      60           18585 : {
      61                 :     int c;
      62           18585 :     if (utf8->the_index >= utf8->the_length) {
      63               6 :         return UTF8_END;  /* input exhausted; the index is left unchanged */
      64                 :     }
      65           18579 :     c = utf8->the_input[utf8->the_index] & 0xFF;  /* mask so the result is never negative, whatever the signedness of char */
      66           18579 :     utf8->the_index += 1;
      67           18579 :     return c;
      68                 : }
      69                 : 
      70                 : 
      71                 : /*
      72                 :     Get the 6-bit payload of the next continuation byte.
      73                 :     Return UTF8_ERROR if it is not a continuation byte (this includes running out of input).
      74                 : */
      75                 : static int 
      76                 : cont(json_utf8_decode *utf8)
      77             315 : {
      78             315 :     int c = get(utf8);
      79             315 :     return ((c & 0xC0) == 0x80) ? (c & 0x3F) : UTF8_ERROR;  /* top bits 10 mark a continuation byte; keep the low 6 bits */
      80                 : }
      81                 : 
      82                 : 
      83                 : /*
      84                 :     Initialize the UTF-8 decoder: record the input p and its length in *utf8 and reset the index and both offsets to 0.
      85                 :     The decoder is not reentrant. NOTE(review): all state visible here is kept in *utf8 -- confirm this claim still holds. */
      86                 : void 
      87                 : utf8_decode_init(json_utf8_decode *utf8, char p[], int length)
      88             464 : {
      89             464 :     utf8->the_index = 0;
      90             464 :     utf8->the_input = p;
      91             464 :     utf8->the_length = length;
      92             464 :     utf8->the_char = 0;
      93             464 :     utf8->the_byte = 0;
      94             464 : }
      95                 : 
      96                 : 
      97                 : /*
      98                 :     Get the current byte offset. This is generally used in error reporting.
      99                 : */
     100                 : int 
     101                 : utf8_decode_at_byte(json_utf8_decode *utf8)
     102               0 : {
     103               0 :     return utf8->the_byte;  /* offset of the first byte of the character most recently started by utf8_decode_next */
     104                 : }
     105                 : 
     106                 : 
     107                 : /*
     108                 :     Get the current character offset. This is generally used in error reporting.
     109                 :     The character offset matches the byte offset if the text is strictly ASCII.
     110                 : */
     111                 : int 
     112                 : utf8_decode_at_character(json_utf8_decode *utf8)
     113               0 : {
     114               0 :     return utf8->the_char > 0 ? utf8->the_char - 1 : 0;  /* the_char counts characters started so far; convert to a zero-based offset */
     115                 : }
     116                 : 
     117                 : 
     118                 : /*
     119                 :     Extract the next character.
     120                 :     Returns: the character (between 0 and 1114111)
     121                 :          or  UTF8_END   (the end)
     122                 :          or  UTF8_ERROR (error: bad first byte, bad or missing continuation byte, alias, or surrogate)
     123                 : */
     124                 : int 
     125                 : utf8_decode_next(json_utf8_decode *utf8)
     126           18730 : {
     127                 :     int c;  /* the first byte of the character */
     128                 :     int r;  /* the result */
     129                 : 
     130           18730 :     if (utf8->the_index >= utf8->the_length) {
     131             460 :         return utf8->the_index == utf8->the_length ? UTF8_END : UTF8_ERROR;  /* exactly at the end is a clean finish; past the end is an error */
     132                 :     }
     133           18270 :     utf8->the_byte = utf8->the_index;  /* remember where this character starts, for utf8_decode_at_byte */
     134           18270 :     utf8->the_char += 1;
     135           18270 :     c = get(utf8);
     136                 : /*
     137                 :     Zero continuations (0 to 127)
     138                 : */
     139           18270 :     if ((c & 0x80) == 0) {
     140           18113 :         return c;
     141                 :     }
     142                 : /*
     143                 :     One continuation (128 to 2047)
     144                 : */
     145             157 :     if ((c & 0xE0) == 0xC0) {
     146              16 :         int c1 = cont(utf8);
     147              16 :         if (c1 < 0) {
     148               0 :             return UTF8_ERROR;
     149                 :         }
     150              16 :         r = ((c & 0x1F) << 6) | c1;
     151              16 :         return r >= 128 ? r : UTF8_ERROR;  /* a value below 128 fits in one byte, so this form would be an alias */
     152                 :     }
     153                 : /*
     154                 :     Two continuations (2048 to 55295 and 57344 to 65535) 
     155                 : */
     156             141 :     if ((c & 0xF0) == 0xE0) {
     157             124 :         int c1 = cont(utf8);
     158             124 :         int c2 = cont(utf8);
     159             124 :         if (c1 < 0 || c2 < 0) {
     160               4 :             return UTF8_ERROR;  /* both cont() calls have already run, so the index has moved past whatever they read */
     161                 :         }
     162             120 :         r = ((c & 0x0F) << 12) | (c1 << 6) | c2;
     163             120 :         return r >= 2048 && (r < 55296 || r > 57343) ? r : UTF8_ERROR;  /* reject aliases below 2048 and the surrogate range 55296 - 57343 */
     164                 :     }
     165                 : /*
     166                 :     Three continuations (65536 to 1114111)
     167                 : */
     168              17 :     if ((c & 0xF8) == 0xF0) {
     169              17 :         int c1 = cont(utf8);
     170              17 :         int c2 = cont(utf8);
     171              17 :         int c3 = cont(utf8);
     172              17 :         if (c1 < 0 || c2 < 0 || c3 < 0) {
     173               0 :             return UTF8_ERROR;
     174                 :         }
     175              17 :         r = ((c & 0x0F) << 18) | (c1 << 12) | (c2 << 6) | c3;  /* bit 3 of c is known to be 0 from the 0xF8 test, so the 0x0F mask keeps only the 3 payload bits */
     176              17 :         return r >= 65536 && r <= 1114111 ? r : UTF8_ERROR;  /* reject aliases below 65536 and values above 1114111 */
     177                 :     }
     178               0 :     return UTF8_ERROR;  /* first byte was 10xxxxxx or 11111xxx */
     179                 : }

Generated by: LTP GCOV extension version 1.5

Generated at Sat, 21 Nov 2009 12:27:02 +0000 (3 days ago)

Copyright © 2005-2009 The PHP Group
All rights reserved.