PHP  
 PHP: Test and Code Coverage Analysis
downloads | QA | documentation | faq | getting help | mailing lists | reporting bugs | php.net sites | links | my php.net 
 

LTP GCOV extension - code coverage report
Current view: directory - json - utf8_decode.c
Test: PHP Code Coverage
Date: 2009-11-23 Instrumented lines: 50
Code covered: 64.0 % Executed lines: 32
Legend: not executed executed

       1                 : /* utf8_decode.c */
       2                 : 
       3                 : /* 2005-12-25 */
       4                 : 
       5                 : /*
       6                 : Copyright (c) 2005 JSON.org
       7                 : 
       8                 : Permission is hereby granted, free of charge, to any person obtaining a copy
       9                 : of this software and associated documentation files (the "Software"), to deal
      10                 : in the Software without restriction, including without limitation the rights
      11                 : to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      12                 : copies of the Software, and to permit persons to whom the Software is
      13                 : furnished to do so, subject to the following conditions:
      14                 : 
      15                 : The above copyright notice and this permission notice shall be included in all
      16                 : copies or substantial portions of the Software.
      17                 : 
      18                 : The Software shall be used for Good, not Evil.
      19                 : 
      20                 : THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      21                 : IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      22                 : FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
      23                 : AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
      24                 : LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
      25                 : OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
      26                 : SOFTWARE.
      27                 : */
      28                 : 
      29                 : #include "utf8_decode.h"
      30                 : 
      31                 : /*
      32                 :     Very Strict UTF-8 Decoder
      33                 : 
      34                 :     UTF-8 is a multibyte character encoding of Unicode. A character can be
      35                 :     represented by 1-4 bytes. The bit pattern of the first byte indicates the
      36                 :     number of continuation bytes.
      37                 : 
      38                 :     Most UTF-8 decoders tend to be lenient, attempting to recover as much
      39                 :     information as possible, even from badly encoded input. This UTF-8
      40                 :     decoder is not lenient. It will reject input which does not include
      41                 :     proper continuation bytes. It will reject aliases (or suboptimal
      42                 :     codings). It will reject surrogates. (Surrogate encoding should only be
      43                 :     used with UTF-16.)
      44                 : 
      45                 :     Code    Continuation Minimum Maximum
      46                 :     0xxxxxxx           0       0     127
      47                 :     10xxxxxx       error
      48                 :     110xxxxx           1     128    2047
      49                 :     1110xxxx           2    2048   65535 excluding 55296 - 57343
      50                 :     11110xxx           3   65536 1114111
      51                 :     11111xxx       error
      52                 : */
      53                 : 
      54                 : 
      55                 : /*
      56                 :     Get the next input byte and advance the index. Returns UTF8_END once the_index has reached the_length.
      57                 : */
      58                 : static int get(json_utf8_decode *utf8) /* {{{ */
      59            4800 : {
      60                 :         int c;
      61            4800 :         if (utf8->the_index >= utf8->the_length) {
      62               6 :                 return UTF8_END;
      63                 :         }
      64            4794 :         c = utf8->the_input[utf8->the_index] & 0xFF; /* mask to 0..255 so a byte is never returned as a negative value */
      65            4794 :         utf8->the_index += 1;
      66            4794 :         return c;
      67                 : }
      68                 : /* }}} */
      69                 : 
      70                 : /*
      71                 :     Get the 6-bit payload of the next continuation byte.
      72                 :     Return UTF8_ERROR if it is not a continuation byte (top two bits other than 10).
      73                 : */
      74                 : static int cont(json_utf8_decode *utf8) /* {{{ */
      75              40 : {
      76              40 :         int c = get(utf8); /* the byte is consumed even when it turns out not to be a continuation byte */
      77              40 :         return ((c & 0xC0) == 0x80) ? (c & 0x3F) : UTF8_ERROR; /* 10xxxxxx -> xxxxxx */
      78                 : }
      79                 : /* }}} */
      80                 : 
      81                 : /*
      82                 :     Initialize the UTF-8 decoder: attach the input p of the given length and reset all offsets. The decoder is not reentrant.
      83                 : */
      84                 : void utf8_decode_init(json_utf8_decode *utf8, char p[], int length) /* {{{ */
      85              19 : {
      86              19 :         utf8->the_index = 0; /* next byte to read */
      87              19 :         utf8->the_input = p; /* the pointer is stored, the bytes are not copied */
      88              19 :         utf8->the_length = length;
      89              19 :         utf8->the_char = 0; /* count of characters started so far */
      90              19 :         utf8->the_byte = 0; /* byte offset of the most recently started character */
      91              19 : }
      92                 : /* }}} */
      93                 : 
      94                 : /*
      95                 :     Get the byte offset at which the most recently decoded character started (0 before the first decode). This is generally used in error reporting.
      96                 : */
      97                 : int utf8_decode_at_byte(json_utf8_decode *utf8) /* {{{ */
      98               0 : {
      99               0 :         return utf8->the_byte;
     100                 : }
     101                 : /* }}} */
     102                 : 
     103                 : /*
     104                 :     Get the zero-based offset of the most recently decoded character (0 if none yet). Generally used in error reporting.
     105                 :     The character offset matches the byte offset if the text is strictly ASCII.
     106                 : */
     107                 : int utf8_decode_at_character(json_utf8_decode *utf8) /* {{{ */
     108               0 : {
     109               0 :         return utf8->the_char > 0 ? utf8->the_char - 1 : 0; /* the_char counts characters started, so subtract one for an offset */
     110                 : }
     111                 : /* }}} */
     112                 : 
     113                 : /*
     114                 :     Extract the next character, advancing past the bytes that were read.
     115                 :     Returns: the character (between 0 and 1114111)
     116                 :          or  UTF8_END   (the index is exactly at the end of the input)
     117                 :          or  UTF8_ERROR (malformed, overlong, surrogate or out-of-range sequence)
     118                 : */
     119                 : int utf8_decode_next(json_utf8_decode *utf8) /* {{{ */
     120            4775 : {
     121                 :         int c;  /* the first byte of the character */
     122                 :         int r;  /* the result */
     123                 : 
     124            4775 :         if (utf8->the_index >= utf8->the_length) {
     125              15 :                 return utf8->the_index == utf8->the_length ? UTF8_END : UTF8_ERROR; /* an index beyond the length is reported as an error */
     126                 :         }
     127            4760 :         utf8->the_byte = utf8->the_index; /* remember where this character starts */
     128            4760 :         utf8->the_char += 1;
     129            4760 :         c = get(utf8);
     130                 :         /*
     131                 :            No continuation bytes: 0xxxxxxx (0 to 127)
     132                 :            */
     133            4760 :         if ((c & 0x80) == 0) {
     134            4740 :                 return c;
     135                 :         }
     136                 :         /*
     137                 :            One continuation byte: 110xxxxx (128 to 2047)
     138                 :            */
     139              20 :         if ((c & 0xE0) == 0xC0) {
     140               0 :                 int c1 = cont(utf8);
     141               0 :                 if (c1 < 0) { /* NOTE(review): relies on UTF8_ERROR being negative - confirm in utf8_decode.h */
     142               0 :                         return UTF8_ERROR;
     143                 :                 }
     144               0 :                 r = ((c & 0x1F) << 6) | c1;
     145               0 :                 return r >= 128 ? r : UTF8_ERROR; /* values below 128 would be an overlong encoding */
     146                 :         }
     147                 :         /*
     148                 :            Two continuation bytes: 1110xxxx (2048 to 55295 and 57344 to 65535)
     149                 :            */
     150              20 :         if ((c & 0xF0) == 0xE0) {
     151              20 :                 int c1 = cont(utf8);
     152              20 :                 int c2 = cont(utf8); /* read even if c1 already failed */
     153              20 :                 if (c1 < 0 || c2 < 0) {
     154               4 :                         return UTF8_ERROR;
     155                 :                 }
     156              16 :                 r = ((c & 0x0F) << 12) | (c1 << 6) | c2;
     157              16 :                 return r >= 2048 && (r < 55296 || r > 57343) ? r : UTF8_ERROR; /* reject overlong forms and the surrogate range 55296-57343 */
     158                 :         }
     159                 :         /*
     160                 :            Three continuation bytes: 11110xxx (65536 to 1114111)
     161                 :            */
     162               0 :         if ((c & 0xF8) == 0xF0) {
     163               0 :                 int c1 = cont(utf8);
     164               0 :                 int c2 = cont(utf8);
     165               0 :                 int c3 = cont(utf8);
     166               0 :                 if (c1 < 0 || c2 < 0 || c3 < 0) {
     167               0 :                         return UTF8_ERROR;
     168                 :                 }
     169               0 :                 r = ((c & 0x0F) << 18) | (c1 << 12) | (c2 << 6) | c3; /* bit 3 of c is known to be 0 here, so 0x0F keeps just the 3 payload bits */
     170               0 :                 return r >= 65536 && r <= 1114111 ? r : UTF8_ERROR; /* reject overlong forms and values above 1114111 */
     171                 :         }
     172               0 :         return UTF8_ERROR; /* lone continuation byte (10xxxxxx) or 11111xxx lead byte */
     173                 : }
     174                 : /* }}} */
     175                 : 
     176                 : /*
     177                 :  * Local variables:
     178                 :  * tab-width: 4
     179                 :  * c-basic-offset: 4
     180                 :  * End:
     181                 :  * vim600: noet sw=4 ts=4
     182                 :  * vim<600: noet sw=4 ts=4
     183                 :  */

Generated by: LTP GCOV extension version 1.5

Generated at Mon, 23 Nov 2009 17:39:31 +0000 (34 hours ago)

Copyright © 2005-2009 The PHP Group
All rights reserved.