PHP  
 PHP: Test and Code Coverage Analysis
downloads | QA | documentation | faq | getting help | mailing lists | reporting bugs | php.net sites | links | my php.net 
 

LCOV - code coverage report
Current view: top level - ext/json - utf8_decode.c (source / functions) Hit Total Coverage
Test: PHP Code Coverage Lines: 0 50 0.0 %
Date: 2014-10-22 Functions: 0 6 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* utf8_decode.c */
       2             : 
       3             : /* 2005-12-25 */
       4             : 
       5             : /*
       6             : Copyright (c) 2005 JSON.org
       7             : 
       8             : Permission is hereby granted, free of charge, to any person obtaining a copy
       9             : of this software and associated documentation files (the "Software"), to deal
      10             : in the Software without restriction, including without limitation the rights
      11             : to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      12             : copies of the Software, and to permit persons to whom the Software is
      13             : furnished to do so, subject to the following conditions:
      14             : 
      15             : The above copyright notice and this permission notice shall be included in all
      16             : copies or substantial portions of the Software.
      17             : 
      18             : The Software shall be used for Good, not Evil.
      19             : 
      20             : THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      21             : IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      22             : FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
      23             : AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
      24             : LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
      25             : OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
      26             : SOFTWARE.
      27             : */
      28             : 
      29             : #include "utf8_decode.h"
      30             : 
      31             : /*
      32             :     Very Strict UTF-8 Decoder
      33             : 
      34             :     UTF-8 is a multibyte character encoding of Unicode. A character can be
      35             :     represented by 1-4 bytes. The bit pattern of the first byte indicates the
      36             :     number of continuation bytes.
      37             : 
      38             :     Most UTF-8 decoders tend to be lenient, attempting to recover as much
      39             :     information as possible, even from badly encoded input. This UTF-8
      40             :     decoder is not lenient. It will reject input which does not include
      41             :     proper continuation bytes. It will reject aliases (or suboptimal
      42             :     codings). It will reject surrogates. (Surrogate encoding should only be
      43             :     used with UTF-16.)
      44             : 
      45             :     Code    Continuation Minimum Maximum
      46             :     0xxxxxxx           0       0     127
      47             :     10xxxxxx       error
      48             :     110xxxxx           1     128    2047
      49             :     1110xxxx           2    2048   65535 excluding 55296 - 57343
      50             :     11110xxx           3   65536 1114111
      51             :     11111xxx       error
      52             : */
      53             : 
      54             : 
      55             : /*
      56             :     Get the next byte. It returns UTF8_END if there are no more bytes.
      57             : */
      58             : static int 
      59           0 : get(json_utf8_decode *utf8)
      60             : {
      61             :     int c;
      62           0 :     if (utf8->the_index >= utf8->the_length) {
      63           0 :         return UTF8_END;
      64             :     }
      65           0 :     c = utf8->the_input[utf8->the_index] & 0xFF;
      66           0 :     utf8->the_index += 1;
      67           0 :     return c;
      68             : }
      69             : 
      70             : 
      71             : /*
      72             :     Get the 6-bit payload of the next continuation byte.
      73             :     Return UTF8_ERROR if it is not a continuation byte.
      74             : */
      75             : static int 
      76           0 : cont(json_utf8_decode *utf8)
      77             : {
      78           0 :     int c = get(utf8);
      79           0 :     return ((c & 0xC0) == 0x80) ? (c & 0x3F) : UTF8_ERROR;
      80             : }
      81             : 
      82             : 
      83             : /*
      84             :     Initialize the UTF-8 decoder. The decoder is not reentrant.
      85             : */
      86             : void 
      87           0 : utf8_decode_init(json_utf8_decode *utf8, char p[], int length)
      88             : {
      89           0 :     utf8->the_index = 0;
      90           0 :     utf8->the_input = p;
      91           0 :     utf8->the_length = length;
      92           0 :     utf8->the_char = 0;
      93           0 :     utf8->the_byte = 0;
      94           0 : }
      95             : 
      96             : 
      97             : /*
      98             :     Get the current byte offset. This is generally used in error reporting.
      99             : */
     100             : int 
     101           0 : utf8_decode_at_byte(json_utf8_decode *utf8)
     102             : {
     103           0 :     return utf8->the_byte;
     104             : }
     105             : 
     106             : 
     107             : /*
     108             :     Get the current character offset. This is generally used in error reporting.
     109             :     The character offset matches the byte offset if the text is strictly ASCII.
     110             : */
     111             : int 
     112           0 : utf8_decode_at_character(json_utf8_decode *utf8)
     113             : {
     114           0 :     return utf8->the_char > 0 ? utf8->the_char - 1 : 0;
     115             : }
     116             : 
     117             : 
     118             : /*
     119             :     Extract the next character.
     120             :     Returns: the character (between 0 and 1114111)
     121             :          or  UTF8_END   (the end)
     122             :          or  UTF8_ERROR (error)
     123             : */
     124             : int 
     125           0 : utf8_decode_next(json_utf8_decode *utf8)
     126             : {
     127             :     int c;  /* the first byte of the character */
     128             :     int r;  /* the result */
     129             : 
     130           0 :     if (utf8->the_index >= utf8->the_length) {
     131           0 :         return utf8->the_index == utf8->the_length ? UTF8_END : UTF8_ERROR;
     132             :     }
     133           0 :     utf8->the_byte = utf8->the_index;
     134           0 :     utf8->the_char += 1;
     135           0 :     c = get(utf8);
     136             : /*
     137             :     Zero continuations (0 to 127)
     138             : */
     139           0 :     if ((c & 0x80) == 0) {
     140           0 :         return c;
     141             :     }
     142             : /*
     143             :     One continuation (128 to 2047)
     144             : */
     145           0 :     if ((c & 0xE0) == 0xC0) {
     146           0 :         int c1 = cont(utf8);
     147           0 :         if (c1 < 0) {
     148           0 :             return UTF8_ERROR;
     149             :         }
     150           0 :         r = ((c & 0x1F) << 6) | c1;
     151           0 :         return r >= 128 ? r : UTF8_ERROR;
     152             :     }
     153             : /*
     154             :     Two continuations (2048 to 55295 and 57344 to 65535)
     155             : */
     156           0 :     if ((c & 0xF0) == 0xE0) {
     157           0 :         int c1 = cont(utf8);
     158           0 :         int c2 = cont(utf8);
     159           0 :         if (c1 < 0 || c2 < 0) {
     160           0 :             return UTF8_ERROR;
     161             :         }
     162           0 :         r = ((c & 0x0F) << 12) | (c1 << 6) | c2;
     163           0 :         return r >= 2048 && (r < 55296 || r > 57343) ? r : UTF8_ERROR;
     164             :     }
     165             : /*
     166             :     Three continuations (65536 to 1114111)
     167             : */
     168           0 :     if ((c & 0xF8) == 0xF0) {
     169           0 :         int c1 = cont(utf8);
     170           0 :         int c2 = cont(utf8);
     171           0 :         int c3 = cont(utf8);
     172           0 :         if (c1 < 0 || c2 < 0 || c3 < 0) {
     173           0 :             return UTF8_ERROR;
     174             :         }
     175           0 :         r = ((c & 0x0F) << 18) | (c1 << 12) | (c2 << 6) | c3;
     176           0 :         return r >= 65536 && r <= 1114111 ? r : UTF8_ERROR;
     177             :     }
     178           0 :     return UTF8_ERROR;
     179             : }

Generated by: LCOV version 1.10

Generated at Wed, 22 Oct 2014 07:24:51 +0000 (17 hours ago)

Copyright © 2005-2014 The PHP Group
All rights reserved.