LCOV - code coverage report
Current view: top level - tests - catch_bom.cpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 83 83 100.0 %
Date: 2023-01-26 17:17:53 Functions: 3 3 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // Copyright (c) 2021-2022  Made to Order Software Corporation
       2             : //
       3             : // https://snapwebsites.org/project/libutf8
       4             : // contact@m2osw.com
       5             : //
       6             : // This program is free software; you can redistribute it and/or modify
       7             : // it under the terms of the GNU General Public License as published by
       8             : // the Free Software Foundation; either version 2 of the License, or
       9             : // (at your option) any later version.
      10             : //
      11             : // This program is distributed in the hope that it will be useful,
      12             : // but WITHOUT ANY WARRANTY; without even the implied warranty of
      13             : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14             : // GNU General Public License for more details.
      15             : //
      16             : // You should have received a copy of the GNU General Public License along
      17             : // with this program; if not, write to the Free Software Foundation, Inc.,
      18             : // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
      19             : 
      20             : // libutf8
      21             : //
      22             : #include    <libutf8/base.h>
      23             : #include    <libutf8/libutf8.h>
      24             : 
      25             : 
      26             : // unit test
      27             : //
      28             : #include    "catch_main.h"
      29             : 
      30             : 
      31             : // C++
      32             : //
      33             : #include    <cctype>
      34             : #include    <iostream>
      35             : 
      36             : 
      37             : // last include
      38             : //
      39             : #include    <snapdev/poison.h>
      40             : 
      41             : 
      42             : 
      43           7 : CATCH_TEST_CASE("bom", "[characters],[bom]")
      44             : {
      45          10 :     CATCH_START_SECTION("bom: Verify the BOM character")
      46           1 :         CATCH_REQUIRE(libutf8::BOM_CHAR == 0xFEFF);
      47             :     CATCH_END_SECTION()
      48             : 
      49          10 :     CATCH_START_SECTION("bom: Verify with a string that's too small")
      50             :     {
      51           1 :         CATCH_REQUIRE(libutf8::start_with_bom(nullptr, rand()) == libutf8::bom_t::BOM_NONE);
      52           1 :         CATCH_REQUIRE(libutf8::start_with_bom("", 0) == libutf8::bom_t::BOM_NONE);
      53           1 :         CATCH_REQUIRE(libutf8::start_with_bom("a", 1) == libutf8::bom_t::BOM_NONE);
      54             :     }
      55             :     CATCH_END_SECTION()
      56             : 
      57          10 :     CATCH_START_SECTION("bom: Verify the five BOMs as is")
      58             :     {
      59           1 :         char buf[4];
      60           1 :         char32_t const bom(libutf8::BOM_CHAR);
      61             : 
      62             :         // UTF-8
      63           1 :         buf[0] = static_cast<char>((bom >> 12) | 0xE0);
      64           1 :         buf[1] = static_cast<char>(((bom >>  6) & 0x3F) | 0x80);
      65           1 :         buf[2] = static_cast<char>(((bom >>  0) & 0x3F) | 0x80);
      66           1 :         buf[3] = '?';
      67           1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF8);
      68             : 
      69             :         // UTF-16 Little Endian (with a zero in the third byte)
      70           1 :         buf[0] = static_cast<char>(bom >> 0);
      71           1 :         buf[1] = static_cast<char>(bom >> 8);
      72           1 :         buf[2] = static_cast<char>(0x00);
      73           1 :         buf[3] = static_cast<char>(0x34);
      74           1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF16_LE);
      75             : 
      76             :         // UTF-16 Little Endian (with a zero in the next 2 bytes)
      77           1 :         buf[0] = static_cast<char>(bom >> 0);
      78           1 :         buf[1] = static_cast<char>(bom >> 8);
      79           1 :         buf[2] = static_cast<char>(0x12);
      80           1 :         buf[3] = static_cast<char>(0x00);
      81           1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF16_LE);
      82             : 
      83             :         // UTF-16 Little Endian (no zero in the next 2 bytes)
      84           1 :         buf[0] = static_cast<char>(bom >> 0);
      85           1 :         buf[1] = static_cast<char>(bom >> 8);
      86           1 :         buf[2] = static_cast<char>(0x12);
      87           1 :         buf[3] = static_cast<char>(0x34);
      88           1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF16_LE);
      89             : 
      90             :         // UTF-16 Big Endian
      91           1 :         buf[0] = static_cast<char>(bom >> 8);
      92           1 :         buf[1] = static_cast<char>(bom >> 0);
      93           1 :         buf[2] = static_cast<char>(0xAB);
      94           1 :         buf[3] = static_cast<char>(0xCD);
      95           1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF16_BE);
      96             : 
      97             :         // UTF-16 Big Endian (with a zero in the next 2 bytes)
      98           1 :         buf[0] = static_cast<char>(bom >> 8);
      99           1 :         buf[1] = static_cast<char>(bom >> 0);
     100           1 :         buf[2] = static_cast<char>(0x00);
     101           1 :         buf[3] = static_cast<char>(0xCD);
     102           1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF16_BE);
     103             : 
     104             :         // UTF-16 Big Endian (with a zero in the next 2 bytes)
     105           1 :         buf[0] = static_cast<char>(bom >> 8);
     106           1 :         buf[1] = static_cast<char>(bom >> 0);
     107           1 :         buf[2] = static_cast<char>(0xAB);
     108           1 :         buf[3] = static_cast<char>(0x00);
     109           1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF16_BE);
     110             : 
     111             :         // UTF-32 Little Endian
     112           1 :         buf[0] = static_cast<char>(bom >>  0);
     113           1 :         buf[1] = static_cast<char>(bom >>  8);
     114           1 :         buf[2] = static_cast<char>(bom >> 16);
     115           1 :         buf[3] = static_cast<char>(bom >> 24);
     116           1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF32_LE);
     117             : 
     118             :         // UTF-32 Big Endian
     119           1 :         buf[0] = static_cast<char>(bom >> 24);
     120           1 :         buf[1] = static_cast<char>(bom >> 16);
     121           1 :         buf[2] = static_cast<char>(bom >>  8);
     122           1 :         buf[3] = static_cast<char>(bom >>  0);
     123           1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF32_BE);
     124             :     }
     125             :     CATCH_END_SECTION()
     126             : 
     127          10 :     CATCH_START_SECTION("bom: Verify the five BOMs as is")
     128             :     {
     129           1 :         char buf[4];
     130             : 
     131             :         // unknown 1 byte (well... 1 byte is never really known...)
     132           1 :         buf[0] = '?';
     133           1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, 1) == libutf8::bom_t::BOM_NONE);
     134             : 
     135             :         // unknown 2 bytes
     136           1 :         buf[0] = 'Q';
     137           1 :         buf[1] = '?';
     138           1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, 2) == libutf8::bom_t::BOM_NONE);
     139             : 
     140             :         // unknown 3 bytes
     141           1 :         buf[0] = 'B';
     142           1 :         buf[1] = 'O';
     143           1 :         buf[2] = 'M';
     144           1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, 3) == libutf8::bom_t::BOM_NONE);
     145             : 
     146             :         // unknown 4 bytes
     147           1 :         buf[0] = 'B';
     148           1 :         buf[1] = 'O';
     149           1 :         buf[2] = 'M';
     150           1 :         buf[3] = '?';
     151           1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, 4) == libutf8::bom_t::BOM_NONE);
     152             :     }
     153             :     CATCH_END_SECTION()
     154             : 
     155          10 :     CATCH_START_SECTION("bom: Verify u32string that starts with a BOM (CPU Endianness)")
     156             :     {
     157           2 :         std::u32string u32str;
     158           1 :         u32str += libutf8::BOM_CHAR;
     159           1 :         u32str += unittest::rand_char(true);
     160           1 :         size_t const size(u32str.length() * sizeof(std::u32string::value_type));
     161          10 :         for(int idx(static_cast<int>(size)); idx >= 0; --idx)
     162             :         {
     163           9 :             if(static_cast<size_t>(idx) >= sizeof(std::u32string::value_type))
     164             :             {
     165             : #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
     166             :                 CATCH_REQUIRE(libutf8::start_with_bom(reinterpret_cast<char const *>(u32str.c_str()), idx) == libutf8::bom_t::BOM_UTF32_BE);
     167             : #elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
     168           5 :                 CATCH_REQUIRE(libutf8::start_with_bom(reinterpret_cast<char const *>(u32str.c_str()), idx) == libutf8::bom_t::BOM_UTF32_LE);
     169             : #else
     170             : #error "Unsupported endianness"
     171             : #endif
     172             :             }
     173             : #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
     174           4 :             else if(static_cast<size_t>(idx) >= sizeof(std::u16string::value_type))
     175             :             {
     176           2 :                 CATCH_REQUIRE(libutf8::start_with_bom(reinterpret_cast<char const *>(u32str.c_str()), idx) == libutf8::bom_t::BOM_UTF16_LE);
     177             :             }
     178             : #endif
     179             :             else
     180             :             {
     181             :                 // too short
     182             :                 //
     183           2 :                 CATCH_REQUIRE(libutf8::start_with_bom(reinterpret_cast<char const *>(u32str.c_str()), idx) == libutf8::bom_t::BOM_NONE);
     184             :             }
     185             :         }
     186             :     }
     187             :     CATCH_END_SECTION()
     188          11 : }
     189             : 
     190             : 
     191             : // vim: ts=4 sw=4 et

Generated by: LCOV version 1.13