LCOV - code coverage report
Current view: top level - tests - catch_bom.cpp (source / functions) Coverage Total Hit
Test: coverage.info Lines: 100.0 % 89 89
Test Date: 2025-06-22 07:49:47 Functions: 100.0 % 1 1
Legend: Lines: hit not hit

            Line data    Source code
       1              : // Copyright (c) 2021-2023  Made to Order Software Corp.  All Rights Reserved
       2              : //
       3              : // https://snapwebsites.org/project/libutf8
       4              : // contact@m2osw.com
       5              : //
       6              : // This program is free software; you can redistribute it and/or modify
       7              : // it under the terms of the GNU General Public License as published by
       8              : // the Free Software Foundation; either version 2 of the License, or
       9              : // (at your option) any later version.
      10              : //
      11              : // This program is distributed in the hope that it will be useful,
      12              : // but WITHOUT ANY WARRANTY; without even the implied warranty of
      13              : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14              : // GNU General Public License for more details.
      15              : //
      16              : // You should have received a copy of the GNU General Public License along
      17              : // with this program; if not, write to the Free Software Foundation, Inc.,
      18              : // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
      19              : 
      20              : // libutf8
      21              : //
      22              : #include    <libutf8/base.h>
      23              : #include    <libutf8/libutf8.h>
      24              : 
      25              : 
      26              : // unit test
      27              : //
      28              : #include    "catch_main.h"
      29              : 
      30              : 
      31              : // C++
      32              : //
      33              : #include    <cctype>
      34              : #include    <iostream>
      35              : 
      36              : 
      37              : // last include
      38              : //
      39              : #include    <snapdev/poison.h>
      40              : 
      41              : 
      42              : 
      43            5 : CATCH_TEST_CASE("bom", "[characters],[bom]") // checks libutf8::BOM_CHAR and libutf8::start_with_bom()
      44              : {
      45            5 :     CATCH_START_SECTION("bom: Verify the BOM character")
      46            1 :         CATCH_REQUIRE(libutf8::BOM_CHAR == 0xFEFF);
      47            5 :     CATCH_END_SECTION()
      48              : 
      49            5 :     CATCH_START_SECTION("bom: Verify with a string that's too small")
      50              :     { // a null pointer (with an arbitrary size), an empty string and a 1 byte string must all report BOM_NONE
      51            1 :         CATCH_REQUIRE(libutf8::start_with_bom(nullptr, rand()) == libutf8::bom_t::BOM_NONE);
      52            1 :         CATCH_REQUIRE(libutf8::start_with_bom("", 0) == libutf8::bom_t::BOM_NONE);
      53            1 :         CATCH_REQUIRE(libutf8::start_with_bom("a", 1) == libutf8::bom_t::BOM_NONE);
      54              :     }
      55            5 :     CATCH_END_SECTION()
      56              : 
      57            5 :     CATCH_START_SECTION("bom: Verify the five BOMs as is")
      58              :     { // each case below hand-encodes the BOM in a 4 byte buffer and checks the detected encoding
      59            1 :         char buf[4];
      60            1 :         char32_t const bom(libutf8::BOM_CHAR);
      61              : 
      62              :         // UTF-8 (0xFEFF encoded on 3 bytes: EF BB BF; buf[3] is just filler)
      63            1 :         buf[0] = static_cast<char>((bom >> 12) | 0xE0);
      64            1 :         buf[1] = static_cast<char>(((bom >>  6) & 0x3F) | 0x80);
      65            1 :         buf[2] = static_cast<char>(((bom >>  0) & 0x3F) | 0x80);
      66            1 :         buf[3] = '?';
      67            1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF8);
      68              : 
      69              :         // UTF-16 Little Endian (first of the next 2 bytes is zero; one zero alone must not be taken for UTF-32 LE, which is FF FE 00 00)
      70            1 :         buf[0] = static_cast<char>(bom >> 0);
      71            1 :         buf[1] = static_cast<char>(bom >> 8);
      72            1 :         buf[2] = static_cast<char>(0x00);
      73            1 :         buf[3] = static_cast<char>(0x34);
      74            1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF16_LE);
      75              : 
      76              :         // UTF-16 Little Endian (second of the next 2 bytes is zero)
      77            1 :         buf[0] = static_cast<char>(bom >> 0);
      78            1 :         buf[1] = static_cast<char>(bom >> 8);
      79            1 :         buf[2] = static_cast<char>(0x12);
      80            1 :         buf[3] = static_cast<char>(0x00);
      81            1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF16_LE);
      82              : 
      83              :         // UTF-16 Little Endian (no zero in the next 2 bytes)
      84            1 :         buf[0] = static_cast<char>(bom >> 0);
      85            1 :         buf[1] = static_cast<char>(bom >> 8);
      86            1 :         buf[2] = static_cast<char>(0x12);
      87            1 :         buf[3] = static_cast<char>(0x34);
      88            1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF16_LE);
      89              : 
      90              :         // UTF-16 Big Endian (no zero in the next 2 bytes)
      91            1 :         buf[0] = static_cast<char>(bom >> 8);
      92            1 :         buf[1] = static_cast<char>(bom >> 0);
      93            1 :         buf[2] = static_cast<char>(0xAB);
      94            1 :         buf[3] = static_cast<char>(0xCD);
      95            1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF16_BE);
      96              : 
      97              :         // UTF-16 Big Endian (first of the next 2 bytes is zero)
      98            1 :         buf[0] = static_cast<char>(bom >> 8);
      99            1 :         buf[1] = static_cast<char>(bom >> 0);
     100            1 :         buf[2] = static_cast<char>(0x00);
     101            1 :         buf[3] = static_cast<char>(0xCD);
     102            1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF16_BE);
     103              : 
     104              :         // UTF-16 Big Endian (second of the next 2 bytes is zero)
     105            1 :         buf[0] = static_cast<char>(bom >> 8);
     106            1 :         buf[1] = static_cast<char>(bom >> 0);
     107            1 :         buf[2] = static_cast<char>(0xAB);
     108            1 :         buf[3] = static_cast<char>(0x00);
     109            1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF16_BE);
     110              : 
     111              :         // UTF-32 Little Endian (bytes FF FE 00 00)
     112            1 :         buf[0] = static_cast<char>(bom >>  0);
     113            1 :         buf[1] = static_cast<char>(bom >>  8);
     114            1 :         buf[2] = static_cast<char>(bom >> 16);
     115            1 :         buf[3] = static_cast<char>(bom >> 24);
     116            1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF32_LE);
     117              : 
     118              :         // UTF-32 Big Endian (bytes 00 00 FE FF)
     119            1 :         buf[0] = static_cast<char>(bom >> 24);
     120            1 :         buf[1] = static_cast<char>(bom >> 16);
     121            1 :         buf[2] = static_cast<char>(bom >>  8);
     122            1 :         buf[3] = static_cast<char>(bom >>  0);
     123            1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF32_BE);
     124              :     }
     125            5 :     CATCH_END_SECTION()
     126              : 
     127            5 :     CATCH_START_SECTION("bom: Verify the five BOMs as is") // NOTE(review): name duplicates the previous section; this one checks buffers that hold no BOM
     128              :     {
     129            1 :         char buf[4];
     130              : 
     131              :         // unknown 1 byte (a single byte is always reported as BOM_NONE)
     132            1 :         buf[0] = '?';
     133            1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, 1) == libutf8::bom_t::BOM_NONE);
     134              : 
     135              :         // unknown 2 bytes
     136            1 :         buf[0] = 'Q';
     137            1 :         buf[1] = '?';
     138            1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, 2) == libutf8::bom_t::BOM_NONE);
     139              : 
     140              :         // unknown 3 bytes
     141            1 :         buf[0] = 'B';
     142            1 :         buf[1] = 'O';
     143            1 :         buf[2] = 'M';
     144            1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, 3) == libutf8::bom_t::BOM_NONE);
     145              : 
     146              :         // unknown 4 bytes
     147            1 :         buf[0] = 'B';
     148            1 :         buf[1] = 'O';
     149            1 :         buf[2] = 'M';
     150            1 :         buf[3] = '?';
     151            1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, 4) == libutf8::bom_t::BOM_NONE);
     152              :     }
     153            5 :     CATCH_END_SECTION()
     154              : 
     155            5 :     CATCH_START_SECTION("bom: Verify u32string that starts with a BOM (CPU Endianness)")
     156              :     { // the BOM is stored in native byte order, so the expected result depends on the CPU endianness
     157            1 :         std::u32string u32str;
     158            1 :         u32str += libutf8::BOM_CHAR;
     159            1 :         u32str += unittest::rand_char(true);
     160            1 :         size_t const size(u32str.length() * sizeof(std::u32string::value_type));
     161           10 :         for(int idx(static_cast<int>(size)); idx >= 0; --idx) // pass every byte count from the whole string down to 0
     162              :         {
     163            9 :             if(static_cast<size_t>(idx) >= sizeof(std::u32string::value_type)) // the whole first char32_t is visible
     164              :             {
     165              : #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
     166              :                 CATCH_REQUIRE(libutf8::start_with_bom(reinterpret_cast<char const *>(u32str.c_str()), idx) == libutf8::bom_t::BOM_UTF32_BE);
     167              : #elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
     168            5 :                 CATCH_REQUIRE(libutf8::start_with_bom(reinterpret_cast<char const *>(u32str.c_str()), idx) == libutf8::bom_t::BOM_UTF32_LE);
     169              : #else
     170              : #error "Unsupported endianness"
     171              : #endif
     172              :             }
     173              : #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
     174            4 :             else if(static_cast<size_t>(idx) >= sizeof(std::u16string::value_type)) // little endian only: the first 2 or 3 bytes are expected to read as a UTF-16 LE BOM
     175              :             {
     176            2 :                 CATCH_REQUIRE(libutf8::start_with_bom(reinterpret_cast<char const *>(u32str.c_str()), idx) == libutf8::bom_t::BOM_UTF16_LE);
     177              :             }
     178              : #endif
     179              :             else
     180              :             {
     181              :                 // too short to hold a BOM (on big endian this covers every size under 4 bytes)
     182              :                 //
     183            2 :                 CATCH_REQUIRE(libutf8::start_with_bom(reinterpret_cast<char const *>(u32str.c_str()), idx) == libutf8::bom_t::BOM_NONE);
     184              :             }
     185              :         }
     186            1 :     }
     187            5 :     CATCH_END_SECTION()
     188            5 : }
     189              : 
     190              : 
     191              : // vim: ts=4 sw=4 et
        

Generated by: LCOV version 2.0-1

Snap C++ | List of projects | List of versions