LCOV - code coverage report
Current view: top level - tests - bom.cpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 77 77 100.0 %
Date: 2019-05-28 17:54:33 Functions: 3 3 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*    tests/bom.cpp
       2             :  *    Copyright (C) 2013-2019  Made to Order Software Corporation
       3             :  *
       4             :  *    This program is free software; you can redistribute it and/or modify
       5             :  *    it under the terms of the GNU General Public License as published by
       6             :  *    the Free Software Foundation; either version 2 of the License, or
       7             :  *    (at your option) any later version.
       8             :  *
       9             :  *    This program is distributed in the hope that it will be useful,
      10             :  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
      11             :  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12             :  *    GNU General Public License for more details.
      13             :  *
      14             :  *    You should have received a copy of the GNU General Public License along
      15             :  *    with this program; if not, write to the Free Software Foundation, Inc.,
      16             :  *    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
      17             :  *
      18             :  *    Authors
      19             :  *    Alexis Wilke   alexis@m2osw.com
      20             :  */
      21             : 
      22             : // unit test
      23             : //
      24             : #include "main.h"
      25             : 
      26             : // libutf8 lib
      27             : //
      28             : #include "libutf8/libutf8.h"
      29             : 
      30             : // Catch2 lib
      31             : //
      32             : #include <catch2/catch.hpp>
      33             : 
      34             : // C++ lib
      35             : //
      36             : #include <cctype>
      37             : #include <iostream>
      38             : 
      39             : 
      40           6 : CATCH_TEST_CASE("bom", "characters,bom") // exercises libutf8::BOM_CHAR and libutf8::start_with_bom()
      41             : {
      42           8 :     CATCH_START_SECTION("Verify the BOM character")
      43           1 :         CATCH_REQUIRE(libutf8::BOM_CHAR == 0xFEFF);
      44             :     CATCH_END_SECTION()
      45             : 
      46           8 :     CATCH_START_SECTION("Verify the five BOMs as is")
      47             :         char buf[4];
      48           1 :         char32_t const bom(libutf8::BOM_CHAR);
      49             : 
      50             :         // UTF-8 (BOM encoded on 3 bytes: EF BB BF, the 4th byte is arbitrary data)
      51           1 :         buf[0] = static_cast<char>((bom >> 12) | 0xE0);
      52           1 :         buf[1] = static_cast<char>(((bom >>  6) & 0x3F) | 0x80);
      53           1 :         buf[2] = static_cast<char>(((bom >>  0) & 0x3F) | 0x80);
      54           1 :         buf[3] = '?';
      55           1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF8);
      56             : 
      57             :         // UTF-16 Little Endian (with a zero in the first of the next 2 bytes)
      58           1 :         buf[0] = static_cast<char>(bom >> 0);
      59           1 :         buf[1] = static_cast<char>(bom >> 8);
      60           1 :         buf[2] = static_cast<char>(0x00);
      61           1 :         buf[3] = static_cast<char>(0x34);
      62           1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF16_LE);
      63             : 
      64             :         // UTF-16 Little Endian (with a zero in the second of the next 2 bytes)
      65           1 :         buf[0] = static_cast<char>(bom >> 0);
      66           1 :         buf[1] = static_cast<char>(bom >> 8);
      67           1 :         buf[2] = static_cast<char>(0x12);
      68           1 :         buf[3] = static_cast<char>(0x00);
      69           1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF16_LE);
      70             : 
      71             :         // UTF-16 Little Endian (without any zero in the next 2 bytes)
      72           1 :         buf[0] = static_cast<char>(bom >> 0);
      73           1 :         buf[1] = static_cast<char>(bom >> 8);
      74           1 :         buf[2] = static_cast<char>(0x12);
      75           1 :         buf[3] = static_cast<char>(0x34);
      76           1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF16_LE);
      77             : 
      78             :         // UTF-16 Big Endian (without any zero in the next 2 bytes)
      79           1 :         buf[0] = static_cast<char>(bom >> 8);
      80           1 :         buf[1] = static_cast<char>(bom >> 0);
      81           1 :         buf[2] = static_cast<char>(0xAB);
      82           1 :         buf[3] = static_cast<char>(0xCD);
      83           1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF16_BE);
      84             : 
      85             :         // UTF-16 Big Endian (with a zero in the first of the next 2 bytes)
      86           1 :         buf[0] = static_cast<char>(bom >> 8);
      87           1 :         buf[1] = static_cast<char>(bom >> 0);
      88           1 :         buf[2] = static_cast<char>(0x00);
      89           1 :         buf[3] = static_cast<char>(0xCD);
      90           1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF16_BE);
      91             : 
      92             :         // UTF-16 Big Endian (with a zero in the second of the next 2 bytes)
      93           1 :         buf[0] = static_cast<char>(bom >> 8);
      94           1 :         buf[1] = static_cast<char>(bom >> 0);
      95           1 :         buf[2] = static_cast<char>(0xAB);
      96           1 :         buf[3] = static_cast<char>(0x00);
      97           1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF16_BE);
      98             : 
      99             :         // UTF-32 Little Endian (the 4 bytes are FF FE 00 00)
     100           1 :         buf[0] = static_cast<char>(bom >>  0);
     101           1 :         buf[1] = static_cast<char>(bom >>  8);
     102           1 :         buf[2] = static_cast<char>(bom >> 16);
     103           1 :         buf[3] = static_cast<char>(bom >> 24);
     104           1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF32_LE);
     105             : 
     106             :         // UTF-32 Big Endian (the 4 bytes are 00 00 FE FF)
     107           1 :         buf[0] = static_cast<char>(bom >> 24);
     108           1 :         buf[1] = static_cast<char>(bom >> 16);
     109           1 :         buf[2] = static_cast<char>(bom >>  8);
     110           1 :         buf[3] = static_cast<char>(bom >>  0);
     111           1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF32_BE);
     112             :     CATCH_END_SECTION()
     113             : 
     114           8 :     CATCH_START_SECTION("Verify the five BOMs as is") // NOTE(review): same name as the previous section, yet this one checks buffers that hold no BOM
     115             :         char buf[4];
     116             : 
     117             :         // unknown 1 byte (well... 1 byte is never really known...)
     118           1 :         buf[0] = '?';
     119           1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, 1) == libutf8::bom_t::BOM_NONE);
     120             : 
     121             :         // unknown 2 bytes
     122           1 :         buf[0] = 'Q';
     123           1 :         buf[1] = '?';
     124           1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, 2) == libutf8::bom_t::BOM_NONE);
     125             : 
     126             :         // unknown 3 bytes
     127           1 :         buf[0] = 'B';
     128           1 :         buf[1] = 'O';
     129           1 :         buf[2] = 'M';
     130           1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, 3) == libutf8::bom_t::BOM_NONE);
     131             : 
     132             :         // unknown 4 bytes
     133           1 :         buf[0] = 'B';
     134           1 :         buf[1] = 'O';
     135           1 :         buf[2] = 'M';
     136           1 :         buf[3] = '?';
     137           1 :         CATCH_REQUIRE(libutf8::start_with_bom(buf, 4) == libutf8::bom_t::BOM_NONE);
     138             :     CATCH_END_SECTION()
     139             : 
     140           8 :     CATCH_START_SECTION("Verify u32string that starts with a BOM (CPU Endianness)")
     141           2 :         std::u32string u32str;
     142           1 :         u32str += libutf8::BOM_CHAR;
     143           1 :         u32str += unittest::rand_char(true);
     144           1 :         size_t const size(u32str.length() * sizeof(std::u32string::value_type));
     145          10 :         for(int idx(static_cast<int>(size)); idx >= 0; --idx) // try every length from the whole string (in bytes) down to 0
     146             :         {
     147           9 :             if(static_cast<size_t>(idx) >= sizeof(std::u32string::value_type)) // the complete 4 byte BOM is visible
     148             :             {
     149             : #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
     150             :                 CATCH_REQUIRE(libutf8::start_with_bom(reinterpret_cast<char const *>(u32str.c_str()), idx) == libutf8::bom_t::BOM_UTF32_BE);
     151             : #elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
     152           5 :                 CATCH_REQUIRE(libutf8::start_with_bom(reinterpret_cast<char const *>(u32str.c_str()), idx) == libutf8::bom_t::BOM_UTF32_LE);
     153             : #else
     154             : #error "Unsupported endianness"
     155             : #endif
     156             :             }
     157             : #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
     158           4 :             else if(static_cast<size_t>(idx) >= sizeof(std::u16string::value_type)) // only 2 or 3 bytes visible: they start with FF FE
     159             :             {
     160           2 :                 CATCH_REQUIRE(libutf8::start_with_bom(reinterpret_cast<char const *>(u32str.c_str()), idx) == libutf8::bom_t::BOM_UTF16_LE);
     161             :             }
     162             : #endif
     163             :             else
     164             :             {
     165             :                 // too short (on big endian this branch also receives the 2 and 3 byte
     166             :                 // lengths since the block above is compiled for little endian only)
     167           2 :                 CATCH_REQUIRE(libutf8::start_with_bom(reinterpret_cast<char const *>(u32str.c_str()), idx) == libutf8::bom_t::BOM_NONE);
     168             :             }
     169             :         }
     170             :     CATCH_END_SECTION()
     171          10 : }
     172             : 
     173             : 
     174             : // vim: ts=4 sw=4 et

Generated by: LCOV version 1.12