Line data Source code
1 : /* tests/bom.cpp
2 : * Copyright (C) 2013-2019 Made to Order Software Corporation
3 : *
4 : * This program is free software; you can redistribute it and/or modify
5 : * it under the terms of the GNU General Public License as published by
6 : * the Free Software Foundation; either version 2 of the License, or
7 : * (at your option) any later version.
8 : *
9 : * This program is distributed in the hope that it will be useful,
10 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : * GNU General Public License for more details.
13 : *
14 : * You should have received a copy of the GNU General Public License along
15 : * with this program; if not, write to the Free Software Foundation, Inc.,
16 : * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17 : *
18 : * Authors
19 : * Alexis Wilke alexis@m2osw.com
20 : */
21 :
22 : // unit test
23 : //
24 : #include "main.h"
25 :
26 : // libutf8 lib
27 : //
28 : #include "libutf8/libutf8.h"
29 :
30 : // C++ lib
31 : //
32 : #include <cctype>
33 : #include <iostream>
34 :
35 :
36 7 : CATCH_TEST_CASE("bom", "[characters],[bom]")
37 : {
38 10 : CATCH_START_SECTION("Verify the BOM character")
39 1 : CATCH_REQUIRE(libutf8::BOM_CHAR == 0xFEFF);
40 : CATCH_END_SECTION()
41 :
42 10 : CATCH_START_SECTION("Verify with a string that's too small")
43 : {
44 1 : CATCH_REQUIRE(libutf8::start_with_bom(nullptr, rand()) == libutf8::bom_t::BOM_NONE);
45 1 : CATCH_REQUIRE(libutf8::start_with_bom("", 0) == libutf8::bom_t::BOM_NONE);
46 1 : CATCH_REQUIRE(libutf8::start_with_bom("a", 1) == libutf8::bom_t::BOM_NONE);
47 : }
48 : CATCH_END_SECTION()
49 :
50 10 : CATCH_START_SECTION("Verify the five BOMs as is")
51 : char buf[4];
52 1 : char32_t const bom(libutf8::BOM_CHAR);
53 :
54 : // UTF-8 (0xFEFF encodes as EF BB BF; the 4th byte is unrelated data)
55 1 : buf[0] = static_cast<char>((bom >> 12) | 0xE0);
56 1 : buf[1] = static_cast<char>(((bom >> 6) & 0x3F) | 0x80);
57 1 : buf[2] = static_cast<char>(((bom >> 0) & 0x3F) | 0x80);
58 1 : buf[3] = '?';
59 1 : CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF8);
60 :
61 : // UTF-16 Little Endian (with a zero in the first of the next 2 bytes)
62 1 : buf[0] = static_cast<char>(bom >> 0);
63 1 : buf[1] = static_cast<char>(bom >> 8);
64 1 : buf[2] = static_cast<char>(0x00);
65 1 : buf[3] = static_cast<char>(0x34);
66 1 : CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF16_LE);
67 :
68 : // UTF-16 Little Endian (with a zero in the second of the next 2 bytes)
69 1 : buf[0] = static_cast<char>(bom >> 0);
70 1 : buf[1] = static_cast<char>(bom >> 8);
71 1 : buf[2] = static_cast<char>(0x12);
72 1 : buf[3] = static_cast<char>(0x00);
73 1 : CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF16_LE);
74 :
75 : // UTF-16 Little Endian (no zero in the next 2 bytes)
76 1 : buf[0] = static_cast<char>(bom >> 0);
77 1 : buf[1] = static_cast<char>(bom >> 8);
78 1 : buf[2] = static_cast<char>(0x12);
79 1 : buf[3] = static_cast<char>(0x34);
80 1 : CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF16_LE);
81 :
82 : // UTF-16 Big Endian (no zero in the next 2 bytes)
83 1 : buf[0] = static_cast<char>(bom >> 8);
84 1 : buf[1] = static_cast<char>(bom >> 0);
85 1 : buf[2] = static_cast<char>(0xAB);
86 1 : buf[3] = static_cast<char>(0xCD);
87 1 : CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF16_BE);
88 :
89 : // UTF-16 Big Endian (with a zero in the first of the next 2 bytes)
90 1 : buf[0] = static_cast<char>(bom >> 8);
91 1 : buf[1] = static_cast<char>(bom >> 0);
92 1 : buf[2] = static_cast<char>(0x00);
93 1 : buf[3] = static_cast<char>(0xCD);
94 1 : CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF16_BE);
95 :
96 : // UTF-16 Big Endian (with a zero in the second of the next 2 bytes)
97 1 : buf[0] = static_cast<char>(bom >> 8);
98 1 : buf[1] = static_cast<char>(bom >> 0);
99 1 : buf[2] = static_cast<char>(0xAB);
100 1 : buf[3] = static_cast<char>(0x00);
101 1 : CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF16_BE);
102 :
103 : // UTF-32 Little Endian (bytes FF FE 00 00)
104 1 : buf[0] = static_cast<char>(bom >> 0);
105 1 : buf[1] = static_cast<char>(bom >> 8);
106 1 : buf[2] = static_cast<char>(bom >> 16);
107 1 : buf[3] = static_cast<char>(bom >> 24);
108 1 : CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF32_LE);
109 :
110 : // UTF-32 Big Endian (bytes 00 00 FE FF)
111 1 : buf[0] = static_cast<char>(bom >> 24);
112 1 : buf[1] = static_cast<char>(bom >> 16);
113 1 : buf[2] = static_cast<char>(bom >> 8);
114 1 : buf[3] = static_cast<char>(bom >> 0);
115 1 : CATCH_REQUIRE(libutf8::start_with_bom(buf, sizeof(buf)) == libutf8::bom_t::BOM_UTF32_BE);
116 : CATCH_END_SECTION()
117 :
118 10 : CATCH_START_SECTION("Verify that data without a BOM is not detected as a BOM")
119 : char buf[4];
120 :
121 : // unknown 1 byte (well... 1 byte is never really known...)
122 1 : buf[0] = '?';
123 1 : CATCH_REQUIRE(libutf8::start_with_bom(buf, 1) == libutf8::bom_t::BOM_NONE);
124 :
125 : // unknown 2 bytes
126 1 : buf[0] = 'Q';
127 1 : buf[1] = '?';
128 1 : CATCH_REQUIRE(libutf8::start_with_bom(buf, 2) == libutf8::bom_t::BOM_NONE);
129 :
130 : // unknown 3 bytes
131 1 : buf[0] = 'B';
132 1 : buf[1] = 'O';
133 1 : buf[2] = 'M';
134 1 : CATCH_REQUIRE(libutf8::start_with_bom(buf, 3) == libutf8::bom_t::BOM_NONE);
135 :
136 : // unknown 4 bytes
137 1 : buf[0] = 'B';
138 1 : buf[1] = 'O';
139 1 : buf[2] = 'M';
140 1 : buf[3] = '?';
141 1 : CATCH_REQUIRE(libutf8::start_with_bom(buf, 4) == libutf8::bom_t::BOM_NONE);
142 : CATCH_END_SECTION()
143 :
144 10 : CATCH_START_SECTION("Verify u32string that starts with a BOM (CPU Endianness)")
145 2 : std::u32string u32str;
146 1 : u32str += libutf8::BOM_CHAR;
147 1 : u32str += unittest::rand_char(true);
148 1 : size_t const size(u32str.length() * sizeof(std::u32string::value_type));
149 10 : for(int idx(static_cast<int>(size)); idx >= 0; --idx)
150 : {
151 9 : if(static_cast<size_t>(idx) >= sizeof(std::u32string::value_type))
152 : {
153 : #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
154 : CATCH_REQUIRE(libutf8::start_with_bom(reinterpret_cast<char const *>(u32str.c_str()), idx) == libutf8::bom_t::BOM_UTF32_BE);
155 : #elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
156 5 : CATCH_REQUIRE(libutf8::start_with_bom(reinterpret_cast<char const *>(u32str.c_str()), idx) == libutf8::bom_t::BOM_UTF32_LE);
157 : #else
158 : #error "Unsupported endianness"
159 : #endif
160 : }
161 : #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
162 4 : else if(static_cast<size_t>(idx) >= sizeof(std::u16string::value_type))
163 : {
164 2 : CATCH_REQUIRE(libutf8::start_with_bom(reinterpret_cast<char const *>(u32str.c_str()), idx) == libutf8::bom_t::BOM_UTF16_LE);
165 : }
166 : #endif
167 : else
168 : {
169 : // too short (on big endian this also covers 2 and 3 bytes)
170 : //
171 2 : CATCH_REQUIRE(libutf8::start_with_bom(reinterpret_cast<char const *>(u32str.c_str()), idx) == libutf8::bom_t::BOM_NONE);
172 : }
173 : }
174 : CATCH_END_SECTION()
175 11 : }
176 :
177 :
178 : // vim: ts=4 sw=4 et
|