LCOV - code coverage report
Current view: top level - tests - catch_character.cpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 325 325 100.0 %
Date: 2023-01-26 17:17:53 Functions: 5 5 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // Copyright (c) 2021-2022  Made to Order Software Corporation
       2             : //
       3             : // https://snapwebsites.org/project/libutf8
       4             : // contact@m2osw.com
       5             : //
       6             : // This program is free software; you can redistribute it and/or modify
       7             : // it under the terms of the GNU General Public License as published by
       8             : // the Free Software Foundation; either version 2 of the License, or
       9             : // (at your option) any later version.
      10             : //
      11             : // This program is distributed in the hope that it will be useful,
      12             : // but WITHOUT ANY WARRANTY; without even the implied warranty of
      13             : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14             : // GNU General Public License for more details.
      15             : //
      16             : // You should have received a copy of the GNU General Public License along
      17             : // with this program; if not, write to the Free Software Foundation, Inc.,
      18             : // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
      19             : 
      20             : // libutf8
      21             : //
      22             : #include    <libutf8/base.h>
      23             : #include    <libutf8/exception.h>
      24             : 
      25             : 
      26             : // unit test
      27             : //
      28             : #include    "catch_main.h"
      29             : 
      30             : 
      31             : // C++
      32             : //
      33             : #include    <cctype>
      34             : #include    <iostream>
      35             : 
      36             : 
      37             : // last include
      38             : //
      39             : #include    <snapdev/poison.h>
      40             : 
      41             : 
      42             : 
      43           8 : CATCH_TEST_CASE("character_conversions", "[characters]")
      44             : {
      45          12 :     CATCH_START_SECTION("character_conversions: Verify minimum buffer length for MBS conversions")
      46             :     {
      47           1 :         CATCH_REQUIRE(libutf8::MBS_MIN_BUFFER_LENGTH >= 5);
      48             :     }
      49             :     CATCH_END_SECTION()
      50             : 
      51          12 :     CATCH_START_SECTION("character_conversions: Test UTF-32 to UTF-8 with characters between 0x00 and 0x7F inclusive")
      52             :     {
      53         129 :         for(char32_t wc(0); wc < 0x000080; ++wc)
      54             :         {
      55         128 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
      56         128 :             CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 0), libutf8::libutf8_logic_exception);
      57         128 :             CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 1);
      58             : 
      59         128 :             CATCH_REQUIRE(static_cast<char32_t>(buf[0]) == wc);
      60         128 :             CATCH_REQUIRE(buf[1] == '\0');
      61             : 
      62         128 :             char32_t back(rand());
      63         128 :             char const * s(buf);
      64         128 :             size_t len(1);
      65         128 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == 1);
      66         128 :             CATCH_REQUIRE(back == wc);
      67             :         }
      68             :     }
      69             :     CATCH_END_SECTION()
      70             : 
      71          12 :     CATCH_START_SECTION("character_conversions: Test UTF-32 to UTF-8 with characters between 0x80 and 0x7FF inclusive")
      72             :     {
      73        1921 :         for(char32_t wc(0x000080); wc < 0x000800; ++wc)
      74             :         {
      75        1920 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
      76        1920 :             CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 0), libutf8::libutf8_logic_exception);
      77        1920 :             CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 1), libutf8::libutf8_logic_exception);
      78        1920 :             CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 2);
      79             : 
      80        3840 :             char32_t const found(((static_cast<char32_t>(buf[0]) & 0x1F) << 6)
      81        1920 :                                | ((static_cast<char32_t>(buf[1]) & 0x3F) << 0));
      82        1920 :             CATCH_REQUIRE(found == wc);
      83        1920 :             CATCH_REQUIRE(buf[2] == '\0');
      84             : 
      85        1920 :             char32_t back(rand());
      86        1920 :             char const * s(buf);
      87        1920 :             size_t len(2);
      88        1920 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == 2);
      89        1920 :             CATCH_REQUIRE(back == wc);
      90             :         }
      91             :     }
      92             :     CATCH_END_SECTION()
      93             : 
      94          12 :     CATCH_START_SECTION("character_conversions: Test UTF-32 to UTF-8 with characters between 0x800 and 0xFFFF inclusive")
      95             :     {
      96       61442 :         for(char32_t wc(0x000800); wc < 0x010000; ++wc)
      97             :         {
      98       61442 :             if(wc >= 0xD800 && wc <= 0xDFFF)
      99             :             {
     100             :                 // skip UTF-16 surrogates
     101             :                 //
     102           1 :                 wc = 0xDFFF;
     103           1 :                 continue;
     104             :             }
     105             : 
     106       61440 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
     107       61440 :             if(rand() % 10 == 0)
     108             :             {
     109        6051 :                 CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 0), libutf8::libutf8_logic_exception);
     110        6051 :                 CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 1), libutf8::libutf8_logic_exception);
     111        6051 :                 CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 2), libutf8::libutf8_logic_exception);
     112             :             }
     113       61440 :             CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 3);
     114             : 
     115      122880 :             char32_t const found(((static_cast<char32_t>(buf[0]) & 0x0F) << 12)
     116       61440 :                                | ((static_cast<char32_t>(buf[1]) & 0x3F) <<  6)
     117       61440 :                                | ((static_cast<char32_t>(buf[2]) & 0x3F) <<  0));
     118       61440 :             CATCH_REQUIRE(found == wc);
     119       61440 :             CATCH_REQUIRE(buf[3] == '\0');
     120             : 
     121       61440 :             char32_t back(rand());
     122       61440 :             char const * s(buf);
     123       61440 :             size_t len(3);
     124       61440 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == 3);
     125       61440 :             CATCH_REQUIRE(back == wc);
     126             :         }
     127             :     }
     128             :     CATCH_END_SECTION()
     129             : 
     130          12 :     CATCH_START_SECTION("character_conversions: Test UTF-32 to UTF-8 with characters between 0x10000 and 0x10FFFF inclusive")
     131             :     {
     132     1048577 :         for(char32_t wc(0x010000); wc < 0x110000; ++wc)
     133             :         {
     134     1048576 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
     135     1048576 :             if(rand() % 100 == 0)
     136             :             {
     137       10517 :                 CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 0), libutf8::libutf8_logic_exception);
     138       10517 :                 CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 1), libutf8::libutf8_logic_exception);
     139       10517 :                 CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 2), libutf8::libutf8_logic_exception);
     140       10517 :                 CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 3), libutf8::libutf8_logic_exception);
     141             :             }
     142     1048576 :             CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 4);
     143             : 
     144     2097152 :             char32_t const found(((static_cast<char32_t>(buf[0]) & 0x07) << 18)
     145     1048576 :                                | ((static_cast<char32_t>(buf[1]) & 0x3F) << 12)
     146     1048576 :                                | ((static_cast<char32_t>(buf[2]) & 0x3F) <<  6)
     147     1048576 :                                | ((static_cast<char32_t>(buf[3]) & 0x3F) <<  0));
     148     1048576 :             CATCH_REQUIRE(found == wc);
     149     1048576 :             CATCH_REQUIRE(buf[4] == '\0');
     150             : 
     151     1048576 :             char32_t back(rand());
     152     1048576 :             char const * s(buf);
     153     1048576 :             size_t len(4);
     154     1048576 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == 4);
     155     1048576 :             CATCH_REQUIRE(back == wc);
     156             :         }
     157             :     }
     158             :     CATCH_END_SECTION()
     159             : 
     160          12 :     CATCH_START_SECTION("character_conversions: Test UTF-32 to UTF-8 with an empty input string")
     161             :     {
     162          11 :         for(char32_t repeat(0); repeat < 10; ++repeat)
     163             :         {
     164          10 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
     165          10 :             char copy[libutf8::MBS_MIN_BUFFER_LENGTH];
     166          60 :             for(size_t idx(0); idx < libutf8::MBS_MIN_BUFFER_LENGTH; ++idx)
     167             :             {
     168          50 :                 buf[idx] = rand();
     169          50 :                 copy[idx] = buf[idx];
     170             :             }
     171          10 :             char const * s(buf);
     172          10 :             char32_t null = rand();
     173          10 :             size_t len(0);
     174          10 :             CATCH_REQUIRE(libutf8::mbstowc(null, s, len) == 0);
     175          10 :             CATCH_REQUIRE(null == '\0');
     176          60 :             for(size_t idx(0); idx < libutf8::MBS_MIN_BUFFER_LENGTH; ++idx)
     177             :             {
     178          50 :                 CATCH_REQUIRE(copy[idx] == buf[idx]);
     179             :             }
     180             :         }
     181             :     }
     182             :     CATCH_END_SECTION()
     183           6 : }
     184             : 
     185             : 
     186           4 : CATCH_TEST_CASE("invalid_utf32_to_utf8", "[characters],[invalid]")
     187             : {
     188           4 :     CATCH_START_SECTION("invalid_utf32_to_utf8: Verify that surrogates do not work in UTF-8")
     189             :     {
     190        2049 :         for(char32_t wc(0x00D800); wc < 0x00E000; ++wc)
     191             :         {
     192        2048 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH]
     193             :             {
     194             :                 'n', 'o', 't', 'e', 'm' //, 'p', 't', 'y'
     195             :             };
     196        2048 :             CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == -1);
     197        2048 :             CATCH_REQUIRE(buf[0] == '\0');
     198             :         }
     199             :     }
     200             :     CATCH_END_SECTION()
     201             : 
     202           4 :     CATCH_START_SECTION("invalid_utf32_to_utf8: Verify that too large a number is not supported")
     203             :     {
     204        1001 :         for(int repeat(0); repeat < 1000; ++repeat)
     205             :         {
     206        1000 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH]
     207             :             {
     208             :                 'n', 'o', 't', 'e', 'm' //, 'p', 't', 'y'
     209             :             };
     210        1000 :             char32_t wc(0);
     211           1 :             do
     212             :             {
     213        1001 :                 wc = (rand() << 16) + (rand() & 0x0000FFFF);
     214             :             }
     215        1001 :             while(wc < 0x110000);
     216        1000 :             CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == -1);
     217        1000 :             CATCH_REQUIRE(buf[0] == '\0');
     218             :         }
     219             :     }
     220             :     CATCH_END_SECTION()
     221           2 : }
     222             : 
     223             : 
     224           7 : CATCH_TEST_CASE("invalid_utf8_to_utf32", "[characters],[invalid]")
     225             : {
     226          10 :     CATCH_START_SECTION("invalid_utf8_to_utf32: Verify that surrogates do not work in UTF-8")
     227             :     {
     228        2049 :         for(char32_t wc(0x00D800); wc < 0x00E000; ++wc)
     229             :         {
     230             : //int mbstowc(char32_t & wc, char const * & mb, size_t & len);
     231        2048 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH] = {};
     232        2048 :             buf[0] = static_cast<char>((wc >> 12) | 0xE0);
     233        2048 :             buf[1] = ((wc >> 6) & 0x3F) | 0x80;
     234        2048 :             buf[2] = (wc & 0x3F) | 0x80;
     235        2048 :             buf[3] = '\0';
     236        2048 :             char const * s = buf;
     237        2048 :             size_t len(3);
     238        2048 :             char32_t cwc(rand());
     239        2048 :             CATCH_REQUIRE(libutf8::mbstowc(cwc, s, len) == -1);
     240        2048 :             CATCH_REQUIRE(cwc == libutf8::NOT_A_CHARACTER);
     241        2048 :             char const c1(static_cast<char>((wc >> 12) | 0xE0));
     242        2048 :             CATCH_REQUIRE(buf[0] == c1);
     243        2048 :             char const c2(static_cast<char>(((wc >> 6) & 0x3F) | 0x80));
     244        2048 :             CATCH_REQUIRE(buf[1] == c2);
     245        2048 :             char const c3((wc & 0x3F) | 0x80);
     246        2048 :             CATCH_REQUIRE(buf[2] == c3);
     247        2048 :             CATCH_REQUIRE(buf[3] == '\0');
     248        2048 :             CATCH_REQUIRE(s == buf + 3);
     249        2048 :             CATCH_REQUIRE(len == 0);
     250             :         }
     251             :     }
     252             :     CATCH_END_SECTION()
     253             : 
     254             :     //CATCH_START_SECTION("invalid_utf8_to_utf32: Verify that too large a number is not supported")
     255             :     //{
     256             :     //    for(int idx(0); idx < 1000; ++idx)
     257             :     //    {
     258             :     //        char buf[libutf8::MBS_MIN_BUFFER_LENGTH]
     259             :     //        {
     260             :     //            'n', 'o', 't', 'e', 'm' //, 'p', 't', 'y'
     261             :     //        };
     262             :     //        char32_t wc(0);
     263             :     //        do
     264             :     //        {
     265             :     //            wc = (rand() << 16) + (rand() & 0x0000FFFF);
     266             :     //        }
     267             :     //        while(wc < 0x110000);
     268             :     //        CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == -1);
     269             :     //        CATCH_REQUIRE(buf[0] == libutf8::NOT_A_CHARACTER);
     270             :     //    }
     271             :     //}
     272             :     //CATCH_END_SECTION()
     273             : 
     274          10 :     CATCH_START_SECTION("invalid_utf8_to_utf32: Test an invalid UTF-8 sequence (0x80 to 0x7FF)")
     275             :     {
     276        1921 :         for(char32_t wc(0x000080); wc < 0x000800; ++wc)
     277             :         {
     278        1920 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
     279        1920 :             CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 2);
     280             : 
     281        3840 :             char32_t const found(((static_cast<char32_t>(buf[0]) & 0x1F) << 6)
     282        1920 :                                | ((static_cast<char32_t>(buf[1]) & 0x3F) << 0));
     283        1920 :             CATCH_REQUIRE(found == wc);
     284        1920 :             CATCH_REQUIRE(buf[2] == '\0');
     285             : 
     286             :             // too short
     287             :             //
     288        1920 :             char32_t back(rand());
     289        1920 :             char const * s(buf);
     290        1920 :             size_t len(1);
     291        1920 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     292             : 
     293             :             // invalid middle byte
     294             :             //
     295        1920 :             char const second_byte(buf[1]);
     296        1920 :             back = rand();
     297        1920 :             s = buf;
     298        1920 :             int c(rand() % (255 - 0x40) + 1);
     299        1920 :             if(c >= 0x80)
     300             :             {
     301         662 :                 c += 0x40;
     302             :             }
     303        1920 :             buf[1] = static_cast<char>(c);
     304        1920 :             len = 2;
     305        1920 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     306        1920 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     307        1920 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 1) == reinterpret_cast<void const *>(s));
     308        1920 :             CATCH_REQUIRE(len == 1);
     309        1920 :             buf[1] = second_byte;
     310             : 
     311             :             // invalid introducer (0x80 to 0xBF)
     312             :             //
     313        1920 :             back = rand();
     314        1920 :             s = buf;
     315        1920 :             buf[0] = rand() % 64 + 0x80;
     316        1920 :             len = 2;
     317        1920 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     318        1920 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     319        1920 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 2) == reinterpret_cast<void const *>(s));
     320        1920 :             CATCH_REQUIRE(len == 0);
     321             : 
     322             :             // invalid introducer (0xF8 to 0xFF)
     323             :             //
     324        1920 :             back = rand();
     325        1920 :             s = buf;
     326        1920 :             buf[0] = rand() % 8 + 0xF8;
     327        1920 :             len = 2;
     328        1920 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     329        1920 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     330        1920 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 2) == reinterpret_cast<void const *>(s));
     331        1920 :             CATCH_REQUIRE(len == 0);
     332             :         }
     333             :     }
     334             :     CATCH_END_SECTION()
     335             : 
     336          10 :     CATCH_START_SECTION("invalid_utf8_to_utf32: Test an invalid UTF-8 sequence (0x800 to 0xFFFF)")
     337             :     {
     338       61442 :         for(char32_t wc(0x000800); wc < 0x010000; ++wc)
     339             :         {
     340       61442 :             if(wc >= 0xD800 && wc <= 0xDFFF)
     341             :             {
     342             :                 // skip UTF-16 surrogates -- this is not the test for those
     343             :                 //
     344           1 :                 wc = 0xDFFF;
     345           1 :                 continue;
     346             :             }
     347             : 
     348       61440 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
     349       61440 :             CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 3);
     350             : 
     351      122880 :             char32_t const found(((static_cast<char32_t>(buf[0]) & 0x0F) << 12)
     352       61440 :                                | ((static_cast<char32_t>(buf[1]) & 0x3F) <<  6)
     353       61440 :                                | ((static_cast<char32_t>(buf[2]) & 0x3F) <<  0));
     354       61440 :             CATCH_REQUIRE(found == wc);
     355       61440 :             CATCH_REQUIRE(buf[3] == '\0');
     356             : 
     357             :             // too short
     358             :             //
     359       61440 :             char32_t back(rand());
     360       61440 :             char const * s(buf);
     361       61440 :             size_t len(2);
     362       61440 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     363             : 
     364             :             // invalid middle byte
     365             :             //
     366       61440 :             char const second_byte(buf[1]);
     367       61440 :             back = rand();
     368       61440 :             s = buf;
     369       61440 :             int c(rand() % (255 - 0x40) + 1);
     370       61440 :             if(c >= 0x80)
     371             :             {
     372       20703 :                 c += 0x40;
     373             :             }
     374       61440 :             buf[1] = static_cast<char>(c);
     375       61440 :             len = 3;
     376       61440 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     377       61440 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     378       61440 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 1) == reinterpret_cast<void const *>(s));
     379       61440 :             CATCH_REQUIRE(len == 2);
     380       61440 :             buf[1] = second_byte;
     381             : 
     382       61440 :             char const third_byte(buf[2]);
     383       61440 :             back = rand();
     384       61440 :             s = buf;
     385       61440 :             c = rand() % (255 - 0x40) + 1;
     386       61440 :             if(c >= 0x80)
     387             :             {
     388       20710 :                 c += 0x40;
     389             :             }
     390       61440 :             buf[2] = static_cast<char>(c);
     391       61440 :             len = 3;
     392       61440 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     393       61440 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     394       61440 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 2) == reinterpret_cast<void const *>(s));
     395       61440 :             CATCH_REQUIRE(len == 1);
     396       61440 :             buf[2] = third_byte;
     397             : 
     398             :             // invalid introducer (0x80 to 0xBF)
     399             :             //
     400       61440 :             back = rand();
     401       61440 :             s = buf;
     402       61440 :             buf[0] = rand() % 64 + 0x80;
     403       61440 :             len = 3;
     404       61440 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     405       61440 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     406       61440 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 3) == reinterpret_cast<void const *>(s));
     407       61440 :             CATCH_REQUIRE(len == 0);
     408             : 
     409             :             // invalid introducer (0xF8 to 0xFF)
     410             :             //
     411       61440 :             back = rand();
     412       61440 :             s = buf;
     413       61440 :             buf[0] = rand() % 8 + 0xF8;
     414       61440 :             len = 3;
     415       61440 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     416       61440 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     417       61440 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 3) == reinterpret_cast<void const *>(s));
     418       61440 :             CATCH_REQUIRE(len == 0);
     419             :         }
     420             :     }
     421             :     CATCH_END_SECTION()
     422             : 
     423          10 :     CATCH_START_SECTION("invalid_utf8_to_utf32: Test UTF-32 to UTF-8 with characters between 0x010000 and 0x110000 inclusive")
     424             :     {
     425     1048577 :         for(char32_t wc(0x010000); wc < 0x110000; ++wc)
     426             :         {
     427     1048576 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
     428     1048576 :             CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 4);
     429             : 
     430     2097152 :             char32_t const found(((static_cast<char32_t>(buf[0]) & 0x07) << 18)
     431     1048576 :                                | ((static_cast<char32_t>(buf[1]) & 0x3F) << 12)
     432     1048576 :                                | ((static_cast<char32_t>(buf[2]) & 0x3F) <<  6)
     433     1048576 :                                | ((static_cast<char32_t>(buf[3]) & 0x3F) <<  0));
     434     1048576 :             CATCH_REQUIRE(found == wc);
     435     1048576 :             CATCH_REQUIRE(buf[4] == '\0');
     436             : 
     437     1048576 :             char32_t back(rand());
     438     1048576 :             char const * s(buf);
     439             : 
     440             :             // too short
     441             :             //
     442     1048576 :             size_t len(3);
     443     1048576 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     444             : 
     445             :             // invalid middle byte
     446             :             //
     447     1048576 :             char const second_byte(buf[1]);
     448     1048576 :             back = rand();
     449     1048576 :             s = buf;
     450     1048576 :             int c(rand() % (255 - 0x40) + 1);
     451     1048576 :             if(c >= 0x80)
     452             :             {
     453      351086 :                 c += 0x40;
     454             :             }
     455     1048576 :             buf[1] = static_cast<char>(c);
     456     1048576 :             len = 4;
     457     1048576 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     458     1048576 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     459     1048576 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 1) == reinterpret_cast<void const *>(s));
     460     1048576 :             CATCH_REQUIRE(len == 3);
     461     1048576 :             buf[1] = second_byte;
     462             : 
     463     1048576 :             char const third_byte(buf[2]);
     464     1048576 :             back = rand();
     465     1048576 :             s = buf;
     466     1048576 :             c = rand() % (255 - 0x40) + 1;
     467     1048576 :             if(c >= 0x80)
     468             :             {
     469      352058 :                 c += 0x40;
     470             :             }
     471     1048576 :             buf[2] = static_cast<char>(c);
     472     1048576 :             len = 4;
     473     1048576 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     474     1048576 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     475     1048576 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 2) == reinterpret_cast<void const *>(s));
     476     1048576 :             CATCH_REQUIRE(len == 2);
     477     1048576 :             buf[2] = third_byte;
     478             : 
     479     1048576 :             char const forth_byte(buf[3]);
     480     1048576 :             back = rand();
     481     1048576 :             s = buf;
     482     1048576 :             c = rand() % (255 - 0x40) + 1;
     483     1048576 :             if(c >= 0x80)
     484             :             {
     485      351566 :                 c += 0x40;
     486             :             }
     487     1048576 :             buf[3] = static_cast<char>(c);
     488     1048576 :             len = 4;
     489     1048576 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     490     1048576 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     491     1048576 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 3) == reinterpret_cast<void const *>(s));
     492     1048576 :             CATCH_REQUIRE(len == 1);
     493     1048576 :             buf[3] = forth_byte;
     494             : 
     495             :             // invalid introducer (0x80 to 0xBF)
     496             :             //
     497     1048576 :             back = rand();
     498     1048576 :             s = buf;
     499     1048576 :             buf[0] = rand() % 64 + 0x80;
     500     1048576 :             len = 3;
     501     1048576 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     502     1048576 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     503     1048576 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 3) == reinterpret_cast<void const *>(s));
     504     1048576 :             CATCH_REQUIRE(len == 0);
     505             : 
     506             :             // invalid introducer (0x80 to 0xBF)
     507             :             //
     508     1048576 :             back = rand();
     509     1048576 :             buf[0] = rand() % 64 + 0x80;
     510     1048576 :             s = buf;
     511     1048576 :             len = 4;
     512     1048576 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     513     1048576 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     514     1048576 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 4) == reinterpret_cast<void const *>(s));
     515     1048576 :             CATCH_REQUIRE(len == 0);
     516             : 
     517             :             // invalid introducer (0xF8 to 0xFF)
     518             :             //
     519     1048576 :             back = rand();
     520     1048576 :             s = buf;
     521     1048576 :             buf[0] = rand() % 8 + 0xF8;
     522     1048576 :             len = 4;
     523     1048576 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     524     1048576 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     525     1048576 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 4) == reinterpret_cast<void const *>(s));
     526     1048576 :             CATCH_REQUIRE(len == 0);
     527             :         }
     528             :     }
     529             :     CATCH_END_SECTION()
     530             : 
     531          10 :     CATCH_START_SECTION("invalid_utf8_to_utf32: Test three random characters, destroy the second one and make sure it gets skipped properly")
     532             :     {
     533        1001 :         for(int repeat(0); repeat < 1000; ++repeat)
     534             :         {
     535             :             char32_t wc[3]
     536             :             {
     537        1000 :                 unittest::rand_char(true),
     538        1000 :                 unittest::rand_char(true),
     539        1000 :                 unittest::rand_char(true),
     540        3000 :             };
     541        1000 :             size_t sz[3] = {};
     542             : 
     543        1000 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH * 3];
     544        1000 :             char * s(buf);
     545        1000 :             sz[0] += libutf8::wctombs(s, wc[0], sizeof(buf));
     546        1000 :             s += sz[0];
     547        1000 :             sz[1] = libutf8::wctombs(s, wc[1], sizeof(buf));
     548        1000 :             s += sz[1];
     549        1000 :             sz[2] = libutf8::wctombs(s, wc[2], sizeof(buf));
     550             : 
     551        1000 :             char32_t back(rand());
     552        1000 :             s = buf;
     553        1000 :             buf[sz[0]] = rand() % 64 + 0x80;
     554        1000 :             size_t len(sizeof(buf));
     555        1000 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) != -1);
     556        1000 :             CATCH_REQUIRE(back == wc[0]);
     557             : 
     558        1000 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     559        1000 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     560             : 
     561        1000 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) != -1);
     562        1000 :             CATCH_REQUIRE(back == wc[2]);
     563             :         }
     564             :     }
     565             :     CATCH_END_SECTION()
     566          11 : }
     567             : 
     568             : 
     569             : // vim: ts=4 sw=4 et

Generated by: LCOV version 1.13