LCOV - code coverage report
Current view: top level - tests - catch_character.cpp (source / functions) Coverage Total Hit
Test: coverage.info Lines: 100.0 % 337 337
Test Date: 2025-06-22 07:49:47 Functions: 100.0 % 3 3
Legend: Lines: hit not hit

            Line data    Source code
       1              : // Copyright (c) 2021-2023  Made to Order Software Corp.  All Rights Reserved
       2              : //
       3              : // https://snapwebsites.org/project/libutf8
       4              : // contact@m2osw.com
       5              : //
       6              : // This program is free software; you can redistribute it and/or modify
       7              : // it under the terms of the GNU General Public License as published by
       8              : // the Free Software Foundation; either version 2 of the License, or
       9              : // (at your option) any later version.
      10              : //
      11              : // This program is distributed in the hope that it will be useful,
      12              : // but WITHOUT ANY WARRANTY; without even the implied warranty of
      13              : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14              : // GNU General Public License for more details.
      15              : //
      16              : // You should have received a copy of the GNU General Public License along
      17              : // with this program; if not, write to the Free Software Foundation, Inc.,
      18              : // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
      19              : 
      20              : // libutf8
      21              : //
      22              : #include    <libutf8/base.h>
      23              : #include    <libutf8/exception.h>
      24              : 
      25              : 
      26              : // unit test
      27              : //
      28              : #include    "catch_main.h"
      29              : 
      30              : 
      31              : // C++
      32              : //
      33              : #include    <cctype>
      34              : #include    <iostream>
      35              : 
      36              : 
      37              : // last include
      38              : //
      39              : #include    <snapdev/poison.h>
      40              : 
      41              : 
      42              : 
      43            6 : CATCH_TEST_CASE("character_conversions", "[characters]")
      44              : {
      45            6 :     CATCH_START_SECTION("character_conversions: Verify minimum buffer length for MBS conversions")
      46              :     { // sanity check: other sections of this file read buf[4] in a buffer of this size, so it must hold at least 5 chars
      47            1 :         CATCH_REQUIRE(libutf8::MBS_MIN_BUFFER_LENGTH >= 5);
      48              :     }
      49            6 :     CATCH_END_SECTION()
      50              : 
      51            6 :     CATCH_START_SECTION("character_conversions: Test UTF-32 to UTF-8 with characters between 0x00 and 0x7F inclusive")
      52              :     { // round trip: each code point below 0x80 must encode to exactly one byte and decode back to itself
      53          129 :         for(char32_t wc(0); wc < 0x000080; ++wc)
      54              :         {
      55          128 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
      56          128 :             CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 0), libutf8::libutf8_logic_exception); // a zero-length output buffer must be rejected
      57          128 :             CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 1); // expected result: 1, matching the single byte checked below
      58              : 
      59          128 :             CATCH_REQUIRE(static_cast<char32_t>(buf[0]) == wc);
      60          128 :             CATCH_REQUIRE(buf[1] == '\0'); // the output is expected to be NUL terminated
      61              : 
      62          128 :             char32_t back(rand()); // random start value; the check below expects mbstowc() to overwrite it
      63          128 :             char const * s(buf);
      64          128 :             size_t len(1); // number of input bytes handed to the decoder
      65          128 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == 1);
      66          128 :             CATCH_REQUIRE(back == wc);
      67              :         }
      68              :     }
      69            6 :     CATCH_END_SECTION()
      70              : 
      71            6 :     CATCH_START_SECTION("character_conversions: Test UTF-32 to UTF-8 with characters between 0x80 and 0x7FF inclusive")
      72              :     { // round trip: each code point in 0x80..0x7FF must encode to exactly two bytes and decode back to itself
      73         1921 :         for(char32_t wc(0x000080); wc < 0x000800; ++wc)
      74              :         {
      75         1920 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
      76         1920 :             CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 0), libutf8::libutf8_logic_exception); // output buffer sizes 0 and 1 must both be rejected
      77         1920 :             CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 1), libutf8::libutf8_logic_exception);
      78         1920 :             CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 2);
      79              : 
      80         1920 :             char32_t const found(((static_cast<char32_t>(buf[0]) & 0x1F) << 6) // rebuild the code point by hand: low 5 bits of the first byte ...
      81         1920 :                                | ((static_cast<char32_t>(buf[1]) & 0x3F) << 0)); // ... followed by the low 6 bits of the second byte
      82         1920 :             CATCH_REQUIRE(found == wc);
      83         1920 :             CATCH_REQUIRE(buf[2] == '\0'); // the output is expected to be NUL terminated
      84              : 
      85         1920 :             char32_t back(rand()); // random start value; the check below expects mbstowc() to overwrite it
      86         1920 :             char const * s(buf);
      87         1920 :             size_t len(2);
      88         1920 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == 2);
      89         1920 :             CATCH_REQUIRE(back == wc);
      90              :         }
      91              :     }
      92            6 :     CATCH_END_SECTION()
      92            6 :     CATCH_END_SECTION()
      93              : 
      94            6 :     CATCH_START_SECTION("character_conversions: Test UTF-32 to UTF-8 with characters between 0x800 and 0xFFFF inclusive")
      95              :     { // round trip: each non-surrogate code point in 0x800..0xFFFF must encode to exactly three bytes and decode back to itself
      96        61442 :         for(char32_t wc(0x000800); wc < 0x010000; ++wc)
      97              :         {
      98        61441 :             if(wc >= 0xD800 && wc <= 0xDFFF)
      99              :             {
     100              :                 // skip UTF-16 surrogates (0xD800 to 0xDFFF); the loop's ++wc then resumes at 0xE000
     101              :                 //
     102            1 :                 wc = 0xDFFF;
     103            1 :                 continue;
     104              :             }
     105              : 
     106        61440 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
     107        61440 :             if(rand() % 10 == 0) // run the too-small-buffer checks on about one iteration in ten only
     108              :             {
     109         6085 :                 CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 0), libutf8::libutf8_logic_exception);
     110         6085 :                 CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 1), libutf8::libutf8_logic_exception);
     111         6085 :                 CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 2), libutf8::libutf8_logic_exception);
     112              :             }
     113        61440 :             CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 3);
     114              : 
     115        61440 :             char32_t const found(((static_cast<char32_t>(buf[0]) & 0x0F) << 12) // rebuild the code point by hand: 4 bits, then 6 bits, then 6 bits
     116        61440 :                                | ((static_cast<char32_t>(buf[1]) & 0x3F) <<  6)
     117        61440 :                                | ((static_cast<char32_t>(buf[2]) & 0x3F) <<  0));
     118        61440 :             CATCH_REQUIRE(found == wc);
     119        61440 :             CATCH_REQUIRE(buf[3] == '\0'); // the output is expected to be NUL terminated
     120              : 
     121        61440 :             char32_t back(rand()); // random start value; the check below expects mbstowc() to overwrite it
     122        61440 :             char const * s(buf);
     123        61440 :             size_t len(3);
     124        61440 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == 3);
     125        61440 :             CATCH_REQUIRE(back == wc);
     126              :         }
     127              :     }
     128            6 :     CATCH_END_SECTION()
     129              : 
     130            6 :     CATCH_START_SECTION("character_conversions: Test UTF-32 to UTF-8 with characters between 0x10000 and 0x10FFFF inclusive")
     131              :     { // round trip: each code point in 0x10000..0x10FFFF must encode to exactly four bytes and decode back to itself
     132      1048577 :         for(char32_t wc(0x010000); wc < 0x110000; ++wc)
     133              :         {
     134      1048576 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
     135      1048576 :             if(rand() % 100 == 0) // run the too-small-buffer checks on about one iteration in a hundred only
     136              :             {
     137        10489 :                 CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 0), libutf8::libutf8_logic_exception);
     138        10489 :                 CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 1), libutf8::libutf8_logic_exception);
     139        10489 :                 CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 2), libutf8::libutf8_logic_exception);
     140        10489 :                 CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 3), libutf8::libutf8_logic_exception);
     141              :             }
     142      1048576 :             CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 4);
     143              : 
     144      1048576 :             char32_t const found(((static_cast<char32_t>(buf[0]) & 0x07) << 18) // rebuild the code point by hand: 3 bits, then three groups of 6 bits
     145      1048576 :                                | ((static_cast<char32_t>(buf[1]) & 0x3F) << 12)
     146      1048576 :                                | ((static_cast<char32_t>(buf[2]) & 0x3F) <<  6)
     147      1048576 :                                | ((static_cast<char32_t>(buf[3]) & 0x3F) <<  0));
     148      1048576 :             CATCH_REQUIRE(found == wc);
     149      1048576 :             CATCH_REQUIRE(buf[4] == '\0'); // the output is expected to be NUL terminated; this is why the buffer needs at least 5 chars
     150              : 
     151      1048576 :             char32_t back(rand()); // random start value; the check below expects mbstowc() to overwrite it
     152      1048576 :             char const * s(buf);
     153      1048576 :             size_t len(4);
     154      1048576 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == 4);
     155      1048576 :             CATCH_REQUIRE(back == wc);
     156              :         }
     157              :     }
     158            6 :     CATCH_END_SECTION()
     159              : 
     160            6 :     CATCH_START_SECTION("character_conversions: Test UTF-32 to UTF-8 with an empty input string")
     161              :     { // NOTE(review): the title says UTF-32 to UTF-8, but the call under test is mbstowc(), which fills a char32_t from a char buffer
     162           11 :         for(char32_t repeat(0); repeat < 10; ++repeat)
     163              :         {
     164           10 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
     165           10 :             char copy[libutf8::MBS_MIN_BUFFER_LENGTH];
     166           60 :             for(size_t idx(0); idx < libutf8::MBS_MIN_BUFFER_LENGTH; ++idx)
     167              :             {
     168           50 :                 buf[idx] = rand(); // random content (the int is implicitly narrowed to char)
     169           50 :                 copy[idx] = buf[idx]; // reference copy used below to verify that buf was left untouched
     170              :             }
     171           10 :             char const * s(buf);
     172           10 :             char32_t null = rand(); // random start value; the check below expects mbstowc() to replace it with '\0'
     173           10 :             size_t len(0); // zero input bytes available
     174           10 :             CATCH_REQUIRE(libutf8::mbstowc(null, s, len) == 0);
     175           10 :             CATCH_REQUIRE(null == '\0');
     176           60 :             for(size_t idx(0); idx < libutf8::MBS_MIN_BUFFER_LENGTH; ++idx)
     177              :             {
     178           50 :                 CATCH_REQUIRE(copy[idx] == buf[idx]);
     179              :             }
     180              :         }
     181              :     }
     182            6 :     CATCH_END_SECTION()
     183            6 : }
     184              : 
     185              : 
     186            2 : CATCH_TEST_CASE("invalid_utf32_to_utf8", "[characters],[invalid]")
     187              : {
     188            2 :     CATCH_START_SECTION("invalid_utf32_to_utf8: Verify that surrogates do not work in UTF-8")
     189              :     { // every UTF-16 surrogate code point (0xD800 to 0xDFFF) must be refused by the encoder
     190         2049 :         for(char32_t wc(0x00D800); wc < 0x00E000; ++wc)
     191              :         {
     192         2048 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH]
     193              :             {
     194              :                 'n', 'o', 't', 'e', 'm' //, 'p', 't', 'y'
     195              :             }; // pre-filled with non-zero bytes so the buf[0] check below shows that the encoder wrote the NUL
     196         2048 :             CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == -1); // -1 is the expected error result
     197         2048 :             CATCH_REQUIRE(buf[0] == '\0'); // on error the output is expected to be an empty string
     198              :         }
     199              :     }
     200            2 :     CATCH_END_SECTION()
     201              : 
     202            2 :     CATCH_START_SECTION("invalid_utf32_to_utf8: Verify that too large a number is not supported")
     203              :     { // random values at or above 0x110000 must be refused by the encoder
     204         1001 :         for(int repeat(0); repeat < 1000; ++repeat)
     205              :         {
     206         1000 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH]
     207              :             {
     208              :                 'n', 'o', 't', 'e', 'm' //, 'p', 't', 'y'
     209              :             }; // pre-filled with non-zero bytes so the buf[0] check below shows that the encoder wrote the NUL
     210         1000 :             char32_t wc(0);
     211              :             do
     212              :             { // the shift is done on char32_t: rand() << 16 on a plain int overflows (undefined behavior) when RAND_MAX is larger than 0x7FFF
     213         1001 :                 wc = (static_cast<char32_t>(rand()) << 16) + (rand() & 0x0000FFFF);
     214              :             }
     215         1001 :             while(wc < 0x110000); // retry until the value is outside of the Unicode range
     216         1000 :             CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == -1); // -1 is the expected error result
     217         1000 :             CATCH_REQUIRE(buf[0] == '\0'); // on error the output is expected to be an empty string
     218              :         }
     219              :     }
     220            2 :     CATCH_END_SECTION()
     221            2 : }
     222              : 
     223              : 
     224            5 : CATCH_TEST_CASE("invalid_utf8_to_utf32", "[characters],[invalid]")
     225              : {
     226            5 :     CATCH_START_SECTION("invalid_utf8_to_utf32: Verify that surrogates do not work in UTF-8")
     227              :     { // surrogates written by hand as 3-byte sequences must be refused by the decoder
     228         2049 :         for(char32_t wc(0x00D800); wc < 0x00E000; ++wc)
     229              :         {
     230              : //int mbstowc(char32_t & wc, char const * & mb, size_t & len);
     231         2048 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH] = {};
     232         2048 :             buf[0] = static_cast<char>((wc >> 12) | 0xE0); // build the three bytes by hand instead of calling wctombs()
     233         2048 :             buf[1] = ((wc >> 6) & 0x3F) | 0x80;
     234         2048 :             buf[2] = (wc & 0x3F) | 0x80;
     235         2048 :             buf[3] = '\0';
     236         2048 :             char const * s = buf;
     237         2048 :             size_t len(3);
     238         2048 :             char32_t cwc(rand()); // random start value; the check below expects mbstowc() to replace it with NOT_A_CHARACTER
     239         2048 :             CATCH_REQUIRE(libutf8::mbstowc(cwc, s, len) == -1);
     240         2048 :             CATCH_REQUIRE(cwc == libutf8::NOT_A_CHARACTER);
     241         2048 :             char const c1(static_cast<char>((wc >> 12) | 0xE0)); // recompute each byte to verify that the input buffer was not modified
     242         2048 :             CATCH_REQUIRE(buf[0] == c1);
     243         2048 :             char const c2(static_cast<char>(((wc >> 6) & 0x3F) | 0x80));
     244         2048 :             CATCH_REQUIRE(buf[1] == c2);
     245         2048 :             char const c3((wc & 0x3F) | 0x80);
     246         2048 :             CATCH_REQUIRE(buf[2] == c3);
     247         2048 :             CATCH_REQUIRE(buf[3] == '\0');
     248         2048 :             CATCH_REQUIRE(s == buf + 3); // the whole 3-byte sequence is expected to be consumed ...
     249         2048 :             CATCH_REQUIRE(len == 0); // ... leaving no input bytes
     250              :         }
     251              :     }
     252            5 :     CATCH_END_SECTION()
     253              : 
     254              :     //CATCH_START_SECTION("invalid_utf8_to_utf32: Verify that too large a number is not supported")
     255              :     //{
     256              :     //    for(int idx(0); idx < 1000; ++idx)
     257              :     //    {
     258              :     //        char buf[libutf8::MBS_MIN_BUFFER_LENGTH]
     259              :     //        {
     260              :     //            'n', 'o', 't', 'e', 'm' //, 'p', 't', 'y'
     261              :     //        };
     262              :     //        char32_t wc(0);
     263              :     //        do
     264              :     //        {
     265              :     //            wc = (rand() << 16) + (rand() & 0x0000FFFF);
     266              :     //        }
     267              :     //        while(wc < 0x110000);
     268              :     //        CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == -1);
     269              :     //        CATCH_REQUIRE(buf[0] == libutf8::NOT_A_CHARACTER);
     270              :     //    }
     271              :     //}
     272              :     //CATCH_END_SECTION()
     273              : 
     274            5 :     CATCH_START_SECTION("invalid_utf8_to_utf32: Test an invalid UTF-8 sequence (0x80 to 0x7FF)")
     275              :     { // encode each 2-byte character correctly, then damage the input in several ways and expect mbstowc() to fail
     276         1921 :         for(char32_t wc(0x000080); wc < 0x000800; ++wc)
     277              :         {
     278         1920 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
     279         1920 :             CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 2);
     280              : 
     281         1920 :             char32_t const found(((static_cast<char32_t>(buf[0]) & 0x1F) << 6) // first make sure the valid encoding is what we expect
     282         1920 :                                | ((static_cast<char32_t>(buf[1]) & 0x3F) << 0));
     283         1920 :             CATCH_REQUIRE(found == wc);
     284         1920 :             CATCH_REQUIRE(buf[2] == '\0');
     285              : 
     286              :             // too short (only 1 of the 2 bytes is made available)
     287              :             //
     288         1920 :             char32_t back(rand());
     289         1920 :             char const * s(buf);
     290         1920 :             size_t len(1);
     291         1920 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     292              : 
     293              :             // invalid middle byte
     294              :             //
     295         1920 :             char const second_byte(buf[1]); // saved so the valid byte can be restored afterward
     296         1920 :             back = rand();
     297         1920 :             s = buf;
     298         1920 :             int c(rand() % (255 - 0x40) + 1); // 0x01 to 0xBF ...
     299         1920 :             if(c >= 0x80)
     300              :             {
     301          635 :                 c += 0x40; // ... with 0x80 to 0xBF moved to 0xC0 to 0xFF, so c is never '\0' and never a continuation byte
     302              :             }
     303         1920 :             buf[1] = static_cast<char>(c);
     304         1920 :             len = 2;
     305         1920 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     306         1920 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     307         1920 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 1) == reinterpret_cast<void const *>(s)); // s is expected to stop on the damaged byte
     308         1920 :             CATCH_REQUIRE(len == 1);
     309         1920 :             buf[1] = second_byte;
     310              : 
     311              :             // invalid introducer (0x80 to 0xBF)
     312              :             //
     313         1920 :             back = rand();
     314         1920 :             s = buf;
     315         1920 :             buf[0] = rand() % 64 + 0x80;
     316         1920 :             len = 2;
     317         1920 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     318         1920 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     319         1920 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 2) == reinterpret_cast<void const *>(s)); // both bytes are expected to be consumed
     320         1920 :             CATCH_REQUIRE(len == 0);
     321              : 
     322              :             // invalid introducer (0xF8 to 0xFF)
     323              :             //
     324         1920 :             back = rand();
     325         1920 :             s = buf;
     326         1920 :             buf[0] = rand() % 8 + 0xF8;
     327         1920 :             len = 2;
     328         1920 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     329         1920 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     330         1920 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 2) == reinterpret_cast<void const *>(s)); // both bytes are expected to be consumed
     331         1920 :             CATCH_REQUIRE(len == 0);
     332              :         }
     333              :     }
     334            5 :     CATCH_END_SECTION()
     334            5 :     CATCH_END_SECTION()
     335              : 
     336            5 :     CATCH_START_SECTION("invalid_utf8_to_utf32: Test an invalid UTF-8 sequence (0x800 to 0xFFFF)")
     337              :     { // encode each 3-byte character correctly, then damage the input in several ways and expect mbstowc() to fail
     338        61442 :         for(char32_t wc(0x000800); wc < 0x010000; ++wc)
     339              :         {
     340        61441 :             if(wc >= 0xD800 && wc <= 0xDFFF)
     341              :             {
     342              :                 // skip UTF-16 surrogates -- this is not the test for those; the loop's ++wc then resumes at 0xE000
     343              :                 //
     344            1 :                 wc = 0xDFFF;
     345            1 :                 continue;
     346              :             }
     347              : 
     348        61440 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
     349        61440 :             CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 3);
     350              : 
     351        61440 :             char32_t const found(((static_cast<char32_t>(buf[0]) & 0x0F) << 12) // first make sure the valid encoding is what we expect
     352        61440 :                                | ((static_cast<char32_t>(buf[1]) & 0x3F) <<  6)
     353        61440 :                                | ((static_cast<char32_t>(buf[2]) & 0x3F) <<  0));
     354        61440 :             CATCH_REQUIRE(found == wc);
     355        61440 :             CATCH_REQUIRE(buf[3] == '\0');
     356              : 
     357              :             // too short (only 2 of the 3 bytes are made available)
     358              :             //
     359        61440 :             char32_t back(rand());
     360        61440 :             char const * s(buf);
     361        61440 :             size_t len(2);
     362        61440 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     363              : 
     364              :             // invalid middle byte (second byte first, then third byte)
     365              :             //
     366        61440 :             char const second_byte(buf[1]); // saved so the valid byte can be restored afterward
     367        61440 :             back = rand();
     368        61440 :             s = buf;
     369        61440 :             int c(rand() % (255 - 0x40) + 1); // 0x01 to 0xBF ...
     370        61440 :             if(c >= 0x80)
     371              :             {
     372        20649 :                 c += 0x40; // ... with 0x80 to 0xBF moved to 0xC0 to 0xFF, so c is never '\0' and never a continuation byte
     373              :             }
     374        61440 :             buf[1] = static_cast<char>(c);
     375        61440 :             len = 3;
     376        61440 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     377        61440 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     378        61440 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 1) == reinterpret_cast<void const *>(s)); // s is expected to stop on the damaged byte
     379        61440 :             CATCH_REQUIRE(len == 2);
     380        61440 :             buf[1] = second_byte;
     381              : 
     382        61440 :             char const third_byte(buf[2]); // same check again, this time damaging the third byte
     383        61440 :             back = rand();
     384        61440 :             s = buf;
     385        61440 :             c = rand() % (255 - 0x40) + 1;
     386        61440 :             if(c >= 0x80)
     387              :             {
     388        20676 :                 c += 0x40;
     389              :             }
     390        61440 :             buf[2] = static_cast<char>(c);
     391        61440 :             len = 3;
     392        61440 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     393        61440 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     394        61440 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 2) == reinterpret_cast<void const *>(s)); // s is expected to stop on the damaged byte
     395        61440 :             CATCH_REQUIRE(len == 1);
     396        61440 :             buf[2] = third_byte;
     397              : 
     398              :             // invalid introducer (0x80 to 0xBF)
     399              :             //
     400        61440 :             back = rand();
     401        61440 :             s = buf;
     402        61440 :             buf[0] = rand() % 64 + 0x80;
     403        61440 :             len = 3;
     404        61440 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     405        61440 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     406        61440 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 3) == reinterpret_cast<void const *>(s)); // all three bytes are expected to be consumed
     407        61440 :             CATCH_REQUIRE(len == 0);
     408              : 
     409              :             // invalid introducer (0xF8 to 0xFF)
     410              :             //
     411        61440 :             back = rand();
     412        61440 :             s = buf;
     413        61440 :             buf[0] = rand() % 8 + 0xF8;
     414        61440 :             len = 3;
     415        61440 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     416        61440 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     417        61440 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 3) == reinterpret_cast<void const *>(s)); // all three bytes are expected to be consumed
     418        61440 :             CATCH_REQUIRE(len == 0);
     419              :         }
     420              :     }
     421            5 :     CATCH_END_SECTION()
     422              : 
     423            5 :     CATCH_START_SECTION("invalid_utf8_to_utf32: Test UTF-32 to UTF-8 with characters between 0x010000 and 0x110000 inclusive")
     424              :     { // NOTE(review): despite its title, this section damages valid 4-byte sequences and expects mbstowc() to fail, like the two sections before it; the loop also stops at 0x10FFFF, not 0x110000
     425      1048577 :         for(char32_t wc(0x010000); wc < 0x110000; ++wc)
     426              :         {
     427      1048576 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
     428      1048576 :             CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 4);
     429              : 
     430      1048576 :             char32_t const found(((static_cast<char32_t>(buf[0]) & 0x07) << 18) // first make sure the valid encoding is what we expect
     431      1048576 :                                | ((static_cast<char32_t>(buf[1]) & 0x3F) << 12)
     432      1048576 :                                | ((static_cast<char32_t>(buf[2]) & 0x3F) <<  6)
     433      1048576 :                                | ((static_cast<char32_t>(buf[3]) & 0x3F) <<  0));
     434      1048576 :             CATCH_REQUIRE(found == wc);
     435      1048576 :             CATCH_REQUIRE(buf[4] == '\0');
     436              : 
     437      1048576 :             char32_t back(rand());
     438      1048576 :             char const * s(buf);
     439              : 
     440              :             // too short (only 3 of the 4 bytes are made available)
     441              :             //
     442      1048576 :             size_t len(3);
     443      1048576 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     444              : 
     445              :             // invalid middle byte (second byte first, then third, then fourth)
     446              :             //
     447      1048576 :             char const second_byte(buf[1]); // saved so the valid byte can be restored afterward
     448      1048576 :             back = rand();
     449      1048576 :             s = buf;
     450      1048576 :             int c(rand() % (255 - 0x40) + 1); // 0x01 to 0xBF ...
     451      1048576 :             if(c >= 0x80)
     452              :             {
     453       352013 :                 c += 0x40; // ... with 0x80 to 0xBF moved to 0xC0 to 0xFF, so c is never '\0' and never a continuation byte
     454              :             }
     455      1048576 :             buf[1] = static_cast<char>(c);
     456      1048576 :             len = 4;
     457      1048576 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     458      1048576 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     459      1048576 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 1) == reinterpret_cast<void const *>(s)); // s is expected to stop on the damaged byte
     460      1048576 :             CATCH_REQUIRE(len == 3);
     461      1048576 :             buf[1] = second_byte;
     462              : 
     463      1048576 :             char const third_byte(buf[2]); // same check again, this time damaging the third byte
     464      1048576 :             back = rand();
     465      1048576 :             s = buf;
     466      1048576 :             c = rand() % (255 - 0x40) + 1;
     467      1048576 :             if(c >= 0x80)
     468              :             {
     469       351765 :                 c += 0x40;
     470              :             }
     471      1048576 :             buf[2] = static_cast<char>(c);
     472      1048576 :             len = 4;
     473      1048576 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     474      1048576 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     475      1048576 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 2) == reinterpret_cast<void const *>(s)); // s is expected to stop on the damaged byte
     476      1048576 :             CATCH_REQUIRE(len == 2);
     477      1048576 :             buf[2] = third_byte;
     478              : 
     479      1048576 :             char const forth_byte(buf[3]); // same check again, this time damaging the fourth byte (NOTE(review): "forth" is a typo for "fourth")
     480      1048576 :             back = rand();
     481      1048576 :             s = buf;
     482      1048576 :             c = rand() % (255 - 0x40) + 1;
     483      1048576 :             if(c >= 0x80)
     484              :             {
     485       351963 :                 c += 0x40;
     486              :             }
     487      1048576 :             buf[3] = static_cast<char>(c);
     488      1048576 :             len = 4;
     489      1048576 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     490      1048576 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     491      1048576 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 3) == reinterpret_cast<void const *>(s)); // s is expected to stop on the damaged byte
     492      1048576 :             CATCH_REQUIRE(len == 1);
     493      1048576 :             buf[3] = forth_byte;
     494              : 
     495              :             // invalid introducer (0x80 to 0xBF) with only 3 of the 4 bytes made available
     496              :             //
     497      1048576 :             back = rand();
     498      1048576 :             s = buf;
     499      1048576 :             buf[0] = rand() % 64 + 0x80;
     500      1048576 :             len = 3;
     501      1048576 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     502      1048576 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     503      1048576 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 3) == reinterpret_cast<void const *>(s)); // the three available bytes are expected to be consumed
     504      1048576 :             CATCH_REQUIRE(len == 0);
     505              : 
     506              :             // invalid introducer (0x80 to 0xBF) with all 4 bytes made available
     507              :             //
     508      1048576 :             back = rand();
     509      1048576 :             buf[0] = rand() % 64 + 0x80;
     510      1048576 :             s = buf;
     511      1048576 :             len = 4;
     512      1048576 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     513      1048576 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     514      1048576 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 4) == reinterpret_cast<void const *>(s)); // all four bytes are expected to be consumed
     515      1048576 :             CATCH_REQUIRE(len == 0);
     516              : 
     517              :             // invalid introducer (0xF8 to 0xFF)
     518              :             //
     519      1048576 :             back = rand();
     520      1048576 :             s = buf;
     521      1048576 :             buf[0] = rand() % 8 + 0xF8;
     522      1048576 :             len = 4;
     523      1048576 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     524      1048576 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     525      1048576 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 4) == reinterpret_cast<void const *>(s)); // all four bytes are expected to be consumed
     526      1048576 :             CATCH_REQUIRE(len == 0);
     527              :         }
     528              :     }
     529            5 :     CATCH_END_SECTION()
     530              : 
     531            5 :     CATCH_START_SECTION("invalid_utf8_to_utf32: Test three random characters, destroy the second one and make sure it gets skipped properly")
     532              :     { // encode three characters back to back, overwrite the first byte of the second one, then decode the three in sequence
     533         1001 :         for(int repeat(0); repeat < 1000; ++repeat)
     534              :         {
     535              :             char32_t wc[3]
     536              :             {
     537         1000 :                 unittest::rand_char(true),
     538         1000 :                 unittest::rand_char(true),
     539         1000 :                 unittest::rand_char(true),
     540         1000 :             };
     541         1000 :             size_t sz[3] = {}; // encoded size of each character
     542              : 
     543         1000 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH * 3];
     544         1000 :             char * s(buf);
     545         1000 :             sz[0] += libutf8::wctombs(s, wc[0], sizeof(buf)); // NOTE(review): += on a zeroed entry acts like the = used for sz[1] and sz[2]
     546         1000 :             s += sz[0];
     547         1000 :             sz[1] = libutf8::wctombs(s, wc[1], sizeof(buf)); // NOTE(review): s has advanced, so sizeof(buf) is more than the space left from s on
     548         1000 :             s += sz[1];
     549         1000 :             sz[2] = libutf8::wctombs(s, wc[2], sizeof(buf));
     550              : 
     551         1000 :             char32_t back(rand());
     552         1000 :             s = buf;
     553         1000 :             buf[sz[0]] = rand() % 64 + 0x80; // replace the first byte of the second character with a random byte in 0x80 to 0xBF
     554         1000 :             size_t len(sizeof(buf));
     555         1000 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) != -1); // the first character is untouched and must decode
     556         1000 :             CATCH_REQUIRE(back == wc[0]);
     557              : 
     558         1000 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1); // the damaged second character must be reported as an error
     559         1000 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     560              : 
     561         1000 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) != -1); // decoding is expected to resume exactly at the third character
     562         1000 :             CATCH_REQUIRE(back == wc[2]);
     563              :         }
     564              :     }
     565            5 :     CATCH_END_SECTION()
     566            5 : }
     567              : 
     568              : 
     569              : // vim: ts=4 sw=4 et
        

Generated by: LCOV version 2.0-1

Snap C++ | List of projects | List of versions