LCOV - code coverage report
Current view: top level - tests - catch_character.cpp (source / functions) Coverage Total Hit
Test: coverage.info Lines: 100.0 % 337 337
Test Date: 2025-08-03 08:53:08 Functions: 100.0 % 3 3
Legend: Lines: hit not hit

            Line data    Source code
       1              : // Copyright (c) 2021-2025  Made to Order Software Corp.  All Rights Reserved
       2              : //
       3              : // https://snapwebsites.org/project/libutf8
       4              : // contact@m2osw.com
       5              : //
       6              : // This program is free software: you can redistribute it and/or modify
       7              : // it under the terms of the GNU General Public License as published by
       8              : // the Free Software Foundation, either version 3 of the License, or
       9              : // (at your option) any later version.
      10              : //
      11              : // This program is distributed in the hope that it will be useful,
      12              : // but WITHOUT ANY WARRANTY; without even the implied warranty of
      13              : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14              : // GNU General Public License for more details.
      15              : //
      16              : // You should have received a copy of the GNU General Public License
      17              : // along with this program.  If not, see <https://www.gnu.org/licenses/>.
      18              : 
      19              : // libutf8
      20              : //
      21              : #include    <libutf8/base.h>
      22              : #include    <libutf8/exception.h>
      23              : 
      24              : 
      25              : // unit test
      26              : //
      27              : #include    "catch_main.h"
      28              : 
      29              : 
      30              : // C++
      31              : //
      32              : #include    <cctype>
      33              : #include    <iostream>
      34              : 
      35              : 
      36              : // last include
      37              : //
      38              : #include    <snapdev/poison.h>
      39              : 
      40              : 
      41              : 
      42            6 : CATCH_TEST_CASE("character_conversions", "[characters]")
      43              : {
      44            6 :     CATCH_START_SECTION("character_conversions: Verify minimum buffer length for MBS conversions")
      45              :     {
      46            1 :         CATCH_REQUIRE(libutf8::MBS_MIN_BUFFER_LENGTH >= 5);
      47              :     }
      48            6 :     CATCH_END_SECTION()
      49              : 
      50            6 :     CATCH_START_SECTION("character_conversions: Test UTF-32 to UTF-8 with characters between 0x00 and 0x7F inclusive")
      51              :     {
      52          129 :         for(char32_t wc(0); wc < 0x000080; ++wc)
      53              :         {
      54          128 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
      55          128 :             CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 0), libutf8::libutf8_logic_exception);
      56          128 :             CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 1);
      57              : 
      58          128 :             CATCH_REQUIRE(static_cast<char32_t>(buf[0]) == wc);
      59          128 :             CATCH_REQUIRE(buf[1] == '\0');
      60              : 
      61          128 :             char32_t back(rand());
      62          128 :             char const * s(buf);
      63          128 :             size_t len(1);
      64          128 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == 1);
      65          128 :             CATCH_REQUIRE(back == wc);
      66              :         }
      67              :     }
      68            6 :     CATCH_END_SECTION()
      69              : 
      70            6 :     CATCH_START_SECTION("character_conversions: Test UTF-32 to UTF-8 with characters between 0x80 and 0x7FF inclusive")
      71              :     {
      72         1921 :         for(char32_t wc(0x000080); wc < 0x000800; ++wc)
      73              :         {
      74         1920 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
      75         1920 :             CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 0), libutf8::libutf8_logic_exception);
      76         1920 :             CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 1), libutf8::libutf8_logic_exception);
      77         1920 :             CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 2);
      78              : 
      79         1920 :             char32_t const found(((static_cast<char32_t>(buf[0]) & 0x1F) << 6)
      80         1920 :                                | ((static_cast<char32_t>(buf[1]) & 0x3F) << 0));
      81         1920 :             CATCH_REQUIRE(found == wc);
      82         1920 :             CATCH_REQUIRE(buf[2] == '\0');
      83              : 
      84         1920 :             char32_t back(rand());
      85         1920 :             char const * s(buf);
      86         1920 :             size_t len(2);
      87         1920 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == 2);
      88         1920 :             CATCH_REQUIRE(back == wc);
      89              :         }
      90              :     }
      91            6 :     CATCH_END_SECTION()
      92              : 
      93            6 :     CATCH_START_SECTION("character_conversions: Test UTF-32 to UTF-8 with characters between 0x800 and 0xFFFF inclusive")
      94              :     {
      95        61442 :         for(char32_t wc(0x000800); wc < 0x010000; ++wc)
      96              :         {
      97        61441 :             if(wc >= 0xD800 && wc <= 0xDFFF)
      98              :             {
      99              :                 // skip UTF-16 surrogates
     100              :                 //
     101            1 :                 wc = 0xDFFF;
     102            1 :                 continue;
     103              :             }
     104              : 
     105        61440 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
     106        61440 :             if(rand() % 10 == 0)
     107              :             {
     108         6213 :                 CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 0), libutf8::libutf8_logic_exception);
     109         6213 :                 CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 1), libutf8::libutf8_logic_exception);
     110         6213 :                 CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 2), libutf8::libutf8_logic_exception);
     111              :             }
     112        61440 :             CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 3);
     113              : 
     114        61440 :             char32_t const found(((static_cast<char32_t>(buf[0]) & 0x0F) << 12)
     115        61440 :                                | ((static_cast<char32_t>(buf[1]) & 0x3F) <<  6)
     116        61440 :                                | ((static_cast<char32_t>(buf[2]) & 0x3F) <<  0));
     117        61440 :             CATCH_REQUIRE(found == wc);
     118        61440 :             CATCH_REQUIRE(buf[3] == '\0');
     119              : 
     120        61440 :             char32_t back(rand());
     121        61440 :             char const * s(buf);
     122        61440 :             size_t len(3);
     123        61440 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == 3);
     124        61440 :             CATCH_REQUIRE(back == wc);
     125              :         }
     126              :     }
     127            6 :     CATCH_END_SECTION()
     128              : 
     129            6 :     CATCH_START_SECTION("character_conversions: Test UTF-32 to UTF-8 with characters between 0x10000 and 0x10FFFF inclusive")
     130              :     {
     131      1048577 :         for(char32_t wc(0x010000); wc < 0x110000; ++wc)
     132              :         {
     133      1048576 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
     134      1048576 :             if(rand() % 100 == 0)
     135              :             {
     136        10482 :                 CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 0), libutf8::libutf8_logic_exception);
     137        10482 :                 CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 1), libutf8::libutf8_logic_exception);
     138        10482 :                 CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 2), libutf8::libutf8_logic_exception);
     139        10482 :                 CATCH_REQUIRE_THROWS_AS(libutf8::wctombs(buf, wc, 3), libutf8::libutf8_logic_exception);
     140              :             }
     141      1048576 :             CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 4);
     142              : 
     143      1048576 :             char32_t const found(((static_cast<char32_t>(buf[0]) & 0x07) << 18)
     144      1048576 :                                | ((static_cast<char32_t>(buf[1]) & 0x3F) << 12)
     145      1048576 :                                | ((static_cast<char32_t>(buf[2]) & 0x3F) <<  6)
     146      1048576 :                                | ((static_cast<char32_t>(buf[3]) & 0x3F) <<  0));
     147      1048576 :             CATCH_REQUIRE(found == wc);
     148      1048576 :             CATCH_REQUIRE(buf[4] == '\0');
     149              : 
     150      1048576 :             char32_t back(rand());
     151      1048576 :             char const * s(buf);
     152      1048576 :             size_t len(4);
     153      1048576 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == 4);
     154      1048576 :             CATCH_REQUIRE(back == wc);
     155              :         }
     156              :     }
     157            6 :     CATCH_END_SECTION()
     158              : 
     159            6 :     CATCH_START_SECTION("character_conversions: Test UTF-32 to UTF-8 with an empty input string")
     160              :     {
     161           11 :         for(char32_t repeat(0); repeat < 10; ++repeat)
     162              :         {
     163           10 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
     164           10 :             char copy[libutf8::MBS_MIN_BUFFER_LENGTH];
     165           60 :             for(size_t idx(0); idx < libutf8::MBS_MIN_BUFFER_LENGTH; ++idx)
     166              :             {
     167           50 :                 buf[idx] = rand();
     168           50 :                 copy[idx] = buf[idx];
     169              :             }
     170           10 :             char const * s(buf);
     171           10 :             char32_t null = rand();
     172           10 :             size_t len(0);
     173           10 :             CATCH_REQUIRE(libutf8::mbstowc(null, s, len) == 0);
     174           10 :             CATCH_REQUIRE(null == '\0');
     175           60 :             for(size_t idx(0); idx < libutf8::MBS_MIN_BUFFER_LENGTH; ++idx)
     176              :             {
     177           50 :                 CATCH_REQUIRE(copy[idx] == buf[idx]);
     178              :             }
     179              :         }
     180              :     }
     181            6 :     CATCH_END_SECTION()
     182            6 : }
     183              : 
     184              : 
     185            2 : CATCH_TEST_CASE("invalid_utf32_to_utf8", "[characters],[invalid]")
     186              : {
     187            2 :     CATCH_START_SECTION("invalid_utf32_to_utf8: Verify that surrogates do not work in UTF-8")
     188              :     {
     189         2049 :         for(char32_t wc(0x00D800); wc < 0x00E000; ++wc)
     190              :         {
     191         2048 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH]
     192              :             {
     193              :                 'n', 'o', 't', 'e', 'm' //, 'p', 't', 'y'
     194              :             };
     195         2048 :             CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == -1);
     196         2048 :             CATCH_REQUIRE(buf[0] == '\0');
     197              :         }
     198              :     }
     199            2 :     CATCH_END_SECTION()
     200              : 
     201            2 :     CATCH_START_SECTION("invalid_utf32_to_utf8: Verify that too large a number is not supported")
     202              :     {
     203         1001 :         for(int repeat(0); repeat < 1000; ++repeat)
     204              :         {
     205         1000 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH]
     206              :             {
     207              :                 'n', 'o', 't', 'e', 'm' //, 'p', 't', 'y'
     208              :             };
     209         1000 :             char32_t wc(0);
     210              :             do
     211              :             {
     212         1000 :                 wc = (rand() << 16) + (rand() & 0x0000FFFF);
     213              :             }
     214         1000 :             while(wc < 0x110000);
     215         1000 :             CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == -1);
     216         1000 :             CATCH_REQUIRE(buf[0] == '\0');
     217              :         }
     218              :     }
     219            2 :     CATCH_END_SECTION()
     220            2 : }
     221              : 
     222              : 
     223            5 : CATCH_TEST_CASE("invalid_utf8_to_utf32", "[characters],[invalid]")
     224              : {
     225            5 :     CATCH_START_SECTION("invalid_utf8_to_utf32: Verify that surrogates do not work in UTF-8")
     226              :     {
     227         2049 :         for(char32_t wc(0x00D800); wc < 0x00E000; ++wc)
     228              :         {
     229              : //int mbstowc(char32_t & wc, char const * & mb, size_t & len);
     230         2048 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH] = {};
     231         2048 :             buf[0] = static_cast<char>((wc >> 12) | 0xE0);
     232         2048 :             buf[1] = ((wc >> 6) & 0x3F) | 0x80;
     233         2048 :             buf[2] = (wc & 0x3F) | 0x80;
     234         2048 :             buf[3] = '\0';
     235         2048 :             char const * s = buf;
     236         2048 :             size_t len(3);
     237         2048 :             char32_t cwc(rand());
     238         2048 :             CATCH_REQUIRE(libutf8::mbstowc(cwc, s, len) == -1);
     239         2048 :             CATCH_REQUIRE(cwc == libutf8::NOT_A_CHARACTER);
     240         2048 :             char const c1(static_cast<char>((wc >> 12) | 0xE0));
     241         2048 :             CATCH_REQUIRE(buf[0] == c1);
     242         2048 :             char const c2(static_cast<char>(((wc >> 6) & 0x3F) | 0x80));
     243         2048 :             CATCH_REQUIRE(buf[1] == c2);
     244         2048 :             char const c3((wc & 0x3F) | 0x80);
     245         2048 :             CATCH_REQUIRE(buf[2] == c3);
     246         2048 :             CATCH_REQUIRE(buf[3] == '\0');
     247         2048 :             CATCH_REQUIRE(s == buf + 3);
     248         2048 :             CATCH_REQUIRE(len == 0);
     249              :         }
     250              :     }
     251            5 :     CATCH_END_SECTION()
     252              : 
     253              :     //CATCH_START_SECTION("invalid_utf8_to_utf32: Verify that too large a number is not supported")
     254              :     //{
     255              :     //    for(int idx(0); idx < 1000; ++idx)
     256              :     //    {
     257              :     //        char buf[libutf8::MBS_MIN_BUFFER_LENGTH]
     258              :     //        {
     259              :     //            'n', 'o', 't', 'e', 'm' //, 'p', 't', 'y'
     260              :     //        };
     261              :     //        char32_t wc(0);
     262              :     //        do
     263              :     //        {
     264              :     //            wc = (rand() << 16) + (rand() & 0x0000FFFF);
     265              :     //        }
     266              :     //        while(wc < 0x110000);
     267              :     //        CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == -1);
     268              :     //        CATCH_REQUIRE(buf[0] == libutf8::NOT_A_CHARACTER);
     269              :     //    }
     270              :     //}
     271              :     //CATCH_END_SECTION()
     272              : 
     273            5 :     CATCH_START_SECTION("invalid_utf8_to_utf32: Test an invalid UTF-8 sequence (0x80 to 0x7FF)")
     274              :     {
     275         1921 :         for(char32_t wc(0x000080); wc < 0x000800; ++wc)
     276              :         {
     277         1920 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
     278         1920 :             CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 2);
     279              : 
     280         1920 :             char32_t const found(((static_cast<char32_t>(buf[0]) & 0x1F) << 6)
     281         1920 :                                | ((static_cast<char32_t>(buf[1]) & 0x3F) << 0));
     282         1920 :             CATCH_REQUIRE(found == wc);
     283         1920 :             CATCH_REQUIRE(buf[2] == '\0');
     284              : 
     285              :             // too short
     286              :             //
     287         1920 :             char32_t back(rand());
     288         1920 :             char const * s(buf);
     289         1920 :             size_t len(1);
     290         1920 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     291              : 
     292              :             // invalid middle byte
     293              :             //
     294         1920 :             char const second_byte(buf[1]);
     295         1920 :             back = rand();
     296         1920 :             s = buf;
     297         1920 :             int c(rand() % (255 - 0x40) + 1);
     298         1920 :             if(c >= 0x80)
     299              :             {
     300          631 :                 c += 0x40;
     301              :             }
     302         1920 :             buf[1] = static_cast<char>(c);
     303         1920 :             len = 2;
     304         1920 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     305         1920 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     306         1920 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 1) == reinterpret_cast<void const *>(s));
     307         1920 :             CATCH_REQUIRE(len == 1);
     308         1920 :             buf[1] = second_byte;
     309              : 
     310              :             // invalid introducer (0x80 to 0xBF)
     311              :             //
     312         1920 :             back = rand();
     313         1920 :             s = buf;
     314         1920 :             buf[0] = rand() % 64 + 0x80;
     315         1920 :             len = 2;
     316         1920 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     317         1920 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     318         1920 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 2) == reinterpret_cast<void const *>(s));
     319         1920 :             CATCH_REQUIRE(len == 0);
     320              : 
     321              :             // invalid introducer (0xF8 to 0xFF)
     322              :             //
     323         1920 :             back = rand();
     324         1920 :             s = buf;
     325         1920 :             buf[0] = rand() % 8 + 0xF8;
     326         1920 :             len = 2;
     327         1920 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     328         1920 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     329         1920 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 2) == reinterpret_cast<void const *>(s));
     330         1920 :             CATCH_REQUIRE(len == 0);
     331              :         }
     332              :     }
     333            5 :     CATCH_END_SECTION()
     334              : 
     335            5 :     CATCH_START_SECTION("invalid_utf8_to_utf32: Test an invalid UTF-8 sequence (0x800 to 0xFFFF)")
     336              :     {
     337        61442 :         for(char32_t wc(0x000800); wc < 0x010000; ++wc)
     338              :         {
     339        61441 :             if(wc >= 0xD800 && wc <= 0xDFFF)
     340              :             {
     341              :                 // skip UTF-16 surrogates -- this is not the test for those
     342              :                 //
     343            1 :                 wc = 0xDFFF;
     344            1 :                 continue;
     345              :             }
     346              : 
     347        61440 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
     348        61440 :             CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 3);
     349              : 
     350        61440 :             char32_t const found(((static_cast<char32_t>(buf[0]) & 0x0F) << 12)
     351        61440 :                                | ((static_cast<char32_t>(buf[1]) & 0x3F) <<  6)
     352        61440 :                                | ((static_cast<char32_t>(buf[2]) & 0x3F) <<  0));
     353        61440 :             CATCH_REQUIRE(found == wc);
     354        61440 :             CATCH_REQUIRE(buf[3] == '\0');
     355              : 
     356              :             // too short
     357              :             //
     358        61440 :             char32_t back(rand());
     359        61440 :             char const * s(buf);
     360        61440 :             size_t len(2);
     361        61440 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     362              : 
     363              :             // invalid middle byte
     364              :             //
     365        61440 :             char const second_byte(buf[1]);
     366        61440 :             back = rand();
     367        61440 :             s = buf;
     368        61440 :             int c(rand() % (255 - 0x40) + 1);
     369        61440 :             if(c >= 0x80)
     370              :             {
     371        20577 :                 c += 0x40;
     372              :             }
     373        61440 :             buf[1] = static_cast<char>(c);
     374        61440 :             len = 3;
     375        61440 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     376        61440 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     377        61440 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 1) == reinterpret_cast<void const *>(s));
     378        61440 :             CATCH_REQUIRE(len == 2);
     379        61440 :             buf[1] = second_byte;
     380              : 
     381        61440 :             char const third_byte(buf[2]);
     382        61440 :             back = rand();
     383        61440 :             s = buf;
     384        61440 :             c = rand() % (255 - 0x40) + 1;
     385        61440 :             if(c >= 0x80)
     386              :             {
     387        20862 :                 c += 0x40;
     388              :             }
     389        61440 :             buf[2] = static_cast<char>(c);
     390        61440 :             len = 3;
     391        61440 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     392        61440 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     393        61440 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 2) == reinterpret_cast<void const *>(s));
     394        61440 :             CATCH_REQUIRE(len == 1);
     395        61440 :             buf[2] = third_byte;
     396              : 
     397              :             // invalid introducer (0x80 to 0xBF)
     398              :             //
     399        61440 :             back = rand();
     400        61440 :             s = buf;
     401        61440 :             buf[0] = rand() % 64 + 0x80;
     402        61440 :             len = 3;
     403        61440 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     404        61440 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     405        61440 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 3) == reinterpret_cast<void const *>(s));
     406        61440 :             CATCH_REQUIRE(len == 0);
     407              : 
     408              :             // invalid introducer (0xF8 to 0xFF)
     409              :             //
     410        61440 :             back = rand();
     411        61440 :             s = buf;
     412        61440 :             buf[0] = rand() % 8 + 0xF8;
     413        61440 :             len = 3;
     414        61440 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     415        61440 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     416        61440 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 3) == reinterpret_cast<void const *>(s));
     417        61440 :             CATCH_REQUIRE(len == 0);
     418              :         }
     419              :     }
     420            5 :     CATCH_END_SECTION()
     421              : 
     422            5 :     CATCH_START_SECTION("invalid_utf8_to_utf32: Test UTF-32 to UTF-8 with characters between 0x010000 and 0x110000 inclusive")
     423              :     {
     424      1048577 :         for(char32_t wc(0x010000); wc < 0x110000; ++wc)
     425              :         {
     426      1048576 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
     427      1048576 :             CATCH_REQUIRE(libutf8::wctombs(buf, wc, sizeof(buf)) == 4);
     428              : 
     429      1048576 :             char32_t const found(((static_cast<char32_t>(buf[0]) & 0x07) << 18)
     430      1048576 :                                | ((static_cast<char32_t>(buf[1]) & 0x3F) << 12)
     431      1048576 :                                | ((static_cast<char32_t>(buf[2]) & 0x3F) <<  6)
     432      1048576 :                                | ((static_cast<char32_t>(buf[3]) & 0x3F) <<  0));
     433      1048576 :             CATCH_REQUIRE(found == wc);
     434      1048576 :             CATCH_REQUIRE(buf[4] == '\0');
     435              : 
     436      1048576 :             char32_t back(rand());
     437      1048576 :             char const * s(buf);
     438              : 
     439              :             // too short
     440              :             //
     441      1048576 :             size_t len(3);
     442      1048576 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     443              : 
     444              :             // invalid middle byte
     445              :             //
     446      1048576 :             char const second_byte(buf[1]);
     447      1048576 :             back = rand();
     448      1048576 :             s = buf;
     449      1048576 :             int c(rand() % (255 - 0x40) + 1);
     450      1048576 :             if(c >= 0x80)
     451              :             {
     452       351758 :                 c += 0x40;
     453              :             }
     454      1048576 :             buf[1] = static_cast<char>(c);
     455      1048576 :             len = 4;
     456      1048576 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     457      1048576 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     458      1048576 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 1) == reinterpret_cast<void const *>(s));
     459      1048576 :             CATCH_REQUIRE(len == 3);
     460      1048576 :             buf[1] = second_byte;
     461              : 
     462      1048576 :             char const third_byte(buf[2]);
     463      1048576 :             back = rand();
     464      1048576 :             s = buf;
     465      1048576 :             c = rand() % (255 - 0x40) + 1;
     466      1048576 :             if(c >= 0x80)
     467              :             {
     468       351234 :                 c += 0x40;
     469              :             }
     470      1048576 :             buf[2] = static_cast<char>(c);
     471      1048576 :             len = 4;
     472      1048576 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     473      1048576 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     474      1048576 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 2) == reinterpret_cast<void const *>(s));
     475      1048576 :             CATCH_REQUIRE(len == 2);
     476      1048576 :             buf[2] = third_byte;
     477              : 
     478      1048576 :             char const forth_byte(buf[3]);
     479      1048576 :             back = rand();
     480      1048576 :             s = buf;
     481      1048576 :             c = rand() % (255 - 0x40) + 1;
     482      1048576 :             if(c >= 0x80)
     483              :             {
     484       351758 :                 c += 0x40;
     485              :             }
     486      1048576 :             buf[3] = static_cast<char>(c);
     487      1048576 :             len = 4;
     488      1048576 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     489      1048576 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     490      1048576 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 3) == reinterpret_cast<void const *>(s));
     491      1048576 :             CATCH_REQUIRE(len == 1);
     492      1048576 :             buf[3] = forth_byte;
     493              : 
     494              :             // invalid introducer (0x80 to 0xBF)
     495              :             //
     496      1048576 :             back = rand();
     497      1048576 :             s = buf;
     498      1048576 :             buf[0] = rand() % 64 + 0x80;
     499      1048576 :             len = 3;
     500      1048576 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     501      1048576 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     502      1048576 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 3) == reinterpret_cast<void const *>(s));
     503      1048576 :             CATCH_REQUIRE(len == 0);
     504              : 
     505              :             // invalid introducer (0x80 to 0xBF)
     506              :             //
     507      1048576 :             back = rand();
     508      1048576 :             buf[0] = rand() % 64 + 0x80;
     509      1048576 :             s = buf;
     510      1048576 :             len = 4;
     511      1048576 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     512      1048576 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     513      1048576 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 4) == reinterpret_cast<void const *>(s));
     514      1048576 :             CATCH_REQUIRE(len == 0);
     515              : 
     516              :             // invalid introducer (0xF8 to 0xFF)
     517              :             //
     518      1048576 :             back = rand();
     519      1048576 :             s = buf;
     520      1048576 :             buf[0] = rand() % 8 + 0xF8;
     521      1048576 :             len = 4;
     522      1048576 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     523      1048576 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     524      1048576 :             CATCH_REQUIRE(reinterpret_cast<void const *>(buf + 4) == reinterpret_cast<void const *>(s));
     525      1048576 :             CATCH_REQUIRE(len == 0);
     526              :         }
     527              :     }
     528            5 :     CATCH_END_SECTION()
     529              : 
     530            5 :     CATCH_START_SECTION("invalid_utf8_to_utf32: Test three random characters, destroy the second one and make sure it gets skipped properly")
     531              :     {
     532         1001 :         for(int repeat(0); repeat < 1000; ++repeat)
     533              :         {
     534              :             char32_t wc[3]
     535              :             {
     536         1000 :                 unittest::rand_char(true),
     537         1000 :                 unittest::rand_char(true),
     538         1000 :                 unittest::rand_char(true),
     539         1000 :             };
     540         1000 :             size_t sz[3] = {};
     541              : 
     542         1000 :             char buf[libutf8::MBS_MIN_BUFFER_LENGTH * 3];
     543         1000 :             char * s(buf);
     544         1000 :             sz[0] += libutf8::wctombs(s, wc[0], sizeof(buf));
     545         1000 :             s += sz[0];
     546         1000 :             sz[1] = libutf8::wctombs(s, wc[1], sizeof(buf));
     547         1000 :             s += sz[1];
     548         1000 :             sz[2] = libutf8::wctombs(s, wc[2], sizeof(buf));
     549              : 
     550         1000 :             char32_t back(rand());
     551         1000 :             s = buf;
     552         1000 :             buf[sz[0]] = rand() % 64 + 0x80;
     553         1000 :             size_t len(sizeof(buf));
     554         1000 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) != -1);
     555         1000 :             CATCH_REQUIRE(back == wc[0]);
     556              : 
     557         1000 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) == -1);
     558         1000 :             CATCH_REQUIRE(back == libutf8::NOT_A_CHARACTER);
     559              : 
     560         1000 :             CATCH_REQUIRE(libutf8::mbstowc(back, s, len) != -1);
     561         1000 :             CATCH_REQUIRE(back == wc[2]);
     562              :         }
     563              :     }
     564            5 :     CATCH_END_SECTION()
     565            5 : }
     566              : 
     567              : 
     568              : // vim: ts=4 sw=4 et
        

Generated by: LCOV version 2.0-1

Snap C++ | List of projects | List of versions