LCOV - code coverage report
Current view: top level - tests - catch_iterator.cpp (source / functions) Coverage Total Hit
Test: coverage.info Lines: 96.6 % 232 224
Test Date: 2025-08-03 08:53:08 Functions: 100.0 % 2 2
Legend: Lines: hit not hit

            Line data    Source code
       1              : // Copyright (c) 2013-2025  Made to Order Software Corp.  All Rights Reserved
       2              : //
       3              : // https://snapwebsites.org/project/libutf8
       4              : // contact@m2osw.com
       5              : //
       6              : // This program is free software: you can redistribute it and/or modify
       7              : // it under the terms of the GNU General Public License as published by
       8              : // the Free Software Foundation, either version 3 of the License, or
       9              : // (at your option) any later version.
      10              : //
      11              : // This program is distributed in the hope that it will be useful,
      12              : // but WITHOUT ANY WARRANTY; without even the implied warranty of
      13              : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14              : // GNU General Public License for more details.
      15              : //
      16              : // You should have received a copy of the GNU General Public License
      17              : // along with this program.  If not, see <https://www.gnu.org/licenses/>.
      18              : 
      19              : // libutf8
      20              : //
      21              : #include    <libutf8/iterator.h>
      22              : 
      23              : #include    <libutf8/base.h>
      24              : #include    <libutf8/libutf8.h>
      25              : 
      26              : 
      27              : // unit test
      28              : //
      29              : #include    "catch_main.h"
      30              : 
      31              : 
      32              : // C++
      33              : //
      34              : #include    <cctype>
      35              : #include    <iostream>
      36              : 
      37              : 
      38              : // last include
      39              : //
      40              : #include    <snapdev/poison.h>
      41              : 
      42              : 
      43              : 
      44            1 : CATCH_TEST_CASE("libutf8_iterator", "[iterator]")
      45              : {
      46            1 :     CATCH_START_SECTION("libutf8_iterator: valid iterators tests")
      47              :     {
      48            1 :         char32_t p(0); // first code point of the plane that receives the extra checks below
      49              :         do
      50              :         {
      51            1 :             p = rand() % 0x11 * 0x10000; // random multiple of 0x10000 in [0, 0x100000]
      52              :         }
      53            1 :         while(p == 0 || (p >= 0xD800 && p <= 0xDFFF)); // retry until p != 0 (plane 0 is never selected)
      54              : 
      55           18 :         for(char32_t plan(0); plan < 0x110000; plan += 0x10000) // plan = first code point of each of the 17 planes
      56              :         {
      57              :             // build one string holding every code point of this plane, UTF-8 encoded
      58              :             // (the surrogates U+D800..U+DFFF are left out of plane 0)
      59           17 :             std::string str;
      60           17 :             str.reserve(0x10000 * 4); // 4 bytes reserved per code point of the plane
      61      1112082 :             for(char32_t wc(0); wc < 0x10000; ++wc)
      62              :             {
      63      1112065 :                 if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
      64              :                 {
      65            1 :                     wc = 0xDFFF; // jump to the last surrogate; the loop's ++wc resumes at 0xE000
      66            1 :                     continue;
      67              :                 }
      68      1112064 :                 char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
      69      1112064 :                 CATCH_REQUIRE(libutf8::wctombs(buf, wc + plan, sizeof(buf)) >= 1);
      70      1112064 :                 if(plan == 0 && wc == 0)
      71              :                 {
      72              :                     // special case: the encoding of U+0000 is buf[0] = '\0', so
      73              :                     // `str += buf` would append an empty C string; append the NUL
      74              :                     // character explicitly instead
      75            1 :                     str += '\0';
      76              :                 }
      77              :                 else
      78              :                 {
      79      1112063 :                     str += buf; // assumes wctombs() NUL-terminates buf -- TODO confirm
      80              :                 }
      81              :             }
      82              : //std::cerr << "-------------- Plan " << static_cast<int>(plan) << " String ready " << str.length() << " ...\n";
      83              : 
      84              :             {
      85           17 :                 libutf8::utf8_iterator it(str);
      86           17 :                 libutf8::utf8_iterator it_end(str, true); // presumably 'true' means "start at the end" -- confirm in iterator.h
      87           17 :                 libutf8::utf8_iterator it_next(str);
      88           17 :                 ++it_next; // one character past the beginning
      89              : 
      90           17 :                 CATCH_REQUIRE(it == str.begin()); // comparisons against std::string iterators, utf8_iterator on the left
      91           17 :                 CATCH_REQUIRE(it == str.cbegin());
      92           17 :                 CATCH_REQUIRE(it != str.end());
      93           17 :                 CATCH_REQUIRE(it != str.cend());
      94              : 
      95           17 :                 CATCH_REQUIRE(it == it); // comparisons between utf8_iterator objects
      96           17 :                 CATCH_REQUIRE(it != it_end);
      97           17 :                 CATCH_REQUIRE(it != it_next);
      98              : 
      99           17 :                 CATCH_REQUIRE(str.begin() == it); // same comparisons with the operands swapped
     100           17 :                 CATCH_REQUIRE(str.cbegin() == it);
     101           17 :                 CATCH_REQUIRE(str.end() != it);
     102           17 :                 CATCH_REQUIRE(str.cend() != it);
     103              : 
     104      1112082 :                 for(char32_t wc(0); wc < 0x10000; ++wc) // forward walk: *it then ++it
     105              :                 {
     106      1112065 :                     if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
     107              :                     {
     108            1 :                         wc = 0xDFFF; // surrogates were not written to str, skip them here too
     109            1 :                         continue;
     110              :                     }
     111      1112064 :                     CATCH_REQUIRE(*it == wc + plan);
     112      1112064 :                     ++it;
     113              :                 }
     114              : 
     115           17 :                 CATCH_REQUIRE(it != str.begin()); // the whole string was consumed: the iterator now compares equal to end
     116           17 :                 CATCH_REQUIRE(it != str.cbegin());
     117           17 :                 CATCH_REQUIRE(it == str.end());
     118           17 :                 CATCH_REQUIRE(it == str.cend());
     119              : 
     120           17 :                 CATCH_REQUIRE(str.begin() != it);
     121           17 :                 CATCH_REQUIRE(str.cbegin() != it);
     122           17 :                 CATCH_REQUIRE(str.end() == it);
     123           17 :                 CATCH_REQUIRE(str.cend() == it);
     124              : 
     125           17 :                 CATCH_REQUIRE(*it == libutf8::EOS); // dereferencing at the end is expected to yield EOS
     126           17 :                 ++it; // incrementing at the end, in both forms...
     127           17 :                 it++;
     128           17 :                 CATCH_REQUIRE(it == str.cend()); // ...is expected to leave the iterator at the end
     129              : 
     130      1112082 :                 for(char32_t wc(0x10000); wc > 0; ) // backward walk: --it then *it
     131              :                 {
     132      1112065 :                     --wc;
     133      1112065 :                     if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
     134              :                     {
     135            1 :                         wc = 0xD800; // the next --wc lands on 0xD7FF
     136            1 :                         continue;
     137              :                     }
     138      1112064 :                     --it;
     139      1112064 :                     CATCH_REQUIRE(*it == wc + plan);
     140              :                 }
     141              : 
     142           17 :                 --it; // decrementing at the beginning, in both forms...
     143           17 :                 it--;
     144              : 
     145           17 :                 CATCH_REQUIRE(it.good()); // ...must not put the iterator in the bad state
     146           17 :                 CATCH_REQUIRE_FALSE(it.bad());
     147              :             }
     148              : 
     149           17 :             if(plan == p) // randomly selected plane: exercise the combined *it++ and *--it forms
     150              :             {
     151            1 :                 libutf8::utf8_iterator it(str);
     152              : 
     153        65537 :                 for(char32_t wc(0); wc < 0x10000; ++wc)
     154              :                 {
     155        65536 :                     if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF) // always false here: plan == p and p != 0
     156              :                     {
     157            0 :                         wc = 0xDFFF;
     158            0 :                         continue;
     159              :                     }
     160        65536 :                     CATCH_REQUIRE(*it++ == wc + plan);
     161              :                 }
     162              : 
     163            1 :                 CATCH_REQUIRE(it == str.end());
     164            1 :                 it++; // going past the end must keep the iterator good and at the end
     165            1 :                 CATCH_REQUIRE(it.good());
     166            1 :                 CATCH_REQUIRE_FALSE(it.bad());
     167            1 :                 ++it;
     168            1 :                 CATCH_REQUIRE(it.good());
     169            1 :                 CATCH_REQUIRE_FALSE(it.bad());
     170            1 :                 CATCH_REQUIRE(it == str.end());
     171            1 :                 CATCH_REQUIRE(it.good());
     172            1 :                 CATCH_REQUIRE_FALSE(it.bad());
     173              : 
     174        65537 :                 for(char32_t wc(0x10000); wc > 0; )
     175              :                 {
     176        65536 :                     --wc;
     177        65536 :                     if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF) // always false here: plan == p and p != 0
     178              :                     {
     179            0 :                         wc = 0xD800;
     180            0 :                         continue;
     181              :                     }
     182        65536 :                     CATCH_REQUIRE(*--it == wc + plan);
     183              :                 }
     184              : 
     185            1 :                 CATCH_REQUIRE(it == str.begin());
     186            1 :                 CATCH_REQUIRE(str.begin() == it);
     187            1 :                 it--; // going before the beginning must leave the iterator at the beginning
     188            1 :                 --it;
     189            1 :                 CATCH_REQUIRE(it == str.begin());
     190            1 :                 CATCH_REQUIRE(str.begin() == it);
     191              :             }
     192              : 
     193           17 :             if(plan == (p + 0x10000) % 0x110000) // plane following p (wraps to plane 0 when p == 0x100000): distance and rewind() checks
     194              :             {
     195            1 :                 libutf8::utf8_iterator it(str);
     196            1 :                 libutf8::utf8_iterator start(str);
     197            1 :                 CATCH_REQUIRE(it - start == 0);
     198            1 :                 CATCH_REQUIRE(start - it == 0);
     199              : 
     200        65537 :                 for(char32_t wc(0); wc < 0x10000; ++wc)
     201              :                 {
     202        65536 :                     if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF) // can only be true when p == 0x100000 (plan wrapped to 0)
     203              :                     {
     204            0 :                         wc = 0xDFFF;
     205            0 :                         continue;
     206              :                     }
     207        65536 :                     CATCH_REQUIRE(*it == wc + plan);
     208        65536 :                     it++;
     209              : 
     210        65536 :                     libutf8::utf8_iterator zero(it); // copy of the current position...
     211        65536 :                     zero.rewind(); // ...which rewind() is expected to move back to the start
     212        65536 :                     CATCH_REQUIRE(zero == start);
     213              :                 }
     214              : 
     215            1 :                 libutf8::utf8_iterator copy(it);
     216            1 :                 CATCH_REQUIRE(static_cast<std::size_t>(it - start) == str.length()); // the difference is compared to the string length in bytes
     217            1 :                 CATCH_REQUIRE(static_cast<std::size_t>(copy - start) == str.length());
     218            1 :                 CATCH_REQUIRE(copy - it == 0);
     219            1 :                 CATCH_REQUIRE(it - copy == 0);
     220            1 :                 copy.rewind();
     221            1 :                 CATCH_REQUIRE(copy - start == 0);
     222            1 :                 CATCH_REQUIRE(start - copy == 0);
     223            1 :                 CATCH_REQUIRE(static_cast<std::size_t>(start - copy) == 0);
     224            1 :                 CATCH_REQUIRE(static_cast<std::size_t>(copy - start) == 0);
     225              : 
     226        65537 :                 for(char32_t wc(0x10000); wc > 0; ) // backward walk using the post-decrement form
     227              :                 {
     228        65536 :                     --wc;
     229        65536 :                     if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF) // can only be true when p == 0x100000 (plan wrapped to 0)
     230              :                     {
     231            0 :                         wc = 0xD800;
     232            0 :                         continue;
     233              :                     }
     234        65536 :                     it--;
     235        65536 :                     CATCH_REQUIRE(*it == wc + plan);
     236              :                 }
     237              :             }
     238           17 :         }
     239              :     }
     240            1 :     CATCH_END_SECTION()
     241            1 : }
     242              : 
     243              : 
     244            2 : CATCH_TEST_CASE("libutf8_iterator_invalid_string", "[iterator],[invalid]")
     245              : {
     246            2 :     CATCH_START_SECTION("libutf8_iterator_invalid_string: iterators with invalid characters (bad UTF-8)")
     247              :     {
     248          101 :         for(int repeat(0); repeat < 100; ++repeat) // 100 independent random strings
     249              :         {
     250              :             // create a string of STR_LENGTH random characters, all >= 0x80 so
     251              :             // that each one is encoded as a multi-byte UTF-8 sequence
     252          100 :             constexpr size_t STR_LENGTH = 4;
     253              :             char32_t wc;
     254          100 :             std::u32string wstr;
     255          100 :             wstr.reserve(STR_LENGTH);
     256          500 :             for(size_t idx(0); idx < STR_LENGTH; ++idx)
     257              :             {
     258              :                 do
     259              :                 {
     260          400 :                     wc = unittest::rand_char(true); // NOTE(review): meaning of 'true' is defined by the test helper -- confirm in catch_main.h
     261              :                 }
     262          400 :                 while(wc < 0x80); // reject ASCII
     263          400 :                 wstr += wc;
     264              :             }
     265          100 :             std::string str(libutf8::to_u8string(wstr));
     266              : 
     267              : //std::cerr << "-------------- String ready " << str.length() << " ...\n";
     268              : 
     269              :             // first verify that the unmodified string iterates correctly and
     270              :             // record in pos[] the byte offset at which each character starts
     271          100 :             std::string::size_type pos[STR_LENGTH];
     272              :             {
     273          100 :                 libutf8::utf8_iterator it(str);
     274              : 
     275          100 :                 CATCH_REQUIRE(it == str.begin());
     276          100 :                 CATCH_REQUIRE(it == str.cbegin());
     277          100 :                 CATCH_REQUIRE(it != str.end());
     278          100 :                 CATCH_REQUIRE(it != str.cend());
     279              : 
     280          100 :                 CATCH_REQUIRE(str.begin()  == it);
     281          100 :                 CATCH_REQUIRE(str.cbegin() == it);
     282          100 :                 CATCH_REQUIRE(str.end()    != it);
     283          100 :                 CATCH_REQUIRE(str.cend()   != it);
     284              : 
     285          500 :                 for(size_t idx(0); idx < STR_LENGTH; ++idx)
     286              :                 {
     287          400 :                     CATCH_REQUIRE(*it == wstr[idx]);
     288          400 :                     if(rand() % 2 == 0) // randomly pick one of the two operand orders of operator-
     289              :                     {
     290          196 :                         pos[idx] = it - str.begin(); // offset of character idx in str
     291              :                     }
     292              :                     else
     293              :                     {
     294          204 :                         pos[idx] = -(str.begin() - it); // same offset, computed with the operands swapped
     295              :                     }
     296          400 :                     ++it;
     297              :                 }
     298              : 
     299          100 :                 CATCH_REQUIRE(it != str.begin());
     300          100 :                 CATCH_REQUIRE(it != str.cbegin());
     301          100 :                 CATCH_REQUIRE(it == str.end());
     302          100 :                 CATCH_REQUIRE(it == str.cend());
     303              : 
     304          100 :                 CATCH_REQUIRE(str.begin()  != it);
     305          100 :                 CATCH_REQUIRE(str.cbegin() != it);
     306          100 :                 CATCH_REQUIRE(str.end()    == it);
     307          100 :                 CATCH_REQUIRE(str.cend()   == it);
     308              : 
     309          100 :                 CATCH_REQUIRE(*it == libutf8::EOS);
     310          100 :                 ++it;
     311          100 :                 it++;
     312          100 :                 CATCH_REQUIRE(it == str.cend());
     313              : 
     314          100 :                 CATCH_REQUIRE(it.good()); // a valid string never puts the iterator in the bad state
     315          100 :                 CATCH_REQUIRE_FALSE(it.bad());
     316              :             }
     317              : 
     318              :             {
     319          100 :                 libutf8::utf8_iterator it(str);
     320              : 
     321          100 :                 str[pos[1]] = rand() % 0x40 + 0x80; // overwrite the first byte of the 2nd character with a value in 0x80..0xBF
     322              : 
     323          100 :                 CATCH_REQUIRE(*it++ == wstr[0]);
     324          100 :                 CATCH_REQUIRE(*it++ == libutf8::NOT_A_CHARACTER);       // we broke this one
     325          100 :                 CATCH_REQUIRE(*it++ == wstr[2]); // the iterator is expected to resynchronize on the next character
     326          100 :                 CATCH_REQUIRE(*it++ == wstr[3]);
     327          100 :                 CATCH_REQUIRE(*it++ == libutf8::EOS);
     328              : 
     329          100 :                 CATCH_REQUIRE_FALSE(it.good()); // the invalid character read above is expected to leave the bad state set
     330          100 :                 CATCH_REQUIRE(it.bad());
     331          100 :                 it.clear(); // clear() is expected to reset the state to good
     332          100 :                 CATCH_REQUIRE(it.good());
     333          100 :                 CATCH_REQUIRE_FALSE(it.bad());
     334              :             }
     335              : 
     336              :             {
     337          100 :                 str.erase(str.length() - 1); // drop the last byte so that the last character is truncated
     338          100 :                 libutf8::utf8_iterator it(str);
     339              : 
     340          100 :                 str[pos[1]] = rand() % 0x40 + 0x80; // break the 2nd character again with a new random byte
     341              : 
     342          100 :                 CATCH_REQUIRE(*it++ == wstr[0]);
     343          100 :                 CATCH_REQUIRE(*it++ == libutf8::NOT_A_CHARACTER); // broken 2nd character
     344          100 :                 CATCH_REQUIRE(*it++ == wstr[2]);
     345          100 :                 CATCH_REQUIRE(*it++ == libutf8::NOT_A_CHARACTER); // truncated last character
     346              : 
     347          100 :                 CATCH_REQUIRE_FALSE(it.good());
     348          100 :                 CATCH_REQUIRE(it.bad());
     349          100 :                 it.clear();
     350          100 :                 CATCH_REQUIRE(it.good());
     351          100 :                 CATCH_REQUIRE_FALSE(it.bad());
     352              :             }
     353          100 :         }
     354              :     }
     355            2 :     CATCH_END_SECTION()
     356              : 
     357            2 :     CATCH_START_SECTION("libutf8_iterator_invalid_string: iterators with invalid characters (too large)")
     358              :     {
     359       983040 :         for(char32_t wc(0x110000); wc < 0x1FFFFF; ++wc) // NOTE(review): '<' leaves the value 0x1FFFFF itself untested -- confirm this is intended
     360              :         {
     361              :             // since this character is not valid
     362              :             // we have to encode it _manually_
     363              :             //
     364       983039 :             char buf[4];
     365       983039 :             buf[0] = 0xF0 | ((wc >> 18) & 0x07); // lead byte 11110xxx carrying bits 18..20
     366       983039 :             buf[1] = 0x80 | ((wc >> 12) & 0x3F); // three 10xxxxxx bytes carrying 6 bits each
     367       983039 :             buf[2] = 0x80 | ((wc >>  6) & 0x3F);
     368       983039 :             buf[3] = 0x80 | ((wc >>  0) & 0x3F);
     369              : 
     370      2949117 :             std::string str(buf, 4); // explicit length: buf has no terminating NUL
     371              : 
     372              :             // verify that the iterator reports this sequence as invalid
     373              :             //
     374              :             {
     375       983039 :                 libutf8::utf8_iterator it(str);
     376              : 
     377       983039 :                 CATCH_REQUIRE(it == str.begin());
     378       983039 :                 CATCH_REQUIRE(it == str.cbegin());
     379       983039 :                 CATCH_REQUIRE(it != str.end());
     380       983039 :                 CATCH_REQUIRE(it != str.cend());
     381              : 
     382       983039 :                 CATCH_REQUIRE(str.begin()  == it);
     383       983039 :                 CATCH_REQUIRE(str.cbegin() == it);
     384       983039 :                 CATCH_REQUIRE(str.end()    != it);
     385       983039 :                 CATCH_REQUIRE(str.cend()   != it);
     386              : 
     387       983039 :                 CATCH_REQUIRE(*it == libutf8::NOT_A_CHARACTER); // value above 0x10FFFF: rejected
     388              : 
     389       983039 :                 CATCH_REQUIRE_FALSE(it.good());
     390       983039 :                 CATCH_REQUIRE(it.bad());
     391       983039 :                 it.clear();
     392       983039 :                 CATCH_REQUIRE(it.good());
     393       983039 :                 CATCH_REQUIRE_FALSE(it.bad());
     394              : 
     395       983039 :                 ++it; // the whole 4-byte sequence is expected to be skipped (end is reached, see below)
     396              : 
     397       983039 :                 CATCH_REQUIRE(it != str.begin());
     398       983039 :                 CATCH_REQUIRE(it != str.cbegin());
     399       983039 :                 CATCH_REQUIRE(it == str.end());
     400       983039 :                 CATCH_REQUIRE(it == str.cend());
     401              : 
     402       983039 :                 CATCH_REQUIRE(str.begin()  != it);
     403       983039 :                 CATCH_REQUIRE(str.cbegin() != it);
     404       983039 :                 CATCH_REQUIRE(str.end()    == it);
     405       983039 :                 CATCH_REQUIRE(str.cend()   == it);
     406              : 
     407       983039 :                 CATCH_REQUIRE(*it == libutf8::EOS);
     408       983039 :                 ++it;
     409       983039 :                 it++;
     410       983039 :                 CATCH_REQUIRE(it == str.cend());
     411              : 
     412       983039 :                 CATCH_REQUIRE_FALSE(it.good()); // NOTE(review): bad again after the clear() above; presumably set when ++it stepped over the invalid sequence -- confirm
     413       983039 :                 CATCH_REQUIRE(it.bad());
     414              :             }
     415       983039 :         }
     416              :     }
     417            2 :     CATCH_END_SECTION()
     418            2 : }
     419              : 
     420              : 
     421              : 
     422              : // vim: ts=4 sw=4 et
        

Generated by: LCOV version 2.0-1

Snap C++ | List of projects | List of versions