LCOV - code coverage report
Current view: top level - tests - catch_iterator.cpp (source / functions) Coverage Total Hit
Test: coverage.info Lines: 96.6 % 232 224
Test Date: 2025-06-22 07:49:47 Functions: 100.0 % 2 2
Legend: Lines: hit not hit

            Line data    Source code
       1              : // Copyright (c) 2013-2023  Made to Order Software Corp.  All Rights Reserved
       2              : //
       3              : // https://snapwebsites.org/project/libutf8
       4              : // contact@m2osw.com
       5              : //
       6              : // This program is free software; you can redistribute it and/or modify
       7              : // it under the terms of the GNU General Public License as published by
       8              : // the Free Software Foundation; either version 2 of the License, or
       9              : // (at your option) any later version.
      10              : //
      11              : // This program is distributed in the hope that it will be useful,
      12              : // but WITHOUT ANY WARRANTY; without even the implied warranty of
      13              : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14              : // GNU General Public License for more details.
      15              : //
      16              : // You should have received a copy of the GNU General Public License along
      17              : // with this program; if not, write to the Free Software Foundation, Inc.,
      18              : // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
      19              : 
      20              : // libutf8
      21              : //
      22              : #include    <libutf8/iterator.h>
      23              : 
      24              : #include    <libutf8/base.h>
      25              : #include    <libutf8/libutf8.h>
      26              : 
      27              : 
      28              : // unit test
      29              : //
      30              : #include    "catch_main.h"
      31              : 
      32              : 
      33              : // C++
      34              : //
      35              : #include    <cctype>
      36              : #include    <iostream>
      37              : 
      38              : 
      39              : // last include
      40              : //
      41              : #include    <snapdev/poison.h>
      42              : 
      43              : 
      44              : 
      45            1 : CATCH_TEST_CASE("libutf8_iterator", "[iterator]")
      46              : {
      47            1 :     CATCH_START_SECTION("libutf8_iterator: valid iterators tests")
      48              :     {
      49            1 :         char32_t p(0);
      50              :         do
      51              :         {
      52            1 :             p = rand() % 0x11 * 0x10000;
      53              :         }
      54            1 :         while(p == 0 || (p >= 0xD800 && p <= 0xDFFF));
      55              : 
      56           18 :         for(char32_t plan(0); plan < 0x110000; plan += 0x10000)
      57              :         {
      58              :             // build one string holding every character of this plane
      59              :             //
      60           17 :             std::string str;
      61           17 :             str.reserve(0x10000 * 4);
      62      1112082 :             for(char32_t wc(0); wc < 0x10000; ++wc)
      63              :             {
      64      1112065 :                 if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
      65              :                 {
      66            1 :                     wc = 0xDFFF;
      67            1 :                     continue;
      68              :                 }
      69      1112064 :                 char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
      70      1112064 :                 CATCH_REQUIRE(libutf8::wctombs(buf, wc + plan, sizeof(buf)) >= 1);
      71      1112064 :                 if(plan == 0 && wc == 0)
      72              :                 {
      73              :                     // special case: buf[0] = '\0' so `str += buf` (a C-string
      74              :                     // append) would add nothing; append the NUL explicitly
      75              :                     //
      76            1 :                     str += '\0';
      77              :                 }
      78              :                 else
      79              :                 {
      80      1112063 :                     str += buf;
      81              :                 }
      82              :             }
      83              : //std::cerr << "-------------- Plan " << static_cast<int>(plan) << " String ready " << str.length() << " ...\n";
      84              : 
      85              :             {
      86           17 :                 libutf8::utf8_iterator it(str);
      87           17 :                 libutf8::utf8_iterator it_end(str, true);
      88           17 :                 libutf8::utf8_iterator it_next(str);
      89           17 :                 ++it_next;
      90              : 
      91           17 :                 CATCH_REQUIRE(it == str.begin());
      92           17 :                 CATCH_REQUIRE(it == str.cbegin());
      93           17 :                 CATCH_REQUIRE(it != str.end());
      94           17 :                 CATCH_REQUIRE(it != str.cend());
      95              : 
      96           17 :                 CATCH_REQUIRE(it == it);
      97           17 :                 CATCH_REQUIRE(it != it_end);
      98           17 :                 CATCH_REQUIRE(it != it_next);
      99              : 
     100           17 :                 CATCH_REQUIRE(str.begin() == it);
     101           17 :                 CATCH_REQUIRE(str.cbegin() == it);
     102           17 :                 CATCH_REQUIRE(str.end() != it);
     103           17 :                 CATCH_REQUIRE(str.cend() != it);
     104              : 
     105      1112082 :                 for(char32_t wc(0); wc < 0x10000; ++wc)
     106              :                 {
     107      1112065 :                     if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
     108              :                     {
     109            1 :                         wc = 0xDFFF;
     110            1 :                         continue;
     111              :                     }
     112      1112064 :                     CATCH_REQUIRE(*it == wc + plan);
     113      1112064 :                     ++it;
     114              :                 }
     115              : 
     116           17 :                 CATCH_REQUIRE(it != str.begin());
     117           17 :                 CATCH_REQUIRE(it != str.cbegin());
     118           17 :                 CATCH_REQUIRE(it == str.end());
     119           17 :                 CATCH_REQUIRE(it == str.cend());
     120              : 
     121           17 :                 CATCH_REQUIRE(str.begin() != it);
     122           17 :                 CATCH_REQUIRE(str.cbegin() != it);
     123           17 :                 CATCH_REQUIRE(str.end() == it);
     124           17 :                 CATCH_REQUIRE(str.cend() == it);
     125              : 
     126           17 :                 CATCH_REQUIRE(*it == libutf8::EOS);
     127           17 :                 ++it;
     128           17 :                 it++;
     129           17 :                 CATCH_REQUIRE(it == str.cend());
     130              : 
     131      1112082 :                 for(char32_t wc(0x10000); wc > 0; )
     132              :                 {
     133      1112065 :                     --wc;
     134      1112065 :                     if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
     135              :                     {
     136            1 :                         wc = 0xD800;
     137            1 :                         continue;
     138              :                     }
     139      1112064 :                     --it;
     140      1112064 :                     CATCH_REQUIRE(*it == wc + plan);
     141              :                 }
     142              : 
     143           17 :                 --it;
     144           17 :                 it--;
     145              : 
     146           17 :                 CATCH_REQUIRE(it.good());
     147           17 :                 CATCH_REQUIRE_FALSE(it.bad());
     148              :             }
     149              : 
     150           17 :             if(plan == p)
     151              :             {
     152            1 :                 libutf8::utf8_iterator it(str);
     153              : 
     154        65537 :                 for(char32_t wc(0); wc < 0x10000; ++wc)
     155              :                 {
     156        65536 :                     if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
     157              :                     {
     158            0 :                         wc = 0xDFFF;
     159            0 :                         continue;
     160              :                     }
     161        65536 :                     CATCH_REQUIRE(*it++ == wc + plan);
     162              :                 }
     163              : 
     164            1 :                 CATCH_REQUIRE(it == str.end());
     165            1 :                 it++;
     166            1 :                 CATCH_REQUIRE(it.good());
     167            1 :                 CATCH_REQUIRE_FALSE(it.bad());
     168            1 :                 ++it;
     169            1 :                 CATCH_REQUIRE(it.good());
     170            1 :                 CATCH_REQUIRE_FALSE(it.bad());
     171            1 :                 CATCH_REQUIRE(it == str.end());
     172            1 :                 CATCH_REQUIRE(it.good());
     173            1 :                 CATCH_REQUIRE_FALSE(it.bad());
     174              : 
     175        65537 :                 for(char32_t wc(0x10000); wc > 0; )
     176              :                 {
     177        65536 :                     --wc;
     178        65536 :                     if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
     179              :                     {
     180            0 :                         wc = 0xD800;
     181            0 :                         continue;
     182              :                     }
     183        65536 :                     CATCH_REQUIRE(*--it == wc + plan);
     184              :                 }
     185              : 
     186            1 :                 CATCH_REQUIRE(it == str.begin());
     187            1 :                 CATCH_REQUIRE(str.begin() == it);
     188            1 :                 it--;
     189            1 :                 --it;
     190            1 :                 CATCH_REQUIRE(it == str.begin());
     191            1 :                 CATCH_REQUIRE(str.begin() == it);
     192              :             }
     193              : 
     194           17 :             if(plan == (p + 0x10000) % 0x110000)
     195              :             {
     196            1 :                 libutf8::utf8_iterator it(str);
     197            1 :                 libutf8::utf8_iterator start(str);
     198            1 :                 CATCH_REQUIRE(it - start == 0);
     199            1 :                 CATCH_REQUIRE(start - it == 0);
     200              : 
     201        65537 :                 for(char32_t wc(0); wc < 0x10000; ++wc)
     202              :                 {
     203        65536 :                     if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
     204              :                     {
     205            0 :                         wc = 0xDFFF;
     206            0 :                         continue;
     207              :                     }
     208        65536 :                     CATCH_REQUIRE(*it == wc + plan);
     209        65536 :                     it++;
     210              : 
     211        65536 :                     libutf8::utf8_iterator zero(it);
     212        65536 :                     zero.rewind();
     213        65536 :                     CATCH_REQUIRE(zero == start);
     214              :                 }
     215              : 
     216            1 :                 libutf8::utf8_iterator copy(it);
     217            1 :                 CATCH_REQUIRE(static_cast<std::size_t>(it - start) == str.length());
     218            1 :                 CATCH_REQUIRE(static_cast<std::size_t>(copy - start) == str.length());
     219            1 :                 CATCH_REQUIRE(copy - it == 0);
     220            1 :                 CATCH_REQUIRE(it - copy == 0);
     221            1 :                 copy.rewind();
     222            1 :                 CATCH_REQUIRE(copy - start == 0);
     223            1 :                 CATCH_REQUIRE(start - copy == 0);
     224            1 :                 CATCH_REQUIRE(static_cast<std::size_t>(start - copy) == 0);
     225            1 :                 CATCH_REQUIRE(static_cast<std::size_t>(copy - start) == 0);
     226              : 
     227        65537 :                 for(char32_t wc(0x10000); wc > 0; )
     228              :                 {
     229        65536 :                     --wc;
     230        65536 :                     if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
     231              :                     {
     232            0 :                         wc = 0xD800;
     233            0 :                         continue;
     234              :                     }
     235        65536 :                     it--;
     236        65536 :                     CATCH_REQUIRE(*it == wc + plan);
     237              :                 }
     238              :             }
     239           17 :         }
     240              :     }
     241            1 :     CATCH_END_SECTION()
     242            1 : }
     243              : 
     244              : 
     245            2 : CATCH_TEST_CASE("libutf8_iterator_invalid_string", "[iterator],[invalid]")
     246              : {
     247            2 :     CATCH_START_SECTION("libutf8_iterator_invalid_string: iterators with invalid characters (bad UTF-8)")
     248              :     {
     249          101 :         for(int repeat(0); repeat < 100; ++repeat)
     250              :         {
     251              :             // create a string of STR_LENGTH random non-ASCII characters
     252              :             //
     253          100 :             constexpr size_t STR_LENGTH = 4;
     254              :             char32_t wc;
     255          100 :             std::u32string wstr;
     256          100 :             wstr.reserve(STR_LENGTH);
     257          500 :             for(size_t idx(0); idx < STR_LENGTH; ++idx)
     258              :             {
     259              :                 do
     260              :                 {
     261          400 :                     wc = unittest::rand_char(true);
     262              :                 }
     263          400 :                 while(wc < 0x80);
     264          400 :                 wstr += wc;
     265              :             }
     266          100 :             std::string str(libutf8::to_u8string(wstr));
     267              : 
     268              : //std::cerr << "-------------- Repeat " << repeat << " String ready " << str.length() << " ...\n";
     269              : 
     270              :             // first verify that it works
     271              :             //
     272          100 :             std::string::size_type pos[STR_LENGTH];
     273              :             {
     274          100 :                 libutf8::utf8_iterator it(str);
     275              : 
     276          100 :                 CATCH_REQUIRE(it == str.begin());
     277          100 :                 CATCH_REQUIRE(it == str.cbegin());
     278          100 :                 CATCH_REQUIRE(it != str.end());
     279          100 :                 CATCH_REQUIRE(it != str.cend());
     280              : 
     281          100 :                 CATCH_REQUIRE(str.begin()  == it);
     282          100 :                 CATCH_REQUIRE(str.cbegin() == it);
     283          100 :                 CATCH_REQUIRE(str.end()    != it);
     284          100 :                 CATCH_REQUIRE(str.cend()   != it);
     285              : 
     286          500 :                 for(size_t idx(0); idx < STR_LENGTH; ++idx)
     287              :                 {
     288          400 :                     CATCH_REQUIRE(*it == wstr[idx]);
     289          400 :                     if(rand() % 2 == 0)
     290              :                     {
     291          196 :                         pos[idx] = it - str.begin();
     292              :                     }
     293              :                     else
     294              :                     {
     295          204 :                         pos[idx] = -(str.begin() - it);
     296              :                     }
     297          400 :                     ++it;
     298              :                 }
     299              : 
     300          100 :                 CATCH_REQUIRE(it != str.begin());
     301          100 :                 CATCH_REQUIRE(it != str.cbegin());
     302          100 :                 CATCH_REQUIRE(it == str.end());
     303          100 :                 CATCH_REQUIRE(it == str.cend());
     304              : 
     305          100 :                 CATCH_REQUIRE(str.begin()  != it);
     306          100 :                 CATCH_REQUIRE(str.cbegin() != it);
     307          100 :                 CATCH_REQUIRE(str.end()    == it);
     308          100 :                 CATCH_REQUIRE(str.cend()   == it);
     309              : 
     310          100 :                 CATCH_REQUIRE(*it == libutf8::EOS);
     311          100 :                 ++it;
     312          100 :                 it++;
     313          100 :                 CATCH_REQUIRE(it == str.cend());
     314              : 
     315          100 :                 CATCH_REQUIRE(it.good());
     316          100 :                 CATCH_REQUIRE_FALSE(it.bad());
     317              :             }
     318              : 
     319              :             {
     320          100 :                 libutf8::utf8_iterator it(str);
     321              : 
     322          100 :                 str[pos[1]] = rand() % 0x40 + 0x80;
     323              : 
     324          100 :                 CATCH_REQUIRE(*it++ == wstr[0]);
     325          100 :                 CATCH_REQUIRE(*it++ == libutf8::NOT_A_CHARACTER);       // we broke this one
     326          100 :                 CATCH_REQUIRE(*it++ == wstr[2]);
     327          100 :                 CATCH_REQUIRE(*it++ == wstr[3]);
     328          100 :                 CATCH_REQUIRE(*it++ == libutf8::EOS);
     329              : 
     330          100 :                 CATCH_REQUIRE_FALSE(it.good());
     331          100 :                 CATCH_REQUIRE(it.bad());
     332          100 :                 it.clear();
     333          100 :                 CATCH_REQUIRE(it.good());
     334          100 :                 CATCH_REQUIRE_FALSE(it.bad());
     335              :             }
     336              : 
     337              :             {
     338          100 :                 str.erase(str.length() - 1);
     339          100 :                 libutf8::utf8_iterator it(str);
     340              : 
     341          100 :                 str[pos[1]] = rand() % 0x40 + 0x80;
     342              : 
     343          100 :                 CATCH_REQUIRE(*it++ == wstr[0]);
     344          100 :                 CATCH_REQUIRE(*it++ == libutf8::NOT_A_CHARACTER);
     345          100 :                 CATCH_REQUIRE(*it++ == wstr[2]);
     346          100 :                 CATCH_REQUIRE(*it++ == libutf8::NOT_A_CHARACTER);
     347              : 
     348          100 :                 CATCH_REQUIRE_FALSE(it.good());
     349          100 :                 CATCH_REQUIRE(it.bad());
     350          100 :                 it.clear();
     351          100 :                 CATCH_REQUIRE(it.good());
     352          100 :                 CATCH_REQUIRE_FALSE(it.bad());
     353              :             }
     354          100 :         }
     355              :     }
     356            2 :     CATCH_END_SECTION()
     357              : 
     358            2 :     CATCH_START_SECTION("libutf8_iterator_invalid_string: iterators with invalid characters (too large)")
     359              :     {
     360       983040 :         for(char32_t wc(0x110000); wc < 0x1FFFFF; ++wc)
     361              :         {
     362              :             // since this character is not valid
     363              :             // we have to encode it _manually_
     364              :             //
     365       983039 :             char buf[4];
     366       983039 :             buf[0] = 0xF0 | ((wc >> 18) & 0x07);
     367       983039 :             buf[1] = 0x80 | ((wc >> 12) & 0x3F);
     368       983039 :             buf[2] = 0x80 | ((wc >>  6) & 0x3F);
     369       983039 :             buf[3] = 0x80 | ((wc >>  0) & 0x3F);
     370              : 
     371      2949117 :             std::string str(buf, 4);
     372              : 
     373              :             // verify that the iterator flags this sequence as invalid
     374              :             //
     375              :             {
     376       983039 :                 libutf8::utf8_iterator it(str);
     377              : 
     378       983039 :                 CATCH_REQUIRE(it == str.begin());
     379       983039 :                 CATCH_REQUIRE(it == str.cbegin());
     380       983039 :                 CATCH_REQUIRE(it != str.end());
     381       983039 :                 CATCH_REQUIRE(it != str.cend());
     382              : 
     383       983039 :                 CATCH_REQUIRE(str.begin()  == it);
     384       983039 :                 CATCH_REQUIRE(str.cbegin() == it);
     385       983039 :                 CATCH_REQUIRE(str.end()    != it);
     386       983039 :                 CATCH_REQUIRE(str.cend()   != it);
     387              : 
     388       983039 :                 CATCH_REQUIRE(*it == libutf8::NOT_A_CHARACTER);
     389              : 
     390       983039 :                 CATCH_REQUIRE_FALSE(it.good());
     391       983039 :                 CATCH_REQUIRE(it.bad());
     392       983039 :                 it.clear();
     393       983039 :                 CATCH_REQUIRE(it.good());
     394       983039 :                 CATCH_REQUIRE_FALSE(it.bad());
     395              : 
     396       983039 :                 ++it;
     397              : 
     398       983039 :                 CATCH_REQUIRE(it != str.begin());
     399       983039 :                 CATCH_REQUIRE(it != str.cbegin());
     400       983039 :                 CATCH_REQUIRE(it == str.end());
     401       983039 :                 CATCH_REQUIRE(it == str.cend());
     402              : 
     403       983039 :                 CATCH_REQUIRE(str.begin()  != it);
     404       983039 :                 CATCH_REQUIRE(str.cbegin() != it);
     405       983039 :                 CATCH_REQUIRE(str.end()    == it);
     406       983039 :                 CATCH_REQUIRE(str.cend()   == it);
     407              : 
     408       983039 :                 CATCH_REQUIRE(*it == libutf8::EOS);
     409       983039 :                 ++it;
     410       983039 :                 it++;
     411       983039 :                 CATCH_REQUIRE(it == str.cend());
     412              : 
     413       983039 :                 CATCH_REQUIRE_FALSE(it.good());
     414       983039 :                 CATCH_REQUIRE(it.bad());
     415              :             }
     416       983039 :         }
     417              :     }
     418            2 :     CATCH_END_SECTION()
     419            2 : }
     420              : 
     421              : 
     422              : 
     423              : // vim: ts=4 sw=4 et
        

Generated by: LCOV version 2.0-1

Snap C++ | List of projects | List of versions