LCOV - code coverage report
Current view: top level - tests - catch_iterator.cpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 218 228 95.6 %
Date: 2022-07-31 10:17:08 Functions: 4 4 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // Copyright (c) 2013-2022  Made to Order Software Corporation
       2             : //
       3             : // https://snapwebsites.org/project/libutf8
       4             : // contact@m2osw.com
       5             : //
       6             : // This program is free software; you can redistribute it and/or modify
       7             : // it under the terms of the GNU General Public License as published by
       8             : // the Free Software Foundation; either version 2 of the License, or
       9             : // (at your option) any later version.
      10             : //
      11             : // This program is distributed in the hope that it will be useful,
      12             : // but WITHOUT ANY WARRANTY; without even the implied warranty of
      13             : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14             : // GNU General Public License for more details.
      15             : //
      16             : // You should have received a copy of the GNU General Public License along
      17             : // with this program; if not, write to the Free Software Foundation, Inc.,
      18             : // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
      19             : 
      20             : // libutf8 lib
      21             : //
      22             : #include    <libutf8/iterator.h>
      23             : 
      24             : #include    <libutf8/base.h>
      25             : #include    <libutf8/libutf8.h>
      26             : 
      27             : 
      28             : // unit test
      29             : //
      30             : #include    "catch_main.h"
      31             : 
      32             : 
      33             : // C++ lib
      34             : //
      35             : #include    <cctype>
      36             : #include    <iostream>
      37             : 
      38             : 
      39             : // last include
      40             : //
      41             : #include    <snapdev/poison.h>
      42             : 
      43             : 
      44             : 
      45           3 : CATCH_TEST_CASE("libutf8_iterator", "[iterator]")
      46             : {
      47           2 :     CATCH_START_SECTION("valid iterators tests")
      48             :     {
      49           1 :         char32_t p(0);
      50           0 :         do
      51             :         {
      52           1 :             p = rand() % 0x11 * 0x10000;
      53             :         }
      54           1 :         while(p == 0 || (p >= 0xD800 && p <= 0xDFFF));
      55             : 
      56          18 :         for(char32_t plan(0); plan < 0x110000; plan += 0x10000)
      57             :         {
      58             :             // build one string holding every character of the current Unicode plane
      59             :             //
      60          34 :             std::string str;
      61          17 :             str.reserve(0x10000 * 4);
      62     1112082 :             for(char32_t wc(0); wc < 0x10000; ++wc)
      63             :             {
      64     1112066 :                 if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
      65             :                 {
      66           1 :                     wc = 0xDFFF;
      67           1 :                     continue;
      68             :                 }
      69     1112064 :                 char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
      70     1112064 :                 CATCH_REQUIRE(libutf8::wctombs(buf, wc + plan, sizeof(buf)) >= 1);
      71     1112064 :                 if(plan == 0 && wc == 0)
      72             :                 {
      73             :                     // special case: here buf[0] == '\0', so `str += buf` would
      74             :                     // append an empty C string; add the NUL byte explicitly
      75             :                     //
      76           1 :                     str += '\0';
      77             :                 }
      78             :                 else
      79             :                 {
      80     1112063 :                     str += buf;
      81             :                 }
      82             :             }
      83             : //std::cerr << "-------------- Plan " << static_cast<int>(plan) << " String ready " << str.length() << " ...\n";
      84             : 
      85             :             {
      86          17 :                 libutf8::utf8_iterator it(str);
      87          17 :                 libutf8::utf8_iterator it_end(str, true);
      88          17 :                 libutf8::utf8_iterator it_next(str);
      89          17 :                 ++it_next;
      90             : 
      91          17 :                 CATCH_REQUIRE(it == str.begin());
      92          17 :                 CATCH_REQUIRE(it == str.cbegin());
      93          17 :                 CATCH_REQUIRE(it != str.end());
      94          17 :                 CATCH_REQUIRE(it != str.cend());
      95             : 
      96          17 :                 CATCH_REQUIRE(it == it);
      97          17 :                 CATCH_REQUIRE(it != it_end);
      98          17 :                 CATCH_REQUIRE(it != it_next);
      99             : 
     100          17 :                 CATCH_REQUIRE(str.begin() == it);
     101          17 :                 CATCH_REQUIRE(str.cbegin() == it);
     102          17 :                 CATCH_REQUIRE(str.end() != it);
     103          17 :                 CATCH_REQUIRE(str.cend() != it);
     104             : 
     105     1112082 :                 for(char32_t wc(0); wc < 0x10000; ++wc)
     106             :                 {
     107     1112066 :                     if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
     108             :                     {
     109           1 :                         wc = 0xDFFF;
     110           1 :                         continue;
     111             :                     }
     112     1112064 :                     CATCH_REQUIRE(*it == wc + plan);
     113     1112064 :                     ++it;
     114             :                 }
     115             : 
     116          17 :                 CATCH_REQUIRE(it != str.begin());
     117          17 :                 CATCH_REQUIRE(it != str.cbegin());
     118          17 :                 CATCH_REQUIRE(it == str.end());
     119          17 :                 CATCH_REQUIRE(it == str.cend());
     120             : 
     121          17 :                 CATCH_REQUIRE(str.begin() != it);
     122          17 :                 CATCH_REQUIRE(str.cbegin() != it);
     123          17 :                 CATCH_REQUIRE(str.end() == it);
     124          17 :                 CATCH_REQUIRE(str.cend() == it);
     125             : 
     126          17 :                 CATCH_REQUIRE(*it == libutf8::EOS);
     127          17 :                 ++it;
     128          17 :                 it++;
     129          17 :                 CATCH_REQUIRE(it == str.cend());
     130             : 
     131     1112082 :                 for(char32_t wc(0x10000); wc > 0; )
     132             :                 {
     133     1112065 :                     --wc;
     134     1112066 :                     if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
     135             :                     {
     136           1 :                         wc = 0xD800;
     137           1 :                         continue;
     138             :                     }
     139     1112064 :                     --it;
     140     1112064 :                     CATCH_REQUIRE(*it == wc + plan);
     141             :                 }
     142             : 
     143          17 :                 --it;
     144          17 :                 it--;
     145             : 
     146          17 :                 CATCH_REQUIRE(it.good());
     147          17 :                 CATCH_REQUIRE_FALSE(it.bad());
     148             :             }
     149             : 
     150          17 :             if(plan == p)
     151             :             {
     152           1 :                 libutf8::utf8_iterator it(str);
     153             : 
     154       65537 :                 for(char32_t wc(0); wc < 0x10000; ++wc)
     155             :                 {
     156       65536 :                     if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
     157             :                     {
     158           0 :                         wc = 0xDFFF;
     159           0 :                         continue;
     160             :                     }
     161       65536 :                     CATCH_REQUIRE(*it++ == wc + plan);
     162             :                 }
     163             : 
     164           1 :                 CATCH_REQUIRE(it == str.end());
     165           1 :                 it++;
     166           1 :                 CATCH_REQUIRE(it.good());
     167           1 :                 CATCH_REQUIRE_FALSE(it.bad());
     168           1 :                 ++it;
     169           1 :                 CATCH_REQUIRE(it.good());
     170           1 :                 CATCH_REQUIRE_FALSE(it.bad());
     171           1 :                 CATCH_REQUIRE(it == str.end());
     172           1 :                 CATCH_REQUIRE(it.good());
     173           1 :                 CATCH_REQUIRE_FALSE(it.bad());
     174             : 
     175       65537 :                 for(char32_t wc(0x10000); wc > 0; )
     176             :                 {
     177       65536 :                     --wc;
     178       65536 :                     if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
     179             :                     {
     180           0 :                         wc = 0xD800;
     181           0 :                         continue;
     182             :                     }
     183       65536 :                     CATCH_REQUIRE(*--it == wc + plan);
     184             :                 }
     185             : 
     186           1 :                 CATCH_REQUIRE(it == str.begin());
     187           1 :                 CATCH_REQUIRE(str.begin() == it);
     188           1 :                 it--;
     189           1 :                 --it;
     190           1 :                 CATCH_REQUIRE(it == str.begin());
     191           1 :                 CATCH_REQUIRE(str.begin() == it);
     192             :             }
     193             : 
     194          17 :             if(plan == (p + 0x10000) % 0x110000)
     195             :             {
     196           1 :                 libutf8::utf8_iterator it(str);
     197           1 :                 libutf8::utf8_iterator start(str);
     198           1 :                 CATCH_REQUIRE(it - start == 0);
     199           1 :                 CATCH_REQUIRE(start - it == 0);
     200             : 
     201       65537 :                 for(char32_t wc(0); wc < 0x10000; ++wc)
     202             :                 {
     203       65536 :                     if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
     204             :                     {
     205           0 :                         wc = 0xDFFF;
     206           0 :                         continue;
     207             :                     }
     208       65536 :                     CATCH_REQUIRE(*it == wc + plan);
     209       65536 :                     it++;
     210             : 
     211       65536 :                     libutf8::utf8_iterator zero(it);
     212       65536 :                     zero.rewind();
     213       65536 :                     CATCH_REQUIRE(zero == start);
     214             :                 }
     215             : 
     216           1 :                 libutf8::utf8_iterator copy(it);
     217           1 :                 CATCH_REQUIRE(static_cast<std::size_t>(it - start) == str.length());
     218           1 :                 CATCH_REQUIRE(static_cast<std::size_t>(copy - start) == str.length());
     219           1 :                 CATCH_REQUIRE(copy - it == 0);
     220           1 :                 CATCH_REQUIRE(it - copy == 0);
     221           1 :                 copy.rewind();
     222           1 :                 CATCH_REQUIRE(copy - start == 0);
     223           1 :                 CATCH_REQUIRE(start - copy == 0);
     224           1 :                 CATCH_REQUIRE(static_cast<std::size_t>(start - copy) == 0);
     225           1 :                 CATCH_REQUIRE(static_cast<std::size_t>(copy - start) == 0);
     226             : 
     227       65537 :                 for(char32_t wc(0x10000); wc > 0; )
     228             :                 {
     229       65536 :                     --wc;
     230       65536 :                     if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
     231             :                     {
     232           0 :                         wc = 0xD800;
     233           0 :                         continue;
     234             :                     }
     235       65536 :                     it--;
     236       65536 :                     CATCH_REQUIRE(*it == wc + plan);
     237             :                 }
     238             :             }
     239             :         }
     240             :     }
     241             :     CATCH_END_SECTION()
     242           1 : }
     243             : 
     244             : 
     245           4 : CATCH_TEST_CASE("libutf8_iterator_invalid_string", "[iterator],[invalid]")
     246             : {
     247           4 :     CATCH_START_SECTION("iterators with invalid characters (bad UTF-8)")
     248             :     {
     249         101 :         for(int repeat(0); repeat < 100; ++repeat)
     250             :         {
     251             :             // create a string of STR_LENGTH random characters, all >= 0x80
     252             :             //
     253         100 :             constexpr size_t STR_LENGTH = 4;
     254             :             char32_t wc;
     255         200 :             std::u32string wstr;
     256         100 :             wstr.reserve(STR_LENGTH);
     257         500 :             for(size_t idx(0); idx < STR_LENGTH; ++idx)
     258             :             {
     259           0 :                 do
     260             :                 {
     261         400 :                     wc = unittest::rand_char(true);
     262             :                 }
     263         400 :                 while(wc < 0x80);
     264         400 :                 wstr += wc;
     265             :             }
     266         200 :             std::string str(libutf8::to_u8string(wstr));
     267             : 
     268             : //std::cerr << "-------------- Plan " << static_cast<int>(plan) << " String ready " << str.length() << " ...\n";
     269             : 
     270             :             // first verify that it works
     271             :             //
     272         100 :             std::string::size_type pos[STR_LENGTH];
     273             :             {
     274         100 :                 libutf8::utf8_iterator it(str);
     275             : 
     276         100 :                 CATCH_REQUIRE(it == str.begin());
     277         100 :                 CATCH_REQUIRE(it == str.cbegin());
     278         100 :                 CATCH_REQUIRE(it != str.end());
     279         100 :                 CATCH_REQUIRE(it != str.cend());
     280             : 
     281         100 :                 CATCH_REQUIRE(str.begin()  == it);
     282         100 :                 CATCH_REQUIRE(str.cbegin() == it);
     283         100 :                 CATCH_REQUIRE(str.end()    != it);
     284         100 :                 CATCH_REQUIRE(str.cend()   != it);
     285             : 
     286         500 :                 for(size_t idx(0); idx < STR_LENGTH; ++idx)
     287             :                 {
     288         400 :                     CATCH_REQUIRE(*it == wstr[idx]);
     289         400 :                     if(rand() % 2 == 0)
     290             :                     {
     291         192 :                         pos[idx] = it - str.begin();
     292             :                     }
     293             :                     else
     294             :                     {
     295         208 :                         pos[idx] = -(str.begin() - it);
     296             :                     }
     297         400 :                     ++it;
     298             :                 }
     299             : 
     300         100 :                 CATCH_REQUIRE(it != str.begin());
     301         100 :                 CATCH_REQUIRE(it != str.cbegin());
     302         100 :                 CATCH_REQUIRE(it == str.end());
     303         100 :                 CATCH_REQUIRE(it == str.cend());
     304             : 
     305         100 :                 CATCH_REQUIRE(str.begin()  != it);
     306         100 :                 CATCH_REQUIRE(str.cbegin() != it);
     307         100 :                 CATCH_REQUIRE(str.end()    == it);
     308         100 :                 CATCH_REQUIRE(str.cend()   == it);
     309             : 
     310         100 :                 CATCH_REQUIRE(*it == libutf8::EOS);
     311         100 :                 ++it;
     312         100 :                 it++;
     313         100 :                 CATCH_REQUIRE(it == str.cend());
     314             : 
     315         100 :                 CATCH_REQUIRE(it.good());
     316         100 :                 CATCH_REQUIRE_FALSE(it.bad());
     317             :             }
     318             : 
     319             :             {
     320         100 :                 libutf8::utf8_iterator it(str);
     321             : 
     322         100 :                 str[pos[1]] = rand() % 0x40 + 0x80;
     323             : 
     324         100 :                 CATCH_REQUIRE(*it++ == wstr[0]);
     325         100 :                 CATCH_REQUIRE(*it++ == U'\0');       // we broke this one
     326         100 :                 CATCH_REQUIRE(*it++ == wstr[2]);
     327         100 :                 CATCH_REQUIRE(*it++ == wstr[3]);
     328         100 :                 CATCH_REQUIRE(*it++ == libutf8::EOS);
     329             : 
     330         100 :                 CATCH_REQUIRE_FALSE(it.good());
     331         100 :                 CATCH_REQUIRE(it.bad());
     332         100 :                 it.clear();
     333         100 :                 CATCH_REQUIRE(it.good());
     334         100 :                 CATCH_REQUIRE_FALSE(it.bad());
     335             :             }
     336             : 
     337             :             {
     338         100 :                 str.erase(str.length() - 1);
     339         100 :                 libutf8::utf8_iterator it(str);
     340             : 
     341         100 :                 str[pos[1]] = rand() % 0x40 + 0x80;
     342             : 
     343         100 :                 CATCH_REQUIRE(*it++ == wstr[0]);
     344         100 :                 CATCH_REQUIRE(*it++ == U'\0');
     345         100 :                 CATCH_REQUIRE(*it++ == wstr[2]);
     346         100 :                 CATCH_REQUIRE(*it++ == U'\0');
     347             : 
     348         100 :                 CATCH_REQUIRE_FALSE(it.good());
     349         100 :                 CATCH_REQUIRE(it.bad());
     350         100 :                 it.clear();
     351         100 :                 CATCH_REQUIRE(it.good());
     352         100 :                 CATCH_REQUIRE_FALSE(it.bad());
     353             :             }
     354             :         }
     355             :     }
     356             :     CATCH_END_SECTION()
     357             : 
     358           4 :     CATCH_START_SECTION("iterators with invalid characters (too large)")
     359             :     {
     360      983040 :         for(char32_t wc(0x110000); wc < 0x1FFFFF; ++wc)
     361             :         {
     362             :             // since this character is not valid
     363             :             // we have to encode it _manually_
     364             :             //
     365      983039 :             char buf[4];
     366      983039 :             buf[0] = 0xF0 | ((wc >> 18) & 0x07);
     367      983039 :             buf[1] = 0x80 | ((wc >> 12) & 0x3F);
     368      983039 :             buf[2] = 0x80 | ((wc >>  6) & 0x3F);
     369      983039 :             buf[3] = 0x80 | ((wc >>  0) & 0x3F);
     370             : 
     371     1966078 :             std::string str(buf, 4);
     372             : 
     373             :             // verify that the iterator reports this sequence as invalid
     374             :             //
     375             :             {
     376      983039 :                 libutf8::utf8_iterator it(str);
     377             : 
     378      983039 :                 CATCH_REQUIRE(it == str.begin());
     379      983039 :                 CATCH_REQUIRE(it == str.cbegin());
     380      983039 :                 CATCH_REQUIRE(it != str.end());
     381      983039 :                 CATCH_REQUIRE(it != str.cend());
     382             : 
     383      983039 :                 CATCH_REQUIRE(str.begin()  == it);
     384      983039 :                 CATCH_REQUIRE(str.cbegin() == it);
     385      983039 :                 CATCH_REQUIRE(str.end()    != it);
     386      983039 :                 CATCH_REQUIRE(str.cend()   != it);
     387             : 
     388      983039 :                 CATCH_REQUIRE(*it == '\0');
     389             : 
     390      983039 :                 CATCH_REQUIRE_FALSE(it.good());
     391      983039 :                 CATCH_REQUIRE(it.bad());
     392      983039 :                 it.clear();
     393      983039 :                 CATCH_REQUIRE(it.good());
     394      983039 :                 CATCH_REQUIRE_FALSE(it.bad());
     395             : 
     396      983039 :                 ++it;
     397             : 
     398      983039 :                 CATCH_REQUIRE(it != str.begin());
     399      983039 :                 CATCH_REQUIRE(it != str.cbegin());
     400      983039 :                 CATCH_REQUIRE(it == str.end());
     401      983039 :                 CATCH_REQUIRE(it == str.cend());
     402             : 
     403      983039 :                 CATCH_REQUIRE(str.begin()  != it);
     404      983039 :                 CATCH_REQUIRE(str.cbegin() != it);
     405      983039 :                 CATCH_REQUIRE(str.end()    == it);
     406      983039 :                 CATCH_REQUIRE(str.cend()   == it);
     407             : 
     408      983039 :                 CATCH_REQUIRE(*it == libutf8::EOS);
     409      983039 :                 ++it;
     410      983039 :                 it++;
     411      983039 :                 CATCH_REQUIRE(it == str.cend());
     412             : 
     413      983039 :                 CATCH_REQUIRE_FALSE(it.good());
     414      983039 :                 CATCH_REQUIRE(it.bad());
     415             :             }
     416             :         }
     417             :     }
     418             :     CATCH_END_SECTION()
     419           8 : }
     420             : 
     421             : 
     422             : 
     423             : // vim: ts=4 sw=4 et

Generated by: LCOV version 1.13