LCOV - code coverage report
Current view: top level - tests - iterator.cpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 158 178 88.8 %
Date: 2019-06-01 00:57:17 Functions: 4 4 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*    tests/iterator.cpp
       2             :  *    Copyright (C) 2013-2019  Made to Order Software Corporation
       3             :  *
       4             :  *    This program is free software; you can redistribute it and/or modify
       5             :  *    it under the terms of the GNU General Public License as published by
       6             :  *    the Free Software Foundation; either version 2 of the License, or
       7             :  *    (at your option) any later version.
       8             :  *
       9             :  *    This program is distributed in the hope that it will be useful,
      10             :  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
      11             :  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12             :  *    GNU General Public License for more details.
      13             :  *
      14             :  *    You should have received a copy of the GNU General Public License along
      15             :  *    with this program; if not, write to the Free Software Foundation, Inc.,
      16             :  *    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
      17             :  *
      18             :  *    Authors
      19             :  *    Alexis Wilke   alexis@m2osw.com
      20             :  */
      21             : 
      22             : // unit test
      23             : //
      24             : #include "main.h"
      25             : 
      26             : // libutf8 lib
      27             : //
      28             : #include "libutf8/base.h"
      29             : #include "libutf8/iterator.h"
      30             : #include "libutf8/libutf8.h"
      31             : 
      32             : // C++ lib
      33             : //
      34             : #include <cctype>
      35             : #include <iostream>
      36             : 
      37             : 
      38           3 : CATCH_TEST_CASE("libutf8 iterator", "iterator")
      39             : {
      40           2 :     CATCH_START_SECTION("valid iterators tests")
      41           1 :         char32_t p(0);
      42           0 :         do
      43             :         {
      44           1 :             p = rand() % 0x11 * 0x10000;
      45             :         }
      46           1 :         while(p == 0 || (p >= 0xD800 && p <= 0xDFFF));
      47             : 
      48          18 :         for(char32_t plan(0); plan < 0x110000; plan += 0x10000)
      49             :         {
      50             :             // create one plane in one string
      51             :             //
      52          34 :             std::string str;
      53          17 :             str.reserve(0x10000 * 4);
      54     1112082 :             for(char32_t wc(0); wc < 0x10000; ++wc)
      55             :             {
      56     1112065 :                 if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
      57             :                 {
      58           1 :                     wc = 0xDFFF;
      59           1 :                     continue;
      60             :                 }
      61             :                 char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
      62     1112064 :                 CATCH_REQUIRE(libutf8::wctombs(buf, wc + plan, sizeof(buf)) >= 1);
      63     1112064 :                 if(plan == 0 && wc == 0)
      64             :                 {
      65             :                     // this is a special case as buf[0] = '\0' and the += with
      66             :                     // the string won't work
      67             :                     //
      68           1 :                     str += '\0';
      69             :                 }
      70             :                 else
      71             :                 {
      72     1112063 :                     str += buf;
      73             :                 }
      74             :             }
      75             : //std::cerr << "-------------- Plan " << static_cast<int>(plan) << " String ready " << str.length() << " ...\n";
      76             : 
      77             :             {
      78          17 :                 libutf8::utf8_iterator it(str);
      79             : 
      80          17 :                 CATCH_REQUIRE(it == str.begin());
      81          17 :                 CATCH_REQUIRE(it == str.cbegin());
      82          17 :                 CATCH_REQUIRE(it != str.end());
      83          17 :                 CATCH_REQUIRE(it != str.cend());
      84             : 
      85          17 :                 CATCH_REQUIRE(str.begin() == it);
      86          17 :                 CATCH_REQUIRE(str.cbegin() == it);
      87          17 :                 CATCH_REQUIRE(str.end() != it);
      88          17 :                 CATCH_REQUIRE(str.cend() != it);
      89             : 
      90     1112082 :                 for(char32_t wc(0); wc < 0x10000; ++wc)
      91             :                 {
      92     1112065 :                     if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
      93             :                     {
      94           1 :                         wc = 0xDFFF;
      95           1 :                         continue;
      96             :                     }
      97     1112064 :                     CATCH_REQUIRE(*it == wc + plan);
      98     1112064 :                     ++it;
      99             :                 }
     100             : 
     101          17 :                 CATCH_REQUIRE(it != str.begin());
     102          17 :                 CATCH_REQUIRE(it != str.cbegin());
     103          17 :                 CATCH_REQUIRE(it == str.end());
     104          17 :                 CATCH_REQUIRE(it == str.cend());
     105             : 
     106          17 :                 CATCH_REQUIRE(str.begin() != it);
     107          17 :                 CATCH_REQUIRE(str.cbegin() != it);
     108          17 :                 CATCH_REQUIRE(str.end() == it);
     109          17 :                 CATCH_REQUIRE(str.cend() == it);
     110             : 
     111          17 :                 CATCH_REQUIRE(*it == EOF);
     112          17 :                 ++it;
     113          17 :                 it++;
     114          17 :                 CATCH_REQUIRE(it == str.cend());
     115             : 
     116     1112082 :                 for(char32_t wc(0x10000); wc > 0; )
     117             :                 {
     118     1112065 :                     --wc;
     119     1112065 :                     if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
     120             :                     {
     121           1 :                         wc = 0xD800;
     122           1 :                         continue;
     123             :                     }
     124     1112064 :                     --it;
     125     1112064 :                     CATCH_REQUIRE(*it == wc + plan);
     126             :                 }
     127             : 
     128          17 :                 --it;
     129          17 :                 it--;
     130             : 
     131          17 :                 CATCH_REQUIRE(it.good());
     132          17 :                 CATCH_REQUIRE(!it.bad());
     133             :             }
     134             : 
     135          17 :             if(plan == p)
     136             :             {
     137           1 :                 libutf8::utf8_iterator it(str);
     138             : 
     139       65537 :                 for(char32_t wc(0); wc < 0x10000; ++wc)
     140             :                 {
     141       65536 :                     if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
     142             :                     {
     143           0 :                         wc = 0xDFFF;
     144           0 :                         continue;
     145             :                     }
     146       65536 :                     CATCH_REQUIRE(*it++ == wc + plan);
     147             :                 }
     148             : 
     149           1 :                 CATCH_REQUIRE(it == str.end());
     150           1 :                 it++;
     151           1 :                 ++it;
     152           1 :                 CATCH_REQUIRE(it == str.end());
     153             : 
     154       65537 :                 for(char32_t wc(0x10000); wc > 0; )
     155             :                 {
     156       65536 :                     --wc;
     157       65536 :                     if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
     158             :                     {
     159           0 :                         wc = 0xD800;
     160           0 :                         continue;
     161             :                     }
     162       65536 :                     CATCH_REQUIRE(*--it == wc + plan);
     163             :                 }
     164             : 
     165           1 :                 CATCH_REQUIRE(it == str.begin());
     166           1 :                 CATCH_REQUIRE(str.begin() == it);
     167           1 :                 it--;
     168           1 :                 --it;
     169           1 :                 CATCH_REQUIRE(it == str.begin());
     170           1 :                 CATCH_REQUIRE(str.begin() == it);
     171             :             }
     172             : 
     173          17 :             if(plan == (p + 1) % 0x11)
     174             :             {
     175           0 :                 libutf8::utf8_iterator it(str);
     176             : 
     177           0 :                 for(char32_t wc(0); wc < 0x10000; ++wc)
     178             :                 {
     179           0 :                     if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
     180             :                     {
     181           0 :                         wc = 0xDFFF;
     182           0 :                         continue;
     183             :                     }
     184           0 :                     CATCH_REQUIRE(*it == wc + plan);
     185           0 :                     it++;
     186             :                 }
     187             : 
     188           0 :                 for(char32_t wc(0x10000); wc > 0; )
     189             :                 {
     190           0 :                     --wc;
     191           0 :                     if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
     192             :                     {
     193           0 :                         wc = 0xD800;
     194           0 :                         continue;
     195             :                     }
     196           0 :                     it--;
     197           0 :                     CATCH_REQUIRE(*it == wc + plan);
     198             :                 }
     199             :             }
     200             :         }
     201             :     CATCH_END_SECTION()
     202           1 : }
     203             : 
     204             : 
     205           4 : CATCH_TEST_CASE("libutf8 iterator invalid string", "iterator,invalid")
     206             : {
     207           4 :     CATCH_START_SECTION("iterators with invalid characters (bad UTF-8)")
     208         101 :         for(int repeat(0); repeat < 100; ++repeat)
     209             :         {
     210             :             // create one string of STR_LENGTH random non-ASCII characters
     211             :             //
     212         100 :             constexpr size_t STR_LENGTH = 4;
     213             :             char32_t wc;
     214         200 :             std::u32string wstr;
     215         100 :             wstr.reserve(STR_LENGTH);
     216         500 :             for(size_t idx(0); idx < STR_LENGTH; ++idx)
     217             :             {
     218           0 :                 do
     219             :                 {
     220         400 :                     wc = unittest::rand_char(true);
     221             :                 }
     222         400 :                 while(wc < 0x80);
     223         400 :                 wstr += wc;
     224             :             }
     225         200 :             std::string str(libutf8::to_u8string(wstr));
     226             : 
     227             : //std::cerr << "-------------- Plan " << static_cast<int>(plan) << " String ready " << str.length() << " ...\n";
     228             : 
     229             :             // first verify that it works
     230             :             //
     231             :             std::string::size_type pos[STR_LENGTH];
     232             :             {
     233         100 :                 libutf8::utf8_iterator it(str);
     234             : 
     235         100 :                 CATCH_REQUIRE(it == str.begin());
     236         100 :                 CATCH_REQUIRE(it == str.cbegin());
     237         100 :                 CATCH_REQUIRE(it != str.end());
     238         100 :                 CATCH_REQUIRE(it != str.cend());
     239             : 
     240         100 :                 CATCH_REQUIRE(str.begin()  == it);
     241         100 :                 CATCH_REQUIRE(str.cbegin() == it);
     242         100 :                 CATCH_REQUIRE(str.end()    != it);
     243         100 :                 CATCH_REQUIRE(str.cend()   != it);
     244             : 
     245         500 :                 for(size_t idx(0); idx < STR_LENGTH; ++idx)
     246             :                 {
     247         400 :                     CATCH_REQUIRE(*it == wstr[idx]);
     248         400 :                     if(rand() % 2 == 0)
     249             :                     {
     250         193 :                         pos[idx] = it - str.begin();
     251             :                     }
     252             :                     else
     253             :                     {
     254         207 :                         pos[idx] = -(str.begin() - it);
     255             :                     }
     256         400 :                     ++it;
     257             :                 }
     258             : 
     259         100 :                 CATCH_REQUIRE(it != str.begin());
     260         100 :                 CATCH_REQUIRE(it != str.cbegin());
     261         100 :                 CATCH_REQUIRE(it == str.end());
     262         100 :                 CATCH_REQUIRE(it == str.cend());
     263             : 
     264         100 :                 CATCH_REQUIRE(str.begin()  != it);
     265         100 :                 CATCH_REQUIRE(str.cbegin() != it);
     266         100 :                 CATCH_REQUIRE(str.end()    == it);
     267         100 :                 CATCH_REQUIRE(str.cend()   == it);
     268             : 
     269         100 :                 CATCH_REQUIRE(*it == EOF);
     270         100 :                 ++it;
     271         100 :                 it++;
     272         100 :                 CATCH_REQUIRE(it == str.cend());
     273             :             }
     274             : 
     275             :             {
     276         100 :                 libutf8::utf8_iterator it(str);
     277             : 
     278         100 :                 str[pos[1]] = rand() % 0x40 + 0x80;
     279             : 
     280         100 :                 CATCH_REQUIRE(*it++ == wstr[0]);
     281         100 :                 CATCH_REQUIRE(*it++ == U'\0');       // we broke this one
     282         100 :                 CATCH_REQUIRE(*it++ == wstr[2]);
     283         100 :                 CATCH_REQUIRE(*it++ == wstr[3]);
     284         100 :                 CATCH_REQUIRE(*it++ == EOF);
     285             :             }
     286             : 
     287             :             {
     288         100 :                 str.erase(str.length() - 1);
     289         100 :                 libutf8::utf8_iterator it(str);
     290             : 
     291         100 :                 str[pos[1]] = rand() % 0x40 + 0x80;
     292             : 
     293         100 :                 CATCH_REQUIRE(*it++ == wstr[0]);
     294         100 :                 CATCH_REQUIRE(*it++ == U'\0');
     295         100 :                 CATCH_REQUIRE(*it++ == wstr[2]);
     296         100 :                 CATCH_REQUIRE(*it++ == U'\0');
     297             :             }
     298             :         }
     299             :     CATCH_END_SECTION()
     300             : 
     301           4 :     CATCH_START_SECTION("iterators with invalid characters (too large)")
     302      983040 :         for(char32_t wc(0x110000); wc < 0x1FFFFF; ++wc)
     303             :         {
     304             :             // since this character is not valid we have to encode it _manually_
     305             :             //
     306             :             char buf[4];
     307      983039 :             buf[0] = 0xF0 | ((wc >> 18) & 0x07);
     308      983039 :             buf[1] = 0x80 | ((wc >> 12) & 0x3F);
     309      983039 :             buf[2] = 0x80 | ((wc >>  6) & 0x3F);
     310      983039 :             buf[3] = 0x80 | ((wc >>  0) & 0x3F);
     311             : 
     312     1966078 :             std::string str(buf, 4);
     313             : 
     314             :             // verify that the iterator yields '\0' for this character, then reaches the end
     315             :             //
     316             :             {
     317      983039 :                 libutf8::utf8_iterator it(str);
     318             : 
     319      983039 :                 CATCH_REQUIRE(it == str.begin());
     320      983039 :                 CATCH_REQUIRE(it == str.cbegin());
     321      983039 :                 CATCH_REQUIRE(it != str.end());
     322      983039 :                 CATCH_REQUIRE(it != str.cend());
     323             : 
     324      983039 :                 CATCH_REQUIRE(str.begin()  == it);
     325      983039 :                 CATCH_REQUIRE(str.cbegin() == it);
     326      983039 :                 CATCH_REQUIRE(str.end()    != it);
     327      983039 :                 CATCH_REQUIRE(str.cend()   != it);
     328             : 
     329      983039 :                 CATCH_REQUIRE(*it == '\0');
     330      983039 :                 ++it;
     331             : 
     332      983039 :                 CATCH_REQUIRE(it != str.begin());
     333      983039 :                 CATCH_REQUIRE(it != str.cbegin());
     334      983039 :                 CATCH_REQUIRE(it == str.end());
     335      983039 :                 CATCH_REQUIRE(it == str.cend());
     336             : 
     337      983039 :                 CATCH_REQUIRE(str.begin()  != it);
     338      983039 :                 CATCH_REQUIRE(str.cbegin() != it);
     339      983039 :                 CATCH_REQUIRE(str.end()    == it);
     340      983039 :                 CATCH_REQUIRE(str.cend()   == it);
     341             : 
     342      983039 :                 CATCH_REQUIRE(*it == EOF);
     343      983039 :                 ++it;
     344      983039 :                 it++;
     345      983039 :                 CATCH_REQUIRE(it == str.cend());
     346             :             }
     347             :         }
     348             :     CATCH_END_SECTION()
     349           8 : }
     350             : 
     351             : 
     352             : 
     353             : // vim: ts=4 sw=4 et

Generated by: LCOV version 1.12