LCOV - code coverage report
Current view: top level - tests - iterator.cpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 158 178 88.8 %
Date: 2019-05-28 17:54:33 Functions: 4 4 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*    tests/iterator.cpp
       2             :  *    Copyright (C) 2013-2019  Made to Order Software Corporation
       3             :  *
       4             :  *    This program is free software; you can redistribute it and/or modify
       5             :  *    it under the terms of the GNU General Public License as published by
       6             :  *    the Free Software Foundation; either version 2 of the License, or
       7             :  *    (at your option) any later version.
       8             :  *
       9             :  *    This program is distributed in the hope that it will be useful,
      10             :  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
      11             :  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12             :  *    GNU General Public License for more details.
      13             :  *
      14             :  *    You should have received a copy of the GNU General Public License along
      15             :  *    with this program; if not, write to the Free Software Foundation, Inc.,
      16             :  *    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
      17             :  *
      18             :  *    Authors
      19             :  *    Alexis Wilke   alexis@m2osw.com
      20             :  */
      21             : 
      22             : // unit test
      23             : //
      24             : #include "main.h"
      25             : 
      26             : // libutf8 lib
      27             : //
      28             : #include "libutf8/base.h"
      29             : #include "libutf8/iterator.h"
      30             : #include "libutf8/libutf8.h"
      31             : 
      32             : // catch lib
      33             : //
      34             : #include <catch2/catch.hpp>
      35             : 
      36             : // C++ lib
      37             : //
      38             : #include <cctype>
      39             : #include <iostream>
      40             : 
      41             : 
      42           3 : CATCH_TEST_CASE("libutf8 iterator", "iterator")
      43             : {
      44           2 :     CATCH_START_SECTION("valid iterators tests")
      45           1 :         char32_t p(0);
      46           0 :         do
      47             :         {
      48           1 :             p = rand() % 0x11 * 0x10000;
      49             :         }
      50           1 :         while(p == 0 || (p >= 0xD800 && p <= 0xDFFF));
      51             : 
      52          18 :         for(char32_t plan(0); plan < 0x110000; plan += 0x10000)
      53             :         {
      54             :             // create a string with every character of one plane (`plan`)
      55             :             //
      56          34 :             std::string str;
      57          17 :             str.reserve(0x10000 * 4);
      58     1112082 :             for(char32_t wc(0); wc < 0x10000; ++wc)
      59             :             {
      60     1112065 :                 if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
      61             :                 {
      62           1 :                     wc = 0xDFFF;
      63           1 :                     continue;
      64             :                 }
      65             :                 char buf[libutf8::MBS_MIN_BUFFER_LENGTH];
      66     1112064 :                 CATCH_REQUIRE(libutf8::wctombs(buf, wc + plan, sizeof(buf)) >= 1);
      67     1112064 :                 if(plan == 0 && wc == 0)
      68             :                 {
      69             :                     // special case: buf[0] == '\0' so buf is an empty C string and
      70             :                     // `str += buf` would append nothing; append the NUL explicitly
      71             :                     //
      72           1 :                     str += '\0';
      73             :                 }
      74             :                 else
      75             :                 {
      76     1112063 :                     str += buf;
      77             :                 }
      78             :             }
      79             : //std::cerr << "-------------- Plan " << static_cast<int>(plan) << " String ready " << str.length() << " ...\n";
      80             : 
      81             :             {
      82          17 :                 libutf8::utf8_iterator it(str);
      83             : 
      84          17 :                 CATCH_REQUIRE(it == str.begin());
      85          17 :                 CATCH_REQUIRE(it == str.cbegin());
      86          17 :                 CATCH_REQUIRE(it != str.end());
      87          17 :                 CATCH_REQUIRE(it != str.cend());
      88             : 
      89          17 :                 CATCH_REQUIRE(str.begin() == it);
      90          17 :                 CATCH_REQUIRE(str.cbegin() == it);
      91          17 :                 CATCH_REQUIRE(str.end() != it);
      92          17 :                 CATCH_REQUIRE(str.cend() != it);
      93             : 
      94     1112082 :                 for(char32_t wc(0); wc < 0x10000; ++wc)
      95             :                 {
      96     1112065 :                     if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
      97             :                     {
      98           1 :                         wc = 0xDFFF;
      99           1 :                         continue;
     100             :                     }
     101     1112064 :                     CATCH_REQUIRE(*it == wc + plan);
     102     1112064 :                     ++it;
     103             :                 }
     104             : 
     105          17 :                 CATCH_REQUIRE(it != str.begin());
     106          17 :                 CATCH_REQUIRE(it != str.cbegin());
     107          17 :                 CATCH_REQUIRE(it == str.end());
     108          17 :                 CATCH_REQUIRE(it == str.cend());
     109             : 
     110          17 :                 CATCH_REQUIRE(str.begin() != it);
     111          17 :                 CATCH_REQUIRE(str.cbegin() != it);
     112          17 :                 CATCH_REQUIRE(str.end() == it);
     113          17 :                 CATCH_REQUIRE(str.cend() == it);
     114             : 
     115          17 :                 CATCH_REQUIRE(*it == EOF);
     116          17 :                 ++it;
     117          17 :                 it++;
     118          17 :                 CATCH_REQUIRE(it == str.cend());
     119             : 
     120     1112082 :                 for(char32_t wc(0x10000); wc > 0; )
     121             :                 {
     122     1112065 :                     --wc;
     123     1112065 :                     if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
     124             :                     {
     125           1 :                         wc = 0xD800;
     126           1 :                         continue;
     127             :                     }
     128     1112064 :                     --it;
     129     1112064 :                     CATCH_REQUIRE(*it == wc + plan);
     130             :                 }
     131             : 
     132          17 :                 --it;
     133          17 :                 it--;
     134             : 
     135          17 :                 CATCH_REQUIRE(it.good());
     136          17 :                 CATCH_REQUIRE(!it.bad());
     137             :             }
     138             : 
     139          17 :             if(plan == p)
     140             :             {
     141           1 :                 libutf8::utf8_iterator it(str);
     142             : 
     143       65537 :                 for(char32_t wc(0); wc < 0x10000; ++wc)
     144             :                 {
     145       65536 :                     if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
     146             :                     {
     147           0 :                         wc = 0xDFFF;
     148           0 :                         continue;
     149             :                     }
     150       65536 :                     CATCH_REQUIRE(*it++ == wc + plan);
     151             :                 }
     152             : 
     153           1 :                 CATCH_REQUIRE(it == str.end());
     154           1 :                 it++;
     155           1 :                 ++it;
     156           1 :                 CATCH_REQUIRE(it == str.end());
     157             : 
     158       65537 :                 for(char32_t wc(0x10000); wc > 0; )
     159             :                 {
     160       65536 :                     --wc;
     161       65536 :                     if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
     162             :                     {
     163           0 :                         wc = 0xD800;
     164           0 :                         continue;
     165             :                     }
     166       65536 :                     CATCH_REQUIRE(*--it == wc + plan);
     167             :                 }
     168             : 
     169           1 :                 CATCH_REQUIRE(it == str.begin());
     170           1 :                 CATCH_REQUIRE(str.begin() == it);
     171           1 :                 it--;
     172           1 :                 --it;
     173           1 :                 CATCH_REQUIRE(it == str.begin());
     174           1 :                 CATCH_REQUIRE(str.begin() == it);
     175             :             }
     176             : 
     177          17 :             if(plan == (p + 1) % 0x11)
     178             :             {
     179           0 :                 libutf8::utf8_iterator it(str);
     180             : 
     181           0 :                 for(char32_t wc(0); wc < 0x10000; ++wc)
     182             :                 {
     183           0 :                     if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
     184             :                     {
     185           0 :                         wc = 0xDFFF;
     186           0 :                         continue;
     187             :                     }
     188           0 :                     CATCH_REQUIRE(*it == wc + plan);
     189           0 :                     it++;
     190             :                 }
     191             : 
     192           0 :                 for(char32_t wc(0x10000); wc > 0; )
     193             :                 {
     194           0 :                     --wc;
     195           0 :                     if(plan == 0 && wc >= 0xD800 && wc <= 0xDFFF)
     196             :                     {
     197           0 :                         wc = 0xD800;
     198           0 :                         continue;
     199             :                     }
     200           0 :                     it--;
     201           0 :                     CATCH_REQUIRE(*it == wc + plan);
     202             :                 }
     203             :             }
     204             :         }
     205             :     CATCH_END_SECTION()
     206           1 : }
     207             : 
     208             : 
     209           4 : CATCH_TEST_CASE("libutf8 iterator invalid string", "iterator,invalid")
     210             : {
     211           4 :     CATCH_START_SECTION("iterators with invalid characters (bad UTF-8)")
     212         101 :         for(int repeat(0); repeat < 100; ++repeat)
     213             :         {
     214             :             // create a string of STR_LENGTH characters from rand_char(), all >= 0x80
     215             :             //
     216         100 :             constexpr size_t STR_LENGTH = 4;
     217             :             char32_t wc;
     218         200 :             std::u32string wstr;
     219         100 :             wstr.reserve(STR_LENGTH);
     220         500 :             for(size_t idx(0); idx < STR_LENGTH; ++idx)
     221             :             {
     222           0 :                 do
     223             :                 {
     224         400 :                     wc = unittest::rand_char(true);
     225             :                 }
     226         400 :                 while(wc < 0x80);
     227         400 :                 wstr += wc;
     228             :             }
     229         200 :             std::string str(libutf8::to_u8string(wstr));
     230             : 
     231             : //std::cerr << "-------------- Plan " << static_cast<int>(plan) << " String ready " << str.length() << " ...\n";
     232             : 
     233             :             // first verify that it works
     234             :             //
     235             :             std::string::size_type pos[STR_LENGTH];
     236             :             {
     237         100 :                 libutf8::utf8_iterator it(str);
     238             : 
     239         100 :                 CATCH_REQUIRE(it == str.begin());
     240         100 :                 CATCH_REQUIRE(it == str.cbegin());
     241         100 :                 CATCH_REQUIRE(it != str.end());
     242         100 :                 CATCH_REQUIRE(it != str.cend());
     243             : 
     244         100 :                 CATCH_REQUIRE(str.begin()  == it);
     245         100 :                 CATCH_REQUIRE(str.cbegin() == it);
     246         100 :                 CATCH_REQUIRE(str.end()    != it);
     247         100 :                 CATCH_REQUIRE(str.cend()   != it);
     248             : 
     249         500 :                 for(size_t idx(0); idx < STR_LENGTH; ++idx)
     250             :                 {
     251         400 :                     CATCH_REQUIRE(*it == wstr[idx]);
     252         400 :                     if(rand() % 2 == 0)
     253             :                     {
     254         209 :                         pos[idx] = it - str.begin();
     255             :                     }
     256             :                     else
     257             :                     {
     258         191 :                         pos[idx] = -(str.begin() - it);
     259             :                     }
     260         400 :                     ++it;
     261             :                 }
     262             : 
     263         100 :                 CATCH_REQUIRE(it != str.begin());
     264         100 :                 CATCH_REQUIRE(it != str.cbegin());
     265         100 :                 CATCH_REQUIRE(it == str.end());
     266         100 :                 CATCH_REQUIRE(it == str.cend());
     267             : 
     268         100 :                 CATCH_REQUIRE(str.begin()  != it);
     269         100 :                 CATCH_REQUIRE(str.cbegin() != it);
     270         100 :                 CATCH_REQUIRE(str.end()    == it);
     271         100 :                 CATCH_REQUIRE(str.cend()   == it);
     272             : 
     273         100 :                 CATCH_REQUIRE(*it == EOF);
     274         100 :                 ++it;
     275         100 :                 it++;
     276         100 :                 CATCH_REQUIRE(it == str.cend());
     277             :             }
     278             : 
     279             :             {
     280         100 :                 libutf8::utf8_iterator it(str);
     281             : 
     282         100 :                 str[pos[1]] = rand() % 0x40 + 0x80;
     283             : 
     284         100 :                 CATCH_REQUIRE(*it++ == wstr[0]);
     285         100 :                 CATCH_REQUIRE(*it++ == U'\0');       // we broke this one
     286         100 :                 CATCH_REQUIRE(*it++ == wstr[2]);
     287         100 :                 CATCH_REQUIRE(*it++ == wstr[3]);
     288         100 :                 CATCH_REQUIRE(*it++ == EOF);
     289             :             }
     290             : 
     291             :             {
     292         100 :                 str.erase(str.length() - 1);
     293         100 :                 libutf8::utf8_iterator it(str);
     294             : 
     295         100 :                 str[pos[1]] = rand() % 0x40 + 0x80;
     296             : 
     297         100 :                 CATCH_REQUIRE(*it++ == wstr[0]);
     298         100 :                 CATCH_REQUIRE(*it++ == U'\0');
     299         100 :                 CATCH_REQUIRE(*it++ == wstr[2]);
     300         100 :                 CATCH_REQUIRE(*it++ == U'\0');
     301             :             }
     302             :         }
     303             :     CATCH_END_SECTION()
     304             : 
     305           4 :     CATCH_START_SECTION("iterators with invalid characters (too large)")
     306      983040 :         for(char32_t wc(0x110000); wc < 0x1FFFFF; ++wc)
     307             :         {
     308             :             // since this character is not valid we have to encode it _manually_
     309             :             //
     310             :             char buf[4];
     311      983039 :             buf[0] = 0xF0 | ((wc >> 18) & 0x07);
     312      983039 :             buf[1] = 0x80 | ((wc >> 12) & 0x3F);
     313      983039 :             buf[2] = 0x80 | ((wc >>  6) & 0x3F);
     314      983039 :             buf[3] = 0x80 | ((wc >>  0) & 0x3F);
     315             : 
     316     1966078 :             std::string str(buf, 4);
     317             : 
     318             :             // verify that the iterator returns '\0' for this invalid character
     319             :             //
     320             :             {
     321      983039 :                 libutf8::utf8_iterator it(str);
     322             : 
     323      983039 :                 CATCH_REQUIRE(it == str.begin());
     324      983039 :                 CATCH_REQUIRE(it == str.cbegin());
     325      983039 :                 CATCH_REQUIRE(it != str.end());
     326      983039 :                 CATCH_REQUIRE(it != str.cend());
     327             : 
     328      983039 :                 CATCH_REQUIRE(str.begin()  == it);
     329      983039 :                 CATCH_REQUIRE(str.cbegin() == it);
     330      983039 :                 CATCH_REQUIRE(str.end()    != it);
     331      983039 :                 CATCH_REQUIRE(str.cend()   != it);
     332             : 
     333      983039 :                 CATCH_REQUIRE(*it == '\0');
     334      983039 :                 ++it;
     335             : 
     336      983039 :                 CATCH_REQUIRE(it != str.begin());
     337      983039 :                 CATCH_REQUIRE(it != str.cbegin());
     338      983039 :                 CATCH_REQUIRE(it == str.end());
     339      983039 :                 CATCH_REQUIRE(it == str.cend());
     340             : 
     341      983039 :                 CATCH_REQUIRE(str.begin()  != it);
     342      983039 :                 CATCH_REQUIRE(str.cbegin() != it);
     343      983039 :                 CATCH_REQUIRE(str.end()    == it);
     344      983039 :                 CATCH_REQUIRE(str.cend()   == it);
     345             : 
     346      983039 :                 CATCH_REQUIRE(*it == EOF);
     347      983039 :                 ++it;
     348      983039 :                 it++;
     349      983039 :                 CATCH_REQUIRE(it == str.cend());
     350             :             }
     351             :         }
     352             :     CATCH_END_SECTION()
     353           8 : }
     354             : 
     355             : 
     356             : 
     357             : // vim: ts=4 sw=4 et

Generated by: LCOV version 1.12