LCOV - code coverage report
Current view: top level - tests - unittest_string.cpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 82 82 100.0 %
Date: 2019-05-28 01:02:48 Functions: 4 4 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*    unittest_string.cpp
       2             :  *    Copyright (C) 2013-2019  Made to Order Software Corporation
       3             :  *
       4             :  *    This program is free software; you can redistribute it and/or modify
       5             :  *    it under the terms of the GNU General Public License as published by
       6             :  *    the Free Software Foundation; either version 2 of the License, or
       7             :  *    (at your option) any later version.
       8             :  *
       9             :  *    This program is distributed in the hope that it will be useful,
      10             :  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
      11             :  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12             :  *    GNU General Public License for more details.
      13             :  *
      14             :  *    You should have received a copy of the GNU General Public License along
      15             :  *    with this program; if not, write to the Free Software Foundation, Inc.,
      16             :  *    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
      17             :  *
      18             :  *    Authors
      19             :  *    Alexis Wilke   alexis@m2osw.com
      20             :  */
      21             : 
      22             : // unit test
      23             : //
      24             : #include "unittest_main.h"
      25             : 
      26             : // libutf8 lib
      27             : //
      28             : #include "libutf8/exception.h"
      29             : #include "libutf8/libutf8.h"
      30             : 
      31             : // catch2 lib
      32             : //
      33             : #include <catch2/catch.hpp>
      34             : 
      35             : // C++ lib
      36             : //
      37             : #include <cctype>
      38             : #include <iostream>
      39             : #include <iomanip>
      40             : 
      41             : // Round trip test: convert U+0001 to U+FFFD (surrogates excluded) to UTF-8 with to_u8string(), check each byte, then convert back with to_u32string().
      42           3 : CATCH_TEST_CASE("string conversions", "strings")
      43             : {
      44           2 :     CATCH_START_SECTION("test conversion strings")
      45           2 :         std::string str;
      46           2 :         std::u32string u32str, back;
      47             :         int i; // shared by all the loops below; the 2 and 3 byte verification loops resume where the previous loop stopped
      48             : 
      49             :         // create a string with all the characters of plane 0 (U+0001 to U+FFFD)
      50       65534 :         for(i = 1; i < 0x0FFFE; ++i)
      51             :         {
      52             :             // skip the surrogates (U+D800 to U+DFFF), they are not considered valid characters
      53             :             //
      54       65533 :             if(i < 0xD800 || i > 0xDFFF)
      55             :             {
      56       63485 :                 u32str += static_cast<char32_t>(i);
      57             :             }
      58             :         }
      59             : 
      60           1 :         str = libutf8::to_u8string(u32str);
      61             : 
      62             :         // verify the UTF-8 string byte by byte; the characters were added in
      63             :         // increasing order so we expect the 1 byte, then 2 bytes, then 3 bytes encodings
      64           1 :         char const *s(str.c_str());
      65         128 :         for(i = 1; i < 0x080; ++i) // 1 byte: the character as is
      66             :         {
      67         127 :             CATCH_REQUIRE(*s++ == static_cast<char>(i));
      68             :         }
      69        3841 :         for(; i < 0x0800; ++i) // 2 bytes: 110xxxxx 10xxxxxx
      70             :         {
      71        1920 :             CATCH_REQUIRE(*s++ == static_cast<char>((i >> 6) | 0xC0));
      72        1920 :             CATCH_REQUIRE(*s++ == static_cast<char>((i & 0x3F) | 0x80));
      73             :         }
      74      126973 :         for(; i < 0x0FFFE; ++i) // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
      75             :         {
      76       63486 :             if(i < 0xD800 || i > 0xDFFF) // surrogates were not added to u32str so there are no bytes to check for them
      77             :             {
      78       61438 :                 CATCH_REQUIRE(*s++ == static_cast<char>((i >> 12) | 0xE0));
      79       61438 :                 CATCH_REQUIRE(*s++ == static_cast<char>(((i >> 6) & 0x3F) | 0x80));
      80       61438 :                 CATCH_REQUIRE(*s++ == static_cast<char>((i & 0x3F) | 0x80));
      81             :             }
      82             :         }
      83             : 
      84             :         // verify that converting the UTF-8 back to char32_t returns the original string
      85             :         //
      86           1 :         back = libutf8::to_u32string(str);
      87           1 :         CATCH_REQUIRE(back == u32str);
      88             :     CATCH_END_SECTION()
      89           1 : }
      90             : 
      91             : 
      92             : // Check libutf8::u8casecmp() with strings starting with each character from U+0001 to U+FFFF (surrogates skipped) followed by random characters.
      93           3 : CATCH_TEST_CASE("compare strings", "strings")
      94             : {
      95           2 :     CATCH_START_SECTION("compare UTF-8 strings")
      96       63489 :         for(int i(1); i < 0x10000; ++i)
      97             :         {
      98       63488 :             if(i >= 0xD800 && i <= 0xDFFF) // skip the whole range of surrogates in one go
      99             :             {
     100           1 :                 i = 0xDFFF; // the ++i of the loop then resumes at 0xE000
     101           1 :                 continue;
     102             :             }
     103             : 
     104             :             // as is against itself (one character string)
     105      126974 :             std::u32string in;
     106       63487 :             in += static_cast<char32_t>(i);
     107      126974 :             std::string mb(libutf8::to_u8string(in));
     108       63487 :             CATCH_REQUIRE(libutf8::u8casecmp(mb, mb) == 0);
     109             : 
     110             :             // as is against uppercase -- NOTE(review): std::towupper()/std::towlower() are declared in <cwctype> which is not in the includes shown above; presumably included transitively, confirm
     111      126974 :             std::u32string uin;
     112       63487 :             uin += std::towupper(static_cast<char32_t>(i));
     113      126974 :             std::string umb(libutf8::to_u8string(uin));
     114       63487 :             CATCH_REQUIRE(libutf8::u8casecmp(mb, umb) == 0);
     115             : 
     116             :             // as is against lowercase
     117      126974 :             std::u32string lin;
     118       63487 :             lin += std::towlower(static_cast<char32_t>(i));
     119      126974 :             std::string lmb(libutf8::to_u8string(lin));
     120       63487 :             CATCH_REQUIRE(libutf8::u8casecmp(mb, lmb) == 0);
     121             : 
     122             :             // random: grow the three strings with the same 30 random characters (as is, uppercase, lowercase) and compare after each addition
     123     1968097 :             for(int j(0); j < 30; ++j)
     124             :             {
     125     1904610 :                 char32_t const rwc(unittest::rand_char());
     126     1904610 :                 in += rwc;
     127     1904610 :                 uin += std::towupper(rwc);
     128     1904610 :                 lin += std::towlower(rwc);
     129             : 
     130     3809220 :                 std::string rmb(libutf8::to_u8string(in));
     131     1904610 :                 CATCH_REQUIRE(libutf8::u8casecmp(rmb, rmb) == 0);
     132     3809220 :                 std::string rumb(libutf8::to_u8string(uin));
     133     1904610 :                 CATCH_REQUIRE(libutf8::u8casecmp(rmb, rumb) == 0);
     134     3809220 :                 std::string rlmb(libutf8::to_u8string(lin));
     135     1904610 :                 CATCH_REQUIRE(libutf8::u8casecmp(rmb, rlmb) == 0);
     136             : 
     137     1904610 :                 if(rwc >= 0x80 && rand() % 100 == 0) // about 1 in 100 of the non-ASCII characters: also test with invalid UTF-8
     138             :                 {
     139       18758 :                     rmb.resize(rmb.length() - 1); // drop the last byte so rmb ends with an incomplete multi-byte sequence
     140       18758 :                     CATCH_REQUIRE_THROWS_AS(libutf8::u8casecmp(rmb, rlmb) == 0, libutf8::libutf8_exception_decoding);
     141       18758 :                     CATCH_REQUIRE_THROWS_AS(libutf8::u8casecmp(rlmb, rmb) == 0, libutf8::libutf8_exception_decoding);
     142             :                 }
     143             :             }
     144             : 
     145       63487 :             char32_t wc(unittest::rand_char()); // last character of emb, compared against lwc and uwc below
     146       63487 :             in += wc;
     147      126974 :             std::string emb(libutf8::to_u8string(in));
     148       63487 :             CATCH_REQUIRE(libutf8::u8casecmp(emb, emb) == 0);
     149       63487 :             CATCH_REQUIRE(libutf8::u8casecmp(emb, umb) == 1); // umb and lmb still hold only the first character, so the longer emb is greater
     150       63487 :             CATCH_REQUIRE(libutf8::u8casecmp(emb, lmb) == 1);
     151       63487 :             CATCH_REQUIRE(libutf8::u8casecmp(umb, emb) == -1); // and the shorter string is smaller
     152       63487 :             CATCH_REQUIRE(libutf8::u8casecmp(lmb, emb) == -1);
     153             : 
     154             :             {
     155       63487 :                 wchar_t lwc(unittest::rand_char()); // NOTE(review): wchar_t here whereas rwc, wc and uwc are char32_t -- confirm this is intended
     156       63487 :                 lin += std::towlower(lwc);
     157      126974 :                 std::string elmb(libutf8::to_u8string(lin));
     158             : //std::cerr << "LOWER compare U+" << std::hex << std::setw(4) << static_cast<int>(wc)
     159             : //                         << "/" << std::setw(4) << std::towlower(wc)
     160             : //                         << " with U+" << std::setw(4) << static_cast<int>(lwc)
     161             : //                         << "/" << std::setw(4) << std::towlower(lwc)
     162             : //                         << " wc < lwc -> " << std::setw(4) << (std::towlower(wc) < std::towlower(lwc))
     163             : //                         << "\n" << std::dec;
     164             : //std::cerr << " result: [" << libutf8::u8casecmp(emb, elmb) << "]\n";
     165       63487 :                 if(std::towlower(wc) == std::towlower(lwc)) // ignoring case, emb and elmb only differ by their last character (wc versus lwc)
     166             :                 {
     167           1 :                     CATCH_REQUIRE(libutf8::u8casecmp(emb, elmb) == 0);
     168             :                 }
     169       63486 :                 else if(std::towlower(wc) < std::towlower(lwc))
     170             :                 {
     171       31724 :                     CATCH_REQUIRE(libutf8::u8casecmp(emb, elmb) == -1);
     172       31724 :                     CATCH_REQUIRE(libutf8::u8casecmp(lmb, elmb) == -1); // lmb is only the first character of elmb, so it is always smaller
     173             :                 }
     174             :                 else
     175             :                 {
     176       31762 :                     CATCH_REQUIRE(libutf8::u8casecmp(emb, elmb) == 1);
     177       31762 :                     CATCH_REQUIRE(libutf8::u8casecmp(lmb, elmb) == -1); // lmb is only the first character of elmb, so it is always smaller
     178             :                 }
     179             :             }
     180             : 
     181             :             // here we do the same check with an uppercase last character (uin ends with towupper(uwc)),
     182             :             // but notice that the expected result is still computed with the lowercase values!
     183             :             {
     184       63487 :                 char32_t uwc(unittest::rand_char());
     185       63487 :                 uin += std::towupper(uwc);
     186      126974 :                 std::string const eumb(libutf8::to_u8string(uin));
     187             : //std::cerr << "UPPER compare U+" << std::hex << std::setw(4) << static_cast<int>(wc)
     188             : //                         << "/" << std::setw(4) << std::towlower(wc)
     189             : //                         << " with U+" << std::setw(4) << static_cast<int>(uwc)
     190             : //                         << "/" << std::setw(4) << std::towlower(uwc)
     191             : //                         << " wc < uwc -> " << std::setw(4) << (std::towlower(wc) < std::towlower(uwc))
     192             : //                         << "\n" << std::dec;
     193             : //std::cerr << " result: [" << libutf8::u8casecmp(emb, eumb) << "]\n";
     194       63487 :                 if(std::towlower(wc) == std::towlower(uwc))
     195             :                 {
     196           2 :                     CATCH_REQUIRE(libutf8::u8casecmp(emb, eumb) == 0);
     197             :                 }
     198       63485 :                 else if(std::towlower(wc) < std::towlower(uwc))
     199             :                 {
     200       31664 :                     CATCH_REQUIRE(libutf8::u8casecmp(emb, eumb) == -1);
     201             :                 }
     202             :                 else
     203             :                 {
     204       31821 :                     CATCH_REQUIRE(libutf8::u8casecmp(emb, eumb) == 1);
     205             :                 }
     206             :             }
     207             :         }
     208             :     CATCH_END_SECTION()
     209           7 : }
     210             : 
     211             : 
     212             : // With MS-Windows, we can check that our functions work the same way
     213             : // (return the expected value) as this Windows API function:
     214             : // 
     215             : // CompareStringOrdinal(L"This string", 11, L"That string", 11, TRUE);
     216             : 
     217             : 
     218             : // vim: ts=4 sw=4 et

Generated by: LCOV version 1.12