Line data Source code
1 : // Copyright (c) 2000-2022 Made to Order Software Corp. All Rights Reserved
2 : //
3 : // https://snapwebsites.org/project/libutf8
4 : // contact@m2osw.com
5 : //
6 : // This program is free software; you can redistribute it and/or modify
7 : // it under the terms of the GNU General Public License as published by
8 : // the Free Software Foundation; either version 2 of the License, or
9 : // (at your option) any later version.
10 : //
11 : // This program is distributed in the hope that it will be useful,
12 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : // GNU General Public License for more details.
15 : //
16 : // You should have received a copy of the GNU General Public License along
17 : // with this program; if not, write to the Free Software Foundation, Inc.,
18 : // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 : #pragma once
20 :
/** \file
 * \brief The declarations of the UTF-8 library.
 *
 * This file holds the declarations of the UTF-8 library: a small set of
 * functions used to validate strings and to convert them from one Unicode
 * encoding to another.
 */
27 :
// C++
//
#include <cstddef>
#include <ostream>
#include <string>
31 :
32 :
33 :
namespace libutf8
{


/** \brief Kind of Byte Order Mark reported by start_with_bom().
 *
 * BOM_NONE is the first enumerator (value 0); the others name a Unicode
 * encoding and, for the 16 and 32 bit encodings, a byte order
 * (LE: little endian, BE: big endian).
 */
enum class bom_t
{
    BOM_NONE,
    BOM_UTF8,
    BOM_UTF16_LE,
    BOM_UTF16_BE,
    BOM_UTF32_LE,
    BOM_UTF32_BE
};


/** \brief Classification returned by is_surrogate().
 *
 * SURROGATE_NO is the first enumerator (value 0); SURROGATE_HIGH and
 * SURROGATE_LOW distinguish the two surrogate kinds.
 */
enum class surrogate_t
{
    SURROGATE_NO,
    SURROGATE_HIGH,
    SURROGATE_LOW
};


/** \brief The code point U+FEFF, which Unicode uses as the Byte Order Mark.
 */
constexpr char32_t const    BOM_CHAR = U'\U0000FEFF';

/** \brief Sentinel with all bits set (-1 converted to char32_t).
 *
 * The value is larger than U+10FFFF, the last Unicode code point, so it
 * can never be mistaken for a real character.
 */
constexpr char32_t const    NOT_A_CHARACTER = static_cast<char32_t>(-1);


// Validation
//
// Each function is declared for a single character, a null terminated
// string pointer, and/or a standard string object.
//
// NOTE(review): the `ctrl` flag defaults to true; it presumably selects
// whether control characters are accepted -- confirm in the implementation.
//
bool                is_valid_ascii(char c, bool ctrl = true);
bool                is_valid_ascii(char const * str, bool ctrl = true);
bool                is_valid_ascii(std::string const & str, bool ctrl = true);
bool                is_valid_utf8(char const * str);
bool                is_valid_utf8(std::string const & str);
bool                is_valid_unicode(char32_t const wc, bool ctrl = true);
bool                is_valid_unicode(char32_t const * str, bool ctrl = true);
bool                is_valid_unicode(std::u32string const & str, bool ctrl = true);

// Classification
//
// start_with_bom() receives a buffer pointer and its length in `len`;
// NOTE(review): the unit of `len` is presumably bytes -- confirm.
//
surrogate_t         is_surrogate(char32_t wc);
bom_t               start_with_bom(char const * str, std::size_t len);

// Conversions to a std::string
//
// NOTE(review): judging by the name the result is UTF-8 encoded, and the
// optional `two` parameter of the two-character overloads is presumably
// the second half of a surrogate pair (it defaults to the null character)
// -- confirm in the implementation.
//
std::string         to_u8string(std::u32string const & str);
std::string         to_u8string(std::u16string const & str);
std::string         to_u8string(std::wstring const & str);
std::string         to_u8string(wchar_t one, wchar_t two = L'\0');
std::string         to_u8string(char16_t one, char16_t two = u'\0');
std::string         to_u8string(char32_t wc);

// Conversions from a std::string
//
std::u16string      to_u16string(std::string const & str);
std::u32string      to_u32string(std::string const & str);

// Miscellaneous
//
// NOTE(review): u8length() presumably counts characters rather than bytes
// and u8casecmp() presumably returns a negative, null, or positive value
// like strcmp() -- confirm both in the implementation.
//
std::size_t         u8length(std::string const & str);
int                 u8casecmp(std::string const & lhs, std::string const & rhs);



} // libutf8 namespace
85 :
86 :
87 : template<typename CharT, typename Traits>
88 0 : std::basic_ostream<CharT, Traits> & operator << (std::basic_ostream<CharT, Traits> & out, char32_t const & c)
89 : {
90 0 : return out << libutf8::to_u8string(c);
91 : }
92 :
93 :
94 : // vim: ts=4 sw=4 et
|