basic-xml 1.0.1
Very basic loader/writer of XML tags with attributes and content.
type.cpp
Go to the documentation of this file.
1// Copyright (c) 2019-2024 Made to Order Software Corp. All Rights Reserved
2//
3// https://snapwebsites.org/project/basic-xml
4// contact@m2osw.com
5//
6// This program is free software: you can redistribute it and/or modify
7// it under the terms of the GNU General Public License as published by
8// the Free Software Foundation, either version 3 of the License, or
9// (at your option) any later version.
10//
11// This program is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15//
16// You should have received a copy of the GNU General Public License
17// along with this program. If not, see <https://www.gnu.org/licenses/>.
18
19
45// self
46//
47#include "basic-xml/type.h"
48
49
50// libutf8
51//
52#include <libutf8/iterator.h>
53
54
55// C++
56//
57#include <iostream>
58
59
60// last include
61//
62#include <snapdev/poison.h>
63
64
65
66namespace basic_xml
67{
68
69
namespace
{



/** \brief Closed interval [f_first, f_last] of Unicode code points.
 *
 * The ordering below treats a range as "less than" another only when it
 * lies entirely before it, which lets a sorted table of non-overlapping
 * ranges be searched with std::lower_bound().
 */
struct char_range_t
{
    char32_t            f_first = U'\0';
    char32_t            f_last = U'\0';

    // true when this whole range ends before `rhs` begins
    //
    bool operator < (char_range_t const & rhs) const
    {
        return rhs.f_first > f_last;
    }
};


// Characters allowed as the first character of an XML name
// (the NameStartChar production of the XML 1.0 specification).
//
// The entries must remain sorted and non-overlapping since find_char()
// runs a binary search over this table.
//
constexpr char_range_t g_name_start_char[] =
{
    { 0x00003A, 0x00003A },     // ':'
    { 0x000041, 0x00005A },     // 'A' to 'Z'
    { 0x00005F, 0x00005F },     // '_'
    { 0x000061, 0x00007A },     // 'a' to 'z'
    { 0x0000C0, 0x0000D6 },     // 0xD7 is left out
    { 0x0000D8, 0x0000F6 },     // 0xF7 is left out
    { 0x0000F8, 0x0002FF },
    { 0x000370, 0x00037D },     // 0x37E is left out
    { 0x00037F, 0x001FFF },
    { 0x00200C, 0x00200D },
    { 0x002070, 0x00218F },
    { 0x002C00, 0x002FEF },
    { 0x003001, 0x00D7FF },
    { 0x00F900, 0x00FDCF },
    { 0x00FDF0, 0x00FFFD },
    { 0x010000, 0x0EFFFF },     // open question: forbid all xFFFE & xFFFF?
};

// Characters allowed after the first character of an XML name
// (the NameChar production of the XML 1.0 specification).
//
// Same constraints as above: sorted and non-overlapping. A few adjacent
// ranges of the specification are merged into one entry here.
//
constexpr char_range_t g_name_char[] =
{
    { 0x00002D, 0x00002E },     // '-' and '.'
    { 0x000030, 0x00003A },     // '0' to '9' followed by ':'
    { 0x000041, 0x00005A },     // 'A' to 'Z'
    { 0x00005F, 0x00005F },     // '_'
    { 0x000061, 0x00007A },     // 'a' to 'z'
    { 0x0000B7, 0x0000B7 },     // middle dot
    { 0x0000C0, 0x0000D6 },
    { 0x0000D8, 0x0000F6 },
    { 0x0000F8, 0x0002FF },
    { 0x000300, 0x00037D },     // 0x300-0x36F merged with 0x370-0x37D
    { 0x00037F, 0x001FFF },
    { 0x00200C, 0x00200D },
    { 0x00203F, 0x002040 },
    { 0x002070, 0x00218F },
    { 0x002C00, 0x002FEF },
    { 0x003001, 0x00D7FF },
    { 0x00F900, 0x00FDCF },
    { 0x00FDF0, 0x00FFFD },
    { 0x010000, 0x0EFFFF },     // open question: forbid all xFFFE & xFFFF?
};


/** \brief Check whether a character is part of a table of ranges.
 *
 * \param[in] c  The character to look for.
 * \param[in] b  Pointer to the first range of the table.
 * \param[in] e  Pointer one past the last range of the table.
 *
 * \return true if one of the ranges in [b, e) includes \p c.
 */
bool find_char(char32_t c, char_range_t const * b, char_range_t const * e)
{
    // search for the one-character range [c, c]; the result is the first
    // table entry which does not end before `c`
    //
    char_range_t const needle{ c, c };
    char_range_t const * const candidate(std::lower_bound(b, e, needle));
    if(candidate == e)
    {
        // every range ends before `c`
        //
        return false;
    }

    // the candidate may still start after `c` (i.e. `c` falls in a gap)
    //
    return candidate->f_first <= c
        && c <= candidate->f_last;
}


} // no name namespace
147
148
149
150bool is_name_start_char(char32_t c)
151{
152 return find_char(c, std::begin(g_name_start_char), std::end(g_name_start_char));
153}
154
155
156bool is_name_char(char32_t c)
157{
158 return find_char(c, std::begin(g_name_char), std::end(g_name_char));
159}
160
161
/** \brief Check whether a character is a digit or a dash.
 *
 * \note
 * Besides the ASCII digits '0' to '9', this function also accepts
 * the '-' character.
 *
 * \param[in] c  The character to check.
 *
 * \return true if \p c is '0' to '9' or '-'.
 */
bool is_digit(char32_t c)
{
    if(c == U'-')
    {
        return true;
    }

    return U'0' <= c && c <= U'9';
}
167
168
/** \brief Check whether a character is an XML white space.
 *
 * \param[in] c  The character to check.
 *
 * \return true if \p c is a space, a tab, a newline, or a carriage return.
 */
bool is_space(char32_t c)
{
    // the XML reference clearly defines these four characters as white
    // spaces; other Unicode white spaces are not considered as such in
    // XML documents
    //
    switch(c)
    {
    case U' ':
    case U'\t':
    case U'\n':
    case U'\r':
        return true;

    default:
        return false;

    }
}
179
180
204bool is_token(std::string const & token)
205{
206 if(token.empty())
207 {
208 return false;
209 }
210
211 libutf8::utf8_iterator it(token);
212
213 // here it != token.end() is always true since the token is not empty
214 // so there is no need to check that again
215 //
216 if(!is_name_start_char(*it))
217 {
218 return false;
219 }
220
221 for(++it; it != token.end(); ++it)
222 {
223 if(!is_name_char(*it))
224 {
225 return false;
226 }
227 }
228
229 return true;
230}
231
232
233
234} // namespace basic_xml
235// vim: ts=4 sw=4 et
bool is_token(std::string const &token)
Verify that token is a valid string.
Definition type.cpp:204
Database file implementation.

This document is part of the Snap! Websites Project.

Copyright by Made to Order Software Corp.