Line data Source code
1 : // Copyright (c) 2019 Made to Order Software Corp. All Rights Reserved
2 : //
3 : // https://snapwebsites.org/project/snapdatabase
4 : // contact@m2osw.com
5 : //
6 : // This program is free software; you can redistribute it and/or modify
7 : // it under the terms of the GNU General Public License as published by
8 : // the Free Software Foundation; either version 2 of the License, or
9 : // (at your option) any later version.
10 : //
11 : // This program is distributed in the hope that it will be useful,
12 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : // GNU General Public License for more details.
15 : //
16 : // You should have received a copy of the GNU General Public License along
17 : // with this program; if not, write to the Free Software Foundation, Inc.,
18 : // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 :
20 :
21 : /** \file
22 : * \brief Database file implementation.
23 : *
24 : * Each table uses one or more files. Each file is handled by a dbfile
25 : * object and a corresponding set of blocks.
26 : */
27 :
28 : // self
29 : //
30 : #include "snapdatabase/data/schema.h"
31 :
32 : #include "snapdatabase/data/convert.h"
33 : #include "snapdatabase/data/script.h"
34 :
35 :
36 : // snaplogger lib
37 : //
38 : #include <snaplogger/message.h>
39 :
40 :
41 : // C++ lib
42 : //
43 : #include <iostream>
44 : #include <type_traits>
45 :
46 :
47 : // boost lib
48 : //
49 : #include <boost/algorithm/string.hpp>
50 :
51 :
52 : // last include
53 : //
54 : #include <snapdev/poison.h>
55 :
56 :
57 :
58 : namespace snapdatabase
59 : {
60 :
61 :
62 :
63 : namespace
64 : {
65 :
66 :
67 :
68 :
69 :
/** \brief Binary layout of one column definition.
 *
 * This table is used as the sub-description of the "columns" array of
 * g_table_description. The field names listed here are the ones read back
 * by schema_column::from_structure() ("hash", "name", "column_id", "type",
 * "flags", "encrypt_key_name", "default_value", "minimum_value",
 * "maximum_value", "minimum_length", "maximum_length", "validation").
 *
 * NOTE(review): the "flags=..." name presumably declares the named bits of
 * the 32 bit field (with "revision_type:2" being a 2 bit wide entry) --
 * confirm against the structure implementation.
 */
70 : struct_description_t g_column_description[] =
71 : {
72 : define_description(
73 : FieldName("hash")
74 : , FieldType(struct_type_t::STRUCT_TYPE_UINT128)
75 : ),
76 : define_description(
77 : FieldName("name")
78 : , FieldType(struct_type_t::STRUCT_TYPE_P8STRING)
79 : ),
80 : define_description(
81 : FieldName("column_id")
82 : , FieldType(struct_type_t::STRUCT_TYPE_UINT16)
83 : ),
84 : define_description(
85 : FieldName("type")
86 : , FieldType(struct_type_t::STRUCT_TYPE_UINT16)
87 : ),
88 : define_description(
89 : FieldName("flags=limited/required/blob/system/revision_type:2")
90 : , FieldType(struct_type_t::STRUCT_TYPE_BITS32)
91 : ),
92 : define_description(
93 : FieldName("encrypt_key_name")
94 : , FieldType(struct_type_t::STRUCT_TYPE_P16STRING)
95 : ),
96 : define_description(
97 : FieldName("default_value")
98 : , FieldType(struct_type_t::STRUCT_TYPE_BUFFER32)
99 : ),
100 : define_description(
101 : FieldName("minimum_value")
102 : , FieldType(struct_type_t::STRUCT_TYPE_BUFFER32)
103 : ),
104 : define_description(
105 : FieldName("maximum_value")
106 : , FieldType(struct_type_t::STRUCT_TYPE_BUFFER32)
107 : ),
108 : define_description(
109 : FieldName("minimum_length")
110 : , FieldType(struct_type_t::STRUCT_TYPE_UINT32)
111 : ),
112 : define_description(
113 : FieldName("maximum_length")
114 : , FieldType(struct_type_t::STRUCT_TYPE_UINT32)
115 : ),
116 : define_description(
117 : FieldName("validation")
118 : , FieldType(struct_type_t::STRUCT_TYPE_BUFFER32)
119 : ),
120 : end_descriptions()
121 : };
122 :
123 :
/** \brief Binary layout of a reference to a column.
 *
 * A reference is a single 16 bit "column_id" field. This description is
 * used as the item type of the "row_key" array of g_table_description and
 * of the "columns" array of g_table_secondary_index.
 */
124 : struct_description_t g_table_column_reference[] =
125 : {
126 : define_description(
127 : FieldName("column_id")
128 : , FieldType(struct_type_t::STRUCT_TYPE_UINT16)
129 : ),
130 : end_descriptions()
131 : };
132 :
133 :
134 : struct_description_t g_table_secondary_index[] =
135 : {
136 : define_description(
137 : FieldName("name")
138 : , FieldType(struct_type_t::STRUCT_TYPE_P8STRING)
139 : ),
140 : define_description(
141 : FieldName("flags=distributed")
142 : , FieldType(struct_type_t::STRUCT_TYPE_BITS32)
143 : , FieldSubDescription(g_table_column_reference)
144 : ),
145 : define_description(
146 : FieldName("columns")
147 : , FieldType(struct_type_t::STRUCT_TYPE_ARRAY16)
148 : , FieldSubDescription(g_table_column_reference)
149 : ),
150 : end_descriptions()
151 : };
152 :
153 :
154 :
155 :
/** \brief Binary layout of a complete table schema.
 *
 * This is the top level description handed to the structure objects
 * created in schema_table::from_binary() and schema_table::to_binary().
 *
 * The three arrays use the other descriptions of this file as their item
 * type: "row_key" is a list of column references, "secondary_indexes" a
 * list of g_table_secondary_index and "columns" a list of
 * g_column_description.
 *
 * NOTE(review): only the "temporary" and "sparse" flags are named here
 * while from_xml() also sets TABLE_FLAG_SECURE and TABLE_FLAG_DROP --
 * confirm whether those bits are expected to be saved.
 */
156 : struct_description_t g_table_description[] =
157 : {
158 : define_description(
159 : FieldName("version")
160 : , FieldType(struct_type_t::STRUCT_TYPE_VERSION)
161 : ),
162 : define_description(
163 : FieldName("name")
164 : , FieldType(struct_type_t::STRUCT_TYPE_P8STRING)
165 : ),
166 : define_description(
167 : FieldName("flags=temporary/sparse")
168 : , FieldType(struct_type_t::STRUCT_TYPE_BITS64)
169 : ),
170 : define_description(
171 : FieldName("block_size")
172 : , FieldType(struct_type_t::STRUCT_TYPE_UINT32)
173 : ),
174 : define_description(
175 : FieldName("model")
176 : , FieldType(struct_type_t::STRUCT_TYPE_UINT8)
177 : ),
178 : define_description(
179 : FieldName("row_key")
180 : , FieldType(struct_type_t::STRUCT_TYPE_ARRAY16)
181 : , FieldSubDescription(g_table_column_reference)
182 : ),
183 : define_description(
184 : FieldName("secondary_indexes")
185 : , FieldType(struct_type_t::STRUCT_TYPE_ARRAY16)
186 : , FieldSubDescription(g_table_secondary_index)
187 : ),
188 : define_description(
189 : FieldName("columns")
190 : , FieldType(struct_type_t::STRUCT_TYPE_ARRAY16)
191 : , FieldSubDescription(g_column_description)
192 : ),
193 : end_descriptions()
194 : };
195 :
196 :
197 :
198 :
199 :
/** \brief Check whether a string is a valid table or column name.
 *
 * A valid name:
 *
 * \li is not empty;
 * \li is at most \p max_length characters long;
 * \li starts with an ASCII letter (a-z, A-Z) or an underscore;
 * \li continues with ASCII letters, digits (0-9) or underscores only.
 *
 * \param[in] name  The name to verify.
 * \param[in] max_length  The maximum number of characters allowed.
 *
 * \return true if the name is considered valid, false otherwise.
 */
bool validate_name(std::string const & name, size_t max_length = 255)
{
    if(name.empty()
    || name.length() > max_length)
    {
        return false;
    }

    auto const is_letter_or_underscore = [](char c)
        {
            return (c >= 'a' && c <= 'z')
                || (c >= 'A' && c <= 'Z')
                || c == '_';
        };

    // the first character cannot be a digit
    //
    if(!is_letter_or_underscore(name[0]))
    {
        return false;
    }

    // the first character was verified above, so start at index 1
    //
    auto const length(name.length());
    for(size_t idx(1); idx < length; ++idx)
    {
        char const c(name[idx]);
        if(!is_letter_or_underscore(c)
        && (c < '0' || c > '9'))
        {
            return false;
        }
    }

    return true;
}
234 :
235 :
236 : }
237 : // no name namespace
238 :
239 :
240 :
// Association between a table model and its upper case name.
//
// The MODEL_AND_NAME() macro builds one entry from a bare identifier: it
// pastes the identifier after `model_t::TABLE_MODEL_` for the enumeration
// value and stringifies it for the name.
//
// IMPORTANT: the entries of g_model_and_name must remain sorted by name
// (strcmp() order) because name_to_model() searches the array with a binary
// search; the debug build of that function verifies the order.
241 : struct model_and_name_t
242 : {
243 : model_t const f_model = model_t::TABLE_MODEL_CONTENT;
244 : char const * const f_name = nullptr;
245 : };
246 :
247 : #define MODEL_AND_NAME(name) { model_t::TABLE_MODEL_##name, #name }
248 :
249 : model_and_name_t g_model_and_name[] =
250 : {
251 : MODEL_AND_NAME(CONTENT),
252 : MODEL_AND_NAME(DATA),
253 : MODEL_AND_NAME(DEFAULT),
254 : MODEL_AND_NAME(LOG),
255 : MODEL_AND_NAME(QUEUE),
256 : MODEL_AND_NAME(SEQUENCIAL),
257 : MODEL_AND_NAME(SESSION),
258 : MODEL_AND_NAME(TREE)
259 : };
260 :
261 :
262 1 : model_t name_to_model(std::string const & name)
263 : {
264 : #ifdef _DEBUG
265 : // verify in debug because if not in order we can't do a binary search
266 8 : for(size_t idx(1);
267 8 : idx < sizeof(g_model_and_name) / sizeof(g_model_and_name[0]);
268 : ++idx)
269 : {
270 7 : if(strcmp(g_model_and_name[idx - 1].f_name
271 7 : , g_model_and_name[idx].f_name) >= 0)
272 : {
273 : throw snapdatabase_logic_error(
274 : "names in g_model_and_name are not in alphabetical order: "
275 0 : + std::string(g_model_and_name[idx - 1].f_name)
276 0 : + " >= "
277 0 : + g_model_and_name[idx].f_name
278 0 : + " (position: "
279 0 : + std::to_string(idx)
280 0 : + ").");
281 : }
282 : }
283 : #endif
284 :
285 1 : if(name.empty())
286 : {
287 1 : return model_t::TABLE_MODEL_DEFAULT;
288 : }
289 :
290 0 : std::string const uc(boost::algorithm::to_upper_copy(name));
291 :
292 0 : int i(0);
293 0 : int j(sizeof(g_model_and_name) / sizeof(g_model_and_name[0]));
294 0 : while(i < j)
295 : {
296 0 : int const p((j - i) / 2 + i);
297 0 : int const r(uc.compare(g_model_and_name[p].f_name));
298 0 : if(r < 0)
299 : {
300 0 : i = p + 1;
301 : }
302 0 : else if(r > 0)
303 : {
304 0 : j = p;
305 : }
306 : else
307 : {
308 0 : return g_model_and_name[p].f_model;
309 : }
310 : }
311 :
312 0 : SNAP_LOG_WARNING
313 0 : << "Unknown model name \""
314 : << name
315 : << "\" for your table. Please check the spelling. The name is case insensitive."
316 : << SNAP_LOG_SEND;
317 :
318 : // return the default, this is just a warning
319 : //
320 0 : return model_t::TABLE_MODEL_DEFAULT;
321 : }
322 :
323 :
324 :
325 :
326 :
/** \brief Create an empty complex type.
 *
 * The constructor does nothing explicitly. It exists so a complex type can
 * be default constructed, which schema_table::from_xml() relies on when it
 * writes `f_complex_types[ct.name()] = ct`.
 */
327 : // required constructor for copying in the map
328 0 : schema_complex_type::schema_complex_type()
329 : {
330 0 : }
331 :
332 :
333 : /** \brief Initialize a complex type from an XML node.
334 : *
335 : * Once in a list of columns, a complex type becomes a
336 : * `STRUCT_TYPE_STRUCTURE`.
337 : */
338 0 : schema_complex_type::schema_complex_type(xml_node::pointer_t x)
339 : {
340 0 : if(x->tag_name() != "complex-type")
341 : {
342 : throw invalid_xml(
343 : "A column schema must be a \"column\" tag. \""
344 0 : + x->tag_name()
345 0 : + "\" is not acceptable.");
346 : }
347 :
348 0 : f_name = x->attribute("name");
349 :
350 0 : struct_type_t last_type(struct_type_t::STRUCT_TYPE_VOID);
351 0 : for(auto child(x->first_child()); child != nullptr; child = child->next())
352 : {
353 0 : if(child->tag_name() == "type")
354 : {
355 0 : if(last_type == struct_type_t::STRUCT_TYPE_END)
356 : {
357 : throw invalid_xml(
358 : "The complex type was already ended with an explicit END. You can have additional types after that. Yet \""
359 0 : + child->text()
360 0 : + "\" was found after the END.");
361 : }
362 0 : field_t ct;
363 0 : ct.f_name = child->attribute("name");
364 0 : ct.f_type = name_to_struct_type(child->text());
365 0 : if(ct.f_type == INVALID_STRUCT_TYPE)
366 : {
367 : throw invalid_xml(
368 : "Found unknown type \""
369 0 : + child->text()
370 0 : + "\" in your complex type definition.");
371 : }
372 0 : last_type = ct.f_type;
373 :
374 0 : if(ct.f_type != struct_type_t::STRUCT_TYPE_END)
375 : {
376 0 : f_fields.push_back(ct);
377 : }
378 : }
379 : else
380 : {
381 0 : SNAP_LOG_WARNING
382 0 : << "Unknown tag \""
383 0 : << child->tag_name()
384 : << "\" within a <complex-type> tag ignored."
385 : << SNAP_LOG_SEND;
386 : }
387 : }
388 0 : }
389 :
390 :
/** \brief Retrieve the name of this complex type.
 *
 * \return A copy of the name read from the `name` attribute of the
 * `<complex-type>` tag (empty for a default constructed object unless the
 * class declaration gives f_name another default).
 */
391 0 : std::string schema_complex_type::name() const
392 : {
393 0 : return f_name;
394 : }
395 :
396 :
/** \brief Retrieve the number of fields defined in this complex type.
 *
 * \return The number of entries in the list of fields; valid indices for
 * type_name() and type() go from 0 to size() - 1.
 */
397 0 : size_t schema_complex_type::size() const
398 : {
399 0 : return f_fields.size();
400 : }
401 :
402 :
403 0 : std::string schema_complex_type::type_name(int idx) const
404 : {
405 0 : if(static_cast<std::size_t>(idx) >= f_fields.size())
406 : {
407 : throw snapdatabase_out_of_range(
408 : "index ("
409 0 : + std::to_string(idx)
410 0 : + ") is too large for this complex type list of fields (max: "
411 0 : + std::to_string(f_fields.size())
412 0 : + ").");
413 : }
414 :
415 0 : return f_fields[idx].f_name;
416 : }
417 :
418 :
419 0 : struct_type_t schema_complex_type::type(int idx) const
420 : {
421 0 : if(static_cast<std::size_t>(idx) >= f_fields.size())
422 : {
423 : throw snapdatabase_out_of_range(
424 : "index ("
425 0 : + std::to_string(idx)
426 0 : + ") is too large for this complex type list of fields (max: "
427 0 : + std::to_string(f_fields.size())
428 0 : + ").");
429 : }
430 :
431 0 : return f_fields[idx].f_type;
432 : }
433 :
434 :
435 :
436 :
437 :
438 :
439 2 : schema_column::schema_column(schema_table::pointer_t table, xml_node::pointer_t x)
440 2 : : f_schema_table(table)
441 : {
442 2 : if(x->tag_name() != "column")
443 : {
444 : throw invalid_xml(
445 : "A column schema must be a \"column\" tag. \""
446 0 : + x->tag_name()
447 0 : + "\" is not acceptable.");
448 : }
449 :
450 2 : f_name = x->attribute("name");
451 2 : if(!validate_name(f_name))
452 : {
453 : throw invalid_xml(
454 : "\""
455 0 : + f_name
456 0 : + "\" is not a valid column name.");
457 : }
458 :
459 2 : f_type = name_to_struct_type(x->attribute("type"));
460 2 : if(f_type == INVALID_STRUCT_TYPE)
461 : {
462 : // TODO: search for complex type first
463 : //
464 : throw invalid_xml(
465 : "Found unknown type \""
466 0 : + x->attribute("type")
467 0 : + "\" in your column definition.");
468 : }
469 :
470 2 : f_flags = 0;
471 2 : if(x->attribute("limited") == "limited")
472 : {
473 0 : f_flags |= COLUMN_FLAG_LIMITED;
474 : }
475 2 : if(x->attribute("required") == "required")
476 : {
477 0 : f_flags |= COLUMN_FLAG_REQUIRED;
478 : }
479 2 : if(x->attribute("blob") == "blob")
480 : {
481 0 : f_flags |= COLUMN_FLAG_BLOB;
482 : }
483 :
484 2 : f_encrypt_key_name = x->attribute("encrypt");
485 :
486 17 : for(auto child(x->first_child()); child != nullptr; child = child->next())
487 : {
488 15 : if(child->tag_name() == "description")
489 : {
490 2 : f_description = child->text();
491 : }
492 13 : else if(child->tag_name() == "default")
493 : {
494 2 : f_default_value = string_to_typed_buffer(f_type, child->text());
495 : }
496 11 : else if(child->tag_name() == "external")
497 : {
498 2 : f_internal_size_limit = convert_to_int(child->text(), 32, unit_t::UNIT_SIZE);
499 : }
500 9 : else if(child->tag_name() == "min-value")
501 : {
502 2 : f_minimum_value = string_to_typed_buffer(f_type, child->text());
503 : }
504 7 : else if(child->tag_name() == "max-value")
505 : {
506 2 : f_maximum_value = string_to_typed_buffer(f_type, child->text());
507 : }
508 5 : else if(child->tag_name() == "min-length")
509 : {
510 2 : f_minimum_length = convert_to_uint(child->text(), 32);
511 : }
512 3 : else if(child->tag_name() == "max-length")
513 : {
514 2 : f_maximum_length = convert_to_uint(child->text(), 32);
515 : }
516 1 : else if(child->tag_name() == "validation")
517 : {
518 1 : f_validation = compile_script(child->text());
519 : }
520 : else
521 : {
522 : // generate an error for unknown tags or ignore?
523 : //
524 0 : SNAP_LOG_WARNING
525 0 : << "Unknown tag \""
526 0 : << child->tag_name()
527 : << "\" within a <column> tag ignored."
528 : << SNAP_LOG_SEND;
529 : }
530 : }
531 2 : }
532 :
533 :
/** \brief Initialize a column from its binary structure.
 *
 * The constructor saves the table pointer and delegates the loading of all
 * the fields to from_structure().
 *
 * \param[in] table  The table owning this column.
 * \param[in] s  The structure holding the column definition.
 */
534 0 : schema_column::schema_column(schema_table::pointer_t table, structure::pointer_t s)
535 0 : : f_schema_table(table)
536 : {
537 0 : from_structure(s);
538 0 : }
539 :
540 :
/** \brief Initialize a column from explicit values.
 *
 * This constructor is the one used by schema_table::from_xml() to create
 * the system columns (`_oid`, `_created_on`, etc.) It only copies its
 * parameters in the corresponding fields; the name is not validated here.
 *
 * \param[in] table  The table owning this column.
 * \param[in] name  The name of the column.
 * \param[in] type  The type of the column.
 * \param[in] flags  The COLUMN_FLAG_... flags of the column.
 */
541 8 : schema_column::schema_column(
542 : schema_table_pointer_t table
543 : , std::string name
544 : , struct_type_t type
545 8 : , flag32_t flags)
546 : : f_name(name)
547 : , f_type(type)
548 : , f_flags(flags)
549 8 : , f_schema_table(table)
550 : {
551 8 : }
552 :
553 :
/** \brief Load the column definition from a structure.
 *
 * Each field of the structure is read by name and copied to the matching
 * member of this column. The names used here are the ones declared in
 * g_column_description. Only the first two 64 bit words of the large
 * integer are kept for the hash.
 *
 * \param[in] s  The structure to read the column definition from.
 */
554 0 : void schema_column::from_structure(structure::pointer_t s)
555 : {
556 0 : auto const large_uint(s->get_large_uinteger("hash"));
557 0 : f_hash[0] = large_uint.f_value[0];
558 0 : f_hash[1] = large_uint.f_value[1];
559 0 : f_name = s->get_string("name");
560 0 : f_column_id = s->get_uinteger("column_id");
561 0 : f_type = static_cast<struct_type_t>(s->get_uinteger("type"));
562 0 : f_flags = s->get_uinteger("flags");
563 0 : f_encrypt_key_name = s->get_string("encrypt_key_name");
564 0 : f_default_value = s->get_buffer("default_value");
565 0 : f_minimum_value = s->get_buffer("minimum_value");
566 0 : f_maximum_value = s->get_buffer("maximum_value");
567 0 : f_minimum_length = s->get_uinteger("minimum_length");
568 0 : f_maximum_length = s->get_uinteger("maximum_length");
569 0 : f_validation = s->get_buffer("validation");
570 0 : }
571 :
572 :
/** \brief Retrieve the table this column is part of.
 *
 * \return The result of locking the saved table pointer.
 * NOTE(review): f_schema_table is presumably a weak pointer, in which case
 * the returned pointer is null once the table was destroyed -- confirm
 * against the class declaration.
 */
573 0 : schema_table::pointer_t schema_column::table() const
574 : {
575 0 : return f_schema_table.lock();
576 : }
577 :
578 :
/** \brief Retrieve the identifier of this column.
 *
 * \return The column identifier as loaded by from_structure() or assigned
 * with set_column_id().
 */
579 10 : column_id_t schema_column::column_id() const
580 : {
581 10 : return f_column_id;
582 : }
583 :
584 :
585 10 : void schema_column::set_column_id(column_id_t id)
586 : {
587 10 : if(f_column_id != COLUMN_NULL)
588 : {
589 : throw id_already_assigned(
590 : "This column already has an identifier ("
591 0 : + std::to_string(static_cast<int>(f_column_id))
592 0 : + "). You can't assigned it another one.");
593 : }
594 :
595 10 : f_column_id = id;
596 10 : }
597 :
598 :
/** \brief Retrieve the 128 bit hash of this column as two 64 bit words.
 *
 * \param[out] h0  Receives the first word of the hash (f_hash[0]).
 * \param[out] h1  Receives the second word of the hash (f_hash[1]).
 */
599 10 : void schema_column::hash(uint64_t & h0, uint64_t & h1) const
600 : {
601 10 : h0 = f_hash[0];
602 10 : h1 = f_hash[1];
603 10 : }
604 :
605 :
/** \brief Retrieve the name of this column.
 *
 * \return A copy of the column name.
 */
606 22 : std::string schema_column::name() const
607 : {
608 22 : return f_name;
609 : }
610 :
611 :
/** \brief Retrieve the type of this column.
 *
 * \return The structure type of the data saved in this column.
 */
612 10 : struct_type_t schema_column::type() const
613 : {
614 10 : return f_type;
615 : }
616 :
617 :
/** \brief Retrieve the flags of this column.
 *
 * \return The set of COLUMN_FLAG_... bits currently set on this column.
 */
618 10 : flag32_t schema_column::flags() const
619 : {
620 10 : return f_flags;
621 : }
622 :
623 :
/** \brief Retrieve the name of the encryption key of this column.
 *
 * \return A copy of the value read from the `encrypt` attribute of the
 * `<column>` tag or from the "encrypt_key_name" field of the structure.
 */
624 10 : std::string schema_column::encrypt_key_name() const
625 : {
626 10 : return f_encrypt_key_name;
627 : }
628 :
629 :
/** \brief Retrieve the default value of this column.
 *
 * \return A copy of the buffer holding the default value (from the
 * `<default>` tag or the "default_value" field of the structure).
 */
630 10 : buffer_t schema_column::default_value() const
631 : {
632 10 : return f_default_value;
633 : }
634 :
635 :
/** \brief Retrieve the minimum value of this column.
 *
 * \return A copy of the buffer holding the minimum value (from the
 * `<min-value>` tag or the "minimum_value" field of the structure).
 */
636 10 : buffer_t schema_column::minimum_value() const
637 : {
638 10 : return f_minimum_value;
639 : }
640 :
641 :
/** \brief Retrieve the maximum value of this column.
 *
 * \return A copy of the buffer holding the maximum value (from the
 * `<max-value>` tag or the "maximum_value" field of the structure).
 */
642 10 : buffer_t schema_column::maximum_value() const
643 : {
644 10 : return f_maximum_value;
645 : }
646 :
647 :
/** \brief Retrieve the minimum length of this column.
 *
 * \return The value read from the `<min-length>` tag or the
 * "minimum_length" field of the structure.
 */
648 10 : std::uint32_t schema_column::minimum_length() const
649 : {
650 10 : return f_minimum_length;
651 : }
652 :
653 :
/** \brief Retrieve the maximum length of this column.
 *
 * \return The value read from the `<max-length>` tag or the
 * "maximum_length" field of the structure.
 */
654 10 : std::uint32_t schema_column::maximum_length() const
655 : {
656 10 : return f_maximum_length;
657 : }
658 :
659 :
/** \brief Retrieve the validation script of this column.
 *
 * \return A copy of the buffer holding the validation data: the result of
 * compile_script() when loaded from XML or the "validation" field when
 * loaded from a structure.
 */
660 10 : buffer_t schema_column::validation() const
661 : {
662 10 : return f_validation;
663 : }
664 :
665 :
666 :
667 :
668 :
669 :
670 :
671 :
672 :
673 :
674 :
675 :
676 :
/** \brief Retrieve the name of this secondary index.
 *
 * \return A copy of the name last set with set_index_name().
 */
677 0 : std::string schema_secondary_index::get_index_name() const
678 : {
679 0 : return f_index_name;
680 : }
681 :
682 :
/** \brief Change the name of this secondary index.
 *
 * The name is saved as is; no validation is applied by this function.
 *
 * \param[in] index_name  The new name of the index.
 */
683 0 : void schema_secondary_index::set_index_name(std::string const & index_name)
684 : {
685 0 : f_index_name = index_name;
686 0 : }
687 :
688 :
/** \brief Check whether this index is marked as distributed.
 *
 * \return true when the SECONDARY_INDEX_FLAG_DISTRIBUTED bit is set in the
 * flags of this index.
 */
689 0 : bool schema_secondary_index::get_distributed_index() const
690 : {
691 0 : return (f_flags & SECONDARY_INDEX_FLAG_DISTRIBUTED) != 0;
692 : }
693 :
694 :
695 0 : void schema_secondary_index::set_distributed_index(bool distributed)
696 : {
697 0 : if(distributed)
698 : {
699 0 : f_flags |= SECONDARY_INDEX_FLAG_DISTRIBUTED;
700 : }
701 : else
702 : {
703 0 : f_flags &= ~SECONDARY_INDEX_FLAG_DISTRIBUTED;
704 : }
705 0 : }
706 :
707 :
/** \brief Retrieve the number of columns used by this index.
 *
 * \return The number of identifiers added with add_column_id(); valid
 * indices for get_column_id() go from 0 to get_column_count() - 1.
 */
708 0 : size_t schema_secondary_index::get_column_count()
709 : {
710 0 : return f_column_ids.size();
711 : }
712 :
713 :
714 0 : column_id_t schema_secondary_index::get_column_id(int idx)
715 : {
716 0 : if(static_cast<size_t>(idx) >= f_column_ids.size())
717 : {
718 : throw snapdatabase_out_of_range(
719 : "Index ("
720 0 : + std::to_string(idx)
721 0 : + ") is too large to pick a column identifier.");
722 : }
723 :
724 0 : return f_column_ids[idx];
725 : }
726 :
727 :
/** \brief Append a column identifier to this index.
 *
 * The identifier is added at the end of the list. The function does not
 * check for duplicates.
 *
 * \param[in] id  The identifier of the column to add.
 */
728 0 : void schema_secondary_index::add_column_id(column_id_t id)
729 : {
730 0 : f_column_ids.push_back(id);
731 0 : }
732 :
733 :
734 :
735 :
736 :
737 :
738 :
739 :
740 :
741 :
742 :
743 :
744 1 : void schema_table::from_xml(xml_node::pointer_t x)
745 : {
746 1 : if(x->tag_name() != "table")
747 : {
748 : throw invalid_xml(
749 : "A table schema must be a \"keyspaces\" or \"context\". \""
750 0 : + x->tag_name()
751 0 : + "\" is not acceptable.");
752 : }
753 :
754 : // start at version 1.0
755 : //
756 1 : f_version.set_major(1);
757 :
758 1 : f_name = x->attribute("name");
759 1 : if(!validate_name(f_name))
760 : {
761 : throw invalid_xml(
762 : "\""
763 0 : + f_name
764 0 : + "\" is not a valid table name.");
765 : }
766 :
767 1 : bool drop(x->attribute("drop") == "drop");
768 1 : if(drop)
769 : {
770 : // do not ever save a table when the DROP flag is set (actually
771 : // we want to delete the entire folder if it still exists!)
772 : //
773 0 : f_flags |= TABLE_FLAG_DROP;
774 0 : return;
775 : }
776 :
777 1 : if(x->attribute("temporary") == "temporary")
778 : {
779 0 : f_flags |= TABLE_FLAG_TEMPORARY;
780 : }
781 :
782 1 : if(x->attribute("sparse") == "sparse")
783 : {
784 1 : f_flags |= TABLE_FLAG_SPARSE;
785 : }
786 :
787 1 : if(x->attribute("secure") == "secure")
788 : {
789 0 : f_flags |= TABLE_FLAG_SECURE;
790 : }
791 :
792 2 : xml_node::deque_t schemata;
793 2 : xml_node::deque_t secondary_indexes;
794 :
795 1 : f_model = name_to_model(x->attribute("model"));
796 :
797 : // 1. fully parse the complex types on the first iteration
798 : //
799 4 : for(auto child(x->first_child()); child != nullptr; child = child->next())
800 : {
801 3 : if(child->tag_name() == "block-size")
802 : {
803 1 : f_block_size = convert_to_uint(child->text(), 32);
804 :
805 : // TBD--we adjust the size in dbfile
806 : //size_t const page_size(dbfile::get_system_page_size());
807 : //if((f_block_size % page_size) != 0)
808 : //{
809 : // throw invalid_xml(
810 : // "Table \""
811 : // + f_name
812 : // + "\" is not compatible, block size "
813 : // + std::to_string(f_block_size)
814 : // + " is not supported because it is not an exact multiple of "
815 : // + std::to_string(page_size)
816 : // + ".");
817 : //}
818 : }
819 2 : else if(child->tag_name() == "description")
820 : {
821 1 : if(!f_description.empty())
822 : {
823 : throw invalid_xml(
824 : "Table \""
825 0 : + f_name
826 0 : + "\" has two <description> tags, only one is allowed.");
827 : }
828 1 : f_description = child->text();
829 : }
830 1 : else if(child->tag_name() == "schema")
831 : {
832 1 : schemata.push_back(child);
833 : }
834 0 : else if(child->tag_name() == "secondary-index")
835 : {
836 0 : secondary_indexes.push_back(child);
837 : }
838 0 : else if(child->tag_name() == "complex-type")
839 : {
840 0 : schema_complex_type ct(child);
841 0 : f_complex_types[ct.name()] = ct;
842 : }
843 : else
844 : {
845 : // generate an error for unknown tags or ignore?
846 : //
847 0 : SNAP_LOG_WARNING
848 0 : << "Unknown tag \""
849 0 : << child->tag_name()
850 : << "\" within <table name=\""
851 : << f_name
852 : << "\"> tag ignored."
853 : << SNAP_LOG_SEND;
854 : }
855 : }
856 :
857 : // 2. add system columns and parse user defined columns
858 : //
859 : //column_id_t col_id(1); -- TBD
860 :
861 : // object identifier -- to place the rows in our indirect index
862 : {
863 : auto c(std::make_shared<schema_column>(
864 2 : shared_from_this()
865 : , "_oid"
866 : , struct_type_t::STRUCT_TYPE_UINT64
867 4 : , COLUMN_FLAG_REQUIRED | COLUMN_FLAG_SYSTEM));
868 :
869 : //f_columns_by_id[c->column_id()] = c;
870 1 : f_columns_by_name[c->name()] = c;
871 : }
872 :
873 : // date when the row was created
874 : {
875 : auto c(std::make_shared<schema_column>(
876 2 : shared_from_this()
877 : , "_created_on"
878 : , struct_type_t::STRUCT_TYPE_USTIME
879 4 : , COLUMN_FLAG_SYSTEM));
880 :
881 : //f_columns_by_id[c->column_id()] = c;
882 1 : f_columns_by_name[c->name()] = c;
883 : }
884 :
885 : // when the row was last updated
886 : {
887 : auto c(std::make_shared<schema_column>(
888 2 : shared_from_this()
889 : , "_last_updated"
890 : , struct_type_t::STRUCT_TYPE_USTIME
891 4 : , COLUMN_FLAG_REQUIRED | COLUMN_FLAG_SYSTEM));
892 :
893 : //f_columns_by_id[c->column_id()] = c;
894 1 : f_columns_by_name[c->name()] = c;
895 : }
896 :
897 : // the date when it gets deleted automatically
898 : {
899 : auto c(std::make_shared<schema_column>(
900 2 : shared_from_this()
901 : , "_deleted_on"
902 : , struct_type_t::STRUCT_TYPE_USTIME
903 4 : , COLUMN_FLAG_SYSTEM));
904 :
905 : //f_columns_by_id[c->column_id()] = c;
906 1 : f_columns_by_name[c->name()] = c;
907 : }
908 :
909 : // ID of user who created this row
910 : {
911 : auto c(std::make_shared<schema_column>(
912 2 : shared_from_this()
913 : , "_created_by"
914 : , struct_type_t::STRUCT_TYPE_UINT64
915 4 : , COLUMN_FLAG_SYSTEM));
916 :
917 : //f_columns_by_id[c->column_id()] = c;
918 1 : f_columns_by_name[c->name()] = c;
919 : }
920 :
921 : // ID of user who last updated this row
922 : {
923 : auto c(std::make_shared<schema_column>(
924 2 : shared_from_this()
925 : , "_updated_by"
926 : , struct_type_t::STRUCT_TYPE_UINT64
927 4 : , COLUMN_FLAG_SYSTEM));
928 :
929 : //f_columns_by_id[c->column_id()] = c;
930 1 : f_columns_by_name[c->name()] = c;
931 : }
932 :
933 : // ID of user who deleted this row
934 : {
935 : auto c(std::make_shared<schema_column>(
936 2 : shared_from_this()
937 : , "_deleted_by"
938 : , struct_type_t::STRUCT_TYPE_UINT64
939 4 : , COLUMN_FLAG_SYSTEM));
940 :
941 : //f_columns_by_id[c->column_id()] = c;
942 1 : f_columns_by_name[c->name()] = c;
943 : }
944 :
945 : // version of this row (TBD TBD TBD)
946 : //
947 : // how this will be implemented is not clear at this point--it will
948 : // only be for the `content` table; the version itself would not be
949 : // saved as a column per se, instead it would be a form of sub-index
950 : // where the version is ignored for fields that are marked `global`,
951 : // only the `major` part is used for fields marked as `branch`, and
952 : // both, `major` and `minor` are used for fields marked as
953 : // `revision`... as far as the client is concerned, though, it look
954 : // like we have a full version column.
955 : {
956 : auto c(std::make_shared<schema_column>(
957 2 : shared_from_this()
958 : , "_version"
959 : , struct_type_t::STRUCT_TYPE_VERSION
960 4 : , COLUMN_FLAG_SYSTEM));
961 :
962 : //f_columns_by_id[c->column_id()] = c;
963 1 : f_columns_by_name[c->name()] = c;
964 : }
965 :
966 2 : for(auto const & child : schemata)
967 : {
968 5 : for(auto column(child->first_child());
969 3 : column != nullptr;
970 4 : column = column->next())
971 : {
972 4 : auto c(std::make_shared<schema_column>(shared_from_this(), column)); // TBD: + col_id?
973 2 : if(f_columns_by_name.find(c->name()) != f_columns_by_name.end())
974 : {
975 : throw invalid_xml(
976 : "Column \""
977 0 : + f_name
978 0 : + "."
979 0 : + c->name()
980 0 : + "\" defined twice.");
981 : }
982 :
983 : //f_columns_by_id[c->column_id()] = c;
984 2 : f_columns_by_name[c->name()] = c;
985 : //++col_id; -- TBD
986 : }
987 : }
988 :
989 : // 3. the row-key is transformed in an array of column identifiers
990 : //
991 : // the parameter in the XML is a string of column names separated
992 : // by commas
993 : //
994 2 : std::string row_key_name(x->attribute("row-key"));
995 :
996 2 : advgetopt::string_list_t row_key_names;
997 1 : advgetopt::split_string(row_key_name, row_key_names, {","});
998 :
999 1 : for(auto const & n : row_key_names)
1000 : {
1001 0 : schema_column::pointer_t c(column(n));
1002 0 : if(c == nullptr)
1003 : {
1004 : throw invalid_xml(
1005 : "A column referenced in the row-key attribute of table \""
1006 0 : + f_name
1007 0 : + "\" must exist. We could not find \""
1008 0 : + f_name
1009 0 : + "."
1010 0 : + n
1011 0 : + "\".");
1012 : }
1013 0 : f_row_key.push_back(c->column_id());
1014 : }
1015 :
1016 : // 4. the secondary indexes are transformed to array of columns
1017 : //
1018 1 : for(auto const & si : secondary_indexes)
1019 : {
1020 0 : schema_secondary_index::pointer_t index(std::make_shared<schema_secondary_index>());
1021 0 : index->set_index_name(si->attribute("name"));
1022 :
1023 0 : std::string const distributed(si->attribute("distributed"));
1024 0 : if(distributed.empty() || distributed == "distributed")
1025 : {
1026 0 : index->set_distributed_index(true);
1027 : }
1028 0 : else if(distributed == "one-instance")
1029 : {
1030 0 : index->set_distributed_index(false);
1031 : }
1032 : else
1033 : {
1034 0 : SNAP_LOG_WARNING
1035 0 : << "Unknown distributed attribute value \""
1036 : << distributed
1037 : << "\" within a <secondary-index> tag ignored."
1038 : << SNAP_LOG_SEND;
1039 : }
1040 :
1041 0 : std::string const columns(si->text());
1042 0 : advgetopt::string_list_t column_names;
1043 0 : advgetopt::split_string(
1044 : columns
1045 : , column_names
1046 0 : , {","});
1047 :
1048 0 : for(auto const & n : column_names)
1049 : {
1050 0 : schema_column::pointer_t c(column(n));
1051 0 : if(c == nullptr)
1052 : {
1053 : throw invalid_xml(
1054 : "A column referenced in the secondary-index of table \""
1055 0 : + f_name
1056 0 : + "\" must exist. We could not find \""
1057 0 : + f_name
1058 0 : + "."
1059 0 : + n
1060 0 : + "\".");
1061 : }
1062 0 : index->add_column_id(c->column_id());
1063 : }
1064 :
1065 0 : f_secondary_indexes.push_back(index);
1066 : }
1067 : }
1068 :
1069 :
1070 0 : void schema_table::load_extension(xml_node::pointer_t e)
1071 : {
1072 : // determine the largest column identifier, but really this is not
1073 : // the right way of assigning the ids
1074 : //
1075 0 : column_id_t col_id(0);
1076 0 : for(auto const & c : f_columns_by_id)
1077 : {
1078 0 : if(c.second->column_id() > col_id)
1079 : {
1080 0 : col_id = c.second->column_id();
1081 : }
1082 : }
1083 0 : ++col_id;
1084 :
1085 0 : for(auto child(e->first_child()); child != nullptr; child = child->next())
1086 : {
1087 0 : if(child->tag_name() == "schema")
1088 : {
1089 : // TODO: move to sub-function & make sure we do not get duplicates
1090 0 : for(auto column(child->first_child());
1091 0 : column != nullptr;
1092 0 : column = column->next())
1093 : {
1094 0 : auto c(std::make_shared<schema_column>(shared_from_this(), column));
1095 0 : f_columns_by_id[c->column_id()] = c;
1096 0 : f_columns_by_name[c->name()] = c;
1097 0 : ++col_id;
1098 : }
1099 : }
1100 : // TODO: once we have a better handle on column identifiers?
1101 : //else if(child->tag_name() == "secondary-index")
1102 : //{
1103 : // secondary_index_t si;
1104 : // si.f_name = child->attribute("name");
1105 : // si.f_columns = child->attribute("columns");
1106 : // secondary_indexes.push_back(si);
1107 : //}
1108 : else
1109 : {
1110 : // generate an error for unknown tags or ignore?
1111 : //
1112 0 : SNAP_LOG_WARNING
1113 0 : << "Unknown tag \""
1114 0 : << child->tag_name()
1115 : << "\" within a <table-extension> tag ignored."
1116 : << SNAP_LOG_SEND;
1117 : }
1118 : }
1119 0 : }
1120 :
1121 :
1122 0 : void schema_table::from_binary(virtual_buffer::pointer_t b)
1123 : {
1124 0 : structure::pointer_t s(std::make_shared<structure>(g_table_description));
1125 :
1126 0 : s->set_virtual_buffer(b, 0);
1127 :
1128 0 : f_version = s->get_uinteger("version");
1129 0 : f_name = s->get_string("name");
1130 0 : f_flags = s->get_uinteger("flags");
1131 0 : f_model = static_cast<model_t>(s->get_uinteger("model"));
1132 :
1133 : {
1134 0 : auto const field(s->get_field("row_key"));
1135 0 : auto const max(field->size());
1136 0 : for(std::remove_const<decltype(max)>::type idx(0); idx < max; ++idx)
1137 : {
1138 0 : f_row_key.push_back((*field)[idx]->get_uinteger("column_id"));
1139 : }
1140 : }
1141 :
1142 : {
1143 0 : auto const field(s->get_field("secondary_indexes"));
1144 0 : auto const max(field->size());
1145 0 : for(std::remove_const<decltype(max)>::type idx(0); idx < max; ++idx)
1146 : {
1147 0 : schema_secondary_index::pointer_t secondary_index(std::make_shared<schema_secondary_index>());
1148 :
1149 0 : secondary_index->set_index_name((*field)[idx]->get_string("name"));
1150 :
1151 0 : auto const columns_field((*field)[idx]->get_field("columns"));
1152 0 : auto const columns_max(columns_field->size());
1153 0 : for(std::remove_const<decltype(columns_max)>::type j(0); j < columns_max; ++j)
1154 : {
1155 0 : secondary_index->add_column_id((*field)[idx]->get_uinteger("column_id"));
1156 : }
1157 :
1158 0 : f_secondary_indexes.push_back(secondary_index);
1159 : }
1160 : }
1161 :
1162 : {
1163 0 : auto field(s->get_field("columns"));
1164 0 : auto const max(field->size());
1165 0 : for(std::remove_const<decltype(max)>::type idx(0); idx < max; ++idx)
1166 : {
1167 0 : schema_column::pointer_t column(std::make_shared<schema_column>(shared_from_this(), (*field)[idx]));
1168 0 : if(column->column_id() != 0)
1169 : {
1170 : throw id_missing(
1171 : "loaded column \""
1172 0 : + column->name()
1173 0 : + "\" from the database and its column identifier is 0.");
1174 : }
1175 :
1176 0 : f_columns_by_name[column->name()] = column;
1177 0 : f_columns_by_id[column->column_id()] = column;
1178 : }
1179 : }
1180 0 : }
1181 :
1182 :
1183 1 : virtual_buffer::pointer_t schema_table::to_binary() const
1184 : {
1185 2 : structure::pointer_t s(std::make_shared<structure>(g_table_description));
1186 1 : s->init_buffer();
1187 1 : s->set_uinteger("version", f_version.to_binary());
1188 1 : s->set_string("name", f_name);
1189 1 : s->set_uinteger("flags", f_flags);
1190 1 : s->set_uinteger("model", static_cast<uint8_t>(f_model));
1191 :
1192 : {
1193 2 : structure::vector_t v;
1194 1 : auto const max(f_row_key.size());
1195 1 : for(std::remove_const<decltype(max)>::type i(0); i < max; ++i)
1196 : {
1197 0 : structure::pointer_t column_id_structure(std::make_shared<structure>(g_table_column_reference));
1198 0 : column_id_structure->init_buffer();
1199 0 : column_id_structure->set_uinteger("column_id", f_row_key[i]);
1200 0 : v.push_back(column_id_structure);
1201 : }
1202 1 : s->set_array("row_key", v);
1203 : }
1204 :
1205 : {
1206 2 : structure::vector_t v;
1207 1 : auto const max(f_secondary_indexes.size());
1208 1 : for(std::remove_const<decltype(max)>::type i(0); i < max; ++i)
1209 : {
1210 0 : structure::pointer_t secondary_index_structure(std::make_shared<structure>(g_table_secondary_index));
1211 0 : secondary_index_structure->init_buffer();
1212 0 : secondary_index_structure->set_string("name", f_secondary_indexes[i]->get_index_name());
1213 :
1214 0 : structure::vector_t c;
1215 0 : auto const jmax(f_secondary_indexes[i]->get_column_count());
1216 0 : for(std::remove_const<decltype(max)>::type j(0); j < jmax; ++j)
1217 : {
1218 0 : structure::pointer_t column_id_structure(std::make_shared<structure>(g_table_column_reference));
1219 0 : column_id_structure->init_buffer();
1220 0 : column_id_structure->set_uinteger("column_id", f_secondary_indexes[i]->get_column_id(j));
1221 0 : c.push_back(column_id_structure);
1222 : }
1223 :
1224 0 : secondary_index_structure->set_array("columns", c);
1225 :
1226 0 : v.push_back(secondary_index_structure);
1227 : }
1228 :
1229 1 : s->set_array("secondary_indexes", v);
1230 : }
1231 :
1232 : {
1233 11 : for(auto const & col : f_columns_by_id)
1234 : {
1235 20 : structure::pointer_t column_description(s->new_array_item("columns"));
1236 : //column_description->init_buffer();
1237 10 : column_description->set_string("name", col.second->name());
1238 10 : uint512_t hash;
1239 10 : col.second->hash(hash.f_value[0], hash.f_value[1]);
1240 10 : column_description->set_large_uinteger("hash", hash);
1241 10 : column_description->set_uinteger("column_id", col.second->column_id());
1242 10 : column_description->set_uinteger("type", static_cast<uint16_t>(col.second->type()));
1243 10 : column_description->set_uinteger("flags", col.second->flags());
1244 10 : column_description->set_string("encrypt_key_name", col.second->encrypt_key_name());
1245 10 : column_description->set_buffer("default_value", col.second->default_value());
1246 10 : column_description->set_buffer("minimum_value", col.second->minimum_value());
1247 10 : column_description->set_buffer("maximum_value", col.second->maximum_value());
1248 10 : column_description->set_uinteger("minimum_length", col.second->minimum_length());
1249 10 : column_description->set_uinteger("maximum_length", col.second->maximum_length());
1250 10 : column_description->set_buffer("validation", col.second->validation());
1251 : }
1252 : }
1253 :
1254 : // we know it is zero so we ignore that one anyay
1255 : //
1256 1 : uint64_t start_offset(0);
1257 2 : return s->get_virtual_buffer(start_offset);
1258 : }
1259 :
1260 :
1261 0 : version_t schema_table::version() const
1262 : {
1263 0 : return f_version;
1264 : }
1265 :
1266 :
1267 2 : std::string schema_table::name() const
1268 : {
1269 2 : return f_name;
1270 : }
1271 :
1272 :
1273 0 : model_t schema_table::model() const
1274 : {
1275 0 : return f_model;
1276 : }
1277 :
1278 :
1279 1 : bool schema_table::is_sparse() const
1280 : {
1281 1 : return (f_flags & TABLE_FLAG_SPARSE) != 0;
1282 : }
1283 :
1284 :
1285 0 : bool schema_table::is_secure() const
1286 : {
1287 0 : return (f_flags & TABLE_FLAG_SECURE) != 0;
1288 : }
1289 :
1290 :
1291 0 : column_ids_t schema_table::row_key() const
1292 : {
1293 0 : return f_row_key;
1294 : }
1295 :
1296 :
1297 1 : void schema_table::assign_column_ids()
1298 : {
1299 1 : if(!f_columns_by_id.empty())
1300 : {
1301 0 : return;
1302 : }
1303 :
1304 1 : column_id_t id(1);
1305 11 : for(auto c : f_columns_by_name)
1306 : {
1307 10 : c.second->set_column_id(id);
1308 10 : f_columns_by_id[id] = c.second;
1309 10 : ++id;
1310 : }
1311 : }
1312 :
1313 :
1314 0 : schema_column::pointer_t schema_table::column(std::string const & name) const
1315 : {
1316 0 : auto it(f_columns_by_name.find(name));
1317 0 : if(it == f_columns_by_name.end())
1318 : {
1319 0 : return schema_column::pointer_t();
1320 : }
1321 0 : return it->second;
1322 : }
1323 :
1324 :
1325 0 : schema_column::pointer_t schema_table::column(column_id_t id) const
1326 : {
1327 0 : auto it(f_columns_by_id.find(id));
1328 0 : if(it == f_columns_by_id.end())
1329 : {
1330 0 : return schema_column::pointer_t();
1331 : }
1332 0 : return it->second;
1333 : }
1334 :
1335 :
1336 0 : schema_column::map_by_name_t schema_table::columns_by_name() const
1337 : {
1338 0 : return f_columns_by_name;
1339 : }
1340 :
1341 :
1342 0 : schema_column::map_by_id_t schema_table::columns_by_id() const
1343 : {
1344 0 : return f_columns_by_id;
1345 : }
1346 :
1347 :
1348 0 : std::string schema_table::description() const
1349 : {
1350 0 : return f_description;
1351 : }
1352 :
1353 :
1354 1 : std::uint32_t schema_table::block_size() const
1355 : {
1356 1 : return f_block_size;
1357 : }
1358 :
1359 :
1360 :
1361 6 : } // namespace snapdatabase
1362 : // vim: ts=4 sw=4 et
|