Line data Source code
1 : // Copyright (c) 2019 Made to Order Software Corp. All Rights Reserved
2 : //
3 : // https://snapwebsites.org/project/snapdatabase
4 : // contact@m2osw.com
5 : //
6 : // This program is free software; you can redistribute it and/or modify
7 : // it under the terms of the GNU General Public License as published by
8 : // the Free Software Foundation; either version 2 of the License, or
9 : // (at your option) any later version.
10 : //
11 : // This program is distributed in the hope that it will be useful,
12 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : // GNU General Public License for more details.
15 : //
16 : // You should have received a copy of the GNU General Public License along
17 : // with this program; if not, write to the Free Software Foundation, Inc.,
18 : // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 :
20 :
21 : /** \file
22 : * \brief Database file implementation.
23 : *
24 : * Each table uses one or more files. Each file is handled by a dbfile
25 : * object and a corresponding set of blocks.
26 : */
27 :
28 : // self
29 : //
30 : #include "snapdatabase/data/dbfile.h"
31 :
32 : #include "snapdatabase/block/block_free_block.h"
33 : #include "snapdatabase/data/dbtype.h"
34 : #include "snapdatabase/exception.h"
35 : #include "snapdatabase/file/file_snap_database_table.h"
36 : #include "snapdatabase/data/structure.h"
37 : #include "snapdatabase/database/table.h"
38 :
39 :
40 : // snapdev lib
41 : //
42 : #include <snapdev/not_used.h>
43 :
44 :
45 : // C lib
46 : //
47 : #include <sys/mman.h>
48 : #include <sys/stat.h>
49 :
50 :
51 : // C++ lib
52 : //
53 : #include <iostream>
54 :
55 :
56 : // last include
57 : //
58 : #include <snapdev/poison.h>
59 :
60 :
61 :
62 : namespace snapdatabase
63 : {
64 :
65 :
66 : namespace
67 : {
68 :
69 :
// extension appended to the name of each table file
//
constexpr char const g_table_extension[] = ".snapdb";

// name of the lock file created inside the table directory
//
constexpr char const g_global_lock_filename[] = "global.lock";
72 :
73 :
74 1 : std::string generate_table_dir(std::string const & path, std::string const & table_name)
75 : {
76 1 : std::string dirname(path);
77 1 : if(!dirname.empty())
78 : {
79 1 : dirname += '/';
80 : }
81 1 : dirname += table_name;
82 :
83 : struct stat s;
84 1 : if(::stat(dirname.c_str(), &s) != 0)
85 : {
86 1 : snap::NOTUSED(mkdir(dirname.c_str(), S_IRWXU));
87 :
88 1 : if(::stat(dirname.c_str(), &s) != 0)
89 : {
90 : throw io_error(
91 : "System could not properly create directory \""
92 0 : + dirname
93 0 : + "\" to handle table \""
94 0 : + table_name
95 0 : + "\".");
96 : }
97 : }
98 :
99 1 : if(!S_ISDIR(s.st_mode))
100 : {
101 : throw io_error(
102 : "\""
103 0 : + dirname
104 0 : + "\" must be a directory.");
105 : }
106 :
107 1 : return dirname;
108 : }
109 :
110 :
111 : }
112 : // no name namespace
113 :
114 :
115 :
116 1 : dbfile::dbfile(std::string const & path, std::string const & table_name, std::string const & filename)
117 : : f_path(path)
118 : , f_table_name(table_name)
119 : , f_filename(filename)
120 : , f_dirname(generate_table_dir(path, table_name))
121 2 : , f_fullname(f_dirname + "/" + f_filename + g_table_extension)
122 2 : , f_lock_filename(f_dirname + "/" + g_global_lock_filename)
123 4 : , f_pid(getpid())
124 : {
125 1 : }
126 :
127 :
128 0 : dbfile::~dbfile()
129 : {
130 0 : close();
131 0 : }
132 :
133 :
134 0 : std::string dbfile::get_fullname() const
135 : {
136 0 : return f_fullname;
137 : }
138 :
139 :
140 1 : void dbfile::set_table(table::pointer_t t)
141 : {
142 1 : f_table = t;
143 1 : }
144 :
145 :
146 0 : table::pointer_t dbfile::get_table() const
147 : {
148 0 : return f_table;
149 : }
150 :
151 :
152 0 : void dbfile::close()
153 : {
154 0 : if(f_fd != -1)
155 : {
156 0 : ::close(f_fd);
157 0 : f_fd = -1;
158 : }
159 0 : }
160 :
161 :
162 1 : size_t dbfile::get_system_page_size()
163 : {
164 1 : static long const sc_page_size(sysconf(_SC_PAGE_SIZE));
165 1 : return sc_page_size;
166 : }
167 :
168 :
169 1 : void dbfile::set_page_size(size_t page_size)
170 : {
171 1 : if(f_page_size != 0)
172 : {
173 0 : throw snapdatabase_logic_error("The size of a page in a dbfile can only be set once.");
174 : }
175 :
176 : // make sure it is at least one system page in size and a multiple of
177 : // the system page so we can easily mmap() our blocks
178 : //
179 1 : size_t const system_page_size(get_system_page_size());
180 1 : size_t const count((page_size + system_page_size - 1) / system_page_size);
181 1 : if(count <= 1)
182 : {
183 1 : f_page_size = system_page_size;
184 : }
185 : else
186 : {
187 0 : f_page_size = count * system_page_size;
188 : }
189 1 : }
190 :
191 :
192 57 : size_t dbfile::get_page_size() const
193 : {
194 57 : if(f_page_size == 0)
195 : {
196 0 : throw snapdatabase_logic_error("The dbfile page size is not yet defined.");
197 : }
198 :
199 57 : return f_page_size;
200 : }
201 :
202 :
203 1 : void dbfile::set_sparse(bool sparse)
204 : {
205 1 : f_sparse_file = sparse;
206 1 : }
207 :
208 :
209 0 : bool dbfile::get_sparse() const
210 : {
211 0 : return f_sparse_file;
212 : }
213 :
214 :
215 0 : void dbfile::set_type(dbtype_t type)
216 : {
217 0 : if(f_type != dbtype_t::DBTYPE_UNKNOWN)
218 : {
219 0 : throw snapdatabase_logic_error("The dbfile type is already defined.");
220 : }
221 0 : if(type == dbtype_t::DBTYPE_UNKNOWN)
222 : {
223 0 : throw snapdatabase_logic_error("The dbfile type cannot be set to dbtype_t::DBTYPE_UNKNOWN.");
224 : }
225 :
226 0 : f_type = type;
227 0 : }
228 :
229 :
230 0 : dbtype_t dbfile::get_type() const
231 : {
232 0 : return f_type;
233 : }
234 :
235 :
236 6 : int dbfile::open_file()
237 : {
238 : // already open?
239 : //
240 6 : if(f_fd != -1)
241 : {
242 5 : return f_fd;
243 : }
244 :
245 1 : size_t const page_size(get_page_size());
246 :
247 : // we need to have a global lock in case the file was not yet created
248 : //
249 2 : snap::lockfile global_lock(f_lock_filename, snap::lockfile::mode_t::LOCKFILE_EXCLUSIVE);
250 1 : global_lock.lock();
251 :
252 : // first attempt a regular open because once a file was created, this
253 : // works every time
254 : //
255 1 : f_fd = open(f_fullname.c_str(), O_RDWR | O_CLOEXEC | O_NOATIME | O_NOFOLLOW);
256 1 : if(f_fd == -1)
257 : {
258 1 : if(errno != ENOENT)
259 : {
260 0 : int const e(errno);
261 : throw io_error(
262 : "System could not open file \""
263 0 : + f_fullname
264 0 : + "\" (errno: "
265 0 : + std::to_string(e)
266 0 : + ", "
267 0 : + strerror(e)
268 0 : + ".");
269 : }
270 :
271 1 : f_fd = open(f_fullname.c_str(), O_RDWR | O_CLOEXEC | O_NOATIME | O_NOFOLLOW | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
272 1 : if(f_fd == -1)
273 : {
274 : // nothing more we can do, whatever the error, fail
275 : //
276 : // (note we have a global lock so we should not have a problem
277 : // with the O_EXCL flag)
278 : //
279 0 : int const e(errno);
280 : throw io_error(
281 : "System could not open file \""
282 0 : + f_fullname
283 0 : + "\" (errno: "
284 0 : + std::to_string(e)
285 0 : + ", "
286 0 : + strerror(e)
287 0 : + "\".");
288 : }
289 :
290 : // in this one case we are in creation mode which means we
291 : // create the header block, which is important because it has
292 : // the special offset of 0 and we use that block to allocate
293 : // other blocks
294 : //
295 1 : version_t v(STRUCTURE_VERSION_MAJOR, STRUCTURE_VERSION_MINOR);
296 :
297 : file_snap_database_table::pointer_t sdbt(std::static_pointer_cast<file_snap_database_table>(
298 2 : f_table->allocate_new_block(dbtype_t::FILE_TYPE_SNAP_DATABASE_TABLE)));
299 :
300 1 : sdbt->set_first_free_block(page_size);
301 1 : sdbt->set_block_size(page_size);
302 1 : sdbt->set_file_version(v);
303 1 : sdbt->sync(false);
304 : }
305 :
306 1 : return f_fd;
307 : }
308 :
309 :
310 6 : data_t dbfile::data(reference_t offset)
311 : {
312 6 : int fd(open_file());
313 :
314 6 : size_t const sz(get_page_size());
315 :
316 6 : reference_t page_offset(offset % sz);
317 6 : reference_t page_start(offset - page_offset);
318 :
319 6 : auto it(f_pages.left.find(page_start));
320 6 : if(it != f_pages.left.end())
321 : {
322 4 : return it->second;
323 : }
324 :
325 2 : data_t ptr(reinterpret_cast<data_t>(mmap(
326 : nullptr
327 : , get_page_size()
328 : , PROT_READ | PROT_WRITE
329 : , MAP_SHARED
330 : , fd
331 2 : , page_start)));
332 :
333 2 : if(ptr == nullptr)
334 : {
335 : throw io_error(
336 : "mmap() failed on \""
337 0 : + f_filename
338 0 : + "\" at offset "
339 0 : + std::to_string(offset)
340 0 : + ".");
341 : }
342 :
343 2 : f_pages.insert(page_bimap_t::value_type(page_start, ptr));
344 :
345 2 : return ptr + page_offset;
346 : }
347 :
348 :
349 0 : void dbfile::release_data(data_t data)
350 : {
351 0 : size_t const sz(get_page_size());
352 :
353 0 : intptr_t const data_ptr(reinterpret_cast<intptr_t>(data));
354 0 : intptr_t const page_ptr(data_ptr - data_ptr % sz);
355 0 : auto it(f_pages.right.find(reinterpret_cast<data_t>(page_ptr)));
356 0 : if(it == f_pages.right.end())
357 : {
358 : throw page_not_found(
359 : "page "
360 0 : + std::to_string(page_ptr)
361 0 : + " not found. It cannot be unmapped.");
362 : }
363 0 : f_pages.right.erase(it);
364 :
365 0 : munmap(reinterpret_cast<data_t>(page_ptr), sz);
366 0 : }
367 :
368 :
369 2 : void dbfile::sync(data_t data, bool immediate)
370 : {
371 2 : size_t const sz(get_page_size());
372 :
373 2 : intptr_t const data_ptr(reinterpret_cast<intptr_t>(data));
374 2 : intptr_t const page_ptr(data_ptr - data_ptr % sz);
375 :
376 2 : msync(reinterpret_cast<data_t>(page_ptr)
377 : , sz
378 : , (immediate ? MS_SYNC : MS_ASYNC) | MS_INVALIDATE);
379 2 : }
380 :
381 :
382 3 : size_t dbfile::get_size() const
383 : {
384 3 : if(f_fd == -1)
385 : {
386 : throw file_not_opened(
387 0 : "file is not yet opened, get_size() can't be called.");
388 : }
389 :
390 : struct stat s;
391 3 : if(::fstat(f_fd, &s) == -1)
392 : {
393 : throw io_error(
394 : "stat() failed on \""
395 0 : + f_filename
396 0 : + "\".");
397 : }
398 :
399 3 : return s.st_size;
400 : }
401 :
402 :
403 16 : reference_t dbfile::append_free_block(reference_t const previous_block_offset)
404 : {
405 16 : if(f_fd == -1)
406 : {
407 : throw file_not_opened(
408 0 : "file is not yet opened, append_free_block() can't be called.");
409 : }
410 :
411 16 : reference_t const p(lseek(f_fd, 0, SEEK_END));
412 16 : if(p == static_cast<reference_t>(-1))
413 : {
414 0 : close();
415 : throw io_error(
416 : "lseek() failed on \""
417 0 : + f_filename
418 0 : + "\".");
419 : }
420 :
421 16 : dbtype_t const magic(dbtype_t::BLOCK_TYPE_FREE_BLOCK);
422 16 : write_data(&magic, sizeof(magic));
423 16 : version_t const version(0, 1);
424 16 : auto const v(version.to_binary());
425 16 : write_data(&v, sizeof(v));
426 16 : write_data(&previous_block_offset, sizeof(previous_block_offset));
427 16 : if(!f_sparse_file)
428 : {
429 : // make sure to write the rest too so for sure it's not sparse
430 : //
431 0 : std::vector<uint8_t> zeroes(get_page_size() - sizeof(magic) - sizeof(previous_block_offset));
432 0 : write_data(zeroes.data(), zeroes.size());
433 : }
434 : else
435 : {
436 : // this is what makes the file sparse
437 : //
438 : // (note that really happens only when
439 : // `get_page_size() > get_system_page_size()`)
440 : //
441 16 : ftruncate(f_fd, p + get_page_size());
442 : }
443 :
444 16 : return p;
445 : }
446 :
447 :
448 : /** \brief Grow the file.
449 : *
450 : * We use this function to grow the file with a full page of data.
451 : *
452 : * \exception io_error
453 : * On an error, the function raises this exception and closes the file.
454 : *
455 : * \param[in] ptr Pointer to the block of data to write to the file.
456 : * \param[in] size The number of bytes in the block of data to write.
457 : */
458 48 : void dbfile::write_data(void const * ptr, size_t size)
459 : {
460 48 : if(f_fd == -1)
461 : {
462 : throw file_not_opened(
463 0 : "file is not yet opened, write_data() can't be called.");
464 : }
465 :
466 48 : int const sz(write(f_fd, ptr, size));
467 48 : if(static_cast<size_t>(sz) != size)
468 : {
469 0 : close();
470 : throw io_error(
471 : "System could not properly write to file \""
472 0 : + f_filename
473 0 : + "\".");
474 : }
475 48 : }
476 :
477 :
478 0 : std::string to_string(dbtype_t type)
479 : {
480 0 : switch(type)
481 : {
482 0 : case dbtype_t::DBTYPE_UNKNOWN:
483 0 : return std::string("Unknown");
484 :
485 0 : case dbtype_t::FILE_TYPE_SNAP_DATABASE_TABLE:
486 0 : return std::string("Snap Database Type (SDBT)");
487 :
488 0 : case dbtype_t::FILE_TYPE_EXTERNAL_INDEX:
489 0 : return std::string("External Index File (INDX)");
490 :
491 0 : case dbtype_t::FILE_TYPE_BLOOM_FILTER:
492 0 : return std::string("Bloom Filter File (BLMF)");
493 :
494 0 : case dbtype_t::BLOCK_TYPE_BLOB:
495 0 : return std::string("Blob Block (BLOB)");
496 :
497 0 : case dbtype_t::BLOCK_TYPE_DATA:
498 0 : return std::string("Data Block (DATA)");
499 :
500 0 : case dbtype_t::BLOCK_TYPE_ENTRY_INDEX:
501 0 : return std::string("Entry Index Block (EIDX)");
502 :
503 0 : case dbtype_t::BLOCK_TYPE_FREE_BLOCK:
504 0 : return std::string("Free Block (FREE)");
505 :
506 0 : case dbtype_t::BLOCK_TYPE_FREE_SPACE:
507 0 : return std::string("Free Space Block (FSPC)");
508 :
509 0 : case dbtype_t::BLOCK_TYPE_INDEX_POINTERS:
510 0 : return std::string("Index Pointer Block (IDXP)");
511 :
512 0 : case dbtype_t::BLOCK_TYPE_INDIRECT_INDEX:
513 0 : return std::string("Indirect Index Block (INDR)");
514 :
515 0 : case dbtype_t::BLOCK_TYPE_SECONDARY_INDEX:
516 0 : return std::string("Secondary Index Block (SIDX)");
517 :
518 0 : case dbtype_t::BLOCK_TYPE_SCHEMA:
519 0 : return std::string("Schema Block (SCHM)");
520 :
521 0 : case dbtype_t::BLOCK_TYPE_TOP_INDEX:
522 0 : return std::string("Top Index Block (TIDX)");
523 :
524 : }
525 :
526 0 : return std::string("Invalid");
527 : }
528 :
529 :
530 :
531 6 : } // namespace snapdatabase
532 : // vim: ts=4 sw=4 et
|