Line data Source code
1 : // Copyright (c) 2011-2023 Made to Order Software Corp. All Rights Reserved
2 : //
3 : // https://snapwebsites.org/project/snapdev
4 : // contact@m2osw.com
5 : //
6 : // This program is free software: you can redistribute it and/or modify
7 : // it under the terms of the GNU General Public License as published by
8 : // the Free Software Foundation, either version 3 of the License, or
9 : // (at your option) any later version.
10 : //
11 : // This program is distributed in the hope that it will be useful,
12 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : // GNU General Public License for more details.
15 : //
16 : // You should have received a copy of the GNU General Public License
17 : // along with this program. If not, see <https://www.gnu.org/licenses/>.
18 : #pragma once
19 :
20 : /** \file
21 : * \brief Utility functions to handle paths.
22 : *
23 : * Since we started before the C++ extension supporting various paths, we
24 : * have our own set of functions. Also the system path support does not match
25 : * our needs one to one.
26 : */
27 :
28 : // self
29 : //
30 : #include "snapdev/join_strings.h"
31 : #include "snapdev/not_reached.h"
32 : #include "snapdev/reverse_cstring.h"
33 : #include "snapdev/tokenize_string.h"
34 :
35 :
36 : // C++
37 : //
38 : #include <sstream>
39 : #include <string>
40 : #include <vector>
41 :
42 :
43 : // C
44 : //
45 : #include <limits.h>
46 : #include <stdlib.h>
47 : #include <string.h>
48 :
49 :
50 :
51 : namespace snapdev
52 : {
53 : namespace pathinfo
54 : {
55 :
56 :
57 :
/** \brief Extract the last component of a path.
 *
 * The function keeps what follows the last '/' of \p path (or the whole
 * string when there is no '/'). It can additionally strip a leading
 * \p prefix and a trailing \p suffix (often called a file extension) from
 * that component.
 *
 * \code
 *     // the following returns true
 *     snapdev::pathinfo::basename(
 *               std::string("/usr/share/snapwebsites/in.basename.txt")
 *             , ".txt"
 *             , "in.") == "basename"
 * \endcode
 *
 * The special suffix ".*" removes whatever follows the last period of the
 * path, as long as that period is found after the start of the name (so a
 * name which only starts with a period is kept as is).
 *
 * \tparam StringT  The type of string to parse.
 * \param[in] path  The path from which basename gets retrieved.
 * \param[in] suffix  If the path ends with that suffix, remove it.
 * \param[in] prefix  If the basename starts with that prefix, remove it.
 *
 * \return The basename of \p path.
 */
template<class StringT>
StringT basename(StringT const & path
               , typename std::decay<StringT>::type const & suffix = ""
               , typename std::decay<StringT>::type const & prefix = "")
{
    using size_type = typename StringT::size_type;

    // the name starts right after the last '/' or at the very beginning
    //
    size_type const slash(path.rfind('/'));
    size_type start(slash == StringT::npos ? 0 : slash + 1);

    // skip the prefix when the name starts with it
    //
    size_type const prefix_len(prefix.length());
    if(path.length() - start >= prefix_len
    && path.compare(start, prefix_len, prefix) == 0)
    {
        start += prefix_len;
    }

    // by default the name ends where the path ends
    //
    size_type stop(path.length());

    bool const any_suffix(
               suffix.length() == 2
            && suffix[0] == '.'
            && suffix[1] == '*');
    if(any_suffix)
    {
        // cut at the last period, whatever the suffix is
        //
        size_type const period(path.rfind('.'));
        if(period != StringT::npos && period > start)
        {
            stop = period;
        }
    }
    else
    {
        // cut a specific suffix, only if it fits in what is left of the name
        //
        size_type const suffix_len(suffix.length());
        if(path.length() - start >= suffix_len
        && path.compare(path.length() - suffix_len, suffix_len, suffix) == 0)
        {
            stop -= suffix_len;
        }
    }

    return path.substr(start, stop - start);
}
133 :
134 :
/** \brief Replace the suffix with another.
 *
 * This function checks whether a path ends with a given suffix. If so then
 * the existing suffix gets removed. Then it appends the new suffix.
 *
 * The function is not checking whether a suffix starts with a period.
 * It can include any other character.
 *
 * The special \p old_suffix ".*" matches any suffix: everything from the
 * last period found in the filename part of \p path is replaced. A period
 * found in a directory name or as the very first character of the filename
 * does not count as the start of a suffix.
 *
 * \code
 *     // the following expressions return true
 *     snapdev::pathinfo::replace_suffix(
 *               std::string("/usr/share/snapwebsites/replace.cpp")
 *             , ".cpp"
 *             , ".h") == "/usr/share/snapwebsites/replace.h"
 *
 *     snapdev::pathinfo::replace_suffix(
 *               std::string("/usr/share/snapwebsites/replace")
 *             , ".cpp"
 *             , ".h") == "/usr/share/snapwebsites/replace.h"
 *
 *     snapdev::pathinfo::replace_suffix(
 *               std::string("/usr/share/snapwebsites/replace.txt")
 *             , ".*"
 *             , ".h") == "/usr/share/snapwebsites/replace.h"
 * \endcode
 *
 * \note
 * By default, the \p new_suffix parameter is set to the empty string.
 * This means the function can be used to trim the string from
 * \p old_suffix.
 *
 * \todo
 * Add a function which supports an array of \p old_suffix.
 *
 * \tparam StringT  The type of string to parse.
 * \param[in] path  The path from which to replace a suffix.
 * \param[in] old_suffix  If the path ends with that suffix, remove it.
 * \param[in] new_suffix  Append this suffix.
 * \param[in] no_change_on_missing  If old_suffix is missing, do not change
 * the \p path. This also applies to ".*" when the filename has no suffix.
 *
 * \return \p path with its suffix replaced as defined above.
 */
template<class StringT>
StringT replace_suffix(
      StringT const & path
    , typename std::decay<StringT>::type const & old_suffix
    , typename std::decay<StringT>::type const & new_suffix = ""
    , bool no_change_on_missing = false)
{
    typedef typename StringT::size_type size_type;

    if(old_suffix == ".*")
    {
        // when there is no '/', npos + 1 wraps around to 0 which is the
        // start of the filename as expected
        //
        size_type const slash(path.rfind('/') + 1);
        size_type const pos(path.rfind('.'));
        if(pos == StringT::npos
        || pos <= slash)
        {
            // no period in the filename itself: there is no suffix
            //
            if(no_change_on_missing)
            {
                return path;
            }
            return path + new_suffix;
        }
        return path.substr(0, pos) + new_suffix;
    }

    // TODO: with C++20 we could use: path.ends_with(old_suffix)
    //
    size_type const old_len(old_suffix.length());
    if(path.length() >= old_len
    && path.compare(path.length() - old_len, old_len, old_suffix) == 0)
    {
        return path.substr(0, path.length() - old_len) + new_suffix;
    }

    if(no_change_on_missing)
    {
        return path;
    }

    return path + new_suffix;
}
211 :
212 :
/** \brief Retrieve the directory name of a path.
 *
 * This function retrieves the directory name of a path, which is everything
 * found before the last '/'. The returned path is the empty string if the
 * input does not include any '/'. If the only '/' is the very first
 * character, then the root directory ("/") is returned.
 *
 * \code
 *     // the following returns true
 *     snapdev::pathinfo::dirname(
 *               std::string("/usr/share/snapwebsites/in.filename.txt"))
 *                                      == "/usr/share/snapwebsites"
 * \endcode
 *
 * \note
 * The function does not give a trailing '/' any special treatment: the
 * directory name of "/usr/" is "/usr".
 *
 * \tparam StringT  The type of string to parse.
 * \param[in] path  The path from which the directory name gets retrieved.
 *
 * \return The directory name of \p path.
 */
template<class StringT>
StringT dirname(StringT const & path)
{
    typename StringT::size_type const pos(path.rfind('/'));
    if(pos == StringT::npos)
    {
        // no directory at all
        //
        return StringT();
    }

    // when the last '/' is also the first character, the directory is the
    // root; keep that one character (taken from path itself so this works
    // with any character type)
    //
    return path.substr(0, pos == 0 ? 1 : pos);
}
250 :
251 :
/** \brief Check whether filename represents "." or ".."
 *
 * The two special files named "." and ".." are in general not useful in
 * our applications. You can check whether a filename represents one
 * of those files and if so ignore the file altogether.
 *
 * Note that those two filenames do represent valid files. In most cases,
 * you want to ignore them when reading a directory recursively including
 * filenames starting with a period (hidden files).
 *
 * \param[in] filename  The null terminated filename to be checked.
 *
 * \return true if filename is exactly "." or "..", or if it ends with
 * "/." or "/..".
 */
inline bool is_dot_or_dot_dot(char const * filename)
{
    size_t const len(strlen(filename));
    if(len == 0)
    {
        return false;
    }

    // look at the characters backward from the end of the string
    //
    char const * const tail(filename + len);

    // all the accepted forms end with a period
    //
    if(tail[-1] != '.')
    {
        return false;
    }

    // just "."
    //
    if(len == 1)
    {
        return true;
    }

    // ends with "/."
    //
    if(tail[-2] == '/')
    {
        return true;
    }

    // anything else must end with two periods
    //
    if(tail[-2] != '.')
    {
        return false;
    }

    // just ".." or ends with "/.."
    //
    return len == 2 || tail[-3] == '/';
}


/** \brief Check whether filename represents "." or ".."
 *
 * This overload accepts an std::string. It forwards the null terminated
 * version of \p filename to the `char const *` overload.
 *
 * \param[in] filename  The filename to be checked.
 *
 * \return true if filename is exactly "." or "..", or if it ends with
 * "/." or "/..".
 */
inline bool is_dot_or_dot_dot(std::string const & filename)
{
    char const * const name(filename.c_str());
    return is_dot_or_dot_dot(name);
}
313 :
314 :
/** \brief Convert the input \p path in a canonicalized path.
 *
 * This function hands the specified \p path to the system realpath(3)
 * function to canonicalize it. This means:
 *
 * * removing any "/./"
 * * removing any "/../"
 * * replacing softlinks with the target path
 *
 * The resulting path is likely going to be a full path.
 *
 * \note
 * On failure the function returns an empty string. To know whether an
 * error occurred, check `error_msg.empty()` first. If true, then the
 * function did not generate an error.
 *
 * \todo
 * Create a version which does not generate a full path. Instead, we could
 * do the work on realpath(3) and look at only returning a path which is
 * equivalent to what we had on input (i.e. keeping it relative if it
 * were relative on input). Similarly, we could then allow for softlink
 * to be taken or not and completely replace the input or not.
 *
 * \param[in] path  The path to canonicalize.
 * \param[out] error_msg  A variable where we save the error message; it
 * gets cleared on success.
 *
 * \return The canonicalized version of \p path or an empty string with
 * \p error_msg set and errno restored to whatever error realpath(3)
 * returned.
 */
inline std::string realpath(std::string const & path, std::string & error_msg)
{
    char resolved[PATH_MAX + 1];
    resolved[PATH_MAX] = '\0';
    if(::realpath(path.c_str(), resolved) == resolved)
    {
        error_msg.clear();
        return resolved;
    }

    // it failed, save errno before anything else gets a chance to change it
    //
    int const saved_errno(errno);

    // errors for which we have our own description
    //
    char const * reason(nullptr);
    switch(saved_errno)
    {
    case EACCES:
        reason = "is missing permission to read or search a component of the path.";
        break;

    case EIO:
        reason = "had I/O issues while searching.";
        break;

    case ELOOP:
        reason = "found too many symbolic links.";
        break;

    case ENAMETOOLONG:
        reason = "output buffer too small for path.";
        break;

    case ENOENT:
        reason = "could not find the specified file.";
        break;

    case ENOMEM:
        reason = "could not allocate necessary memory.";
        break;

    case ENOTDIR:
        reason = "found a file instead of a directory within the path.";
        break;

    }

    std::string message("realpath(\"");
    message += path;
    message += "\") ";
    if(reason != nullptr)
    {
        message += reason;
    }
    else
    {
        // any other error, use the system description
        //
        message += "failed: ";
        message += strerror(saved_errno);
    }
    error_msg = message;

    // restore errno last so the caller sees the realpath(3) error; this
    // assumes returning an empty std::string does not modify it
    //
    errno = saved_errno;
    return std::string();
}
408 :
409 :
410 : /** \brief Canonicalize a path and filename.
411 : *
412 : * This function concatenate path and filename with a "/" in between and
413 : * then it canonicalize the result.
414 : *
415 : * The canonicalization means that the resulting path will:
416 : *
417 : * \li not include more than one "/" between two names,
418 : * \li not include any "." unless the result would otherwise be the empty
419 : * string then "." is returned instead,
420 : * \li not include a ".." preceeded by a name other than ".."
421 : *
422 : * The \p filename parameter can be the empty string.
423 : *
424 : * \note
425 : * The removal of the ".." is not verifying whether the path is valid on
426 : * the current file system. If you want to do, use the realpath() function
427 : * instead.
428 : *
429 : * \param[in] path The introducer path.
430 : * \param[in] filename A filename to happen to the path.
431 : *
432 : * \return The path and filename canonicalized.
433 : */
434 3 : inline std::string canonicalize(
435 : std::string const & path
436 : , std::string const & filename)
437 : {
438 : bool const is_root(
439 3 : path.empty()
440 4 : ? (filename.empty()
441 1 : ? false
442 0 : : filename[0] == '/')
443 2 : : path[0] == '/');
444 :
445 : // break up the path & filename as segments
446 : //
447 3 : std::vector<std::string> segments;
448 3 : snapdev::tokenize_string(segments, path, "/", true);
449 3 : snapdev::tokenize_string(segments, filename, "/", true);
450 :
451 : // remove ".", they are not useful
452 : //
453 3 : auto no_dot_end(std::remove(segments.begin(), segments.end(), "."));
454 3 : segments.erase(no_dot_end, segments.end());
455 :
456 : // remove "<path>" and ".." when "<path>" is not ".."
457 : //
458 5 : for(auto it(segments.begin()); it != segments.end(); )
459 : {
460 4 : if(*it == ".."
461 0 : && it == segments.begin()
462 4 : && is_root)
463 : {
464 0 : it = segments.erase(it);
465 : }
466 2 : else if(*it == ".." && it != segments.begin() && *(it - 1) != "..")
467 : {
468 0 : it = segments.erase(it - 1, it + 1);
469 : }
470 : else
471 : {
472 2 : ++it;
473 : }
474 : }
475 :
476 : // transform back to a path
477 : //
478 9 : std::string const new_path(join_strings(segments, "/"));
479 3 : if(is_root)
480 : {
481 : // the path is a root path
482 : //
483 2 : return "/" + new_path;
484 : }
485 :
486 : // this is a relative path
487 : //
488 1 : return new_path.empty() ? "." : new_path;
489 3 : }
490 :
491 :
/** \brief Check a path to determine whether it includes a pattern or not.
 *
 * This function scans the specified \p path for a glob() like pattern.
 * We detect the `*`, `?`, and the start of `[` or `{`. Note that the
 * glob function generally views the `[` and `{` as regular characters
 * if these characters are not closed with a matching `]` or `}`
 * respectively, so we only report them when the closing character is
 * found further in the path.
 *
 * The flags below correspond to flags supported by the glob(3) and
 * fnmatch(3) functions.
 *
 * * `GLOB_NOESCAPE`, `FNM_NOESCAPE` -- \p escape
 * * `GLOB_BRACE` -- \p braces
 * * `FNM_EXTMATCH` -- \p extended
 *
 * \param[in] path  The path to check for a pattern.
 * \param[in] escape  Whether to allow the `\\` character to escape
 * metacharacters. The default is true since a path with a pattern is more
 * powerful if the `\\` is properly supported.
 * \param[in] braces  Whether the braces (`{`) are accepted. This is a
 * GNU extension so by default we do not accept braces.
 * \param[in] extended  Whether the extended patterns are supported. GNU
 * supports `#(pattern-list)` where `#` is one of `?*+@!`.
 *
 * \return true if \p path includes one of the glob() metacharacters.
 */
inline bool has_pattern(
      std::string const & path
    , bool escape = true
    , bool braces = false
    , bool extended = false)
{
    // search for `closer` starting at `from`; when `skip_escaped` is true,
    // a backslash hides the character that follows it
    //
    auto const closes = [](char const * from, char closer, bool skip_escaped)
        {
            for(char const * c(from); *c != '\0'; ++c)
            {
                if(skip_escaped && *c == '\\')
                {
                    ++c;
                    if(*c == '\0')
                    {
                        return false;
                    }
                }
                else if(*c == closer)
                {
                    return true;
                }
            }
            return false;
        };

    char const * s(path.c_str());
    while(*s != '\0')
    {
        char const ch(*s);
        if(ch == '\\')
        {
            if(escape)
            {
                // silently skip one character
                //
                ++s;
                if(*s == '\0')
                {
                    return false;
                }
            }
        }
        else if(ch == '*' || ch == '?')
        {
            return true;
        }
        else if(ch == '[')
        {
            // a ']' must be present for '[' to represent a valid character
            // class; backslashes are viewed as themselves within a class
            // definition so they are not handled here
            //
            // also POSIX says that if a '/' is included, then the pattern
            // is invalid; we just ignore that at the moment
            //
            char const * c(s + 1);
            if(*c == '!' || *c == '^') // POSIX does not support '^', bash does, though
            {
                ++c;
            }
            if(*c == ']')
            {
                // a ']' in first position is part of the class
                //
                ++c;
            }
            if(closes(c, ']', false))
            {
                return true;
            }
        }
        else if(ch == '{')
        {
            // a '}' must be present for '{' to represent a valid pattern
            //
            if(braces && closes(s + 1, '}', escape))
            {
                return true;
            }
        }
        else if(ch == '+' || ch == '@' || ch == '!')
        {
            // an extended pattern must be between '(...)'
            //
            if(extended
            && s[1] == '('
            && closes(s + 2, ')', escape))
            {
                return true;
            }
        }

        ++s;
    }

    return false;
}
624 :
625 :
/** \brief Check whether a path is equal to or a child of another path.
 *
 * This function compares \p child against \p parent to see whether
 * \p child is indeed a child of \p parent.
 *
 * For example, the following returns true:
 *
 * \code
 *     snapdev::pathinfo::is_child_path("/var", "/var/lib/");
 * \endcode
 *
 * On the other hand, a \p child which stops before the end of \p parent
 * is not a child, so the following returns false:
 *
 * \code
 *     snapdev::pathinfo::is_child_path("/var/lib", "/var");
 * \endcode
 *
 * All full paths are considered to be a child of the root path `/`.
 *
 * We consider two types of paths: relative and full. Both paths must
 * be either relative or full, otherwise the function returns false.
 *
 * The paths are compared one segment at a time, so if multiple slashes
 * separate some of the names, these are viewed as one slash
 * (i.e. `"/" == "//"`) and trailing slashes are ignored. The "." and ".."
 * segments are not interpreted, they are compared like any other name.
 *
 * If both paths are empty, they are considered equal. If only one of them
 * is empty, the function returns false.
 *
 * \param[in] parent  The parent path.
 * \param[in] child  The child to match against \p parent.
 * \param[in] equal  Return this boolean value if the \p parent equals
 * \p child.
 *
 * \return true if \p child is a child of \p parent, \p equal if both
 * paths are the same, false otherwise.
 */
inline bool is_child_path(
      std::string const & parent
    , std::string const & child
    , bool equal = true)
{
    // paths should not be empty, but if so, handle specially
    //
    if(parent.empty()
    || child.empty())
    {
        return parent.empty() && child.empty() ? equal : false;
    }

    // both paths must be full or relative
    //
    if((parent[0] == '/') != (child[0] == '/'))
    {
        return false;
    }

    char const * p(parent.c_str());
    char const * c(child.c_str());
    for(;;)
    {
        // skip the separators found before the next segment
        //
        while(*p == '/')
        {
            ++p;
        }
        while(*c == '/')
        {
            ++c;
        }

        if(*p == '\0')
        {
            // all the parent segments matched; if the child has more
            // segments, it is a child, otherwise both paths are equal
            //
            return *c == '\0' ? equal : true;
        }
        if(*c == '\0')
        {
            // the parent has more segments than the child
            //
            return false;
        }

        // compare one segment
        //
        while(*p != '/' && *p != '\0' && *p == *c)
        {
            ++p;
            ++c;
        }

        // the segments are equal only if both ended at the same time
        //
        bool const parent_segment_done(*p == '/' || *p == '\0');
        bool const child_segment_done(*c == '/' || *c == '\0');
        if(!parent_segment_done || !child_segment_done)
        {
            return false;
        }
    }
}
714 :
715 :
716 :
717 : } // namespace pathinfo
718 : } // namespace snapdev
719 : // vim: ts=4 sw=4 et
|