Line data Source code
1 : // Snap Websites Server -- DOM helper functions
2 : // Copyright (c) 2011-2019 Made to Order Software Corp. All Rights Reserved
3 : //
4 : // This program is free software; you can redistribute it and/or modify
5 : // it under the terms of the GNU General Public License as published by
6 : // the Free Software Foundation; either version 2 of the License, or
7 : // (at your option) any later version.
8 : //
9 : // This program is distributed in the hope that it will be useful,
10 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : // GNU General Public License for more details.
13 : //
14 : // You should have received a copy of the GNU General Public License
15 : // along with this program; if not, write to the Free Software
16 : // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 :
18 :
19 : // self
20 : //
21 : #include "snapwebsites/qdomhelpers.h"
22 :
23 :
24 : // snapwebsites
25 : //
26 : #include "snapwebsites/qstring_stream.h"
27 : #include "snapwebsites/snap_string_list.h"
28 :
29 :
30 : // Qt lib
31 : //
32 : #include <QTextStream>
33 :
34 :
35 : // C++ lib
36 : //
37 : #include <iostream>
38 :
39 :
40 : // last include
41 : //
42 : #include <snapdev/poison.h>
43 :
44 :
45 :
46 :
47 :
48 :
49 : namespace snap
50 : {
51 : namespace snap_dom
52 : {
53 :
54 :
55 : /** \brief Retrieve a tag, create it if it doesn't exist.
56 : *
57 : * This function searches for an element which is expected to exist and
58 : * have one instance. If not found, it creates it (by default, you may
59 : * prevent the creation by setting the \p create parameter to false.)
60 : *
61 : * The result is the tag set to the tag we've found if the function
62 : * returns true. When false is returned, tag is not modified.
63 : *
64 : * \param[in] tag_name The name of the tag to search or create.
65 : * \param[in] element The parent element of the tag to find or create.
66 : * \param[in] tag The found tag (i.e. the answer of this function.)
67 : * \param[in] create Whether the tag is created if it doesn't exist yet.
68 : *
69 : * \return true when the tag was found or created and can be returned.
70 : */
71 0 : bool get_tag(QString const & tag_name, QDomElement & element, QDomElement & tag, bool create)
72 : {
73 0 : QDomNodeList all_tags(element.elementsByTagName(tag_name));
74 0 : switch(all_tags.count())
75 : {
76 0 : case 0:
77 0 : if(create)
78 : {
79 : // missing, create a new one and retrieve it back out
80 0 : tag = element.ownerDocument().createElement(tag_name);
81 0 : element.appendChild(tag);
82 : }
83 : else
84 : {
85 0 : return false;
86 : }
87 0 : break;
88 :
89 0 : case 1:
90 : // we have it already!
91 : {
92 0 : QDomNode node(all_tags.at(0));
93 0 : if(!node.isElement())
94 : {
95 0 : return false;
96 : }
97 0 : tag = node.toElement();
98 : }
99 0 : break;
100 :
101 0 : default:
102 : // we have a problem here
103 0 : return false;
104 :
105 : }
106 :
107 0 : return true;
108 : }
109 :
110 :
111 : /** \brief Useful function to append a string of text to a QDomNode.
112 : *
113 : * This function appends a node of text at the end of the specified \p node.
114 : * This is simply creating a text node and appending it.
115 : *
116 : * \note
117 : * This is equivalent to insert_html_string_to_xml_doc() when \p plain_text
118 : * does not include any tags or entities.
119 : *
120 : * \param[in,out] node DOM element where the plain text is to be inserted.
121 : * \param[in] plain_text The plain text to append.
122 : */
123 0 : void append_plain_text_to_node(QDomNode & node, QString const & plain_text)
124 : {
125 0 : QDomText text(node.ownerDocument().createTextNode(plain_text));
126 0 : node.appendChild(text);
127 0 : }
128 :
129 :
130 : /** \brief Useful function to append an integer to a QDomNode.
131 : *
132 : * The function creates a Text node set to the integer converted to
133 : * ASCII and append the result to the specified child.
134 : *
135 : * \param[in,out] node The child where the integer is appended.
136 : * \param[in] integer The integer to append to the node.
137 : */
138 0 : void append_integer_to_node(QDomNode & node, int64_t integer)
139 : {
140 0 : QDomText text(node.ownerDocument().createTextNode(QString("%1").arg(integer)));
141 0 : node.appendChild(text);
142 0 : }
143 :
144 :
145 : /** \brief Useful function that transforms a QString to XML.
146 : *
147 : * When inserting a string in the XML document and that string may include
148 : * HTML code, call this function, it will first convert the string to XML
149 : * then insert the result as children of the \p node element.
150 : *
151 : * \warning
152 : * If the string is plain text, YOU are responsible for converting the
153 : * \<, \>, and \& characters before calling this function. Or maybe just
154 : * make use of the doc.createTextNode(plain_text) function.
155 : *
156 : * \param[in,out] node DOM element receiving the result as children nodes.
157 : * \param[in] xml The input XML string.
158 : */
159 0 : void insert_html_string_to_xml_doc(QDomNode & node, QString const & xml)
160 : {
161 : // parsing the XML can be slow, try to avoid that if possible
162 0 : for(QChar const * s(xml.data()); !s->isNull(); ++s)
163 : {
164 0 : switch(s->unicode())
165 : {
166 0 : case '<':
167 : case '>':
168 : case '&':
169 : // this requires the full XML round trip
170 : {
171 0 : QDomDocument xml_doc("wrapper");
172 0 : xml_doc.setContent("<wrapper>" + xml + "</wrapper>", true, nullptr, nullptr, nullptr);
173 0 : insert_node_to_xml_doc(node, xml_doc.documentElement());
174 : }
175 0 : return;
176 :
177 : }
178 : }
179 :
180 0 : append_plain_text_to_node(node, xml);
181 : }
182 :
183 :
184 : /** \brief Insert a node's children in a node of another document.
185 : *
186 : * This function copies all the children of the specified \p node
187 : * at the end of the child node.
188 : *
189 : * The source and destination documents do not need to be the same.
190 : *
191 : * \param[in,out] child The destination node.
192 : * \param[in] node The source element node.
193 : */
194 0 : void insert_node_to_xml_doc(QDomNode & child, QDomNode const & node)
195 : {
196 : // copy the result in a fragment of our document
197 0 : QDomDocumentFragment frag(child.ownerDocument().createDocumentFragment());
198 0 : frag.appendChild(child.ownerDocument().importNode(node, true));
199 :
200 : // copy the fragment nodes at the right place
201 0 : QDomNodeList children(frag.firstChild().childNodes());
202 :
203 0 : QDomNode previous;
204 0 : while(!children.isEmpty())
205 : {
206 0 : QDomNode l(children.at(0));
207 0 : if(previous.isNull())
208 : {
209 : // the first time append at the end of the existing data
210 0 : child.appendChild(l);
211 : }
212 : else
213 : {
214 0 : child.insertAfter(l, previous);
215 : }
216 0 : previous = l;
217 : }
218 0 : }
219 :
220 :
221 : /** \brief Transform a node into a string including all the tags.
222 : *
223 : * This function transforms a node to a string. The node is included
224 : * in the result.
225 : *
226 : * \warning
227 : * The node element name will appear in the result. If you do not
228 : * want the root node tag to appear in the output, use the
229 : * xml_children_to_string() instead.
230 : *
231 : * \param[in] node The node to transform to a string.
232 : *
233 : * \return The converted node.
234 : *
235 : * \sa xml_children_to_string()
236 : */
237 0 : QString xml_to_string(QDomNode const & node)
238 : {
239 0 : QString buffer;
240 0 : QTextStream stream(&buffer);
241 0 : stream.setCodec("UTF-8");
242 0 : node.save(stream, -1);
243 0 : return buffer;
244 : }
245 :
246 :
247 : /** \brief Transform a node into a string including all its children.
248 : *
249 : * This function transforms all the children of a node to a string.
250 : * The node itself is not included in the result.
251 : *
252 : * \param[in] node The node to transform to a string.
253 : *
254 : * \return The converted node.
255 : *
256 : * \sa xml_to_string()
257 : */
258 0 : QString xml_children_to_string(QDomNode const & node)
259 : {
260 0 : QString buffer;
261 0 : if(node.hasChildNodes())
262 : {
263 0 : QTextStream stream(&buffer);
264 0 : stream.setCodec("UTF-8");
265 0 : for(QDomNode n(node.firstChild()); !n.isNull(); n = n.nextSibling())
266 : {
267 0 : n.save(stream, 0);
268 : }
269 : }
270 0 : return buffer;
271 : }
272 :
273 :
274 : /** \brief Useful function that transforms a QString to HTML.
275 : *
276 : * When inserting a string in the HTML document and that string may include
277 : * HTML code, call this function, it will first convert the string to HTML
278 : * then insert the result as children of the \p child element.
279 : *
280 : * The HTML has to be 100% XML compatible.
281 : *
282 : * \param[in,out] child DOM element receiving the result as children nodes.
283 : * \param[in] html The input HTML string.
284 : */
285 0 : void replace_node_with_html_string(QDomNode & replace, QString const & html)
286 : {
287 : // parsing the XML can be slow, try to avoid that if possible
288 0 : for(QChar const * s(html.data()); !s->isNull(); ++s)
289 : {
290 0 : switch(s->unicode())
291 : {
292 0 : case '<':
293 : case '>':
294 : case '&':
295 : // this requires the full XML round trip
296 : {
297 0 : QDomDocument xml_doc("wrapper");
298 0 : xml_doc.setContent("<wrapper>" + html + "</wrapper>", true, nullptr, nullptr, nullptr);
299 0 : replace_node_with_elements(replace, xml_doc.documentElement());
300 : }
301 0 : return;
302 :
303 : }
304 : }
305 :
306 : // plain text is faster
307 0 : QDomText text(replace.toText());
308 0 : text.setData(html);
309 : }
310 :
311 :
312 : /** \brief Replace a node with another.
313 : *
314 : * This function replaces the node \p replace with the node \p node.
315 : *
316 : * Note that the function creates a copy of \p node as if it were from
317 : * another document.
318 : *
319 : * \param[in,out] replace The node to be replaced.
320 : * \param[in] node The source node to copy in place of \p replace.
321 : */
322 0 : void replace_node_with_elements(QDomNode & replace, QDomNode const & node)
323 : {
324 0 : QDomNode parent(replace.parentNode());
325 :
326 : // copy the result in a fragment of our document
327 0 : QDomDocumentFragment frag(replace.ownerDocument().createDocumentFragment());
328 0 : frag.appendChild(replace.ownerDocument().importNode(node, true));
329 :
330 : // copy the fragment nodes at the right place
331 0 : QDomNodeList children(frag.firstChild().childNodes());
332 :
333 0 : QDomNode previous(replace);
334 0 : while(!children.isEmpty())
335 : {
336 0 : QDomNode l(children.at(0));
337 0 : parent.insertAfter(l, previous);
338 0 : previous = l;
339 : }
340 :
341 : // got replaced, now remove that node
342 0 : parent.removeChild(replace);
343 0 : }
344 :
345 :
346 : /** \brief Delete all the children of a given element node.
347 : *
348 : * This function loops until all the children of a given element node
349 : * were removed.
350 : *
351 : * \param[in,out] parent The node from which all the children should be
352 : * removed.
353 : */
354 0 : void remove_all_children(QDomElement& parent)
355 : {
356 : for(;;)
357 : {
358 : // Note: I use the last child because it is much more likely that
359 : // this way we avoid a memmove() of the vector of children
360 0 : QDomNode child(parent.lastChild());
361 0 : if(child.isNull())
362 : {
363 0 : return;
364 : }
365 0 : parent.removeChild(child);
366 0 : }
367 : }
368 :
369 :
370 : /** \brief Get a specific element from a DOM document.
371 : *
372 : * This function returns the first element (tag) with the specified name.
373 : * In most cases this will represent the tag defined in a layout XML file
374 : * although it is not required to be.
375 : *
376 : * Note that the function could return an element from the HTML or other
377 : * data found in that XML document if such tags are present as is.
378 : *
379 : * \exception snap_logic_exception
380 : * The logic exception is raised if the tag cannot be found. If the
381 : * must_exist parameter is set to false, then this exception is not raised.
382 : *
383 : * \param[in] doc The document being searched for the specific element.
384 : * \param[in] name The name of the element to retrieve.
385 : * \param[in] must_exist If true and the element cannot be found, throw.
386 : *
387 : * \return The element found in the document.
388 : */
389 0 : QDomElement get_element(QDomDocument & doc, QString const & name, bool must_exist)
390 : {
391 0 : QDomNodeList elements(doc.elementsByTagName(name));
392 0 : if(elements.isEmpty())
393 : {
394 : // this should never happen because we do explicitly create this
395 : // <page> tag before calling this function
396 0 : if(must_exist)
397 : {
398 0 : throw snap_logic_exception(QString("<%1> tag not found in the body DOM").arg(name));
399 : }
400 0 : return QDomElement();
401 : }
402 :
403 0 : QDomElement element(elements.at(0).toElement());
404 0 : if(must_exist && element.isNull())
405 : {
406 : // we just got a tag, this is really impossible!?
407 0 : throw snap_logic_exception(QString("<%1> tag not a DOM Element???").arg(name));
408 : }
409 :
410 0 : return element;
411 : }
412 :
413 :
414 : /** \brief Get a specific child element defined by path under parent.
415 : *
416 : * Starting from the node \p parent search the children as defined by
417 : * \p path. The process checks whether each child already exists, if
418 : * so then it goes on in the search.
419 : *
420 : * Although this could be done with our xpath implementation, it is a lot
421 : * faster to find the tag you are looking for. Note that if there are
422 : * multiple tags with the same name at any level, only the first one is
423 : * used.
424 : *
425 : * \attention
426 : * Again, the function gets the FIRST of each tag it finds. If you want
427 : * to get all the children, use the QDomXPath instead.
428 : *
429 : * \note
430 : * The type of parent is set to QDomNode even though an element is required
431 : * because that way we do not force the caller to convert the node.
432 : *
433 : * \param[in,out] parent The node from which children are added (i.e. body).
434 : * \param[in] path The path representing the child to retrieve.
435 : *
436 : * \return The element found, may be a null node (isNull() is true).
437 : */
438 0 : QDomElement get_child_element(QDomNode parent, QString const& path)
439 : {
440 : #ifdef _DEBUG
441 0 : if(path.startsWith("/"))
442 : {
443 0 : throw snap_logic_exception(QString("path \"%1\" for get_child_element cannot start with a slash").arg(path));
444 : }
445 : #endif
446 :
447 : // This is not necessary at this point, unless we want to err?
448 : //if(parent.isNull())
449 : //{
450 : // // we cannot add anything starting from a null node
451 : // // (TBD: should we err instead?)
452 : // return parent.toElement();
453 : //}
454 :
455 0 : snap_string_list const p(path.split('/'));
456 :
457 0 : int const max_children(p.size());
458 0 : for(int i(0); i < max_children && !parent.isNull(); ++i)
459 : {
460 0 : QString const name(p[i]);
461 0 : if(name.isEmpty())
462 : {
463 : // skip in case of a "//" or starting "/"
464 0 : continue;
465 : }
466 0 : parent = parent.firstChildElement(name);
467 : }
468 :
469 : // the parent parameter becomes the child most item along
470 : // the course of this function
471 0 : return parent.toElement();
472 : }
473 :
474 :
475 : /** \brief Create the elements defined by path under parent.
476 : *
477 : * Starting from the node \p parent create each child as defined by
478 : * \p path. The process checks whether each child already exists, if
479 : * so then it doesn't re-create them (this is important to understand,
480 : * this function does not append new tags.)
481 : *
482 : * This is particularly useful when dealing with XML documents where you
483 : * have to add many tags at different locations and you do not know whether
484 : * there is already a tag there.
485 : *
486 : * \attention
487 : * The function gets the FIRST of each tag it finds. So if you want to
488 : * create a child named \<foo\> and there are 3 tags named that way
489 : * under \p parent, then the first one will be used.
490 : *
491 : * \note
492 : * This function is similar to a get_element() with a path if all the
493 : * elements in \p path already exist.
494 : *
495 : * \note
496 : * The type of parent is set to QDomNode even though an element is required
497 : * because that way we do not force the caller to convert the node.
498 : *
499 : * \param[in,out] parent The node from which children are added (i.e. body).
500 : * \param[in] path The path representing the children to create.
501 : */
502 0 : QDomElement create_element(QDomNode parent, QString const& path)
503 : {
504 : #ifdef _DEBUG
505 0 : if(path.startsWith("/"))
506 : {
507 0 : throw snap_logic_exception(QString("path \"%1\" for create_element cannot start with a slash").arg(path));
508 : }
509 : #endif
510 :
511 0 : if(parent.isNull())
512 : {
513 : // we cannot add anything starting from a null node
514 : // (TBD: should we err instead?)
515 0 : return parent.toElement();
516 : }
517 :
518 0 : snap_string_list p(path.split('/'));
519 :
520 0 : QDomDocument doc(parent.ownerDocument());
521 :
522 0 : int const max_children(p.size());
523 0 : for(int i(0); i < max_children; ++i)
524 : {
525 0 : QString const name(p[i]);
526 0 : if(name.isEmpty())
527 : {
528 : // skip in case of a "//" or starting "/"
529 0 : continue;
530 : }
531 0 : QDomNode child(parent.firstChildElement(name));
532 0 : if(child.isNull())
533 : {
534 0 : child = doc.createElement(name);
535 0 : parent.appendChild(child);
536 : }
537 0 : parent = child;
538 : }
539 :
540 : // the parent parameter becomes the child most item along
541 : // the course of this function
542 0 : return parent.toElement();
543 : }
544 :
545 :
546 : /** \brief Remove tags from a string of HTML.
547 : *
548 : * This function is used to transform the specified \p html string to
549 : * plain text without any tags.
550 : *
551 : * To do so, it puts the string in a wrapper in an QDomDocument and
552 : * then retrieves the text from the wrapper.
553 : *
554 : * \todo
555 : * We may want to support any type of entities which I think the current
556 : * implementation will fail to convert (because XML is limited to three:
557 : * \&, \<, \>.)
558 : *
559 : * \param[in] html The input that includes tags.
560 : *
561 : * \return The text found in the html string if any.
562 : */
563 0 : QString remove_tags(QString const & html)
564 : {
565 0 : QDomDocument doc;
566 : // TBD: shall we make sure that this 'html' string is compatible XML?
567 0 : doc.setContent("<wrapper>" + html + "</wrapper>", true, nullptr, nullptr, nullptr);
568 0 : QDomElement wrapper(doc.documentElement());
569 0 : return wrapper.text();
570 : }
571 :
572 :
573 : /** \brief Encode entities converting plain text to a valid HTML string.
574 : *
575 : * Somehow the linker cannot find the Qt::escape() function so we
576 : * have our own version here.
577 : *
578 : * \note
579 : * The function transforms the double quote (") character to "
580 : * so the resulting string can be used as an attribute value quoted
581 : * with double quotes:
582 : *
583 : * \code
584 : * QString html(QString("<a href=\"%1\">Click Here</a>")
585 : * .arg(snap_dom::escape("This \"string\" here"));
586 : * \endcode
587 : *
588 : * \param[in] str The string to transform.
589 : *
590 : * \return The converted string.
591 : */
592 0 : QString escape(QString const & str)
593 : {
594 0 : QString result;
595 0 : result.reserve(str.length() * 112 / 100 + 20);
596 :
597 0 : for(QChar const *s(str.data()); s->unicode() != '\0'; ++s)
598 : {
599 0 : switch(s->unicode())
600 : {
601 0 : case '&':
602 0 : result += "&";
603 0 : break;
604 :
605 0 : case '<':
606 0 : result += "<";
607 0 : break;
608 :
609 0 : case '>':
610 0 : result += ">";
611 0 : break;
612 :
613 0 : case '"':
614 0 : result += """;
615 0 : break;
616 :
617 0 : default:
618 0 : result += *s;
619 0 : break;
620 :
621 : }
622 : }
623 :
624 0 : return result;
625 : }
626 :
627 :
628 : /** \brief Decode entities converting a string to plain text.
629 : *
630 : * When receiving certain strings from the website, they may include
631 : * HTML entities even though you want to consider the string as plain
632 : * text which means entities need to be changed to plain text.
633 : *
634 : * Qt offers a function called escape() which transforms plain text
635 : * to HTML with entities (so for example \< becomes \<,) but for
636 : * some weird reason they do not offer an unescape() function...
637 : *
638 : * \param[in] str The string where HTML characters need to be transformed
639 : * to regular characters.
640 : *
641 : * \return The resulting unescaped string.
642 : */
643 0 : QString unescape(QString const & str)
644 : {
645 0 : QString result;
646 0 : result.reserve(str.length() + 10);
647 :
648 0 : QString name;
649 0 : name.reserve(25);
650 :
651 0 : for(QChar const *s(str.data()); s->unicode() != '\0'; )
652 : {
653 0 : if(s->unicode() == '&')
654 : {
655 0 : ++s;
656 0 : bool const number(s->unicode() == '#');
657 0 : if(number)
658 : {
659 : // numerical
660 0 : ++s;
661 : }
662 : // named/number
663 0 : name.resize(0);
664 0 : for(int i(0); i < 20 && s->unicode() != '\0' && s->unicode() != ';' && !s->isSpace(); ++i, ++s)
665 : {
666 0 : name += *s;
667 : }
668 0 : if(s->unicode() == ';')
669 : {
670 0 : ++s;
671 : }
672 0 : uint c('\0');
673 0 : if(number)
674 : {
675 0 : bool ok(false);
676 0 : if(name[0] == 'x')
677 : {
678 : // hexadecimal
679 0 : name.remove(0, 1);
680 0 : c = name.toLongLong(&ok, 16);
681 : }
682 : else
683 : {
684 0 : c = name.toLongLong(&ok, 10);
685 : }
686 0 : if(!ok)
687 : {
688 0 : c = '\0';
689 : }
690 : }
691 0 : else if(name == "quot")
692 : {
693 0 : c = '"';
694 : }
695 0 : else if(name == "apos")
696 : {
697 0 : c = '\'';
698 : }
699 0 : else if(name == "lt")
700 : {
701 0 : c = '<';
702 : }
703 0 : else if(name == "gt")
704 : {
705 0 : c = '>';
706 : }
707 0 : else if(name == "amp")
708 : {
709 0 : c = '&';
710 : }
711 : // TODO: add all the names supported by browsers (HTML)
712 0 : if(c != 0)
713 : {
714 0 : if(QChar::requiresSurrogates(c))
715 : {
716 0 : result += QChar::highSurrogate(c);
717 0 : result += QChar::lowSurrogate(c);
718 : }
719 : else
720 : {
721 0 : result += QChar(c);
722 : }
723 : }
724 : }
725 : else
726 : {
727 0 : result += *s;
728 0 : ++s;
729 : }
730 : }
731 :
732 0 : return result;
733 : }
734 :
735 :
736 :
737 : } // namespace snap_dom
738 6 : } // namespace snap
739 : // vim: ts=4 sw=4 et
|