BookStack Code Mirror - bookstack/blob - app/Entities/Tools/PageIncludeParser.php
Maintenance: Continued work towards PHPstan level 2
[bookstack] / app / Entities / Tools / PageIncludeParser.php
1 <?php
2
3 namespace BookStack\Entities\Tools;
4
5 use BookStack\Util\HtmlDocument;
6 use Closure;
7 use DOMDocument;
8 use DOMElement;
9 use DOMNode;
10
class PageIncludeParser
{
    /**
     * Pattern used to find include tags within text content.
     * Matches "{{@", an optional single whitespace character, then captures
     * content that starts with a digit (lazily) up to the closing "}}".
     */
    protected static string $includeTagRegex = "/{{@\s?([0-9].*?)}}/";

    /**
     * Elements to clean up and remove if left empty after a parsing operation.
     * @var DOMElement[]
     */
    protected array $toCleanup = [];

    /**
     * @param HtmlDocument $doc The document to search for, and replace, include tags within.
     * @param Closure(PageIncludeTag $tag): PageIncludeContent $pageContentForId
     *        Callback providing the content to insert for a located tag.
     *        It is invoked via Closure::call(), so `$this` inside the callback
     *        is this parser instance for the duration of the call.
     */
    public function __construct(
        protected HtmlDocument $doc,
        protected Closure $pageContentForId,
    ) {
    }

    /**
     * Parse out the include tags, replacing each with the content
     * provided by the callback given at construction.
     * Returns the count of new content DOM nodes added to the document.
     */
    public function parse(): int
    {
        $nodesAdded = 0;

        // All tags are located & isolated up-front, before any replacement is made.
        foreach ($this->locateAndIsolateIncludeTags() as $tag) {
            /** @var PageIncludeContent $content */
            $content = $this->pageContentForId->call($this, $tag);

            // Non-inline content is moved out of any paragraph wrapping the tag,
            // either by splitting the paragraph or by moving the tag beside it.
            if (!$content->isInline()) {
                $parentP = $this->getParentParagraph($tag->domNode);
                if ($parentP !== null) {
                    $isWithinParentP = $parentP === $tag->domNode->parentNode;
                    if ($isWithinParentP && $parentP instanceof DOMElement) {
                        $this->splitNodeAtChildNode($parentP, $tag->domNode);
                    } else {
                        $this->moveTagNodeToBesideParent($tag, $parentP);
                    }
                }
            }

            $replacementNodes = $content->toDomNodes();
            $nodesAdded += count($replacementNodes);
            $this->replaceNodeWithNodes($tag->domNode, $replacementNodes);
        }

        $this->cleanup();

        return $nodesAdded;
    }

    /**
     * Locate include tags within the given document, isolating them to their
     * own nodes in the DOM for future targeted manipulation.
     * @return PageIncludeTag[]
     */
    protected function locateAndIsolateIncludeTags(): array
    {
        // Any element with a direct text child containing the tag opening sequence.
        $includeHosts = $this->doc->queryXPath("//*[text()[contains(., '{{@')]]");
        $includeTags = [];

        /** @var DOMNode $node */
        foreach ($includeHosts as $node) {
            // Iterate over a snapshot of the children since splitting a text node
            // inserts new sibling nodes into this same (live) child list.
            /** @var DOMNode $childNode */
            foreach ([...$node->childNodes] as $childNode) {
                if ($childNode->nodeName === '#text') {
                    array_push($includeTags, ...$this->splitTextNodesAtTags($childNode));
                }
            }
        }

        return $includeTags;
    }

    /**
     * Takes a text DOMNode and splits its text content at include tags
     * into multiple text nodes within the original parent.
     * Text before/between tags and each tag itself become new text nodes inserted
     * before the original node, which is left holding any text after the last tag.
     * Returns found PageIncludeTag references.
     * @return PageIncludeTag[]
     */
    protected function splitTextNodesAtTags(DOMNode $textNode): array
    {
        $parent = $textNode->parentNode;
        $text = $textNode->textContent;

        // Nothing to do if detached, if there are no matches, or if matching failed.
        $matchCount = preg_match_all(static::$includeTagRegex, $text, $matches, PREG_OFFSET_CAPTURE);
        if ($parent === null || !$matchCount) {
            return [];
        }

        $includeTags = [];
        $currentOffset = 0;

        // Offsets from PREG_OFFSET_CAPTURE are byte offsets, matching substr()/strlen().
        foreach ($matches[0] as $index => [$tagOuterContent, $tagStartOffset]) {
            $tagInnerContent = $matches[1][$index][0];

            if ($currentOffset < $tagStartOffset) {
                $previousText = substr($text, $currentOffset, $tagStartOffset - $currentOffset);
                $parent->insertBefore($this->doc->createTextNode($previousText), $textNode);
            }

            $node = $parent->insertBefore($this->doc->createTextNode($tagOuterContent), $textNode);
            $includeTags[] = new PageIncludeTag($tagInnerContent, $node);
            $currentOffset = $tagStartOffset + strlen($tagOuterContent);
        }

        // Only touch the original node's content if something was split off from it.
        if ($currentOffset > 0) {
            $textNode->textContent = substr($text, $currentOffset);
        }

        return $includeTags;
    }

    /**
     * Replace the given node with all those in $replacements.
     * Replacements owned by another document are deep-imported into the target's document.
     * The given node is removed even when there are no replacements.
     * @param DOMNode[] $replacements
     */
    protected function replaceNodeWithNodes(DOMNode $toReplace, array $replacements): void
    {
        /** @var DOMDocument $targetDoc */
        $targetDoc = $toReplace->ownerDocument;
        $parent = $toReplace->parentNode;

        foreach ($replacements as $replacement) {
            if ($replacement->ownerDocument !== $targetDoc) {
                $replacement = $targetDoc->importNode($replacement, true);
            }

            $parent->insertBefore($replacement, $toReplace);
        }

        $parent->removeChild($toReplace);
    }

    /**
     * Move a tag node to become a sibling of the given parent.
     * Will attempt to guess a position based upon the tag content within the parent:
     * before the parent if the tag content starts in the first half of the
     * parent's text, otherwise after it.
     */
    protected function moveTagNodeToBesideParent(PageIncludeTag $tag, DOMNode $parent): void
    {
        $parentText = $parent->textContent;

        // strpos() returns false when not found; treat that explicitly as the
        // start of the text instead of relying on bool/number comparison rules.
        $tagPos = strpos($parentText, $tag->tagContent);
        if ($tagPos === false) {
            $tagPos = 0;
        }

        // Both values are byte-based so the comparison is consistent.
        $before = $tagPos < (strlen($parentText) / 2);

        // Track the element the tag is being moved out of, since it may be left empty.
        $originalHost = $tag->domNode->parentNode;
        if ($originalHost instanceof DOMElement) {
            $this->toCleanup[] = $originalHost;
        }

        // A null reference node (no next sibling) results in an append.
        $referenceNode = $before ? $parent : $parent->nextSibling;
        $parent->parentNode->insertBefore($tag->domNode, $referenceNode);
    }

    /**
     * Splits the given $parentNode at the location of the $domNode within it.
     * Attempts to replicate the original $parentNode, moving some of their parent
     * children in where needed, before adding the $domNode between.
     * The replica holds the children before $domNode and does not keep the
     * original's "id" attribute; the original keeps the children after $domNode.
     */
    protected function splitNodeAtChildNode(DOMElement $parentNode, DOMNode $domNode): void
    {
        $children = [...$parentNode->childNodes];
        $splitPos = array_search($domNode, $children, true);
        if ($splitPos === false) {
            $splitPos = count($children) - 1;
        }

        // Shallow clone: copies the element & its attributes but not its children.
        $parentClone = $parentNode->cloneNode();
        if (!($parentClone instanceof DOMElement)) {
            return;
        }

        $container = $parentNode->parentNode;
        $parentClone->removeAttribute('id');
        $container->insertBefore($parentClone, $parentNode);

        /** @var DOMNode $child */
        foreach (array_slice($children, 0, $splitPos) as $child) {
            $parentClone->appendChild($child);
        }

        $container->insertBefore($domNode, $parentNode);

        // Either side of the split may have been left without content.
        $this->toCleanup[] = $parentNode;
        $this->toCleanup[] = $parentClone;
    }

    /**
     * Get the parent paragraph of the given node, if existing.
     * The given node itself is also checked, before walking up its ancestors.
     */
    protected function getParentParagraph(DOMNode $parent): ?DOMNode
    {
        for ($node = $parent; $node !== null; $node = $node->parentNode) {
            if (strtolower($node->nodeName) === 'p') {
                return $node;
            }
        }

        return null;
    }

    /**
     * Cleanup after a parse operation.
     * Removes stranded elements we may have left during the parse, continuing
     * up through ancestors which become empty as a result.
     */
    protected function cleanup(): void
    {
        foreach ($this->toCleanup as $element) {
            // Normalizing first drops empty text nodes so they don't count as content.
            $element->normalize();
            while ($element->parentNode && !$element->hasChildNodes()) {
                $parent = $element->parentNode;
                $parent->removeChild($element);
                $element = $parent;
            }
        }

        // Reset so that a later parse does not re-process these elements.
        $this->toCleanup = [];
    }
}