3 namespace BookStack\Entities\Tools;
5 use BookStack\Util\HtmlDocument;
11 class PageIncludeParser
/**
 * Pattern used to find include tags, such as "{{@123}}" or "{{@ 123#section}}".
 * Capture group 1 holds the inner tag content, which must begin with a digit.
 */
protected static string $includeTagRegex = "/{{@\s?([0-9].*?)}}/";

/**
 * Elements to clean up and remove if left empty after a parsing operation.
 * Entries are queued by the split/move helpers and consumed by cleanup().
 */
protected array $toCleanup = [];

/**
 * @param HtmlDocument $doc The document searched for, and updated with, include content.
 * @param Closure(PageIncludeTag $tag): PageIncludeContent $pageContentForId
 *        Resolver invoked once per located tag; its result must offer
 *        isInline() and toDomNodes(), as used by parse().
 */
public function __construct(
    protected HtmlDocument $doc,
    protected Closure $pageContentForId,
) {
}
/**
 * Resolve every include tag found in the document, swapping each tag
 * for the DOM nodes of its resolved content.
 * Returns the count of new content DOM nodes added to the document.
 */
public function parse(): int
{
    $addedCount = 0;

    foreach ($this->locateAndIsolateIncludeTags() as $tag) {
        /** @var PageIncludeContent $content */
        $content = $this->pageContentForId->call($this, $tag);

        // Non-inline content is lifted out of any enclosing paragraph
        // before being inserted; inline content is left where the tag sits.
        $paragraph = $content->isInline() ? null : $this->getParentParagraph($tag->domNode);
        if ($paragraph !== null) {
            if ($paragraph === $tag->domNode->parentNode) {
                // Tag is a direct child of the paragraph: split the paragraph around it.
                $this->splitNodeAtChildNode($tag->domNode->parentNode, $tag->domNode);
            } else {
                // Tag is nested deeper: relocate it next to the paragraph instead.
                $this->moveTagNodeToBesideParent($tag, $paragraph);
            }
        }

        $newNodes = $content->toDomNodes();
        $addedCount += count($newNodes);
        $this->replaceNodeWithNodes($tag->domNode, $newNodes);
    }

    $this->cleanup();

    return $addedCount;
}
/**
 * Find the include tags present in the document and give each one a
 * text node of its own, ready for targeted replacement later on.
 * @return PageIncludeTag[]
 */
protected function locateAndIsolateIncludeTags(): array
{
    $foundTags = [];
    $hostElements = $this->doc->queryXPath("//*[text()[contains(., '{{@')]]");

    /** @var DOMNode $host */
    foreach ($hostElements as $host) {
        /** @var DOMNode $child */
        foreach ($host->childNodes as $child) {
            // Only plain text nodes can carry tag text.
            if ($child->nodeName !== '#text') {
                continue;
            }

            foreach ($this->splitTextNodesAtTags($child) as $tag) {
                $foundTags[] = $tag;
            }
        }
    }

    return $foundTags;
}
/**
 * Break a text DOMNode apart around each include tag it contains, so that
 * every tag ends up in a dedicated text node within the same parent.
 * Text before a tag becomes its own node; text after the final tag stays
 * in the original node.
 * Returns found PageIncludeTag references.
 * @return PageIncludeTag[]
 */
protected function splitTextNodesAtTags(DOMNode $textNode): array
{
    $fullText = $textNode->textContent;
    preg_match_all(static::$includeTagRegex, $fullText, $matches, PREG_OFFSET_CAPTURE);

    $tags = [];
    $consumed = 0;
    $host = $textNode->parentNode;

    // Each full match is a [matched text, byte offset] pair.
    foreach ($matches[0] as $index => [$tagOuterContent, $tagStartOffset]) {
        $tagInnerContent = $matches[1][$index][0];

        // Preserve any text sitting between the previous tag and this one.
        if ($tagStartOffset > $consumed) {
            $leadingText = substr($fullText, $consumed, $tagStartOffset - $consumed);
            $host->insertBefore($this->doc->createTextNode($leadingText), $textNode);
        }

        $tagNode = $host->insertBefore($this->doc->createTextNode($tagOuterContent), $textNode);
        $tags[] = new PageIncludeTag($tagInnerContent, $tagNode);
        $consumed = $tagStartOffset + strlen($tagOuterContent);
    }

    // Only rewrite the original node when at least one tag was split off.
    if ($consumed > 0) {
        $textNode->textContent = substr($fullText, $consumed);
    }

    return $tags;
}
/**
 * Swap the given node for the provided replacement nodes, inserted in order
 * at the original node's position. Nodes owned by a different document are
 * deep-imported into the target document first.
 * @param DOMNode[] $replacements
 */
protected function replaceNodeWithNodes(DOMNode $toReplace, array $replacements): void
{
    /** @var DOMDocument $ownerDoc */
    $ownerDoc = $toReplace->ownerDocument;
    $container = $toReplace->parentNode;

    foreach ($replacements as $node) {
        $local = ($node->ownerDocument === $ownerDoc) ? $node : $ownerDoc->importNode($node, true);
        $container->insertBefore($local, $toReplace);
    }

    $container->removeChild($toReplace);
}
/**
 * Relocate a tag node so it sits next to the given parent as a sibling.
 * The side is a best guess: if the tag content first appears within the
 * first half of the parent's text the tag goes before the parent,
 * otherwise it goes after.
 */
protected function moveTagNodeToBesideParent(PageIncludeTag $tag, DOMNode $parent): void
{
    $hostText = $parent->textContent;
    $tagOffset = strpos($hostText, $tag->tagContent);
    $placeBefore = $tagOffset < (strlen($hostText) / 2);

    // The element the tag is leaving may end up empty, so queue it for cleanup.
    $this->toCleanup[] = $tag->domNode->parentNode;

    // A null reference (no next sibling) makes insertBefore append at the end.
    $reference = $placeBefore ? $parent : $parent->nextSibling;
    $parent->parentNode->insertBefore($tag->domNode, $reference);
}
/**
 * Split $parentNode in two around $domNode.
 * A shallow clone of $parentNode (without its id attribute) is placed before
 * the original and receives every child that preceded $domNode; $domNode is
 * then moved out to sit between the clone and the original.
 */
protected function splitNodeAtChildNode(DOMElement $parentNode, DOMNode $domNode): void
{
    // Snapshot the children so moving nodes does not disturb the iteration.
    $siblings = iterator_to_array($parentNode->childNodes, false);
    $splitIndex = array_search($domNode, $siblings, true);
    if ($splitIndex === false) {
        $splitIndex = count($siblings) - 1;
    }

    $leadingHalf = $parentNode->cloneNode();
    if (!$leadingHalf instanceof DOMElement) {
        return;
    }

    $container = $parentNode->parentNode;
    $container->insertBefore($leadingHalf, $parentNode);
    // Drop the id from the clone so the id is not duplicated in the document.
    $leadingHalf->removeAttribute('id');

    /** @var DOMNode $child */
    foreach (array_slice($siblings, 0, $splitIndex) as $child) {
        $leadingHalf->appendChild($child);
    }

    $container->insertBefore($domNode, $parentNode);

    // Either half may now be empty; let cleanup decide whether to remove them.
    array_push($this->toCleanup, $parentNode, $leadingHalf);
}
/**
 * Find the nearest paragraph ("p") element for the given node, checking the
 * node itself first and then each ancestor in turn.
 * Returns null when no paragraph is found.
 */
protected function getParentParagraph(DOMNode $parent): ?DOMNode
{
    for ($node = $parent; $node !== null; $node = $node->parentNode) {
        if (strtolower($node->nodeName) === 'p') {
            return $node;
        }
    }

    return null;
}
/**
 * Tidy up once a parse operation has finished.
 * Each queued element is normalized and, if it has no child nodes, removed;
 * removal then continues upward through any ancestors left without children.
 */
protected function cleanup(): void
{
    foreach ($this->toCleanup as $candidate) {
        $candidate->normalize();

        $current = $candidate;
        while ($current->parentNode !== null && !$current->hasChildNodes()) {
            $owner = $current->parentNode;
            $owner->removeChild($current);
            // Step up: the owner itself may now be empty.
            $current = $owner;
        }
    }
}