{
protected static string $includeTagRegex = "/{{@\s?([0-9].*?)}}/";
+ /**
+ * Elements to clean up and remove if left empty after a parsing operation.
+ * Filled while tags are split out of, or moved beside, their parents during parse(),
+ * then worked through by cleanup().
+ * @var DOMElement[]
+ */
+ protected array $toCleanup = [];
+
+ /**
+ * @param HtmlDocument $doc Document searched for include tags; parse() alters it in place.
+ * @param Closure(PageIncludeTag $tag): PageIncludeContent $pageContentForId
+ * Called once per located include tag to provide the content which replaces that tag.
+ */
public function __construct(
- protected string $pageHtml,
+ protected HtmlDocument $doc,
protected Closure $pageContentForId,
) {
}
- public function parse(): string
+ /**
+ * Parse out the include tags.
+ * Each located tag is replaced with the DOM nodes of the content provided for it.
+ * Where that content is not inline and the tag sits within a paragraph, the paragraph
+ * is first split around the tag (tag is a direct child of the paragraph) or the tag is
+ * moved to sit beside the paragraph (tag is nested deeper), so that the replacement
+ * content is placed outside of the paragraph rather than within it.
+ * Elements left empty by those operations are removed before returning.
+ * Returns the count of new content DOM nodes added to the document.
+ */
+ public function parse(): int
{
- $doc = new HtmlDocument($this->pageHtml);
-
- $tags = $this->locateAndIsolateIncludeTags($doc);
- $topLevel = [...$doc->getBodyChildren()];
+ $nodesAdded = 0;
+ $tags = $this->locateAndIsolateIncludeTags();
foreach ($tags as $tag) {
- $htmlContent = $this->pageContentForId->call($this, $tag->getPageId());
- $content = new PageIncludeContent($htmlContent, $tag);
+ /** @var PageIncludeContent $content */
+ $content = $this->pageContentForId->call($this, $tag);
if (!$content->isInline()) {
- $isParentTopLevel = in_array($tag->domNode->parentNode, $topLevel, true);
- if ($isParentTopLevel) {
+ $parentP = $this->getParentParagraph($tag->domNode);
+ $isWithinParentP = $parentP === $tag->domNode->parentNode;
+ if ($parentP && $isWithinParentP) {
$this->splitNodeAtChildNode($tag->domNode->parentNode, $tag->domNode);
- } else {
- $this->promoteTagNodeToBody($tag, $doc->getBody());
+ } else if ($parentP) {
+ $this->moveTagNodeToBesideParent($tag, $parentP);
}
}
- $this->replaceNodeWithNodes($tag->domNode, $content->toDomNodes());
+ $replacementNodes = $content->toDomNodes();
+ $nodesAdded += count($replacementNodes);
+ $this->replaceNodeWithNodes($tag->domNode, $replacementNodes);
}
- // TODO Notes: May want to eventually parse through backwards, which should avoid issues
- // in changes affecting the next tag, where tags may be in the same/adjacent nodes.
+ $this->cleanup();
- return $doc->getBodyInnerHtml();
+ return $nodesAdded;
}
/**
* own nodes in the DOM for future targeted manipulation.
* @return PageIncludeTag[]
*/
- protected function locateAndIsolateIncludeTags(HtmlDocument $doc): array
+ protected function locateAndIsolateIncludeTags(): array
{
- $includeHosts = $doc->queryXPath("//body//*[contains(text(), '{{@')]");
+ $includeHosts = $this->doc->queryXPath("//*[text()[contains(., '{{@')]]");
$includeTags = [];
/** @var DOMNode $node */
- /** @var DOMNode $childNode */
foreach ($includeHosts as $node) {
+ /** @var DOMNode $childNode */
foreach ($node->childNodes as $childNode) {
if ($childNode->nodeName === '#text') {
array_push($includeTags, ...$this->splitTextNodesAtTags($childNode));
}
/**
+ * Replace the given node with all those in $replacements
* @param DOMNode[] $replacements
*/
protected function replaceNodeWithNodes(DOMNode $toReplace, array $replacements): void
foreach ($replacements as $replacement) {
if ($replacement->ownerDocument !== $targetDoc) {
- $replacement = $targetDoc->adoptNode($replacement);
+ $replacement = $targetDoc->importNode($replacement, true);
}
$toReplace->parentNode->insertBefore($replacement, $toReplace);
$toReplace->parentNode->removeChild($toReplace);
}
- protected function promoteTagNodeToBody(PageIncludeTag $tag, DOMNode $body): void
+ /**
+ * Move a tag node to become a sibling of the given parent.
+ * Will attempt to guess a position based upon the tag content within the parent:
+ * the tag is placed before the parent when its content starts within the first half
+ * of the parent's text content (measured in bytes), otherwise directly after the parent.
+ * The tag's previous direct parent is queued for cleanup, so it is removed later if left empty.
+ * NOTE(review): if the tag content is not found in the parent text, strpos() returns false,
+ * which compares as below the midpoint for any non-empty parent text, so the tag is placed before.
+ */
+ protected function moveTagNodeToBesideParent(PageIncludeTag $tag, DOMNode $parent): void
{
- /** @var DOMNode $topParent */
- $topParent = $tag->domNode->parentNode;
- while ($topParent->parentNode !== $body) {
- $topParent = $topParent->parentNode;
- }
-
- $parentText = $topParent->textContent;
+ $parentText = $parent->textContent;
$tagPos = strpos($parentText, $tag->tagContent);
$before = $tagPos < (strlen($parentText) / 2);
+ $this->toCleanup[] = $tag->domNode->parentNode;
if ($before) {
- $body->insertBefore($tag->domNode, $topParent);
+ $parent->parentNode->insertBefore($tag->domNode, $parent);
} else {
- $body->insertBefore($tag->domNode, $topParent->nextSibling);
+ $parent->parentNode->insertBefore($tag->domNode, $parent->nextSibling);
}
}
+ /**
+ * Splits the given $parentNode at the location of the $domNode within it.
+ * Attempts to replicate the original $parentNode via a shallow clone (with any "id"
+ * attribute removed) placed before it, moving the children which precede $domNode
+ * into that clone, before adding the $domNode between the clone and the original.
+ * If $domNode is not found among the children, the index of the last child is used
+ * as the split position.
+ * Both the clone and the original are queued for cleanup, so whichever is left
+ * empty gets removed later.
+ */
protected function splitNodeAtChildNode(DOMElement $parentNode, DOMNode $domNode): void
{
$children = [...$parentNode->childNodes];
- $splitPos = array_search($domNode, $children, true) ?: count($children);
+ $splitPos = array_search($domNode, $children, true);
+ if ($splitPos === false) {
+ $splitPos = count($children) - 1;
+ }
+
$parentClone = $parentNode->cloneNode();
+ $parentNode->parentNode->insertBefore($parentClone, $parentNode);
$parentClone->removeAttribute('id');
- /** @var DOMNode $child */
for ($i = 0; $i < $splitPos; $i++) {
- $child = $children[0];
+ /** @var DOMNode $child */
+ $child = $children[$i];
$parentClone->appendChild($child);
}
- if ($parentClone->hasChildNodes()) {
- $parentNode->parentNode->insertBefore($parentClone, $parentNode);
- }
-
$parentNode->parentNode->insertBefore($domNode, $parentNode);
- $parentClone->normalize();
- $parentNode->normalize();
- if (!$parentNode->hasChildNodes()) {
- $parentNode->remove();
- }
- if (!$parentClone->hasChildNodes()) {
- $parentClone->remove();
+ $this->toCleanup[] = $parentNode;
+ $this->toCleanup[] = $parentClone;
+ }
+
+ /**
+ * Get the parent paragraph of the given node, if existing.
+ * The search starts at the given node itself and then walks up through its ancestors,
+ * returning the first node named "p" (compared case-insensitively).
+ * Returns null when no such node is found.
+ */
+ protected function getParentParagraph(DOMNode $parent): ?DOMNode
+ {
+ do {
+ if (strtolower($parent->nodeName) === 'p') {
+ return $parent;
+ }
+
+ $parent = $parent->parentNode;
+ } while ($parent !== null);
+
+ return null;
+ }
+
+ /**
+ * Cleanup after a parse operation.
+ * Removes stranded elements we may have left during the parse.
+ * Each queued element is normalized and, if it then has no child nodes, removed from
+ * its parent. Removal continues upwards through any ancestors which are themselves
+ * left without child nodes as a result.
+ * NOTE(review): the queue is not reset here, so queued elements remain listed
+ * for any later call - confirm that is intended.
+ */
+ protected function cleanup(): void
+ {
+ foreach ($this->toCleanup as $element) {
+ $element->normalize();
+ while ($element->parentNode && !$element->hasChildNodes()) {
+ $parent = $element->parentNode;
+ $parent->removeChild($element);
+ $element = $parent;
+ }
}
}
}