diff options
| author | Magdalena Stojek <magdalena.stojek@qt.io> | 2025-03-27 14:26:15 +0100 |
|---|---|---|
| committer | Magdalena Stojek <magdalena.stojek@qt.io> | 2025-04-29 16:00:12 +0200 |
| commit | 776dbdce7b1f50c97220e74a8b4f4cc08814d346 (patch) | |
| tree | a4785d9370a67743ee8fc97853c2d8f675e35494 /src/corelib/serialization/qxmlstream.cpp | |
| parent | 9319f790604c89acafdefd8c7dd97f90bcc9a37b (diff) | |
QXmlStreamReader: add support for retrieving raw inner XML content
This change introduces a new function, readRawInnerData(), that returns
the raw inner XML content of the current element, including nested tags,
comments, CDATA, and processing instructions.
[ChangeLog][QtCore][QXmlStreamReader] Added readRawInnerData() for
retrieving the raw inner XML content of an element.
Fixes: QTBUG-85141
Change-Id: I96dd0790d726cf8a196125384cbf8f8fa2587880
Reviewed-by: Ivan Solovev <ivan.solovev@qt.io>
Diffstat (limited to 'src/corelib/serialization/qxmlstream.cpp')
| -rw-r--r-- | src/corelib/serialization/qxmlstream.cpp | 109 |
1 files changed, 109 insertions, 0 deletions
diff --git a/src/corelib/serialization/qxmlstream.cpp b/src/corelib/serialization/qxmlstream.cpp index 7e14b098de9..77c13e9e766 100644 --- a/src/corelib/serialization/qxmlstream.cpp +++ b/src/corelib/serialization/qxmlstream.cpp @@ -778,6 +778,115 @@ void QXmlStreamReader::skipCurrentElement() } } +/*! + Reads and returns the raw inner XML content of the current element. + This function is useful for retrieving the full contents embedded inside + an element, including nested tags, text, comments, processing instructions, + CDATA sections, and other markup — preserving the original XML structure. + + The current element is the element matching the most recently parsed start + element of which a matching end element has not yet been reached. When the + parser has reached the end element, the current element becomes the parent + element. + + \note Entity references defined in the DTD are resolved during parsing + and returned as plain text, since DTD declarations are processed + separately and are not part of the element’s content. + Only the five predefined XML entities (\c <, \c >, \c &, + \c ', \c ") are re-escaped in the output. + + \since 6.10 +*/ +QString QXmlStreamReader::readRawInnerData() +{ + Q_D(QXmlStreamReader); + QString raw; + + auto specialToEntities = [](QStringView text, QString &output) { + qsizetype chunk = 0; + QLatin1StringView replacement; + const qsizetype sz = text.size(); + for (qsizetype i = 0; i < sz; ++i) { + switch (text[i].unicode()) { + case '<': + replacement = "<"_L1; + break; + case '>': + replacement = ">"_L1; + break; + case '&': + replacement = "&"_L1; + break; + case '"': + replacement = """_L1; + break; + case '\'': + replacement = "'"_L1; + break; + default: + continue; + } + if (chunk < i) + output += text.mid(chunk, i - chunk); + output += replacement; + chunk = i + 1; + } + if (chunk < text.size()) + output += text.mid(chunk); + }; + + if (isStartElement()) { + int depth = 1; + while (!atEnd() && depth) { + switch (readNext()) { + case StartElement: { + raw += '<'_L1 + name(); + const QXmlStreamAttributes attrs = attributes(); + for (auto it = attrs.begin(); it != attrs.end(); ++it) { + raw += ' '_L1 + it->name() + "=\""_L1; + specialToEntities(it->value(), raw); + raw += '"'_L1; + } + raw += '>'_L1; + ++depth; + break; + } + case EndElement: + --depth; + if (depth > 0) + raw += "</"_L1 + name() + '>'_L1; + break; + case Characters: + if (isCDATA()) + raw += "<![CDATA["_L1 + text() + "]]>"_L1; + else + specialToEntities(text(), raw); + break; + case Comment: + raw += "<!--"_L1 + text() + "-->"_L1; + break; + case EntityReference: + raw += '&'_L1 + name() + ';'_L1; + break; + case ProcessingInstruction: + raw += "<?"_L1 + processingInstructionTarget() + + ' '_L1 + processingInstructionData() + + "?>"_L1; + break; + Q_FALLTHROUGH(); + default: + if (!hasError()) { + d->raiseError(NotWellFormedError, + QXmlStream::tr("Unexpected token while " + "reading raw inner data.")); + } + return raw; + } + } + } + return raw; +} + static constexpr auto QXmlStreamReader_tokenTypeString = qOffsetStringArray( "NoToken", "Invalid", |
