summaryrefslogtreecommitdiffstats
path: root/src/corelib/serialization/qxmlstream.cpp
diff options
context:
space:
mode:
authorThiago Macieira <thiago.macieira@intel.com>2024-02-15 15:04:18 -0800
committerThiago Macieira <thiago.macieira@intel.com>2024-04-18 14:35:09 -0700
commit94c62e322264e2e7d61193ae74ba8556a330385c (patch)
tree294690436c07cb22159545f38daed4538c51e8fe /src/corelib/serialization/qxmlstream.cpp
parent17c964c4e874ab59a2af7859ae23f5cb4ad01d36 (diff)
QXmlStreamWriter: decode UTF-8 into code points
We were iterating over code *units* and that yielded wrong results. The one from the bug report was simply caused by the fact that QUtf8StringView::value_type is char, which is signed on x86, so the expression: *it <= u'\x1F' was true for all non-Latin1 content. But in attempting to fix this, I needed to do the proper UTF-8 decoding, as otherwise we wouldn't catch non-Latin1 sequences and such. [ChangeLog][QtCore][QXmlStreamWriter] Fixed a bug that caused the class to fail to write UTF-8 strings with non-US-ASCII content when passed as a QUtf8StringView. Fixes: QTBUG-122241 Pick-to: 6.5 6.6 6.7 Change-Id: I83dda2d36c904517b3c0fffd17b42bbf09a493d0 Reviewed-by: Mate Barany <mate.barany@qt.io>
Diffstat (limited to 'src/corelib/serialization/qxmlstream.cpp')
-rw-r--r--src/corelib/serialization/qxmlstream.cpp46
1 files changed, 36 insertions, 10 deletions
diff --git a/src/corelib/serialization/qxmlstream.cpp b/src/corelib/serialization/qxmlstream.cpp
index 56330027372..f0fad77a085 100644
--- a/src/corelib/serialization/qxmlstream.cpp
+++ b/src/corelib/serialization/qxmlstream.cpp
@@ -2963,54 +2963,80 @@ void QXmlStreamWriterPrivate::write(QAnyStringView s)
void QXmlStreamWriterPrivate::writeEscaped(QAnyStringView s, bool escapeWhitespace)
{
+ struct NextLatin1 {
+ char32_t operator()(const char *&it, const char *) const
+ { return uchar(*it++); }
+ };
+ struct NextUtf8 {
+ char32_t operator()(const char *&it, const char *end) const
+ {
+ uchar uc = *it++;
+ char32_t utf32 = 0;
+ char32_t *output = &utf32;
+ qsizetype n = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(uc, output, it, end);
+ return n < 0 ? 0 : utf32;
+ }
+ };
+ struct NextUtf16 {
+ char32_t operator()(const QChar *&it, const QChar *) const
+ {
+ return (it++)->unicode();
+ }
+ };
+
QString escaped;
escaped.reserve(s.size());
s.visit([&] (auto s) {
using View = decltype(s);
+ using Decoder = std::conditional_t<std::is_same_v<View, QLatin1StringView>, NextLatin1,
+ std::conditional_t<std::is_same_v<View, QUtf8StringView>, NextUtf8, NextUtf16>>;
auto it = s.begin();
const auto end = s.end();
+ Decoder decoder;
while (it != end) {
QLatin1StringView replacement;
auto mark = it;
while (it != end) {
- if (*it == u'<') {
+ auto next_it = it;
+ char32_t uc = decoder(next_it, end);
+ if (uc == u'<') {
replacement = "&lt;"_L1;
break;
- } else if (*it == u'>') {
+ } else if (uc == u'>') {
replacement = "&gt;"_L1;
break;
- } else if (*it == u'&') {
+ } else if (uc == u'&') {
replacement = "&amp;"_L1;
break;
- } else if (*it == u'\"') {
+ } else if (uc == u'\"') {
replacement = "&quot;"_L1;
break;
- } else if (*it == u'\t') {
+ } else if (uc == u'\t') {
if (escapeWhitespace) {
replacement = "&#9;"_L1;
break;
}
- } else if (*it == u'\n') {
+ } else if (uc == u'\n') {
if (escapeWhitespace) {
replacement = "&#10;"_L1;
break;
}
- } else if (*it == u'\v' || *it == u'\f') {
+ } else if (uc == u'\v' || uc == u'\f') {
hasEncodingError = true;
break;
- } else if (*it == u'\r') {
+ } else if (uc == u'\r') {
if (escapeWhitespace) {
replacement = "&#13;"_L1;
break;
}
- } else if (*it <= u'\x1F' || *it >= u'\uFFFE') {
+ } else if (uc <= u'\x1F' || uc == u'\uFFFE' || uc == u'\uFFFF') {
hasEncodingError = true;
break;
}
- ++it;
+ it = next_it;
}
escaped.append(View{mark, it});