summary | refs | log | tree | commit | diff | stats
path: root/src/corelib/serialization/qxmlstream.cpp
diff options
context:
space:
mode:
author Marc Mutz <marc.mutz@qt.io> 2025-03-13 17:45:16 +0100
committer Marc Mutz <marc.mutz@qt.io> 2025-08-27 07:10:45 +0200
commit f5d936642b18c5d06942171530a164499e4e9113 (patch)
tree c32edafedcd638ba6d6c50e163c5988e028aeb56 /src/corelib/serialization/qxmlstream.cpp
parent a3d7182e034f9813a796a8c12861dabdbf8d78ed (diff)
QUtf8Functions: wrap fromUtf8() in a simpler API

Coverity had an issue with how we called this function to get the next character from a UTF-8 sequence. In particular, it complained about passing the address of a char32_t object to fromUtf8(), which "treats it as an array", as Coverity notes.

The C++ standard says this is fine¹, but callers of this function, specifically, are almost by definition security-critical, so we shouldn't leave Coverity issues unfixed in them.

The fix is to use an array of one element instead: char32_t[1]. We have already applied this fix in qstring.cpp (4eb9e0d3eedfc1b6de968308167af01b19f6ffe7) and qurlrecode.cpp (7a32a2238f52217bc4f0dc4c9620a2a2d350a1ca), but there are more cases in QXmlStream and QStringConverter, some of which this patch fixes.

In order to simplify the task, package up the fix in a small function for easier reuse and so that the proverbial maintenance programmer is less likely to undo these fixes again. Incidentally, this function makes those callers that can use it much more readable than before.

Many other calls (incl. the one in qurlrecode.cpp and some others in qstringconverter.cpp) cannot be ported over, as they are using the return value of fromUtf8(), or have already advanced the input pointer themselves when they call fromUtf8().

Amends 94c62e322264e2e7d61193ae74ba8556a330385c, ea0a08c898fed9cfd8d8eb16613e352740d3eb02 and b977ae371a753a82e1d0bb32c5b62099da663721.

¹ https://eel.is/c++draft/basic.compound#3.sentence-11

Pick-to: 6.10 6.9 6.8
Coverity-Id: 378348
Coverity-Id: 403740
Coverity-Id: 403748
Coverity-Id: 459945
Change-Id: I4957ea2ee7b6e0e9c361eb2bcd4351708762a6e9
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src/corelib/serialization/qxmlstream.cpp')
-rw-r--r-- src/corelib/serialization/qxmlstream.cpp 16
1 files changed, 11 insertions, 5 deletions
diff --git a/src/corelib/serialization/qxmlstream.cpp b/src/corelib/serialization/qxmlstream.cpp
index edaf73fbe57..9cd90fa9d65 100644
--- a/src/corelib/serialization/qxmlstream.cpp
+++ b/src/corelib/serialization/qxmlstream.cpp
@@ -3241,11 +3241,17 @@ void QXmlStreamWriterPrivate::writeEscaped(QAnyStringView s, bool escapeWhitespa
struct NextUtf8 {
NextResult operator()(const char *&it, const char *end) const
{
- uchar uc = *it++;
- char32_t utf32 = 0;
- char32_t *output = &utf32;
- qsizetype n = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(uc, output, it, end);
- return n < 0 ? NextResult{0, true} : NextResult{utf32, false};
+ // We can have '\0' in the text, and it should be reported as
+ // Error::InvalidCharacter, not as Error::Encoding
+ constexpr char32_t invalidValue = 0xFFFFFFFF;
+ static_assert(invalidValue > QChar::LastValidCodePoint);
+ auto i = reinterpret_cast<const qchar8_t *>(it);
+ const auto old_i = i;
+ const auto e = reinterpret_cast<const qchar8_t *>(end);
+ const char32_t result = QUtf8Functions::nextUcs4FromUtf8(i, e, invalidValue);
+ it += i - old_i;
+ return result == invalidValue ? NextResult{U'\0', true}
+ : NextResult{result, false};
}
};
struct NextUtf16 {