QRestReply: allow comments where white-space is allowed

This is RFC2822 grammar (except that line folding is not allowed). RFC9110 doesn't allow it anymore, but it might make sense to accept it nonetheless (Postel's Law). Pick-to: 6.7 Task-number: QTBUG-123544 Change-Id: Ie990cd332c7603dbdae29c19b2804bd33a058ca0 Reviewed-by: Juha Vuolle <juha.vuolle@qt.io>
author: Marc Mutz <marc.mutz@qt.io> 2024-04-18 16:14:46 +0200
committer: Marc Mutz <marc.mutz@qt.io> 2024-05-03 01:05:07 +0200
commit: 834e7d60a9cb67a1d445151a4f7c34d4288db01b (patch)
tree: d92cf48cc907ec638113aab6b3e8924cbf88239e /src/network/access/qrestreply.cpp
parent: e53df7a0218ff2d88668a33a0d9cf8bbe40e4933 (diff)
1 files changed, 56 insertions, 8 deletions
diff --git a/src/network/access/qrestreply.cpp b/src/network/access/qrestreply.cpp
index 155e5877fdc..cdd264ba6eb 100644
--- a/src/network/access/qrestreply.cpp
+++ b/src/network/access/qrestreply.cpp
@@ -420,6 +420,54 @@ static constexpr bool is_tchar(char ch) noexcept
     }
 }
 
+static constexpr auto parse_comment(QByteArrayView data) noexcept
+{
+    struct R {
+        QByteArrayView comment, tail;
+        constexpr explicit operator bool() const noexcept { return !comment.isEmpty(); }
+    };
+
+    const auto invalid = R{{}, data}; // preserves original `data`
+
+    // comment        = "(" *( ctext / quoted-pair / comment ) ")"
+    // ctext          = HTAB / SP / %x21-27 / %x2A-5B / %x5D-7E / obs-text
+
+    if (!data.startsWith('('))
+        return invalid;
+
+    qsizetype i = 1;
+    qsizetype level = 1;
+    while (i < data.size()) {
+        switch (data[i++]) {
+        case '(': // nested comment
+            ++level;
+            break;
+        case ')': // end of comment
+            if (--level == 0)
+                return R{data.first(i), data.sliced(i)};
+            break;
+        case '\\': // quoted-pair
+            if (i == data.size())
+                return invalid; // premature end
+            ++i; // eat escaped character
+            break;
+        default:
+            ; // don't validate ctext - accept everything (Postel's Law)
+        }
+    }
+
+    return invalid; // premature end / unbalanced nesting levels
+}
+
+static constexpr void eat_CWS(QByteArrayView &data) noexcept
+{
+    eat_OWS(data);
+    while (const auto comment = parse_comment(data)) {
+        data = comment.tail;
+        eat_OWS(data);
+    }
+}
+
 static constexpr auto parse_token(QByteArrayView data) noexcept
 {
     struct R {
@@ -452,13 +500,13 @@ static constexpr auto parse_parameter(QByteArrayView data, qxp::function_ref<voi
         return invalid;
     data = name.tail;
 
-    eat_OWS(data); // not in the grammar, but accepted under Postel's Law
+    eat_CWS(data); // not in the grammar, but accepted under Postel's Law
 
     if (!data.startsWith('='))
         return invalid;
     data = data.sliced(1);
 
-    eat_OWS(data); // not in the grammar, but accepted under Postel's Law
+    eat_CWS(data); // not in the grammar, but accepted under Postel's Law
 
     if (Q_UNLIKELY(data.startsWith('"'))) { // value is a quoted-string
 
@@ -488,27 +536,27 @@ static auto parse_content_type(QByteArrayView data)
         constexpr explicit operator bool() const noexcept { return !type.isEmpty(); }
     };
 
-    eat_OWS(data); // not in the grammar, but accepted under Postel's Law
+    eat_CWS(data); // not in the grammar, but accepted under Postel's Law
 
     const auto type = parse_token(data);
     if (!type)
         return R{};
     data = type.tail;
 
-    eat_OWS(data); // not in the grammar, but accepted under Postel's Law
+    eat_CWS(data); // not in the grammar, but accepted under Postel's Law
 
     if (!data.startsWith('/'))
         return R{};
     data = data.sliced(1);
 
-    eat_OWS(data); // not in the grammar, but accepted under Postel's Law
+    eat_CWS(data); // not in the grammar, but accepted under Postel's Law
 
     const auto subtype = parse_token(data);
     if (!subtype)
         return R{};
     data = subtype.tail;
 
-    eat_OWS(data);
+    eat_CWS(data);
 
     auto r = R{QLatin1StringView{type.token}, QLatin1StringView{subtype.token}, {}};
 
@@ -516,7 +564,7 @@ static auto parse_content_type(QByteArrayView data)
 
         data = data.sliced(1); // eat ';'
 
-        eat_OWS(data);
+        eat_CWS(data);
 
         const auto param = parse_parameter(data, [&](char ch) { r.charset.append(1, ch); });
         if (param.name.compare("charset"_L1, Qt::CaseInsensitive) == 0) {
@@ -530,7 +578,7 @@ static auto parse_content_type(QByteArrayView data)
         // otherwise, continue (accepting e.g. `;;`)
         data = param.tail;
 
-        eat_OWS(data);
+        eat_CWS(data);
     }
 
     return r; // no charset found
author	Marc Mutz <marc.mutz@qt.io>	2024-04-18 16:14:46 +0200
committer	Marc Mutz <marc.mutz@qt.io>	2024-05-03 01:05:07 +0200
commit	834e7d60a9cb67a1d445151a4f7c34d4288db01b (patch)
tree	d92cf48cc907ec638113aab6b3e8924cbf88239e /src/network/access/qrestreply.cpp
parent	e53df7a0218ff2d88668a33a0d9cf8bbe40e4933 (diff)