summaryrefslogtreecommitdiffstats
path: root/src/tools/moc/preprocessor.cpp
diff options
context:
space:
mode:
authorMarc Mutz <marc.mutz@qt.io>2025-07-29 15:16:39 +0200
committerMarc Mutz <marc.mutz@qt.io>2025-08-01 15:46:03 +0000
commit4f3a8a8d68ce804f5d999559ccf11a42a79e3f8f (patch)
tree66d3f211d3940b1f8690fb7770f3db98b0962f56 /src/tools/moc/preprocessor.cpp
parentf0f45d1bcd4b1f34b7f47fb9f0458c38d93e6da3 (diff)
moc: rewrite mergeStringLiterals() to avoid O(N²) algorithm
The old code called QList::erase(it, it) in a loop; which causes quadratic behavior. Fix by writing a std::unique-inspired merge algorithm that works in linear time, at the cost of merging only pairs of consecutive STRING_LITERALs. The old code tried to merge maximal sequences of adjacent STRING_LITERALs. This should not make much of a difference, since QByteArray grows geometrically, so appends (and insert at end - 1 is almost append) take amortized linear time in the final size of the result. Symbol is a QByteArray version of QStringRef (i.e. a cheap substring: string, from, len), except with individual copies of the underlying string, so make sure to rid Symbol::lex of non-lexem() data before merging. Aurélien reports that `lex` may contain a whole file, so this is an important step. Aurélien reports a 6x speedup on one of his benchmark files. Amends 0c9d1f99dac5c118d49e7f2b04f70eae3ba7b837. Reported-by: Aurélien Brooke <aurelien@bahiasoft.fr> Done-with: Aurélien Brooke <aurelien@bahiasoft.fr> Pick-to: 6.10 6.9 6.8 6.5 Change-Id: I1093a43c3727fde1785795b1aac7417be011bad8 Reviewed-by: Fabian Kosmale <fabian.kosmale@qt.io> Reviewed-by: Aurélien Brooke <aurelien@bahiasoft.fr>
Diffstat (limited to 'src/tools/moc/preprocessor.cpp')
-rw-r--r--src/tools/moc/preprocessor.cpp80
1 files changed, 58 insertions, 22 deletions
diff --git a/src/tools/moc/preprocessor.cpp b/src/tools/moc/preprocessor.cpp
index 76bf08e332b..6259f7a4838 100644
--- a/src/tools/moc/preprocessor.cpp
+++ b/src/tools/moc/preprocessor.cpp
@@ -1003,33 +1003,69 @@ static QByteArray readOrMapFile(QFile *file)
return rawInput ? QByteArray::fromRawData(rawInput, size) : file->readAll();
}
+void Symbol::mergeStringLiteral(const Symbol &next)
+{
+ Q_ASSERT(len >= 2); // at least `""`
+ Q_ASSERT(from + len <= lex.size());
+ Q_ASSERT(next.len >= 2); // at least `""`
+ Q_ASSERT(next.from + next.len <= next.lex.size());
+
+ if (len != lex.size()) {
+ // "rubbish" around lexem() in `lex`: clean up (`lex` may be the whole file)
+ QByteArray l = lexemView().chopped(1) % next.lexemView().sliced(1);
+ lex = std::move(l); // lexemView() aliases `lex`; only clobber it now
+ from = 0;
+ } else {
+ // like QByteArray::append(), but dealing with the "" around each lexem:
+ const auto unquoted = next.unquotedLexemView();
+ lex.insert(from + len - 1, // before closing `"`
+ unquoted);
+ }
+ len = lex.size();
+}
+
static void mergeStringLiterals(Symbols &symbols)
{
- for (Symbols::iterator i = symbols.begin(); i != symbols.end(); ++i) {
- if (i->token == STRING_LITERAL) {
- Symbols::Iterator mergeSymbol = i;
- qsizetype literalsLength = mergeSymbol->len;
- while (++i != symbols.end() && i->token == STRING_LITERAL)
- literalsLength += i->len - 2; // no quotes
-
- if (literalsLength != mergeSymbol->len) {
- QByteArray mergeSymbolOriginalLexem = mergeSymbol->unquotedLexem();
- QByteArray &mergeSymbolLexem = mergeSymbol->lex;
- mergeSymbolLexem.resize(0);
- mergeSymbolLexem.reserve(literalsLength);
- mergeSymbolLexem.append('"');
- mergeSymbolLexem.append(mergeSymbolOriginalLexem);
- for (Symbols::iterator j = mergeSymbol + 1; j != i; ++j)
- mergeSymbolLexem.append(j->lex.constData() + j->from + 1, j->len - 2); // append j->unquotedLexem()
- mergeSymbolLexem.append('"');
- mergeSymbol->len = mergeSymbol->lex.size();
- mergeSymbol->from = 0;
- i = symbols.erase(mergeSymbol + 1, i);
+ // like std::unique, but merges instead of skips adjacent STRING_LITERALs:
+
+ const auto mergeable = [](const Symbol &lhs, const Symbol &rhs) {
+ return lhs.token == STRING_LITERAL && rhs.token == STRING_LITERAL;
+ };
+
+ auto end = symbols.end();
+ auto it = std::adjacent_find(symbols.begin(), symbols.end(), mergeable);
+ if (it == end) // none found
+ return;
+
+ // we know `it`, `it + 1` are both STRING_LITERAL (adjacent_find post-condition)
+ // in particular: it + 1 < end
+
+ auto dst = it;
+ auto lit = dst;
+ ++it;
+ lit->mergeStringLiteral(*it);
+
+ while (++it != end) {
+ // Loop Invariants:
+ // - [begin(), dst] is already processed
+ // - `lit` is the last string literal
+ // - we can merge if lit == dst
+ // - [it, end[ still to be checked
+ if (it->token == STRING_LITERAL) {
+ if (lit == dst) { // can merge
+ lit->mergeStringLiteral(*it);
+ } else { // can't merge: not adjacent to previous STRING_LITERAL
+ *++dst = std::move(*it);
+ lit = dst; // remember that this was a literal
}
- if (i == symbols.end())
- break;
+ } else {
+ *++dst = std::move(*it);
}
}
+
+ ++dst;
+
+ symbols.erase(dst, end);
}
static QByteArray searchIncludePaths(const QList<Parser::IncludePath> &includepaths,