From 4f3a8a8d68ce804f5d999559ccf11a42a79e3f8f Mon Sep 17 00:00:00 2001 From: Marc Mutz Date: Tue, 29 Jul 2025 15:16:39 +0200 Subject: =?UTF-8?q?moc:=20rewrite=20mergeStringLiterals()=20to=20avoid=20O?= =?UTF-8?q?(N=C2=B2)=20algorithm?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The old code called QList::erase(it, it) in a loop; which causes quadratic behavior. Fix by writing a std::unique-inspired merge algorithm that works in linear time, at the cost of merging only pairs of consecutive STRING_LITERALs. The old code tried to merge maximal sequences of adjacent STRING_LITERALs. This should not make much of a difference, since QByteArray grows geometrically, so appends (and insert at end - 1 is almost append) take amortized linear time in the final size of the result. Symbol is a QByteArray version of QStringRef (i.e. a cheap substring: string, from, len), except with individual copies of the underlying string, so make sure to rid Symbol::lex of non-lexem() data before merging. Aurélien reports that `lex` may contain a whole file, so this is an important step. Aurélien reports a 6x speedup on one of his benchmark files. Amends 0c9d1f99dac5c118d49e7f2b04f70eae3ba7b837. Reported-by: Aurélien Brooke Done-with: Aurélien Brooke Pick-to: 6.10 6.9 6.8 6.5 Change-Id: I1093a43c3727fde1785795b1aac7417be011bad8 Reviewed-by: Fabian Kosmale Reviewed-by: Aurélien Brooke --- src/tools/moc/preprocessor.cpp | 80 ++++++++++++++++++++++++++++++------------ 1 file changed, 58 insertions(+), 22 deletions(-) (limited to 'src/tools/moc/preprocessor.cpp') diff --git a/src/tools/moc/preprocessor.cpp b/src/tools/moc/preprocessor.cpp index 76bf08e332b..6259f7a4838 100644 --- a/src/tools/moc/preprocessor.cpp +++ b/src/tools/moc/preprocessor.cpp @@ -1003,33 +1003,69 @@ static QByteArray readOrMapFile(QFile *file) return rawInput ? QByteArray::fromRawData(rawInput, size) : file->readAll(); } +void Symbol::mergeStringLiteral(const Symbol &next) +{ + Q_ASSERT(len >= 2); // at least `""` + Q_ASSERT(from + len <= lex.size()); + Q_ASSERT(next.len >= 2); // at least `""` + Q_ASSERT(next.from + next.len <= next.lex.size()); + + if (len != lex.size()) { + // "rubbish" around lexem() in `lex`: clean up (`lex` may be the whole file) + QByteArray l = lexemView().chopped(1) % next.lexemView().sliced(1); + lex = std::move(l); // lexemView() aliases `lex`; only clobber it now + from = 0; + } else { + // like QByteArray::append(), but dealing with the "" around each lexem: + const auto unquoted = next.unquotedLexemView(); + lex.insert(from + len - 1, // before closing `"` + unquoted); + } + len = lex.size(); +} + static void mergeStringLiterals(Symbols &symbols) { - for (Symbols::iterator i = symbols.begin(); i != symbols.end(); ++i) { - if (i->token == STRING_LITERAL) { - Symbols::Iterator mergeSymbol = i; - qsizetype literalsLength = mergeSymbol->len; - while (++i != symbols.end() && i->token == STRING_LITERAL) - literalsLength += i->len - 2; // no quotes - - if (literalsLength != mergeSymbol->len) { - QByteArray mergeSymbolOriginalLexem = mergeSymbol->unquotedLexem(); - QByteArray &mergeSymbolLexem = mergeSymbol->lex; - mergeSymbolLexem.resize(0); - mergeSymbolLexem.reserve(literalsLength); - mergeSymbolLexem.append('"'); - mergeSymbolLexem.append(mergeSymbolOriginalLexem); - for (Symbols::iterator j = mergeSymbol + 1; j != i; ++j) - mergeSymbolLexem.append(j->lex.constData() + j->from + 1, j->len - 2); // append j->unquotedLexem() - mergeSymbolLexem.append('"'); - mergeSymbol->len = mergeSymbol->lex.size(); - mergeSymbol->from = 0; - i = symbols.erase(mergeSymbol + 1, i); + // like std::unique, but merges instead of skips adjacent STRING_LITERALs: + + const auto mergeable = [](const Symbol &lhs, const Symbol &rhs) { + return lhs.token == STRING_LITERAL && rhs.token == STRING_LITERAL; + }; + + auto end = symbols.end(); + auto it = std::adjacent_find(symbols.begin(), symbols.end(), mergeable); + if (it == end) // none found + return; + + // we know `it`, `it + 1` are both STRING_LITERAL (adjacent_find post-condition) + // in particular: it + 1 < end + + auto dst = it; + auto lit = dst; + ++it; + lit->mergeStringLiteral(*it); + + while (++it != end) { + // Loop Invariants: + // - [begin(), dst] is already processed + // - `lit` is the last string literal + // - we can merge if lit == dst + // - [it, end[ still to be checked + if (it->token == STRING_LITERAL) { + if (lit == dst) { // can merge + lit->mergeStringLiteral(*it); + } else { // can't merge: not adjacent to previous STRING_LITERAL + *++dst = std::move(*it); + lit = dst; // remember that this was a literal } - if (i == symbols.end()) - break; + } else { + *++dst = std::move(*it); } } + + ++dst; + + symbols.erase(dst, end); } static QByteArray searchIncludePaths(const QList &includepaths, -- cgit v1.2.3