diff options
| author | Marc Mutz <marc.mutz@qt.io> | 2025-07-29 15:16:39 +0200 |
|---|---|---|
| committer | Marc Mutz <marc.mutz@qt.io> | 2025-08-01 15:46:03 +0000 |
| commit | 4f3a8a8d68ce804f5d999559ccf11a42a79e3f8f (patch) | |
| tree | 66d3f211d3940b1f8690fb7770f3db98b0962f56 /src/tools/moc/preprocessor.cpp | |
| parent | f0f45d1bcd4b1f34b7f47fb9f0458c38d93e6da3 (diff) | |
moc: rewrite mergeStringLiterals() to avoid O(N²) algorithm
The old code called QList::erase(it, it) in a loop; which causes
quadratic behavior.
Fix by writing a std::unique-inspired merge algorithm that works in
linear time, at the cost of merging only pairs of consecutive
STRING_LITERALs. The old code tried to merge maximal sequences of
adjacent STRING_LITERALs. This should not make much of a difference,
since QByteArray grows geometrically, so appends (and insert at end -
1 is almost append) take amortized linear time in the final size of
the result.
Symbol is a QByteArray version of QStringRef (i.e. a cheap substring:
string, from, len), except with individual copies of the underlying
string, so make sure to rid Symbol::lex of non-lexem() data before
merging. Aurélien reports that `lex` may contain a whole file, so this
is an important step.
Aurélien reports a 6x speedup on one of his benchmark files.
Amends 0c9d1f99dac5c118d49e7f2b04f70eae3ba7b837.
Reported-by: Aurélien Brooke <aurelien@bahiasoft.fr>
Done-with: Aurélien Brooke <aurelien@bahiasoft.fr>
Pick-to: 6.10 6.9 6.8 6.5
Change-Id: I1093a43c3727fde1785795b1aac7417be011bad8
Reviewed-by: Fabian Kosmale <fabian.kosmale@qt.io>
Reviewed-by: Aurélien Brooke <aurelien@bahiasoft.fr>
Diffstat (limited to 'src/tools/moc/preprocessor.cpp')
| -rw-r--r-- | src/tools/moc/preprocessor.cpp | 80 |
1 files changed, 58 insertions, 22 deletions
diff --git a/src/tools/moc/preprocessor.cpp b/src/tools/moc/preprocessor.cpp index 76bf08e332b..6259f7a4838 100644 --- a/src/tools/moc/preprocessor.cpp +++ b/src/tools/moc/preprocessor.cpp @@ -1003,33 +1003,69 @@ static QByteArray readOrMapFile(QFile *file) return rawInput ? QByteArray::fromRawData(rawInput, size) : file->readAll(); } +void Symbol::mergeStringLiteral(const Symbol &next) +{ + Q_ASSERT(len >= 2); // at least `""` + Q_ASSERT(from + len <= lex.size()); + Q_ASSERT(next.len >= 2); // at least `""` + Q_ASSERT(next.from + next.len <= next.lex.size()); + + if (len != lex.size()) { + // "rubbish" around lexem() in `lex`: clean up (`lex` may be the whole file) + QByteArray l = lexemView().chopped(1) % next.lexemView().sliced(1); + lex = std::move(l); // lexemView() aliases `lex`; only clobber it now + from = 0; + } else { + // like QByteArray::append(), but dealing with the "" around each lexem: + const auto unquoted = next.unquotedLexemView(); + lex.insert(from + len - 1, // before closing `"` + unquoted); + } + len = lex.size(); +} + static void mergeStringLiterals(Symbols &symbols) { - for (Symbols::iterator i = symbols.begin(); i != symbols.end(); ++i) { - if (i->token == STRING_LITERAL) { - Symbols::Iterator mergeSymbol = i; - qsizetype literalsLength = mergeSymbol->len; - while (++i != symbols.end() && i->token == STRING_LITERAL) - literalsLength += i->len - 2; // no quotes - - if (literalsLength != mergeSymbol->len) { - QByteArray mergeSymbolOriginalLexem = mergeSymbol->unquotedLexem(); - QByteArray &mergeSymbolLexem = mergeSymbol->lex; - mergeSymbolLexem.resize(0); - mergeSymbolLexem.reserve(literalsLength); - mergeSymbolLexem.append('"'); - mergeSymbolLexem.append(mergeSymbolOriginalLexem); - for (Symbols::iterator j = mergeSymbol + 1; j != i; ++j) - mergeSymbolLexem.append(j->lex.constData() + j->from + 1, j->len - 2); // append j->unquotedLexem() - mergeSymbolLexem.append('"'); - mergeSymbol->len = mergeSymbol->lex.size(); - mergeSymbol->from = 0; - i = symbols.erase(mergeSymbol + 1, i); + // like std::unique, but merges instead of skips adjacent STRING_LITERALs: + + const auto mergeable = [](const Symbol &lhs, const Symbol &rhs) { + return lhs.token == STRING_LITERAL && rhs.token == STRING_LITERAL; + }; + + auto end = symbols.end(); + auto it = std::adjacent_find(symbols.begin(), symbols.end(), mergeable); + if (it == end) // none found + return; + + // we know `it`, `it + 1` are both STRING_LITERAL (adjacent_find post-condition) + // in particular: it + 1 < end + + auto dst = it; + auto lit = dst; + ++it; + lit->mergeStringLiteral(*it); + + while (++it != end) { + // Loop Invariants: + // - [begin(), dst] is already processed + // - `lit` is the last string literal + // - we can merge if lit == dst + // - [it, end[ still to be checked + if (it->token == STRING_LITERAL) { + if (lit == dst) { // can merge + lit->mergeStringLiteral(*it); + } else { // can't merge: not adjacent to previous STRING_LITERAL + *++dst = std::move(*it); + lit = dst; // remember that this was a literal } - if (i == symbols.end()) - break; + } else { + *++dst = std::move(*it); } } + + ++dst; + + symbols.erase(dst, end); } static QByteArray searchIncludePaths(const QList<Parser::IncludePath> &includepaths, |
