summaryrefslogtreecommitdiffstats
path: root/util/unicode/main.cpp
diff options
context:
space:
mode:
authorMarc Mutz <marc.mutz@qt.io>2025-08-25 16:17:05 +0200
committerMarc Mutz <marc.mutz@qt.io>2025-10-27 22:43:31 +0000
commit763f19151cc31ca1ba4912e0828359be5dba89e8 (patch)
tree4f780ca2d14533eb6b8536c1aafcbdf8e371116a /util/unicode/main.cpp
parenteefea5bf9d8141fb920a9612cc6d274890d9ebd8 (diff)
QUnicodeTables: separate Properties::cases from the rest
These entries are quite repetitive, esp. the all-zero entry for uncased characters (but not only: there are also 137 non-zero duplicates), and each one takes 8 bytes of the total 20 bytes of sizeof(Properties). Make a new array with these entries and only store an index into it in Properties. The new array happens to have a size of 448 entries (down from 3372 unique Properties), so 9 bits would suffice for the index, but a sizeof(Properties) == 14 is probably rather pointless, so add a reserved field to prop the struct up to 16. That sounds like the ideal size for rapid indexing and probably improves qGetProp() performance, esp. if case information is not needed. Theoretically, this should save 3372 * 4 - 448 * 8 = 9904 bytes. The TEXT size of libQtCore, however, shrinks by a bit more, 10596 bytes, on optimized Linux AMD64 Clang 19 builds. Picking to all active branches, because the Unicode tables are still maintained in all of them. Fixes: QTBUG-139427 Pick-to: 6.10 6.9 6.8 6.5 Change-Id: If4dc47ef06c674ad0263f0623ec408a25b977b3a Reviewed-by: Edward Welbourne <edward.welbourne@qt.io> Reviewed-by: Ahmad Samir <a.samirh78@gmail.com>
Diffstat (limited to 'util/unicode/main.cpp')
-rw-r--r--util/unicode/main.cpp107
1 files changed, 87 insertions, 20 deletions
diff --git a/util/unicode/main.cpp b/util/unicode/main.cpp
index d303a369812..28f5c984306 100644
--- a/util/unicode/main.cpp
+++ b/util/unicode/main.cpp
@@ -16,8 +16,10 @@
#include <private/qunicodetables_p.h>
#endif
+#include <array>
#include <QtCore/qxpfunctional.h>
#include <QtCore/q26numeric.h>
+#include <vector>
#if QT_VERSION < QT_VERSION_CHECK(6, 9, 0)
// QSpan, QIODevice::readLineInto()
@@ -1018,13 +1020,14 @@ static const char *property_string =
" ushort unicodeVersion : 5; /* 5 used */\n"
" ushort eastAsianWidth : 3; /* 3 used */\n"
" ushort nfQuickCheck : 8;\n" // could be narrowed
- " std::array<CaseConversion, NumCases> cases;\n"
+ " ushort caseIndex : 16; /* 9 used */\n"
" ushort graphemeBreakClass : 5; /* 5 used */\n"
" ushort wordBreakClass : 5; /* 5 used */\n"
" ushort lineBreakClass : 6; /* 6 used */\n"
" ushort sentenceBreakClass : 4; /* 4 used */\n"
" ushort idnaStatus : 4; /* 3 used */\n"
" ushort script : 8;\n"
+ " ushort reserved : 16; /* makes sizeof a nice round 16 bytes */\n"
"};\n\n"
"Q_DECL_CONST_FUNCTION\n"
"Q_CORE_EXPORT const Properties * QT_FASTCALL properties(char32_t ucs4) noexcept;\n"
@@ -1063,7 +1066,7 @@ static const char *methods =
"{ return eastAsianWidth(ch.unicode()); }\n"
"\n";
-static const int SizeOfPropertiesStruct = 20;
+static const int SizeOfPropertiesStruct = 16;
static const QByteArray sizeOfPropertiesStructCheck =
"static_assert(sizeof(Properties) == " + QByteArray::number(SizeOfPropertiesStruct) + ");\n\n";
@@ -1096,6 +1099,7 @@ struct PropertyFlags {
&& upperCaseSpecial == o.upperCaseSpecial
&& titleCaseSpecial == o.titleCaseSpecial
&& caseFoldSpecial == o.caseFoldSpecial
+ // caseIndex is _not_ part of equality
&& graphemeBreakClass == o.graphemeBreakClass
&& wordBreakClass == o.wordBreakClass
&& sentenceBreakClass == o.sentenceBreakClass
@@ -1129,6 +1133,7 @@ struct PropertyFlags {
bool upperCaseSpecial = 0;
bool titleCaseSpecial = 0;
bool caseFoldSpecial = 0;
+ int caseIndex = -1; // not part of equality; replaces {lower,upper,title,fold}CaseDiff
GraphemeBreakClass graphemeBreakClass = GraphemeBreak_Any;
WordBreakClass wordBreakClass = WordBreak_Any;
SentenceBreakClass sentenceBreakClass = SentenceBreak_Any;
@@ -2585,6 +2590,77 @@ static void computeUniqueProperties()
qDebug(" %" PRIdQSIZETYPE " unique unicode properties found", uniqueProperties.size());
}
+struct CaseConversion { // one case mapping of a code point, as compared and emitted by this generator
+ ushort special : 1; // 1-bit flag; filled from the matching PropertyFlags::*CaseSpecial member
+ signed short diff : 15; // 15-bit signed value; filled from the matching PropertyFlags::*CaseDiff member
+
+ friend bool operator==(CaseConversion lhs, CaseConversion rhs) noexcept // equal iff the two objects are bytewise identical
+ {
+ static_assert(std::has_unique_object_representations_v<CaseConversion>); // guards the memcmp below: compilation fails if equal values could differ in their bytes (e.g. padding bits)
+ return std::memcmp(&lhs, &rhs, sizeof(lhs)) == 0;
+ }
+};
+using CaseConversions = std::array<CaseConversion, 4>; // one table entry: the four conversions in the order lower, upper, title, fold
+
+static std::vector<CaseConversions> // returns the de-duplicated table of case conversions; entry 0 is always the all-zero one
+computeUniqueCaseConversions(QList<PropertyFlags> &l) // additionally stores, in each element of l, the table index of its conversions (caseIndex)
+{
+ std::vector<CaseConversions> result;
+ result.emplace_back(); // all zeros should be at the beginning (value-initialized entry, so it gets index 0)
+
+ qDebug("computeUniqueCaseConversions:");
+
+ size_t nonNullDuplicates = 0; // counts elements that reuse an already-present entry other than entry 0
+
+ for (auto &e : l) { // non-const reference because e.caseIndex is written below
+ CaseConversions candidate = { // the order here fixes the order within every table entry
+ CaseConversion{ e.lowerCaseSpecial, short(e.lowerCaseDiff) },
+ CaseConversion{ e.upperCaseSpecial, short(e.upperCaseDiff) },
+ CaseConversion{ e.titleCaseSpecial, short(e.titleCaseDiff) },
+ CaseConversion{ e.caseFoldSpecial, short(e.caseFoldDiff) },
+ };
+ const auto it = std::find(result.begin(), result.end(), candidate); // linear search through the entries collected so far
+ if (it == result.end()) {
+ // new one, add:
+ e.caseIndex = int(result.size()); // the index the entry will have once it is pushed on the next line
+ result.push_back(std::move(candidate));
+ } else {
+ e.caseIndex = it - result.begin(); // index of the equal entry that is already in the table
+ if (e.caseIndex != 0)
+ ++nonNullDuplicates;
+ }
+ }
+
+ qDebug(" %llu unique case conversions found (with %llu non-null duplicates)",
+ qulonglong(result.size()),
+ qulonglong(nonNullDuplicates));
+
+ return result; // NOTE(review): nothing here checks that the indices fit the 16-bit caseIndex field of the generated Properties struct - confirm that is acceptable
+}
+
+static QByteArray createCaseConversions(std::vector<CaseConversions> conv) // renders conv as the C++ source text defining the caseConversions array
+{
+ QByteArray out;
+
+ qDebug("createCaseConversions:");
+
+ out += "static constexpr std::array<CaseConversion, NumCases> caseConversions[] = {\n";
+ for (const auto &e : conv) { // one output line per table entry
+ out += " { { "; // opens the initializer of one table entry
+ for (const auto &f : e) { // each conversion is written as "{ special, diff }, "
+ out += "{ ";
+ out += QByteArray::number(f.special);
+ out += ", ";
+ out += QByteArray::number(f.diff);
+ out += " }, ";
+ }
+ out.chop(2); // removes ", " left behind by the last conversion of the entry
+ out += " } },\n";
+ }
+ out += "};\n\n";
+ return out;
+}
+
struct UniqueBlock {
inline UniqueBlock() : index(-1) {}
@@ -2773,24 +2849,9 @@ static QByteArray createPropertyInfo()
// " ushort nfQuickCheck : 8;\n"
out += QByteArray::number( p.nfQuickCheck );
out += ", ";
-// " std::array<CaseConversion, NumCases> cases;\n"
- out += "{ { { ";
- out += QByteArray::number( p.lowerCaseSpecial );
- out += ", ";
- out += QByteArray::number( p.lowerCaseDiff );
- out += "}, {";
- out += QByteArray::number( p.upperCaseSpecial );
- out += ", ";
- out += QByteArray::number( p.upperCaseDiff );
- out += "}, {";
- out += QByteArray::number( p.titleCaseSpecial );
+// " ushort caseIndex : 16; /* 9 used */\n"
+ out += QByteArray::number(p.caseIndex);
out += ", ";
- out += QByteArray::number( p.titleCaseDiff );
- out += "}, {";
- out += QByteArray::number( p.caseFoldSpecial );
- out += ", ";
- out += QByteArray::number( p.caseFoldDiff );
- out += "} } }, ";
// " ushort graphemeBreakClass : 5; /* 5 used */\n"
// " ushort wordBreakClass : 5; /* 5 used */\n"
// " ushort lineBreakClass : 6; /* 6 used */\n"
@@ -2808,6 +2869,9 @@ static QByteArray createPropertyInfo()
out += ", ";
// " ushort script : 8;\n"
out += QByteArray::number( p.script );
+ out += ", ";
+// " ushort reserved : 16; /* makes sizeof a nice round 16 bytes */\n"
+ out += '0';
out += " },";
}
if (out.endsWith(','))
@@ -2838,7 +2902,7 @@ static QByteArray createPropertyInfo()
"\n"
"QSpan<const CaseConversion, NumCases> QT_FASTCALL caseConversion(char32_t ucs4) noexcept\n"
"{\n"
- " return qGetProp(ucs4)->cases;\n"
+ " return caseConversions[qGetProp(ucs4)->caseIndex];\n"
"}\n\n";
out += "Q_CORE_EXPORT GraphemeBreakClass QT_FASTCALL graphemeBreakClass(char32_t ucs4) noexcept\n"
@@ -3358,6 +3422,8 @@ int main(int, char **)
resolveIdnaStatus();
computeUniqueProperties();
+
+ const QByteArray caseConv = createCaseConversions(computeUniqueCaseConversions(uniqueProperties));
QByteArray properties = createPropertyInfo();
QByteArray specialCases = createSpecialCaseMap();
QByteArray compositions = createCompositionInfo();
@@ -3396,6 +3462,7 @@ int main(int, char **)
f.write("#include \"qunicodetables_p.h\"\n\n");
f.write("QT_BEGIN_NAMESPACE\n\n");
f.write("namespace QUnicodeTables {\n");
+ f.write(caseConv.data());
f.write(properties);
f.write(specialCases);
f.write(compositions);