summaryrefslogtreecommitdiffstats
path: root/src/corelib/text/qunicodetools.cpp
diff options
context:
space:
mode:
authorMarc Mutz <marc.mutz@qt.io>2025-10-21 18:10:31 +0200
committerMarc Mutz <marc.mutz@qt.io>2025-10-30 22:56:25 +0000
commite08bcaaa87d7832c158e631cb6df2aecd71fe62d (patch)
treeb4e4e50b4af63384de464d03cc46cc9ad6294dbb /src/corelib/text/qunicodetools.cpp
parent9e442a8e27000c7b925aa54a5a4c59e5ca2badb7 (diff)
QUnicodeTools: port getSentenceBreaks() to QStringIterator
Like getWordBreaks(), this one is a bit more complicated than the first two, since there's a nested loop. Solve it by using a copy of the QStringIterator for the look-ahead loop. To see that the old and new versions are equivalent, observe that the qsizetype indices `i` and `lookahead` always pointed _onto_ the last-consumed code unit, while a QStringIterator always points to just _after_ the last-consumed code unit. Pick-to: 6.10 6.8 6.5 Change-Id: Id272b1a1597912eb611acb544b5ef0ac1d13a754 Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
Diffstat (limited to 'src/corelib/text/qunicodetools.cpp')
-rw-r--r--src/corelib/text/qunicodetools.cpp28
1 files changed, 8 insertions, 20 deletions
diff --git a/src/corelib/text/qunicodetools.cpp b/src/corelib/text/qunicodetools.cpp
index dd751709d8a..56fa41c51ab 100644
--- a/src/corelib/text/qunicodetools.cpp
+++ b/src/corelib/text/qunicodetools.cpp
@@ -388,16 +388,11 @@ static const uchar breakTable[BAfter + 1][QUnicodeTables::NumSentenceBreakClasse
static void getSentenceBreaks(const char16_t *string, qsizetype len, QCharAttributes *attributes)
{
uchar state = SB::BAfter; // to meet SB1
- for (qsizetype i = 0; i != len; ++i) {
- const qsizetype pos = i;
- char32_t ucs4 = string[i];
- if (QChar::isHighSurrogate(ucs4) && i + 1 != len) {
- ushort low = string[i + 1];
- if (QChar::isLowSurrogate(low)) {
- ucs4 = QChar::surrogateToUcs4(ucs4, low);
- ++i;
- }
- }
+
+ QStringIterator it(QStringView{string, len});
+ while (it.hasNext()) {
+ const qsizetype pos = it.index();
+ const char32_t ucs4 = it.nextOrRawCodeUnit();
const auto prop = QUnicodeTables::properties(ucs4);
QUnicodeTables::SentenceBreakClass ncls = (QUnicodeTables::SentenceBreakClass) prop->sentenceBreakClass;
@@ -406,15 +401,8 @@ static void getSentenceBreaks(const char16_t *string, qsizetype len, QCharAttrib
state = SB::breakTable[state][ncls];
if (Q_UNLIKELY(state == SB::Lookup)) { // SB8
state = SB::Break;
- for (qsizetype lookahead = i + 1; lookahead < len; ++lookahead) {
- char32_t ucs4 = string[lookahead];
- if (QChar::isHighSurrogate(ucs4) && lookahead + 1 != len) {
- ushort low = string[lookahead + 1];
- if (QChar::isLowSurrogate(low)) {
- ucs4 = QChar::surrogateToUcs4(ucs4, low);
- ++lookahead;
- }
- }
+ for (auto lookahead = it; lookahead.hasNext(); /**/) {
+ const char32_t ucs4 = lookahead.nextOrRawCodeUnit();
const auto prop = QUnicodeTables::properties(ucs4);
QUnicodeTables::SentenceBreakClass tcls = (QUnicodeTables::SentenceBreakClass) prop->sentenceBreakClass;
@@ -427,7 +415,7 @@ static void getSentenceBreaks(const char16_t *string, qsizetype len, QCharAttrib
case QUnicodeTables::SentenceBreak_Close:
continue;
case QUnicodeTables::SentenceBreak_Lower:
- i = lookahead;
+ it = lookahead;
state = SB::Initial;
break;
default: