summaryrefslogtreecommitdiffstats
path: root/src/corelib/text/qregexp.cpp
diff options
context:
space:
mode:
authorLars Knoll <lars.knoll@qt.io>2020-04-09 16:01:36 +0200
committerLars Knoll <lars.knoll@qt.io>2020-05-14 14:19:47 +0200
commit7370b60cfe11da4d6167b51d83d18d9514a370c5 (patch)
treed6dea9cf249d26fc44286577247e4c26673caa94 /src/corelib/text/qregexp.cpp
parent7a3a9b8eb542196ed8e18e36b830396f54a3f63d (diff)
Move methods using QRegExp in QString(List) over to QRegExp
The prepares for the removal of those methods from QString and QStringList. The new methods in QRegExp are left as a porting help. Change-Id: Ieffa33a79caf53b83029e9b070c4eb5cadca1418 Reviewed-by: MÃ¥rten Nordheim <marten.nordheim@qt.io>
Diffstat (limited to 'src/corelib/text/qregexp.cpp')
-rw-r--r--src/corelib/text/qregexp.cpp414
1 files changed, 414 insertions, 0 deletions
diff --git a/src/corelib/text/qregexp.cpp b/src/corelib/text/qregexp.cpp
index 758a3695c9f..cbf827f2b8e 100644
--- a/src/corelib/text/qregexp.cpp
+++ b/src/corelib/text/qregexp.cpp
@@ -4479,6 +4479,419 @@ int QRegExp::matchedLength() const
return priv->matchState.captured[1];
}
+
+/*!
+ Replaces every occurrence of this regular expression in
+ \a str with \a after and returns the result.
+
+ For regular expressions containing \l{capturing parentheses},
+ occurrences of \b{\\1}, \b{\\2}, ..., in \a after are replaced
+ with \a{rx}.cap(1), cap(2), ...
+
+ \sa indexIn(), lastIndexIn(), QRegExp::cap()
+*/
+QString QRegExp::replaceIn(const QString &str, const QString &after) const
+{
+ struct QStringCapture
+ {
+ int pos;
+ int len;
+ int no;
+ };
+
+ QRegExp rx2(*this);
+
+ if (str.isEmpty() && rx2.indexIn(str) == -1)
+ return str;
+
+ QString s(str);
+
+ int index = 0;
+ int numCaptures = rx2.captureCount();
+ int al = after.length();
+ QRegExp::CaretMode caretMode = QRegExp::CaretAtZero;
+
+ if (numCaptures > 0) {
+ const QChar *uc = after.unicode();
+ int numBackRefs = 0;
+
+ for (int i = 0; i < al - 1; i++) {
+ if (uc[i] == QLatin1Char('\\')) {
+ int no = uc[i + 1].digitValue();
+ if (no > 0 && no <= numCaptures)
+ numBackRefs++;
+ }
+ }
+
+ /*
+ This is the harder case where we have back-references.
+ */
+ if (numBackRefs > 0) {
+ QVarLengthArray<QStringCapture, 16> captures(numBackRefs);
+ int j = 0;
+
+ for (int i = 0; i < al - 1; i++) {
+ if (uc[i] == QLatin1Char('\\')) {
+ int no = uc[i + 1].digitValue();
+ if (no > 0 && no <= numCaptures) {
+ QStringCapture capture;
+ capture.pos = i;
+ capture.len = 2;
+
+ if (i < al - 2) {
+ int secondDigit = uc[i + 2].digitValue();
+ if (secondDigit != -1 && ((no * 10) + secondDigit) <= numCaptures) {
+ no = (no * 10) + secondDigit;
+ ++capture.len;
+ }
+ }
+
+ capture.no = no;
+ captures[j++] = capture;
+ }
+ }
+ }
+
+ while (index <= s.length()) {
+ index = rx2.indexIn(s, index, caretMode);
+ if (index == -1)
+ break;
+
+ QString after2(after);
+ for (j = numBackRefs - 1; j >= 0; j--) {
+ const QStringCapture &capture = captures[j];
+ after2.replace(capture.pos, capture.len, rx2.cap(capture.no));
+ }
+
+ s.replace(index, rx2.matchedLength(), after2);
+ index += after2.length();
+
+ // avoid infinite loop on 0-length matches (e.g., QRegExp("[a-z]*"))
+ if (rx2.matchedLength() == 0)
+ ++index;
+
+ caretMode = QRegExp::CaretWontMatch;
+ }
+ return s;
+ }
+ }
+
+ /*
+ This is the simple and optimized case where we don't have
+ back-references.
+ */
+ while (index != -1) {
+ struct {
+ int pos;
+ int length;
+ } replacements[2048];
+
+ int pos = 0;
+ int adjust = 0;
+ while (pos < 2047) {
+ index = rx2.indexIn(s, index, caretMode);
+ if (index == -1)
+ break;
+ int ml = rx2.matchedLength();
+ replacements[pos].pos = index;
+ replacements[pos++].length = ml;
+ index += ml;
+ adjust += al - ml;
+ // avoid infinite loop
+ if (!ml)
+ index++;
+ }
+ if (!pos)
+ break;
+ replacements[pos].pos = s.size();
+ int newlen = s.size() + adjust;
+
+ // to continue searching at the right position after we did
+ // the first round of replacements
+ if (index != -1)
+ index += adjust;
+ QString newstring;
+ newstring.reserve(newlen + 1);
+ QChar *newuc = newstring.data();
+ QChar *uc = newuc;
+ int copystart = 0;
+ int i = 0;
+ while (i < pos) {
+ int copyend = replacements[i].pos;
+ int size = copyend - copystart;
+ memcpy(static_cast<void*>(uc), static_cast<const void *>(s.constData() + copystart), size * sizeof(QChar));
+ uc += size;
+ memcpy(static_cast<void *>(uc), static_cast<const void *>(after.constData()), al * sizeof(QChar));
+ uc += al;
+ copystart = copyend + replacements[i].length;
+ i++;
+ }
+ memcpy(static_cast<void *>(uc), static_cast<const void *>(s.constData() + copystart), (s.size() - copystart) * sizeof(QChar));
+ newstring.resize(newlen);
+ s = newstring;
+ caretMode = QRegExp::CaretWontMatch;
+ }
+ return s;
+
+}
+
+
+/*!
+ \fn QString QRegExp::removeIn(const QString &str)
+
+ Removes every occurrence of this regular expression \a str, and
+ returns the result
+
+ Does the same as replaceIn(str, QString()).
+
+ \sa indexIn(), lastIndexIn(), replaceIn()
+*/
+
+
+/*!
+ \fn QString QRegExp::countIn(const QString &str)
+
+ Returns the number of times this regular expression matches
+ in \a str.
+
+ \sa indexIn(), lastIndexIn(), replaceIn()
+*/
+
+int QRegExp::countIn(const QString &str) const
+{
+ QRegExp rx2(*this);
+ int count = 0;
+ int index = -1;
+ int len = str.length();
+ while (index < len - 1) { // count overlapping matches
+ index = rx2.indexIn(str, index + 1);
+ if (index == -1)
+ break;
+ count++;
+ }
+ return count;
+}
+
+class qt_section_chunk {
+public:
+ qt_section_chunk() {}
+ qt_section_chunk(int l, QStringRef s) : length(l), string(std::move(s)) {}
+ int length;
+ QStringRef string;
+};
+
+static QString extractSections(const QVector<qt_section_chunk> &sections,
+ int start,
+ int end,
+ QString::SectionFlags flags)
+{
+ const int sectionsSize = sections.size();
+
+ if (!(flags & QString::SectionSkipEmpty)) {
+ if (start < 0)
+ start += sectionsSize;
+ if (end < 0)
+ end += sectionsSize;
+ } else {
+ int skip = 0;
+ for (int k = 0; k < sectionsSize; ++k) {
+ const qt_section_chunk &section = sections.at(k);
+ if (section.length == section.string.length())
+ skip++;
+ }
+ if (start < 0)
+ start += sectionsSize - skip;
+ if (end < 0)
+ end += sectionsSize - skip;
+ }
+ if (start >= sectionsSize || end < 0 || start > end)
+ return QString();
+
+ QString ret;
+ int x = 0;
+ int first_i = start, last_i = end;
+ for (int i = 0; x <= end && i < sectionsSize; ++i) {
+ const qt_section_chunk &section = sections.at(i);
+ const bool empty = (section.length == section.string.length());
+ if (x >= start) {
+ if (x == start)
+ first_i = i;
+ if (x == end)
+ last_i = i;
+ if (x != start)
+ ret += section.string;
+ else
+ ret += section.string.mid(section.length);
+ }
+ if (!empty || !(flags & QString::SectionSkipEmpty))
+ x++;
+ }
+
+ if ((flags & QString::SectionIncludeLeadingSep) && first_i >= 0) {
+ const qt_section_chunk &section = sections.at(first_i);
+ ret.prepend(section.string.left(section.length));
+ }
+
+ if ((flags & QString::SectionIncludeTrailingSep)
+ && last_i < sectionsSize - 1) {
+ const qt_section_chunk &section = sections.at(last_i+1);
+ ret += section.string.left(section.length);
+ }
+
+ return ret;
+}
+/*!
+ \a str is treated as a sequence of fields separated by this
+ regular expression.
+
+ \sa splitString()
+*/
+QString QRegExp::sectionIn(const QString &str, int start, int end, QString::SectionFlags flags) const
+{
+ if (str.isEmpty())
+ return str;
+
+ QRegExp sep(*this);
+ sep.setCaseSensitivity((flags & QString::SectionCaseInsensitiveSeps) ? Qt::CaseInsensitive
+ : Qt::CaseSensitive);
+
+ QVector<qt_section_chunk> sections;
+ int n = str.length(), m = 0, last_m = 0, last_len = 0;
+ while ((m = sep.indexIn(str, m)) != -1) {
+ sections.append(qt_section_chunk(last_len, QStringRef(&str, last_m, m - last_m)));
+ last_m = m;
+ last_len = sep.matchedLength();
+ m += qMax(sep.matchedLength(), 1);
+ }
+ sections.append(qt_section_chunk(last_len, QStringRef(&str, last_m, n - last_m)));
+
+ return extractSections(sections, start, end, flags);
+}
+
+/*!
+ Splits \a str into substrings wherever this regular expression
+ matches, and returns the list of those strings. If this regular
+ expression does not match anywhere in the string, split() returns a
+ single-element list containing \a str.
+
+ \sa QStringList::join(), section(), QString::split()
+*/
+QStringList QRegExp::splitString(const QString &str, Qt::SplitBehavior behavior) const
+{
+ QRegExp rx2(*this);
+ QStringList list;
+ int start = 0;
+ int extra = 0;
+ int end;
+ while ((end = rx2.indexIn(str, start + extra)) != -1) {
+ int matchedLen = rx2.matchedLength();
+ if (start != end || behavior == Qt::KeepEmptyParts)
+ list.append(str.mid(start, end - start));
+ start = end + matchedLen;
+ extra = (matchedLen == 0) ? 1 : 0;
+ }
+ if (start != str.size() || behavior == Qt::KeepEmptyParts)
+ list.append(str.mid(start, -1));
+ return list;
+}
+
+/*!
+ Splits \a str into substrings wherever this regular expression
+ matches, and returns the list of those strings. If this regular
+ expression does not match anywhere in the string, split() returns a
+ single-element list containing \a str.
+
+ \sa QStringList::join(), section(), QString::split()
+*/
+QVector<QStringRef> QRegExp::splitStringAsRef(const QString &str, Qt::SplitBehavior behavior) const
+{
+ QRegExp rx2(*this);
+ QVector<QStringRef> list;
+ int start = 0;
+ int extra = 0;
+ int end;
+ while ((end = rx2.indexIn(str, start + extra)) != -1) {
+ int matchedLen = rx2.matchedLength();
+ if (start != end || behavior == Qt::KeepEmptyParts)
+ list.append(str.midRef(start, end - start));
+ start = end + matchedLen;
+ extra = (matchedLen == 0) ? 1 : 0;
+ }
+ if (start != str.size() || behavior == Qt::KeepEmptyParts)
+ list.append(str.midRef(start, -1));
+ return list;
+}
+
+/*!
+ \fn QStringList QStringList::filter(const QRegExp &rx) const
+
+ \overload
+
+ Returns a list of all the strings that match the regular
+ expression \a rx.
+*/
+QStringList QRegExp::filterList(const QStringList &stringList) const
+{
+ QStringList res;
+ for (const QString &s : stringList) {
+ if (containedIn(s))
+ res << s;
+ }
+ return res;
+}
+
+/*!
+ Replaces every occurrence of the regexp \a rx, in each of the
+ string lists's strings, with \a after. Returns a reference to the
+ string list.
+*/
+QStringList QRegExp::replaceIn(const QStringList &stringList, const QString &after) const
+{
+ QStringList list;
+ for (const QString &s : stringList)
+ list << replaceIn(s, after);
+ return list;
+}
+
+/*!
+ Returns the index position of the first exact match of this regexp in
+ \a list, searching forward from index position \a from. Returns
+ -1 if no item matched.
+
+ \sa lastIndexIn(), contains(), exactMatch()
+*/
+int QRegExp::indexIn(const QStringList &list, int from)
+{
+ if (from < 0)
+ from = qMax(from + list.size(), 0);
+ for (int i = from; i < list.size(); ++i) {
+ if (exactMatch(list.at(i)))
+ return i;
+ }
+ return -1;
+}
+
+/*!
+ Returns the index position of the last exact match of this regexp in
+ \a list, searching backward from index position \a from. If \a
+ from is -1 (the default), the search starts at the last item.
+ Returns -1 if no item matched.
+
+ \sa indexOf(), contains(), QRegExp::exactMatch()
+*/
+int QRegExp::lastIndexIn(const QStringList &list, int from)
+{
+ if (from < 0)
+ from += list.size();
+ else if (from >= list.size())
+ from = list.size() - 1;
+ for (int i = from; i >= 0; --i) {
+ if (exactMatch(list.at(i)))
+ return i;
+ }
+ return -1;
+}
+
#ifndef QT_NO_REGEXP_CAPTURE
/*!
@@ -4637,6 +5050,7 @@ QString QRegExp::errorString()
{
return const_cast<const QRegExp *>(this)->errorString();
}
+
#endif
/*!