summaryrefslogtreecommitdiffstats
path: root/src/corelib/tools/qchar.cpp
diff options
context:
space:
mode:
authorEdward Welbourne <edward.welbourne@qt.io>2019-05-27 19:13:54 +0200
committerEdward Welbourne <edward.welbourne@qt.io>2019-07-10 17:05:30 +0200
commita9aa206b7b8ac4e69f8c46233b4080e00e845ff5 (patch)
tree0d19cb1f1a3b9d79d322e6e63f6f72160977ca67 /src/corelib/tools/qchar.cpp
parent85d3061c1cd4617ef09cb381320611c27da205a5 (diff)
Move text-related code out of corelib/tools/ to corelib/text/
This includes byte array, string, char, unicode, locale, collation and regular expressions. Change-Id: I8b125fa52c8c513eb57a0f1298b91910e5a0d786 Reviewed-by: Volker Hilsheimer <volker.hilsheimer@qt.io>
Diffstat (limited to 'src/corelib/tools/qchar.cpp')
-rw-r--r--src/corelib/tools/qchar.cpp2059
1 files changed, 0 insertions, 2059 deletions
diff --git a/src/corelib/tools/qchar.cpp b/src/corelib/tools/qchar.cpp
deleted file mode 100644
index 0c190c6a3d6..00000000000
--- a/src/corelib/tools/qchar.cpp
+++ /dev/null
@@ -1,2059 +0,0 @@
-/****************************************************************************
-**
-** Copyright (C) 2016 The Qt Company Ltd.
-** Contact: https://www.qt.io/licensing/
-**
-** This file is part of the QtCore module of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:LGPL$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see https://www.qt.io/terms-conditions. For further
-** information use the contact form at https://www.qt.io/contact-us.
-**
-** GNU Lesser General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU Lesser
-** General Public License version 3 as published by the Free Software
-** Foundation and appearing in the file LICENSE.LGPL3 included in the
-** packaging of this file. Please review the following information to
-** ensure the GNU Lesser General Public License version 3 requirements
-** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 2.0 or (at your option) the GNU General
-** Public license version 3 or any later version approved by the KDE Free
-** Qt Foundation. The licenses are as published by the Free Software
-** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
-** included in the packaging of this file. Please review the following
-** information to ensure the GNU General Public License requirements will
-** be met: https://www.gnu.org/licenses/gpl-2.0.html and
-** https://www.gnu.org/licenses/gpl-3.0.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
-
-// Don't define it while compiling this module, or USERS of Qt will
-// not be able to link.
-#ifdef QT_NO_CAST_FROM_ASCII
-# undef QT_NO_CAST_FROM_ASCII
-#endif
-#ifdef QT_NO_CAST_TO_ASCII
-# undef QT_NO_CAST_TO_ASCII
-#endif
-#include "qchar.h"
-
-#include "qdatastream.h"
-
-#include "qunicodetables_p.h"
-#include "qunicodetables.cpp"
-
-#include <algorithm>
-
-QT_BEGIN_NAMESPACE
-
-#define FLAG(x) (1 << (x)) // int with only bit number x set; used below to build category bit masks
-
-/*!
- \class QLatin1Char
- \inmodule QtCore
- \brief The QLatin1Char class provides an 8-bit ASCII/Latin-1 character.
-
- \ingroup string-processing
-
- This class is only useful to construct a QChar from an 8-bit character.
-
- \sa QChar, QLatin1String, QString
-*/
-
-/*!
- \fn const char QLatin1Char::toLatin1() const
-
- Converts a Latin-1 character to an 8-bit ASCII representation of the character.
-*/
-
-/*!
- \fn const ushort QLatin1Char::unicode() const
-
- Converts a Latin-1 character to a 16-bit-encoded Unicode representation
- of the character.
-*/
-
-/*!
- \fn QLatin1Char::QLatin1Char(char c)
-
- Constructs a Latin-1 character for \a c. This constructor should be
- used when the encoding of the input character is known to be Latin-1.
-*/
-
-/*!
- \class QChar
- \inmodule QtCore
- \brief The QChar class provides a 16-bit Unicode character.
-
- \ingroup string-processing
- \reentrant
-
- In Qt, Unicode characters are 16-bit entities without any markup
- or structure. This class represents such an entity. It is
- lightweight, so it can be used everywhere. Most compilers treat
- it like an \c{unsigned short}.
-
- QChar provides a full complement of testing/classification
- functions, converting to and from other formats, converting from
- composed to decomposed Unicode, and trying to compare and
- case-convert if you ask it to.
-
- The classification functions include functions like those in the
- standard C++ header \<cctype\> (formerly \<ctype.h\>), but
- operating on the full range of Unicode characters, not just for the ASCII
- range. They all return true if the character is a certain type of character;
- otherwise they return false. These classification functions are
- isNull() (returns \c true if the character is '\\0'), isPrint()
- (true if the character is any sort of printable character,
- including whitespace), isPunct() (any sort of punctuation),
- isMark() (Unicode Mark), isLetter() (a letter), isNumber() (any
- sort of numeric character, not just 0-9), isLetterOrNumber(), and
- isDigit() (decimal digits). All of these are wrappers around
- category() which return the Unicode-defined category of each
- character. Some of these also calculate the derived properties
- (for example isSpace() returns \c true if the character is of category
- Separator_* or an exceptional code point from Other_Control category).
-
- QChar also provides direction(), which indicates the "natural"
- writing direction of this character. The joiningType() function
- indicates how the character joins with its neighbors (needed
- mostly for Arabic or Syriac) and finally hasMirrored(), which indicates
- whether the character needs to be mirrored when it is printed in
- its "unnatural" writing direction.
-
- Composed Unicode characters (like \a ring) can be converted to
- decomposed Unicode ("a" followed by "ring above") by using decomposition().
-
- In Unicode, comparison is not necessarily possible and case
- conversion is very difficult at best. Unicode, covering the
- "entire" world, also includes most of the world's case and
- sorting problems. operator==() and friends will do comparison
- based purely on the numeric Unicode value (code point) of the
- characters, and toUpper() and toLower() will do case changes when
- the character has a well-defined uppercase/lowercase equivalent.
- For locale-dependent comparisons, use QString::localeAwareCompare().
-
- The conversion functions include unicode() (to a scalar),
- toLatin1() (to scalar, but converts all non-Latin-1 characters to
- 0), row() (gives the Unicode row), cell() (gives the Unicode
- cell), digitValue() (gives the integer value of any of the
- numerous digit characters), and a host of constructors.
-
- QChar provides constructors and cast operators that make it easy
- to convert to and from traditional 8-bit \c{char}s. If you
- defined \c QT_NO_CAST_FROM_ASCII and \c QT_NO_CAST_TO_ASCII, as
- explained in the QString documentation, you will need to
- explicitly call fromLatin1(), or use QLatin1Char,
- to construct a QChar from an 8-bit \c char, and you will need to
- call toLatin1() to get the 8-bit value back.
-
- For more information see
- \l{http://www.unicode.org/ucd/}{"About the Unicode Character Database"}.
-
- \sa Unicode, QString, QLatin1Char
-*/
-
-/*!
- \enum QChar::UnicodeVersion
-
- Specifies which version of the \l{http://www.unicode.org/}{Unicode standard}
- introduced a certain character.
-
- \value Unicode_1_1 Version 1.1
- \value Unicode_2_0 Version 2.0
- \value Unicode_2_1_2 Version 2.1.2
- \value Unicode_3_0 Version 3.0
- \value Unicode_3_1 Version 3.1
- \value Unicode_3_2 Version 3.2
- \value Unicode_4_0 Version 4.0
- \value Unicode_4_1 Version 4.1
- \value Unicode_5_0 Version 5.0
- \value Unicode_5_1 Version 5.1
- \value Unicode_5_2 Version 5.2
- \value Unicode_6_0 Version 6.0
- \value Unicode_6_1 Version 6.1
- \value Unicode_6_2 Version 6.2
- \value Unicode_6_3 Version 6.3 Since Qt 5.3
- \value Unicode_7_0 Version 7.0 Since Qt 5.5
- \value Unicode_8_0 Version 8.0 Since Qt 5.6
- \value Unicode_9_0 Version 9.0 Since Qt 5.11
- \value Unicode_10_0 Version 10.0 Since Qt 5.11
- \value Unicode_Unassigned The value is not assigned to any character
- in version 8.0 of Unicode.
-
- \sa unicodeVersion(), currentUnicodeVersion()
-*/
-
-/*!
- \enum QChar::Category
-
- This enum maps the Unicode character categories.
-
- The following categories are normative in Unicode:
-
- \value Mark_NonSpacing Unicode class name Mn
-
- \value Mark_SpacingCombining Unicode class name Mc
-
- \value Mark_Enclosing Unicode class name Me
-
- \value Number_DecimalDigit Unicode class name Nd
-
- \value Number_Letter Unicode class name Nl
-
- \value Number_Other Unicode class name No
-
- \value Separator_Space Unicode class name Zs
-
- \value Separator_Line Unicode class name Zl
-
- \value Separator_Paragraph Unicode class name Zp
-
- \value Other_Control Unicode class name Cc
-
- \value Other_Format Unicode class name Cf
-
- \value Other_Surrogate Unicode class name Cs
-
- \value Other_PrivateUse Unicode class name Co
-
- \value Other_NotAssigned Unicode class name Cn
-
-
- The following categories are informative in Unicode:
-
- \value Letter_Uppercase Unicode class name Lu
-
- \value Letter_Lowercase Unicode class name Ll
-
- \value Letter_Titlecase Unicode class name Lt
-
- \value Letter_Modifier Unicode class name Lm
-
- \value Letter_Other Unicode class name Lo
-
- \value Punctuation_Connector Unicode class name Pc
-
- \value Punctuation_Dash Unicode class name Pd
-
- \value Punctuation_Open Unicode class name Ps
-
- \value Punctuation_Close Unicode class name Pe
-
- \value Punctuation_InitialQuote Unicode class name Pi
-
- \value Punctuation_FinalQuote Unicode class name Pf
-
- \value Punctuation_Other Unicode class name Po
-
- \value Symbol_Math Unicode class name Sm
-
- \value Symbol_Currency Unicode class name Sc
-
- \value Symbol_Modifier Unicode class name Sk
-
- \value Symbol_Other Unicode class name So
-
- \sa category()
-*/
-
-/*!
- \enum QChar::Script
- \since 5.1
-
- This enum type defines the Unicode script property values.
-
- For details about the Unicode script property values see
- \l{http://www.unicode.org/reports/tr24/}{Unicode Standard Annex #24}.
-
- In order to conform to C/C++ naming conventions "Script_" is prepended
- to the codes used in the Unicode Standard.
-
- \value Script_Unknown For unassigned, private-use, noncharacter, and surrogate code points.
- \value Script_Inherited For characters that may be used with multiple scripts
- and that inherit their script from the preceding characters.
- These include nonspacing marks, enclosing marks,
- and zero width joiner/non-joiner characters.
- \value Script_Common For characters that may be used with multiple scripts
- and that do not inherit their script from the preceding characters.
-
- \value Script_Latin
- \value Script_Greek
- \value Script_Cyrillic
- \value Script_Armenian
- \value Script_Hebrew
- \value Script_Arabic
- \value Script_Syriac
- \value Script_Thaana
- \value Script_Devanagari
- \value Script_Bengali
- \value Script_Gurmukhi
- \value Script_Gujarati
- \value Script_Oriya
- \value Script_Tamil
- \value Script_Telugu
- \value Script_Kannada
- \value Script_Malayalam
- \value Script_Sinhala
- \value Script_Thai
- \value Script_Lao
- \value Script_Tibetan
- \value Script_Myanmar
- \value Script_Georgian
- \value Script_Hangul
- \value Script_Ethiopic
- \value Script_Cherokee
- \value Script_CanadianAboriginal
- \value Script_Ogham
- \value Script_Runic
- \value Script_Khmer
- \value Script_Mongolian
- \value Script_Hiragana
- \value Script_Katakana
- \value Script_Bopomofo
- \value Script_Han
- \value Script_Yi
- \value Script_OldItalic
- \value Script_Gothic
- \value Script_Deseret
- \value Script_Tagalog
- \value Script_Hanunoo
- \value Script_Buhid
- \value Script_Tagbanwa
- \value Script_Coptic
- \value Script_Limbu
- \value Script_TaiLe
- \value Script_LinearB
- \value Script_Ugaritic
- \value Script_Shavian
- \value Script_Osmanya
- \value Script_Cypriot
- \value Script_Braille
- \value Script_Buginese
- \value Script_NewTaiLue
- \value Script_Glagolitic
- \value Script_Tifinagh
- \value Script_SylotiNagri
- \value Script_OldPersian
- \value Script_Kharoshthi
- \value Script_Balinese
- \value Script_Cuneiform
- \value Script_Phoenician
- \value Script_PhagsPa
- \value Script_Nko
- \value Script_Sundanese
- \value Script_Lepcha
- \value Script_OlChiki
- \value Script_Vai
- \value Script_Saurashtra
- \value Script_KayahLi
- \value Script_Rejang
- \value Script_Lycian
- \value Script_Carian
- \value Script_Lydian
- \value Script_Cham
- \value Script_TaiTham
- \value Script_TaiViet
- \value Script_Avestan
- \value Script_EgyptianHieroglyphs
- \value Script_Samaritan
- \value Script_Lisu
- \value Script_Bamum
- \value Script_Javanese
- \value Script_MeeteiMayek
- \value Script_ImperialAramaic
- \value Script_OldSouthArabian
- \value Script_InscriptionalParthian
- \value Script_InscriptionalPahlavi
- \value Script_OldTurkic
- \value Script_Kaithi
- \value Script_Batak
- \value Script_Brahmi
- \value Script_Mandaic
- \value Script_Chakma
- \value Script_MeroiticCursive
- \value Script_MeroiticHieroglyphs
- \value Script_Miao
- \value Script_Sharada
- \value Script_SoraSompeng
- \value Script_Takri
- \value Script_CaucasianAlbanian
- \value Script_BassaVah
- \value Script_Duployan
- \value Script_Elbasan
- \value Script_Grantha
- \value Script_PahawhHmong
- \value Script_Khojki
- \value Script_LinearA
- \value Script_Mahajani
- \value Script_Manichaean
- \value Script_MendeKikakui
- \value Script_Modi
- \value Script_Mro
- \value Script_OldNorthArabian
- \value Script_Nabataean
- \value Script_Palmyrene
- \value Script_PauCinHau
- \value Script_OldPermic
- \value Script_PsalterPahlavi
- \value Script_Siddham
- \value Script_Khudawadi
- \value Script_Tirhuta
- \value Script_WarangCiti
- \value Script_Ahom
- \value Script_AnatolianHieroglyphs
- \value Script_Hatran
- \value Script_Multani
- \value Script_OldHungarian
- \value Script_SignWriting
- \value Script_Adlam
- \value Script_Bhaiksuki
- \value Script_Marchen
- \value Script_Newa
- \value Script_Osage
- \value Script_Tangut
- \value Script_MasaramGondi
- \value Script_Nushu
- \value Script_Soyombo
- \value Script_ZanabazarSquare
-
- \omitvalue ScriptCount
-
- \sa script()
-*/
-
-/*!
- \enum QChar::Direction
-
- This enum type defines the Unicode direction attributes. See the
- \l{http://www.unicode.org/reports/tr9/tr9-35.html#Table_Bidirectional_Character_Types}{Unicode Standard} for a description
- of the values.
-
- In order to conform to C/C++ naming conventions "Dir" is prepended
- to the codes used in the Unicode Standard.
-
- \value DirAL
- \value DirAN
- \value DirB
- \value DirBN
- \value DirCS
- \value DirEN
- \value DirES
- \value DirET
- \value DirFSI Since Qt 5.3
- \value DirL
- \value DirLRE
- \value DirLRI Since Qt 5.3
- \value DirLRO
- \value DirNSM
- \value DirON
- \value DirPDF
- \value DirPDI Since Qt 5.3
- \value DirR
- \value DirRLE
- \value DirRLI Since Qt 5.3
- \value DirRLO
- \value DirS
- \value DirWS
-
- \sa direction()
-*/
-
-/*!
- \enum QChar::Decomposition
-
- This enum type defines the Unicode decomposition attributes. See
- the \l{http://www.unicode.org/}{Unicode Standard} for a
- description of the values.
-
- \value NoDecomposition
- \value Canonical
- \value Circle
- \value Compat
- \value Final
- \value Font
- \value Fraction
- \value Initial
- \value Isolated
- \value Medial
- \value Narrow
- \value NoBreak
- \value Small
- \value Square
- \value Sub
- \value Super
- \value Vertical
- \value Wide
-
- \sa decomposition()
-*/
-
-/*!
- \enum QChar::JoiningType
- \since 5.3
-
- This enum type defines the Unicode joining type attributes. See the
- \l{http://www.unicode.org/}{Unicode Standard} for a description of the values.
-
- In order to conform to C/C++ naming conventions "Joining_" is prepended
- to the codes used in the Unicode Standard.
-
- \value Joining_None
- \value Joining_Causing
- \value Joining_Dual
- \value Joining_Right
- \value Joining_Left
- \value Joining_Transparent
-
- \sa joiningType()
-*/
-
-#if QT_DEPRECATED_SINCE(5, 3)
-/*!
- \enum QChar::Joining
- \deprecated in 5.3, use JoiningType instead.
-
- This enum type defines the Unicode joining attributes. See the
- \l{http://www.unicode.org/}{Unicode Standard} for a description
- of the values.
-
- \value Center
- \value Dual
- \value OtherJoining
- \value Right
-
- \sa joining()
-*/
-#endif
-
-/*!
- \enum QChar::CombiningClass
-
- \internal
-
- This enum type defines names for some of the Unicode combining
- classes. See the \l{http://www.unicode.org/}{Unicode Standard}
- for a description of the values.
-
- \value Combining_Above
- \value Combining_AboveAttached
- \value Combining_AboveLeft
- \value Combining_AboveLeftAttached
- \value Combining_AboveRight
- \value Combining_AboveRightAttached
- \value Combining_Below
- \value Combining_BelowAttached
- \value Combining_BelowLeft
- \value Combining_BelowLeftAttached
- \value Combining_BelowRight
- \value Combining_BelowRightAttached
- \value Combining_DoubleAbove
- \value Combining_DoubleBelow
- \value Combining_IotaSubscript
- \value Combining_Left
- \value Combining_LeftAttached
- \value Combining_Right
- \value Combining_RightAttached
-*/
-
-/*!
- \enum QChar::SpecialCharacter
-
- \value Null A QChar with this value isNull().
- \value Tabulation Character tabulation.
- \value LineFeed
- \value CarriageReturn
- \value Space
- \value Nbsp Non-breaking space.
- \value SoftHyphen
- \value ReplacementCharacter The character shown when a font has no glyph
- for a certain codepoint. A special question mark character is often
- used. Codecs use this codepoint when input data cannot be
- represented in Unicode.
- \value ObjectReplacementCharacter Used to represent an object such as an
- image when such objects cannot be presented.
- \value ByteOrderMark
- \value ByteOrderSwapped
- \value ParagraphSeparator
- \value LineSeparator
- \value LastValidCodePoint
-*/
-
-/*!
- \fn void QChar::setCell(uchar cell)
- \internal
-*/
-
-/*!
- \fn void QChar::setRow(uchar row)
- \internal
-*/
-
-/*!
- \fn QChar::QChar()
-
- Constructs a null QChar ('\\0').
-
- \sa isNull()
-*/
-
-/*!
- \fn QChar::QChar(QLatin1Char ch)
-
- Constructs a QChar corresponding to ASCII/Latin-1 character \a ch.
-*/
-
-/*!
- \fn QChar::QChar(SpecialCharacter ch)
-
- Constructs a QChar for the predefined character value \a ch.
-*/
-
-/*!
- \fn QChar::QChar(char16_t ch)
- \since 5.10
-
- Constructs a QChar corresponding to the UTF-16 character \a ch.
-*/
-
-/*!
- \fn QChar::QChar(wchar_t ch)
- \since 5.10
-
- Constructs a QChar corresponding to the wide character \a ch.
-
- \note This constructor is only available on Windows.
-*/
-
-/*!
- \fn QChar::QChar(char ch)
-
- Constructs a QChar corresponding to ASCII/Latin-1 character \a ch.
-
- \note This constructor is not available when \c QT_NO_CAST_FROM_ASCII
- is defined.
-
- \sa QT_NO_CAST_FROM_ASCII
-*/
-
-/*!
- \fn QChar::QChar(uchar ch)
-
- Constructs a QChar corresponding to ASCII/Latin-1 character \a ch.
-
- \note This constructor is not available when \c QT_NO_CAST_FROM_ASCII
- or \c QT_RESTRICTED_CAST_FROM_ASCII is defined.
-
- \sa QT_NO_CAST_FROM_ASCII, QT_RESTRICTED_CAST_FROM_ASCII
-*/
-
-/*!
- \fn QChar::QChar(uchar cell, uchar row)
-
- Constructs a QChar for Unicode cell \a cell in row \a row.
-
- \sa cell(), row()
-*/
-
-/*!
- \fn QChar::QChar(ushort code)
-
- Constructs a QChar for the character with Unicode code point \a code.
-*/
-
-/*!
- \fn QChar::QChar(short code)
-
- Constructs a QChar for the character with Unicode code point \a code.
-*/
-
-/*!
- \fn QChar::QChar(uint code)
-
- Constructs a QChar for the character with Unicode code point \a code.
-*/
-
-/*!
- \fn QChar::QChar(int code)
-
- Constructs a QChar for the character with Unicode code point \a code.
-*/
-
-/*!
- \fn bool QChar::isNull() const
-
- Returns \c true if the character is the Unicode character 0x0000
- ('\\0'); otherwise returns \c false.
-*/
-
-/*!
- \fn uchar QChar::cell() const
-
- Returns the cell (least significant byte) of the Unicode character.
-
- \sa row()
-*/
-
-/*!
- \fn uchar QChar::row() const
-
- Returns the row (most significant byte) of the Unicode character.
-
- \sa cell()
-*/
-
-/*!
- \fn bool QChar::isPrint() const
-
- Returns \c true if the character is a printable character; otherwise
- returns \c false. This is any character not of category Other_*.
-
- Note that this gives no indication of whether the character is
- available in a particular font.
-*/
-
-/*!
- \overload
- \since 5.0
-
- Returns \c true if the UCS-4-encoded character specified by \a ucs4 is
- a printable character; otherwise returns \c false.
- This is any character not of category Other_*.
-
- Note that this gives no indication of whether the character is
- available in a particular font.
-*/
-bool QChar::isPrint(uint ucs4) noexcept
-{
-    // Bit mask covering every Other_* category, i.e. the non-printable ones.
-    const int nonPrintableMask = FLAG(Other_Control) | FLAG(Other_Format)
-            | FLAG(Other_Surrogate) | FLAG(Other_PrivateUse)
-            | FLAG(Other_NotAssigned);
-    // Values beyond LastValidCodePoint are rejected before any table lookup.
-    if (ucs4 > LastValidCodePoint)
-        return false;
-    return (FLAG(qGetProp(ucs4)->category) & nonPrintableMask) == 0;
-}
-
-/*!
- \fn bool QChar::isSpace() const
-
- Returns \c true if the character is a separator character
- (Separator_* categories or certain code points from Other_Control category);
- otherwise returns \c false.
-*/
-
-/*!
- \fn bool QChar::isSpace(uint ucs4)
- \overload
- \since 5.0
-
- Returns \c true if the UCS-4-encoded character specified by \a ucs4 is
- a separator character (Separator_* categories or certain code points
- from Other_Control category); otherwise returns \c false.
-*/
-
-/*!
- \internal
-*/
-bool QT_FASTCALL QChar::isSpace_helper(uint ucs4) noexcept
-{
-    if (ucs4 > LastValidCodePoint)
-        return false; // out-of-range input is never a separator
-    // Mask of the three Separator_* categories.
-    const int separatorMask =
-            FLAG(Separator_Space) | FLAG(Separator_Line) | FLAG(Separator_Paragraph);
-    return (FLAG(qGetProp(ucs4)->category) & separatorMask) != 0;
-}
-
-/*!
- \fn bool QChar::isMark() const
-
- Returns \c true if the character is a mark (Mark_* categories);
- otherwise returns \c false.
-
- See QChar::Category for more information regarding marks.
-*/
-
-/*!
- \overload
- \since 5.0
-
- Returns \c true if the UCS-4-encoded character specified by \a ucs4 is
- a mark (Mark_* categories); otherwise returns \c false.
-*/
-bool QChar::isMark(uint ucs4) noexcept
-{
-    // One bit per Mark_* category.
-    const int markMask = FLAG(Mark_NonSpacing)
-            | FLAG(Mark_SpacingCombining)
-            | FLAG(Mark_Enclosing);
-    // Short-circuit keeps the property lookup from running on invalid input.
-    return ucs4 <= LastValidCodePoint && (FLAG(qGetProp(ucs4)->category) & markMask) != 0;
-}
-
-/*!
- \fn bool QChar::isPunct() const
-
- Returns \c true if the character is a punctuation mark (Punctuation_*
- categories); otherwise returns \c false.
-*/
-
-/*!
- \overload
- \since 5.0
-
- Returns \c true if the UCS-4-encoded character specified by \a ucs4 is
- a punctuation mark (Punctuation_* categories); otherwise returns \c false.
-*/
-bool QChar::isPunct(uint ucs4) noexcept
-{
-    if (ucs4 > LastValidCodePoint)
-        return false;
-
-    // Accept any of the seven Punctuation_* categories.
-    const int punctuationMask = FLAG(Punctuation_Connector) | FLAG(Punctuation_Dash)
-            | FLAG(Punctuation_Open) | FLAG(Punctuation_Close)
-            | FLAG(Punctuation_InitialQuote) | FLAG(Punctuation_FinalQuote)
-            | FLAG(Punctuation_Other);
-    const int categoryBit = FLAG(qGetProp(ucs4)->category);
-    return (categoryBit & punctuationMask) != 0;
-}
-
-/*!
- \fn bool QChar::isSymbol() const
-
- Returns \c true if the character is a symbol (Symbol_* categories);
- otherwise returns \c false.
-*/
-
-/*!
- \overload
- \since 5.0
-
- Returns \c true if the UCS-4-encoded character specified by \a ucs4 is
- a symbol (Symbol_* categories); otherwise returns \c false.
-*/
-bool QChar::isSymbol(uint ucs4) noexcept
-{
-    // Code points past LastValidCodePoint are never symbols.
-    if (ucs4 > LastValidCodePoint)
-        return false;
-    const int symbolMask = FLAG(Symbol_Math) | FLAG(Symbol_Currency)
-            | FLAG(Symbol_Modifier) | FLAG(Symbol_Other);
-    const int categoryBit = FLAG(qGetProp(ucs4)->category);
-    return (categoryBit & symbolMask) != 0;
-}
-
-/*!
- \fn bool QChar::isLetter() const
-
- Returns \c true if the character is a letter (Letter_* categories);
- otherwise returns \c false.
-*/
-
-/*!
- \fn bool QChar::isLetter(uint ucs4)
- \overload
- \since 5.0
-
- Returns \c true if the UCS-4-encoded character specified by \a ucs4 is
- a letter (Letter_* categories); otherwise returns \c false.
-*/
-
-/*!
- \internal
-*/
-bool QT_FASTCALL QChar::isLetter_helper(uint ucs4) noexcept
-{
-    // Mask of all five Letter_* categories.
-    const int letterMask = FLAG(Letter_Uppercase)
-            | FLAG(Letter_Lowercase)
-            | FLAG(Letter_Titlecase)
-            | FLAG(Letter_Modifier)
-            | FLAG(Letter_Other);
-    // The range test comes first so qGetProp() only sees valid code points.
-    return ucs4 <= LastValidCodePoint && (FLAG(qGetProp(ucs4)->category) & letterMask) != 0;
-}
-
-/*!
- \fn bool QChar::isNumber() const
-
- Returns \c true if the character is a number (Number_* categories,
- not just 0-9); otherwise returns \c false.
-
- \sa isDigit()
-*/
-
-/*!
- \fn bool QChar::isNumber(uint ucs4)
- \overload
- \since 5.0
-
- Returns \c true if the UCS-4-encoded character specified by \a ucs4 is
- a number (Number_* categories, not just 0-9); otherwise returns \c false.
-
- \sa isDigit()
-*/
-
-/*!
- \internal
-*/
-bool QT_FASTCALL QChar::isNumber_helper(uint ucs4) noexcept
-{
-    if (ucs4 > LastValidCodePoint)
-        return false;
-    // Mask of the three Number_* categories.
-    const int numberMask =
-            FLAG(Number_DecimalDigit) | FLAG(Number_Letter) | FLAG(Number_Other);
-    return (FLAG(qGetProp(ucs4)->category) & numberMask) != 0;
-}
-
-/*!
- \fn bool QChar::isLetterOrNumber() const
-
- Returns \c true if the character is a letter or number (Letter_* or
- Number_* categories); otherwise returns \c false.
-*/
-
-/*!
- \fn bool QChar::isLetterOrNumber(uint ucs4)
- \overload
- \since 5.0
-
- Returns \c true if the UCS-4-encoded character specified by \a ucs4 is
- a letter or number (Letter_* or Number_* categories); otherwise returns \c false.
-*/
-
-/*!
- \internal
-*/
-bool QT_FASTCALL QChar::isLetterOrNumber_helper(uint ucs4) noexcept
-{
-    if (ucs4 > LastValidCodePoint)
-        return false;
-
-    // Letter_* categories.
-    const int letterMask = FLAG(Letter_Uppercase) | FLAG(Letter_Lowercase)
-            | FLAG(Letter_Titlecase) | FLAG(Letter_Modifier) | FLAG(Letter_Other);
-    // Number_* categories.
-    const int numberMask = FLAG(Number_DecimalDigit) | FLAG(Number_Letter)
-            | FLAG(Number_Other);
-    const int categoryBit = FLAG(qGetProp(ucs4)->category);
-    return (categoryBit & (letterMask | numberMask)) != 0;
-}
-
-/*!
- \fn bool QChar::isDigit() const
-
- Returns \c true if the character is a decimal digit
- (Number_DecimalDigit); otherwise returns \c false.
-
- \sa isNumber()
-*/
-
-/*!
- \fn bool QChar::isDigit(uint ucs4)
- \overload
- \since 5.0
-
- Returns \c true if the UCS-4-encoded character specified by \a ucs4 is
- a decimal digit (Number_DecimalDigit); otherwise returns \c false.
-
- \sa isNumber()
-*/
-
-/*!
- \fn bool QChar::isNonCharacter() const
- \since 5.0
-
- Returns \c true if the QChar is a non-character; false otherwise.
-
- Unicode has a certain number of code points that are classified
- as "non-characters:" that is, they can be used for internal purposes
- in applications but cannot be used for text interchange.
- Those are the last two entries of each Unicode Plane ([0xfffe..0xffff],
- [0x1fffe..0x1ffff], etc.) as well as the entries in range [0xfdd0..0xfdef].
-*/
-
-/*!
- \fn bool QChar::isHighSurrogate() const
-
- Returns \c true if the QChar is the high part of a UTF16 surrogate
- (for example if its code point is in range [0xd800..0xdbff]); false otherwise.
-*/
-
-/*!
- \fn bool QChar::isLowSurrogate() const
-
- Returns \c true if the QChar is the low part of a UTF16 surrogate
- (for example if its code point is in range [0xdc00..0xdfff]); false otherwise.
-*/
-
-/*!
- \fn bool QChar::isSurrogate() const
- \since 5.0
-
- Returns \c true if the QChar contains a code point that is in either
- the high or the low part of the UTF-16 surrogate range
- (for example if its code point is in range [0xd800..0xdfff]); false otherwise.
-*/
-
-/*!
- \fn static bool QChar::isNonCharacter(uint ucs4)
- \overload
- \since 5.0
-
- Returns \c true if the UCS-4-encoded character specified by \a ucs4
- is a non-character; false otherwise.
-
- Unicode has a certain number of code points that are classified
- as "non-characters:" that is, they can be used for internal purposes
- in applications but cannot be used for text interchange.
- Those are the last two entries of each Unicode Plane ([0xfffe..0xffff],
- [0x1fffe..0x1ffff], etc.) as well as the entries in range [0xfdd0..0xfdef].
-*/
-
-/*!
- \fn static bool QChar::isHighSurrogate(uint ucs4)
- \overload
-
- Returns \c true if the UCS-4-encoded character specified by \a ucs4
- is the high part of a UTF16 surrogate
- (for example if its code point is in range [0xd800..0xdbff]); false otherwise.
-*/
-
-/*!
- \fn static bool QChar::isLowSurrogate(uint ucs4)
- \overload
-
- Returns \c true if the UCS-4-encoded character specified by \a ucs4
- is the low part of a UTF16 surrogate
- (for example if its code point is in range [0xdc00..0xdfff]); false otherwise.
-*/
-
-/*!
- \fn static bool QChar::isSurrogate(uint ucs4)
- \overload
- \since 5.0
-
- Returns \c true if the UCS-4-encoded character specified by \a ucs4
- contains a code point that is in either the high or the low part of the
- UTF-16 surrogate range (for example if its code point is in range [0xd800..0xdfff]);
- false otherwise.
-*/
-
-/*!
- \fn static bool QChar::requiresSurrogates(uint ucs4)
-
- Returns \c true if the UCS-4-encoded character specified by \a ucs4
- can be split into the high and low parts of a UTF16 surrogate
- (for example if its code point is greater than or equal to 0x10000);
- false otherwise.
-*/
-
-/*!
- \fn static uint QChar::surrogateToUcs4(ushort high, ushort low)
-
- Converts a UTF16 surrogate pair with the given \a high and \a low values
- to its UCS-4-encoded code point.
-*/
-
-/*!
- \fn static uint QChar::surrogateToUcs4(QChar high, QChar low)
- \overload
-
- Converts a UTF16 surrogate pair (\a high, \a low) to its UCS-4-encoded code point.
-*/
-
-/*!
- \fn static ushort QChar::highSurrogate(uint ucs4)
-
- Returns the high surrogate part of a UCS-4-encoded code point.
- The returned result is undefined if \a ucs4 is smaller than 0x10000.
-*/
-
-/*!
- \fn static ushort QChar::lowSurrogate(uint ucs4)
-
- Returns the low surrogate part of a UCS-4-encoded code point.
- The returned result is undefined if \a ucs4 is smaller than 0x10000.
-*/
-
-/*!
- \fn int QChar::digitValue() const
-
- Returns the numeric value of the digit, or -1 if the character is not a digit.
-*/
-
-/*!
- \overload
- Returns the numeric value of the digit specified by the UCS-4-encoded
- character, \a ucs4, or -1 if the character is not a digit.
-*/
-int QChar::digitValue(uint ucs4) noexcept
-{
-    // Code points beyond LastValidCodePoint yield -1; everything else
-    // reports the digitValue stored in its property record.
-    return ucs4 <= LastValidCodePoint ? qGetProp(ucs4)->digitValue : -1;
-}
-
-/*!
- \fn QChar::Category QChar::category() const
-
- Returns the character's category.
-*/
-
-/*!
- \overload
- Returns the category of the UCS-4-encoded character specified by \a ucs4.
-*/
-QChar::Category QChar::category(uint ucs4) noexcept
-{
-    if (ucs4 > LastValidCodePoint)
-        return QChar::Other_NotAssigned; // fallback for out-of-range input
-    return static_cast<QChar::Category>(qGetProp(ucs4)->category);
-}
-
-/*!
- \fn QChar::Direction QChar::direction() const
-
- Returns the character's direction.
-*/
-
-/*!
- \overload
- Returns the direction of the UCS-4-encoded character specified by \a ucs4.
-*/
-QChar::Direction QChar::direction(uint ucs4) noexcept
-{
-    // DirL is the fallback for values beyond LastValidCodePoint.
-    const bool valid = ucs4 <= LastValidCodePoint;
-    return valid ? static_cast<QChar::Direction>(qGetProp(ucs4)->direction) : QChar::DirL;
-}
-
-/*!
- \fn QChar::JoiningType QChar::joiningType() const
- \since 5.3
-
- Returns information about the joining type attributes of the character
- (needed for certain languages such as Arabic or Syriac).
-*/
-
-/*!
- \overload
- \since 5.3
-
- Returns information about the joining type attributes of the UCS-4-encoded
- character specified by \a ucs4
- (needed for certain languages such as Arabic or Syriac).
-*/
-QChar::JoiningType QChar::joiningType(uint ucs4) noexcept
-{
-    // Values beyond LastValidCodePoint are reported as non-joining.
-    if (ucs4 <= LastValidCodePoint)
-        return static_cast<QChar::JoiningType>(qGetProp(ucs4)->joining);
-    return QChar::Joining_None;
-}
-
-#if QT_DEPRECATED_SINCE(5, 3)
-/*!
- \fn QChar::Joining QChar::joining() const
- \deprecated in 5.3, use joiningType() instead.
-
- Returns information about the joining properties of the character
- (needed for certain languages such as Arabic).
-*/
-
-/*!
- \overload
- \deprecated in 5.3, use joiningType() instead.
-
- Returns information about the joining properties of the UCS-4-encoded
- character specified by \a ucs4 (needed for certain languages such as Arabic).
-*/
-QChar::Joining QChar::joining(uint ucs4) noexcept
-{
-    // Map the JoiningType stored in the property table onto the deprecated
-    // Joining enum; everything without a direct counterpart is OtherJoining.
-    if (ucs4 <= LastValidCodePoint) {
-        const int type = qGetProp(ucs4)->joining;
-        if (type == QChar::Joining_Causing)
-            return QChar::Center;
-        if (type == QChar::Joining_Dual)
-            return QChar::Dual;
-        if (type == QChar::Joining_Right)
-            return QChar::Right;
-    }
-    return QChar::OtherJoining;
-}
-#endif
-
-/*!
- \fn bool QChar::hasMirrored() const
-
- Returns \c true if the character should be reversed if the text
- direction is reversed; otherwise returns \c false.
-
- A bit faster equivalent of (ch.mirroredChar() != ch).
-
- \sa mirroredChar()
-*/
-
-/*!
- \overload
- \since 5.0
-
- Returns \c true if the UCS-4-encoded character specified by \a ucs4
- should be reversed if the text direction is reversed; otherwise returns \c false.
-
- A bit faster equivalent of (QChar::mirroredChar(ucs4) != ucs4).
-
- \sa mirroredChar()
-*/
-bool QChar::hasMirrored(uint ucs4) noexcept
-{
-    // A character is mirrored exactly when its stored mirror offset is non-zero.
-    return ucs4 <= LastValidCodePoint && qGetProp(ucs4)->mirrorDiff != 0;
-}
-
-/*!
- \fn bool QChar::isLower() const
-
- Returns \c true if the character is a lowercase letter, for example
- category() is Letter_Lowercase.
-
- \sa isUpper(), toLower(), toUpper()
-*/
-
-/*!
- \fn static bool QChar::isLower(uint ucs4)
- \overload
- \since 5.0
-
- Returns \c true if the UCS-4-encoded character specified by \a ucs4
- is a lowercase letter, for example category() is Letter_Lowercase.
-
- \sa isUpper(), toLower(), toUpper()
-*/
-
-/*!
- \fn bool QChar::isUpper() const
-
- Returns \c true if the character is an uppercase letter, for example
- category() is Letter_Uppercase.
-
- \sa isLower(), toUpper(), toLower()
-*/
-
-/*!
- \fn static bool QChar::isUpper(uint ucs4)
- \overload
- \since 5.0
-
- Returns \c true if the UCS-4-encoded character specified by \a ucs4
- is an uppercase letter, for example category() is Letter_Uppercase.
-
- \sa isLower(), toUpper(), toLower()
-*/
-
-/*!
- \fn bool QChar::isTitleCase() const
-
- Returns \c true if the character is a titlecase letter, for example
- category() is Letter_Titlecase.
-
- \sa isLower(), toUpper(), toLower(), toTitleCase()
-*/
-
-/*!
- \fn static bool QChar::isTitleCase(uint ucs4)
- \overload
- \since 5.0
-
- Returns \c true if the UCS-4-encoded character specified by \a ucs4
- is a titlecase letter, for example category() is Letter_Titlecase.
-
- \sa isLower(), toUpper(), toLower(), toTitleCase()
-*/
-/*!
- \fn QChar QChar::mirroredChar() const
-
- Returns the mirrored character if this character is a mirrored
- character; otherwise returns the character itself.
-
- \sa hasMirrored()
-*/
-
-/*!
- \overload
- Returns the mirrored character if the UCS-4-encoded character specified
- by \a ucs4 is a mirrored character; otherwise returns the character itself.
-
- \sa hasMirrored()
-*/
-uint QChar::mirroredChar(uint ucs4) noexcept
-{
-    // The table stores the mirror as an offset from the character itself;
-    // values beyond LastValidCodePoint are returned unchanged.
-    if (ucs4 <= LastValidCodePoint)
-        ucs4 += qGetProp(ucs4)->mirrorDiff;
-    return ucs4;
-}
-
-
-// constants for Hangul (de)composition, see UAX #15
-// The helpers in this file use them to decompose a precomposed syllable into
-// L (leading), V (vowel) and optional T (trailing) parts, and to compose back.
-enum {
- Hangul_SBase = 0xac00, // first code point treated as a precomposed syllable
- Hangul_LBase = 0x1100, // base added to the L index
- Hangul_VBase = 0x1161, // base added to the V index
- Hangul_TBase = 0x11a7, // base added to the T index; T index 0 means "no trailing part"
- Hangul_LCount = 19,
- Hangul_VCount = 21,
- Hangul_TCount = 28,
- Hangul_NCount = Hangul_VCount * Hangul_TCount, // syllables per L value
- Hangul_SCount = Hangul_LCount * Hangul_NCount // size of the syllable range starting at SBase
-};
-
-// Looks up the decomposition of ucs4 and returns a pointer to its UTF-16 code
-// units, or nullptr when there is none. *length receives the number of code
-// units and *tag the QChar::Decomposition kind.
-// buffer must hold at least 3 entries: Hangul syllables are decomposed
-// arithmetically into it, in which case the returned pointer is buffer itself.
-static const unsigned short * QT_FASTCALL decompositionHelper
-    (uint ucs4, int *length, int *tag, unsigned short *buffer)
-{
-    if (ucs4 >= Hangul_SBase && ucs4 < Hangul_SBase + Hangul_SCount) {
-        // Hangul syllable decomposition as per UAX #15
-        const uint syllable = ucs4 - Hangul_SBase;
-        const uint trailing = syllable % Hangul_TCount;
-        buffer[0] = Hangul_LBase + syllable / Hangul_NCount; // L
-        buffer[1] = Hangul_VBase + (syllable % Hangul_NCount) / Hangul_TCount; // V
-        buffer[2] = Hangul_TBase + trailing; // T
-        // a zero trailing index means the syllable has no T part
-        *length = trailing == 0 ? 2 : 3;
-        *tag = QChar::Canonical;
-        return buffer;
-    }
-
-    const unsigned short mapIndex = GET_DECOMPOSITION_INDEX(ucs4);
-    if (mapIndex != 0xffff) {
-        // the first entry packs the length (high byte) and the tag (low byte);
-        // the decomposition itself follows it
-        const unsigned short *entry = uc_decomposition_map + mapIndex;
-        const unsigned short header = *entry;
-        *tag = header & 0xff;
-        *length = header >> 8;
-        return entry + 1;
-    }
-
-    // 0xffff marks "no decomposition"
-    *length = 0;
-    *tag = QChar::NoDecomposition;
-    return nullptr;
-}
-
-/*!
- Decomposes a character into its constituent parts. Returns an empty string
- if no decomposition exists.
-*/
-QString QChar::decomposition() const
-{
- // forward this character's UTF-16 code unit to the static UCS-4 overload
- return QChar::decomposition(ucs);
-}
-
-/*!
- \overload
- Decomposes the UCS-4-encoded character specified by \a ucs4 into its
- constituent parts. Returns an empty string if no decomposition exists.
-*/
-QString QChar::decomposition(uint ucs4)
-{
-    // scratch space the helper needs for Hangul syllables
-    unsigned short hangulBuffer[3];
-    int count;
-    int unusedTag;
-    const unsigned short *units = decompositionHelper(ucs4, &count, &unusedTag, hangulBuffer);
-    // when there is no decomposition the helper yields nullptr and a count of 0
-    return QString(reinterpret_cast<const QChar *>(units), count);
-}
-
-/*!
- \fn QChar::Decomposition QChar::decompositionTag() const
-
- Returns the tag defining the decomposition of the character. Returns
- QChar::NoDecomposition if no decomposition exists.
-*/
-
-/*!
- \overload
- Returns the tag defining the decomposition of the UCS-4-encoded character
- specified by \a ucs4. Returns QChar::NoDecomposition if no decomposition exists.
-*/
-QChar::Decomposition QChar::decompositionTag(uint ucs4) noexcept
-{
-    // Hangul syllables are decomposed arithmetically and are always canonical.
-    const bool hangulSyllable = ucs4 >= Hangul_SBase && ucs4 < Hangul_SBase + Hangul_SCount;
-    if (hangulSyllable)
-        return QChar::Canonical;
-
-    // 0xffff marks "no decomposition"; otherwise the tag is the low byte of
-    // the first map entry.
-    const unsigned short mapIndex = GET_DECOMPOSITION_INDEX(ucs4);
-    return mapIndex == 0xffff
-            ? QChar::NoDecomposition
-            : static_cast<QChar::Decomposition>(uc_decomposition_map[mapIndex] & 0xff);
-}
-
-/*!
- \fn unsigned char QChar::combiningClass() const
-
- Returns the combining class for the character as defined in the
- Unicode standard. This is mainly useful as a positioning hint for
- marks attached to a base character.
-
- The Qt text rendering engine uses this information to correctly
- position non-spacing marks around a base character.
-*/
-
-/*!
- \overload
- Returns the combining class for the UCS-4-encoded character specified by
- \a ucs4, as defined in the Unicode standard.
-*/
-unsigned char QChar::combiningClass(uint ucs4) noexcept
-{
-    // Values beyond LastValidCodePoint get combining class 0.
-    if (ucs4 <= LastValidCodePoint)
-        return static_cast<unsigned char>(qGetProp(ucs4)->combiningClass);
-    return 0;
-}
-
-/*!
- \fn QChar::Script QChar::script() const
- \since 5.1
-
- Returns the Unicode script property value for this character.
-*/
-
-/*!
- \overload
- \since 5.1
-
- Returns the Unicode script property value for the character specified in
- its UCS-4-encoded form as \a ucs4.
-*/
-QChar::Script QChar::script(uint ucs4) noexcept
-{
-    // Values beyond LastValidCodePoint belong to no known script.
-    if (ucs4 <= LastValidCodePoint)
-        return static_cast<QChar::Script>(qGetProp(ucs4)->script);
-    return QChar::Script_Unknown;
-}
-
-/*!
- \fn QChar::UnicodeVersion QChar::unicodeVersion() const
-
- Returns the Unicode version that introduced this character.
-*/
-
-/*!
- \overload
- Returns the Unicode version that introduced the character specified in
- its UCS-4-encoded form as \a ucs4.
-*/
-QChar::UnicodeVersion QChar::unicodeVersion(uint ucs4) noexcept
-{
-    // Values beyond LastValidCodePoint were never assigned by any version.
-    if (ucs4 <= LastValidCodePoint)
-        return static_cast<QChar::UnicodeVersion>(qGetProp(ucs4)->unicodeVersion);
-    return QChar::Unicode_Unassigned;
-}
-
-/*!
- Returns the most recent supported Unicode version.
-*/
-QChar::UnicodeVersion QChar::currentUnicodeVersion() noexcept
-{
- // UNICODE_DATA_VERSION is defined outside this function
- // (presumably alongside the generated Unicode tables - confirm).
- return UNICODE_DATA_VERSION;
-}
-
-
-// Applies the case mapping selected by Traits to uc. The caller must pass a
-// value that qGetProp() can look up.
-template <typename Traits, typename T>
-Q_DECL_CONST_FUNCTION static inline T convertCase_helper(T uc) noexcept
-{
-    const QUnicodeTables::Properties *props = qGetProp(uc);
-    // For ordinary characters this is the offset to add to uc; for special
-    // cases it is an index into specialCaseMap instead.
-    const auto diff = Traits::caseDiff(props);
-
-    if (Q_UNLIKELY(Traits::caseSpecial(props))) {
-        // Only single-code-unit mappings are applied here (first entry == 1);
-        // anything else leaves uc untouched.
-        // so far, there are no special cases beyond BMP (guaranteed by the qunicodetables generator)
-        const ushort *entry = specialCaseMap + diff;
-        return entry[0] == 1 ? entry[1] : uc;
-    }
-
-    return uc + diff;
-}
-
-/*!
- \fn QChar QChar::toLower() const
-
- Returns the lowercase equivalent if the character is uppercase or titlecase;
- otherwise returns the character itself.
-*/
-
-/*!
- \overload
- Returns the lowercase equivalent of the UCS-4-encoded character specified
- by \a ucs4 if the character is uppercase or titlecase; otherwise returns
- the character itself.
-*/
-uint QChar::toLower(uint ucs4) noexcept
-{
-    // Values beyond LastValidCodePoint are passed through unchanged.
-    return ucs4 > LastValidCodePoint
-            ? ucs4
-            : convertCase_helper<QUnicodeTables::LowercaseTraits>(ucs4);
-}
-
-/*!
- \fn QChar QChar::toUpper() const
-
- Returns the uppercase equivalent if the character is lowercase or titlecase;
- otherwise returns the character itself.
-*/
-
-/*!
- \overload
- Returns the uppercase equivalent of the UCS-4-encoded character specified
- by \a ucs4 if the character is lowercase or titlecase; otherwise returns
- the character itself.
-*/
-uint QChar::toUpper(uint ucs4) noexcept
-{
-    // Values beyond LastValidCodePoint are passed through unchanged.
-    return ucs4 > LastValidCodePoint
-            ? ucs4
-            : convertCase_helper<QUnicodeTables::UppercaseTraits>(ucs4);
-}
-
-/*!
- \fn QChar QChar::toTitleCase() const
-
- Returns the title case equivalent if the character is lowercase or uppercase;
- otherwise returns the character itself.
-*/
-
-/*!
- \overload
- Returns the title case equivalent of the UCS-4-encoded character specified
- by \a ucs4 if the character is lowercase or uppercase; otherwise returns
- the character itself.
-*/
-uint QChar::toTitleCase(uint ucs4) noexcept
-{
-    // Values beyond LastValidCodePoint are passed through unchanged.
-    return ucs4 > LastValidCodePoint
-            ? ucs4
-            : convertCase_helper<QUnicodeTables::TitlecaseTraits>(ucs4);
-}
-
-// Case-folds the code point ending at *ch. If *ch is a low surrogate preceded
-// (not before start) by a high surrogate, the pair is folded as one code point.
-static inline uint foldCase(const ushort *ch, const ushort *start)
-{
-    const uint unit = *ch;
-    // ch > start is tested before ch[-1] is read
-    const bool completesPair = QChar::isLowSurrogate(unit)
-            && ch > start
-            && QChar::isHighSurrogate(ch[-1]);
-    const uint ucs4 = completesPair ? QChar::surrogateToUcs4(ch[-1], unit) : unit;
-    return convertCase_helper<QUnicodeTables::CasefoldTraits>(ucs4);
-}
-
-// Case-folds ch, combining it with the previously seen code unit (last) when
-// the two form a surrogate pair. last is updated to ch for the next call.
-static inline uint foldCase(uint ch, uint &last) noexcept
-{
-    const uint previous = last;
-    last = ch;
-    const bool completesPair = QChar::isLowSurrogate(ch) && QChar::isHighSurrogate(previous);
-    const uint ucs4 = completesPair ? QChar::surrogateToUcs4(previous, ch) : ch;
-    return convertCase_helper<QUnicodeTables::CasefoldTraits>(ucs4);
-}
-
-// Case-folds a single UTF-16 code unit; no surrogate pairing is attempted here.
-static inline ushort foldCase(ushort ch) noexcept
-{
- return convertCase_helper<QUnicodeTables::CasefoldTraits>(ch);
-}
-
-// QChar convenience wrapper: folds the character's code unit via the ushort overload.
-static inline QChar foldCase(QChar ch) noexcept
-{
- return QChar(foldCase(ch.unicode()));
-}
-
-/*!
- \fn QChar QChar::toCaseFolded() const
-
- Returns the case folded equivalent of the character.
- For most Unicode characters this is the same as toLower().
-*/
-
-/*!
- \overload
- Returns the case folded equivalent of the UCS-4-encoded character specified
- by \a ucs4. For most Unicode characters this is the same as toLower().
-*/
-uint QChar::toCaseFolded(uint ucs4) noexcept
-{
-    // Values beyond LastValidCodePoint are passed through unchanged.
-    return ucs4 > LastValidCodePoint
-            ? ucs4
-            : convertCase_helper<QUnicodeTables::CasefoldTraits>(ucs4);
-}
-
-/*!
- \fn char QChar::toLatin1() const
-
- Returns the Latin-1 character equivalent to the QChar, or 0. This
- is mainly useful for non-internationalized software.
-
- \note It is not possible to distinguish a non-Latin-1 character from a Latin-1 0
- (NUL) character. Prefer to use unicode(), which does not have this ambiguity.
-
- \sa unicode()
-*/
-
-/*!
- \fn QChar QChar::fromLatin1(char)
-
- Converts the Latin-1 character \a c to its equivalent QChar. This
- is mainly useful for non-internationalized software.
-
- An alternative is to use QLatin1Char.
-
- \sa toLatin1(), unicode()
-*/
-
-/*!
- \fn char QChar::toAscii() const
- \deprecated
-
- Returns the Latin-1 character value of the QChar, or 0 if the character is not
- representable.
-
- The main purpose of this function is to preserve ASCII characters used
- in C strings. This is mainly useful for developers of non-internationalized
- software.
-
- \note It is not possible to distinguish a non-Latin 1 character from an ASCII 0
- (NUL) character. Prefer to use unicode(), which does not have this ambiguity.
-
- \note This function does not check whether the character value is inside
- the valid range of US-ASCII.
-
- \sa toLatin1(), unicode()
-*/
-
-/*!
- \fn QChar QChar::fromAscii(char)
- \deprecated
-
- Converts the ASCII character \a c to its equivalent QChar. This
- is mainly useful for non-internationalized software.
-
- An alternative is to use QLatin1Char.
-
- \sa fromLatin1(), unicode()
-*/
-
-#ifndef QT_NO_DATASTREAM
-/*!
- \relates QChar
-
- Writes the char \a chr to the stream \a out.
-
- \sa {Serializing Qt Data Types}
-*/
-QDataStream &operator<<(QDataStream &out, QChar chr)
-{
-    // a QChar is serialized as its 16-bit code unit
-    const quint16 codeUnit = quint16(chr.unicode());
-    return out << codeUnit;
-}
-
-/*!
- \relates QChar
-
- Reads a char from the stream \a in into char \a chr.
-
- \sa {Serializing Qt Data Types}
-*/
-QDataStream &operator>>(QDataStream &in, QChar &chr)
-{
-    // read the 16-bit code unit, then store it in chr
-    quint16 codeUnit;
-    in >> codeUnit;
-    chr = QChar(ushort(codeUnit));
-    return in;
-}
-#endif // QT_NO_DATASTREAM
-
-/*!
- \fn ushort & QChar::unicode()
-
- Returns a reference to the numeric Unicode value of the QChar.
-*/
-
-/*!
- \fn ushort QChar::unicode() const
-
- Returns the numeric Unicode value of the QChar.
-*/
-
-/*****************************************************************************
- Documentation of QChar related functions
- *****************************************************************************/
-
-/*!
- \fn bool operator==(QChar c1, QChar c2)
-
- \relates QChar
-
- Returns \c true if \a c1 and \a c2 are the same Unicode character;
- otherwise returns \c false.
-*/
-
-/*!
- \fn int operator!=(QChar c1, QChar c2)
-
- \relates QChar
-
- Returns \c true if \a c1 and \a c2 are not the same Unicode
- character; otherwise returns \c false.
-*/
-
-/*!
- \fn int operator<=(QChar c1, QChar c2)
-
- \relates QChar
-
- Returns \c true if the numeric Unicode value of \a c1 is less than
- or equal to that of \a c2; otherwise returns \c false.
-*/
-
-/*!
- \fn int operator>=(QChar c1, QChar c2)
-
- \relates QChar
-
- Returns \c true if the numeric Unicode value of \a c1 is greater than
- or equal to that of \a c2; otherwise returns \c false.
-*/
-
-/*!
- \fn int operator<(QChar c1, QChar c2)
-
- \relates QChar
-
- Returns \c true if the numeric Unicode value of \a c1 is less than
- that of \a c2; otherwise returns \c false.
-*/
-
-/*!
- \fn int operator>(QChar c1, QChar c2)
-
- \relates QChar
-
- Returns \c true if the numeric Unicode value of \a c1 is greater than
- that of \a c2; otherwise returns \c false.
-*/
-
-
-// ---------------------------------------------------------------------------
-
-
-// Decomposes, in place, the part of *str that starts at index from.
-// With canonical set, only decompositions tagged QChar::Canonical are applied.
-// Code points whose unicodeVersion is greater than version are left untouched.
-// The string is walked backwards from its end.
-static void decomposeHelper(QString *str, bool canonical, QChar::UnicodeVersion version, int from)
-{
- int length;
- int tag;
- unsigned short buffer[3];
-
- QString &s = *str;
-
- const unsigned short *utf16 = reinterpret_cast<unsigned short *>(s.data());
- const unsigned short *uc = utf16 + s.length();
- while (uc != utf16 + from) {
- uint ucs4 = *(--uc);
- // join a low surrogate with the high surrogate in front of it, if there is one
- if (QChar(ucs4).isLowSurrogate() && uc != utf16) {
- ushort high = *(uc - 1);
- if (QChar(high).isHighSurrogate()) {
- --uc;
- ucs4 = QChar::surrogateToUcs4(high, ucs4);
- }
- }
-
- if (QChar::unicodeVersion(ucs4) > version)
- continue;
-
- const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer);
- if (!d || (canonical && tag != QChar::Canonical))
- continue;
-
- int pos = uc - utf16;
- s.replace(pos, QChar::requiresSurrogates(ucs4) ? 2 : 1, reinterpret_cast<const QChar *>(d), length);
- // since the replace invalidates the pointers and we do decomposition recursive
- // (uc is placed just past the inserted text, so the backward walk visits
- // the replacement and decomposes it further)
- utf16 = reinterpret_cast<unsigned short *>(s.data());
- uc = utf16 + pos + length;
- }
-}
-
-
-// A pair of UTF-16 code units. ligatureHelper() reads the ligature table
-// through it, searching on u1 and returning u2.
-struct UCS2Pair {
-    ushort u1;
-    ushort u2;
-};
-
-// Ordering on u1 only, including the mixed forms std::lower_bound() needs.
-inline bool operator<(const UCS2Pair &lhs, const UCS2Pair &rhs)
-{
-    return lhs.u1 < rhs.u1;
-}
-inline bool operator<(ushort key, const UCS2Pair &entry)
-{
-    return key < entry.u1;
-}
-inline bool operator<(const UCS2Pair &entry, ushort key)
-{
-    return entry.u1 < key;
-}
-
-// Two surrogate pairs. ligatureHelper() reads the ligature table through it,
-// searching on the code point encoded by p1 and returning the one encoded by p2.
-struct UCS2SurrogatePair {
-    UCS2Pair p1;
-    UCS2Pair p2;
-};
-
-// Ordering on the code point encoded by p1 only, including the mixed forms
-// std::lower_bound() needs.
-inline bool operator<(const UCS2SurrogatePair &lhs, const UCS2SurrogatePair &rhs)
-{
-    const uint left = QChar::surrogateToUcs4(lhs.p1.u1, lhs.p1.u2);
-    const uint right = QChar::surrogateToUcs4(rhs.p1.u1, rhs.p1.u2);
-    return left < right;
-}
-inline bool operator<(uint key, const UCS2SurrogatePair &entry)
-{
-    return key < QChar::surrogateToUcs4(entry.p1.u1, entry.p1.u2);
-}
-inline bool operator<(const UCS2SurrogatePair &entry, uint key)
-{
-    return QChar::surrogateToUcs4(entry.p1.u1, entry.p1.u2) < key;
-}
-
-// Returns the code point that the sequence (u1, u2) composes to, or 0 when
-// the two do not compose.
-// Hangul L+V and LV+T pairs are composed arithmetically; everything else is
-// looked up in the ligature table, which is indexed by u2 and searched by u1.
-static uint inline ligatureHelper(uint u1, uint u2)
-{
-    if (u1 >= Hangul_LBase && u1 < Hangul_SBase + Hangul_SCount) {
-        // compute Hangul syllable composition as per UAX #15
-        // hangul L-V pair
-        const uint LIndex = u1 - Hangul_LBase;
-        if (LIndex < Hangul_LCount) {
-            const uint VIndex = u2 - Hangul_VBase;
-            if (VIndex < Hangul_VCount)
-                return Hangul_SBase + (LIndex * Hangul_VCount + VIndex) * Hangul_TCount;
-        }
-        // hangul LV-T pair
-        const uint SIndex = u1 - Hangul_SBase;
-        if (SIndex < Hangul_SCount && (SIndex % Hangul_TCount) == 0) {
-            const uint TIndex = u2 - Hangul_TBase;
-            // Valid trailing consonants have 0 < TIndex < TCount. TIndex 0 is
-            // Hangul_TBase itself, which is not a trailing consonant, and
-            // TIndex == TCount would yield the *next* LV syllable.
-            if (TIndex > 0 && TIndex < Hangul_TCount)
-                return u1 + TIndex;
-        }
-    }
-
-    const unsigned short index = GET_LIGATURE_INDEX(u2);
-    if (index == 0xffff)
-        return 0; // nothing composes with u2
-    const unsigned short *ligatures = uc_ligature_map+index;
-    // the first entry is the number of records that follow
-    ushort length = *ligatures++;
-    if (QChar::requiresSurrogates(u1)) {
-        // records are (u1, result) stored as surrogate pairs, searched by u1
-        const UCS2SurrogatePair *data = reinterpret_cast<const UCS2SurrogatePair *>(ligatures);
-        const UCS2SurrogatePair *r = std::lower_bound(data, data + length, u1);
-        if (r != data + length && QChar::surrogateToUcs4(r->p1.u1, r->p1.u2) == u1)
-            return QChar::surrogateToUcs4(r->p2.u1, r->p2.u2);
-    } else {
-        // records are (u1, result) stored as single code units, searched by u1
-        const UCS2Pair *data = reinterpret_cast<const UCS2Pair *>(ligatures);
-        const UCS2Pair *r = std::lower_bound(data, data + length, ushort(u1));
-        if (r != data + length && r->u1 == ushort(u1))
-            return r->u2;
-    }
-
-    return 0;
-}
-
-// Composes, in place, the part of *str that starts at index from: each
-// character that ligatureHelper() can combine with the current starter is
-// merged into it.
-// Characters whose unicodeVersion is greater than version are skipped and
-// reset the starter state. Nothing is done when from is negative or fewer
-// than two code units follow it.
-static void composeHelper(QString *str, QChar::UnicodeVersion version, int from)
-{
-    QString &s = *str;
-
-    if (from < 0 || s.length() - from < 2)
-        return;
-
-    uint stcode = 0; // starter code point
-    int starter = -1; // starter position
-    int next = -1; // to prevent i == next
-    int lastCombining = 255; // to prevent combining > lastCombining
-
-    int pos = from;
-    while (pos < s.length()) {
-        // i is the index of the first code unit of the current character;
-        // pos moves on to its low surrogate if it is a surrogate pair
-        int i = pos;
-        uint uc = s.at(pos).unicode();
-        if (QChar(uc).isHighSurrogate() && pos < s.length()-1) {
-            ushort low = s.at(pos+1).unicode();
-            if (QChar(low).isLowSurrogate()) {
-                uc = QChar::surrogateToUcs4(uc, low);
-                ++pos;
-            }
-        }
-
-        const QUnicodeTables::Properties *p = qGetProp(uc);
-        if (p->unicodeVersion > version) {
-            starter = -1;
-            next = -1; // to prevent i == next
-            lastCombining = 255; // to prevent combining > lastCombining
-            ++pos;
-            continue;
-        }
-
-        int combining = p->combiningClass;
-        if ((i == next || combining > lastCombining) && starter >= from) {
-            // allowed to form ligature with S
-            uint ligature = ligatureHelper(stcode, uc);
-            if (ligature) {
-                stcode = ligature;
-                QChar *d = s.data();
-                // ligatureHelper() never changes planes
-                if (QChar::requiresSurrogates(ligature)) {
-                    d[starter] = QChar(QChar::highSurrogate(ligature));
-                    d[starter + 1] = QChar(QChar::lowSurrogate(ligature));
-                    s.remove(i, 2);
-                } else {
-                    d[starter] = QChar(ligature);
-                    s.remove(i, 1);
-                }
-                // The consumed character was removed, so the next unexamined
-                // code unit now sits at index i. For a surrogate pair pos was
-                // already advanced to i + 1; without this reset the code unit
-                // following the composed mark would be skipped.
-                pos = i;
-                continue;
-            }
-        }
-        if (combining == 0) {
-            starter = i;
-            stcode = uc;
-            next = pos + 1;
-        }
-        lastCombining = combining;
-
-        ++pos;
-    }
-}
-
-
-// Reorders, in place, the part of *str that starts at index from so that
-// adjacent characters with non-zero combining classes are sorted by class.
-// Whenever a character has a higher class than the one after it the two are
-// swapped and the scan steps back one character.
-// Characters whose unicodeVersion is greater than version are treated as
-// having combining class 0.
-static void canonicalOrderHelper(QString *str, QChar::UnicodeVersion version, int from)
-{
- QString &s = *str;
- const int l = s.length()-1; // index of the last code unit
-
- uint u1, u2; // current code point and the one following it
- ushort c1, c2; // their combining classes
-
- int pos = from;
- while (pos < l) {
- int p2 = pos+1;
- u1 = s.at(pos).unicode();
- if (QChar(u1).isHighSurrogate()) {
- ushort low = s.at(p2).unicode();
- if (QChar(low).isLowSurrogate()) {
- u1 = QChar::surrogateToUcs4(u1, low);
- // a surrogate pair at the very end has nothing after it to compare with
- if (p2 >= l)
- break;
- ++p2;
- }
- }
- c1 = 0; // looked up lazily below, only once c2 turns out to be non-zero
-
- advance:
- u2 = s.at(p2).unicode();
- if (QChar(u2).isHighSurrogate() && p2 < l) {
- ushort low = s.at(p2+1).unicode();
- if (QChar(low).isLowSurrogate()) {
- u2 = QChar::surrogateToUcs4(u2, low);
- ++p2;
- }
- }
-
- c2 = 0;
- {
- const QUnicodeTables::Properties *p = qGetProp(u2);
- if (p->unicodeVersion <= version)
- c2 = p->combiningClass;
- }
- if (c2 == 0) {
- // u2 has class 0: nothing to reorder across it, resume after it
- pos = p2+1;
- continue;
- }
-
- if (c1 == 0) {
- const QUnicodeTables::Properties *p = qGetProp(u1);
- if (p->unicodeVersion <= version)
- c1 = p->combiningClass;
- }
-
- if (c1 > c2) {
- QChar *uc = s.data();
- int p = pos;
- // exchange characters
- if (!QChar::requiresSurrogates(u2)) {
- uc[p++] = QChar(u2);
- } else {
- uc[p++] = QChar(QChar::highSurrogate(u2));
- uc[p++] = QChar(QChar::lowSurrogate(u2));
- }
- if (!QChar::requiresSurrogates(u1)) {
- uc[p++] = QChar(u1);
- } else {
- uc[p++] = QChar(QChar::highSurrogate(u1));
- uc[p++] = QChar(QChar::lowSurrogate(u1));
- }
- // step back one character (two code units if it ends in a low
- // surrogate) so the swapped-in character is compared with its predecessor
- if (pos > 0)
- --pos;
- if (pos > 0 && s.at(pos).isLowSurrogate())
- --pos;
- } else {
- // already in order: u2 becomes the new u1 and the comparison moves on
- ++pos;
- if (QChar::requiresSurrogates(u1))
- ++pos;
-
- u1 = u2;
- c1 = c2; // != 0
- p2 = pos + 1;
- if (QChar::requiresSurrogates(u1))
- ++p2;
- if (p2 > l)
- break;
-
- goto advance;
- }
- }
-}
-
-// returns true if the text is in a desired Normalization Form already; false otherwise.
-// sets lastStable to the position of the last stable code point
-// Only the part of *str starting at index from is examined. *lastStable is
-// written only when a stable code point is actually seen, and may have been
-// updated even when false is returned.
-static bool normalizationQuickCheckHelper(QString *str, QString::NormalizationForm mode, int from, int *lastStable)
-{
- // mode is used below as an index into the packed nfQuickCheck bits, so the
- // enumerator values must not change
- Q_STATIC_ASSERT(QString::NormalizationForm_D == 0);
- Q_STATIC_ASSERT(QString::NormalizationForm_C == 1);
- Q_STATIC_ASSERT(QString::NormalizationForm_KD == 2);
- Q_STATIC_ASSERT(QString::NormalizationForm_KC == 3);
-
- enum { NFQC_YES = 0, NFQC_NO = 1, NFQC_MAYBE = 3 };
-
- const ushort *string = reinterpret_cast<const ushort *>(str->constData());
- int length = str->length();
-
- // this avoids one out of bounds check in the loop
- // (after trimming trailing high surrogates, string[i + 1] below stays
- // inside the original string)
- while (length > from && QChar::isHighSurrogate(string[length - 1]))
- --length;
-
- uchar lastCombining = 0;
- for (int i = from; i < length; ++i) {
- int pos = i;
- uint uc = string[i];
- if (uc < 0x80) {
- // ASCII characters are stable code points
- lastCombining = 0;
- *lastStable = pos;
- continue;
- }
-
- if (QChar::isHighSurrogate(uc)) {
- ushort low = string[i + 1];
- if (!QChar::isLowSurrogate(low)) {
- // treat surrogate like stable code point
- lastCombining = 0;
- *lastStable = pos;
- continue;
- }
- ++i;
- uc = QChar::surrogateToUcs4(uc, low);
- }
-
- const QUnicodeTables::Properties *p = qGetProp(uc);
-
- // a non-zero combining class lower than the previous one means the text
- // is not in canonical order
- if (p->combiningClass < lastCombining && p->combiningClass > 0)
- return false;
-
- // nfQuickCheck packs two bits per normalization form, selected by mode
- const uchar check = (p->nfQuickCheck >> (mode << 1)) & 0x03;
- if (check != NFQC_YES)
- return false; // ### can we quick check NFQC_MAYBE ?
-
- lastCombining = p->combiningClass;
- if (lastCombining == 0)
- *lastStable = pos;
- }
-
- if (length != str->length()) // unpaired high surrogates were trimmed from the end of text
- *lastStable = str->length() - 1;
-
- return true;
-}
-
-QT_END_NAMESPACE