Skip to content

Commit 35f2854

Browse files
committed
Apply @kidayasuo's proposal
The proposal is at: w3c/csswg-drafts#9503 (comment)
1 parent 3d63693 commit 35f2854

File tree

2 files changed

+29
-29
lines changed

2 files changed

+29
-29
lines changed

auto-spacing.txt

Lines changed: 24 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -586,26 +586,26 @@
586586
2DD8..2DDE ; N # N ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO
587587
2DE0..2DFF ; N # N COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
588588
2E2F ; N # N VERTICAL TILDE
589-
2E80..2E99 ; W # W CJK RADICAL REPEAT..CJK RADICAL RAP
590-
2E9B..2EF3 ; W # W CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE
591-
2F00..2FD5 ; W # W KANGXI RADICAL ONE..KANGXI RADICAL FLUTE
589+
2E80..2E99 ; O # W CJK RADICAL REPEAT..CJK RADICAL RAP
590+
2E9B..2EF3 ; O # W CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE
591+
2F00..2FD5 ; O # W KANGXI RADICAL ONE..KANGXI RADICAL FLUTE
592592
2FF0..2FFB ; O # W IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
593593
3000 ; O # F IDEOGRAPHIC SPACE
594594
3001..3004 ; O # W IDEOGRAPHIC COMMA..JAPANESE INDUSTRIAL STANDARD SYMBOL
595-
3005 ; W # W IDEOGRAPHIC ITERATION MARK
596-
3006 ; N # W IDEOGRAPHIC CLOSING MARK
597-
3007 ; W # W IDEOGRAPHIC NUMBER ZERO
598-
3008..3020 ; O # W LEFT ANGLE BRACKET..POSTAL MARK FACE
599-
3021..3029 ; W # W HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE
595+
3005 ; N # W IDEOGRAPHIC ITERATION MARK
596+
3006 ; W # W IDEOGRAPHIC CLOSING MARK
597+
3007..3029 ; O # W IDEOGRAPHIC NUMBER ZERO..HANGZHOU NUMERAL NINE
600598
302A..302F ; N # W IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK
601599
3030 ; O # W WAVY DASH
602-
3031..3035 ; N # W VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF
603-
3036..3037 ; O # W CIRCLED POSTAL MARK..IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL
604-
3038..303B ; W # W HANGZHOU NUMERAL TEN..VERTICAL IDEOGRAPHIC ITERATION MARK
605-
303C ; N # W MASU MARK
600+
3031..3035 ; W # W VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF
601+
3036..303A ; O # W CIRCLED POSTAL MARK..HANGZHOU NUMERAL THIRTY
602+
303B ; N # W VERTICAL IDEOGRAPHIC ITERATION MARK
603+
303C ; W # W MASU MARK
606604
303D..303E ; O # W PART ALTERNATION MARK..IDEOGRAPHIC VARIATION INDICATOR
607605
3041..3096 ; W # W HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE
608-
3099..309F ; W # W COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..HIRAGANA DIGRAPH YORI
606+
3099..309A ; N # W COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
607+
309B..309C ; O # W KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
608+
309D..309F ; W # W HIRAGANA ITERATION MARK..HIRAGANA DIGRAPH YORI
609609
30A0 ; O # W KATAKANA-HIRAGANA DOUBLE HYPHEN
610610
30A1..30FA ; W # W KATAKANA LETTER SMALL A..KATAKANA LETTER VO
611611
30FB ; O # W KATAKANA MIDDLE DOT
@@ -614,7 +614,7 @@
614614
3131..318E ; N # W HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE
615615
3190..319F ; O # W IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION MAN MARK
616616
31A0..31BF ; N # W BOPOMOFO LETTER BU..BOPOMOFO LETTER AH
617-
31C0..31E3 ; W # W CJK STROKE T..CJK STROKE Q
617+
31C0..31E3 ; O # W CJK STROKE T..CJK STROKE Q
618618
31F0..31FF ; W # W KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
619619
3200..321E ; O # W PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU
620620
3220..3247 ; O # W PARENTHESIZED IDEOGRAPH ONE..CIRCLED IDEOGRAPH KOTO
@@ -890,19 +890,20 @@ FFFD ; O # A REPLACEMENT CHARACTER
890890
16F8F..16F9F ; N # N MIAO TONE RIGHT..MIAO LETTER REFORMED TONE-8
891891
16FE0..16FE1 ; N # W TANGUT ITERATION MARK..NUSHU ITERATION MARK
892892
16FE2 ; O # W OLD CHINESE HOOK MARK
893-
16FE3..16FE4 ; N # W OLD CHINESE ITERATION MARK..KHITAN SMALL SCRIPT FILLER
894-
16FF0..16FF1 ; W # W VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY
895-
17000..187F7 ; N # W ..
896-
18800..18CD5 ; N # W TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5
897-
18D00..18D08 ; N # W ..
893+
16FE3 ; W # W OLD CHINESE ITERATION MARK
894+
16FE4 ; N # W KHITAN SMALL SCRIPT FILLER
895+
16FF0..16FF1 ; N # W VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY
896+
17000..187F7 ; W # W ..
897+
18800..18CD5 ; W # W TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5
898+
18D00..18D08 ; W # W ..
898899
1AFF0..1AFF3 ; O # W KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5
899900
1AFF5..1AFFB ; O # W KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5
900901
1AFFD..1AFFE ; O # W KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8
901-
1B000..1B11E ; N # W KATAKANA LETTER ARCHAIC E..HENTAIGANA LETTER N-MU-MO-2
902+
1B000..1B11E ; W # W KATAKANA LETTER ARCHAIC E..HENTAIGANA LETTER N-MU-MO-2
902903
1B11F..1B122 ; O # W HIRAGANA LETTER ARCHAIC WU..KATAKANA LETTER ARCHAIC WU
903-
1B150..1B152 ; N # W HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO
904-
1B164..1B167 ; N # W KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N
905-
1B170..1B2FB ; N # W NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB
904+
1B150..1B152 ; W # W HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO
905+
1B164..1B167 ; W # W KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N
906+
1B170..1B2FB ; W # W NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB
906907
1BC00..1BC6A ; N # N DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M
907908
1BC70..1BC7C ; N # N DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK
908909
1BC80..1BC88 ; N # N DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL

src/auto-spacing.py

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -8,13 +8,12 @@
88
class AutoSpacing(object):
99

1010
def __init__(self) -> None:
11-
# https://drafts.csswg.org/css-text-4/#text-spacing-classes
12-
ideographs = UnicodeSet(r'[[:sc=Han:]]')
13-
ideographs.addAll(UnicodeSet(r'[[\u3041-\u30FF]-[:P:]]'))
14-
ideographs.addAll(UnicodeSet(r'[[\u31C0-\u31FF]]'))
11+
# https://github.com/w3c/csswg-drafts/pull/9503#discussion_r1374477268
12+
ideographs = UnicodeSet(
13+
r'[[[:sc=Hiragana:][:sc=Katakana:][:sc=Common:][:ideographic:]&[:gc=L:]&[:ea=W:]]' +
14+
r'[[:gc=L:][:gc=Nl:]&[:gc=So:]&[:sc=Hani:]]]')
1515

16-
letters_numerals = UnicodeSet(r'[[:L:][:M:][:Nd:]]')
17-
letters_numerals.removeAll(UnicodeSet(r'[[:ea=F:]]'))
16+
letters_numerals = UnicodeSet(r'[[:L:][:M:][:Nd:]-[:ea=F:]]')
1817

1918
self.ideographs = ideographs
2019
self.letters_numerals = letters_numerals

0 commit comments

Comments
 (0)