diff --git a/src/ng/parse.js b/src/ng/parse.js index 5597acd876cc..6ba954afbee1 100644 --- a/src/ng/parse.js +++ b/src/ng/parse.js @@ -85,6 +85,116 @@ var OPERATORS = { }; var ESCAPE = {"n":"\n", "f":"\f", "r":"\r", "t":"\t", "v":"\v", "'":"'", '"':'"'}; +var UNICODE = (function () { /* Character-class lookup built once when this function expression runs; the returned object exposes isLetter, isCombiningMark, isDigit and isConnectorPunctuation. */ + var letterRanges = { /* Delta-encoded table: each key is a run length, each value is one offset (or a list of offsets) measured from the previous run start, beginning at 0 for every key; rangesToObject decodes it. */ + 1: [ + 170, 11, 5, 562, 2, 152, 6, 461, 380, 42, 17, 161, 73, 32, 10, 4, 277, 19, 98, 11, 17, 144, + 95, 19, 109, 52, 18, 25, 52, 109, 128, 33, 95, 128, 199, 6, 3, 24, 2, 22, 9, 58, 319, 34, 45, + 110, 348, 104, 1303, 5, 206, 509, 1202, 2, 2, 97, 179, 14, 131, 5, 14, 15, 2, 2, 38, 3105, + 192, 31436, 212, 171, 55, 15, 2, 20571, 33 + ], + 2: [ + 886, 760, 119, 9, 262, 381, 30, 77, 20, 31, 35, 3, 3, 122, 46, 47, 35, 42, 61, 5, 5, 181, 8, + 128, 128, 210, 79, 6, 35, 8, 42, 393, 2889, 35452, 353, 810, 20619, 3 + ], + 3: [ + 904, 616, 266, 741, 147, 29, 208, 47, 26, 102, 128, 128, 403, 461, 1664, 128, 1247, 885, 48, + 4115, 152, 30566, 573, 155, 21759 + ], + 4: [ + 890, 1596, 163, 313, 770, 25, 219, 210, 496, 16, 48, 40, 16, 80, 1020, 1499, 5, 738, 346, 18, + 2991, 1041, 30475 + ], + 5: [736, 144, 1861, 128, 256, 128, 523, 2736, 1824, 137, 44, 3820, 7, 31361, 20570, 37, 824], + 6: [2565, 384, 501, 726, 3784, 48, 142, 35100, 22224, 8, 8], + 7: [ + 2425, 49, 128, 128, 128, 662, 128, 89, 951, 104, 1801, 388, 1137, 16, 48, 3498, 8, 8, 8, 8, 8, + 8, 8, 31267, 21253 + ], + 8: [2437, 384, 256, 128, 128, 571, 4112, 35572], 9: [2693, 814, 8814, 30454], + 10: [2392, 722, 128, 5216, 55829], 11: 8495, 12: [710, 2280, 62018], + 13: [4213, 1675, 96, 2176, 34434, 21704], 15: 4808, 16: [3370, 1622, 7792, 29728], 17: 5743, + 18: [3461, 2459, 32], 20: 910, 22: [2048, 403, 128, 128, 128], + 23: [192, 2898, 128, 128, 3310, 4992, 31372, 292, 304, 11600], 24: [3482, 9222], 25: 42623, + 26: [65, 32, 5664, 59552, 32], 27: 1488, 28: 43274, 29: [6400, 36960], 30: [1810, 4670, 563], + 31: [216, 7815], 32: 42560, 33: [1994, 2758], 35: 43968, 36: [3913, 3255, 90], + 38: [1329, 2927, 3712, 
3552], 39: 1377, 41: [4704, 1568, 2272, 4005, 30971], 42: 1569, + 43: [4096, 208], 44: 6528, 46: 42192, 47: [6917, 4347, 48, 32084], 48: [3585, 40063], 49: 55243, + 50: 43138, 52: [6016, 37056], 53: [6688, 1376], 54: [2308, 9260, 53346], 57: 4824, 62: 64048, + 64: 64848, 67: 4888, 70: 6320, 75: 5792, 80: 42656, 83: 931, 85: 5024, 86: 12353, 88: 6176, + 89: [1869, 63513], 90: 12449, 94: 12593, 99: 1649, 103: 42786, 106: 64112, 108: 64326, + 133: 11360, 135: 65142, 139: 1015, 156: 1162, 192: 7424, 269: 42240, 278: 7680, 302: 63744, + 329: 4352, 363: 64467, 458: 248, 620: 5121, 1165: 40960, 6582: 13312, 11172: 44032, + 20940: 19968 + }; + var combiningMarkRanges = { /* same run-length and offset encoding as letterRanges */ + 1: [ + 1471, 8, 169, 161, 555, 128, 27, 101, 21, 36, 71, 128, 70, 85, 229, 155, 115, 12, 91, 128, + 132, 2, 2, 141, 201, 720, 1150, 204, 470, 622, 5, 1007, 34190, 403, 4, 5, 568, 56, 53, 17, + 20573 + ], + 2: [ + 1473, 3, 291, 635, 101, 27, 101, 41, 114, 101, 15, 12, 243, 13, 32, 83, 13, 32, 96, 32, 112, + 201, 93, 38, 72, 1996, 32, 598, 5841, 30179, 116, 400, 460, 107, 7, 302 + ], + 3: [ + 2085, 348, 74, 54, 74, 54, 70, 4, 54, 74, 123, 59, 69, 128, 128, 792, 4, 1712, 32, 217, 885, + 336, 301, 3826, 32195 + ], + 4: [1770, 300, 234, 714, 128, 128, 128, 780, 27, 41, 39142], + 5: [1155, 934, 296, 237, 384, 3673, 233, 36131], 6: [1759, 1776, 229, 20, 8546], + 7: [1750, 744, 384, 256, 128, 128, 246, 563, 60857], 8: [2750, 794, 111, 329, 39318, 701], + 9: [2027, 48, 4944], 10: [6741, 332], 11: [1552, 406], 12: [4226, 2206, 16, 1973], + 13: [8400, 34935], 14: [43443, 118], 16: 65024, 17: [2366, 4210, 388, 36224], 18: 43232, + 20: [1611, 2342, 186, 3065], 21: 7380, 27: 1840, 29: 6752, 30: 6070, 32: 11744, 36: 3993, + 39: 7616, 45: 1425, 112: 768 + }; + var digitRanges = { /* same encoding; the first run of length 10 starts at code 48, the ASCII digit zero */ + 10: [ + 48, 1584, 144, 208, 422, 128, 128, 128, 128, 128, 128, 128, 128, 234, 128, 80, 288, 80, 1872, + 48, 310, 314, 16, 192, 96, 144, 16, 35280, 688, 48, 208, 128, 416, 21280 + ], + 11: 6608 + }; + var connectorPunctuationRanges = {1: [95, 
8181, 57067], 2: [8255, 56820], 3: 65101}; + function rangesToObject(ranges) { /* Expands a delta-encoded table into an object holding one own key, set to true, per UTF-16 code unit covered by the runs. */ + var obj = {}; + for (var length in ranges) { + if (!ranges.hasOwnProperty(length)) continue; + var codes = ranges[length]; + if (typeof codes == 'number') codes = [codes]; /* a bare number stands for a one-element list */ + var start = 0, + end; + for (var i = 0; i < codes.length; i++) { + start += codes[i]; /* offsets accumulate from the previous run start, they are not absolute codes */ + end = start + Number(length); /* for-in keys are strings, so convert before adding; end is exclusive */ + for (var code = start; code < end; code++) { + obj[String.fromCharCode(code)] = true; + } + } + } + return obj; + } + var letterMap = rangesToObject(letterRanges); + var combiningMarkMap = rangesToObject(combiningMarkRanges); + var digitMap = rangesToObject(digitRanges); + var connectorPunctuationMap = rangesToObject(connectorPunctuationRanges); + return { /* every predicate is an own-property lookup of the given character in the matching table */ + isLetter: function (ch) { + return letterMap.hasOwnProperty(ch); + }, + isCombiningMark: function (ch) { + return combiningMarkMap.hasOwnProperty(ch); + }, + isDigit: function (ch) { + return digitMap.hasOwnProperty(ch); + }, + isConnectorPunctuation: function (ch) { + return connectorPunctuationMap.hasOwnProperty(ch); + } + }; +})(); + function lex(text, csp){ var tokens = [], token, @@ -161,9 +271,12 @@ function lex(text, csp){ ch == '\n' || ch == '\v' || ch == '\u00A0'; // IE treats non-breaking space as \u00A0 } function isIdent(ch) { - return 'a' <= ch && ch <= 'z' || - 'A' <= ch && ch <= 'Z' || - '_' == ch || ch == '$'; + return UNICODE.isLetter(ch) || ch == '$' || ch == '_'; /* a UNICODE letter, dollar sign or underscore; digits, marks and joiners are only accepted by isIdentPart */ + } + function isIdentPart(ch) { /* Superset of isIdent: also accepts combining marks, digits, connector punctuation, and U+200C and U+200D (zero width non-joiner and joiner). */ + return isIdent(ch) || UNICODE.isCombiningMark(ch) || UNICODE.isDigit(ch) || + UNICODE.isConnectorPunctuation(ch) || ch == '\u200C' || + ch == '\u200D'; + } function isExpOperator(ch) { return ch == '-' || ch == '+' || isNumber(ch); @@ -214,7 +327,7 @@ function lex(text, csp){ while (index < text.length) { ch = text.charAt(index); - if (ch == '.' || isIdent(ch) || isNumber(ch)) { + if (ch == '.' 
|| isIdentPart(ch) || isNumber(ch)) { /* characters after the first are tested with the wider isIdentPart instead of isIdent */ if (ch == '.') lastDot = index; ident += ch; } else { diff --git a/test/ng/parseSpec.js b/test/ng/parseSpec.js index a82f736f2c56..50e8666c4c9c 100644 --- a/test/ng/parseSpec.js +++ b/test/ng/parseSpec.js @@ -173,6 +173,13 @@ describe('parser', function() { lex("'\\u1''bla'"); }).toThrowMinErr("$parse", "lexerr", "Lexer Error: Invalid unicode escape [\\u1''b] at column 2 in expression ['\\u1''bla']."); }); + + it('should tokenize unicode identifier names', function () { /* the whole dotted path must come back as the first token, starting at index 0 */ + var tokens = lex("אʔα⁀β.ㄱ﹍ㅎ.がな0"); + var i = 0; + expect(tokens[i].index).toEqual(0); + expect(tokens[i].text).toEqual('אʔα⁀β.ㄱ﹍ㅎ.がな0'); + }); }); var $filterProvider, scope; @@ -780,6 +787,16 @@ describe('parser', function() { }); + it('should work with unicode identifier names', function () { /* covers a plain read, a nested property read and an assignment, all through scope.$eval */ + scope.π = 3.14; + scope.ㄱ = {ㄴ: 1}; + expect(scope.$eval('π')).toBe(3.14); + expect(scope.$eval('ㄱ.ㄴ')).toBe(1); + expect(scope.$eval('ㄷ = 2')).toBe(2); + expect(scope.ㄷ).toBe(2); + }); + + describe('promises', function() { var deferred, promise, q;