From c4b243e437096e6fe3abc032c7de9aa994284cde Mon Sep 17 00:00:00 2001
From: Luiz Zappa
Date: Sun, 22 Oct 2023 15:02:06 -0300
Subject: [PATCH] feat(): unicode extended char support for column name or alias

Widen the lexer's identifier character classes with \u0080-\uFFFF so that
unquoted and back-quoted column names or aliases may contain, or start
with, non-ASCII BMP characters (país, produção, Ĉapelo, Δάσος, Молоко, ...).

SELECT_EXPR_STAR gets the same range, so a qualifier that starts with such
a character (Ĉapelo.*) can be lexed as one SELECT_EXPR_STAR token instead
of falling through to IDENTIFIER, DOT, '*'.

Every other changed line in src/sqlParser.jison only re-aligns the action
column.
---
Review notes (text between the '---' marker and the diffstat is not part
of the commit message):

 * \u0080-\uFFFF already contains \u4e00-\u9fa5; the narrower range is
   kept next to it only to mirror the existing rules.
 * The new range admits every BMP code point from U+0080 upwards, i.e.
   symbols and punctuation as well as letters.
 * The rule that starts with [\w]+ is listed before SELECT_EXPR_STAR. If
   the lexer takes the first matching rule (its %options sit outside this
   hunk - TODO confirm), 'país.*' is still tokenised as IDENTIFIER, DOT,
   '*'; only qualifiers whose first character is non-ASCII benefit from
   the widened SELECT_EXPR_STAR. A test for 'Ĉapelo.*' should be added
   once the grammar's handling of that token has been checked.
 * The padding between each pattern and its action was lost in transit and
   has been re-created; apply with --ignore-whitespace or regenerate the
   patch. The index hashes below are those of the original submission.
 * The EOF pattern had been reduced to '<>' and is restored to '<<EOF>>'.

 src/sqlParser.jison | 244 ++++++++++++++++++++++----------------------
 test/main.test.js   |  12 +++
 2 files changed, 134 insertions(+), 122 deletions(-)

diff --git a/src/sqlParser.jison b/src/sqlParser.jison
index 4a8efbc..99c0e80 100644
--- a/src/sqlParser.jison
+++ b/src/sqlParser.jison
@@ -6,133 +6,133 @@
 
 %%
 
-[/][*](.|\n)*?[*][/]    /* skip comments */
-[-][-]\s.*\n            /* skip sql comments */
-[#]\s.*\n               /* skip sql comments */
-\s+                     /* skip whitespace */
+[/][*](.|\n)*?[*][/]            /* skip comments */
+[-][-]\s.*\n                    /* skip sql comments */
+[#]\s.*\n                       /* skip sql comments */
+\s+                             /* skip whitespace */
 
-[$][{](.*?)[}]          return 'PLACE_HOLDER'
-[`][a-zA-Z_\u4e00-\u9fa5][a-zA-Z0-9_\u4e00-\u9fa5]*[`] return 'IDENTIFIER'
-[\w]+[\u4e00-\u9fa5]+[0-9a-zA-Z_\u4e00-\u9fa5]* return 'IDENTIFIER'
-[\u4e00-\u9fa5][0-9a-zA-Z_\u4e00-\u9fa5]* return 'IDENTIFIER'
-SELECT                  return 'SELECT'
-ALL                     return 'ALL'
-ANY                     return 'ANY'
-DISTINCT                return 'DISTINCT'
-DISTINCTROW             return 'DISTINCTROW'
-HIGH_PRIORITY           return 'HIGH_PRIORITY'
-MAX_STATEMENT_TIME      return 'MAX_STATEMENT_TIME'
-STRAIGHT_JOIN           return 'STRAIGHT_JOIN'
-SQL_SMALL_RESULT        return 'SQL_SMALL_RESULT'
-SQL_BIG_RESULT          return 'SQL_BIG_RESULT'
-SQL_BUFFER_RESULT       return 'SQL_BUFFER_RESULT'
-SQL_CACHE               return 'SQL_CACHE'
-SQL_NO_CACHE            return 'SQL_NO_CACHE'
-SQL_CALC_FOUND_ROWS     return 'SQL_CALC_FOUND_ROWS'
-([a-zA-Z_\u4e00-\u9fa5][a-zA-Z0-9_\u4e00-\u9fa5]*\.){1,2}\* return 'SELECT_EXPR_STAR'
-AS                      return 'AS'
-TRUE                    return 'TRUE'
-FALSE                   return 'FALSE'
-NULL                    return 'NULL'
-COLLATE                 return 'COLLATE'
-BINARY                  return 'BINARY'
-ROW                     return 'ROW'
-EXISTS                  return 'EXISTS'
-CASE                    return 'CASE'
-WHEN                    return 'WHEN'
-THEN                    return 'THEN'
-ELSE                    return 'ELSE'
-END                     return 'END'
-DIV                     return 'DIV'
-MOD                     return 'MOD'
-NOT                     return 'NOT'
-BETWEEN                 return 'BETWEEN'
-IN                      return 'IN'
-SOUNDS                  return 'SOUNDS'
-LIKE                    return 'LIKE'
-ESCAPE                  return 'ESCAPE'
-REGEXP                  return 'REGEXP'
-IS                      return 'IS'
-UNKNOWN                 return 'UNKNOWN'
-AND                     return 'AND'
-OR                      return 'OR'
-XOR                     return 'XOR'
-FROM                    return 'FROM'
-PARTITION               return 'PARTITION'
-USE                     return 'USE'
-INDEX                   return 'INDEX'
-KEY                     return 'KEY'
-FOR                     return 'FOR'
-JOIN                    return 'JOIN'
-ORDER\s+BY              return 'ORDER_BY'
-GROUP\s+BY              return 'GROUP_BY'
-IGNORE                  return 'IGNORE'
-FORCE                   return 'FORCE'
-INNER                   return 'INNER'
-CROSS                   return 'CROSS'
-ON                      return 'ON'
-USING                   return 'USING'
-LEFT                    return 'LEFT'
-RIGHT                   return 'RIGHT'
-OUTER                   return 'OUTER'
-NATURAL                 return 'NATURAL'
-WHERE                   return 'WHERE'
-ASC                     return 'ASC'
-DESC                    return 'DESC'
-WITH                    return 'WITH'
-ROLLUP                  return 'ROLLUP'
-HAVING                  return 'HAVING'
-OFFSET                  return 'OFFSET'
-PROCEDURE               return 'PROCEDURE'
-UPDATE                  return 'UPDATE'
-LOCK                    return 'LOCK'
-SHARE                   return 'SHARE'
-MODE                    return 'MODE'
-OJ                      return 'OJ'
-LIMIT                   return 'LIMIT'
-UNION                   return 'UNION'
+[$][{](.*?)[}]                  return 'PLACE_HOLDER'
+[`][a-zA-Z_\u4e00-\u9fa5\u0080-\uFFFF][a-zA-Z0-9_\u4e00-\u9fa5\u0080-\uFFFF]*[`] return 'IDENTIFIER'
+[\w]+[\u4e00-\u9fa5\u0080-\uFFFF]+[0-9a-zA-Z_\u4e00-\u9fa5\u0080-\uFFFF]* return 'IDENTIFIER'
+[\u4e00-\u9fa5][0-9a-zA-Z_\u4e00-\u9fa5\u0080-\uFFFF]* return 'IDENTIFIER'
+SELECT                          return 'SELECT'
+ALL                             return 'ALL'
+ANY                             return 'ANY'
+DISTINCT                        return 'DISTINCT'
+DISTINCTROW                     return 'DISTINCTROW'
+HIGH_PRIORITY                   return 'HIGH_PRIORITY'
+MAX_STATEMENT_TIME              return 'MAX_STATEMENT_TIME'
+STRAIGHT_JOIN                   return 'STRAIGHT_JOIN'
+SQL_SMALL_RESULT                return 'SQL_SMALL_RESULT'
+SQL_BIG_RESULT                  return 'SQL_BIG_RESULT'
+SQL_BUFFER_RESULT               return 'SQL_BUFFER_RESULT'
+SQL_CACHE                       return 'SQL_CACHE'
+SQL_NO_CACHE                    return 'SQL_NO_CACHE'
+SQL_CALC_FOUND_ROWS             return 'SQL_CALC_FOUND_ROWS'
+([a-zA-Z_\u4e00-\u9fa5\u0080-\uFFFF][a-zA-Z0-9_\u4e00-\u9fa5\u0080-\uFFFF]*\.){1,2}\* return 'SELECT_EXPR_STAR'
+AS                              return 'AS'
+TRUE                            return 'TRUE'
+FALSE                           return 'FALSE'
+NULL                            return 'NULL'
+COLLATE                         return 'COLLATE'
+BINARY                          return 'BINARY'
+ROW                             return 'ROW'
+EXISTS                          return 'EXISTS'
+CASE                            return 'CASE'
+WHEN                            return 'WHEN'
+THEN                            return 'THEN'
+ELSE                            return 'ELSE'
+END                             return 'END'
+DIV                             return 'DIV'
+MOD                             return 'MOD'
+NOT                             return 'NOT'
+BETWEEN                         return 'BETWEEN'
+IN                              return 'IN'
+SOUNDS                          return 'SOUNDS'
+LIKE                            return 'LIKE'
+ESCAPE                          return 'ESCAPE'
+REGEXP                          return 'REGEXP'
+IS                              return 'IS'
+UNKNOWN                         return 'UNKNOWN'
+AND                             return 'AND'
+OR                              return 'OR'
+XOR                             return 'XOR'
+FROM                            return 'FROM'
+PARTITION                       return 'PARTITION'
+USE                             return 'USE'
+INDEX                           return 'INDEX'
+KEY                             return 'KEY'
+FOR                             return 'FOR'
+JOIN                            return 'JOIN'
+ORDER\s+BY                      return 'ORDER_BY'
+GROUP\s+BY                      return 'GROUP_BY'
+IGNORE                          return 'IGNORE'
+FORCE                           return 'FORCE'
+INNER                           return 'INNER'
+CROSS                           return 'CROSS'
+ON                              return 'ON'
+USING                           return 'USING'
+LEFT                            return 'LEFT'
+RIGHT                           return 'RIGHT'
+OUTER                           return 'OUTER'
+NATURAL                         return 'NATURAL'
+WHERE                           return 'WHERE'
+ASC                             return 'ASC'
+DESC                            return 'DESC'
+WITH                            return 'WITH'
+ROLLUP                          return 'ROLLUP'
+HAVING                          return 'HAVING'
+OFFSET                          return 'OFFSET'
+PROCEDURE                       return 'PROCEDURE'
+UPDATE                          return 'UPDATE'
+LOCK                            return 'LOCK'
+SHARE                           return 'SHARE'
+MODE                            return 'MODE'
+OJ                              return 'OJ'
+LIMIT                           return 'LIMIT'
+UNION                           return 'UNION'
 
-","                     return ','
-"="                     return '='
-"("                     return '('
-")"                     return ')'
-"~"                     return '~'
-"!="                    return '!='
-"!"                     return '!'
-"|"                     return '|'
-"&"                     return '&'
-"+"                     return '+'
-"-"                     return '-'
-"*"                     return '*'
-"/"                     return '/'
-"%"                     return '%'
-"^"                     return '^'
-">>"                    return '>>'
-">="                    return '>='
-">"                     return '>'
-"<<"                    return '<<'
-"<=>"                   return '<=>'
-"<="                    return '<='
-"<>"                    return '<>'
-"<"                     return '<'
-"{"                     return '{'
-"}"                     return '}'
-";"                     return ';'
+","                             return ','
+"="                             return '='
+"("                             return '('
+")"                             return ')'
+"~"                             return '~'
+"!="                            return '!='
+"!"                             return '!'
+"|"                             return '|'
+"&"                             return '&'
+"+"                             return '+'
+"-"                             return '-'
+"*"                             return '*'
+"/"                             return '/'
+"%"                             return '%'
+"^"                             return '^'
+">>"                            return '>>'
+">="                            return '>='
+">"                             return '>'
+"<<"                            return '<<'
+"<=>"                           return '<=>'
+"<="                            return '<='
+"<>"                            return '<>'
+"<"                             return '<'
+"{"                             return '{'
+"}"                             return '}'
+";"                             return ';'
 
-['](\\.|[^'])*[']       return 'STRING'
-["](\\.|[^"])*["]       return 'STRING'
-[0][x][0-9a-fA-F]+      return 'HEX_NUMERIC'
-[-]?[0-9]+(\.[0-9]+)?   return 'NUMERIC'
-[-]?[0-9]+(\.[0-9]+)?[eE][-][0-9]+(\.[0-9]+)? return 'EXPONENT_NUMERIC'
+['](\\.|[^'])*[']               return 'STRING'
+["](\\.|[^"])*["]               return 'STRING'
+[0][x][0-9a-fA-F]+              return 'HEX_NUMERIC'
+[-]?[0-9]+(\.[0-9]+)?           return 'NUMERIC'
+[-]?[0-9]+(\.[0-9]+)?[eE][-][0-9]+(\.[0-9]+)?   return 'EXPONENT_NUMERIC'
 
-[a-zA-Z_\u4e00-\u9fa5][a-zA-Z0-9_\u4e00-\u9fa5]* return 'IDENTIFIER'
-\.                      return 'DOT'
-["][a-zA-Z_\u4e00-\u9fa5][a-zA-Z0-9_\u4e00-\u9fa5]*["] return 'STRING'
-['][a-zA-Z_\u4e00-\u9fa5][a-zA-Z0-9_\u4e00-\u9fa5]*['] return 'STRING'
-([`])(?:(?=(\\?))\2.)*?\1 return 'IDENTIFIER'
+[a-zA-Z_\u4e00-\u9fa5\u0080-\uFFFF][a-zA-Z0-9_\u4e00-\u9fa5\u0080-\uFFFF]* return 'IDENTIFIER'
+\.                              return 'DOT'
+["][a-zA-Z_\u4e00-\u9fa5][a-zA-Z0-9_\u4e00-\u9fa5]*["]   return 'STRING'
+['][a-zA-Z_\u4e00-\u9fa5][a-zA-Z0-9_\u4e00-\u9fa5]*[']   return 'STRING'
+([`])(?:(?=(\\?))\2.)*?\1       return 'IDENTIFIER'
 
-<<EOF>>                 return 'EOF'
-.                       return 'INVALID'
+<<EOF>>                         return 'EOF'
+.                               return 'INVALID'
 
 /lex
 
diff --git a/test/main.test.js b/test/main.test.js
index 2ee9c1b..d7a1496 100644
--- a/test/main.test.js
+++ b/test/main.test.js
@@ -436,4 +436,16 @@ describe('select grammar support', function () {
   it('test IDENTIFIER', function () {
     testParser('select `aa#sfs`(a) as \'A A\' from z');
   });
+
+  it('Support unicode extended char (U+0080..U+FFFF) as column name or alias', function() {
+    testParser(`select
+      país,
+      MAX(produção) as maior_produção,
+      Ĉapelo,
+      Δάσος,
+      Молоко,
+      سلام,
+      かわいい
+    from table`)
+  })
 });