Unverified commit 02a9c151, authored by Tae Hyung Kim and committed by GitHub

Fix lexer issue where select/plural/other/underscores cannot be in identifier names. (#119190)

parent a45727d8
...@@ -158,20 +158,14 @@ RegExp normalString = RegExp(r'[^{}]+'); ...@@ -158,20 +158,14 @@ RegExp normalString = RegExp(r'[^{}]+');
RegExp brace = RegExp(r'{|}'); RegExp brace = RegExp(r'{|}');
RegExp whitespace = RegExp(r'\s+'); RegExp whitespace = RegExp(r'\s+');
RegExp pluralKeyword = RegExp(r'plural');
RegExp selectKeyword = RegExp(r'select');
RegExp otherKeyword = RegExp(r'other');
RegExp numeric = RegExp(r'[0-9]+'); RegExp numeric = RegExp(r'[0-9]+');
RegExp alphanumeric = RegExp(r'[a-zA-Z0-9]+'); RegExp alphanumeric = RegExp(r'[a-zA-Z0-9|_]+');
RegExp comma = RegExp(r','); RegExp comma = RegExp(r',');
RegExp equalSign = RegExp(r'='); RegExp equalSign = RegExp(r'=');
// List of token matchers ordered by precedence // List of token matchers ordered by precedence
Map<ST, RegExp> matchers = <ST, RegExp>{ Map<ST, RegExp> matchers = <ST, RegExp>{
ST.empty: whitespace, ST.empty: whitespace,
ST.plural: pluralKeyword,
ST.select: selectKeyword,
ST.other: otherKeyword,
ST.number: numeric, ST.number: numeric,
ST.comma: comma, ST.comma: comma,
ST.equalSign: equalSign, ST.equalSign: equalSign,
...@@ -303,12 +297,25 @@ class Parser { ...@@ -303,12 +297,25 @@ class Parser {
// Do not add whitespace as a token. // Do not add whitespace as a token.
startIndex = match.end; startIndex = match.end;
continue; continue;
} else if (<ST>[ST.plural, ST.select].contains(matchedType) && tokens.last.type == ST.openBrace) { } else if (<ST>[ST.identifier].contains(matchedType) && tokens.last.type == ST.openBrace) {
// Treat "plural" or "select" as identifier if it comes right after an open brace. // Treat any token as identifier if it comes right after an open brace, whether it's a keyword or not.
tokens.add(Node(ST.identifier, startIndex, value: match.group(0))); tokens.add(Node(ST.identifier, startIndex, value: match.group(0)));
startIndex = match.end; startIndex = match.end;
continue; continue;
} else { } else {
// Handle keywords separately. Otherwise, lexer will assume parts of identifiers may be keywords.
final String tokenStr = match.group(0)!;
switch(tokenStr) {
case 'plural':
matchedType = ST.plural;
break;
case 'select':
matchedType = ST.select;
break;
case 'other':
matchedType = ST.other;
break;
}
tokens.add(Node(matchedType!, startIndex, value: match.group(0))); tokens.add(Node(matchedType!, startIndex, value: match.group(0)));
startIndex = match.end; startIndex = match.end;
continue; continue;
......
...@@ -226,6 +226,22 @@ void main() { ...@@ -226,6 +226,22 @@ void main() {
expect(tokens[5].type, equals(ST.identifier)); expect(tokens[5].type, equals(ST.identifier));
}); });
testWithoutContext('lexer identifier names can contain underscores', () {
final List<Node> tokens = Parser('keywords', 'app_en.arb', '{ test_placeholder } { test_select, select, singular{test} other{hmm} }').lexIntoTokens();
expect(tokens[1].value, equals('test_placeholder'));
expect(tokens[1].type, equals(ST.identifier));
expect(tokens[5].value, equals('test_select'));
expect(tokens[5].type, equals(ST.identifier));
});
testWithoutContext('lexer identifier names can contain the strings select or plural', () {
final List<Node> tokens = Parser('keywords', 'app_en.arb', '{ selectTest } { pluralTest, select, singular{test} other{hmm} }').lexIntoTokens();
expect(tokens[1].value, equals('selectTest'));
expect(tokens[1].type, equals(ST.identifier));
expect(tokens[5].value, equals('pluralTest'));
expect(tokens[5].type, equals(ST.identifier));
});
testWithoutContext('lexer: lexically correct but syntactically incorrect', () { testWithoutContext('lexer: lexically correct but syntactically incorrect', () {
final List<Node> tokens = Parser( final List<Node> tokens = Parser(
'syntax', 'syntax',
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment