diff --git a/liblangutil/Scanner.cpp b/liblangutil/Scanner.cpp index 0b17a0883..1a3903d5c 100644 --- a/liblangutil/Scanner.cpp +++ b/liblangutil/Scanner.cpp @@ -680,7 +680,7 @@ void Scanner::scanToken() else token = setError(ScannerError::IllegalToken); } - else if (token == Token::Unicode) + else if (token == Token::Unicode && m_kind != ScannerKind::Yul) { // reset m = 0; @@ -969,7 +969,17 @@ tuple Scanner::scanIdentifierOrKeyword() while (isIdentifierPart(m_char) || (m_char == '.' && m_kind == ScannerKind::Yul)) addLiteralCharAndAdvance(); literal.complete(); - return TokenTraits::fromIdentifierOrKeyword(m_tokens[NextNext].literal); + auto const token = TokenTraits::fromIdentifierOrKeyword(m_tokens[NextNext].literal); + if (m_kind == ScannerKind::Yul) + { + // Turn Solidity identifier into a Yul keyword + if (m_tokens[NextNext].literal == "leave") + return std::make_tuple(Token::Leave, 0, 0); + // Turn non-Yul keywords into identifiers. + if (!TokenTraits::isYulKeyword(std::get<0>(token))) + return std::make_tuple(Token::Identifier, 0, 0); + } + return token; } } // namespace solidity::langutil diff --git a/liblangutil/Token.h b/liblangutil/Token.h index 53d8131f1..2bfc58e44 100644 --- a/liblangutil/Token.h +++ b/liblangutil/Token.h @@ -269,6 +269,9 @@ namespace solidity::langutil K(Unchecked, "unchecked", 0) \ K(Var, "var", 0) \ \ + /* Yul-specific tokens, but not keywords. */ \ + T(Leave, "leave", 0) \ + \ /* Illegal token - not able to scan. 
*/ \ T(Illegal, "ILLEGAL", 0) \ \ @@ -317,6 +320,13 @@ namespace TokenTraits constexpr bool isTimeSubdenomination(Token op) { return op == Token::SubSecond || op == Token::SubMinute || op == Token::SubHour || op == Token::SubDay || op == Token::SubWeek || op == Token::SubYear; } constexpr bool isReservedKeyword(Token op) { return (Token::After <= op && op <= Token::Unchecked); } + constexpr bool isYulKeyword(Token tok) + { + return tok == Token::Function || tok == Token::Let || tok == Token::If || tok == Token::Switch || tok == Token::Case || + tok == Token::Default || tok == Token::For || tok == Token::Break || tok == Token::Continue || tok == Token::Leave || + tok == Token::TrueLiteral || tok == Token::FalseLiteral || tok == Token::HexStringLiteral || tok == Token::Hex; + } + inline Token AssignmentToBinaryOp(Token op) { solAssert(isAssignmentOp(op) && op != Token::Assign, ""); diff --git a/libyul/AsmParser.cpp b/libyul/AsmParser.cpp index 83f17a0d3..2baefa16f 100644 --- a/libyul/AsmParser.cpp +++ b/libyul/AsmParser.cpp @@ -116,26 +116,24 @@ Statement Parser::parseStatement() { Statement stmt{createWithLocation()}; checkBreakContinuePosition("break"); - m_scanner->next(); + advance(); return stmt; } case Token::Continue: { Statement stmt{createWithLocation()}; checkBreakContinuePosition("continue"); - m_scanner->next(); + advance(); + return stmt; + } + case Token::Leave: + { + Statement stmt{createWithLocation()}; + if (!m_insideFunction) + m_errorReporter.syntaxError(8149_error, currentLocation(), "Keyword \"leave\" can only be used inside a function."); + advance(); return stmt; } - case Token::Identifier: - if (currentLiteral() == "leave") - { - Statement stmt{createWithLocation()}; - if (!m_insideFunction) - m_errorReporter.syntaxError(8149_error, currentLocation(), "Keyword \"leave\" can only be used inside a function."); - m_scanner->next(); - return stmt; - } - break; default: break; } @@ -284,12 +282,6 @@ Parser::ElementaryOperation 
Parser::parseElementaryOperation() switch (currentToken()) { case Token::Identifier: - case Token::Return: - case Token::Byte: - case Token::Bool: - case Token::Address: - case Token::Var: - case Token::In: { YulString literal{currentLiteral()}; if (m_dialect.builtin(literal)) @@ -345,6 +337,9 @@ Parser::ElementaryOperation Parser::parseElementaryOperation() ret = std::move(literal); break; } + case Token::HexStringLiteral: + fatalParserError(3772_error, "Hex literals are not valid in this context."); + break; default: fatalParserError(1856_error, "Literal or identifier expected."); } @@ -472,24 +467,10 @@ TypedName Parser::parseTypedName() YulString Parser::expectAsmIdentifier() { YulString name{currentLiteral()}; - switch (currentToken()) - { - case Token::Return: - case Token::Byte: - case Token::Address: - case Token::Bool: - case Token::Identifier: - case Token::Var: - case Token::In: - break; - default: - expectToken(Token::Identifier); - break; - } - - if (m_dialect.builtin(name)) + if (currentToken() == Token::Identifier && m_dialect.builtin(name)) fatalParserError(5568_error, "Cannot use builtin function name \"" + name.str() + "\" as identifier name."); - advance(); + // NOTE: We keep the expectation here to ensure the correct source location for the error above. 
+ expectToken(Token::Identifier); return name; } diff --git a/test/liblangutil/Scanner.cpp b/test/liblangutil/Scanner.cpp index 755f59c2f..2110cb2b6 100644 --- a/test/liblangutil/Scanner.cpp +++ b/test/liblangutil/Scanner.cpp @@ -595,6 +595,11 @@ BOOST_AUTO_TEST_CASE(unicode_prefix_only) BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace); BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal); BOOST_CHECK_EQUAL(scanner.currentError(), ScannerError::IllegalToken); + scanner.reset(CharStream("{ unicode", "")); + scanner.setScannerMode(ScannerKind::Yul); + BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace); + BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier); + BOOST_CHECK_EQUAL(scanner.currentLiteral(), "unicode"); } BOOST_AUTO_TEST_CASE(unicode_invalid_space) @@ -611,6 +616,13 @@ BOOST_AUTO_TEST_CASE(unicode_invalid_token) BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace); BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal); BOOST_CHECK_EQUAL(scanner.currentError(), ScannerError::IllegalToken); + scanner.reset(CharStream("{ unicode test", "")); + scanner.setScannerMode(ScannerKind::Yul); + BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace); + BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier); + BOOST_CHECK_EQUAL(scanner.currentLiteral(), "unicode"); + BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier); + BOOST_CHECK_EQUAL(scanner.currentLiteral(), "test"); } BOOST_AUTO_TEST_CASE(valid_unicode_literal) @@ -638,6 +650,10 @@ BOOST_AUTO_TEST_CASE(hex_prefix_only) BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace); BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal); BOOST_CHECK_EQUAL(scanner.currentError(), ScannerError::IllegalToken); + scanner.reset(CharStream("{ hex", "")); + scanner.setScannerMode(ScannerKind::Yul); + BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal); + BOOST_CHECK_EQUAL(scanner.currentError(), ScannerError::IllegalToken); } BOOST_AUTO_TEST_CASE(hex_invalid_space) @@ -654,6 +670,11 @@ 
BOOST_AUTO_TEST_CASE(hex_invalid_token) BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace); BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal); BOOST_CHECK_EQUAL(scanner.currentError(), ScannerError::IllegalToken); + scanner.reset(CharStream("{ hex test", "")); + scanner.setScannerMode(ScannerKind::Yul); + BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace); + BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal); + BOOST_CHECK_EQUAL(scanner.currentError(), ScannerError::IllegalToken); } BOOST_AUTO_TEST_CASE(valid_hex_literal) @@ -819,6 +840,71 @@ BOOST_AUTO_TEST_CASE(irregular_line_breaks_in_strings) } } +BOOST_AUTO_TEST_CASE(solidity_keywords) +{ + // These are tokens which have a different meaning in Yul. + string keywords = "return byte bool address var in true false leave switch case default"; + Scanner scanner(CharStream(keywords, "")); + BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Return); + BOOST_CHECK_EQUAL(scanner.next(), Token::Byte); + BOOST_CHECK_EQUAL(scanner.next(), Token::Bool); + BOOST_CHECK_EQUAL(scanner.next(), Token::Address); + BOOST_CHECK_EQUAL(scanner.next(), Token::Var); + BOOST_CHECK_EQUAL(scanner.next(), Token::In); + BOOST_CHECK_EQUAL(scanner.next(), Token::TrueLiteral); + BOOST_CHECK_EQUAL(scanner.next(), Token::FalseLiteral); + BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier); + BOOST_CHECK_EQUAL(scanner.next(), Token::Switch); + BOOST_CHECK_EQUAL(scanner.next(), Token::Case); + BOOST_CHECK_EQUAL(scanner.next(), Token::Default); + BOOST_CHECK_EQUAL(scanner.next(), Token::EOS); + scanner.reset(CharStream(keywords, "")); + scanner.setScannerMode(ScannerKind::Yul); + BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Identifier); + BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier); + BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier); + BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier); + BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier); + BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier); + 
BOOST_CHECK_EQUAL(scanner.next(), Token::TrueLiteral); + BOOST_CHECK_EQUAL(scanner.next(), Token::FalseLiteral); + BOOST_CHECK_EQUAL(scanner.next(), Token::Leave); + BOOST_CHECK_EQUAL(scanner.next(), Token::Switch); + BOOST_CHECK_EQUAL(scanner.next(), Token::Case); + BOOST_CHECK_EQUAL(scanner.next(), Token::Default); + BOOST_CHECK_EQUAL(scanner.next(), Token::EOS); +} + +BOOST_AUTO_TEST_CASE(yul_keyword_like) +{ + Scanner scanner(CharStream("leave.function", "")); + BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Identifier); + BOOST_CHECK_EQUAL(scanner.next(), Token::Period); + BOOST_CHECK_EQUAL(scanner.next(), Token::Function); + BOOST_CHECK_EQUAL(scanner.next(), Token::EOS); + scanner.reset(CharStream("leave.function", "")); + scanner.setScannerMode(ScannerKind::Yul); + BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Identifier); + BOOST_CHECK_EQUAL(scanner.next(), Token::EOS); +} + +BOOST_AUTO_TEST_CASE(yul_identifier_with_dots) +{ + Scanner scanner(CharStream("mystorage.slot := 1", "")); + BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Identifier); + BOOST_CHECK_EQUAL(scanner.next(), Token::Period); + BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier); + BOOST_CHECK_EQUAL(scanner.next(), Token::AssemblyAssign); + BOOST_CHECK_EQUAL(scanner.next(), Token::Number); + BOOST_CHECK_EQUAL(scanner.next(), Token::EOS); + scanner.reset(CharStream("mystorage.slot := 1", "")); + scanner.setScannerMode(ScannerKind::Yul); + BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Identifier); + BOOST_CHECK_EQUAL(scanner.next(), Token::AssemblyAssign); + BOOST_CHECK_EQUAL(scanner.next(), Token::Number); + BOOST_CHECK_EQUAL(scanner.next(), Token::EOS); +} + BOOST_AUTO_TEST_CASE(yul_function) { string sig = "function f(a, b) -> x, y"; diff --git a/test/libsolidity/syntaxTests/inlineAssembly/hex_assignment.sol b/test/libsolidity/syntaxTests/inlineAssembly/hex_assignment.sol index 18107e044..b51dd8604 100644 --- a/test/libsolidity/syntaxTests/inlineAssembly/hex_assignment.sol +++ 
b/test/libsolidity/syntaxTests/inlineAssembly/hex_assignment.sol @@ -6,4 +6,4 @@ contract C { } } // ---- -// ParserError 1856: (72-81): Literal or identifier expected. +// ParserError 3772: (72-81): Hex literals are not valid in this context. diff --git a/test/libsolidity/syntaxTests/inlineAssembly/hex_expression.sol b/test/libsolidity/syntaxTests/inlineAssembly/hex_expression.sol index 53cd23a29..2f4f69174 100644 --- a/test/libsolidity/syntaxTests/inlineAssembly/hex_expression.sol +++ b/test/libsolidity/syntaxTests/inlineAssembly/hex_expression.sol @@ -6,4 +6,4 @@ contract C { } } // ---- -// ParserError 1856: (67-76): Literal or identifier expected. +// ParserError 3772: (67-76): Hex literals are not valid in this context. diff --git a/test/libsolidity/syntaxTests/inlineAssembly/hex_switch_case.sol b/test/libsolidity/syntaxTests/inlineAssembly/hex_switch_case.sol index bc0ff56fe..de64567d7 100644 --- a/test/libsolidity/syntaxTests/inlineAssembly/hex_switch_case.sol +++ b/test/libsolidity/syntaxTests/inlineAssembly/hex_switch_case.sol @@ -8,4 +8,4 @@ contract C { } } // ---- -// ParserError 1856: (92-99): Literal or identifier expected. +// ParserError 3772: (92-99): Hex literals are not valid in this context. diff --git a/test/libsolidity/syntaxTests/inlineAssembly/solidity_keywords.sol b/test/libsolidity/syntaxTests/inlineAssembly/solidity_keywords.sol new file mode 100644 index 000000000..5467d045a --- /dev/null +++ b/test/libsolidity/syntaxTests/inlineAssembly/solidity_keywords.sol @@ -0,0 +1,113 @@ +contract C { + function f() view public { + assembly { + // These are keywords of Solidity -- a copy from liblangutil/Token.h. 
+ let abstract := 1 + let anonymous := 1 + let as := 1 + let assembly := 1 + // break is Yul keyword + let catch := 1 + let constant := 1 + let constructor := 1 + // continue is Yul keyword + let contract := 1 + let do := 1 + let else := 1 + let enum := 1 + let emit := 1 + let event := 1 + let external := 1 + let fallback := 1 + // for is a Yul keyword + // function is a Yul keyword + // hex is a Yul keyword + // if is a Yul keyword + let indexed := 1 + let interface := 1 + let internal := 1 + let immutable := 1 + let import := 1 + let is := 1 + let library := 1 + let mapping := 1 + let memory := 1 + let modifier := 1 + let new := 1 + let override := 1 + let payable := 1 + let public := 1 + let pragma := 1 + let private := 1 + let pure := 1 + let receive := 1 + // return is a builtin in EVMDialect + return(0, 0) + let returns := 1 + let storage := 1 + let calldata := 1 + let struct := 1 + let throw := 1 + let try := 1 + // type shadows the Solidity function + let unicode := 1 + let using := 1 + let view := 1 + let virtual := 1 + let while := 1 + let wei := 1 + let gwei := 1 + let ether := 1 + let seconds := 1 + let minutes := 1 + let hours := 1 + let days := 1 + let weeks := 1 + let years := 1 + let int := 1 + let uint := 1 + let bytes := 1 + // byte is a builtin in EVMDialect + pop(byte(1, 1)) + let string := 1 + // address is a builtin in EVMDialect + pop(address()) + let bool := 1 + let fixed := 1 + let ufixed := 1 + let after := 1 + let alias := 1 + let apply := 1 + let auto := 1 + // case is a Yul keyword + let copyof := 1 + // default is a Yul keyword + let define := 1 + let final := 1 + let implements := 1 + let in := 1 + let inline := 1 + // let is a Yul keyword + let macro := 1 + let match := 1 + let mutable := 1 + let null := 1 + let of := 1 + let partial := 1 + let promise := 1 + let reference := 1 + let relocatable := 1 + let sealed := 1 + let sizeof := 1 + let static := 1 + let supports := 1 + // switch is a Yul keyword + let typedef := 1 + let typeof 
:= 1 + let unchecked := 1 + let var := 1 + } + } +} +// ---- +// Warning 5740: (955-2168): Unreachable code. diff --git a/test/libyul/yulSyntaxTests/hex_assignment.yul b/test/libyul/yulSyntaxTests/hex_assignment.yul index c9af448c1..213163916 100644 --- a/test/libyul/yulSyntaxTests/hex_assignment.yul +++ b/test/libyul/yulSyntaxTests/hex_assignment.yul @@ -2,4 +2,4 @@ let x := hex"0011" } // ---- -// ParserError 1856: (15-24): Literal or identifier expected. +// ParserError 3772: (15-24): Hex literals are not valid in this context. diff --git a/test/libyul/yulSyntaxTests/hex_expression.yul b/test/libyul/yulSyntaxTests/hex_expression.yul index f9c1cb52d..191fdf085 100644 --- a/test/libyul/yulSyntaxTests/hex_expression.yul +++ b/test/libyul/yulSyntaxTests/hex_expression.yul @@ -2,4 +2,4 @@ pop(hex"2233") } // ---- -// ParserError 1856: (10-19): Literal or identifier expected. +// ParserError 3772: (10-19): Hex literals are not valid in this context. diff --git a/test/libyul/yulSyntaxTests/hex_switch_case.yul b/test/libyul/yulSyntaxTests/hex_switch_case.yul index 0f3636b4f..87ba6a4a9 100644 --- a/test/libyul/yulSyntaxTests/hex_switch_case.yul +++ b/test/libyul/yulSyntaxTests/hex_switch_case.yul @@ -4,4 +4,4 @@ case hex"1122" {} } // ---- -// ParserError 1856: (33-40): Literal or identifier expected. +// ParserError 3772: (33-40): Hex literals are not valid in this context. diff --git a/test/libyul/yulSyntaxTests/solidity_keywords.yul b/test/libyul/yulSyntaxTests/solidity_keywords.yul new file mode 100644 index 000000000..1f1dafc53 --- /dev/null +++ b/test/libyul/yulSyntaxTests/solidity_keywords.yul @@ -0,0 +1,109 @@ +{ + // These are keywords of Solidity -- a copy from liblangutil/Token.h. 
+ let abstract := 1 + let anonymous := 1 + let as := 1 + let assembly := 1 + // break is a Yul keyword + let catch := 1 + let constant := 1 + let constructor := 1 + // continue is a Yul keyword + let contract := 1 + let do := 1 + let else := 1 + let enum := 1 + let emit := 1 + let event := 1 + let external := 1 + let fallback := 1 + // for is a Yul keyword + // function is a Yul keyword + // hex is a Yul keyword + // if is a Yul keyword + let indexed := 1 + let interface := 1 + let internal := 1 + let immutable := 1 + let import := 1 + let is := 1 + let library := 1 + let mapping := 1 + let memory := 1 + let modifier := 1 + let new := 1 + let override := 1 + let payable := 1 + let public := 1 + let pragma := 1 + let private := 1 + let pure := 1 + let receive := 1 + // return is a builtin in EVMDialect + return(0, 0) + let returns := 1 + let storage := 1 + let calldata := 1 + let struct := 1 + let throw := 1 + let try := 1 + let type := 1 + let unicode := 1 + let using := 1 + let view := 1 + let virtual := 1 + let while := 1 + let wei := 1 + let gwei := 1 + let ether := 1 + let seconds := 1 + let minutes := 1 + let hours := 1 + let days := 1 + let weeks := 1 + let years := 1 + let int := 1 + let uint := 1 + let bytes := 1 + // byte is a builtin in EVMDialect + pop(byte(1, 1)) + let string := 1 + // address is a builtin in EVMDialect + pop(address()) + let bool := 1 + let fixed := 1 + let ufixed := 1 + let after := 1 + let alias := 1 + let apply := 1 + let auto := 1 + // case is a Yul keyword + let copyof := 1 + // default is a Yul keyword + let define := 1 + let final := 1 + let implements := 1 + let in := 1 + let inline := 1 + // let is a Yul keyword + let macro := 1 + let match := 1 + let mutable := 1 + let null := 1 + let of := 1 + let partial := 1 + let promise := 1 + let reference := 1 + let relocatable := 1 + let sealed := 1 + let sizeof := 1 + let static := 1 + let supports := 1 + // switch is a Yul keyword + let typedef := 1 + let typeof := 1 + let unchecked :=
1 + let var := 1 +} +// ==== +// dialect: evm