diff --git a/Changelog.md b/Changelog.md index 30d7909cf..7ff9dc52e 100644 --- a/Changelog.md +++ b/Changelog.md @@ -22,6 +22,7 @@ Compiler Features: * Optimizer: Add rule for shifts by constants larger than 255 for Constantinople. * Optimizer: Add rule to simplify certain ANDs and SHL combinations * Yul: Adds break and continue keywords to for-loop syntax. + * Yul: Support ``.`` as part of identifiers. * Yul Optimizer: Adds steps for detecting and removing of dead code. diff --git a/docs/assembly.rst b/docs/assembly.rst index 835ae11ac..2b918a526 100644 --- a/docs/assembly.rst +++ b/docs/assembly.rst @@ -42,7 +42,8 @@ Syntax ------ Assembly parses comments, literals and identifiers in the same way as Solidity, so you can use the -usual ``//`` and ``/* */`` comments. Inline assembly is marked by ``assembly { ... }`` and inside +usual ``//`` and ``/* */`` comments. There is one exception: identifiers in inline assembly can contain +``.`` (though not as their first character). Inline assembly is marked by ``assembly { ... }`` and inside these curly braces, you can use the following (see the later sections for more details): - literals, i.e. ``0x123``, ``42`` or ``"abc"`` (strings up to 32 characters) @@ -765,7 +766,7 @@ Grammar:: SubAssembly AssemblyExpression = AssemblyCall | Identifier | AssemblyLiteral AssemblyLiteral = NumberLiteral | StringLiteral | HexLiteral - Identifier = [a-zA-Z_$] [a-zA-Z_0-9]* + Identifier = [a-zA-Z_$] [a-zA-Z_$0-9.]* AssemblyCall = Identifier '(' ( AssemblyExpression ( ',' AssemblyExpression )* )? ')' AssemblyLocalDefinition = 'let' IdentifierOrList ( ':=' AssemblyExpression )? AssemblyAssignment = IdentifierOrList ':=' AssemblyExpression diff --git a/docs/yul.rst b/docs/yul.rst index 19b7e8524..02543296f 100644 --- a/docs/yul.rst +++ b/docs/yul.rst @@ -123,7 +123,7 @@ Grammar:: 'break' | 'continue' FunctionCall = Identifier '(' ( Expression ( ',' Expression )* )? 
')' - Identifier = [a-zA-Z_$] [a-zA-Z_$0-9]* + Identifier = [a-zA-Z_$] [a-zA-Z_$0-9.]* IdentifierList = Identifier ( ',' Identifier)* TypeName = Identifier | BuiltinTypeName BuiltinTypeName = 'bool' | [us] ( '8' | '32' | '64' | '128' | '256' ) diff --git a/liblangutil/Scanner.cpp b/liblangutil/Scanner.cpp index 6e4a263f5..852a92e9b 100644 --- a/liblangutil/Scanner.cpp +++ b/liblangutil/Scanner.cpp @@ -149,10 +149,17 @@ void Scanner::reset(shared_ptr _source) void Scanner::reset() { m_source->reset(); + m_supportPeriodInIdentifier = false; m_char = m_source->get(); skipWhitespace(); - scanToken(); next(); + next(); +} + +void Scanner::supportPeriodInIdentifier(bool _value) +{ + m_supportPeriodInIdentifier = _value; + rescan(); } bool Scanner::scanHexByte(char& o_scannedByte) @@ -208,6 +215,18 @@ void Scanner::addUnicodeAsUTF8(unsigned codepoint) } } +void Scanner::rescan() +{ + // Restart at the skipped comment if its literal is non-empty, otherwise at the current token. + size_t const rollbackTo = m_skippedComment.literal.empty() ? + m_currentToken.location.start : + m_skippedComment.location.start; + size_t const distance = size_t(m_source->position()) - rollbackTo; + m_char = m_source->rollback(distance); + next(); + next(); +} + // Ensure that tokens can be stored in a byte. BOOST_STATIC_ASSERT(TokenTraits::count() <= 0x100); @@ -865,7 +884,7 @@ tuple Scanner::scanIdentifierOrKeyword() LiteralScope literal(this, LITERAL_TYPE_STRING); addLiteralCharAndAdvance(); // Scan the rest of the identifier characters. - while (isIdentifierPart(m_char)) //get full literal + while (isIdentifierPart(m_char) || (m_supportPeriodInIdentifier && m_char == '.')) addLiteralCharAndAdvance(); literal.complete(); return TokenTraits::fromIdentifierOrKeyword(m_nextToken.literal); diff --git a/liblangutil/Scanner.h b/liblangutil/Scanner.h index bc7b9434f..9a4a170a6 100644 --- a/liblangutil/Scanner.h +++ b/liblangutil/Scanner.h @@ -103,6 +103,10 @@ public: /// Resets scanner to the start of input. void reset(); + /// Enables or disables support for period in identifier. 
+ /// This re-scans the current token, the look-ahead token and the skipped comment and thus invalidates them. + void supportPeriodInIdentifier(bool _value); + /// @returns the next token and advances input Token next(); @@ -191,6 +195,8 @@ private: bool advance() { m_char = m_source->advanceAndGet(); return !m_source->isPastEndOfInput(); } void rollback(int _amount) { m_char = m_source->rollback(_amount); } + /// Rolls back to the start of the current token (or of the current skipped comment, if its literal is non-empty) and scans two tokens from there. + void rescan(); inline Token selectErrorToken(ScannerError _err) { advance(); return setError(_err); } inline Token selectToken(Token _tok) { advance(); return _tok; } @@ -233,6 +239,8 @@ private: int sourcePos() const { return m_source->position(); } bool isSourcePastEndOfInput() const { return m_source->isPastEndOfInput(); } + bool m_supportPeriodInIdentifier = false; // if true, '.' may follow the first character of an identifier + TokenDesc m_skippedComment; // desc for current skipped comment TokenDesc m_nextSkippedComment; // desc for next skipped comment diff --git a/libyul/AsmParser.cpp b/libyul/AsmParser.cpp index aa94fe35c..13ff81c4f 100644 --- a/libyul/AsmParser.cpp +++ b/libyul/AsmParser.cpp @@ -38,6 +38,10 @@ using namespace yul; shared_ptr Parser::parse(std::shared_ptr const& _scanner, bool _reuseScanner) { m_recursionDepth = 0; + + _scanner->supportPeriodInIdentifier(true); + ScopeGuard resetScanner([&]{ _scanner->supportPeriodInIdentifier(false); }); + try { m_scanner = _scanner; @@ -50,6 +54,7 @@ shared_ptr Parser::parse(std::shared_ptr const& _scanner, bool _ { solAssert(!m_errorReporter.errors().empty(), "Fatal error detected, but no error is reported."); } + return nullptr; } diff --git a/test/libsolidity/syntaxTests/inlineAssembly/period_in_identifer.sol b/test/libsolidity/syntaxTests/inlineAssembly/period_in_identifer.sol new file mode 100644 index 000000000..6788c891f --- /dev/null +++ b/test/libsolidity/syntaxTests/inlineAssembly/period_in_identifer.sol @@ -0,0 +1,10 @@ +contract C { + function f() pure public { + // Periods are part of identifiers in 
assembly, + // but not in Solidity. This tests that the scanner setting is + // reset before the C.f() call after the assembly block is scanned. + assembly { } + C.f(); + } +} +// ---- diff --git a/test/libyul/Parser.cpp b/test/libyul/Parser.cpp index 728f732a4..c643cfd14 100644 --- a/test/libyul/Parser.cpp +++ b/test/libyul/Parser.cpp @@ -151,6 +151,33 @@ BOOST_AUTO_TEST_CASE(assignment) BOOST_CHECK(successParse("{ let x:u256 := 2:u256 let y:u256 := x }")); } +BOOST_AUTO_TEST_CASE(period_in_identifier) +{ + BOOST_CHECK(successParse("{ let x.y:u256 := 2:u256 }")); +} + +BOOST_AUTO_TEST_CASE(period_not_as_identifier_start) +{ + CHECK_ERROR("{ let .y:u256 }", ParserError, "Expected identifier but got '.'"); +} + +BOOST_AUTO_TEST_CASE(period_in_identifier_spaced) +{ + CHECK_ERROR("{ let x. y:u256 }", ParserError, "Expected ':' but got identifier"); + CHECK_ERROR("{ let x .y:u256 }", ParserError, "Expected ':' but got '.'"); + CHECK_ERROR("{ let x . y:u256 }", ParserError, "Expected ':' but got '.'"); +} + +BOOST_AUTO_TEST_CASE(period_in_identifier_start) +{ + BOOST_CHECK(successParse("{ x.y(2:u256) function x.y(a:u256) {} }")); +} + +BOOST_AUTO_TEST_CASE(period_in_identifier_start_with_comment) +{ + BOOST_CHECK(successParse("/// comment\n{ x.y(2:u256) function x.y(a:u256) {} }")); +} + BOOST_AUTO_TEST_CASE(vardecl_complex) { BOOST_CHECK(successParse("{ function add(a:u256, b:u256) -> c:u256 {} let y:u256 := 2:u256 let x:u256 := add(7:u256, add(6:u256, y)) }"));