diff --git a/Changelog.md b/Changelog.md index 5a47d9814..e04029d94 100644 --- a/Changelog.md +++ b/Changelog.md @@ -2,6 +2,7 @@ Language Features: * Allow to obtain the selector of public or external library functions via a member ``.selector``. + * Parser: Allow splitting string and hexadecimal string literals into multiple parts. Compiler Features: diff --git a/docs/types/value-types.rst b/docs/types/value-types.rst index 053160701..426bdbe37 100644 --- a/docs/types/value-types.rst +++ b/docs/types/value-types.rst @@ -459,7 +459,7 @@ a non-rational number). String Literals and Types ------------------------- -String literals are written with either double or single-quotes (``"foo"`` or ``'bar'``). They do not imply trailing zeroes as in C; ``"foo"`` represents three bytes, not four. As with integer literals, their type can vary, but they are implicitly convertible to ``bytes1``, ..., ``bytes32``, if they fit, to ``bytes`` and to ``string``. +String literals are written with either double or single-quotes (``"foo"`` or ``'bar'``), and they can also be split into multiple consecutive parts (``"foo" "bar"`` is equivalent to ``"foobar"``) which can be helpful when dealing with long strings. They do not imply trailing zeroes as in C; ``"foo"`` represents three bytes, not four. As with integer literals, their type can vary, but they are implicitly convertible to ``bytes1``, ..., ``bytes32``, if they fit, to ``bytes`` and to ``string``. For example, with ``bytes32 samevar = "stringliteral"`` the string literal is interpreted in its raw byte form when assigned to a ``bytes32`` type. @@ -498,7 +498,7 @@ terminate the string literal. Newline only terminates the string literal if it i Hexadecimal Literals -------------------- -Hexadecimal literals are prefixed with the keyword ``hex`` and are enclosed in double or single-quotes (``hex"001122FF"``). Their content must be a hexadecimal string and their value will be the binary representation of those values. 
+Hexadecimal literals are prefixed with the keyword ``hex`` and are enclosed in double or single-quotes (``hex"001122FF"``), and they can also be split into multiple consecutive parts (``hex"00112233" hex"44556677"`` is equivalent to ``hex"0011223344556677"``). Their content must be a hexadecimal string and their value will be the binary representation of those values. Hexadecimal literals behave like :ref:`string literals <string_literals>` and have the same convertibility restrictions. diff --git a/liblangutil/Scanner.cpp b/liblangutil/Scanner.cpp index 0d65334fb..3ddb23cb1 100644 --- a/liblangutil/Scanner.cpp +++ b/liblangutil/Scanner.cpp @@ -798,7 +798,7 @@ Token Scanner::scanHexString() literal.complete(); advance(); // consume quote - return Token::StringLiteral; + return Token::HexStringLiteral; } // Parse for regex [:digit:]+(_[:digit:]+)* diff --git a/liblangutil/Token.h b/liblangutil/Token.h index 35d4f76f3..c5e829524 100644 --- a/liblangutil/Token.h +++ b/liblangutil/Token.h @@ -221,6 +221,7 @@ namespace langutil K(FalseLiteral, "false", 0) \ T(Number, nullptr, 0) \ T(StringLiteral, nullptr, 0) \ + T(HexStringLiteral, nullptr, 0) \ T(CommentLiteral, nullptr, 0) \ \ /* Identifiers (not keywords or future reserved words). 
*/ \ diff --git a/libsolidity/ast/ASTJsonConverter.cpp b/libsolidity/ast/ASTJsonConverter.cpp index 0162fc1c1..4d686b6f1 100644 --- a/libsolidity/ast/ASTJsonConverter.cpp +++ b/libsolidity/ast/ASTJsonConverter.cpp @@ -801,6 +801,7 @@ string ASTJsonConverter::literalTokenKind(Token _token) case dev::solidity::Token::Number: return "number"; case dev::solidity::Token::StringLiteral: + case dev::solidity::Token::HexStringLiteral: return "string"; case dev::solidity::Token::TrueLiteral: case dev::solidity::Token::FalseLiteral: diff --git a/libsolidity/ast/TypeProvider.cpp b/libsolidity/ast/TypeProvider.cpp index 6e94c011b..1962df9d5 100644 --- a/libsolidity/ast/TypeProvider.cpp +++ b/libsolidity/ast/TypeProvider.cpp @@ -331,6 +331,7 @@ TypePointer TypeProvider::forLiteral(Literal const& _literal) case Token::Number: return rationalNumber(_literal); case Token::StringLiteral: + case Token::HexStringLiteral: return stringLiteral(_literal.value()); default: return nullptr; diff --git a/libsolidity/parsing/Parser.cpp b/libsolidity/parsing/Parser.cpp index f1145ee83..ff00df90b 100644 --- a/libsolidity/parsing/Parser.cpp +++ b/libsolidity/parsing/Parser.cpp @@ -1614,9 +1614,22 @@ ASTPointer Parser::parsePrimaryExpression() } break; case Token::StringLiteral: + case Token::HexStringLiteral: + { + string literal = m_scanner->currentLiteral(); + Token firstToken = m_scanner->currentToken(); + while (m_scanner->peekNextToken() == firstToken) + { + m_scanner->next(); + literal += m_scanner->currentLiteral(); + } nodeFactory.markEndPosition(); - expression = nodeFactory.createNode(token, getLiteralAndAdvance()); + m_scanner->next(); + if (m_scanner->currentToken() == Token::Illegal) + fatalParserError(to_string(m_scanner->currentError())); + expression = nodeFactory.createNode(token, make_shared(literal)); break; + } case Token::Identifier: nodeFactory.markEndPosition(); expression = nodeFactory.createNode(getLiteralAndAdvance()); diff --git a/libyul/ObjectParser.cpp 
b/libyul/ObjectParser.cpp index 5f1eadef6..2e201046f 100644 --- a/libyul/ObjectParser.cpp +++ b/libyul/ObjectParser.cpp @@ -120,7 +120,10 @@ void ObjectParser::parseData(Object& _containingObject) YulString name = parseUniqueName(&_containingObject); - expectToken(Token::StringLiteral, false); + if (currentToken() == Token::HexStringLiteral) + expectToken(Token::HexStringLiteral, false); + else + expectToken(Token::StringLiteral, false); addNamedSubObject(_containingObject, name, make_shared(name, asBytes(currentLiteral()))); advance(); } diff --git a/test/libsolidity/SolidityScanner.cpp b/test/libsolidity/SolidityScanner.cpp index 7e7b58469..5d97ee988 100644 --- a/test/libsolidity/SolidityScanner.cpp +++ b/test/libsolidity/SolidityScanner.cpp @@ -505,7 +505,7 @@ BOOST_AUTO_TEST_CASE(valid_hex_literal) { Scanner scanner(CharStream("{ hex\"00112233FF\"", "")); BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace); - BOOST_CHECK_EQUAL(scanner.next(), Token::StringLiteral); + BOOST_CHECK_EQUAL(scanner.next(), Token::HexStringLiteral); BOOST_CHECK_EQUAL(scanner.currentLiteral(), std::string("\x00\x11\x22\x33\xFF", 5)); } diff --git a/test/libsolidity/syntaxTests/string/string_multipart_hex_valid_parts.sol b/test/libsolidity/syntaxTests/string/string_multipart_hex_valid_parts.sol new file mode 100644 index 000000000..684322143 --- /dev/null +++ b/test/libsolidity/syntaxTests/string/string_multipart_hex_valid_parts.sol @@ -0,0 +1,8 @@ +contract test { + function f() public pure returns (bytes32) { + bytes32 escapeCharacters = hex"aa" hex"b"; + return escapeCharacters; + } +} +// ---- +// ParserError: (108-112): Expected even number of hex-nibbles within double-quotes. 
diff --git a/test/libsolidity/syntaxTests/string/string_multipart_newline_with_hex_prefix.sol b/test/libsolidity/syntaxTests/string/string_multipart_newline_with_hex_prefix.sol new file mode 100644 index 000000000..1d4de1759 --- /dev/null +++ b/test/libsolidity/syntaxTests/string/string_multipart_newline_with_hex_prefix.sol @@ -0,0 +1,9 @@ +contract test { + function f() public pure returns (bytes32) { + bytes32 escapeCharacters = hex"0000" + hex"deaf" + hex"feed"; + return escapeCharacters; + } +} +// ---- diff --git a/test/libsolidity/syntaxTests/string/string_multipart_newline_without_hex_prefix.sol b/test/libsolidity/syntaxTests/string/string_multipart_newline_without_hex_prefix.sol new file mode 100644 index 000000000..613b86da6 --- /dev/null +++ b/test/libsolidity/syntaxTests/string/string_multipart_newline_without_hex_prefix.sol @@ -0,0 +1,10 @@ +contract test { + function f() public pure returns (bytes32) { + bytes32 escapeCharacters = hex"0000" + "deaf" + "feed"; + return escapeCharacters; + } +} +// ---- +// ParserError: (118-124): Expected ';' but got 'StringLiteral' \ No newline at end of file diff --git a/test/libsolidity/syntaxTests/string/string_multipart_only_hex.sol b/test/libsolidity/syntaxTests/string/string_multipart_only_hex.sol new file mode 100644 index 000000000..d748104f2 --- /dev/null +++ b/test/libsolidity/syntaxTests/string/string_multipart_only_hex.sol @@ -0,0 +1,8 @@ +contract test { + function f() public pure returns (bytes32) { + bytes32 escapeCharacters = hex"aa" hex"bb" "cc"; + return escapeCharacters; + } +} +// ---- +// ParserError: (116-120): Expected ';' but got 'StringLiteral' diff --git a/test/libsolidity/syntaxTests/string/string_multipart_only_regular.sol b/test/libsolidity/syntaxTests/string/string_multipart_only_regular.sol new file mode 100644 index 000000000..b420601f5 --- /dev/null +++ b/test/libsolidity/syntaxTests/string/string_multipart_only_regular.sol @@ -0,0 +1,8 @@ +contract test { + function f() public pure 
returns (bytes32) { + bytes32 escapeCharacters = "foo" "bar" hex"aa"; + return escapeCharacters; + } +} +// ---- +// ParserError: (112-119): Expected ';' but got 'HexStringLiteral' diff --git a/test/libsolidity/syntaxTests/string/string_multipart_single_line.sol b/test/libsolidity/syntaxTests/string/string_multipart_single_line.sol new file mode 100644 index 000000000..c07bde30c --- /dev/null +++ b/test/libsolidity/syntaxTests/string/string_multipart_single_line.sol @@ -0,0 +1,7 @@ +contract test { + function f() public pure returns (bytes32) { + bytes32 escapeCharacters = "first" "second" "third"; + return escapeCharacters; + } +} +// ----