diff --git a/docs/types.rst b/docs/types.rst index 84e58fde3..4bade37c8 100644 --- a/docs/types.rst +++ b/docs/types.rst @@ -284,8 +284,8 @@ one side. Examples include ``1.``, ``.1`` and ``1.3``. Scientific notation is also supported, where the base can have fractions, while the exponent cannot. Examples include ``2e10``, ``-2e10``, ``2e-10``, ``2.5e1``. -Underscores can be used to separate digits of a numeric literal to aid readability. -For example, ``123_000``, ``0x2eff_abde``, ``1_233e34_89`` are all valid. Underscores are only allowed between two digits. +Underscores can be used to separate the digits of a numeric literal to aid readability. +For example, ``123_000``, ``0x2eff_abde``, ``0x1233_e348_9a`` are all valid. Underscores are only allowed between two digits. For hex literals, underscores are only allowed to separate groups of 4 hex digits; only the first or the last group may have a different length. Number literal expressions retain arbitrary precision until they are converted to a non-literal type (i.e. by using them together with a non-literal expression). diff --git a/libsolidity/analysis/SyntaxChecker.cpp b/libsolidity/analysis/SyntaxChecker.cpp index 77492499b..dc2e35e50 100644 --- a/libsolidity/analysis/SyntaxChecker.cpp +++ b/libsolidity/analysis/SyntaxChecker.cpp @@ -24,6 +24,9 @@ #include #include +#include +#include + using namespace std; using namespace dev; using namespace dev::solidity; @@ -183,6 +186,46 @@ bool SyntaxChecker::visit(Throw const& _throwStatement) return true; } +bool SyntaxChecker::visit(Literal const& _literal) +{ + if (!_literal.isHexNumber()) + return true; + // We have a hex literal. 
Do underscore validation + solAssert(_literal.value().substr(0, 2) == "0x", ""); + ASTString value = _literal.value().substr(2); // Skip the 0x + vector parts; + boost::split(parts, value, boost::is_any_of("_")); + + if (parts.size() == 1) // no underscores + return true; + // Everything except first and last part must be 4 chars in length + for (size_t i = 1; i + 1 < parts.size(); ++i) + { + if (parts[i].size() != 4) + m_errorReporter.syntaxError(_literal.location(), "Invalid use of underscores in hex literal. Found inner part with " + to_string(parts[i].size()) + " digits (has to be 4 digits)."); + } + + // Validate rightmost block + if (parts.back().size() == 4) // If ends with 4 digits, then no need to validate first block + return true; + + // Validate leftmost block + // If first part is 4 digits then last part's length has to be even to avoid ambiguity over zero padding + if (parts.front().size() == 4) + { + if (parts.back().size() % 2 == 0) + return true; + m_errorReporter.syntaxError(_literal.location(), "Invalid use of underscores in hex literal. If the first part has 4 digits, it is assumed to be a byte sequence instead of a number and thus the last part should have an even number of digits."); + } + else + { + // Both first and last part is invalid + m_errorReporter.syntaxError(_literal.location(), "Invalid use of underscores in hex literal. 
First or last part must have 4 digits."); + } + + return true; +} + bool SyntaxChecker::visit(UnaryOperation const& _operation) { if (_operation.getOperator() == Token::Add) diff --git a/libsolidity/analysis/SyntaxChecker.h b/libsolidity/analysis/SyntaxChecker.h index 28a0f66ee..897df6767 100644 --- a/libsolidity/analysis/SyntaxChecker.h +++ b/libsolidity/analysis/SyntaxChecker.h @@ -73,6 +73,7 @@ private: virtual bool visit(VariableDeclarationStatement const& _statement) override; virtual bool visit(StructDefinition const& _struct) override; + virtual bool visit(Literal const& _literal) override; ErrorReporter& m_errorReporter; diff --git a/libsolidity/ast/Types.cpp b/libsolidity/ast/Types.cpp index 3eccc6d42..73137ba98 100644 --- a/libsolidity/ast/Types.cpp +++ b/libsolidity/ast/Types.cpp @@ -39,6 +39,7 @@ #include #include #include +#include #include @@ -779,7 +780,9 @@ tuple RationalNumberType::isValidLiteral(Literal const& _literal if (boost::starts_with(_literal.value(), "0x")) { // process as hex - value = bigint(_literal.value()); + ASTString valueString = _literal.value(); + boost::erase_all(valueString, "_");// Remove underscore separators + value = bigint(valueString); } else if (expPoint != _literal.value().end()) { diff --git a/libsolidity/parsing/Scanner.cpp b/libsolidity/parsing/Scanner.cpp index 65189b199..c223779e1 100644 --- a/libsolidity/parsing/Scanner.cpp +++ b/libsolidity/parsing/Scanner.cpp @@ -726,21 +726,26 @@ Token::Value Scanner::scanHexString() void Scanner::scanDecimalDigits() { - if (!isDecimalDigit(m_char)) // avoid underscore at beginning - return; - while (isDecimalDigit(m_char) || m_char == '_') + // Parse for regex [:digit:]+(_[:digit:]+)* + + do { + if (!isDecimalDigit(m_char)) + return; + while (isDecimalDigit(m_char)) + addLiteralCharAndAdvance(); + if (m_char == '_') { advance(); - if (!isDecimalDigit(m_char)) // avoid trailing underscore + if (!isDecimalDigit(m_char)) // Trailing underscore. 
Rollback and allow next step to flag it as illegal { rollback(1); - break; + return; } } - addLiteralCharAndAdvance(); } + while (isDecimalDigit(m_char)); } Token::Value Scanner::scanNumber(char _charSeen) @@ -768,19 +773,17 @@ Token::Value Scanner::scanNumber(char _charSeen) addLiteralCharAndAdvance(); if (!isHexDigit(m_char)) return Token::Illegal; // we must have at least one hex digit after 'x'/'X' - while (isHexDigit(m_char) || m_char == '_') // same logic as scanDecimalDigits + char last = m_char; + while (isHexDigit(m_char) || m_char == '_') // Unlike decimal digits, we keep the underscores for later validation { - if (m_char == '_') - { - advance(); - if (!isHexDigit(m_char)) // avoid trailing underscore - { - rollback(1); - break; - } - } + if (m_char == '_' && last == '_') + return Token::Illegal; // Double underscore + + last = m_char; addLiteralCharAndAdvance(); } + if (last == '_') + return Token::Illegal; // Trailing underscore } else if (isDecimalDigit(m_char)) // We do not allow octal numbers diff --git a/test/libsolidity/SolidityEndToEndTest.cpp b/test/libsolidity/SolidityEndToEndTest.cpp index a6c1372bf..12da1fa1d 100644 --- a/test/libsolidity/SolidityEndToEndTest.cpp +++ b/test/libsolidity/SolidityEndToEndTest.cpp @@ -12836,6 +12836,22 @@ BOOST_AUTO_TEST_CASE(write_storage_external) ABI_CHECK(callContractFunction("h()"), encodeArgs(12)); } +BOOST_AUTO_TEST_CASE(test_underscore_in_hex) +{ + char const* sourceCode = R"( + contract test { + function f(bool cond) returns (uint) { + uint32 x = 0x1234_ab; + uint y = 0x1234_abcd_1234; + return cond ? 
x : y; + } + } + )"; + compileAndRun(sourceCode); + ABI_CHECK(callContractFunction("f(bool)", true), encodeArgs(u256(0x1234ab))); + ABI_CHECK(callContractFunction("f(bool)", false), encodeArgs(u256(0x1234abcd1234))); +} + BOOST_AUTO_TEST_SUITE_END() } diff --git a/test/libsolidity/SolidityScanner.cpp b/test/libsolidity/SolidityScanner.cpp index b650d918a..9ad738ae1 100644 --- a/test/libsolidity/SolidityScanner.cpp +++ b/test/libsolidity/SolidityScanner.cpp @@ -199,7 +199,7 @@ BOOST_AUTO_TEST_CASE(underscores_in_hex) BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier); BOOST_CHECK_EQUAL(scanner.next(), Token::Assign); BOOST_CHECK_EQUAL(scanner.next(), Token::Number); - BOOST_CHECK_EQUAL(scanner.currentLiteral(), "0xab19cf"); + BOOST_CHECK_EQUAL(scanner.currentLiteral(), "0xab_19cf"); BOOST_CHECK_EQUAL(scanner.next(), Token::Semicolon); BOOST_CHECK_EQUAL(scanner.next(), Token::EOS); }