Merge pull request #4684 from ethereum/underscores_in_numeric_literals

[BREAKING] Underscores in numeric literals
2023-10-03 13:03:40 +00:00 · 2018-08-08 21:36:57 +02:00 · 2018-08-08 21:36:57 +02:00 · d634d20b5b
commit d634d20b5b
parent 551343ae3e b9222808f6
15 changed files with 256 additions and 14 deletions
--- a/.gitignore
+++ b/.gitignore
@ -46,3 +46,5 @@ browse.VC.db
 CMakeLists.txt.user
 /CMakeSettings.json
 /.vs
 /.cproject
 /.project
--- a/Changelog.md
+++ b/Changelog.md
@ -150,6 +150,7 @@ Features:
 * General: Introduce new constructor syntax using the ``constructor`` keyword as experimental 0.5.0 feature.
 * General: Limit the number of errors output in a single run to 256.
 * General: Support accessing dynamic return data in post-byzantium EVMs.
 * General: Allow underscores in numeric and hex literals to separate thousands and quads.
 * Inheritance: Error when using empty parentheses for base class constructors that require arguments as experimental 0.5.0 feature.
 * Inheritance: Error when using no parentheses in modifier-style constructor calls as experimental 0.5.0 feature.
 * Interfaces: Allow overriding external functions in interfaces with public in an implementing contract.
--- a/docs/types.rst
+++ b/docs/types.rst
@ -284,6 +284,11 @@ one side.  Examples include ``1.``, ``.1`` and ``1.3``.
 Scientific notation is also supported, where the base can have fractions, while the exponent cannot.
 Examples include ``2e10``, ``-2e10``, ``2e-10``, ``2.5e1``.
 Underscores can be used to separate the digits of a numeric literal to aid readability.
 For example, decimal ``123_000``, hexadecimal ``0x2eff_abde`` and scientific decimal notation ``1_2e345_678`` are all valid.
 Underscores are only allowed between two digits, and consecutive underscores are not allowed.
 There is no additional semantic meaning added to a number literal containing underscores.
 Number literal expressions retain arbitrary precision until they are converted to a non-literal type (i.e. by
 using them together with a non-literal expression).
 This means that computations do not overflow and divisions do not truncate
--- a/libsolidity/analysis/SyntaxChecker.cpp
+++ b/libsolidity/analysis/SyntaxChecker.cpp
@ -24,6 +24,9 @@
 #include <libsolidity/interface/Version.h>
 #include <boost/algorithm/cxx11/all_of.hpp>
 #include <boost/algorithm/string.hpp>
 #include <string>
 using namespace std;
 using namespace dev;
 using namespace dev::solidity;
@ -183,6 +186,45 @@ bool SyntaxChecker::visit(Throw const& _throwStatement)
 	return true;
 }
 // Validates the placement of underscore separators in number literals.
 // Non-number literals are ignored. For number literals a syntax error is
 // reported when an underscore is trailing, doubled, or (for decimal literals)
 // adjacent to the decimal point or to the exponent marker.
 // Always returns true.
 bool SyntaxChecker::visit(Literal const& _literal)
 {
 	if (_literal.token() != Token::Number)
 		return true;
 	ASTString const& value = _literal.value();
 	solAssert(!value.empty(), "");
 	// Generic checks no matter what base this number literal is of:
 	if (value.back() == '_')
 	{
 		m_errorReporter.syntaxError(_literal.location(), "Invalid use of underscores in number literal. No trailing underscores allowed.");
 		return true;
 	}
 	if (value.find("__") != ASTString::npos)
 	{
 		m_errorReporter.syntaxError(_literal.location(), "Invalid use of underscores in number literal. Only one consecutive underscores between digits allowed.");
 		return true;
 	}
 	if (!_literal.isHexNumber()) // decimal literal
 	{
 		if (value.find("._") != ASTString::npos)
 			m_errorReporter.syntaxError(_literal.location(), "Invalid use of underscores in number literal. No underscores in front of the fraction part allowed.");
 		if (value.find("_.") != ASTString::npos)
 			m_errorReporter.syntaxError(_literal.location(), "Invalid use of underscores in number literal. No underscores in front of the fraction part allowed.");
 		// The exponent marker may be written as 'e' or 'E', so both spellings
 		// have to be checked; otherwise e.g. "12_E34" would slip through.
 		if (value.find("_e") != ASTString::npos || value.find("_E") != ASTString::npos)
 			m_errorReporter.syntaxError(_literal.location(), "Invalid use of underscores in number literal. No underscore at the end of the mantissa allowed.");
 		if (value.find("e_") != ASTString::npos || value.find("E_") != ASTString::npos)
 			m_errorReporter.syntaxError(_literal.location(), "Invalid use of underscores in number literal. No underscore in front of exponent allowed.");
 	}
 	return true;
 }
 bool SyntaxChecker::visit(UnaryOperation const& _operation)
 {
 	if (_operation.getOperator() == Token::Add)
--- a/libsolidity/analysis/SyntaxChecker.h
+++ b/libsolidity/analysis/SyntaxChecker.h
@ -73,6 +73,7 @@ private:
 	virtual bool visit(VariableDeclarationStatement const& _statement) override;
 	virtual bool visit(StructDefinition const& _struct) override;
 	virtual bool visit(Literal const& _literal) override;
 	ErrorReporter& m_errorReporter;
--- a/libsolidity/ast/Types.cpp
+++ b/libsolidity/ast/Types.cpp
@ -39,6 +39,7 @@
 #include <boost/range/algorithm/copy.hpp>
 #include <boost/range/adaptor/sliced.hpp>
 #include <boost/range/adaptor/transformed.hpp>
 #include <boost/algorithm/string.hpp>
 #include <limits>
@ -783,19 +784,22 @@ tuple<bool, rational> RationalNumberType::isValidLiteral(Literal const& _literal
 	rational value;
 	try
 	{
-		auto expPoint = find(_literal.value().begin(), _literal.value().end(), 'e');
+		ASTString valueString = _literal.value();
-		if (expPoint == _literal.value().end())
+		boost::erase_all(valueString, "_");// Remove underscore separators
 			expPoint = find(_literal.value().begin(), _literal.value().end(), 'E');
-		if (boost::starts_with(_literal.value(), "0x"))
+		auto expPoint = find(valueString.begin(), valueString.end(), 'e');
 		if (expPoint == valueString.end())
 			expPoint = find(valueString.begin(), valueString.end(), 'E');
 		if (boost::starts_with(valueString, "0x"))
 		{
 			// process as hex
-			value = bigint(_literal.value());
+			value = bigint(valueString);
 		}
-		else if (expPoint != _literal.value().end())
+		else if (expPoint != valueString.end())
 		{
 			// Parse mantissa and exponent. Checks numeric limit.
-			tuple<bool, rational> mantissa = parseRational(string(_literal.value().begin(), expPoint));
+			tuple<bool, rational> mantissa = parseRational(string(valueString.begin(), expPoint));
 			if (!get<0>(mantissa))
 				return make_tuple(false, rational(0));
@ -805,7 +809,7 @@ tuple<bool, rational> RationalNumberType::isValidLiteral(Literal const& _literal
 			if (value == 0)
 				return make_tuple(true, rational(0));
-			bigint exp = bigint(string(expPoint + 1, _literal.value().end()));
+			bigint exp = bigint(string(expPoint + 1, valueString.end()));
 			if (exp > numeric_limits<int32_t>::max() || exp < numeric_limits<int32_t>::min())
 				return make_tuple(false, rational(0));
@ -834,7 +838,7 @@ tuple<bool, rational> RationalNumberType::isValidLiteral(Literal const& _literal
 		else
 		{
 			// parse as rational number
-			tuple<bool, rational> tmp = parseRational(_literal.value());
+			tuple<bool, rational> tmp = parseRational(valueString);
 			if (!get<0>(tmp))
 				return tmp;
 			value = get<1>(tmp);
--- a/libsolidity/parsing/Scanner.cpp
+++ b/libsolidity/parsing/Scanner.cpp
@ -724,10 +724,18 @@ Token::Value Scanner::scanHexString()
 	return Token::StringLiteral;
 }
 // Scans [:digit:][:digit:_]*. The stricter form [:digit:]+(_[:digit:]+)* is enforced later by SyntaxChecker.
 void Scanner::scanDecimalDigits()
 {
-	while (isDecimalDigit(m_char))
+	// MUST begin with a decimal digit.
-		addLiteralCharAndAdvance();
+	if (!isDecimalDigit(m_char))
 		return;
 	// May continue with decimal digit or underscore for grouping.
 	do addLiteralCharAndAdvance();
 	while (!m_source.isPastEndOfInput() && (isDecimalDigit(m_char) || m_char == '_'));
 	// Defer further validation of underscore to SyntaxChecker.
 }
 Token::Value Scanner::scanNumber(char _charSeen)
@ -738,6 +746,8 @@ Token::Value Scanner::scanNumber(char _charSeen)
 	{
 		// we have already seen a decimal point of the float
 		addLiteralChar('.');
 		if (m_char == '_')
 			return Token::Illegal;
 		scanDecimalDigits();  // we know we have at least one digit
 	}
 	else
@ -755,7 +765,8 @@ Token::Value Scanner::scanNumber(char _charSeen)
 				addLiteralCharAndAdvance();
 				if (!isHexDigit(m_char))
 					return Token::Illegal; // we must have at least one hex digit after 'x'/'X'
-				while (isHexDigit(m_char))
+
 				while (isHexDigit(m_char) || m_char == '_') // We keep the underscores for later validation
 					addLiteralCharAndAdvance();
 			}
 			else if (isDecimalDigit(m_char))
@ -768,9 +779,17 @@ Token::Value Scanner::scanNumber(char _charSeen)
 			scanDecimalDigits();  // optional
 			if (m_char == '.')
 			{
-				// A '.' has to be followed by a number.
+				if (!m_source.isPastEndOfInput(1) && m_source.get(1) == '_')
 				{
 					// Assume the input may be a floating point number with leading '_' in fraction part.
 					// Recover by consuming it all but returning `Illegal` right away.
 					addLiteralCharAndAdvance(); // '.'
 					addLiteralCharAndAdvance(); // '_'
 					scanDecimalDigits();
 				}
 				if (m_source.isPastEndOfInput() || !isDecimalDigit(m_source.get(1)))
 				{
 					// A '.' has to be followed by a number.
 					literal.complete();
 					return Token::Number;
 				}
@ -785,8 +804,18 @@ Token::Value Scanner::scanNumber(char _charSeen)
 		solAssert(kind != HEX, "'e'/'E' must be scanned as part of the hex number");
 		if (kind != DECIMAL)
 			return Token::Illegal;
 		else if (!m_source.isPastEndOfInput(1) && m_source.get(1) == '_')
 		{
 			// Recover from wrongly placed underscore as delimiter in literal with scientific
 			// notation by consuming until the end.
 			addLiteralCharAndAdvance(); // 'e'
 			addLiteralCharAndAdvance(); // '_'
 			scanDecimalDigits();
 			literal.complete();
 			return Token::Number;
 		}
 		// scan exponent
-		addLiteralCharAndAdvance();
+		addLiteralCharAndAdvance(); // 'e' | 'E'
 		if (m_char == '+' || m_char == '-')
 			addLiteralCharAndAdvance();
 		if (!isDecimalDigit(m_char))
--- a/test/libsolidity/SolidityEndToEndTest.cpp
+++ b/test/libsolidity/SolidityEndToEndTest.cpp
@ -12835,6 +12835,22 @@ BOOST_AUTO_TEST_CASE(write_storage_external)
 	ABI_CHECK(callContractFunction("h()"), encodeArgs(12));
 }
 // End-to-end check that hex literals containing underscore separators
 // evaluate to the same value as the literal written without underscores.
 BOOST_AUTO_TEST_CASE(test_underscore_in_hex)
 {
 	char const* sourceCode = R"(
 		contract test {
 			function f(bool cond) public pure returns (uint) {
 				uint32 x = 0x1234_ab;
 				uint y = 0x1234_abcd_1234;
 				return cond ? x : y;
 			}
 		}
 	)";
 	compileAndRun(sourceCode);
 	// cond == true selects x, cond == false selects y.
 	ABI_CHECK(callContractFunction("f(bool)", true), encodeArgs(u256(0x1234ab)));
 	ABI_CHECK(callContractFunction("f(bool)", false), encodeArgs(u256(0x1234abcd1234)));
 }
 BOOST_AUTO_TEST_SUITE_END()
 }
--- a/test/libsolidity/SolidityScanner.cpp
+++ b/test/libsolidity/SolidityScanner.cpp
@ -155,6 +155,76 @@ BOOST_AUTO_TEST_CASE(trailing_dot)
 	BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
 }
 // Input starting with an underscore is not scanned as a single number:
 // "_1.2" is expected to yield Identifier, Number, EOS.
 BOOST_AUTO_TEST_CASE(leading_underscore_decimal_is_identifier)
 {
 	// Actual error is caught by SyntaxChecker.
 	Scanner scanner(CharStream("_1.2"));
 	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Identifier);
 	BOOST_CHECK_EQUAL(scanner.next(), Token::Number);
 	BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
 }
 // An underscore directly after the decimal point: despite the test name, the
 // scanner is expected to return a single Number token for "1._2" and "1._".
 BOOST_AUTO_TEST_CASE(leading_underscore_decimal_after_dot_illegal)
 {
 	// Actual error is caught by SyntaxChecker.
 	Scanner scanner(CharStream("1._2"));
 	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Number);
 	BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
 	scanner.reset(CharStream("1._"), "");
 	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Number);
 	BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
 }
 // "_1e2" starts with an underscore and is expected to be scanned as one
 // Identifier token followed by EOS.
 BOOST_AUTO_TEST_CASE(leading_underscore_exp_are_identifier)
 {
 	// Actual error is caught by SyntaxChecker.
 	Scanner scanner(CharStream("_1e2"));
 	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Identifier);
 	BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
 }
 // An underscore directly after the exponent marker: despite the test name, the
 // scanner is expected to return Number and keep the underscore in the literal.
 BOOST_AUTO_TEST_CASE(leading_underscore_exp_after_e_illegal)
 {
 	// Actual error is caught by SyntaxChecker.
 	Scanner scanner(CharStream("1e_2"));
 	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Number);
 	BOOST_CHECK_EQUAL(scanner.currentLiteral(), "1e_2");
 	BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
 }
 // An underscore directly after the "0x" prefix is rejected by the scanner
 // itself: "0x_abc" is expected to yield Illegal, Identifier, EOS.
 BOOST_AUTO_TEST_CASE(leading_underscore_hex_illegal)
 {
 	Scanner scanner(CharStream("0x_abc"));
 	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal);
 	BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier);
 	BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
 }
 // A fraction part that begins with an underscore is still expected to be
 // scanned as one Number token.
 BOOST_AUTO_TEST_CASE(fixed_number_invalid_underscore_front)
 {
 	// Actual error is caught by SyntaxChecker.
 	Scanner scanner(CharStream("12._1234_1234"));
 	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Number);
 	BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
 }
 // A trailing underscore at the very end of the input is expected to be
 // consumed as part of the Number token for hex, decimal and fractional literals.
 BOOST_AUTO_TEST_CASE(number_literals_with_trailing_underscore_at_eos)
 {
 	// Actual error is caught by SyntaxChecker.
 	Scanner scanner(CharStream("0x123_"));
 	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Number);
 	BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
 	scanner.reset(CharStream("123_"), "");
 	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Number);
 	BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
 	scanner.reset(CharStream("12.34_"), "");
 	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Number);
 	BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
 }
 BOOST_AUTO_TEST_CASE(negative_numbers)
 {
 	Scanner scanner(CharStream("var x = -.2 + -0x78 + -7.3 + 8.9 + 2e-2;"));
--- a/test/libsolidity/syntaxTests/parsing/lexer_numbers_with_underscores_decimal.sol
+++ b/test/libsolidity/syntaxTests/parsing/lexer_numbers_with_underscores_decimal.sol
@ -0,0 +1,13 @@
 contract C {
  function f() public pure {
    uint d1 = 654_321;
    uint d2 =  54_321;
    uint d3 =   4_321;
    uint d4 = 5_43_21;
    uint d5 = 1_2e10;
    uint d6 = 12e1_0;
    d1; d2; d3; d4; d5; d6;
  }
 }
 // ----
--- a/test/libsolidity/syntaxTests/parsing/lexer_numbers_with_underscores_decimal_fail.sol
+++ b/test/libsolidity/syntaxTests/parsing/lexer_numbers_with_underscores_decimal_fail.sol
@ -0,0 +1,13 @@
 contract C {
  function f() public pure {
    uint D1 = 1234_;
    uint D2 = 12__34;
    uint D3 = 12_e34;
    uint D4 = 12e_34;
  }
 }
 // ----
 // SyntaxError: (56-61): Invalid use of underscores in number literal. No trailing underscores allowed.
 // SyntaxError: (77-83): Invalid use of underscores in number literal. Only one consecutive underscores between digits allowed.
 // SyntaxError: (99-105): Invalid use of underscores in number literal. No underscore at the end of the mantissa allowed.
 // SyntaxError: (121-127): Invalid use of underscores in number literal. No underscore in front of exponent allowed.
--- a/test/libsolidity/syntaxTests/parsing/lexer_numbers_with_underscores_fixed.sol
+++ b/test/libsolidity/syntaxTests/parsing/lexer_numbers_with_underscores_fixed.sol
@ -0,0 +1,9 @@
 contract C {
  function f() public pure {
    fixed f1 = 3.14_15;
    fixed f2 = 3_1.4_15;
    f1; f2;
  }
 }
 // ----
--- a/test/libsolidity/syntaxTests/parsing/lexer_numbers_with_underscores_fixed_fail.sol
+++ b/test/libsolidity/syntaxTests/parsing/lexer_numbers_with_underscores_fixed_fail.sol
@ -0,0 +1,17 @@
 contract C {
  function f() public pure {
    fixed F1 = 3.1415_;
    fixed F2 = 3__1.4__15;
    fixed F3 = 1_.2;
    fixed F4 = 1._2;
    fixed F5 = 1.2e_12;
    fixed F6 = 1._;
  }
 }
 // ----
 // SyntaxError: (57-64): Invalid use of underscores in number literal. No trailing underscores allowed.
 // SyntaxError: (81-91): Invalid use of underscores in number literal. Only one consecutive underscores between digits allowed.
 // SyntaxError: (108-112): Invalid use of underscores in number literal. No underscores in front of the fraction part allowed.
 // SyntaxError: (129-133): Invalid use of underscores in number literal. No underscores in front of the fraction part allowed.
 // SyntaxError: (150-157): Invalid use of underscores in number literal. No underscore in front of exponent allowed.
 // SyntaxError: (174-177): Invalid use of underscores in number literal. No trailing underscores allowed.
--- a/test/libsolidity/syntaxTests/parsing/lexer_numbers_with_underscores_hex.sol
+++ b/test/libsolidity/syntaxTests/parsing/lexer_numbers_with_underscores_hex.sol
@ -0,0 +1,13 @@
 contract C {
  function f() public pure {
    uint x1 = 0x8765_4321;
    uint x2 = 0x765_4321;
    uint x3 = 0x65_4321;
    uint x4 = 0x5_4321;
    uint x5 = 0x123_1234_1234_1234;
    uint x6 = 0x123456_1234_1234;
    x1; x2; x3; x4; x5; x6;
  }
 }
 // ----
--- a/test/libsolidity/syntaxTests/parsing/lexer_numbers_with_underscores_hex_fail.sol
+++ b/test/libsolidity/syntaxTests/parsing/lexer_numbers_with_underscores_hex_fail.sol
@ -0,0 +1,7 @@
 contract C {
  function f() public pure {
    uint X1 = 0x1234__1234__1234__123;
  }
 }
 // ----
 // SyntaxError: (56-79): Invalid use of underscores in number literal. Only one consecutive underscores between digits allowed.