Merge pull request #4684 from ethereum/underscores_in_numeric_literals

[BREAKING] Underscores in numeric literals
2023-10-03 13:03:40 +00:00 · 2018-08-08 21:36:57 +02:00 · 2018-08-08 21:36:57 +02:00 · d634d20b5b
commit d634d20b5b
parent 551343ae3e b9222808f6
15 changed files with 256 additions and 14 deletions
--- a/.gitignore
+++ b/.gitignore
@ -46,3 +46,5 @@ browse.VC.db
 CMakeLists.txt.user
 /CMakeSettings.json
 /.vs
+/.cproject
+/.project
--- a/Changelog.md
+++ b/Changelog.md
@ -150,6 +150,7 @@ Features:
 * General: Introduce new constructor syntax using the ``constructor`` keyword as experimental 0.5.0 feature.
 * General: Limit the number of errors output in a single run to 256.
 * General: Support accessing dynamic return data in post-byzantium EVMs.
+ * General: Allow underscores in numeric and hex literals to separate thousands and quads.
 * Inheritance: Error when using empty parentheses for base class constructors that require arguments as experimental 0.5.0 feature.
 * Inheritance: Error when using no parentheses in modifier-style constructor calls as experimental 0.5.0 feature.
 * Interfaces: Allow overriding external functions in interfaces with public in an implementing contract.
--- a/docs/types.rst
+++ b/docs/types.rst
@ -284,6 +284,11 @@ one side.  Examples include ``1.``, ``.1`` and ``1.3``.
 Scientific notation is also supported, where the base can have fractions, while the exponent cannot.
 Examples include ``2e10``, ``-2e10``, ``2e-10``, ``2.5e1``.

+Underscores can be used to separate the digits of a numeric literal to aid readability.
+For example, decimal ``123_000``, hexadecimal ``0x2eff_abde``, scientific decimal notation ``1_2e345_678`` are all valid.
+Underscores are only allowed between two digits and only one consecutive underscore is allowed.
+There is no additional semantic meaning added to a number literal containing underscores.
+
 Number literal expressions retain arbitrary precision until they are converted to a non-literal type (i.e. by
 using them together with a non-literal expression).
 This means that computations do not overflow and divisions do not truncate
--- a/libsolidity/analysis/SyntaxChecker.cpp
+++ b/libsolidity/analysis/SyntaxChecker.cpp
@ -24,6 +24,9 @@
 #include <libsolidity/interface/Version.h>
 #include <boost/algorithm/cxx11/all_of.hpp>

+#include <boost/algorithm/string.hpp>
+#include <string>
+
 using namespace std;
 using namespace dev;
 using namespace dev::solidity;
@ -183,6 +186,45 @@ bool SyntaxChecker::visit(Throw const& _throwStatement)
 	return true;
 }

+bool SyntaxChecker::visit(Literal const& _literal)
+{
+	if (_literal.token() != Token::Number)
+		return true;
+
+	ASTString const& value = _literal.value();
+	solAssert(!value.empty(), "");
+
+	// Generic checks no matter what base this number literal is of:
+	if (value.back() == '_')
+	{
+		m_errorReporter.syntaxError(_literal.location(), "Invalid use of underscores in number literal. No trailing underscores allowed.");
+		return true;
+	}
+
+	if (value.find("__") != ASTString::npos)
+	{
+		m_errorReporter.syntaxError(_literal.location(), "Invalid use of underscores in number literal. Only one consecutive underscores between digits allowed.");
+		return true;
+	}
+
+	if (!_literal.isHexNumber()) // decimal literal
+	{
+		if (value.find("._") != ASTString::npos)
+			m_errorReporter.syntaxError(_literal.location(), "Invalid use of underscores in number literal. No underscores in front of the fraction part allowed.");
+
+		if (value.find("_.") != ASTString::npos)
+			m_errorReporter.syntaxError(_literal.location(), "Invalid use of underscores in number literal. No underscores in front of the fraction part allowed.");
+
+		if (value.find("_e") != ASTString::npos)
+			m_errorReporter.syntaxError(_literal.location(), "Invalid use of underscores in number literal. No underscore at the end of the mantissa allowed.");
+
+		if (value.find("e_") != ASTString::npos)
+			m_errorReporter.syntaxError(_literal.location(), "Invalid use of underscores in number literal. No underscore in front of exponent allowed.");
+	}
+
+	return true;
+}
+
 bool SyntaxChecker::visit(UnaryOperation const& _operation)
 {
 	if (_operation.getOperator() == Token::Add)
--- a/libsolidity/analysis/SyntaxChecker.h
+++ b/libsolidity/analysis/SyntaxChecker.h
@ -73,6 +73,7 @@ private:
 	virtual bool visit(VariableDeclarationStatement const& _statement) override;

 	virtual bool visit(StructDefinition const& _struct) override;
+	virtual bool visit(Literal const& _literal) override;

 	ErrorReporter& m_errorReporter;

--- a/libsolidity/ast/Types.cpp
+++ b/libsolidity/ast/Types.cpp
@ -39,6 +39,7 @@
 #include <boost/range/algorithm/copy.hpp>
 #include <boost/range/adaptor/sliced.hpp>
 #include <boost/range/adaptor/transformed.hpp>
+#include <boost/algorithm/string.hpp>

 #include <limits>

@ -783,19 +784,22 @@ tuple<bool, rational> RationalNumberType::isValidLiteral(Literal const& _literal
 	rational value;
 	try
 	{
-		auto expPoint = find(_literal.value().begin(), _literal.value().end(), 'e');
-		if (expPoint == _literal.value().end())
-			expPoint = find(_literal.value().begin(), _literal.value().end(), 'E');
+		ASTString valueString = _literal.value();
+		boost::erase_all(valueString, "_");// Remove underscore separators

-		if (boost::starts_with(_literal.value(), "0x"))
+		auto expPoint = find(valueString.begin(), valueString.end(), 'e');
+		if (expPoint == valueString.end())
+			expPoint = find(valueString.begin(), valueString.end(), 'E');
+
+		if (boost::starts_with(valueString, "0x"))
 		{
 			// process as hex
-			value = bigint(_literal.value());
+			value = bigint(valueString);
 		}
-		else if (expPoint != _literal.value().end())
+		else if (expPoint != valueString.end())
 		{
 			// Parse mantissa and exponent. Checks numeric limit.
-			tuple<bool, rational> mantissa = parseRational(string(_literal.value().begin(), expPoint));
+			tuple<bool, rational> mantissa = parseRational(string(valueString.begin(), expPoint));

 			if (!get<0>(mantissa))
 				return make_tuple(false, rational(0));
@ -805,7 +809,7 @@ tuple<bool, rational> RationalNumberType::isValidLiteral(Literal const& _literal
 			if (value == 0)
 				return make_tuple(true, rational(0));

-			bigint exp = bigint(string(expPoint + 1, _literal.value().end()));
+			bigint exp = bigint(string(expPoint + 1, valueString.end()));

 			if (exp > numeric_limits<int32_t>::max() || exp < numeric_limits<int32_t>::min())
 				return make_tuple(false, rational(0));
@ -834,7 +838,7 @@ tuple<bool, rational> RationalNumberType::isValidLiteral(Literal const& _literal
 		else
 		{
 			// parse as rational number
-			tuple<bool, rational> tmp = parseRational(_literal.value());
+			tuple<bool, rational> tmp = parseRational(valueString);
 			if (!get<0>(tmp))
 				return tmp;
 			value = get<1>(tmp);
--- a/libsolidity/parsing/Scanner.cpp
+++ b/libsolidity/parsing/Scanner.cpp
@ -724,10 +724,18 @@ Token::Value Scanner::scanHexString()
 	return Token::StringLiteral;
 }

+// Parse for regex [:digit:]+(_[:digit:]+)*
 void Scanner::scanDecimalDigits()
 {
-	while (isDecimalDigit(m_char))
-		addLiteralCharAndAdvance();
+	// MUST begin with a decimal digit.
+	if (!isDecimalDigit(m_char))
+		return;
+
+	// May continue with decimal digit or underscore for grouping.
+	do addLiteralCharAndAdvance();
+	while (!m_source.isPastEndOfInput() && (isDecimalDigit(m_char) || m_char == '_'));
+
+	// Defer further validation of underscore to SyntaxChecker.
 }

 Token::Value Scanner::scanNumber(char _charSeen)
@ -738,6 +746,8 @@ Token::Value Scanner::scanNumber(char _charSeen)
 	{
 		// we have already seen a decimal point of the float
 		addLiteralChar('.');
+		if (m_char == '_')
+			return Token::Illegal;
 		scanDecimalDigits();  // we know we have at least one digit
 	}
 	else
@ -755,7 +765,8 @@ Token::Value Scanner::scanNumber(char _charSeen)
 				addLiteralCharAndAdvance();
 				if (!isHexDigit(m_char))
 					return Token::Illegal; // we must have at least one hex digit after 'x'/'X'
-				while (isHexDigit(m_char))
+
+				while (isHexDigit(m_char) || m_char == '_') // We keep the underscores for later validation
 					addLiteralCharAndAdvance();
 			}
 			else if (isDecimalDigit(m_char))
@ -768,9 +779,17 @@ Token::Value Scanner::scanNumber(char _charSeen)
 			scanDecimalDigits();  // optional
 			if (m_char == '.')
 			{
-				// A '.' has to be followed by a number.
+				if (!m_source.isPastEndOfInput(1) && m_source.get(1) == '_')
+				{
+					// Assume the input may be a floating point number with leading '_' in fraction part.
+					// Recover by consuming it all but returning `Illegal` right away.
+					addLiteralCharAndAdvance(); // '.'
+					addLiteralCharAndAdvance(); // '_'
+					scanDecimalDigits();
+				}
 				if (m_source.isPastEndOfInput() || !isDecimalDigit(m_source.get(1)))
 				{
+					// A '.' has to be followed by a number.
 					literal.complete();
 					return Token::Number;
 				}
@ -785,8 +804,18 @@ Token::Value Scanner::scanNumber(char _charSeen)
 		solAssert(kind != HEX, "'e'/'E' must be scanned as part of the hex number");
 		if (kind != DECIMAL)
 			return Token::Illegal;
+		else if (!m_source.isPastEndOfInput(1) && m_source.get(1) == '_')
+		{
+			// Recover from wrongly placed underscore as delimiter in literal with scientific
+			// notation by consuming until the end.
+			addLiteralCharAndAdvance(); // 'e'
+			addLiteralCharAndAdvance(); // '_'
+			scanDecimalDigits();
+			literal.complete();
+			return Token::Number;
+		}
 		// scan exponent
-		addLiteralCharAndAdvance();
+		addLiteralCharAndAdvance(); // 'e' | 'E'
 		if (m_char == '+' || m_char == '-')
 			addLiteralCharAndAdvance();
 		if (!isDecimalDigit(m_char))
--- a/test/libsolidity/SolidityEndToEndTest.cpp
+++ b/test/libsolidity/SolidityEndToEndTest.cpp
@ -12835,6 +12835,22 @@ BOOST_AUTO_TEST_CASE(write_storage_external)
 	ABI_CHECK(callContractFunction("h()"), encodeArgs(12));
 }

+BOOST_AUTO_TEST_CASE(test_underscore_in_hex)
+{
+	char const* sourceCode = R"(
+		contract test {
+			function f(bool cond) public pure returns (uint) {
+				uint32 x = 0x1234_ab;
+				uint y = 0x1234_abcd_1234;
+				return cond ? x : y;
+			}
+		}
+	)";
+	compileAndRun(sourceCode);
+	ABI_CHECK(callContractFunction("f(bool)", true), encodeArgs(u256(0x1234ab)));
+	ABI_CHECK(callContractFunction("f(bool)", false), encodeArgs(u256(0x1234abcd1234)));
+}
+
 BOOST_AUTO_TEST_SUITE_END()

 }
--- a/test/libsolidity/SolidityScanner.cpp
+++ b/test/libsolidity/SolidityScanner.cpp
@ -155,6 +155,76 @@ BOOST_AUTO_TEST_CASE(trailing_dot)
 	BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
 }

+BOOST_AUTO_TEST_CASE(leading_underscore_decimal_is_identifier)
+{
+	// Actual error is cought by SyntaxChecker.
+	Scanner scanner(CharStream("_1.2"));
+	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Identifier);
+	BOOST_CHECK_EQUAL(scanner.next(), Token::Number);
+	BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
+}
+
+BOOST_AUTO_TEST_CASE(leading_underscore_decimal_after_dot_illegal)
+{
+	// Actual error is cought by SyntaxChecker.
+	Scanner scanner(CharStream("1._2"));
+	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Number);
+	BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
+
+	scanner.reset(CharStream("1._"), "");
+	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Number);
+	BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
+}
+
+BOOST_AUTO_TEST_CASE(leading_underscore_exp_are_identifier)
+{
+	// Actual error is cought by SyntaxChecker.
+	Scanner scanner(CharStream("_1e2"));
+	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Identifier);
+	BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
+}
+
+BOOST_AUTO_TEST_CASE(leading_underscore_exp_after_e_illegal)
+{
+	// Actual error is cought by SyntaxChecker.
+	Scanner scanner(CharStream("1e_2"));
+	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Number);
+	BOOST_CHECK_EQUAL(scanner.currentLiteral(), "1e_2");
+	BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
+}
+
+BOOST_AUTO_TEST_CASE(leading_underscore_hex_illegal)
+{
+	Scanner scanner(CharStream("0x_abc"));
+	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal);
+	BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier);
+	BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
+}
+
+BOOST_AUTO_TEST_CASE(fixed_number_invalid_underscore_front)
+{
+	// Actual error is cought by SyntaxChecker.
+	Scanner scanner(CharStream("12._1234_1234"));
+	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Number);
+	BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
+}
+
+BOOST_AUTO_TEST_CASE(number_literals_with_trailing_underscore_at_eos)
+{
+	// Actual error is cought by SyntaxChecker.
+	Scanner scanner(CharStream("0x123_"));
+	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Number);
+	BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
+
+	scanner.reset(CharStream("123_"), "");
+	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Number);
+	BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
+
+	scanner.reset(CharStream("12.34_"), "");
+	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Number);
+	BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
+}
+
 BOOST_AUTO_TEST_CASE(negative_numbers)
 {
 	Scanner scanner(CharStream("var x = -.2 + -0x78 + -7.3 + 8.9 + 2e-2;"));
--- a/test/libsolidity/syntaxTests/parsing/lexer_numbers_with_underscores_decimal.sol
+++ b/test/libsolidity/syntaxTests/parsing/lexer_numbers_with_underscores_decimal.sol
@ -0,0 +1,13 @@
+contract C {
+  function f() public pure {
+    uint d1 = 654_321;
+    uint d2 =  54_321;
+    uint d3 =   4_321;
+    uint d4 = 5_43_21;
+    uint d5 = 1_2e10;
+    uint d6 = 12e1_0;
+
+    d1; d2; d3; d4; d5; d6;
+  }
+}
+// ----
--- a/test/libsolidity/syntaxTests/parsing/lexer_numbers_with_underscores_decimal_fail.sol
+++ b/test/libsolidity/syntaxTests/parsing/lexer_numbers_with_underscores_decimal_fail.sol
@ -0,0 +1,13 @@
+contract C {
+  function f() public pure {
+    uint D1 = 1234_;
+    uint D2 = 12__34;
+    uint D3 = 12_e34;
+    uint D4 = 12e_34;
+  }
+}
+// ----
+// SyntaxError: (56-61): Invalid use of underscores in number literal. No trailing underscores allowed.
+// SyntaxError: (77-83): Invalid use of underscores in number literal. Only one consecutive underscores between digits allowed.
+// SyntaxError: (99-105): Invalid use of underscores in number literal. No underscore at the end of the mantissa allowed.
+// SyntaxError: (121-127): Invalid use of underscores in number literal. No underscore in front of exponent allowed.
--- a/test/libsolidity/syntaxTests/parsing/lexer_numbers_with_underscores_fixed.sol
+++ b/test/libsolidity/syntaxTests/parsing/lexer_numbers_with_underscores_fixed.sol
@ -0,0 +1,9 @@
+contract C {
+  function f() public pure {
+    fixed f1 = 3.14_15;
+    fixed f2 = 3_1.4_15;
+
+    f1; f2;
+  }
+}
+// ----
--- a/test/libsolidity/syntaxTests/parsing/lexer_numbers_with_underscores_fixed_fail.sol
+++ b/test/libsolidity/syntaxTests/parsing/lexer_numbers_with_underscores_fixed_fail.sol
@ -0,0 +1,17 @@
+contract C {
+  function f() public pure {
+    fixed F1 = 3.1415_;
+    fixed F2 = 3__1.4__15;
+    fixed F3 = 1_.2;
+    fixed F4 = 1._2;
+    fixed F5 = 1.2e_12;
+    fixed F6 = 1._;
+  }
+}
+// ----
+// SyntaxError: (57-64): Invalid use of underscores in number literal. No trailing underscores allowed.
+// SyntaxError: (81-91): Invalid use of underscores in number literal. Only one consecutive underscores between digits allowed.
+// SyntaxError: (108-112): Invalid use of underscores in number literal. No underscores in front of the fraction part allowed.
+// SyntaxError: (129-133): Invalid use of underscores in number literal. No underscores in front of the fraction part allowed.
+// SyntaxError: (150-157): Invalid use of underscores in number literal. No underscore in front of exponent allowed.
+// SyntaxError: (174-177): Invalid use of underscores in number literal. No trailing underscores allowed.
--- a/test/libsolidity/syntaxTests/parsing/lexer_numbers_with_underscores_hex.sol
+++ b/test/libsolidity/syntaxTests/parsing/lexer_numbers_with_underscores_hex.sol
@ -0,0 +1,13 @@
+contract C {
+  function f() public pure {
+    uint x1 = 0x8765_4321;
+    uint x2 = 0x765_4321;
+    uint x3 = 0x65_4321;
+    uint x4 = 0x5_4321;
+    uint x5 = 0x123_1234_1234_1234;
+    uint x6 = 0x123456_1234_1234;
+
+    x1; x2; x3; x4; x5; x6;
+  }
+}
+// ----
--- a/test/libsolidity/syntaxTests/parsing/lexer_numbers_with_underscores_hex_fail.sol
+++ b/test/libsolidity/syntaxTests/parsing/lexer_numbers_with_underscores_hex_fail.sol
@ -0,0 +1,7 @@
+contract C {
+  function f() public pure {
+    uint X1 = 0x1234__1234__1234__123;
+  }
+}
+// ----
+// SyntaxError: (56-79): Invalid use of underscores in number literal. Only one consecutive underscores between digits allowed.