Merge pull request #4684 from ethereum/underscores_in_numeric_literals

[BREAKING] Underscores in numeric literals
This commit is contained in:
chriseth 2018-08-08 21:36:57 +02:00 committed by GitHub
commit d634d20b5b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 256 additions and 14 deletions

2
.gitignore vendored
View File

@ -46,3 +46,5 @@ browse.VC.db
CMakeLists.txt.user CMakeLists.txt.user
/CMakeSettings.json /CMakeSettings.json
/.vs /.vs
/.cproject
/.project

View File

@ -150,6 +150,7 @@ Features:
* General: Introduce new constructor syntax using the ``constructor`` keyword as experimental 0.5.0 feature. * General: Introduce new constructor syntax using the ``constructor`` keyword as experimental 0.5.0 feature.
* General: Limit the number of errors output in a single run to 256. * General: Limit the number of errors output in a single run to 256.
* General: Support accessing dynamic return data in post-byzantium EVMs. * General: Support accessing dynamic return data in post-byzantium EVMs.
* General: Allow underscores in numeric and hex literals to separate thousands and quads.
* Inheritance: Error when using empty parentheses for base class constructors that require arguments as experimental 0.5.0 feature. * Inheritance: Error when using empty parentheses for base class constructors that require arguments as experimental 0.5.0 feature.
* Inheritance: Error when using no parentheses in modifier-style constructor calls as experimental 0.5.0 feature. * Inheritance: Error when using no parentheses in modifier-style constructor calls as experimental 0.5.0 feature.
* Interfaces: Allow overriding external functions in interfaces with public in an implementing contract. * Interfaces: Allow overriding external functions in interfaces with public in an implementing contract.

View File

@ -284,6 +284,11 @@ one side. Examples include ``1.``, ``.1`` and ``1.3``.
Scientific notation is also supported, where the base can have fractions, while the exponent cannot. Scientific notation is also supported, where the base can have fractions, while the exponent cannot.
Examples include ``2e10``, ``-2e10``, ``2e-10``, ``2.5e1``. Examples include ``2e10``, ``-2e10``, ``2e-10``, ``2.5e1``.
Underscores can be used to separate the digits of a numeric literal to aid readability.
For example, decimal ``123_000``, hexadecimal ``0x2eff_abde``, scientific decimal notation ``1_2e345_678`` are all valid.
Underscores are only allowed between two digits, and consecutive underscores are not allowed.
Underscores carry no semantic meaning: a number literal containing underscores has the same value as the literal without them.
Number literal expressions retain arbitrary precision until they are converted to a non-literal type (i.e. by Number literal expressions retain arbitrary precision until they are converted to a non-literal type (i.e. by
using them together with a non-literal expression). using them together with a non-literal expression).
This means that computations do not overflow and divisions do not truncate This means that computations do not overflow and divisions do not truncate

View File

@ -24,6 +24,9 @@
#include <libsolidity/interface/Version.h> #include <libsolidity/interface/Version.h>
#include <boost/algorithm/cxx11/all_of.hpp> #include <boost/algorithm/cxx11/all_of.hpp>
#include <boost/algorithm/string.hpp>
#include <string>
using namespace std; using namespace std;
using namespace dev; using namespace dev;
using namespace dev::solidity; using namespace dev::solidity;
@ -183,6 +186,45 @@ bool SyntaxChecker::visit(Throw const& _throwStatement)
return true; return true;
} }
/// Validates the placement of underscore separators inside number literals.
///
/// Literals that are not numbers are ignored. For number literals a syntax error
/// is reported when an underscore is trailing, doubled, adjacent to the decimal
/// point, or adjacent to the exponent marker of a decimal literal.
/// RationalNumberType::isValidLiteral removes all underscores before parsing and
/// accepts both 'e' and 'E' as exponent marker, so both spellings are checked here;
/// otherwise literals such as ``1_E2`` or ``1E_2`` would be accepted silently.
///
/// @returns always true, so that traversal of the AST continues.
bool SyntaxChecker::visit(Literal const& _literal)
{
	if (_literal.token() != Token::Number)
		return true;

	ASTString const& value = _literal.value();
	solAssert(!value.empty(), "");

	// Generic checks no matter what base this number literal is of:
	if (value.back() == '_')
	{
		m_errorReporter.syntaxError(_literal.location(), "Invalid use of underscores in number literal. No trailing underscores allowed.");
		return true;
	}

	if (value.find("__") != ASTString::npos)
	{
		m_errorReporter.syntaxError(_literal.location(), "Invalid use of underscores in number literal. Only one consecutive underscores between digits allowed.");
		return true;
	}

	if (!_literal.isHexNumber()) // decimal literal
	{
		// These checks are deliberately not exclusive: every violation found is reported.
		if (value.find("._") != ASTString::npos)
			m_errorReporter.syntaxError(_literal.location(), "Invalid use of underscores in number literal. No underscores in front of the fraction part allowed.");

		if (value.find("_.") != ASTString::npos)
			m_errorReporter.syntaxError(_literal.location(), "Invalid use of underscores in number literal. No underscores in front of the fraction part allowed.");

		// The exponent marker may be spelled 'e' or 'E'; hex literals never reach
		// this branch, so an 'E' here cannot be a hex digit.
		if (value.find("_e") != ASTString::npos || value.find("_E") != ASTString::npos)
			m_errorReporter.syntaxError(_literal.location(), "Invalid use of underscores in number literal. No underscore at the end of the mantissa allowed.");

		if (value.find("e_") != ASTString::npos || value.find("E_") != ASTString::npos)
			m_errorReporter.syntaxError(_literal.location(), "Invalid use of underscores in number literal. No underscore in front of exponent allowed.");
	}

	return true;
}
bool SyntaxChecker::visit(UnaryOperation const& _operation) bool SyntaxChecker::visit(UnaryOperation const& _operation)
{ {
if (_operation.getOperator() == Token::Add) if (_operation.getOperator() == Token::Add)

View File

@ -73,6 +73,7 @@ private:
virtual bool visit(VariableDeclarationStatement const& _statement) override; virtual bool visit(VariableDeclarationStatement const& _statement) override;
virtual bool visit(StructDefinition const& _struct) override; virtual bool visit(StructDefinition const& _struct) override;
virtual bool visit(Literal const& _literal) override;
ErrorReporter& m_errorReporter; ErrorReporter& m_errorReporter;

View File

@ -39,6 +39,7 @@
#include <boost/range/algorithm/copy.hpp> #include <boost/range/algorithm/copy.hpp>
#include <boost/range/adaptor/sliced.hpp> #include <boost/range/adaptor/sliced.hpp>
#include <boost/range/adaptor/transformed.hpp> #include <boost/range/adaptor/transformed.hpp>
#include <boost/algorithm/string.hpp>
#include <limits> #include <limits>
@ -783,19 +784,22 @@ tuple<bool, rational> RationalNumberType::isValidLiteral(Literal const& _literal
rational value; rational value;
try try
{ {
auto expPoint = find(_literal.value().begin(), _literal.value().end(), 'e'); ASTString valueString = _literal.value();
if (expPoint == _literal.value().end()) boost::erase_all(valueString, "_");// Remove underscore separators
expPoint = find(_literal.value().begin(), _literal.value().end(), 'E');
if (boost::starts_with(_literal.value(), "0x")) auto expPoint = find(valueString.begin(), valueString.end(), 'e');
if (expPoint == valueString.end())
expPoint = find(valueString.begin(), valueString.end(), 'E');
if (boost::starts_with(valueString, "0x"))
{ {
// process as hex // process as hex
value = bigint(_literal.value()); value = bigint(valueString);
} }
else if (expPoint != _literal.value().end()) else if (expPoint != valueString.end())
{ {
// Parse mantissa and exponent. Checks numeric limit. // Parse mantissa and exponent. Checks numeric limit.
tuple<bool, rational> mantissa = parseRational(string(_literal.value().begin(), expPoint)); tuple<bool, rational> mantissa = parseRational(string(valueString.begin(), expPoint));
if (!get<0>(mantissa)) if (!get<0>(mantissa))
return make_tuple(false, rational(0)); return make_tuple(false, rational(0));
@ -805,7 +809,7 @@ tuple<bool, rational> RationalNumberType::isValidLiteral(Literal const& _literal
if (value == 0) if (value == 0)
return make_tuple(true, rational(0)); return make_tuple(true, rational(0));
bigint exp = bigint(string(expPoint + 1, _literal.value().end())); bigint exp = bigint(string(expPoint + 1, valueString.end()));
if (exp > numeric_limits<int32_t>::max() || exp < numeric_limits<int32_t>::min()) if (exp > numeric_limits<int32_t>::max() || exp < numeric_limits<int32_t>::min())
return make_tuple(false, rational(0)); return make_tuple(false, rational(0));
@ -834,7 +838,7 @@ tuple<bool, rational> RationalNumberType::isValidLiteral(Literal const& _literal
else else
{ {
// parse as rational number // parse as rational number
tuple<bool, rational> tmp = parseRational(_literal.value()); tuple<bool, rational> tmp = parseRational(valueString);
if (!get<0>(tmp)) if (!get<0>(tmp))
return tmp; return tmp;
value = get<1>(tmp); value = get<1>(tmp);

View File

@ -724,10 +724,18 @@ Token::Value Scanner::scanHexString()
return Token::StringLiteral; return Token::StringLiteral;
} }
// Scans [0-9][0-9_]*. The stricter form [0-9]+(_[0-9]+)* is enforced later by SyntaxChecker.
void Scanner::scanDecimalDigits() void Scanner::scanDecimalDigits()
{ {
while (isDecimalDigit(m_char)) // MUST begin with a decimal digit.
addLiteralCharAndAdvance(); if (!isDecimalDigit(m_char))
return;
// May continue with decimal digit or underscore for grouping.
do addLiteralCharAndAdvance();
while (!m_source.isPastEndOfInput() && (isDecimalDigit(m_char) || m_char == '_'));
// Defer further validation of underscore to SyntaxChecker.
} }
Token::Value Scanner::scanNumber(char _charSeen) Token::Value Scanner::scanNumber(char _charSeen)
@ -738,6 +746,8 @@ Token::Value Scanner::scanNumber(char _charSeen)
{ {
// we have already seen a decimal point of the float // we have already seen a decimal point of the float
addLiteralChar('.'); addLiteralChar('.');
if (m_char == '_')
return Token::Illegal;
scanDecimalDigits(); // we know we have at least one digit scanDecimalDigits(); // we know we have at least one digit
} }
else else
@ -755,7 +765,8 @@ Token::Value Scanner::scanNumber(char _charSeen)
addLiteralCharAndAdvance(); addLiteralCharAndAdvance();
if (!isHexDigit(m_char)) if (!isHexDigit(m_char))
return Token::Illegal; // we must have at least one hex digit after 'x'/'X' return Token::Illegal; // we must have at least one hex digit after 'x'/'X'
while (isHexDigit(m_char))
while (isHexDigit(m_char) || m_char == '_') // We keep the underscores for later validation
addLiteralCharAndAdvance(); addLiteralCharAndAdvance();
} }
else if (isDecimalDigit(m_char)) else if (isDecimalDigit(m_char))
@ -768,9 +779,17 @@ Token::Value Scanner::scanNumber(char _charSeen)
scanDecimalDigits(); // optional scanDecimalDigits(); // optional
if (m_char == '.') if (m_char == '.')
{ {
// A '.' has to be followed by a number. if (!m_source.isPastEndOfInput(1) && m_source.get(1) == '_')
{
// Assume the input may be a floating point number with leading '_' in fraction part.
// Recover by consuming it all as part of the literal; the misplaced underscore is reported later by SyntaxChecker.
addLiteralCharAndAdvance(); // '.'
addLiteralCharAndAdvance(); // '_'
scanDecimalDigits();
}
if (m_source.isPastEndOfInput() || !isDecimalDigit(m_source.get(1))) if (m_source.isPastEndOfInput() || !isDecimalDigit(m_source.get(1)))
{ {
// A '.' has to be followed by a number.
literal.complete(); literal.complete();
return Token::Number; return Token::Number;
} }
@ -785,8 +804,18 @@ Token::Value Scanner::scanNumber(char _charSeen)
solAssert(kind != HEX, "'e'/'E' must be scanned as part of the hex number"); solAssert(kind != HEX, "'e'/'E' must be scanned as part of the hex number");
if (kind != DECIMAL) if (kind != DECIMAL)
return Token::Illegal; return Token::Illegal;
else if (!m_source.isPastEndOfInput(1) && m_source.get(1) == '_')
{
// Recover from wrongly placed underscore as delimiter in literal with scientific
// notation by consuming until the end.
addLiteralCharAndAdvance(); // 'e'
addLiteralCharAndAdvance(); // '_'
scanDecimalDigits();
literal.complete();
return Token::Number;
}
// scan exponent // scan exponent
addLiteralCharAndAdvance(); addLiteralCharAndAdvance(); // 'e' | 'E'
if (m_char == '+' || m_char == '-') if (m_char == '+' || m_char == '-')
addLiteralCharAndAdvance(); addLiteralCharAndAdvance();
if (!isDecimalDigit(m_char)) if (!isDecimalDigit(m_char))

View File

@ -12835,6 +12835,22 @@ BOOST_AUTO_TEST_CASE(write_storage_external)
ABI_CHECK(callContractFunction("h()"), encodeArgs(12)); ABI_CHECK(callContractFunction("h()"), encodeArgs(12));
} }
// End-to-end check that hex literals written with underscore separators compile
// and evaluate to the same value as the corresponding literal without underscores.
BOOST_AUTO_TEST_CASE(test_underscore_in_hex)
{
char const* sourceCode = R"(
contract test {
function f(bool cond) public pure returns (uint) {
uint32 x = 0x1234_ab;
uint y = 0x1234_abcd_1234;
return cond ? x : y;
}
}
)";
compileAndRun(sourceCode);
// cond == true selects x (0x1234_ab), cond == false selects y (0x1234_abcd_1234).
ABI_CHECK(callContractFunction("f(bool)", true), encodeArgs(u256(0x1234ab)));
ABI_CHECK(callContractFunction("f(bool)", false), encodeArgs(u256(0x1234abcd1234)));
}
BOOST_AUTO_TEST_SUITE_END() BOOST_AUTO_TEST_SUITE_END()
} }

View File

@ -155,6 +155,76 @@ BOOST_AUTO_TEST_CASE(trailing_dot)
BOOST_CHECK_EQUAL(scanner.next(), Token::EOS); BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
} }
// Expects "_1.2" to be tokenized as an identifier followed by a number token,
// i.e. a leading underscore never becomes part of a number literal.
BOOST_AUTO_TEST_CASE(leading_underscore_decimal_is_identifier)
{
// Actual error is caught by SyntaxChecker.
Scanner scanner(CharStream("_1.2"));
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Identifier);
BOOST_CHECK_EQUAL(scanner.next(), Token::Number);
BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
}
// Expects an underscore directly after the decimal point ("1._2", "1._") to be
// consumed into a single number token. Despite the test name, the scanner is not
// expected to produce Token::Illegal for these inputs.
BOOST_AUTO_TEST_CASE(leading_underscore_decimal_after_dot_illegal)
{
// Actual error is caught by SyntaxChecker.
Scanner scanner(CharStream("1._2"));
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Number);
BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
// Same expectation when nothing follows the underscore.
scanner.reset(CharStream("1._"), "");
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Number);
BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
}
// Expects "_1e2" to be tokenized as one identifier covering the whole input.
BOOST_AUTO_TEST_CASE(leading_underscore_exp_are_identifier)
{
// Actual error is caught by SyntaxChecker.
Scanner scanner(CharStream("_1e2"));
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Identifier);
BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
}
// Expects an underscore directly after the exponent marker ("1e_2") to be kept
// inside a single number token whose literal text still contains the underscore.
// Despite the test name, the scanner is not expected to produce Token::Illegal.
BOOST_AUTO_TEST_CASE(leading_underscore_exp_after_e_illegal)
{
// Actual error is caught by SyntaxChecker.
Scanner scanner(CharStream("1e_2"));
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Number);
BOOST_CHECK_EQUAL(scanner.currentLiteral(), "1e_2");
BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
}
// Expects an underscore directly after the hex prefix ("0x_abc") to be rejected by
// the scanner itself: an Illegal token is produced, followed by an identifier.
BOOST_AUTO_TEST_CASE(leading_underscore_hex_illegal)
{
Scanner scanner(CharStream("0x_abc"));
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal);
BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier);
BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
}
// Expects a fraction part that starts with an underscore and contains further
// separators ("12._1234_1234") to be scanned as one number token.
BOOST_AUTO_TEST_CASE(fixed_number_invalid_underscore_front)
{
// Actual error is caught by SyntaxChecker.
Scanner scanner(CharStream("12._1234_1234"));
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Number);
BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
}
// Expects a trailing underscore at the very end of the input to be consumed into
// the number token for hex, decimal and fractional literals alike.
BOOST_AUTO_TEST_CASE(number_literals_with_trailing_underscore_at_eos)
{
// Actual error is caught by SyntaxChecker.
// Hex literal.
Scanner scanner(CharStream("0x123_"));
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Number);
BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
// Decimal integer literal.
scanner.reset(CharStream("123_"), "");
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Number);
BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
// Decimal literal with a fraction part.
scanner.reset(CharStream("12.34_"), "");
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Number);
BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
}
BOOST_AUTO_TEST_CASE(negative_numbers) BOOST_AUTO_TEST_CASE(negative_numbers)
{ {
Scanner scanner(CharStream("var x = -.2 + -0x78 + -7.3 + 8.9 + 2e-2;")); Scanner scanner(CharStream("var x = -.2 + -0x78 + -7.3 + 8.9 + 2e-2;"));

View File

@ -0,0 +1,13 @@
// Decimal literals with a single underscore between digits, including in the
// mantissa (d5) and the exponent (d6) of scientific notation. No errors are expected.
contract C {
function f() public pure {
uint d1 = 654_321;
uint d2 = 54_321;
uint d3 = 4_321;
uint d4 = 5_43_21;
uint d5 = 1_2e10;
uint d6 = 12e1_0;
d1; d2; d3; d4; d5; d6;
}
}
// ----

View File

@ -0,0 +1,13 @@
contract C {
function f() public pure {
uint D1 = 1234_;
uint D2 = 12__34;
uint D3 = 12_e34;
uint D4 = 12e_34;
}
}
// ----
// SyntaxError: (56-61): Invalid use of underscores in number literal. No trailing underscores allowed.
// SyntaxError: (77-83): Invalid use of underscores in number literal. Only one consecutive underscores between digits allowed.
// SyntaxError: (99-105): Invalid use of underscores in number literal. No underscore at the end of the mantissa allowed.
// SyntaxError: (121-127): Invalid use of underscores in number literal. No underscore in front of exponent allowed.

View File

@ -0,0 +1,9 @@
// Fixed point literals with a single underscore between digits in the integer
// and fraction parts. No errors are expected.
contract C {
function f() public pure {
fixed f1 = 3.14_15;
fixed f2 = 3_1.4_15;
f1; f2;
}
}
// ----

View File

@ -0,0 +1,17 @@
contract C {
function f() public pure {
fixed F1 = 3.1415_;
fixed F2 = 3__1.4__15;
fixed F3 = 1_.2;
fixed F4 = 1._2;
fixed F5 = 1.2e_12;
fixed F6 = 1._;
}
}
// ----
// SyntaxError: (57-64): Invalid use of underscores in number literal. No trailing underscores allowed.
// SyntaxError: (81-91): Invalid use of underscores in number literal. Only one consecutive underscores between digits allowed.
// SyntaxError: (108-112): Invalid use of underscores in number literal. No underscores in front of the fraction part allowed.
// SyntaxError: (129-133): Invalid use of underscores in number literal. No underscores in front of the fraction part allowed.
// SyntaxError: (150-157): Invalid use of underscores in number literal. No underscore in front of exponent allowed.
// SyntaxError: (174-177): Invalid use of underscores in number literal. No trailing underscores allowed.

View File

@ -0,0 +1,13 @@
// Hex literals with a single underscore between digits; the groups do not need
// to have equal length. No errors are expected.
contract C {
function f() public pure {
uint x1 = 0x8765_4321;
uint x2 = 0x765_4321;
uint x3 = 0x65_4321;
uint x4 = 0x5_4321;
uint x5 = 0x123_1234_1234_1234;
uint x6 = 0x123456_1234_1234;
x1; x2; x3; x4; x5; x6;
}
}
// ----

View File

@ -0,0 +1,7 @@
contract C {
function f() public pure {
uint X1 = 0x1234__1234__1234__123;
}
}
// ----
// SyntaxError: (56-79): Invalid use of underscores in number literal. Only one consecutive underscores between digits allowed.