Merge pull request #4684 from ethereum/underscores_in_numeric_literals

[BREAKING] Underscores in numeric literals
This commit is contained in:
chriseth 2018-08-08 21:36:57 +02:00 committed by GitHub
commit d634d20b5b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 256 additions and 14 deletions

2
.gitignore vendored
View File

@ -46,3 +46,5 @@ browse.VC.db
CMakeLists.txt.user
/CMakeSettings.json
/.vs
/.cproject
/.project

View File

@ -150,6 +150,7 @@ Features:
* General: Introduce new constructor syntax using the ``constructor`` keyword as experimental 0.5.0 feature.
* General: Limit the number of errors output in a single run to 256.
* General: Support accessing dynamic return data in post-byzantium EVMs.
* General: Allow underscores in numeric and hex literals to separate thousands and quads.
* Inheritance: Error when using empty parentheses for base class constructors that require arguments as experimental 0.5.0 feature.
* Inheritance: Error when using no parentheses in modifier-style constructor calls as experimental 0.5.0 feature.
* Interfaces: Allow overriding external functions in interfaces with public in an implementing contract.

View File

@ -284,6 +284,11 @@ one side. Examples include ``1.``, ``.1`` and ``1.3``.
Scientific notation is also supported, where the base can have fractions, while the exponent cannot.
Examples include ``2e10``, ``-2e10``, ``2e-10``, ``2.5e1``.
Underscores can be used to separate the digits of a numeric literal to aid readability.
For example, decimal ``123_000``, hexadecimal ``0x2eff_abde`` and scientific decimal notation ``1_2e345_678`` are all valid.
Underscores are only allowed between two digits, and consecutive underscores are not allowed.
There is no additional semantic meaning added to a number literal containing underscores.
Number literal expressions retain arbitrary precision until they are converted to a non-literal type (i.e. by
using them together with a non-literal expression).
This means that computations do not overflow and divisions do not truncate

View File

@ -24,6 +24,9 @@
#include <libsolidity/interface/Version.h>
#include <boost/algorithm/cxx11/all_of.hpp>
#include <boost/algorithm/string.hpp>
#include <string>
using namespace std;
using namespace dev;
using namespace dev::solidity;
@ -183,6 +186,45 @@ bool SyntaxChecker::visit(Throw const& _throwStatement)
return true;
}
/// Checks that underscore separators in a number literal are placed correctly.
///
/// Literals whose token is not Token::Number are ignored. For number literals a
/// syntax error is reported if the literal ends with an underscore or contains
/// two adjacent underscores; after the first such error no further checks are made.
/// For non-hex literals an error is additionally reported for an underscore
/// directly next to the decimal point or directly next to the exponent marker.
/// The exponent marker is matched in both spellings, 'e' and 'E'.
///
/// @returns always true, so that the visitor descends further.
bool SyntaxChecker::visit(Literal const& _literal)
{
	if (_literal.token() != Token::Number)
		return true;

	ASTString const& value = _literal.value();
	solAssert(!value.empty(), "");

	// Generic checks no matter what base this number literal is of:
	if (value.back() == '_')
	{
		m_errorReporter.syntaxError(_literal.location(), "Invalid use of underscores in number literal. No trailing underscores allowed.");
		return true;
	}

	if (value.find("__") != ASTString::npos)
	{
		m_errorReporter.syntaxError(_literal.location(), "Invalid use of underscores in number literal. Only one consecutive underscores between digits allowed.");
		return true;
	}

	// In hex literals 'e'/'E' are ordinary digits, so the checks below only
	// make sense for decimal literals.
	if (!_literal.isHexNumber()) // decimal literal
	{
		if (value.find("._") != ASTString::npos)
			m_errorReporter.syntaxError(_literal.location(), "Invalid use of underscores in number literal. No underscores in front of the fraction part allowed.");

		if (value.find("_.") != ASTString::npos)
			m_errorReporter.syntaxError(_literal.location(), "Invalid use of underscores in number literal. No underscores in front of the fraction part allowed.");

		// Match both spellings of the exponent marker; checking only the lowercase
		// form would let literals such as 12_E34 or 12E_34 slip through.
		if (value.find("_e") != ASTString::npos || value.find("_E") != ASTString::npos)
			m_errorReporter.syntaxError(_literal.location(), "Invalid use of underscores in number literal. No underscore at the end of the mantissa allowed.");

		if (value.find("e_") != ASTString::npos || value.find("E_") != ASTString::npos)
			m_errorReporter.syntaxError(_literal.location(), "Invalid use of underscores in number literal. No underscore in front of exponent allowed.");
	}

	return true;
}
bool SyntaxChecker::visit(UnaryOperation const& _operation)
{
if (_operation.getOperator() == Token::Add)

View File

@ -73,6 +73,7 @@ private:
virtual bool visit(VariableDeclarationStatement const& _statement) override;
virtual bool visit(StructDefinition const& _struct) override;
virtual bool visit(Literal const& _literal) override;
ErrorReporter& m_errorReporter;

View File

@ -39,6 +39,7 @@
#include <boost/range/algorithm/copy.hpp>
#include <boost/range/adaptor/sliced.hpp>
#include <boost/range/adaptor/transformed.hpp>
#include <boost/algorithm/string.hpp>
#include <limits>
@ -783,19 +784,22 @@ tuple<bool, rational> RationalNumberType::isValidLiteral(Literal const& _literal
rational value;
try
{
auto expPoint = find(_literal.value().begin(), _literal.value().end(), 'e');
if (expPoint == _literal.value().end())
expPoint = find(_literal.value().begin(), _literal.value().end(), 'E');
ASTString valueString = _literal.value();
boost::erase_all(valueString, "_");// Remove underscore separators
if (boost::starts_with(_literal.value(), "0x"))
auto expPoint = find(valueString.begin(), valueString.end(), 'e');
if (expPoint == valueString.end())
expPoint = find(valueString.begin(), valueString.end(), 'E');
if (boost::starts_with(valueString, "0x"))
{
// process as hex
value = bigint(_literal.value());
value = bigint(valueString);
}
else if (expPoint != _literal.value().end())
else if (expPoint != valueString.end())
{
// Parse mantissa and exponent. Checks numeric limit.
tuple<bool, rational> mantissa = parseRational(string(_literal.value().begin(), expPoint));
tuple<bool, rational> mantissa = parseRational(string(valueString.begin(), expPoint));
if (!get<0>(mantissa))
return make_tuple(false, rational(0));
@ -805,7 +809,7 @@ tuple<bool, rational> RationalNumberType::isValidLiteral(Literal const& _literal
if (value == 0)
return make_tuple(true, rational(0));
bigint exp = bigint(string(expPoint + 1, _literal.value().end()));
bigint exp = bigint(string(expPoint + 1, valueString.end()));
if (exp > numeric_limits<int32_t>::max() || exp < numeric_limits<int32_t>::min())
return make_tuple(false, rational(0));
@ -834,7 +838,7 @@ tuple<bool, rational> RationalNumberType::isValidLiteral(Literal const& _literal
else
{
// parse as rational number
tuple<bool, rational> tmp = parseRational(_literal.value());
tuple<bool, rational> tmp = parseRational(valueString);
if (!get<0>(tmp))
return tmp;
value = get<1>(tmp);

View File

@ -724,10 +724,18 @@ Token::Value Scanner::scanHexString()
return Token::StringLiteral;
}
// Parse for regex [:digit:]+(_[:digit:]+)*
void Scanner::scanDecimalDigits()
{
while (isDecimalDigit(m_char))
addLiteralCharAndAdvance();
// MUST begin with a decimal digit.
if (!isDecimalDigit(m_char))
return;
// May continue with decimal digit or underscore for grouping.
do addLiteralCharAndAdvance();
while (!m_source.isPastEndOfInput() && (isDecimalDigit(m_char) || m_char == '_'));
// Defer further validation of underscore to SyntaxChecker.
}
Token::Value Scanner::scanNumber(char _charSeen)
@ -738,6 +746,8 @@ Token::Value Scanner::scanNumber(char _charSeen)
{
// we have already seen a decimal point of the float
addLiteralChar('.');
if (m_char == '_')
return Token::Illegal;
scanDecimalDigits(); // we know we have at least one digit
}
else
@ -755,7 +765,8 @@ Token::Value Scanner::scanNumber(char _charSeen)
addLiteralCharAndAdvance();
if (!isHexDigit(m_char))
return Token::Illegal; // we must have at least one hex digit after 'x'/'X'
while (isHexDigit(m_char))
while (isHexDigit(m_char) || m_char == '_') // We keep the underscores for later validation
addLiteralCharAndAdvance();
}
else if (isDecimalDigit(m_char))
@ -768,9 +779,17 @@ Token::Value Scanner::scanNumber(char _charSeen)
scanDecimalDigits(); // optional
if (m_char == '.')
{
// A '.' has to be followed by a number.
if (!m_source.isPastEndOfInput(1) && m_source.get(1) == '_')
{
// Assume the input may be a floating point number with leading '_' in fraction part.
// Recover by consuming it all; the misplaced underscore is left in the literal
// so that it can be reported later, when the literal is validated.
addLiteralCharAndAdvance(); // '.'
addLiteralCharAndAdvance(); // '_'
scanDecimalDigits();
}
if (m_source.isPastEndOfInput() || !isDecimalDigit(m_source.get(1)))
{
// A '.' has to be followed by a number.
literal.complete();
return Token::Number;
}
@ -785,8 +804,18 @@ Token::Value Scanner::scanNumber(char _charSeen)
solAssert(kind != HEX, "'e'/'E' must be scanned as part of the hex number");
if (kind != DECIMAL)
return Token::Illegal;
else if (!m_source.isPastEndOfInput(1) && m_source.get(1) == '_')
{
// Recover from wrongly placed underscore as delimiter in literal with scientific
// notation by consuming until the end.
addLiteralCharAndAdvance(); // 'e'
addLiteralCharAndAdvance(); // '_'
scanDecimalDigits();
literal.complete();
return Token::Number;
}
// scan exponent
addLiteralCharAndAdvance();
addLiteralCharAndAdvance(); // 'e' | 'E'
if (m_char == '+' || m_char == '-')
addLiteralCharAndAdvance();
if (!isDecimalDigit(m_char))

View File

@ -12835,6 +12835,22 @@ BOOST_AUTO_TEST_CASE(write_storage_external)
ABI_CHECK(callContractFunction("h()"), encodeArgs(12));
}
// End-to-end check that hex literals containing underscore separators compile and
// evaluate to the same value as the corresponding literal without underscores.
BOOST_AUTO_TEST_CASE(test_underscore_in_hex)
{
char const* sourceCode = R"(
contract test {
function f(bool cond) public pure returns (uint) {
uint32 x = 0x1234_ab;
uint y = 0x1234_abcd_1234;
return cond ? x : y;
}
}
)";
compileAndRun(sourceCode);
// cond == true selects x, cond == false selects y.
ABI_CHECK(callContractFunction("f(bool)", true), encodeArgs(u256(0x1234ab)));
ABI_CHECK(callContractFunction("f(bool)", false), encodeArgs(u256(0x1234abcd1234)));
}
BOOST_AUTO_TEST_SUITE_END()
}

View File

@ -155,6 +155,76 @@ BOOST_AUTO_TEST_CASE(trailing_dot)
BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
}
// Input starting with an underscore is not scanned as a number literal:
// "_1.2" is expected to produce Identifier, Number, EOS.
BOOST_AUTO_TEST_CASE(leading_underscore_decimal_is_identifier)
{
// Actual error is caught by SyntaxChecker.
// NOTE(review): no scanner error is asserted here; confirm which later stage
// actually rejects this input.
Scanner scanner(CharStream("_1.2"));
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Identifier);
BOOST_CHECK_EQUAL(scanner.next(), Token::Number);
BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
}
// An underscore directly after the decimal point is consumed as part of the number:
// despite the test name, the scanner is expected to return a single Number token.
BOOST_AUTO_TEST_CASE(leading_underscore_decimal_after_dot_illegal)
{
// Actual error is caught by SyntaxChecker.
Scanner scanner(CharStream("1._2"));
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Number);
BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
// Same expectation when nothing follows the underscore.
scanner.reset(CharStream("1._"), "");
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Number);
BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
}
// "_1e2" starts with an underscore and is expected to be scanned as one single
// Identifier token, not as a number in scientific notation.
BOOST_AUTO_TEST_CASE(leading_underscore_exp_are_identifier)
{
// Actual error is caught by SyntaxChecker.
// NOTE(review): only an Identifier token is asserted here; confirm whether any
// error is expected for this input at all.
Scanner scanner(CharStream("_1e2"));
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Identifier);
BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
}
// An underscore directly after the exponent marker is kept in the literal:
// despite the test name, the scanner is expected to return a Number token whose
// literal text is the complete input "1e_2".
BOOST_AUTO_TEST_CASE(leading_underscore_exp_after_e_illegal)
{
// Actual error is caught by SyntaxChecker.
Scanner scanner(CharStream("1e_2"));
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Number);
BOOST_CHECK_EQUAL(scanner.currentLiteral(), "1e_2");
BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
}
// An underscore directly after the "0x" prefix is rejected by the scanner itself:
// "0x_abc" is expected to produce Illegal, then Identifier for the rest, then EOS.
BOOST_AUTO_TEST_CASE(leading_underscore_hex_illegal)
{
Scanner scanner(CharStream("0x_abc"));
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal);
BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier);
BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
}
// A fraction part that begins with an underscore and contains further separators
// is still expected to be scanned as one single Number token.
BOOST_AUTO_TEST_CASE(fixed_number_invalid_underscore_front)
{
// Actual error is caught by SyntaxChecker.
Scanner scanner(CharStream("12._1234_1234"));
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Number);
BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
}
// A trailing underscore at the very end of the input must not confuse the scanner:
// hex, integer and fractional literals are each expected to yield Number, then EOS.
BOOST_AUTO_TEST_CASE(number_literals_with_trailing_underscore_at_eos)
{
// Actual error is caught by SyntaxChecker.
// Hexadecimal literal.
Scanner scanner(CharStream("0x123_"));
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Number);
BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
// Decimal integer literal.
scanner.reset(CharStream("123_"), "");
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Number);
BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
// Decimal literal with fraction part.
scanner.reset(CharStream("12.34_"), "");
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Number);
BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
}
BOOST_AUTO_TEST_CASE(negative_numbers)
{
Scanner scanner(CharStream("var x = -.2 + -0x78 + -7.3 + 8.9 + 2e-2;"));

View File

@ -0,0 +1,13 @@
contract C {
function f() public pure {
uint d1 = 654_321;
uint d2 = 54_321;
uint d3 = 4_321;
uint d4 = 5_43_21;
uint d5 = 1_2e10;
uint d6 = 12e1_0;
d1; d2; d3; d4; d5; d6;
}
}
// ----

View File

@ -0,0 +1,13 @@
contract C {
function f() public pure {
uint D1 = 1234_;
uint D2 = 12__34;
uint D3 = 12_e34;
uint D4 = 12e_34;
}
}
// ----
// SyntaxError: (56-61): Invalid use of underscores in number literal. No trailing underscores allowed.
// SyntaxError: (77-83): Invalid use of underscores in number literal. Only one consecutive underscores between digits allowed.
// SyntaxError: (99-105): Invalid use of underscores in number literal. No underscore at the end of the mantissa allowed.
// SyntaxError: (121-127): Invalid use of underscores in number literal. No underscore in front of exponent allowed.

View File

@ -0,0 +1,9 @@
contract C {
function f() public pure {
fixed f1 = 3.14_15;
fixed f2 = 3_1.4_15;
f1; f2;
}
}
// ----

View File

@ -0,0 +1,17 @@
contract C {
function f() public pure {
fixed F1 = 3.1415_;
fixed F2 = 3__1.4__15;
fixed F3 = 1_.2;
fixed F4 = 1._2;
fixed F5 = 1.2e_12;
fixed F6 = 1._;
}
}
// ----
// SyntaxError: (57-64): Invalid use of underscores in number literal. No trailing underscores allowed.
// SyntaxError: (81-91): Invalid use of underscores in number literal. Only one consecutive underscores between digits allowed.
// SyntaxError: (108-112): Invalid use of underscores in number literal. No underscores in front of the fraction part allowed.
// SyntaxError: (129-133): Invalid use of underscores in number literal. No underscores in front of the fraction part allowed.
// SyntaxError: (150-157): Invalid use of underscores in number literal. No underscore in front of exponent allowed.
// SyntaxError: (174-177): Invalid use of underscores in number literal. No trailing underscores allowed.

View File

@ -0,0 +1,13 @@
contract C {
function f() public pure {
uint x1 = 0x8765_4321;
uint x2 = 0x765_4321;
uint x3 = 0x65_4321;
uint x4 = 0x5_4321;
uint x5 = 0x123_1234_1234_1234;
uint x6 = 0x123456_1234_1234;
x1; x2; x3; x4; x5; x6;
}
}
// ----

View File

@ -0,0 +1,7 @@
contract C {
function f() public pure {
uint X1 = 0x1234__1234__1234__123;
}
}
// ----
// SyntaxError: (56-79): Invalid use of underscores in number literal. Only one consecutive underscores between digits allowed.