Add stricter hex underscore rules

This commit is contained in:
Balajiganapathi S 2017-10-25 13:42:07 +05:30 committed by Christian Parpart
parent 0000bfc604
commit 09a36cba02
7 changed files with 86 additions and 20 deletions

View File

@ -284,8 +284,8 @@ one side. Examples include ``1.``, ``.1`` and ``1.3``.
Scientific notation is also supported, where the base can have fractions, while the exponent cannot. Scientific notation is also supported, where the base can have fractions, while the exponent cannot.
Examples include ``2e10``, ``-2e10``, ``2e-10``, ``2.5e1``. Examples include ``2e10``, ``-2e10``, ``2e-10``, ``2.5e1``.
Underscores can be used to separate digits of a numeric literal to aid readability. Underscores can be used to separate the digits of a numeric literal to aid readability.
For example, ``123_000``, ``0x2eff_abde``, ``1_233e34_89`` are all valid. Underscores are only allowed between two digits. For example, ``123_000``, ``0x2eff_abde``, ``0x1233_e348_9a`` are all valid. Underscores are only allowed between two digits. For hex literals, underscores are only allowed to separate groups of 4 hex digits.
Number literal expressions retain arbitrary precision until they are converted to a non-literal type (i.e. by Number literal expressions retain arbitrary precision until they are converted to a non-literal type (i.e. by
using them together with a non-literal expression). using them together with a non-literal expression).

View File

@ -24,6 +24,9 @@
#include <libsolidity/interface/Version.h> #include <libsolidity/interface/Version.h>
#include <boost/algorithm/cxx11/all_of.hpp> #include <boost/algorithm/cxx11/all_of.hpp>
#include <boost/algorithm/string.hpp>
#include <string>
using namespace std; using namespace std;
using namespace dev; using namespace dev;
using namespace dev::solidity; using namespace dev::solidity;
@ -183,6 +186,46 @@ bool SyntaxChecker::visit(Throw const& _throwStatement)
return true; return true;
} }
/// Validates the placement of underscore separators inside hex number literals.
/// Every violation is reported as a syntax error at the literal's location.
/// All code paths return true.
bool SyntaxChecker::visit(Literal const& _literal)
{
	// Literals that are not hex numbers are not inspected here.
	if (!_literal.isHexNumber())
		return true;

	solAssert(_literal.value().substr(0, 2) == "0x", "");

	// Break the digits following the "0x" prefix into underscore-separated groups.
	ASTString digits = _literal.value().substr(2);
	vector<ASTString> groups;
	boost::split(groups, digits, boost::is_any_of("_"));

	// A single group means the literal contains no underscore at all.
	if (groups.size() == 1)
		return true;

	// Every group strictly between the first and the last one must be exactly 4 characters long.
	for (size_t idx = 1; idx + 1 < groups.size(); ++idx)
	{
		size_t const innerLength = groups[idx].size();
		if (innerLength != 4)
			m_errorReporter.syntaxError(_literal.location(), "Invalid use of underscores in hex literal. Found inner part with " + to_string(innerLength) + " digits (has to be 4 digits).");
	}

	size_t const firstLength = groups.front().size();
	size_t const lastLength = groups.back().size();

	// A 4-character last group is accepted whatever the length of the first group.
	if (lastLength == 4)
		return true;

	if (firstLength != 4)
		// Neither the first nor the last group is 4 characters long.
		m_errorReporter.syntaxError(_literal.location(), "Invalid use of underscores in hex literal. First or last part must have 4 digits.");
	else if (lastLength % 2 != 0)
		// With a 4-character first group, an odd-length last group would be ambiguous with respect to zero padding.
		m_errorReporter.syntaxError(_literal.location(), "Invalid use of underscores in hex literal. If the first part has 4 digits, it is assumed to be a byte sequence instead of a number and thus the last part should have an even number of digits.");

	return true;
}
bool SyntaxChecker::visit(UnaryOperation const& _operation) bool SyntaxChecker::visit(UnaryOperation const& _operation)
{ {
if (_operation.getOperator() == Token::Add) if (_operation.getOperator() == Token::Add)

View File

@ -73,6 +73,7 @@ private:
virtual bool visit(VariableDeclarationStatement const& _statement) override; virtual bool visit(VariableDeclarationStatement const& _statement) override;
virtual bool visit(StructDefinition const& _struct) override; virtual bool visit(StructDefinition const& _struct) override;
virtual bool visit(Literal const& _literal) override;
ErrorReporter& m_errorReporter; ErrorReporter& m_errorReporter;

View File

@ -39,6 +39,7 @@
#include <boost/range/algorithm/copy.hpp> #include <boost/range/algorithm/copy.hpp>
#include <boost/range/adaptor/sliced.hpp> #include <boost/range/adaptor/sliced.hpp>
#include <boost/range/adaptor/transformed.hpp> #include <boost/range/adaptor/transformed.hpp>
#include <boost/algorithm/string.hpp>
#include <limits> #include <limits>
@ -779,7 +780,9 @@ tuple<bool, rational> RationalNumberType::isValidLiteral(Literal const& _literal
if (boost::starts_with(_literal.value(), "0x")) if (boost::starts_with(_literal.value(), "0x"))
{ {
// process as hex // process as hex
value = bigint(_literal.value()); ASTString valueString = _literal.value();
boost::erase_all(valueString, "_");// Remove underscore separators
value = bigint(valueString);
} }
else if (expPoint != _literal.value().end()) else if (expPoint != _literal.value().end())
{ {

View File

@ -726,21 +726,26 @@ Token::Value Scanner::scanHexString()
void Scanner::scanDecimalDigits() void Scanner::scanDecimalDigits()
{ {
if (!isDecimalDigit(m_char)) // avoid underscore at beginning // Parse for regex [:digit:]+(_[:digit:]+)*
return;
while (isDecimalDigit(m_char) || m_char == '_') do
{ {
if (!isDecimalDigit(m_char))
return;
while (isDecimalDigit(m_char))
addLiteralCharAndAdvance();
if (m_char == '_') if (m_char == '_')
{ {
advance(); advance();
if (!isDecimalDigit(m_char)) // avoid trailing underscore if (!isDecimalDigit(m_char)) // Trailing underscore. Rollback and allow next step to flag it as illegal
{ {
rollback(1); rollback(1);
break; return;
} }
} }
addLiteralCharAndAdvance();
} }
while (isDecimalDigit(m_char));
} }
Token::Value Scanner::scanNumber(char _charSeen) Token::Value Scanner::scanNumber(char _charSeen)
@ -768,19 +773,17 @@ Token::Value Scanner::scanNumber(char _charSeen)
addLiteralCharAndAdvance(); addLiteralCharAndAdvance();
if (!isHexDigit(m_char)) if (!isHexDigit(m_char))
return Token::Illegal; // we must have at least one hex digit after 'x'/'X' return Token::Illegal; // we must have at least one hex digit after 'x'/'X'
while (isHexDigit(m_char) || m_char == '_') // same logic as scanDecimalDigits char last = m_char;
while (isHexDigit(m_char) || m_char == '_') // Unlike decimal digits, we keep the underscores for later validation
{ {
if (m_char == '_') if (m_char == '_' && last == '_')
{ return Token::Illegal; // Double underscore
advance();
if (!isHexDigit(m_char)) // avoid trailing underscore last = m_char;
{
rollback(1);
break;
}
}
addLiteralCharAndAdvance(); addLiteralCharAndAdvance();
} }
if (last == '_')
return Token::Illegal; // Trailing underscore
} }
else if (isDecimalDigit(m_char)) else if (isDecimalDigit(m_char))
// We do not allow octal numbers // We do not allow octal numbers

View File

@ -12836,6 +12836,22 @@ BOOST_AUTO_TEST_CASE(write_storage_external)
ABI_CHECK(callContractFunction("h()"), encodeArgs(12)); ABI_CHECK(callContractFunction("h()"), encodeArgs(12));
} }
// End-to-end check that underscore separators inside hex literals do not change
// the numeric value of the literal.
BOOST_AUTO_TEST_CASE(test_underscore_in_hex)
{
// Contract under test: f(true) returns x (0x1234_ab), f(false) returns y (0x1234_abcd_1234).
char const* sourceCode = R"(
contract test {
function f(bool cond) returns (uint) {
uint32 x = 0x1234_ab;
uint y = 0x1234_abcd_1234;
return cond ? x : y;
}
}
)";
compileAndRun(sourceCode);
// The expected values are the same constants written without underscores.
ABI_CHECK(callContractFunction("f(bool)", true), encodeArgs(u256(0x1234ab)));
ABI_CHECK(callContractFunction("f(bool)", false), encodeArgs(u256(0x1234abcd1234)));
}
BOOST_AUTO_TEST_SUITE_END() BOOST_AUTO_TEST_SUITE_END()
} }

View File

@ -199,7 +199,7 @@ BOOST_AUTO_TEST_CASE(underscores_in_hex)
BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier); BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier);
BOOST_CHECK_EQUAL(scanner.next(), Token::Assign); BOOST_CHECK_EQUAL(scanner.next(), Token::Assign);
BOOST_CHECK_EQUAL(scanner.next(), Token::Number); BOOST_CHECK_EQUAL(scanner.next(), Token::Number);
BOOST_CHECK_EQUAL(scanner.currentLiteral(), "0xab19cf"); BOOST_CHECK_EQUAL(scanner.currentLiteral(), "0xab_19cf");
BOOST_CHECK_EQUAL(scanner.next(), Token::Semicolon); BOOST_CHECK_EQUAL(scanner.next(), Token::Semicolon);
BOOST_CHECK_EQUAL(scanner.next(), Token::EOS); BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
} }