Add stricter hex underscore rules

This commit is contained in:
Balajiganapathi S 2017-10-25 13:42:07 +05:30 committed by Christian Parpart
parent 0000bfc604
commit 09a36cba02
7 changed files with 86 additions and 20 deletions

View File

@ -284,8 +284,8 @@ one side. Examples include ``1.``, ``.1`` and ``1.3``.
Scientific notation is also supported, where the base can have fractions, while the exponent cannot.
Examples include ``2e10``, ``-2e10``, ``2e-10``, ``2.5e1``.
Underscores can be used to separate digits of a numeric literal to aid readability.
For example, ``123_000``, ``0x2eff_abde``, ``1_233e34_89`` are all valid. Underscores are only allowed between two digits.
Underscores can be used to separate the digits of a numeric literal to aid readability.
For example, ``123_000``, ``0x2eff_abde``, ``0x1233_e348_9a`` are all valid. Underscores are only allowed between two digits. For hex literals, underscores are only allowed to separate groups of 4 hex digits.
Number literal expressions retain arbitrary precision until they are converted to a non-literal type (i.e. by
using them together with a non-literal expression).

View File

@ -24,6 +24,9 @@
#include <libsolidity/interface/Version.h>
#include <boost/algorithm/cxx11/all_of.hpp>
#include <boost/algorithm/string.hpp>
#include <string>
using namespace std;
using namespace dev;
using namespace dev::solidity;
@ -183,6 +186,46 @@ bool SyntaxChecker::visit(Throw const& _throwStatement)
return true;
}
/// Checks the placement of underscores inside hexadecimal number literals and
/// reports a syntax error for every violation found.
///
/// The digits after the "0x" prefix are split at each underscore. Every group
/// that is neither the first nor the last must consist of exactly 4 digits.
/// In addition, unless the last group has 4 digits, the first group must have
/// 4 digits and the last group must have an even number of digits.
///
/// Literals that are not hex numbers, and hex literals without underscores,
/// are accepted without further checks.
/// @returns always true.
bool SyntaxChecker::visit(Literal const& _literal)
{
	if (!_literal.isHexNumber())
		return true;

	// From here on we are dealing with a hex literal.
	solAssert(_literal.value().substr(0, 2) == "0x", "");

	// Drop the "0x" prefix and cut the remaining digits into underscore-separated groups.
	ASTString digits = _literal.value().substr(2);
	vector<ASTString> groups;
	boost::split(groups, digits, boost::is_any_of("_"));

	if (groups.size() > 1) // a single group means the literal contains no underscores
	{
		// Inner groups, i.e. all but the first and the last one, need exactly 4 digits.
		for (auto group = groups.begin() + 1; group != groups.end() - 1; ++group)
			if (group->size() != 4)
				m_errorReporter.syntaxError(_literal.location(), "Invalid use of underscores in hex literal. Found inner part with " + to_string(group->size()) + " digits (has to be 4 digits).");

		size_t const firstLength = groups.front().size();
		size_t const lastLength = groups.back().size();

		// A last group of 4 digits settles the matter; the first group is only
		// inspected when the last one has a different length.
		if (lastLength != 4)
		{
			if (firstLength != 4)
				// Neither the first nor the last group has 4 digits.
				m_errorReporter.syntaxError(_literal.location(), "Invalid use of underscores in hex literal. First or last part must have 4 digits.");
			else if (lastLength % 2 != 0)
				// With a 4-digit first group the last group has to be of even length
				// to avoid ambiguity over zero padding.
				m_errorReporter.syntaxError(_literal.location(), "Invalid use of underscores in hex literal. If the first part has 4 digits, it is assumed to be a byte sequence instead of a number and thus the last part should have an even number of digits.");
		}
	}

	return true;
}
bool SyntaxChecker::visit(UnaryOperation const& _operation)
{
if (_operation.getOperator() == Token::Add)

View File

@ -73,6 +73,7 @@ private:
virtual bool visit(VariableDeclarationStatement const& _statement) override;
virtual bool visit(StructDefinition const& _struct) override;
virtual bool visit(Literal const& _literal) override;
ErrorReporter& m_errorReporter;

View File

@ -39,6 +39,7 @@
#include <boost/range/algorithm/copy.hpp>
#include <boost/range/adaptor/sliced.hpp>
#include <boost/range/adaptor/transformed.hpp>
#include <boost/algorithm/string.hpp>
#include <limits>
@ -779,7 +780,9 @@ tuple<bool, rational> RationalNumberType::isValidLiteral(Literal const& _literal
if (boost::starts_with(_literal.value(), "0x"))
{
// process as hex
value = bigint(_literal.value());
ASTString valueString = _literal.value();
boost::erase_all(valueString, "_");// Remove underscore separators
value = bigint(valueString);
}
else if (expPoint != _literal.value().end())
{

View File

@ -726,21 +726,26 @@ Token::Value Scanner::scanHexString()
// Scans decimal digits in which underscores may appear between digits; the regex
// quoted inside the body describes the intended shape of the accepted input.
// NOTE(review): this listing appears to come from a diff view whose +/- markers were
// lost, so lines of the previous implementation and of its replacement seem to be
// interleaved below (e.g. the two consecutive trailing-underscore checks and the
// adjacent break/return). Confirm against the actual file before treating this body
// as one coherent function.
void Scanner::scanDecimalDigits()
{
if (!isDecimalDigit(m_char)) // avoid underscore at beginning
return;
while (isDecimalDigit(m_char) || m_char == '_')
// Parse for regex [:digit:]+(_[:digit:]+)*
do
{
if (!isDecimalDigit(m_char))
return;
while (isDecimalDigit(m_char))
addLiteralCharAndAdvance();
if (m_char == '_')
{
// The underscore is passed over with advance() rather than addLiteralCharAndAdvance();
// presumably this keeps it out of the scanned literal — verify against the Scanner helpers.
advance();
if (!isDecimalDigit(m_char)) // avoid trailing underscore
if (!isDecimalDigit(m_char)) // Trailing underscore. Rollback and allow next step to flag it as illegal
{
rollback(1);
break;
return;
}
}
addLiteralCharAndAdvance();
}
while (isDecimalDigit(m_char));
}
Token::Value Scanner::scanNumber(char _charSeen)
@ -768,19 +773,17 @@ Token::Value Scanner::scanNumber(char _charSeen)
addLiteralCharAndAdvance();
if (!isHexDigit(m_char))
return Token::Illegal; // we must have at least one hex digit after 'x'/'X'
while (isHexDigit(m_char) || m_char == '_') // same logic as scanDecimalDigits
char last = m_char;
while (isHexDigit(m_char) || m_char == '_') // Unlike decimal digits, we keep the underscores for later validation
{
if (m_char == '_')
{
advance();
if (!isHexDigit(m_char)) // avoid trailing underscore
{
rollback(1);
break;
}
}
if (m_char == '_' && last == '_')
return Token::Illegal; // Double underscore
last = m_char;
addLiteralCharAndAdvance();
}
if (last == '_')
return Token::Illegal; // Trailing underscore
}
else if (isDecimalDigit(m_char))
// We do not allow octal numbers

View File

@ -12836,6 +12836,22 @@ BOOST_AUTO_TEST_CASE(write_storage_external)
ABI_CHECK(callContractFunction("h()"), encodeArgs(12));
}
// End-to-end check that hex literals written with underscore separators compile
// and evaluate to the same value as the literal without the underscores.
BOOST_AUTO_TEST_CASE(test_underscore_in_hex)
{
	// f(true) yields the short literal, f(false) the long one.
	char const* contractSource = R"(
contract test {
function f(bool cond) returns (uint) {
uint32 x = 0x1234_ab;
uint y = 0x1234_abcd_1234;
return cond ? x : y;
}
}
)";
	compileAndRun(contractSource);

	// Expected results: the same literals with the underscores removed.
	u256 const shortValue(0x1234ab);
	u256 const longValue(0x1234abcd1234);

	ABI_CHECK(callContractFunction("f(bool)", true), encodeArgs(shortValue));
	ABI_CHECK(callContractFunction("f(bool)", false), encodeArgs(longValue));
}
BOOST_AUTO_TEST_SUITE_END()
}

View File

@ -199,7 +199,7 @@ BOOST_AUTO_TEST_CASE(underscores_in_hex)
BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier);
BOOST_CHECK_EQUAL(scanner.next(), Token::Assign);
BOOST_CHECK_EQUAL(scanner.next(), Token::Number);
BOOST_CHECK_EQUAL(scanner.currentLiteral(), "0xab19cf");
BOOST_CHECK_EQUAL(scanner.currentLiteral(), "0xab_19cf");
BOOST_CHECK_EQUAL(scanner.next(), Token::Semicolon);
BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
}