Merge pull request #10618 from ethereum/develop

Merge develop into breaking.
This commit is contained in:
chriseth 2020-12-16 12:34:41 +01:00 committed by GitHub
commit 8f833f4e8f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
34 changed files with 340 additions and 11 deletions

View File

@ -45,21 +45,22 @@ AST Changes:
### 0.7.6 (unreleased) ### 0.7.6 (unreleased)
Language Features: Language Features:
* Code generator: Support conversion from calldata slices to memory and storage arrays.
* Code generator: Support copying dynamically encoded structs from calldata to memory. * Code generator: Support copying dynamically encoded structs from calldata to memory.
* Code generator: Support copying of nested arrays from calldata to memory. * Code generator: Support copying of nested arrays from calldata to memory.
* Code generator: Support conversion from calldata slices to memory and storage arrays. * Scanner: Generate a parser error when comments or unicode strings contain an unbalanced or underflowing set of unicode direction override markers (LRO, RLO, LRE, RLE, PDF).
* The fallback function can now also have a single ``calldata`` argument (equaling ``msg.data``) and return ``bytes memory`` (which will not be ABI-encoded but returned as-is). * The fallback function can now also have a single ``calldata`` argument (equaling ``msg.data``) and return ``bytes memory`` (which will not be ABI-encoded but returned as-is).
* Wasm backend: Add ``i32.select`` and ``i64.select`` instructions. * Wasm backend: Add ``i32.select`` and ``i64.select`` instructions.
Compiler Features: Compiler Features:
* Build System: Optionally support dynamic loading of Z3 and use that mechanism for Linux release builds. * Build System: Optionally support dynamic loading of Z3 and use that mechanism for Linux release builds.
* Code Generator: Avoid memory allocation for default value if it is not used. * Code Generator: Avoid memory allocation for default value if it is not used.
* SMTChecker: Create underflow and overflow verification targets for increment/decrement in the CHC engine.
* SMTChecker: Report struct values in counterexamples from CHC engine. * SMTChecker: Report struct values in counterexamples from CHC engine.
* SMTChecker: Support early returns in the CHC engine. * SMTChecker: Support early returns in the CHC engine.
* SMTChecker: Support getters. * SMTChecker: Support getters.
* SMTChecker: Support named arguments in function calls. * SMTChecker: Support named arguments in function calls.
* SMTChecker: Support struct constructor. * SMTChecker: Support struct constructor.
* SMTChecker: Create underflow and overflow verification targets for increment/decrement in the CHC engine.
* Standard-Json: Move the recently introduced ``modelCheckerSettings`` key to ``settings.modelChecker``. * Standard-Json: Move the recently introduced ``modelCheckerSettings`` key to ``settings.modelChecker``.
* Standard-Json: Properly filter the requested output artifacts. * Standard-Json: Properly filter the requested output artifacts.

View File

@ -98,6 +98,20 @@ public:
std::tuple<int, int> translatePositionToLineColumn(int _position) const; std::tuple<int, int> translatePositionToLineColumn(int _position) const;
///@} ///@}
/// Checks whether the bytes of @a _sequence occur in the stream, starting at the current position.
/// The comparison is done byte by byte via get(offset) and stops at the first difference.
/// @returns true if every byte of the sequence matched; false on the first mismatch, or
/// immediately if isPastEndOfInput(_sequence.size()) reports that the input is exhausted.
bool prefixMatch(std::string_view _sequence)
{
	size_t const length = _sequence.size();
	if (isPastEndOfInput(length))
		return false;

	// Advance while the stream agrees with the sequence; a full walk means a match.
	size_t offset = 0;
	while (offset < length && _sequence[offset] == get(offset))
		++offset;
	return offset == length;
}
private: private:
std::string m_source; std::string m_source;
std::string m_name; std::string m_name;

View File

@ -54,9 +54,10 @@
#include <liblangutil/Exceptions.h> #include <liblangutil/Exceptions.h>
#include <liblangutil/Scanner.h> #include <liblangutil/Scanner.h>
#include <algorithm> #include <boost/algorithm/string/classification.hpp>
#include <optional> #include <optional>
#include <ostream> #include <string_view>
#include <tuple> #include <tuple>
using namespace std; using namespace std;
@ -79,6 +80,8 @@ string to_string(ScannerError _errorCode)
case ScannerError::IllegalExponent: return "Invalid exponent."; case ScannerError::IllegalExponent: return "Invalid exponent.";
case ScannerError::IllegalNumberEnd: return "Identifier-start is not allowed at end of a number."; case ScannerError::IllegalNumberEnd: return "Identifier-start is not allowed at end of a number.";
case ScannerError::OctalNotAllowed: return "Octal numbers not allowed."; case ScannerError::OctalNotAllowed: return "Octal numbers not allowed.";
case ScannerError::DirectionalOverrideUnderflow: return "Unicode direction override underflow in comment or string literal.";
case ScannerError::DirectionalOverrideMismatch: return "Mismatching directional override markers in comment or string literal.";
default: default:
solAssert(false, "Unhandled case in to_string(ScannerError)"); solAssert(false, "Unhandled case in to_string(ScannerError)");
return ""; return "";
@ -271,12 +274,61 @@ bool Scanner::skipWhitespaceExceptUnicodeLinebreak()
return sourcePos() != startPosition; return sourcePos() != startPosition;
} }
namespace
{

/// Checks that Unicode direction override / embedding markers (LRO, RLO, LRE, RLE) found between
/// @a _startPosition and the stream's current position are each closed by a PDF marker.
///
/// Every byte offset in that range is probed for one of the UTF-8 encoded markers. An opening
/// marker raises the nesting depth by one, a PDF lowers it by one.
///
/// Stream position on return:
///  - no error or mismatch: restored to the position the stream had on entry;
///  - underflow: left at the offset of the PDF that drove the depth below zero.
///
/// @param _stream stream to inspect; its position is modified while scanning.
/// @param _startPosition offset of the first byte to inspect.
/// @returns ScannerError::NoError if all markers are paired,
/// ScannerError::DirectionalOverrideUnderflow if a PDF appears without a preceding opening marker,
/// ScannerError::DirectionalOverrideMismatch if opening markers remain unclosed at the end.
static ScannerError validateBiDiMarkup(CharStream& _stream, size_t _startPosition)
{
	// UTF-8 byte sequence of each marker, paired with its effect on the nesting depth.
	static array<pair<string_view, int>, 5> constexpr markers{
		pair<string_view, int>{"\xE2\x80\xAD", 1}, // U+202D (LRO - Left-to-Right Override)
		pair<string_view, int>{"\xE2\x80\xAE", 1}, // U+202E (RLO - Right-to-Left Override)
		pair<string_view, int>{"\xE2\x80\xAA", 1}, // U+202A (LRE - Left-to-Right Embedding)
		pair<string_view, int>{"\xE2\x80\xAB", 1}, // U+202B (RLE - Right-to-Left Embedding)
		pair<string_view, int>{"\xE2\x80\xAC", -1} // U+202C (PDF - Pop Directional Formatting)
	};

	// The range to validate ends where the scanner currently stands.
	size_t const scanEnd = _stream.position();
	_stream.setPosition(_startPosition);

	int nestingDepth = 0;
	size_t offset = _startPosition;
	while (offset < scanEnd)
	{
		_stream.setPosition(offset);

		for (auto const& marker: markers)
			if (_stream.prefixMatch(marker.first))
				nestingDepth += marker.second;

		// Deliberately return without restoring the position: the stream stays at the stray PDF.
		if (nestingDepth < 0)
			return ScannerError::DirectionalOverrideUnderflow;

		++offset;
	}

	_stream.setPosition(scanEnd);

	if (nestingDepth > 0)
		return ScannerError::DirectionalOverrideMismatch;
	return ScannerError::NoError;
}

}
Token Scanner::skipSingleLineComment() Token Scanner::skipSingleLineComment()
{ {
// Line terminator is not part of the comment. If it is a // Line terminator is not part of the comment. If it is a
// non-ascii line terminator, it will result in a parser error. // non-ascii line terminator, it will result in a parser error.
size_t startPosition = m_source->position();
while (!isUnicodeLinebreak()) while (!isUnicodeLinebreak())
if (!advance()) break; if (!advance())
break;
ScannerError unicodeDirectionError = validateBiDiMarkup(*m_source, startPosition);
if (unicodeDirectionError != ScannerError::NoError)
return setError(unicodeDirectionError);
return Token::Whitespace; return Token::Whitespace;
} }
@ -349,16 +401,21 @@ size_t Scanner::scanSingleLineDocComment()
Token Scanner::skipMultiLineComment() Token Scanner::skipMultiLineComment()
{ {
size_t startPosition = m_source->position();
while (!isSourcePastEndOfInput()) while (!isSourcePastEndOfInput())
{ {
char ch = m_char; char prevChar = m_char;
advance(); advance();
// If we have reached the end of the multi-line comment, we // If we have reached the end of the multi-line comment, we
// consume the '/' and insert a whitespace. This way all // consume the '/' and insert a whitespace. This way all
// multi-line comments are treated as whitespace. // multi-line comments are treated as whitespace.
if (ch == '*' && m_char == '/') if (prevChar == '*' && m_char == '/')
{ {
ScannerError unicodeDirectionError = validateBiDiMarkup(*m_source, startPosition);
if (unicodeDirectionError != ScannerError::NoError)
return setError(unicodeDirectionError);
m_char = ' '; m_char = ' ';
return Token::Whitespace; return Token::Whitespace;
} }
@ -776,6 +833,7 @@ bool Scanner::isUnicodeLinebreak()
Token Scanner::scanString(bool const _isUnicode) Token Scanner::scanString(bool const _isUnicode)
{ {
size_t startPosition = m_source->position();
char const quote = m_char; char const quote = m_char;
advance(); // consume quote advance(); // consume quote
LiteralScope literal(this, LITERAL_TYPE_STRING); LiteralScope literal(this, LITERAL_TYPE_STRING);
@ -803,6 +861,14 @@ Token Scanner::scanString(bool const _isUnicode)
} }
if (m_char != quote) if (m_char != quote)
return setError(ScannerError::IllegalStringEndQuote); return setError(ScannerError::IllegalStringEndQuote);
if (_isUnicode)
{
ScannerError unicodeDirectionError = validateBiDiMarkup(*m_source, startPosition);
if (unicodeDirectionError != ScannerError::NoError)
return setError(unicodeDirectionError);
}
literal.complete(); literal.complete();
advance(); // consume quote advance(); // consume quote
return _isUnicode ? Token::UnicodeStringLiteral : Token::StringLiteral; return _isUnicode ? Token::UnicodeStringLiteral : Token::StringLiteral;

View File

@ -89,6 +89,9 @@ enum class ScannerError
IllegalExponent, IllegalExponent,
IllegalNumberEnd, IllegalNumberEnd,
DirectionalOverrideUnderflow,
DirectionalOverrideMismatch,
OctalNotAllowed, OctalNotAllowed,
}; };
@ -183,6 +186,7 @@ public:
///@} ///@}
private: private:
inline Token setError(ScannerError _error) noexcept inline Token setError(ScannerError _error) noexcept
{ {
m_tokens[NextNext].error = _error; m_tokens[NextNext].error = _error;

View File

@ -327,6 +327,9 @@ variant<Literal, Identifier> Parser::parseLiteralOrIdentifier()
case Token::HexStringLiteral: case Token::HexStringLiteral:
fatalParserError(3772_error, "Hex literals are not valid in this context."); fatalParserError(3772_error, "Hex literals are not valid in this context.");
break; break;
case Token::Illegal:
fatalParserError(1465_error, "Illegal token: " + to_string(m_scanner->currentError()));
break;
default: default:
fatalParserError(1856_error, "Literal or identifier expected."); fatalParserError(1856_error, "Literal or identifier expected.");
} }

View File

@ -6,7 +6,10 @@
REPO_ROOT="$(dirname "$0")"/.. REPO_ROOT="$(dirname "$0")"/..
cd $REPO_ROOT cd $REPO_ROOT
WHITESPACE=$(git grep -n -I -E "^.*[[:space:]]+$" | grep -v "test/libsolidity/ASTJSON\|test/libsolidity/ASTRecoveryTests\|test/compilationTests/zeppelin/LICENSE") WHITESPACE=$(git grep -n -I -E "^.*[[:space:]]+$" |
grep -v "test/libsolidity/ASTJSON\|test/libsolidity/ASTRecoveryTests\|test/compilationTests/zeppelin/LICENSE" |
grep -v -E "test/libsolidity/syntaxTests/comments/unicode_direction_override_1.sol"
)
if [[ "$WHITESPACE" != "" ]] if [[ "$WHITESPACE" != "" ]]
then then

View File

@ -116,7 +116,10 @@ done < <(
grep -riL -E \ grep -riL -E \
"^\/\/ (Syntax|Type|Declaration)Error|^\/\/ ParserError (2837|3716|3997|5333|6275|6281|6933|7319)|^==== Source:" \ "^\/\/ (Syntax|Type|Declaration)Error|^\/\/ ParserError (2837|3716|3997|5333|6275|6281|6933|7319)|^==== Source:" \
"${ROOT_DIR}/test/libsolidity/syntaxTests" \ "${ROOT_DIR}/test/libsolidity/syntaxTests" \
"${ROOT_DIR}/test/libsolidity/semanticTests" \ "${ROOT_DIR}/test/libsolidity/semanticTests" |
grep -v -E 'comments/.*_direction_override.*.sol' |
grep -v -E 'literals/.*_direction_override.*.sol'
# Skipping the unicode tests as I couldn't adapt the lexical grammar to recursively count RLO/LRO/PDFs.
) )
YUL_FILES=() YUL_FILES=()

View File

@ -0,0 +1,9 @@
contract C {
function f() public pure
{
// PDF
/*underflow */
}
}
// ----
// ParserError 8936: (71-83): Unicode direction override underflow in comment or string literal.

View File

@ -0,0 +1,9 @@
contract C {
function f() public pure
{
// PDF PDF
/*underflow */
}
}
// ----
// ParserError 8936: (75-87): Unicode direction override underflow in comment or string literal.

View File

@ -0,0 +1,9 @@
contract C {
function f() public pure
{
// RLO
/*overflow */
}
}
// ----
// ParserError 8936: (71-86): Mismatching directional override markers in comment or string literal.

View File

@ -0,0 +1,9 @@
contract C {
function f() public pure
{
// RLO RLO
/*overflow */
}
}
// ----
// ParserError 8936: (75-93): Mismatching directional override markers in comment or string literal.

View File

@ -0,0 +1,14 @@
contract C {
function f() public pure
{
// RLO PDF
/*ok */
// RLO RLO PDF PDF
/*ok */
// RLO RLO RLO PDF PDF PDF
/*ok */
}
}
// ----

View File

@ -0,0 +1,9 @@
contract C {
function f() public pure
{
// PDF RLO
/*overflow */
}
}
// ----
// ParserError 8936: (75-86): Unicode direction override underflow in comment or string literal.

View File

@ -0,0 +1,7 @@
contract C {
function f() public pure {
/* LRO LRE RLE PDF RLO PDF PDF
}
}
// ----
// ParserError 8936: (52-115): Expected multi-line comment-terminator.

View File

@ -0,0 +1,9 @@
contract C {
function f() public pure
{
// PDF
// underflow
}
}
// ----
// ParserError 8936: (71-84): Unicode direction override underflow in comment or string literal.

View File

@ -0,0 +1,9 @@
contract C {
function f() public pure
{
// PDF PDF
// underflow
}
}
// ----
// ParserError 8936: (75-88): Unicode direction override underflow in comment or string literal.

View File

@ -0,0 +1,9 @@
contract C {
function f() public pure
{
// RLO
// overflow
}
}
// ----
// ParserError 8936: (71-86): Mismatching directional override markers in comment or string literal.

View File

@ -0,0 +1,9 @@
contract C {
function f() public pure
{
// RLO RLO
// overflow
}
}
// ----
// ParserError 8936: (75-93): Mismatching directional override markers in comment or string literal.

View File

@ -0,0 +1,14 @@
contract C {
function f() public pure
{
// RLO PDF
// ok
// RLO RLO PDF PDF
// ok
// RLO RLO RLO PDF PDF PDF
// ok
}
}
// ----

View File

@ -0,0 +1,9 @@
contract C {
function f() public pure
{
// PDF RLO
// underflow
}
}
// ----
// ParserError 8936: (75-88): Unicode direction override underflow in comment or string literal.

View File

@ -0,0 +1,8 @@
contract C {
function f(bool b) public pure
{
if (b) { return; }
}
}
// ----
// ParserError 2314: (65-66): Expected '(' but got 'ILLEGAL'

View File

@ -0,0 +1,8 @@
contract C {
function f(bool b) public pure
{
uint a = 10;
}
}
// ----
// ParserError 8936: (75-76): Invalid token.

View File

@ -0,0 +1,10 @@
contract TimelockUpgrade {
function confirmUpgrade() external {
uint256 m;
uint256 d;
(/*year*/,/*month*/,d/*yad*/,m/*hour*/,/*minute*/,/*second*/) = BokkyDateTime.timestampToDateTime(block.timestamp);
}
}
// ----
// ParserError 8936: (128-139): Mismatching directional override markers in comment or string literal.

View File

@ -6,4 +6,4 @@ contract C {
} }
} }
// ---- // ----
// ParserError 1856: (72-73): Literal or identifier expected. // ParserError 1465: (72-73): Illegal token: Octal numbers not allowed.

View File

@ -0,0 +1,9 @@
contract C {
function f() public pure
{
// PDF
bytes memory s = unicode"underflow ";
}
}
// ----
// ParserError 8936: (88-106): Unicode direction override underflow in comment or string literal.

View File

@ -0,0 +1,9 @@
contract C {
function f() public pure
{
// PDF PDF
bytes memory m = unicode"underflow ";
}
}
// ----
// ParserError 8936: (92-110): Unicode direction override underflow in comment or string literal.

View File

@ -0,0 +1,9 @@
contract C {
function f() public pure
{
// RLO
bytes memory m = unicode"overflow ";
}
}
// ----
// ParserError 8936: (88-108): Mismatching directional override markers in comment or string literal.

View File

@ -0,0 +1,9 @@
contract C {
function f() public pure
{
// RLO RLO
bytes memory m = unicode"overflow ";
}
}
// ----
// ParserError 8936: (92-115): Mismatching directional override markers in comment or string literal.

View File

@ -0,0 +1,14 @@
contract C {
function f() public pure
{
// RLO PDF
bytes memory m = unicode" ok ";
// RLO RLO PDF PDF
m = unicode" ok ";
// RLO RLO RLO PDF PDF PDF
m = unicode" ok ";
}
}
// ----

View File

@ -0,0 +1,9 @@
contract C {
function f() public pure
{
// PDF RLO
bytes memory m = unicode" underflow ";
}
}
// ----
// ParserError 8936: (92-111): Unicode direction override underflow in comment or string literal.

View File

@ -0,0 +1,13 @@
contract C {
function f() public pure
{
// LRO PDF RLO PDF
bytes memory m = unicode" ok ";
// lre rle pdf pdf
m = unicode"lre rle pdf pdf";
// lre lro pdf pdf
m = unicode"lre lro pdf pdf";
}
}
// ----

View File

@ -2,4 +2,4 @@
let x := 0100 let x := 0100
} }
// ---- // ----
// ParserError 1856: (15-16): Literal or identifier expected. // ParserError 1465: (15-16): Illegal token: Octal numbers not allowed.

View File

@ -0,0 +1,6 @@
{
// pop 1
// underflow
}
// ----
// ParserError 1465: (19-32): Illegal token: Unicode direction override underflow in comment or string literal.

View File

@ -0,0 +1,6 @@
{
// pop 1
let s := unicode"underflow ";
}
// ----
// ParserError 1465: (35-47): Illegal token: Invalid character in string.