Merge pull request #10618 from ethereum/develop

Merge develop into breaking.
2023-10-03 13:03:40 +00:00 · 2020-12-16 12:34:41 +01:00 · 2020-12-16 12:34:41 +01:00 · 8f833f4e8f
commit 8f833f4e8f
parent b764e06c76 e347545597
34 changed files with 340 additions and 11 deletions
--- a/Changelog.md
+++ b/Changelog.md
@ -45,21 +45,22 @@ AST Changes:
 ### 0.7.6 (unreleased)

 Language Features:
+ * Code generator: Support conversion from calldata slices to memory and storage arrays.
 * Code generator: Support copying dynamically encoded structs from calldata to memory.
 * Code generator: Support copying of nested arrays from calldata to memory.
- * Code generator: Support conversion from calldata slices to memory and storage arrays.
+ * Scanner: Generate a parser error when comments or unicode strings contain an unbalanced or underflowing set of unicode direction override markers (LRO, RLO, LRE, RLE, PDF).
 * The fallback function can now also have a single ``calldata`` argument (equaling ``msg.data``) and return ``bytes memory`` (which will not be ABI-encoded but returned as-is).
 * Wasm backend: Add ``i32.select`` and ``i64.select`` instructions.

 Compiler Features:
 * Build System: Optionally support dynamic loading of Z3 and use that mechanism for Linux release builds.
 * Code Generator: Avoid memory allocation for default value if it is not used.
+ * SMTChecker: Create underflow and overflow verification targets for increment/decrement in the CHC engine.
 * SMTChecker: Report struct values in counterexamples from CHC engine.
 * SMTChecker: Support early returns in the CHC engine.
 * SMTChecker: Support getters.
 * SMTChecker: Support named arguments in function calls.
 * SMTChecker: Support struct constructor.
- * SMTChecker: Create underflow and overflow verification targets for increment/decrement in the CHC engine.
 * Standard-Json: Move the recently introduced ``modelCheckerSettings`` key to ``settings.modelChecker``.
 * Standard-Json: Properly filter the requested output artifacts.

--- a/liblangutil/CharStream.h
+++ b/liblangutil/CharStream.h
@ -98,6 +98,20 @@ public:
 	std::tuple<int, int> translatePositionToLineColumn(int _position) const;
 	///@}

+	/// Tests whether the given octet sequence is present at the current position in the stream, comparing octet by octet.
+	/// @returns true if every octet of @a _sequence matched, false otherwise (including when the bounds check below fails).
+	bool prefixMatch(std::string_view _sequence)
+	{
+		if (isPastEndOfInput(_sequence.size())) // NOTE(review): confirm this does not also reject a sequence ending exactly at end of input.
+			return false;
+
+		for (size_t i = 0; i < _sequence.size(); ++i)
+			if (_sequence[i] != get(i)) // presumably get(i) reads the octet i positions ahead of the current one -- confirm.
+				return false;
+
+		return true;
+	}
+
 private:
 	std::string m_source;
 	std::string m_name;
--- a/liblangutil/Scanner.cpp
+++ b/liblangutil/Scanner.cpp
@ -54,9 +54,10 @@
 #include <liblangutil/Exceptions.h>
 #include <liblangutil/Scanner.h>

-#include <algorithm>
+#include <boost/algorithm/string/classification.hpp>
+
 #include <optional>
-#include <ostream>
+#include <string_view>
 #include <tuple>

 using namespace std;
@ -79,6 +80,8 @@ string to_string(ScannerError _errorCode)
 		case ScannerError::IllegalExponent: return "Invalid exponent.";
 		case ScannerError::IllegalNumberEnd: return "Identifier-start is not allowed at end of a number.";
 		case ScannerError::OctalNotAllowed: return "Octal numbers not allowed.";
+		case ScannerError::DirectionalOverrideUnderflow: return "Unicode direction override underflow in comment or string literal.";
+		case ScannerError::DirectionalOverrideMismatch: return "Mismatching directional override markers in comment or string literal.";
 		default:
 			solAssert(false, "Unhandled case in to_string(ScannerError)");
 			return "";
@ -271,12 +274,61 @@ bool Scanner::skipWhitespaceExceptUnicodeLinebreak()
 	return sourcePos() != startPosition;
 }

+
+namespace
+{
+
+/// Scans the octets from @a _startPosition up to the stream's current position for RLO/LRO/RLE/LRE/PDF markers
+/// and keeps track of the script writing direction override depth (+1 per opening marker, -1 per PDF).
+///
+/// @returns ScannerError::NoError if all markers are paired, ScannerError::DirectionalOverrideUnderflow as soon as
+///          a PDF occurs without a preceding opening marker, and ScannerError::DirectionalOverrideMismatch if
+///          opening markers are left unclosed at the end of the range. Errors are meant to be reported to the user.
+static ScannerError validateBiDiMarkup(CharStream& _stream, size_t _startPosition)
+{
+	static array<pair<string_view, int>, 5> constexpr directionalSequences{
+		pair<string_view, int>{"\xE2\x80\xAD", 1}, // U+202D (LRO - Left-to-Right Override)
+		pair<string_view, int>{"\xE2\x80\xAE", 1}, // U+202E (RLO - Right-to-Left Override)
+		pair<string_view, int>{"\xE2\x80\xAA", 1}, // U+202A (LRE - Left-to-Right Embedding)
+		pair<string_view, int>{"\xE2\x80\xAB", 1}, // U+202B (RLE - Right-to-Left Embedding)
+		pair<string_view, int>{"\xE2\x80\xAC", -1} // U+202C (PDF - Pop Directional Formatting)
+	};
+
+	size_t endPosition = _stream.position(); // The range to validate ends at the position the stream had on entry.
+	_stream.setPosition(_startPosition);
+
+	int directionOverrideDepth = 0;
+
+	for (size_t currentPos = _startPosition; currentPos < endPosition; ++currentPos)
+	{
+		_stream.setPosition(currentPos); // prefixMatch() tests at the stream's current position, so move it octet by octet.
+
+		for (auto const& [sequence, depthChange]: directionalSequences)
+			if (_stream.prefixMatch(sequence))
+				directionOverrideDepth += depthChange;
+
+		if (directionOverrideDepth < 0)
+			return ScannerError::DirectionalOverrideUnderflow; // Early exit: the stream position is left at currentPos, not restored.
+	}
+
+	_stream.setPosition(endPosition); // Restore the position the stream had on entry.
+
+	return directionOverrideDepth > 0 ? ScannerError::DirectionalOverrideMismatch : ScannerError::NoError;
+}
+
+}
+
 Token Scanner::skipSingleLineComment()
 {
 	// Line terminator is not part of the comment. If it is a
 	// non-ascii line terminator, it will result in a parser error.
+	size_t startPosition = m_source->position();
 	while (!isUnicodeLinebreak())
-		if (!advance()) break;
+		if (!advance())
+			break;
+
+	ScannerError unicodeDirectionError = validateBiDiMarkup(*m_source, startPosition);
+	if (unicodeDirectionError != ScannerError::NoError)
+		return setError(unicodeDirectionError);

 	return Token::Whitespace;
 }
@ -349,16 +401,21 @@ size_t Scanner::scanSingleLineDocComment()

 Token Scanner::skipMultiLineComment()
 {
+	size_t startPosition = m_source->position();
 	while (!isSourcePastEndOfInput())
 	{
-		char ch = m_char;
+		char prevChar = m_char;
 		advance();

 		// If we have reached the end of the multi-line comment, we
 		// consume the '/' and insert a whitespace. This way all
 		// multi-line comments are treated as whitespace.
-		if (ch == '*' && m_char == '/')
+		if (prevChar == '*' && m_char == '/')
 		{
+			ScannerError unicodeDirectionError = validateBiDiMarkup(*m_source, startPosition);
+			if (unicodeDirectionError != ScannerError::NoError)
+				return setError(unicodeDirectionError);
+
 			m_char = ' ';
 			return Token::Whitespace;
 		}
@ -776,6 +833,7 @@ bool Scanner::isUnicodeLinebreak()

 Token Scanner::scanString(bool const _isUnicode)
 {
+	size_t startPosition = m_source->position();
 	char const quote = m_char;
 	advance();  // consume quote
 	LiteralScope literal(this, LITERAL_TYPE_STRING);
@ -803,6 +861,14 @@ Token Scanner::scanString(bool const _isUnicode)
 	}
 	if (m_char != quote)
 		return setError(ScannerError::IllegalStringEndQuote);
+
+	if (_isUnicode)
+	{
+		ScannerError unicodeDirectionError = validateBiDiMarkup(*m_source, startPosition);
+		if (unicodeDirectionError != ScannerError::NoError)
+			return setError(unicodeDirectionError);
+	}
+
 	literal.complete();
 	advance();  // consume quote
 	return _isUnicode ? Token::UnicodeStringLiteral : Token::StringLiteral;
--- a/liblangutil/Scanner.h
+++ b/liblangutil/Scanner.h
@ -89,6 +89,9 @@ enum class ScannerError
 	IllegalExponent,
 	IllegalNumberEnd,

+	DirectionalOverrideUnderflow,
+	DirectionalOverrideMismatch,
+
 	OctalNotAllowed,
 };

@ -183,6 +186,7 @@ public:
 	///@}

 private:
+
 	inline Token setError(ScannerError _error) noexcept
 	{
 		m_tokens[NextNext].error = _error;
--- a/libyul/AsmParser.cpp
+++ b/libyul/AsmParser.cpp
@ -327,6 +327,9 @@ variant<Literal, Identifier> Parser::parseLiteralOrIdentifier()
 	case Token::HexStringLiteral:
 		fatalParserError(3772_error, "Hex literals are not valid in this context.");
 		break;
+	case Token::Illegal:
+		fatalParserError(1465_error, "Illegal token: " + to_string(m_scanner->currentError()));
+		break;
 	default:
 		fatalParserError(1856_error, "Literal or identifier expected.");
 	}
--- a/scripts/check_style.sh
+++ b/scripts/check_style.sh
@ -6,7 +6,10 @@
 REPO_ROOT="$(dirname "$0")"/..
 cd $REPO_ROOT

-WHITESPACE=$(git grep -n -I -E "^.*[[:space:]]+$" | grep -v "test/libsolidity/ASTJSON\|test/libsolidity/ASTRecoveryTests\|test/compilationTests/zeppelin/LICENSE")
+WHITESPACE=$(git grep -n -I -E "^.*[[:space:]]+$" |
+  grep -v "test/libsolidity/ASTJSON\|test/libsolidity/ASTRecoveryTests\|test/compilationTests/zeppelin/LICENSE" |
+  grep -v -E "test/libsolidity/syntaxTests/comments/unicode_direction_override_1.sol"
+)

 if [[ "$WHITESPACE" != "" ]]
 then
--- a/scripts/test_antlr_grammar.sh
+++ b/scripts/test_antlr_grammar.sh
@ -116,7 +116,10 @@ done < <(
  grep -riL -E \
    "^\/\/ (Syntax|Type|Declaration)Error|^\/\/ ParserError (2837|3716|3997|5333|6275|6281|6933|7319)|^==== Source:" \
    "${ROOT_DIR}/test/libsolidity/syntaxTests" \
-    "${ROOT_DIR}/test/libsolidity/semanticTests" \
+    "${ROOT_DIR}/test/libsolidity/semanticTests" |
+      grep -v -E 'comments/.*_direction_override.*.sol' |
+      grep -v -E 'literals/.*_direction_override.*.sol'
+      # Skipping the unicode direction override tests: the lexical grammar has not been adapted to recursively count RLO/LRO/PDF markers.
 )

 YUL_FILES=()
--- a/test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_1.sol
+++ b/test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_1.sol
@ -0,0 +1,9 @@
+contract C {
+    function f() public pure
+    {
+        // PDF
+        /*underflow ‬*/
+    }
+}
+// ----
+// ParserError 8936: (71-83): Unicode direction override underflow in comment or string literal.
--- a/test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_2.sol
+++ b/test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_2.sol
@ -0,0 +1,9 @@
+contract C {
+    function f() public pure
+    {
+        // PDF PDF
+        /*underflow ‬‬*/
+    }
+}
+// ----
+// ParserError 8936: (75-87): Unicode direction override underflow in comment or string literal.
--- a/test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_3.sol
+++ b/test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_3.sol
@ -0,0 +1,9 @@
+contract C {
+    function f() public pure
+    {
+        // RLO
+        /*overflow ‮*/
+    }
+}
+// ----
+// ParserError 8936: (71-86): Mismatching directional override markers in comment or string literal.
--- a/test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_4.sol
+++ b/test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_4.sol
@ -0,0 +1,9 @@
+contract C {
+    function f() public pure
+    {
+        // RLO RLO
+        /*overflow ‮‮*/
+    }
+}
+// ----
+// ParserError 8936: (75-93): Mismatching directional override markers in comment or string literal.
--- a/test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_5.sol
+++ b/test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_5.sol
@ -0,0 +1,14 @@
+contract C {
+    function f() public pure
+    {
+        // RLO PDF
+        /*ok ‮‬*/
+
+        // RLO RLO PDF PDF
+        /*ok ‮‮‬‬*/
+
+        // RLO RLO RLO PDF PDF PDF
+        /*ok ‮‮‮‬‬‬*/
+    }
+}
+// ----
--- a/test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_6.sol
+++ b/test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_6.sol
@ -0,0 +1,9 @@
+contract C {
+    function f() public pure
+    {
+        // PDF RLO
+        /*overflow ‬‮*/
+    }
+}
+// ----
+// ParserError 8936: (75-86): Unicode direction override underflow in comment or string literal.
--- a/test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_7.sol
+++ b/test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_7.sol
@ -0,0 +1,7 @@
+contract C {
+    function f() public pure {
+        /* LRO‭ LRE‪ RLE ‫  PDF‬ RLO‮ PDF ‬ PDF‬
+    }
+}
+// ----
+// ParserError 8936: (52-115): Expected multi-line comment-terminator.
--- a/test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_1.sol
+++ b/test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_1.sol
@ -0,0 +1,9 @@
+contract C {
+    function f() public pure
+    {
+        // PDF
+        // underflow ‬
+    }
+}
+// ----
+// ParserError 8936: (71-84): Unicode direction override underflow in comment or string literal.
--- a/test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_2.sol
+++ b/test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_2.sol
@ -0,0 +1,9 @@
+contract C {
+    function f() public pure
+    {
+        // PDF PDF
+        // underflow ‬‬
+    }
+}
+// ----
+// ParserError 8936: (75-88): Unicode direction override underflow in comment or string literal.
--- a/test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_3.sol
+++ b/test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_3.sol
@ -0,0 +1,9 @@
+contract C {
+    function f() public pure
+    {
+        // RLO
+        // overflow ‮
+    }
+}
+// ----
+// ParserError 8936: (71-86): Mismatching directional override markers in comment or string literal.
--- a/test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_4.sol
+++ b/test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_4.sol
@ -0,0 +1,9 @@
+contract C {
+    function f() public pure
+    {
+        // RLO RLO
+        // overflow ‮‮
+    }
+}
+// ----
+// ParserError 8936: (75-93): Mismatching directional override markers in comment or string literal.
--- a/test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_5.sol
+++ b/test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_5.sol
@ -0,0 +1,14 @@
+contract C {
+    function f() public pure
+    {
+        // RLO PDF
+        // ok ‮‬
+
+        // RLO RLO PDF PDF
+        // ok ‮‮‬‬
+
+        // RLO RLO RLO PDF PDF PDF
+        // ok ‮‮‮‬‬‬
+    }
+}
+// ----
--- a/test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_6.sol
+++ b/test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_6.sol
@ -0,0 +1,9 @@
+contract C {
+    function f() public pure
+    {
+        // PDF RLO
+        // underflow ‬‮
+    }
+}
+// ----
+// ParserError 8936: (75-88): Unicode direction override underflow in comment or string literal.
--- a/test/libsolidity/syntaxTests/comments/unicode_direction_in_source_1.sol
+++ b/test/libsolidity/syntaxTests/comments/unicode_direction_in_source_1.sol
@ -0,0 +1,8 @@
+contract C {
+    function f(bool b) public pure
+    {
+        if ‬(b) { return; }
+    }
+}
+// ----
+// ParserError 2314: (65-66): Expected '(' but got 'ILLEGAL'
--- a/test/libsolidity/syntaxTests/comments/unicode_direction_in_source_2.sol
+++ b/test/libsolidity/syntaxTests/comments/unicode_direction_in_source_2.sol
@ -0,0 +1,8 @@
+contract C {
+    function f(bool b) public pure
+    {
+        uint a = 10; ‬
+    }
+}
+// ----
+// ParserError 8936: (75-76): Invalid token.
--- a/test/libsolidity/syntaxTests/comments/unicode_direction_override_1.sol
+++ b/test/libsolidity/syntaxTests/comments/unicode_direction_override_1.sol
@ -0,0 +1,10 @@
+contract TimelockUpgrade {
+    function confirmUpgrade() external {
+        uint256 m;
+        uint256 d;
+        (/*year*/,/*month‮*/,d/*yad*/,m/*‬‬hour*/,/*minute*/,/*second*/) = BokkyDateTime.timestampToDateTime(block.timestamp);
+    }
+}
+
+// ----
+// ParserError 8936: (128-139): Mismatching directional override markers in comment or string literal.
--- a/test/libsolidity/syntaxTests/inlineAssembly/invalid/invalid_number.sol
+++ b/test/libsolidity/syntaxTests/inlineAssembly/invalid/invalid_number.sol
@ -6,4 +6,4 @@ contract C {
  }
 }
 // ----
-// ParserError 1856: (72-73): Literal or identifier expected.
+// ParserError 1465: (72-73): Illegal token: Octal numbers not allowed.
--- a/test/libsolidity/syntaxTests/literals/unicode_string_direction_override_1.sol
+++ b/test/libsolidity/syntaxTests/literals/unicode_string_direction_override_1.sol
@ -0,0 +1,9 @@
+contract C {
+    function f() public pure
+    {
+        // PDF
+        bytes memory s = unicode"underflow ‬";
+    }
+}
+// ----
+// ParserError 8936: (88-106): Unicode direction override underflow in comment or string literal.
--- a/test/libsolidity/syntaxTests/literals/unicode_string_direction_override_2.sol
+++ b/test/libsolidity/syntaxTests/literals/unicode_string_direction_override_2.sol
@ -0,0 +1,9 @@
+contract C {
+    function f() public pure
+    {
+        // PDF PDF
+        bytes memory m = unicode"underflow ‬‬";
+    }
+}
+// ----
+// ParserError 8936: (92-110): Unicode direction override underflow in comment or string literal.
--- a/test/libsolidity/syntaxTests/literals/unicode_string_direction_override_3.sol
+++ b/test/libsolidity/syntaxTests/literals/unicode_string_direction_override_3.sol
@ -0,0 +1,9 @@
+contract C {
+    function f() public pure
+    {
+        // RLO
+        bytes memory m = unicode"overflow ‮";
+    }
+}
+// ----
+// ParserError 8936: (88-108): Mismatching directional override markers in comment or string literal.
--- a/test/libsolidity/syntaxTests/literals/unicode_string_direction_override_4.sol
+++ b/test/libsolidity/syntaxTests/literals/unicode_string_direction_override_4.sol
@ -0,0 +1,9 @@
+contract C {
+    function f() public pure
+    {
+        // RLO RLO
+        bytes memory m = unicode"overflow ‮‮";
+    }
+}
+// ----
+// ParserError 8936: (92-115): Mismatching directional override markers in comment or string literal.
--- a/test/libsolidity/syntaxTests/literals/unicode_string_direction_override_5.sol
+++ b/test/libsolidity/syntaxTests/literals/unicode_string_direction_override_5.sol
@ -0,0 +1,14 @@
+contract C {
+    function f() public pure
+    {
+        // RLO PDF
+        bytes memory m = unicode" ok ‮‬";
+
+        // RLO RLO PDF PDF
+        m = unicode" ok ‮‮‬‬";
+
+        // RLO RLO RLO PDF PDF PDF
+        m = unicode" ok ‮‮‮‬‬‬";
+    }
+}
+// ----
--- a/test/libsolidity/syntaxTests/literals/unicode_string_direction_override_6.sol
+++ b/test/libsolidity/syntaxTests/literals/unicode_string_direction_override_6.sol
@ -0,0 +1,9 @@
+contract C {
+    function f() public pure
+    {
+        // PDF RLO
+        bytes memory m = unicode" underflow ‬‮";
+    }
+}
+// ----
+// ParserError 8936: (92-111): Unicode direction override underflow in comment or string literal.
--- a/test/libsolidity/syntaxTests/literals/unicode_string_direction_override_7.sol
+++ b/test/libsolidity/syntaxTests/literals/unicode_string_direction_override_7.sol
@ -0,0 +1,13 @@
+contract C {
+    function f() public pure
+    {
+        // LRO PDF RLO PDF
+        bytes memory m = unicode"‭ ok ‬‮‬";
+
+        // lre rle pdf pdf
+        m = unicode"lre‪ rle‫ pdf‬ pdf‬";
+        // lre lro pdf pdf
+        m = unicode"lre‪ lro‭ pdf‬ pdf‬";
+    }
+}
+// ----
--- a/test/libyul/yulSyntaxTests/invalid/invalid_octal_number.yul
+++ b/test/libyul/yulSyntaxTests/invalid/invalid_octal_number.yul
@ -2,4 +2,4 @@
    let x := 0100
 }
 // ----
-// ParserError 1856: (15-16): Literal or identifier expected.
+// ParserError 1465: (15-16): Illegal token: Octal numbers not allowed.
--- a/test/libyul/yulSyntaxTests/invalid/unicode_comment_direction_override.sol
+++ b/test/libyul/yulSyntaxTests/invalid/unicode_comment_direction_override.sol
@ -0,0 +1,6 @@
+{
+    // pop 1
+    // underflow ‬
+}
+// ----
+// ParserError 1465: (19-32): Illegal token: Unicode direction override underflow in comment or string literal.
--- a/test/libyul/yulSyntaxTests/invalid/unicode_string_direction_override.sol
+++ b/test/libyul/yulSyntaxTests/invalid/unicode_string_direction_override.sol
@ -0,0 +1,6 @@
+{
+    // pop 1
+    let s := unicode"underflow ‬";
+}
+// ----
+// ParserError 1465: (35-47): Illegal token: Invalid character in string.