mirror of
https://github.com/ethereum/solidity
synced 2023-10-03 13:03:40 +00:00
Scanner: Generate error on unbalanced RLO/LRO/PDF override markers.
This commit is contained in:
parent
67b7267735
commit
381c63ec99
@ -6,6 +6,7 @@ Language Features:
|
|||||||
* Code generator: Support conversion from calldata slices to memory and storage arrays.
|
* Code generator: Support conversion from calldata slices to memory and storage arrays.
|
||||||
* The fallback function can now also have a single ``calldata`` argument (equaling ``msg.data``) and return ``bytes memory`` (which will not be ABI-encoded but returned as-is).
|
* The fallback function can now also have a single ``calldata`` argument (equaling ``msg.data``) and return ``bytes memory`` (which will not be ABI-encoded but returned as-is).
|
||||||
* Wasm backend: Add ``i32.select`` and ``i64.select`` instructions.
|
* Wasm backend: Add ``i32.select`` and ``i64.select`` instructions.
|
||||||
|
* Scanner: Generate a parser error when comments or unicode strings contain an unbalanced or underflowing set of unicode direction override markers (LRO, RLO, LRE, RLE, PDF).
|
||||||
|
|
||||||
Compiler Features:
|
Compiler Features:
|
||||||
* Build System: Optionally support dynamic loading of Z3 and use that mechanism for Linux release builds.
|
* Build System: Optionally support dynamic loading of Z3 and use that mechanism for Linux release builds.
|
||||||
|
@ -98,6 +98,20 @@ public:
|
|||||||
std::tuple<int, int> translatePositionToLineColumn(int _position) const;
|
std::tuple<int, int> translatePositionToLineColumn(int _position) const;
|
||||||
///@}
|
///@}
|
||||||
|
|
||||||
|
/// Tests whether or not given octet sequence is present at the current position in stream.
|
||||||
|
/// @returns true if the sequence could be found, false otherwise.
|
||||||
|
bool prefixMatch(std::string_view _sequence)
|
||||||
|
{
|
||||||
|
if (isPastEndOfInput(_sequence.size()))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < _sequence.size(); ++i)
|
||||||
|
if (_sequence[i] != get(i))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::string m_source;
|
std::string m_source;
|
||||||
std::string m_name;
|
std::string m_name;
|
||||||
|
@ -54,9 +54,10 @@
|
|||||||
#include <liblangutil/Exceptions.h>
|
#include <liblangutil/Exceptions.h>
|
||||||
#include <liblangutil/Scanner.h>
|
#include <liblangutil/Scanner.h>
|
||||||
|
|
||||||
#include <algorithm>
|
#include <boost/algorithm/string/classification.hpp>
|
||||||
|
|
||||||
#include <optional>
|
#include <optional>
|
||||||
#include <ostream>
|
#include <string_view>
|
||||||
#include <tuple>
|
#include <tuple>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
@ -79,6 +80,8 @@ string to_string(ScannerError _errorCode)
|
|||||||
case ScannerError::IllegalExponent: return "Invalid exponent.";
|
case ScannerError::IllegalExponent: return "Invalid exponent.";
|
||||||
case ScannerError::IllegalNumberEnd: return "Identifier-start is not allowed at end of a number.";
|
case ScannerError::IllegalNumberEnd: return "Identifier-start is not allowed at end of a number.";
|
||||||
case ScannerError::OctalNotAllowed: return "Octal numbers not allowed.";
|
case ScannerError::OctalNotAllowed: return "Octal numbers not allowed.";
|
||||||
|
case ScannerError::DirectionalOverrideUnderflow: return "Unicode direction override underflow in comment or string literal.";
|
||||||
|
case ScannerError::DirectionalOverrideMismatch: return "Mismatching directional override markers in comment or string literal.";
|
||||||
default:
|
default:
|
||||||
solAssert(false, "Unhandled case in to_string(ScannerError)");
|
solAssert(false, "Unhandled case in to_string(ScannerError)");
|
||||||
return "";
|
return "";
|
||||||
@ -271,12 +274,61 @@ bool Scanner::skipWhitespaceExceptUnicodeLinebreak()
|
|||||||
return sourcePos() != startPosition;
|
return sourcePos() != startPosition;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
|
||||||
|
/// Tries to scan for an RLO/LRO/RLE/LRE/PDF and keeps track of script writing direction override depth.
|
||||||
|
///
|
||||||
|
/// @returns ScannerError::NoError in case of successful parsing and directional encodings are paired
|
||||||
|
/// and error code in case the input's lexical parser state is invalid and this error should be reported
|
||||||
|
/// to the user.
|
||||||
|
static ScannerError validateBiDiMarkup(CharStream& _stream, size_t _startPosition)
|
||||||
|
{
|
||||||
|
static array<pair<string_view, int>, 5> constexpr directionalSequences{
|
||||||
|
pair<string_view, int>{"\xE2\x80\xAD", 1}, // U+202D (LRO - Left-to-Right Override)
|
||||||
|
pair<string_view, int>{"\xE2\x80\xAE", 1}, // U+202E (RLO - Right-to-Left Override)
|
||||||
|
pair<string_view, int>{"\xE2\x80\xAA", 1}, // U+202A (LRE - Left-to-Right Embedding)
|
||||||
|
pair<string_view, int>{"\xE2\x80\xAB", 1}, // U+202B (RLE - Right-to-Left Embedding)
|
||||||
|
pair<string_view, int>{"\xE2\x80\xAC", -1} // U+202C (PDF - Pop Directional Formatting
|
||||||
|
};
|
||||||
|
|
||||||
|
size_t endPosition = _stream.position();
|
||||||
|
_stream.setPosition(_startPosition);
|
||||||
|
|
||||||
|
int directionOverrideDepth = 0;
|
||||||
|
|
||||||
|
for (size_t currentPos = _startPosition; currentPos < endPosition; ++currentPos)
|
||||||
|
{
|
||||||
|
_stream.setPosition(currentPos);
|
||||||
|
|
||||||
|
for (auto const& [sequence, depthChange]: directionalSequences)
|
||||||
|
if (_stream.prefixMatch(sequence))
|
||||||
|
directionOverrideDepth += depthChange;
|
||||||
|
|
||||||
|
if (directionOverrideDepth < 0)
|
||||||
|
return ScannerError::DirectionalOverrideUnderflow;
|
||||||
|
}
|
||||||
|
|
||||||
|
_stream.setPosition(endPosition);
|
||||||
|
|
||||||
|
return directionOverrideDepth > 0 ? ScannerError::DirectionalOverrideMismatch : ScannerError::NoError;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
Token Scanner::skipSingleLineComment()
{
	// Position on entry; it is the start of the range handed to validateBiDiMarkup() below.
	size_t const commentStart = m_source->position();

	// Consume up to, but not including, the line terminator: it is not part of the comment.
	// If it is a non-ascii line terminator, it will result in a parser error.
	while (!isUnicodeLinebreak() && advance())
	{
	}

	// Report unbalanced directional markers found in the consumed range as a scanner error.
	if (ScannerError const bidiError = validateBiDiMarkup(*m_source, commentStart); bidiError != ScannerError::NoError)
		return setError(bidiError);

	return Token::Whitespace;
}
|
||||||
@ -349,16 +401,21 @@ size_t Scanner::scanSingleLineDocComment()
|
|||||||
|
|
||||||
Token Scanner::skipMultiLineComment()
|
Token Scanner::skipMultiLineComment()
|
||||||
{
|
{
|
||||||
|
size_t startPosition = m_source->position();
|
||||||
while (!isSourcePastEndOfInput())
|
while (!isSourcePastEndOfInput())
|
||||||
{
|
{
|
||||||
char ch = m_char;
|
char prevChar = m_char;
|
||||||
advance();
|
advance();
|
||||||
|
|
||||||
// If we have reached the end of the multi-line comment, we
|
// If we have reached the end of the multi-line comment, we
|
||||||
// consume the '/' and insert a whitespace. This way all
|
// consume the '/' and insert a whitespace. This way all
|
||||||
// multi-line comments are treated as whitespace.
|
// multi-line comments are treated as whitespace.
|
||||||
if (ch == '*' && m_char == '/')
|
if (prevChar == '*' && m_char == '/')
|
||||||
{
|
{
|
||||||
|
ScannerError unicodeDirectionError = validateBiDiMarkup(*m_source, startPosition);
|
||||||
|
if (unicodeDirectionError != ScannerError::NoError)
|
||||||
|
return setError(unicodeDirectionError);
|
||||||
|
|
||||||
m_char = ' ';
|
m_char = ' ';
|
||||||
return Token::Whitespace;
|
return Token::Whitespace;
|
||||||
}
|
}
|
||||||
@ -785,6 +842,7 @@ bool Scanner::isUnicodeLinebreak()
|
|||||||
|
|
||||||
Token Scanner::scanString(bool const _isUnicode)
|
Token Scanner::scanString(bool const _isUnicode)
|
||||||
{
|
{
|
||||||
|
size_t startPosition = m_source->position();
|
||||||
char const quote = m_char;
|
char const quote = m_char;
|
||||||
advance(); // consume quote
|
advance(); // consume quote
|
||||||
LiteralScope literal(this, LITERAL_TYPE_STRING);
|
LiteralScope literal(this, LITERAL_TYPE_STRING);
|
||||||
@ -812,6 +870,14 @@ Token Scanner::scanString(bool const _isUnicode)
|
|||||||
}
|
}
|
||||||
if (m_char != quote)
|
if (m_char != quote)
|
||||||
return setError(ScannerError::IllegalStringEndQuote);
|
return setError(ScannerError::IllegalStringEndQuote);
|
||||||
|
|
||||||
|
if (_isUnicode)
|
||||||
|
{
|
||||||
|
ScannerError unicodeDirectionError = validateBiDiMarkup(*m_source, startPosition);
|
||||||
|
if (unicodeDirectionError != ScannerError::NoError)
|
||||||
|
return setError(unicodeDirectionError);
|
||||||
|
}
|
||||||
|
|
||||||
literal.complete();
|
literal.complete();
|
||||||
advance(); // consume quote
|
advance(); // consume quote
|
||||||
return _isUnicode ? Token::UnicodeStringLiteral : Token::StringLiteral;
|
return _isUnicode ? Token::UnicodeStringLiteral : Token::StringLiteral;
|
||||||
|
@ -89,6 +89,9 @@ enum class ScannerError
|
|||||||
IllegalExponent,
|
IllegalExponent,
|
||||||
IllegalNumberEnd,
|
IllegalNumberEnd,
|
||||||
|
|
||||||
|
DirectionalOverrideUnderflow,
|
||||||
|
DirectionalOverrideMismatch,
|
||||||
|
|
||||||
OctalNotAllowed,
|
OctalNotAllowed,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -183,6 +186,7 @@ public:
|
|||||||
///@}
|
///@}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
inline Token setError(ScannerError _error) noexcept
|
inline Token setError(ScannerError _error) noexcept
|
||||||
{
|
{
|
||||||
m_tokens[NextNext].error = _error;
|
m_tokens[NextNext].error = _error;
|
||||||
|
@ -6,7 +6,10 @@
|
|||||||
REPO_ROOT="$(dirname "$0")"/..
|
REPO_ROOT="$(dirname "$0")"/..
|
||||||
cd $REPO_ROOT
|
cd $REPO_ROOT
|
||||||
|
|
||||||
WHITESPACE=$(git grep -n -I -E "^.*[[:space:]]+$" | grep -v "test/libsolidity/ASTJSON\|test/libsolidity/ASTRecoveryTests\|test/compilationTests/zeppelin/LICENSE")
|
WHITESPACE=$(git grep -n -I -E "^.*[[:space:]]+$" |
|
||||||
|
grep -v "test/libsolidity/ASTJSON\|test/libsolidity/ASTRecoveryTests\|test/compilationTests/zeppelin/LICENSE" |
|
||||||
|
grep -v -E "test/libsolidity/syntaxTests/comments/unicode_direction_override_1.sol"
|
||||||
|
)
|
||||||
|
|
||||||
if [[ "$WHITESPACE" != "" ]]
|
if [[ "$WHITESPACE" != "" ]]
|
||||||
then
|
then
|
||||||
|
@ -116,7 +116,10 @@ done < <(
|
|||||||
grep -riL -E \
|
grep -riL -E \
|
||||||
"^\/\/ (Syntax|Type|Declaration)Error|^\/\/ ParserError (2837|3716|3997|5333|6275|6281|6933|7319)|^==== Source:" \
|
"^\/\/ (Syntax|Type|Declaration)Error|^\/\/ ParserError (2837|3716|3997|5333|6275|6281|6933|7319)|^==== Source:" \
|
||||||
"${ROOT_DIR}/test/libsolidity/syntaxTests" \
|
"${ROOT_DIR}/test/libsolidity/syntaxTests" \
|
||||||
"${ROOT_DIR}/test/libsolidity/semanticTests" \
|
"${ROOT_DIR}/test/libsolidity/semanticTests" |
|
||||||
|
grep -v -E 'comments/.*_direction_override.*.sol' |
|
||||||
|
grep -v -E 'literals/.*_direction_override.*.sol'
|
||||||
|
# Skipping the unicode direction override tests, as the lexical grammar could not be adapted to recursively count RLO/LRO/PDF markers.
|
||||||
)
|
)
|
||||||
|
|
||||||
YUL_FILES=()
|
YUL_FILES=()
|
||||||
|
@ -0,0 +1,9 @@
|
|||||||
|
contract C {
|
||||||
|
function f() public pure
|
||||||
|
{
|
||||||
|
// PDF
|
||||||
|
/*underflow */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// ----
|
||||||
|
// ParserError 8936: (71-83): Unicode direction override underflow in comment or string literal.
|
@ -0,0 +1,9 @@
|
|||||||
|
contract C {
|
||||||
|
function f() public pure
|
||||||
|
{
|
||||||
|
// PDF PDF
|
||||||
|
/*underflow */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// ----
|
||||||
|
// ParserError 8936: (75-87): Unicode direction override underflow in comment or string literal.
|
@ -0,0 +1,9 @@
|
|||||||
|
contract C {
|
||||||
|
function f() public pure
|
||||||
|
{
|
||||||
|
// RLO
|
||||||
|
/*overflow */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// ----
|
||||||
|
// ParserError 8936: (71-86): Mismatching directional override markers in comment or string literal.
|
@ -0,0 +1,9 @@
|
|||||||
|
contract C {
|
||||||
|
function f() public pure
|
||||||
|
{
|
||||||
|
// RLO RLO
|
||||||
|
/*overflow */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// ----
|
||||||
|
// ParserError 8936: (75-93): Mismatching directional override markers in comment or string literal.
|
@ -0,0 +1,14 @@
|
|||||||
|
contract C {
|
||||||
|
function f() public pure
|
||||||
|
{
|
||||||
|
// RLO PDF
|
||||||
|
/*ok */
|
||||||
|
|
||||||
|
// RLO RLO PDF PDF
|
||||||
|
/*ok */
|
||||||
|
|
||||||
|
// RLO RLO RLO PDF PDF PDF
|
||||||
|
/*ok */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// ----
|
@ -0,0 +1,9 @@
|
|||||||
|
contract C {
|
||||||
|
function f() public pure
|
||||||
|
{
|
||||||
|
// PDF RLO
|
||||||
|
/*overflow */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// ----
|
||||||
|
// ParserError 8936: (75-86): Unicode direction override underflow in comment or string literal.
|
@ -0,0 +1,7 @@
|
|||||||
|
contract C {
|
||||||
|
function f() public pure {
|
||||||
|
/* LRO LRE RLE PDF RLO PDF PDF
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// ----
|
||||||
|
// ParserError 8936: (52-115): Expected multi-line comment-terminator.
|
@ -0,0 +1,9 @@
|
|||||||
|
contract C {
|
||||||
|
function f() public pure
|
||||||
|
{
|
||||||
|
// PDF
|
||||||
|
// underflow
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// ----
|
||||||
|
// ParserError 8936: (71-84): Unicode direction override underflow in comment or string literal.
|
@ -0,0 +1,9 @@
|
|||||||
|
contract C {
|
||||||
|
function f() public pure
|
||||||
|
{
|
||||||
|
// PDF PDF
|
||||||
|
// underflow
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// ----
|
||||||
|
// ParserError 8936: (75-88): Unicode direction override underflow in comment or string literal.
|
@ -0,0 +1,9 @@
|
|||||||
|
contract C {
|
||||||
|
function f() public pure
|
||||||
|
{
|
||||||
|
// RLO
|
||||||
|
// overflow
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// ----
|
||||||
|
// ParserError 8936: (71-86): Mismatching directional override markers in comment or string literal.
|
@ -0,0 +1,9 @@
|
|||||||
|
contract C {
|
||||||
|
function f() public pure
|
||||||
|
{
|
||||||
|
// RLO RLO
|
||||||
|
// overflow
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// ----
|
||||||
|
// ParserError 8936: (75-93): Mismatching directional override markers in comment or string literal.
|
@ -0,0 +1,14 @@
|
|||||||
|
contract C {
|
||||||
|
function f() public pure
|
||||||
|
{
|
||||||
|
// RLO PDF
|
||||||
|
// ok
|
||||||
|
|
||||||
|
// RLO RLO PDF PDF
|
||||||
|
// ok
|
||||||
|
|
||||||
|
// RLO RLO RLO PDF PDF PDF
|
||||||
|
// ok
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// ----
|
@ -0,0 +1,9 @@
|
|||||||
|
contract C {
|
||||||
|
function f() public pure
|
||||||
|
{
|
||||||
|
// PDF RLO
|
||||||
|
// underflow
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// ----
|
||||||
|
// ParserError 8936: (75-88): Unicode direction override underflow in comment or string literal.
|
@ -0,0 +1,8 @@
|
|||||||
|
contract C {
|
||||||
|
function f(bool b) public pure
|
||||||
|
{
|
||||||
|
if (b) { return; }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// ----
|
||||||
|
// ParserError 2314: (65-66): Expected '(' but got 'ILLEGAL'
|
@ -0,0 +1,8 @@
|
|||||||
|
contract C {
|
||||||
|
function f(bool b) public pure
|
||||||
|
{
|
||||||
|
uint a = 10;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// ----
|
||||||
|
// ParserError 8936: (75-76): Invalid token.
|
@ -0,0 +1,10 @@
|
|||||||
|
contract TimelockUpgrade {
|
||||||
|
function confirmUpgrade() external {
|
||||||
|
uint256 m;
|
||||||
|
uint256 d;
|
||||||
|
(/*year*/,/*month*/,d/*yad*/,m/*hour*/,/*minute*/,/*second*/) = BokkyDateTime.timestampToDateTime(block.timestamp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ----
|
||||||
|
// ParserError 8936: (128-139): Mismatching directional override markers in comment or string literal.
|
@ -0,0 +1,9 @@
|
|||||||
|
contract C {
|
||||||
|
function f() public pure
|
||||||
|
{
|
||||||
|
// PDF
|
||||||
|
bytes memory s = unicode"underflow ";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// ----
|
||||||
|
// ParserError 8936: (88-106): Unicode direction override underflow in comment or string literal.
|
@ -0,0 +1,9 @@
|
|||||||
|
contract C {
|
||||||
|
function f() public pure
|
||||||
|
{
|
||||||
|
// PDF PDF
|
||||||
|
bytes memory m = unicode"underflow ";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// ----
|
||||||
|
// ParserError 8936: (92-110): Unicode direction override underflow in comment or string literal.
|
@ -0,0 +1,9 @@
|
|||||||
|
contract C {
|
||||||
|
function f() public pure
|
||||||
|
{
|
||||||
|
// RLO
|
||||||
|
bytes memory m = unicode"overflow ";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// ----
|
||||||
|
// ParserError 8936: (88-108): Mismatching directional override markers in comment or string literal.
|
@ -0,0 +1,9 @@
|
|||||||
|
contract C {
|
||||||
|
function f() public pure
|
||||||
|
{
|
||||||
|
// RLO RLO
|
||||||
|
bytes memory m = unicode"overflow ";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// ----
|
||||||
|
// ParserError 8936: (92-115): Mismatching directional override markers in comment or string literal.
|
@ -0,0 +1,14 @@
|
|||||||
|
contract C {
|
||||||
|
function f() public pure
|
||||||
|
{
|
||||||
|
// RLO PDF
|
||||||
|
bytes memory m = unicode" ok ";
|
||||||
|
|
||||||
|
// RLO RLO PDF PDF
|
||||||
|
m = unicode" ok ";
|
||||||
|
|
||||||
|
// RLO RLO RLO PDF PDF PDF
|
||||||
|
m = unicode" ok ";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// ----
|
@ -0,0 +1,9 @@
|
|||||||
|
contract C {
|
||||||
|
function f() public pure
|
||||||
|
{
|
||||||
|
// PDF RLO
|
||||||
|
bytes memory m = unicode" underflow ";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// ----
|
||||||
|
// ParserError 8936: (92-111): Unicode direction override underflow in comment or string literal.
|
@ -0,0 +1,13 @@
|
|||||||
|
contract C {
|
||||||
|
function f() public pure
|
||||||
|
{
|
||||||
|
// LRO PDF RLO PDF
|
||||||
|
bytes memory m = unicode" ok ";
|
||||||
|
|
||||||
|
// lre rle pdf pdf
|
||||||
|
m = unicode"lre rle pdf pdf";
|
||||||
|
// lre lro pdf pdf
|
||||||
|
m = unicode"lre lro pdf pdf";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// ----
|
@ -0,0 +1,6 @@
|
|||||||
|
{
|
||||||
|
// pop 1
|
||||||
|
// underflow
|
||||||
|
}
|
||||||
|
// ----
|
||||||
|
// ParserError 1856: (19-32): Literal or identifier expected.
|
@ -0,0 +1,6 @@
|
|||||||
|
{
|
||||||
|
// pop 1
|
||||||
|
let s := unicode"underflow ";
|
||||||
|
}
|
||||||
|
// ----
|
||||||
|
// ParserError 1856: (35-47): Literal or identifier expected.
|
Loading…
Reference in New Issue
Block a user