mirror of
https://github.com/ethereum/solidity
synced 2023-10-03 13:03:40 +00:00
Merge pull request #5494 from ethereum/scanner-error-handling
Improved Scanner error diagnostics.
This commit is contained in:
commit
9ac7c748f8
@ -53,6 +53,7 @@
|
||||
#include <liblangutil/Exceptions.h>
|
||||
#include <liblangutil/Scanner.h>
|
||||
#include <algorithm>
|
||||
#include <ostream>
|
||||
#include <tuple>
|
||||
|
||||
using namespace std;
|
||||
@ -100,7 +101,32 @@ int hexValue(char c)
|
||||
}
|
||||
} // end anonymous namespace
|
||||
|
||||
std::string to_string(ScannerError _errorCode)
|
||||
{
|
||||
switch (_errorCode)
|
||||
{
|
||||
case ScannerError::NoError: return "No error.";
|
||||
case ScannerError::IllegalToken: return "Invalid token.";
|
||||
case ScannerError::IllegalHexString: return "Expected even number of hex-nibbles within double-quotes.";
|
||||
case ScannerError::IllegalHexDigit: return "Hexadecimal digit missing or invalid.";
|
||||
case ScannerError::IllegalCommentTerminator: return "Expected multi-line comment-terminator.";
|
||||
case ScannerError::IllegalEscapeSequence: return "Invalid escape sequence.";
|
||||
case ScannerError::IllegalStringEndQuote: return "Expected string end-quote.";
|
||||
case ScannerError::IllegalNumberSeparator: return "Invalid use of number separator '_'.";
|
||||
case ScannerError::IllegalExponent: return "Invalid exponent.";
|
||||
case ScannerError::IllegalNumberEnd: return "Identifier-start is not allowed at end of a number.";
|
||||
case ScannerError::OctalNotAllowed: return "Octal numbers not allowed.";
|
||||
default:
|
||||
solAssert(false, "Unhandled case in to_string(ScannerError)");
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, ScannerError _errorCode)
|
||||
{
|
||||
os << to_string(_errorCode);
|
||||
return os;
|
||||
}
|
||||
|
||||
/// Scoped helper for literal recording. Automatically drops the literal
|
||||
/// if aborting the scanning before it's complete.
|
||||
@ -311,7 +337,7 @@ Token Scanner::skipMultiLineComment()
|
||||
}
|
||||
}
|
||||
// Unterminated multi-line comment.
|
||||
return Token::Illegal;
|
||||
return setError(ScannerError::IllegalCommentTerminator);
|
||||
}
|
||||
|
||||
Token Scanner::scanMultiLineDocComment()
|
||||
@ -362,7 +388,7 @@ Token Scanner::scanMultiLineDocComment()
|
||||
}
|
||||
literal.complete();
|
||||
if (!endFound)
|
||||
return Token::Illegal;
|
||||
return setError(ScannerError::IllegalCommentTerminator);
|
||||
else
|
||||
return Token::CommentLiteral;
|
||||
}
|
||||
@ -392,7 +418,7 @@ Token Scanner::scanSlash()
|
||||
{
|
||||
// doxygen style /** natspec comment
|
||||
if (!advance()) /* slash star comment before EOS */
|
||||
return Token::Illegal;
|
||||
return setError(ScannerError::IllegalCommentTerminator);
|
||||
else if (m_char == '*')
|
||||
{
|
||||
advance(); //consume the last '*' at /**
|
||||
@ -410,7 +436,7 @@ Token Scanner::scanSlash()
|
||||
m_nextSkippedComment.location.end = sourcePos();
|
||||
m_nextSkippedComment.token = comment;
|
||||
if (comment == Token::Illegal)
|
||||
return Token::Illegal;
|
||||
return Token::Illegal; // error already set
|
||||
else
|
||||
return Token::Whitespace;
|
||||
}
|
||||
@ -425,6 +451,7 @@ Token Scanner::scanSlash()
|
||||
|
||||
void Scanner::scanToken()
|
||||
{
|
||||
m_nextToken.error = ScannerError::NoError;
|
||||
m_nextToken.literal.clear();
|
||||
m_nextToken.extendedTokenInfo = make_tuple(0, 0);
|
||||
m_nextSkippedComment.literal.clear();
|
||||
@ -610,7 +637,7 @@ void Scanner::scanToken()
|
||||
if (m_char == '"' || m_char == '\'')
|
||||
token = scanHexString();
|
||||
else
|
||||
token = Token::IllegalHex;
|
||||
token = setError(ScannerError::IllegalToken);
|
||||
}
|
||||
}
|
||||
else if (isDecimalDigit(m_char))
|
||||
@ -620,7 +647,7 @@ void Scanner::scanToken()
|
||||
else if (isSourcePastEndOfInput())
|
||||
token = Token::EOS;
|
||||
else
|
||||
token = selectToken(Token::Illegal);
|
||||
token = selectErrorToken(ScannerError::IllegalToken);
|
||||
break;
|
||||
}
|
||||
// Continue scanning for tokens as long as we're just skipping
|
||||
@ -713,13 +740,13 @@ Token Scanner::scanString()
|
||||
if (c == '\\')
|
||||
{
|
||||
if (isSourcePastEndOfInput() || !scanEscape())
|
||||
return Token::Illegal;
|
||||
return setError(ScannerError::IllegalEscapeSequence);
|
||||
}
|
||||
else
|
||||
addLiteralChar(c);
|
||||
}
|
||||
if (m_char != quote)
|
||||
return Token::Illegal;
|
||||
return setError(ScannerError::IllegalStringEndQuote);
|
||||
literal.complete();
|
||||
advance(); // consume quote
|
||||
return Token::StringLiteral;
|
||||
@ -734,11 +761,14 @@ Token Scanner::scanHexString()
|
||||
{
|
||||
char c = m_char;
|
||||
if (!scanHexByte(c))
|
||||
return Token::IllegalHex;
|
||||
// can only return false if hex-byte is incomplete (only one hex digit instead of two)
|
||||
return setError(ScannerError::IllegalHexString);
|
||||
addLiteralChar(c);
|
||||
}
|
||||
|
||||
if (m_char != quote)
|
||||
return Token::IllegalHex;
|
||||
return setError(ScannerError::IllegalStringEndQuote);
|
||||
|
||||
literal.complete();
|
||||
advance(); // consume quote
|
||||
return Token::StringLiteral;
|
||||
@ -767,7 +797,7 @@ Token Scanner::scanNumber(char _charSeen)
|
||||
// we have already seen a decimal point of the float
|
||||
addLiteralChar('.');
|
||||
if (m_char == '_')
|
||||
return Token::Illegal;
|
||||
return setError(ScannerError::IllegalToken);
|
||||
scanDecimalDigits(); // we know we have at least one digit
|
||||
}
|
||||
else
|
||||
@ -784,14 +814,14 @@ Token Scanner::scanNumber(char _charSeen)
|
||||
kind = HEX;
|
||||
addLiteralCharAndAdvance();
|
||||
if (!isHexDigit(m_char))
|
||||
return Token::Illegal; // we must have at least one hex digit after 'x'
|
||||
return setError(ScannerError::IllegalHexDigit); // we must have at least one hex digit after 'x'
|
||||
|
||||
while (isHexDigit(m_char) || m_char == '_') // We keep the underscores for later validation
|
||||
addLiteralCharAndAdvance();
|
||||
}
|
||||
else if (isDecimalDigit(m_char))
|
||||
// We do not allow octal numbers
|
||||
return Token::Illegal;
|
||||
return setError(ScannerError::OctalNotAllowed);
|
||||
}
|
||||
// Parse decimal digits and allow trailing fractional part.
|
||||
if (kind == DECIMAL)
|
||||
@ -823,7 +853,7 @@ Token Scanner::scanNumber(char _charSeen)
|
||||
{
|
||||
solAssert(kind != HEX, "'e'/'E' must be scanned as part of the hex number");
|
||||
if (kind != DECIMAL)
|
||||
return Token::Illegal;
|
||||
return setError(ScannerError::IllegalExponent);
|
||||
else if (!m_source.isPastEndOfInput(1) && m_source.get(1) == '_')
|
||||
{
|
||||
// Recover from wrongly placed underscore as delimiter in literal with scientific
|
||||
@ -838,8 +868,8 @@ Token Scanner::scanNumber(char _charSeen)
|
||||
addLiteralCharAndAdvance(); // 'e' | 'E'
|
||||
if (m_char == '+' || m_char == '-')
|
||||
addLiteralCharAndAdvance();
|
||||
if (!isDecimalDigit(m_char))
|
||||
return Token::Illegal; // we must have at least one decimal digit after 'e'/'E'
|
||||
if (!isDecimalDigit(m_char)) // we must have at least one decimal digit after 'e'/'E'
|
||||
return setError(ScannerError::IllegalExponent);
|
||||
scanDecimalDigits();
|
||||
}
|
||||
// The source character immediately following a numeric literal must
|
||||
@ -847,7 +877,7 @@ Token Scanner::scanNumber(char _charSeen)
|
||||
// section 7.8.3, page 17 (note that we read only one decimal digit
|
||||
// if the value is 0).
|
||||
if (isDecimalDigit(m_char) || isIdentifierStart(m_char))
|
||||
return Token::Illegal;
|
||||
return setError(ScannerError::IllegalNumberEnd);
|
||||
literal.complete();
|
||||
return Token::Number;
|
||||
}
|
||||
|
@ -57,6 +57,7 @@
|
||||
#include <liblangutil/SourceLocation.h>
|
||||
#include <libdevcore/Common.h>
|
||||
#include <libdevcore/CommonData.h>
|
||||
#include <iosfwd>
|
||||
|
||||
namespace langutil
|
||||
{
|
||||
@ -65,6 +66,26 @@ class AstRawString;
|
||||
class AstValueFactory;
|
||||
class ParserRecorder;
|
||||
|
||||
/// Error codes the scanner records for a token it could not scan.
/// to_string(ScannerError) maps each code to its diagnostic message.
enum class ScannerError
{
	/// No error.
	NoError,

	/// Invalid token.
	IllegalToken,
	/// A hex string did not consist of an even number of hex nibbles.
	IllegalHexString,
	/// A hexadecimal digit is missing or invalid.
	IllegalHexDigit,
	/// A multi-line comment terminator was expected.
	IllegalCommentTerminator,
	/// Invalid escape sequence.
	IllegalEscapeSequence,
	/// A string end-quote was expected.
	IllegalStringEndQuote,
	/// Invalid use of the number separator '_'.
	IllegalNumberSeparator,
	/// Invalid exponent.
	IllegalExponent,
	/// An identifier start directly follows the end of a number.
	IllegalNumberEnd,

	/// Octal numbers are not allowed.
	OctalNotAllowed,
};
|
||||
|
||||
std::string to_string(ScannerError _errorCode);
|
||||
std::ostream& operator<<(std::ostream& os, ScannerError _errorCode);
|
||||
|
||||
class Scanner
|
||||
{
|
||||
friend class LiteralScope;
|
||||
@ -100,6 +121,10 @@ public:
|
||||
SourceLocation currentLocation() const { return m_currentToken.location; }
|
||||
std::string const& currentLiteral() const { return m_currentToken.literal; }
|
||||
std::tuple<unsigned, unsigned> const& currentTokenInfo() const { return m_currentToken.extendedTokenInfo; }
|
||||
|
||||
/// Retrieves the last error that occurred during lexical analysis.
|
||||
/// @note The value is only meaningful if the current token is Token::Illegal; otherwise it is undefined.
|
||||
ScannerError currentError() const noexcept { return m_currentToken.error; }
|
||||
///@}
|
||||
|
||||
///@{
|
||||
@ -139,12 +164,19 @@ public:
|
||||
///@}
|
||||
|
||||
private:
|
||||
inline Token setError(ScannerError _error) noexcept
|
||||
{
|
||||
m_nextToken.error = _error;
|
||||
return Token::Illegal;
|
||||
}
|
||||
|
||||
/// Used for the current and look-ahead token and comments
|
||||
struct TokenDesc
|
||||
{
|
||||
Token token;
|
||||
SourceLocation location;
|
||||
std::string literal;
|
||||
ScannerError error = ScannerError::NoError;
|
||||
std::tuple<unsigned, unsigned> extendedTokenInfo;
|
||||
};
|
||||
|
||||
@ -159,6 +191,7 @@ private:
|
||||
bool advance() { m_char = m_source.advanceAndGet(); return !m_source.isPastEndOfInput(); }
|
||||
void rollback(int _amount) { m_char = m_source.rollback(_amount); }
|
||||
|
||||
inline Token selectErrorToken(ScannerError _err) { advance(); return setError(_err); }
|
||||
inline Token selectToken(Token _tok) { advance(); return _tok; }
|
||||
/// If the next character is _next, advance and return _then, otherwise return _else.
|
||||
inline Token selectToken(char _next, Token _then, Token _else);
|
||||
|
@ -263,8 +263,6 @@ namespace langutil
|
||||
\
|
||||
/* Illegal token - not able to scan. */ \
|
||||
T(Illegal, "ILLEGAL", 0) \
|
||||
/* Illegal hex token */ \
|
||||
T(IllegalHex, "ILLEGAL_HEX", 0) \
|
||||
\
|
||||
/* Scanner-internal use only. */ \
|
||||
T(Whitespace, nullptr, 0)
|
||||
|
@ -1555,8 +1555,8 @@ ASTPointer<Expression> Parser::parsePrimaryExpression()
|
||||
expression = nodeFactory.createNode<TupleExpression>(components, isArray);
|
||||
break;
|
||||
}
|
||||
case Token::IllegalHex:
|
||||
fatalParserError("Expected even number of hex-nibbles within double-quotes.");
|
||||
case Token::Illegal:
|
||||
fatalParserError(to_string(m_scanner->currentError()));
|
||||
break;
|
||||
default:
|
||||
if (TokenTraits::isElementaryTypeName(token))
|
||||
|
@ -88,6 +88,7 @@ BOOST_AUTO_TEST_CASE(string_escape_illegal)
|
||||
Scanner scanner(CharStream(" bla \"\\x6rf\" (illegalescape)"));
|
||||
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Identifier);
|
||||
BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal);
|
||||
BOOST_CHECK_EQUAL(scanner.currentError(), ScannerError::IllegalEscapeSequence);
|
||||
BOOST_CHECK_EQUAL(scanner.currentLiteral(), "");
|
||||
// TODO recovery from illegal tokens should be improved
|
||||
BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal);
|
||||
@ -486,28 +487,32 @@ BOOST_AUTO_TEST_CASE(invalid_short_hex_literal)
|
||||
{
|
||||
Scanner scanner(CharStream("{ hex\"00112233F\""));
|
||||
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace);
|
||||
BOOST_CHECK_EQUAL(scanner.next(), Token::IllegalHex);
|
||||
BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal);
|
||||
BOOST_CHECK_EQUAL(scanner.currentError(), ScannerError::IllegalHexString);
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(invalid_hex_literal_with_space)
|
||||
{
|
||||
Scanner scanner(CharStream("{ hex\"00112233FF \""));
|
||||
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace);
|
||||
BOOST_CHECK_EQUAL(scanner.next(), Token::IllegalHex);
|
||||
BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal);
|
||||
BOOST_CHECK_EQUAL(scanner.currentError(), ScannerError::IllegalHexString);
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(invalid_hex_literal_with_wrong_quotes)
|
||||
{
|
||||
Scanner scanner(CharStream("{ hex\"00112233FF'"));
|
||||
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace);
|
||||
BOOST_CHECK_EQUAL(scanner.next(), Token::IllegalHex);
|
||||
BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal);
|
||||
BOOST_CHECK_EQUAL(scanner.currentError(), ScannerError::IllegalHexString);
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(invalid_hex_literal_nonhex_string)
|
||||
{
|
||||
Scanner scanner(CharStream("{ hex\"hello\""));
|
||||
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace);
|
||||
BOOST_CHECK_EQUAL(scanner.next(), Token::IllegalHex);
|
||||
BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal);
|
||||
BOOST_CHECK_EQUAL(scanner.currentError(), ScannerError::IllegalHexString);
|
||||
}
|
||||
|
||||
// COMMENTS
|
||||
|
@ -7,4 +7,4 @@ contract C {
|
||||
}
|
||||
// ----
|
||||
// ParserError: (72-73): Literal, identifier or instruction expected.
|
||||
// ParserError: (72-73): Expected primary expression.
|
||||
// ParserError: (72-73): Octal numbers not allowed.
|
||||
|
@ -4,4 +4,4 @@ contract test {
|
||||
}
|
||||
}
|
||||
// ----
|
||||
// ParserError: (44-47): Expected primary expression.
|
||||
// ParserError: (44-47): Identifier-start is not allowed at end of a number.
|
||||
|
@ -6,4 +6,4 @@ contract test {
|
||||
}
|
||||
}
|
||||
// ----
|
||||
// ParserError: (100-112): Expected primary expression.
|
||||
// ParserError: (100-112): Expected string end-quote.
|
||||
|
@ -5,4 +5,4 @@ contract test {
|
||||
}
|
||||
}
|
||||
// ----
|
||||
// ParserError: (100-109): Expected primary expression.
|
||||
// ParserError: (100-109): Expected string end-quote.
|
||||
|
@ -4,4 +4,4 @@ contract test {
|
||||
}
|
||||
}
|
||||
// ----
|
||||
// ParserError: (100-112): Expected primary expression.
|
||||
// ParserError: (100-112): Expected string end-quote.
|
||||
|
@ -1,4 +1,4 @@
|
||||
contract test {
|
||||
function f() pure public { "abc\
|
||||
// ----
|
||||
// ParserError: (47-53): Expected primary expression.
|
||||
// ParserError: (47-53): Expected string end-quote.
|
||||
|
@ -28,4 +28,4 @@ contract test {
|
||||
|
||||
}
|
||||
// ----
|
||||
// ParserError: (678-681): Expected primary expression.
|
||||
// ParserError: (678-681): Invalid escape sequence.
|
||||
|
Loading…
Reference in New Issue
Block a user