Increase scanner lookahead to two.

This commit is contained in:
Mathias Baumann 2020-01-22 20:10:56 +01:00 committed by chriseth
parent 641bb815e8
commit dd035f8f48
2 changed files with 50 additions and 46 deletions

View File

@@ -108,18 +108,18 @@ public:
m_complete(false) m_complete(false)
{ {
if (_type == LITERAL_TYPE_COMMENT) if (_type == LITERAL_TYPE_COMMENT)
m_scanner->m_nextSkippedComment.literal.clear(); m_scanner->m_skippedComments[Scanner::NextNext].literal.clear();
else else
m_scanner->m_nextToken.literal.clear(); m_scanner->m_tokens[Scanner::NextNext].literal.clear();
} }
~LiteralScope() ~LiteralScope()
{ {
if (!m_complete) if (!m_complete)
{ {
if (m_type == LITERAL_TYPE_COMMENT) if (m_type == LITERAL_TYPE_COMMENT)
m_scanner->m_nextSkippedComment.literal.clear(); m_scanner->m_skippedComments[Scanner::NextNext].literal.clear();
else else
m_scanner->m_nextToken.literal.clear(); m_scanner->m_tokens[Scanner::NextNext].literal.clear();
} }
} }
void complete() { m_complete = true; } void complete() { m_complete = true; }
@@ -151,6 +151,7 @@ void Scanner::reset()
skipWhitespace(); skipWhitespace();
next(); next();
next(); next();
next();
} }
void Scanner::setPosition(size_t _offset) void Scanner::setPosition(size_t _offset)
@@ -158,6 +159,7 @@ void Scanner::setPosition(size_t _offset)
m_char = m_source->setPosition(_offset); m_char = m_source->setPosition(_offset);
scanToken(); scanToken();
next(); next();
next();
} }
void Scanner::supportPeriodInIdentifier(bool _value) void Scanner::supportPeriodInIdentifier(bool _value)
@@ -222,13 +224,14 @@ void Scanner::addUnicodeAsUTF8(unsigned codepoint)
void Scanner::rescan() void Scanner::rescan()
{ {
size_t rollbackTo = 0; size_t rollbackTo = 0;
if (m_skippedComment.literal.empty()) if (m_skippedComments[Current].literal.empty())
rollbackTo = m_currentToken.location.start; rollbackTo = m_tokens[Current].location.start;
else else
rollbackTo = m_skippedComment.location.start; rollbackTo = m_skippedComments[Current].location.start;
m_char = m_source->rollback(size_t(m_source->position()) - rollbackTo); m_char = m_source->rollback(size_t(m_source->position()) - rollbackTo);
next(); next();
next(); next();
next();
} }
// Ensure that tokens can be stored in a byte. // Ensure that tokens can be stored in a byte.
@@ -236,11 +239,14 @@ BOOST_STATIC_ASSERT(TokenTraits::count() <= 0x100);
Token Scanner::next() Token Scanner::next()
{ {
m_currentToken = m_nextToken; m_tokens[Current] = std::move(m_tokens[Next]);
m_skippedComment = m_nextSkippedComment; m_tokens[Next] = std::move(m_tokens[NextNext]);
m_skippedComments[Current] = std::move(m_skippedComments[Next]);
m_skippedComments[Next] = std::move(m_skippedComments[NextNext]);
scanToken(); scanToken();
return m_currentToken.token; return m_tokens[Current].token;
} }
Token Scanner::selectToken(char _next, Token _then, Token _else) Token Scanner::selectToken(char _next, Token _then, Token _else)
@@ -421,10 +427,10 @@ Token Scanner::scanSlash()
{ {
// doxygen style /// comment // doxygen style /// comment
Token comment; Token comment;
m_nextSkippedComment.location.start = firstSlashPosition; m_skippedComments[NextNext].location.start = firstSlashPosition;
comment = scanSingleLineDocComment(); comment = scanSingleLineDocComment();
m_nextSkippedComment.location.end = sourcePos(); m_skippedComments[NextNext].location.end = sourcePos();
m_nextSkippedComment.token = comment; m_skippedComments[NextNext].token = comment;
return Token::Whitespace; return Token::Whitespace;
} }
else else
@@ -447,10 +453,10 @@ Token Scanner::scanSlash()
} }
// we actually have a multiline documentation comment // we actually have a multiline documentation comment
Token comment; Token comment;
m_nextSkippedComment.location.start = firstSlashPosition; m_skippedComments[NextNext].location.start = firstSlashPosition;
comment = scanMultiLineDocComment(); comment = scanMultiLineDocComment();
m_nextSkippedComment.location.end = sourcePos(); m_skippedComments[NextNext].location.end = sourcePos();
m_nextSkippedComment.token = comment; m_skippedComments[NextNext].token = comment;
if (comment == Token::Illegal) if (comment == Token::Illegal)
return Token::Illegal; // error already set return Token::Illegal; // error already set
else else
@@ -467,11 +473,8 @@ Token Scanner::scanSlash()
void Scanner::scanToken() void Scanner::scanToken()
{ {
m_nextToken.error = ScannerError::NoError; m_tokens[NextNext] = {};
m_nextToken.literal.clear(); m_skippedComments[NextNext] = {};
m_nextToken.extendedTokenInfo = make_tuple(0, 0);
m_nextSkippedComment.literal.clear();
m_nextSkippedComment.extendedTokenInfo = make_tuple(0, 0);
Token token; Token token;
// M and N are for the purposes of grabbing different type sizes // M and N are for the purposes of grabbing different type sizes
@@ -480,7 +483,7 @@ void Scanner::scanToken()
do do
{ {
// Remember the position of the next token // Remember the position of the next token
m_nextToken.location.start = sourcePos(); m_tokens[NextNext].location.start = sourcePos();
switch (m_char) switch (m_char)
{ {
case '"': case '"':
@@ -675,9 +678,9 @@ void Scanner::scanToken()
// whitespace. // whitespace.
} }
while (token == Token::Whitespace); while (token == Token::Whitespace);
m_nextToken.location.end = sourcePos(); m_tokens[NextNext].location.end = sourcePos();
m_nextToken.token = token; m_tokens[NextNext].token = token;
m_nextToken.extendedTokenInfo = make_tuple(m, n); m_tokens[NextNext].extendedTokenInfo = make_tuple(m, n);
} }
bool Scanner::scanEscape() bool Scanner::scanEscape()
@@ -927,7 +930,7 @@ tuple<Token, unsigned, unsigned> Scanner::scanIdentifierOrKeyword()
while (isIdentifierPart(m_char) || (m_char == '.' && m_supportPeriodInIdentifier)) while (isIdentifierPart(m_char) || (m_char == '.' && m_supportPeriodInIdentifier))
addLiteralCharAndAdvance(); addLiteralCharAndAdvance();
literal.complete(); literal.complete();
return TokenTraits::fromIdentifierOrKeyword(m_nextToken.literal); return TokenTraits::fromIdentifierOrKeyword(m_tokens[NextNext].literal);
} }
} // namespace solidity::langutil } // namespace solidity::langutil

View File

@@ -121,32 +121,32 @@ public:
/// @returns the current token /// @returns the current token
Token currentToken() const Token currentToken() const
{ {
return m_currentToken.token; return m_tokens[Current].token;
} }
ElementaryTypeNameToken currentElementaryTypeNameToken() const ElementaryTypeNameToken currentElementaryTypeNameToken() const
{ {
unsigned firstSize; unsigned firstSize;
unsigned secondSize; unsigned secondSize;
std::tie(firstSize, secondSize) = m_currentToken.extendedTokenInfo; std::tie(firstSize, secondSize) = m_tokens[Current].extendedTokenInfo;
return ElementaryTypeNameToken(m_currentToken.token, firstSize, secondSize); return ElementaryTypeNameToken(m_tokens[Current].token, firstSize, secondSize);
} }
SourceLocation currentLocation() const { return m_currentToken.location; } SourceLocation currentLocation() const { return m_tokens[Current].location; }
std::string const& currentLiteral() const { return m_currentToken.literal; } std::string const& currentLiteral() const { return m_tokens[Current].literal; }
std::tuple<unsigned, unsigned> const& currentTokenInfo() const { return m_currentToken.extendedTokenInfo; } std::tuple<unsigned, unsigned> const& currentTokenInfo() const { return m_tokens[Current].extendedTokenInfo; }
/// Retrieves the last error that occurred during lexical analysis. /// Retrieves the last error that occurred during lexical analysis.
/// @note If no error occurred, the value is undefined. /// @note If no error occurred, the value is undefined.
ScannerError currentError() const noexcept { return m_currentToken.error; } ScannerError currentError() const noexcept { return m_tokens[Current].error; }
///@} ///@}
///@{ ///@{
///@name Information about the current comment token ///@name Information about the current comment token
SourceLocation currentCommentLocation() const { return m_skippedComment.location; } SourceLocation currentCommentLocation() const { return m_skippedComments[Current].location; }
std::string const& currentCommentLiteral() const { return m_skippedComment.literal; } std::string const& currentCommentLiteral() const { return m_skippedComments[Current].literal; }
/// Called by the parser during FunctionDefinition parsing to clear the current comment /// Called by the parser during FunctionDefinition parsing to clear the current comment
void clearCurrentCommentLiteral() { m_skippedComment.literal.clear(); } void clearCurrentCommentLiteral() { m_skippedComments[Current].literal.clear(); }
///@} ///@}
@@ -154,9 +154,11 @@ public:
///@name Information about the next token ///@name Information about the next token
/// @returns the next token without advancing input. /// @returns the next token without advancing input.
Token peekNextToken() const { return m_nextToken.token; } Token peekNextToken() const { return m_tokens[Next].token; }
SourceLocation peekLocation() const { return m_nextToken.location; } SourceLocation peekLocation() const { return m_tokens[Next].location; }
std::string const& peekLiteral() const { return m_nextToken.literal; } std::string const& peekLiteral() const { return m_tokens[Next].literal; }
Token peekNextNextToken() const { return m_tokens[NextNext].token; }
///@} ///@}
///@{ ///@{
@@ -176,7 +178,7 @@ public:
private: private:
inline Token setError(ScannerError _error) noexcept inline Token setError(ScannerError _error) noexcept
{ {
m_nextToken.error = _error; m_tokens[NextNext].error = _error;
return Token::Illegal; return Token::Illegal;
} }
@@ -192,8 +194,8 @@ private:
///@{ ///@{
///@name Literal buffer support ///@name Literal buffer support
inline void addLiteralChar(char c) { m_nextToken.literal.push_back(c); } inline void addLiteralChar(char c) { m_tokens[NextNext].literal.push_back(c); }
inline void addCommentLiteralChar(char c) { m_nextSkippedComment.literal.push_back(c); } inline void addCommentLiteralChar(char c) { m_skippedComments[NextNext].literal.push_back(c); }
inline void addLiteralCharAndAdvance() { addLiteralChar(m_char); advance(); } inline void addLiteralCharAndAdvance() { addLiteralChar(m_char); advance(); }
void addUnicodeAsUTF8(unsigned codepoint); void addUnicodeAsUTF8(unsigned codepoint);
///@} ///@}
@@ -252,11 +254,10 @@ private:
bool m_supportPeriodInIdentifier = false; bool m_supportPeriodInIdentifier = false;
TokenDesc m_skippedComment; // desc for current skipped comment enum TokenIndex { Current, Next, NextNext };
TokenDesc m_nextSkippedComment; // desc for next skipped comment
TokenDesc m_currentToken; // desc for current token (as returned by Next()) TokenDesc m_skippedComments[3] = {}; // desc for the current, next and nextnext skipped comment
TokenDesc m_nextToken; // desc for next token (one token look-ahead) TokenDesc m_tokens[3] = {}; // desc for the current, next and nextnext token
std::shared_ptr<CharStream> m_source; std::shared_ptr<CharStream> m_source;