Increase scanner lookahead to two.

Mathias Baumann 2020-01-22 20:10:56 +01:00 committed by chriseth
parent 641bb815e8
commit dd035f8f48
2 changed files with 50 additions and 46 deletions
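Before this change the scanner kept a single token of lookahead in dedicated members (m_currentToken / m_nextToken plus the matching skipped-comment slots); this commit replaces them with three-element arrays indexed by Current, Next and NextNext, so callers can peek two tokens ahead. The following standalone sketch is not the real scanner: TokenDesc is reduced to a token id and a literal, and scanToken() just hands out increasing ids, but it illustrates the rotation that the updated Scanner::next() performs.

#include <array>
#include <cassert>
#include <string>
#include <utility>

// Simplified stand-in for the scanner's TokenDesc; the real struct also
// carries a SourceLocation, a ScannerError and extended token info.
struct TokenDesc
{
	int token = 0; // placeholder for the Token enum
	std::string literal;
};

class LookaheadBuffer
{
public:
	enum TokenIndex { Current, Next, NextNext };

	// Mirrors the updated Scanner::next(): shift the window by one slot
	// and scan a fresh token into NextNext.
	int next()
	{
		m_tokens[Current] = std::move(m_tokens[Next]);
		m_tokens[Next] = std::move(m_tokens[NextNext]);
		scanToken();
		return m_tokens[Current].token;
	}

	int currentToken() const { return m_tokens[Current].token; }
	int peekNextToken() const { return m_tokens[Next].token; }
	int peekNextNextToken() const { return m_tokens[NextNext].token; }

private:
	// Stand-in for real lexing: hand out increasing ids so the shifting
	// behaviour is observable.
	void scanToken() { m_tokens[NextNext] = TokenDesc{++m_counter, {}}; }

	std::array<TokenDesc, 3> m_tokens{};
	int m_counter = 0;
};

int main()
{
	LookaheadBuffer scanner;
	// Like the updated Scanner::reset(): three next() calls are needed
	// before the Current slot holds the first real token.
	scanner.next();
	scanner.next();
	scanner.next();
	assert(scanner.currentToken() == 1);
	assert(scanner.peekNextToken() == 2);
	assert(scanner.peekNextNextToken() == 3);
}

This also explains the extra next() calls added to reset(), setPosition() and rescan() in the diff below: one more shift is required to fill the additional lookahead slot.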


@@ -108,18 +108,18 @@ public:
 		m_complete(false)
 	{
 		if (_type == LITERAL_TYPE_COMMENT)
-			m_scanner->m_nextSkippedComment.literal.clear();
+			m_scanner->m_skippedComments[Scanner::NextNext].literal.clear();
 		else
-			m_scanner->m_nextToken.literal.clear();
+			m_scanner->m_tokens[Scanner::NextNext].literal.clear();
 	}
 	~LiteralScope()
 	{
 		if (!m_complete)
 		{
 			if (m_type == LITERAL_TYPE_COMMENT)
-				m_scanner->m_nextSkippedComment.literal.clear();
+				m_scanner->m_skippedComments[Scanner::NextNext].literal.clear();
 			else
-				m_scanner->m_nextToken.literal.clear();
+				m_scanner->m_tokens[Scanner::NextNext].literal.clear();
 		}
 	}
 	void complete() { m_complete = true; }
@@ -151,6 +151,7 @@ void Scanner::reset()
 	skipWhitespace();
 	next();
 	next();
+	next();
 }
 void Scanner::setPosition(size_t _offset)
@@ -158,6 +159,7 @@ void Scanner::setPosition(size_t _offset)
 	m_char = m_source->setPosition(_offset);
 	scanToken();
 	next();
+	next();
 }
 void Scanner::supportPeriodInIdentifier(bool _value)
@@ -222,13 +224,14 @@ void Scanner::addUnicodeAsUTF8(unsigned codepoint)
 void Scanner::rescan()
 {
 	size_t rollbackTo = 0;
-	if (m_skippedComment.literal.empty())
-		rollbackTo = m_currentToken.location.start;
+	if (m_skippedComments[Current].literal.empty())
+		rollbackTo = m_tokens[Current].location.start;
 	else
-		rollbackTo = m_skippedComment.location.start;
+		rollbackTo = m_skippedComments[Current].location.start;
 	m_char = m_source->rollback(size_t(m_source->position()) - rollbackTo);
 	next();
 	next();
+	next();
 }
 // Ensure that tokens can be stored in a byte.
@@ -236,11 +239,14 @@ BOOST_STATIC_ASSERT(TokenTraits::count() <= 0x100);
 Token Scanner::next()
 {
-	m_currentToken = m_nextToken;
-	m_skippedComment = m_nextSkippedComment;
+	m_tokens[Current] = std::move(m_tokens[Next]);
+	m_tokens[Next] = std::move(m_tokens[NextNext]);
+	m_skippedComments[Current] = std::move(m_skippedComments[Next]);
+	m_skippedComments[Next] = std::move(m_skippedComments[NextNext]);
 	scanToken();
-	return m_currentToken.token;
+	return m_tokens[Current].token;
 }
 Token Scanner::selectToken(char _next, Token _then, Token _else)
@@ -421,10 +427,10 @@ Token Scanner::scanSlash()
 		{
 			// doxygen style /// comment
 			Token comment;
-			m_nextSkippedComment.location.start = firstSlashPosition;
+			m_skippedComments[NextNext].location.start = firstSlashPosition;
 			comment = scanSingleLineDocComment();
-			m_nextSkippedComment.location.end = sourcePos();
-			m_nextSkippedComment.token = comment;
+			m_skippedComments[NextNext].location.end = sourcePos();
+			m_skippedComments[NextNext].token = comment;
 			return Token::Whitespace;
 		}
 		else
@@ -447,10 +453,10 @@ Token Scanner::scanSlash()
 			}
 			// we actually have a multiline documentation comment
 			Token comment;
-			m_nextSkippedComment.location.start = firstSlashPosition;
+			m_skippedComments[NextNext].location.start = firstSlashPosition;
 			comment = scanMultiLineDocComment();
-			m_nextSkippedComment.location.end = sourcePos();
-			m_nextSkippedComment.token = comment;
+			m_skippedComments[NextNext].location.end = sourcePos();
+			m_skippedComments[NextNext].token = comment;
 			if (comment == Token::Illegal)
 				return Token::Illegal; // error already set
 			else
@@ -467,11 +473,8 @@ Token Scanner::scanSlash()
 void Scanner::scanToken()
 {
-	m_nextToken.error = ScannerError::NoError;
-	m_nextToken.literal.clear();
-	m_nextToken.extendedTokenInfo = make_tuple(0, 0);
-	m_nextSkippedComment.literal.clear();
-	m_nextSkippedComment.extendedTokenInfo = make_tuple(0, 0);
+	m_tokens[NextNext] = {};
+	m_skippedComments[NextNext] = {};
 	Token token;
 	// M and N are for the purposes of grabbing different type sizes
@@ -480,7 +483,7 @@ void Scanner::scanToken()
 	do
 	{
 		// Remember the position of the next token
-		m_nextToken.location.start = sourcePos();
+		m_tokens[NextNext].location.start = sourcePos();
 		switch (m_char)
 		{
 		case '"':
@@ -675,9 +678,9 @@ void Scanner::scanToken()
 		// whitespace.
 	}
 	while (token == Token::Whitespace);
-	m_nextToken.location.end = sourcePos();
-	m_nextToken.token = token;
-	m_nextToken.extendedTokenInfo = make_tuple(m, n);
+	m_tokens[NextNext].location.end = sourcePos();
+	m_tokens[NextNext].token = token;
+	m_tokens[NextNext].extendedTokenInfo = make_tuple(m, n);
 }
 bool Scanner::scanEscape()
@@ -927,7 +930,7 @@ tuple<Token, unsigned, unsigned> Scanner::scanIdentifierOrKeyword()
 	while (isIdentifierPart(m_char) || (m_char == '.' && m_supportPeriodInIdentifier))
 		addLiteralCharAndAdvance();
 	literal.complete();
-	return TokenTraits::fromIdentifierOrKeyword(m_nextToken.literal);
+	return TokenTraits::fromIdentifierOrKeyword(m_tokens[NextNext].literal);
 }
 } // namespace solidity::langutil
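One detail from the scanToken() hunk above: the per-field resets are replaced by assigning {} to the whole NextNext descriptors, which value-initializes every member in one statement. A minimal sketch of that idiom follows; the field names are taken from the diff, but the exact TokenDesc layout here is an assumption (the real struct also holds a Token and a SourceLocation).

#include <cassert>
#include <string>
#include <tuple>

// Assumed, reduced shape of TokenDesc.
struct TokenDesc
{
	int error = 0;
	std::string literal;
	std::tuple<unsigned, unsigned> extendedTokenInfo{0, 0};
};

int main()
{
	TokenDesc desc{1, "pending", {256, 8}};
	desc = {}; // one assignment instead of clearing each field separately
	assert(desc.error == 0);
	assert(desc.literal.empty());
	assert(desc.extendedTokenInfo == std::make_tuple(0u, 0u));
}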


@@ -121,32 +121,32 @@ public:
 	/// @returns the current token
 	Token currentToken() const
 	{
-		return m_currentToken.token;
+		return m_tokens[Current].token;
 	}
 	ElementaryTypeNameToken currentElementaryTypeNameToken() const
 	{
 		unsigned firstSize;
 		unsigned secondSize;
-		std::tie(firstSize, secondSize) = m_currentToken.extendedTokenInfo;
-		return ElementaryTypeNameToken(m_currentToken.token, firstSize, secondSize);
+		std::tie(firstSize, secondSize) = m_tokens[Current].extendedTokenInfo;
+		return ElementaryTypeNameToken(m_tokens[Current].token, firstSize, secondSize);
 	}
-	SourceLocation currentLocation() const { return m_currentToken.location; }
-	std::string const& currentLiteral() const { return m_currentToken.literal; }
-	std::tuple<unsigned, unsigned> const& currentTokenInfo() const { return m_currentToken.extendedTokenInfo; }
+	SourceLocation currentLocation() const { return m_tokens[Current].location; }
+	std::string const& currentLiteral() const { return m_tokens[Current].literal; }
+	std::tuple<unsigned, unsigned> const& currentTokenInfo() const { return m_tokens[Current].extendedTokenInfo; }
 	/// Retrieves the last error that occurred during lexical analysis.
 	/// @note If no error occurred, the value is undefined.
-	ScannerError currentError() const noexcept { return m_currentToken.error; }
+	ScannerError currentError() const noexcept { return m_tokens[Current].error; }
 	///@}
 	///@{
 	///@name Information about the current comment token
-	SourceLocation currentCommentLocation() const { return m_skippedComment.location; }
-	std::string const& currentCommentLiteral() const { return m_skippedComment.literal; }
+	SourceLocation currentCommentLocation() const { return m_skippedComments[Current].location; }
+	std::string const& currentCommentLiteral() const { return m_skippedComments[Current].literal; }
 	/// Called by the parser during FunctionDefinition parsing to clear the current comment
-	void clearCurrentCommentLiteral() { m_skippedComment.literal.clear(); }
+	void clearCurrentCommentLiteral() { m_skippedComments[Current].literal.clear(); }
 	///@}
@@ -154,9 +154,11 @@ public:
 	///@name Information about the next token
 	/// @returns the next token without advancing input.
-	Token peekNextToken() const { return m_nextToken.token; }
-	SourceLocation peekLocation() const { return m_nextToken.location; }
-	std::string const& peekLiteral() const { return m_nextToken.literal; }
+	Token peekNextToken() const { return m_tokens[Next].token; }
+	SourceLocation peekLocation() const { return m_tokens[Next].location; }
+	std::string const& peekLiteral() const { return m_tokens[Next].literal; }
+	Token peekNextNextToken() const { return m_tokens[NextNext].token; }
 	///@}
 	///@{
@@ -176,7 +178,7 @@ public:
 private:
 	inline Token setError(ScannerError _error) noexcept
 	{
-		m_nextToken.error = _error;
+		m_tokens[NextNext].error = _error;
 		return Token::Illegal;
 	}
@@ -192,8 +194,8 @@ private:
 	///@{
 	///@name Literal buffer support
-	inline void addLiteralChar(char c) { m_nextToken.literal.push_back(c); }
-	inline void addCommentLiteralChar(char c) { m_nextSkippedComment.literal.push_back(c); }
+	inline void addLiteralChar(char c) { m_tokens[NextNext].literal.push_back(c); }
+	inline void addCommentLiteralChar(char c) { m_skippedComments[NextNext].literal.push_back(c); }
 	inline void addLiteralCharAndAdvance() { addLiteralChar(m_char); advance(); }
 	void addUnicodeAsUTF8(unsigned codepoint);
 	///@}
@@ -252,11 +254,10 @@ private:
 	bool m_supportPeriodInIdentifier = false;
-	TokenDesc m_skippedComment; // desc for current skipped comment
-	TokenDesc m_nextSkippedComment; // desc for next skipped comment
+	enum TokenIndex { Current, Next, NextNext };
-	TokenDesc m_currentToken; // desc for current token (as returned by Next())
-	TokenDesc m_nextToken; // desc for next token (one token look-ahead)
+	TokenDesc m_skippedComments[3] = {}; // desc for the current, next and nextnext skipped comment
+	TokenDesc m_tokens[3] = {}; // desc for the current, next and nextnext token
 	std::shared_ptr<CharStream> m_source;
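Caller-side view, as a hypothetical sketch rather than code from this commit: with peekNextNextToken() a parser can branch on the token after next without advancing the scanner or calling rescan(). The helper name and the particular token pattern below are assumptions for illustration; only the Scanner accessors are the ones declared above.

#include <liblangutil/Scanner.h>

using namespace solidity::langutil;

// Hypothetical helper: pick a grammar branch using two tokens of lookahead.
bool startsWithParenthesizedIdentifier(Scanner const& _scanner)
{
	return
		_scanner.currentToken() == Token::LParen &&
		_scanner.peekNextToken() == Token::Identifier &&
		_scanner.peekNextNextToken() == Token::RParen;
}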