Merge pull request #524 from LefterisJP/sol_parse_comments

Solidity scanner taking documentation comments into account
2023-10-03 13:03:40 +00:00 · 2014-11-21 15:35:17 +01:00 · 2014-11-21 15:35:17 +01:00 · c7b933b4db
commit c7b933b4db
parent 3ba9649dde 3b16ffa8ab
3 changed files with 66 additions and 12 deletions
--- a/Scanner.cpp
+++ b/Scanner.cpp
@ -104,11 +104,16 @@ int HexValue(char c)

 void Scanner::reset(CharStream const& _source)
 {
+	bool foundDocComment;
 	m_source = _source;
 	m_char = m_source.get();
 	skipWhitespace();
-	scanToken();
-	next();
+	foundDocComment = scanToken();
+
+	// special version of Scanner::next() taking the previous scanToken() result into account
+	m_current_token = m_next_token;
+	if (scanToken() || foundDocComment)
+		m_skipped_comment = m_next_skipped_comment;
 }


@ -137,7 +142,8 @@ BOOST_STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);
 Token::Value Scanner::next()
 {
 	m_current_token = m_next_token;
-	scanToken();
+	if (scanToken())
+		m_skipped_comment = m_next_skipped_comment;
 	return m_current_token.token;
 }

@ -171,6 +177,20 @@ Token::Value Scanner::skipSingleLineComment()
 	return Token::WHITESPACE;
 }

+/// Consumes a single-line `///` doc comment up to (not including) the line terminator or end of input; always returns COMMENT_LITERAL.
+Token::Value Scanner::scanDocumentationComment()
+{
+	LiteralScope literal(this); // NOTE(review): nothing visible here resets m_next_skipped_comment.literal before appending — confirm
+	advance(); // consume the third '/' of the "///" marker (the caller has already consumed the first two)
+	while (!isSourcePastEndOfInput() && !IsLineTerminator(m_char))
+	{
+		addCommentLiteralChar(m_char); // appended to m_next_skipped_comment.literal, not to the token literal
+		advance();
+	}
+	literal.Complete();
+	return Token::COMMENT_LITERAL;
+}
+
 Token::Value Scanner::skipMultiLineComment()
 {
 	if (asserts(m_char == '*'))
@ -194,8 +214,9 @@ Token::Value Scanner::skipMultiLineComment()
 	return Token::ILLEGAL;
 }

-void Scanner::scanToken()
+bool Scanner::scanToken()
 {
+	bool foundDocComment = false;
 	m_next_token.literal.clear();
 	Token::Value token;
 	do
@ -297,7 +318,22 @@ void Scanner::scanToken()
 			// /  // /* /=
 			advance();
 			if (m_char == '/')
-				token = skipSingleLineComment();
+			{
+				if (!advance()) /* double slash comment directly before EOS */
+					token = Token::WHITESPACE;
+				else if (m_char == '/')
+				{
+					Token::Value comment;
+					m_next_skipped_comment.location.start = getSourcePos();
+					comment = scanDocumentationComment();
+					m_next_skipped_comment.location.end = getSourcePos();
+					m_next_skipped_comment.token = comment;
+					token = Token::WHITESPACE;
+					foundDocComment = true;
+				}
+				else
+					token = skipSingleLineComment();
+			}
 			else if (m_char == '*')
 				token = skipMultiLineComment();
 			else if (m_char == '=')
@ -389,6 +425,8 @@ void Scanner::scanToken()
 	while (token == Token::WHITESPACE);
 	m_next_token.location.end = getSourcePos();
 	m_next_token.token = token;
+
+	return foundDocComment;
 }

 bool Scanner::scanEscape()
@ -532,9 +570,9 @@ Token::Value Scanner::scanNumber(char _charSeen)
 // ----------------------------------------------------------------------------
 // Keyword Matcher

-#define KEYWORDS(KEYWORD_GROUP, KEYWORD)                                     \
+#define KEYWORDS(KEYWORD_GROUP, KEYWORD)                                       \
 	KEYWORD_GROUP('a')                                                         \
-	KEYWORD("address", Token::ADDRESS)                                           \
+	KEYWORD("address", Token::ADDRESS)                                         \
 	KEYWORD_GROUP('b')                                                         \
 	KEYWORD("break", Token::BREAK)                                             \
 	KEYWORD("bool", Token::BOOL)                                               \
--- a/Scanner.h
+++ b/Scanner.h
@ -116,18 +116,27 @@ public:
 	/// Resets the scanner as if newly constructed with _input as input.
 	void reset(CharStream const& _source);

-	/// Returns the next token and advances input.
+	/// Returns the next token and advances input
 	Token::Value next();

 	///@{
 	///@name Information about the current token

 	/// Returns the current token
-	Token::Value getCurrentToken() { return m_current_token.token; }
+	Token::Value getCurrentToken()
+	{
+		return m_current_token.token;
+	}
 	Location getCurrentLocation() const { return m_current_token.location; }
 	std::string const& getCurrentLiteral() const { return m_current_token.literal; }
 	///@}

+	///@{
+	///@name Information about the current comment token
+	Location getCurrentCommentLocation() const { return m_skipped_comment.location; }
+	std::string const& getCurrentCommentLiteral() const { return m_skipped_comment.literal; }
+	///@}
+
 	///@{
 	///@name Information about the next token

@ -146,7 +155,7 @@ public:
 	///@}

 private:
-	// Used for the current and look-ahead token.
+	/// Used for the current and look-ahead token and comments
 	struct TokenDesc
 	{
 		Token::Value token;
@ -158,6 +167,7 @@ private:
 	///@name Literal buffer support
 	inline void startNewLiteral() { m_next_token.literal.clear(); }
 	inline void addLiteralChar(char c) { m_next_token.literal.push_back(c); }
+	inline void addCommentLiteralChar(char c) { m_next_skipped_comment.literal.push_back(c); }
 	inline void dropLiteral() { m_next_token.literal.clear(); }
 	inline void addLiteralCharAndAdvance() { addLiteralChar(m_char); advance(); }
 	///@}
@ -171,8 +181,9 @@ private:

 	bool scanHexByte(char& o_scannedByte);

-	/// Scans a single JavaScript token.
-	void scanToken();
+	/// Scans a single Solidity token. Returns true if the scanned token was
+	/// a skipped documentation comment. False in all other cases.
+	bool scanToken();

 	/// Skips all whitespace and @returns true if something was skipped.
 	bool skipWhitespace();
@ -184,6 +195,7 @@ private:
 	Token::Value scanIdentifierOrKeyword();

 	Token::Value scanString();
+	Token::Value scanDocumentationComment();

 	/// Scans an escape-sequence which is part of a string and adds the
 	/// decoded character to the current literal. Returns true if a pattern
@ -194,6 +206,9 @@ private:
 	int getSourcePos() { return m_source.getPos(); }
 	bool isSourcePastEndOfInput() { return m_source.isPastEndOfInput(); }

+	TokenDesc m_skipped_comment;  // desc for current skipped comment
+	TokenDesc m_next_skipped_comment; // desc for next skipped comment
+
 	TokenDesc m_current_token;  // desc for current token (as returned by Next())
 	TokenDesc m_next_token;     // desc for next token (one token look-ahead)

--- a/Token.h
+++ b/Token.h
@ -281,6 +281,7 @@ namespace solidity
 	K(FALSE_LITERAL, "false", 0)                                       \
 	T(NUMBER, NULL, 0)                                                 \
 	T(STRING_LITERAL, NULL, 0)                                         \
+	T(COMMENT_LITERAL, NULL, 0)                                        \
 	\
 	/* Identifiers (not keywords or future reserved words). */         \
 	T(IDENTIFIER, NULL, 0)                                             \