Merge pull request #7174 from ethereum/doxygen-multiline-comments-and-crlf

Fixes doxygen style multiline comment parsing for files with CRLF
This commit is contained in:
Christian Parpart 2019-09-30 09:45:39 +02:00 committed by GitHub
commit 8847647547
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 57 additions and 15 deletions

View File

@ -15,6 +15,7 @@ Compiler Features:
Bugfixes: Bugfixes:
* Fix internal error when popping a dynamic storage array of mappings. * Fix internal error when popping a dynamic storage array of mappings.
* Yul Optimizer: Fix reordering bug in connection with shifted one and mul/div-instructions in for loop conditions. * Yul Optimizer: Fix reordering bug in connection with shifted one and mul/div-instructions in for loop conditions.
* Scanner: Fix multi-line natspec comment parsing with triple slashes when file is encoded with CRLF instead of LF.
### 0.5.11 (2019-08-12) ### 0.5.11 (2019-08-12)

View File

@ -30,11 +30,6 @@ inline bool isHexDigit(char c)
('A' <= c && c <= 'F'); ('A' <= c && c <= 'F');
} }
/// Tells whether @a c is the line feed character.
/// Only '\n' qualifies; every other character, including '\r', yields false.
inline bool isLineTerminator(char c)
{
	char const lineFeed = '\n';
	return lineFeed == c;
}
inline bool isWhiteSpace(char c) inline bool isWhiteSpace(char c)
{ {
return c == ' ' || c == '\n' || c == '\t' || c == '\r'; return c == ' ' || c == '\n' || c == '\t' || c == '\r';

View File

@ -280,6 +280,29 @@ Token Scanner::skipSingleLineComment()
return Token::Whitespace; return Token::Whitespace;
} }
/// Checks whether the current character is a line terminator character.
/// @returns true if the current character is LF ('\n') or CR ('\r'), false otherwise.
/// Nothing is consumed.
bool Scanner::atEndOfLine() const
{
	switch (m_char)
	{
	case '\n':
	case '\r':
		return true;
	default:
		return false;
	}
}
/// Consumes one line terminator at the current position, if there is one.
/// Recognised terminators are LF, CR, and the two-character sequence CRLF,
/// which is consumed as a single unit.
/// @returns true if a terminator was consumed; false if the current character
/// is neither '\n' nor '\r', in which case nothing is consumed.
bool Scanner::tryScanEndOfLine()
{
	switch (m_char)
	{
	case '\n':
		advance();
		return true;
	case '\r':
		// A CR may be the first half of a CRLF pair: step over the CR and,
		// if input remains and an LF follows, swallow that LF as well.
		if (advance() && m_char == '\n')
			advance();
		return true;
	default:
		return false;
	}
}
Token Scanner::scanSingleLineDocComment() Token Scanner::scanSingleLineDocComment()
{ {
LiteralScope literal(this, LITERAL_TYPE_COMMENT); LiteralScope literal(this, LITERAL_TYPE_COMMENT);
@ -289,7 +312,7 @@ Token Scanner::scanSingleLineDocComment()
while (!isSourcePastEndOfInput()) while (!isSourcePastEndOfInput())
{ {
if (isLineTerminator(m_char)) if (tryScanEndOfLine())
{ {
// check if next line is also a documentation comment // check if next line is also a documentation comment
skipWhitespace(); skipWhitespace();
@ -303,7 +326,6 @@ Token Scanner::scanSingleLineDocComment()
} }
else else
break; // next line is not a documentation comment, we are done break; // next line is not a documentation comment, we are done
} }
else if (isUnicodeLinebreak()) else if (isUnicodeLinebreak())
// Any line terminator that is not '\n' is considered to end the // Any line terminator that is not '\n' is considered to end the
@ -343,13 +365,13 @@ Token Scanner::scanMultiLineDocComment()
bool endFound = false; bool endFound = false;
bool charsAdded = false; bool charsAdded = false;
while (isWhiteSpace(m_char) && !isLineTerminator(m_char)) while (isWhiteSpace(m_char) && !atEndOfLine())
advance(); advance();
while (!isSourcePastEndOfInput()) while (!isSourcePastEndOfInput())
{ {
//handle newlines in multiline comments //handle newlines in multiline comments
if (isLineTerminator(m_char)) if (atEndOfLine())
{ {
skipWhitespace(); skipWhitespace();
if (!m_source->isPastEndOfInput(1) && m_source->get(0) == '*' && m_source->get(1) == '*') if (!m_source->isPastEndOfInput(1) && m_source->get(0) == '*' && m_source->get(1) == '*')
@ -664,10 +686,12 @@ void Scanner::scanToken()
bool Scanner::scanEscape() bool Scanner::scanEscape()
{ {
char c = m_char; char c = m_char;
advance();
// Skip escaped newlines. // Skip escaped newlines.
if (isLineTerminator(c)) if (tryScanEndOfLine())
return true; return true;
advance();
switch (c) switch (c)
{ {
case '\'': // fall through case '\'': // fall through

View File

@ -219,6 +219,12 @@ private:
Token skipSingleLineComment(); Token skipSingleLineComment();
Token skipMultiLineComment(); Token skipMultiLineComment();
/// Tests whether the current character is a line terminator character, i.e. CR or LF
/// (this also matches the first character of a CRLF pair). Does not consume any input.
bool atEndOfLine() const;
/// Tries to consume a single line terminator (LF, CR, or CRLF as one unit) at the
/// current position. @returns true if a terminator was consumed, false otherwise.
bool tryScanEndOfLine();
void scanDecimalDigits(); void scanDecimalDigits();
Token scanNumber(char _charSeen = 0); Token scanNumber(char _charSeen = 0);
std::tuple<Token, unsigned, unsigned> scanIdentifierOrKeyword(); std::tuple<Token, unsigned, unsigned> scanIdentifierOrKeyword();

View File

@ -568,7 +568,7 @@ BOOST_AUTO_TEST_CASE(multiline_comment_at_eos)
BOOST_AUTO_TEST_CASE(regular_line_break_in_single_line_comment) BOOST_AUTO_TEST_CASE(regular_line_break_in_single_line_comment)
{ {
for (auto const& nl: {"\r", "\n"}) for (auto const& nl: {"\r", "\n", "\r\n"})
{ {
Scanner scanner(CharStream("// abc " + string(nl) + " def ", "")); Scanner scanner(CharStream("// abc " + string(nl) + " def ", ""));
BOOST_CHECK_EQUAL(scanner.currentCommentLiteral(), ""); BOOST_CHECK_EQUAL(scanner.currentCommentLiteral(), "");
@ -595,7 +595,7 @@ BOOST_AUTO_TEST_CASE(irregular_line_breaks_in_single_line_comment)
BOOST_AUTO_TEST_CASE(regular_line_breaks_in_single_line_doc_comment) BOOST_AUTO_TEST_CASE(regular_line_breaks_in_single_line_doc_comment)
{ {
for (auto const& nl: {"\r", "\n"}) for (auto const& nl: {"\r", "\n", "\r\n"})
{ {
Scanner scanner(CharStream("/// abc " + string(nl) + " def ", "")); Scanner scanner(CharStream("/// abc " + string(nl) + " def ", ""));
BOOST_CHECK_EQUAL(scanner.currentCommentLiteral(), "abc "); BOOST_CHECK_EQUAL(scanner.currentCommentLiteral(), "abc ");
@ -605,6 +605,22 @@ BOOST_AUTO_TEST_CASE(regular_line_breaks_in_single_line_doc_comment)
} }
} }
BOOST_AUTO_TEST_CASE(regular_line_breaks_in_multiline_doc_comment)
{
	// CR, LF and CRLF must all be accepted as valid line terminators between
	// consecutive `///` documentation comment lines. Whichever terminator the
	// input uses, the resulting comment literal is expected to contain a plain LF.
	for (auto const& lineBreak: {"\r"s, "\n"s, "\r\n"s})
	{
		string const source = "/// Hello" + lineBreak + "/// World" + lineBreak + "ident";
		Scanner scanner{CharStream{source, ""}};

		// The doc comment spanning both lines is available right after construction.
		auto const& commentLiteral = scanner.currentCommentLiteral();
		BOOST_CHECK_EQUAL(commentLiteral, "Hello\n World");
		BOOST_CHECK_EQUAL(scanner.currentCommentLiteral(), "Hello\n World");

		// The identifier following the comment is the current token, then end of stream.
		BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Identifier);
		BOOST_CHECK_EQUAL(scanner.currentLiteral(), "ident");
		BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
	}
}
BOOST_AUTO_TEST_CASE(irregular_line_breaks_in_single_line_doc_comment) BOOST_AUTO_TEST_CASE(irregular_line_breaks_in_single_line_doc_comment)
{ {
for (auto const& nl: {"\v", "\f", "\xE2\x80\xA8", "\xE2\x80\xA9"}) for (auto const& nl: {"\v", "\f", "\xE2\x80\xA8", "\xE2\x80\xA9"})
@ -622,9 +638,9 @@ BOOST_AUTO_TEST_CASE(irregular_line_breaks_in_single_line_doc_comment)
BOOST_AUTO_TEST_CASE(regular_line_breaks_in_strings) BOOST_AUTO_TEST_CASE(regular_line_breaks_in_strings)
{ {
for (auto const& nl: {"\n", "\r"}) for (auto const& nl: {"\r"s, "\n"s, "\r\n"s})
{ {
Scanner scanner(CharStream("\"abc " + string(nl) + " def\"", "")); Scanner scanner(CharStream("\"abc " + nl + " def\"", ""));
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal); BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal);
BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier); BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier);
BOOST_CHECK_EQUAL(scanner.currentLiteral(), "def"); BOOST_CHECK_EQUAL(scanner.currentLiteral(), "def");