Fixes doxygen style multiline comment parsing for files with CRLF as line terminators.

This commit is contained in:
Christian Parpart 2019-08-05 11:53:24 +02:00 committed by Christian Parpart
parent 45583895fc
commit f85f6ba7e0
5 changed files with 57 additions and 15 deletions

View File

@ -15,6 +15,7 @@ Compiler Features:
Bugfixes:
* Fix internal error when popping a dynamic storage array of mappings.
* Yul Optimizer: Fix reordering bug in connection with shifted one and mul/div-instructions in for loop conditions.
* Scanner: Fix multi-line natspec comment parsing with triple slashes when file is encoded with CRLF instead of LF.
### 0.5.11 (2019-08-12)

View File

@ -30,11 +30,6 @@ inline bool isHexDigit(char c)
('A' <= c && c <= 'F');
}
/// @returns true iff @a c is the line feed character ('\n').
/// A carriage return ('\r') is not treated as a line terminator by this helper.
inline bool isLineTerminator(char c)
{
	char const lineFeed = '\n';
	return lineFeed == c;
}
inline bool isWhiteSpace(char c)
{
return c == ' ' || c == '\n' || c == '\t' || c == '\r';

View File

@ -280,6 +280,29 @@ Token Scanner::skipSingleLineComment()
return Token::Whitespace;
}
/// Checks whether the character currently under the scanner is a line feed or a
/// carriage return. Only inspects the current character; nothing is consumed.
bool Scanner::atEndOfLine() const
{
	bool const isLineFeed = m_char == '\n';
	bool const isCarriageReturn = m_char == '\r';
	return isLineFeed || isCarriageReturn;
}
/// Consumes one line terminator (LF, CR or CRLF) if the scanner is positioned on one.
/// @returns true if a terminator was consumed; false (with nothing consumed) if the
/// current character is neither '\n' nor '\r'.
bool Scanner::tryScanEndOfLine()
{
	// Remember what we started on before advancing changes m_char.
	bool const startedOnLF = m_char == '\n';
	bool const startedOnCR = m_char == '\r';
	if (!startedOnLF && !startedOnCR)
		return false;

	// Step over the LF or CR itself.
	// NOTE(review): the result of advance() is used as "there is a next character to
	// look at" - confirm against its definition.
	bool const hasNext = advance();

	// A CR directly followed by LF forms one CRLF terminator, so swallow the LF as well.
	if (startedOnCR && hasNext && m_char == '\n')
		advance();

	return true;
}
Token Scanner::scanSingleLineDocComment()
{
LiteralScope literal(this, LITERAL_TYPE_COMMENT);
@ -289,7 +312,7 @@ Token Scanner::scanSingleLineDocComment()
while (!isSourcePastEndOfInput())
{
if (isLineTerminator(m_char))
if (tryScanEndOfLine())
{
// check if next line is also a documentation comment
skipWhitespace();
@ -303,7 +326,6 @@ Token Scanner::scanSingleLineDocComment()
}
else
break; // next line is not a documentation comment, we are done
}
else if (isUnicodeLinebreak())
// Any line terminator that is not '\n' is considered to end the
@ -343,13 +365,13 @@ Token Scanner::scanMultiLineDocComment()
bool endFound = false;
bool charsAdded = false;
while (isWhiteSpace(m_char) && !isLineTerminator(m_char))
while (isWhiteSpace(m_char) && !atEndOfLine())
advance();
while (!isSourcePastEndOfInput())
{
//handle newlines in multiline comments
if (isLineTerminator(m_char))
if (atEndOfLine())
{
skipWhitespace();
if (!m_source->isPastEndOfInput(1) && m_source->get(0) == '*' && m_source->get(1) == '*')
@ -664,10 +686,12 @@ void Scanner::scanToken()
bool Scanner::scanEscape()
{
char c = m_char;
advance();
// Skip escaped newlines.
if (isLineTerminator(c))
if (tryScanEndOfLine())
return true;
advance();
switch (c)
{
case '\'': // fall through

View File

@ -219,6 +219,12 @@ private:
Token skipSingleLineComment();
Token skipMultiLineComment();
/// Tests whether the current character is CR or LF, i.e. whether the scanner is
/// positioned at the start of a CR, LF or CRLF line terminator.
/// Does not consume any input.
bool atEndOfLine() const;
/// Tries to consume one line terminator (CR, LF or CRLF) at the current position.
/// @returns true if a terminator was consumed, false if the current character is
/// neither CR nor LF (in which case nothing is consumed).
bool tryScanEndOfLine();
void scanDecimalDigits();
Token scanNumber(char _charSeen = 0);
std::tuple<Token, unsigned, unsigned> scanIdentifierOrKeyword();

View File

@ -568,7 +568,7 @@ BOOST_AUTO_TEST_CASE(multiline_comment_at_eos)
BOOST_AUTO_TEST_CASE(regular_line_break_in_single_line_comment)
{
for (auto const& nl: {"\r", "\n"})
for (auto const& nl: {"\r", "\n", "\r\n"})
{
Scanner scanner(CharStream("// abc " + string(nl) + " def ", ""));
BOOST_CHECK_EQUAL(scanner.currentCommentLiteral(), "");
@ -595,7 +595,7 @@ BOOST_AUTO_TEST_CASE(irregular_line_breaks_in_single_line_comment)
BOOST_AUTO_TEST_CASE(regular_line_breaks_in_single_line_doc_comment)
{
for (auto const& nl: {"\r", "\n"})
for (auto const& nl: {"\r", "\n", "\r\n"})
{
Scanner scanner(CharStream("/// abc " + string(nl) + " def ", ""));
BOOST_CHECK_EQUAL(scanner.currentCommentLiteral(), "abc ");
@ -605,6 +605,22 @@ BOOST_AUTO_TEST_CASE(regular_line_breaks_in_single_line_doc_comment)
}
}
BOOST_AUTO_TEST_CASE(regular_line_breaks_in_multiline_doc_comment)
{
	// CR, LF and CRLF must each be accepted as a valid line terminator between
	// consecutive `///` documentation comment lines. Whichever one is used in the
	// source, the comment literal is expected to contain a single LF in its place.
	auto const expectedComment = "Hello\n World"s;
	for (auto const& lineBreak: {"\r"s, "\n"s, "\r\n"s})
	{
		Scanner scanner{CharStream{"/// Hello" + lineBreak + "/// World" + lineBreak + "ident", ""}};

		// The doc comment is attached to the first real token, the identifier.
		auto const& commentLiteral = scanner.currentCommentLiteral();
		BOOST_CHECK_EQUAL(commentLiteral, expectedComment);
		BOOST_CHECK_EQUAL(scanner.currentCommentLiteral(), expectedComment);

		BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Identifier);
		BOOST_CHECK_EQUAL(scanner.currentLiteral(), "ident");

		// Nothing may follow the identifier.
		BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
	}
}
BOOST_AUTO_TEST_CASE(irregular_line_breaks_in_single_line_doc_comment)
{
for (auto const& nl: {"\v", "\f", "\xE2\x80\xA8", "\xE2\x80\xA9"})
@ -622,9 +638,9 @@ BOOST_AUTO_TEST_CASE(irregular_line_breaks_in_single_line_doc_comment)
BOOST_AUTO_TEST_CASE(regular_line_breaks_in_strings)
{
for (auto const& nl: {"\n", "\r"})
for (auto const& nl: {"\r"s, "\n"s, "\r\n"s})
{
Scanner scanner(CharStream("\"abc " + string(nl) + " def\"", ""));
Scanner scanner(CharStream("\"abc " + nl + " def\"", ""));
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal);
BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier);
BOOST_CHECK_EQUAL(scanner.currentLiteral(), "def");