Merge pull request #4912 from ethereum/fixNewline

Fix bugs in comments.
This commit is contained in:
chriseth 2018-09-10 12:25:07 +02:00 committed by GitHub
commit 255eda2ea6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 167 additions and 33 deletions

View File

@ -117,6 +117,9 @@ Bugfixes:
* Type Checker: Fix internal error when array index value is too large. * Type Checker: Fix internal error when array index value is too large.
* Type System: Allow arbitrary exponents for literals with a mantissa of zero. * Type System: Allow arbitrary exponents for literals with a mantissa of zero.
* Parser: Fix incorrect source location for nameless parameters. * Parser: Fix incorrect source location for nameless parameters.
* Parser: Treat unicode line endings as terminating strings and single-line comments.
* Parser: Disallow unterminated multi-line comments at the end of input.
* Parser: Treat ``/** /`` as unterminated multi-line comment.
### 0.4.24 (2018-05-16) ### 0.4.24 (2018-05-16)

View File

@ -243,22 +243,17 @@ bool Scanner::skipWhitespace()
return sourcePos() != startPosition; return sourcePos() != startPosition;
} }
bool Scanner::skipWhitespaceExceptLF() void Scanner::skipWhitespaceExceptUnicodeLinebreak()
{ {
int const startPosition = sourcePos(); while (isWhiteSpace(m_char) && !isUnicodeLinebreak())
while (isWhiteSpace(m_char) && !isLineTerminator(m_char))
advance(); advance();
// Return whether or not we skipped any characters.
return sourcePos() != startPosition;
} }
Token::Value Scanner::skipSingleLineComment() Token::Value Scanner::skipSingleLineComment()
{ {
// The line terminator at the end of the line is not considered // Line terminator is not part of the comment. If it is a
// to be part of the single-line comment; it is recognized // non-ascii line terminator, it will result in a parser error.
// separately by the lexical grammar and becomes part of the while (!isUnicodeLinebreak())
// stream of input elements for the syntactic grammar
while (!isLineTerminator(m_char))
if (!advance()) break; if (!advance()) break;
return Token::Whitespace; return Token::Whitespace;
@ -268,7 +263,9 @@ Token::Value Scanner::scanSingleLineDocComment()
{ {
LiteralScope literal(this, LITERAL_TYPE_COMMENT); LiteralScope literal(this, LITERAL_TYPE_COMMENT);
advance(); //consume the last '/' at /// advance(); //consume the last '/' at ///
skipWhitespaceExceptLF();
skipWhitespaceExceptUnicodeLinebreak();
while (!isSourcePastEndOfInput()) while (!isSourcePastEndOfInput())
{ {
if (isLineTerminator(m_char)) if (isLineTerminator(m_char))
@ -287,6 +284,10 @@ Token::Value Scanner::scanSingleLineDocComment()
break; // next line is not a documentation comment, we are done break; // next line is not a documentation comment, we are done
} }
else if (isUnicodeLinebreak())
// Any line terminator that is not '\n' is considered to end the
// comment.
break;
addCommentLiteralChar(m_char); addCommentLiteralChar(m_char);
advance(); advance();
} }
@ -321,6 +322,9 @@ Token::Value Scanner::scanMultiLineDocComment()
bool endFound = false; bool endFound = false;
bool charsAdded = false; bool charsAdded = false;
while (isWhiteSpace(m_char) && !isLineTerminator(m_char))
advance();
while (!isSourcePastEndOfInput()) while (!isSourcePastEndOfInput())
{ {
//handle newlines in multiline comments //handle newlines in multiline comments
@ -390,23 +394,26 @@ Token::Value Scanner::scanSlash()
{ {
// doxygen style /** natspec comment // doxygen style /** natspec comment
if (!advance()) /* slash star comment before EOS */ if (!advance()) /* slash star comment before EOS */
return Token::Whitespace; return Token::Illegal;
else if (m_char == '*') else if (m_char == '*')
{ {
advance(); //consume the last '*' at /** advance(); //consume the last '*' at /**
skipWhitespaceExceptLF();
// special case of a closed normal multiline comment // "/**/"
if (!m_source.isPastEndOfInput() && m_source.get(0) == '/') if (m_char == '/')
advance(); //skip the closing slash
else // we actually have a multiline documentation comment
{ {
advance(); //skip the closing slash
return Token::Whitespace;
}
// we actually have a multiline documentation comment
Token::Value comment; Token::Value comment;
m_nextSkippedComment.location.start = firstSlashPosition; m_nextSkippedComment.location.start = firstSlashPosition;
comment = scanMultiLineDocComment(); comment = scanMultiLineDocComment();
m_nextSkippedComment.location.end = sourcePos(); m_nextSkippedComment.location.end = sourcePos();
m_nextSkippedComment.token = comment; m_nextSkippedComment.token = comment;
} if (comment == Token::Illegal)
return Token::Illegal;
else
return Token::Whitespace; return Token::Whitespace;
} }
else else
@ -435,11 +442,6 @@ void Scanner::scanToken()
m_nextToken.location.start = sourcePos(); m_nextToken.location.start = sourcePos();
switch (m_char) switch (m_char)
{ {
case '\n':
case ' ':
case '\t':
token = selectToken(Token::Whitespace);
break;
case '"': case '"':
case '\'': case '\'':
token = scanString(); token = scanString();
@ -675,18 +677,38 @@ bool Scanner::scanEscape()
if (!scanHexByte(c)) if (!scanHexByte(c))
return false; return false;
break; break;
default:
return false;
} }
addLiteralChar(c); addLiteralChar(c);
return true; return true;
} }
// Tells whether the scanner is positioned at a line break: either one of the
// single-byte characters 0x0a..0x0d, or the UTF-8 byte sequence of NEL, LS or PS
// beginning at the current source position.
bool Scanner::isUnicodeLinebreak()
{
	// Line feed, vertical tab, form feed and carriage return form one contiguous range.
	if (m_char >= 0x0a && m_char <= 0x0d)
		return true;

	// NEL - U+0085, encoded as C2 85 in utf8. Only look ahead if the second byte exists.
	bool const secondByteAvailable = !m_source.isPastEndOfInput(1);
	if (
		secondByteAvailable &&
		static_cast<uint8_t>(m_source.get(0)) == 0xc2 &&
		static_cast<uint8_t>(m_source.get(1)) == 0x85
	)
		return true;

	// The remaining candidates are three bytes long, so a third byte must exist.
	if (m_source.isPastEndOfInput(2))
		return false;

	// Both LS and PS share the prefix E2 80.
	if (static_cast<uint8_t>(m_source.get(0)) != 0xe2 || static_cast<uint8_t>(m_source.get(1)) != 0x80)
		return false;

	// LS - U+2028, E2 80 A8 in utf8
	// PS - U+2029, E2 80 A9 in utf8
	uint8_t const lastByte = static_cast<uint8_t>(m_source.get(2));
	return lastByte == 0xa8 || lastByte == 0xa9;
}
Token::Value Scanner::scanString() Token::Value Scanner::scanString()
{ {
char const quote = m_char; char const quote = m_char;
advance(); // consume quote advance(); // consume quote
LiteralScope literal(this, LITERAL_TYPE_STRING); LiteralScope literal(this, LITERAL_TYPE_STRING);
while (m_char != quote && !isSourcePastEndOfInput() && !isLineTerminator(m_char)) while (m_char != quote && !isSourcePastEndOfInput() && !isUnicodeLinebreak())
{ {
char c = m_char; char c = m_char;
advance(); advance();
@ -710,7 +732,7 @@ Token::Value Scanner::scanHexString()
char const quote = m_char; char const quote = m_char;
advance(); // consume quote advance(); // consume quote
LiteralScope literal(this, LITERAL_TYPE_STRING); LiteralScope literal(this, LITERAL_TYPE_STRING);
while (m_char != quote && !isSourcePastEndOfInput() && !isLineTerminator(m_char)) while (m_char != quote && !isSourcePastEndOfInput())
{ {
char c = m_char; char c = m_char;
if (!scanHexByte(c)) if (!scanHexByte(c))

View File

@ -197,8 +197,8 @@ private:
/// Skips all whitespace and @returns true if something was skipped. /// Skips all whitespace and @returns true if something was skipped.
bool skipWhitespace(); bool skipWhitespace();
/// Skips all whitespace except Line feeds and returns true if something was skipped /// Skips all whitespace characters that are neither '\r' nor '\n'.
bool skipWhitespaceExceptLF(); void skipWhitespaceExceptUnicodeLinebreak();
Token::Value skipSingleLineComment(); Token::Value skipSingleLineComment();
Token::Value skipMultiLineComment(); Token::Value skipMultiLineComment();
@ -218,6 +218,9 @@ private:
/// is scanned. /// is scanned.
bool scanEscape(); bool scanEscape();
/// @returns true iff we are currently positioned at a unicode line break.
bool isUnicodeLinebreak();
/// Return the current source position. /// Return the current source position.
int sourcePos() const { return m_source.position(); } int sourcePos() const { return m_source.position(); }
bool isSourcePastEndOfInput() const { return m_source.isPastEndOfInput(); } bool isSourcePastEndOfInput() const { return m_source.isPastEndOfInput(); }

View File

@ -23,6 +23,8 @@
#include <libsolidity/parsing/Scanner.h> #include <libsolidity/parsing/Scanner.h>
#include <boost/test/unit_test.hpp> #include <boost/test/unit_test.hpp>
using namespace std;
namespace dev namespace dev
{ {
namespace solidity namespace solidity
@ -500,6 +502,110 @@ BOOST_AUTO_TEST_CASE(invalid_hex_literal_nonhex_string)
BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal); BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal);
} }
BOOST_AUTO_TEST_CASE(invalid_multiline_comment_close)
{
	// "/** /" (with a space before the slash) must not count as a closed comment.
	// Earlier this input was scanned as "comment", "identifier".
	CharStream stream("/** / x");
	Scanner scanner(stream);

	// The whole input is reported as a single illegal token followed by end of stream.
	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal);
	BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
}
BOOST_AUTO_TEST_CASE(multiline_doc_comment_at_eos)
{
	// A doc comment opener with nothing after it is expected to be illegal.
	// Earlier this input was scanned as "whitespace".
	CharStream stream("/**");
	Scanner scanner(stream);

	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal);
	BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
}
BOOST_AUTO_TEST_CASE(multiline_comment_at_eos)
{
	// A plain multi-line comment opener with nothing after it is expected to be illegal.
	CharStream stream("/*");
	Scanner scanner(stream);

	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal);
	BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
}
BOOST_AUTO_TEST_CASE(regular_line_break_in_single_line_comment)
{
	// For '\r' and '\n': the comment yields no comment literal and the
	// identifier after the line break is the first token.
	for (char const* lineBreak: {"\r", "\n"})
	{
		string const input = "// abc " + string(lineBreak) + " def ";
		CharStream stream(input);
		Scanner scanner(stream);

		BOOST_CHECK_EQUAL(scanner.currentCommentLiteral(), "");
		BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Identifier);
		BOOST_CHECK_EQUAL(scanner.currentLiteral(), "def");
		BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
	}
}
BOOST_AUTO_TEST_CASE(irregular_line_breaks_in_single_line_comment)
{
	// Vertical tab, form feed, LS (E2 80 A8) and PS (E2 80 A9) end the comment
	// but are expected to surface as illegal tokens, one per byte.
	for (char const* lineBreak: {"\v", "\f", "\xE2\x80\xA8", "\xE2\x80\xA9"})
	{
		string const input = "// abc " + string(lineBreak) + " def ";
		CharStream stream(input);
		Scanner scanner(stream);

		BOOST_CHECK_EQUAL(scanner.currentCommentLiteral(), "");
		// The first byte of the line break is the current (illegal) token ...
		BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal);
		// ... and each remaining byte is expected to give one more illegal token.
		size_t const remainingBytes = string(lineBreak).size() - 1;
		for (size_t seen = 0; seen < remainingBytes; ++seen)
			BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal);

		BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier);
		BOOST_CHECK_EQUAL(scanner.currentLiteral(), "def");
		BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
	}
}
BOOST_AUTO_TEST_CASE(regular_line_breaks_in_single_line_doc_comment)
{
	// For '\r' and '\n': the doc comment text up to the line break becomes the
	// comment literal and the identifier after it is the first token.
	for (char const* lineBreak: {"\r", "\n"})
	{
		string const input = "/// abc " + string(lineBreak) + " def ";
		CharStream stream(input);
		Scanner scanner(stream);

		BOOST_CHECK_EQUAL(scanner.currentCommentLiteral(), "abc ");
		BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Identifier);
		BOOST_CHECK_EQUAL(scanner.currentLiteral(), "def");
		BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
	}
}
BOOST_AUTO_TEST_CASE(irregular_line_breaks_in_single_line_doc_comment)
{
	// Vertical tab, form feed, LS (E2 80 A8) and PS (E2 80 A9) end the doc comment
	// but are expected to surface as illegal tokens, one per byte.
	for (char const* lineBreak: {"\v", "\f", "\xE2\x80\xA8", "\xE2\x80\xA9"})
	{
		string const input = "/// abc " + string(lineBreak) + " def ";
		CharStream stream(input);
		Scanner scanner(stream);

		// The comment literal holds the text in front of the line break.
		BOOST_CHECK_EQUAL(scanner.currentCommentLiteral(), "abc ");
		// The first byte of the line break is the current (illegal) token ...
		BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal);
		// ... and each remaining byte is expected to give one more illegal token.
		size_t const remainingBytes = string(lineBreak).size() - 1;
		for (size_t seen = 0; seen < remainingBytes; ++seen)
			BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal);

		BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier);
		BOOST_CHECK_EQUAL(scanner.currentLiteral(), "def");
		BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
	}
}
BOOST_AUTO_TEST_CASE(regular_line_breaks_in_strings)
{
	// A '\n' or '\r' inside a string literal: the opening part is illegal, the
	// text after the break scans as an identifier, and the trailing quote is
	// illegal again.
	for (char const* lineBreak: {"\n", "\r"})
	{
		string const input = "\"abc " + string(lineBreak) + " def\"";
		CharStream stream(input);
		Scanner scanner(stream);

		BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal);
		BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier);
		BOOST_CHECK_EQUAL(scanner.currentLiteral(), "def");
		BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal);
		BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
	}
}
BOOST_AUTO_TEST_CASE(irregular_line_breaks_in_strings)
{
	// Vertical tab, form feed, LS (E2 80 A8) and PS (E2 80 A9) inside a string
	// literal: the opening part is illegal, and every byte of the line break is
	// expected to be an illegal token of its own.
	for (char const* lineBreak: {"\v", "\f", "\xE2\x80\xA8", "\xE2\x80\xA9"})
	{
		string const input = "\"abc " + string(lineBreak) + " def\"";
		CharStream stream(input);
		Scanner scanner(stream);

		BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal);
		size_t const breakBytes = string(lineBreak).size();
		for (size_t seen = 0; seen < breakBytes; ++seen)
			BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal);

		// After the break the rest scans as an identifier, then the dangling quote.
		BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier);
		BOOST_CHECK_EQUAL(scanner.currentLiteral(), "def");
		BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal);
		BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
	}
}
BOOST_AUTO_TEST_SUITE_END() BOOST_AUTO_TEST_SUITE_END()