Add check that regular and unicode string literals are well formatted

This commit is contained in:
Alex Beregszaszi 2020-07-02 17:39:04 +01:00
parent 6fe8e63eee
commit 6eb60bc8cd
9 changed files with 39 additions and 7 deletions

View File

@ -8,6 +8,7 @@ Breaking changes:
* Parser: Disallow ``gwei`` as identifier. * Parser: Disallow ``gwei`` as identifier.
* Parser: Disallow dot syntax for ``value`` and ``gas``. * Parser: Disallow dot syntax for ``value`` and ``gas``.
* Parser: Disallow non-printable characters in string literals. * Parser: Disallow non-printable characters in string literals.
* Parser: Introduce Unicode string literals: ``unicode"😃"``.
* Parser: NatSpec comments on variables are only allowed for public state variables. * Parser: NatSpec comments on variables are only allowed for public state variables.
* Parser: Remove the ``finney`` and ``szabo`` denominations. * Parser: Remove the ``finney`` and ``szabo`` denominations.
* Parser: Remove the identifier ``now`` (replaced by ``block.timestamp``). * Parser: Remove the identifier ``now`` (replaced by ``block.timestamp``).

View File

@ -509,7 +509,7 @@ void Scanner::scanToken()
{ {
case '"': case '"':
case '\'': case '\'':
token = scanString(); token = scanString(false);
break; break;
case '<': case '<':
// < <= << <<= // < <= << <<=
@ -684,6 +684,18 @@ void Scanner::scanToken()
else else
token = setError(ScannerError::IllegalToken); token = setError(ScannerError::IllegalToken);
} }
else if (token == Token::Unicode)
{
// reset
m = 0;
n = 0;
// Special quoted unicode string must follow
if (m_char == '"' || m_char == '\'')
token = scanString(true);
else
token = setError(ScannerError::IllegalToken);
}
} }
else if (isDecimalDigit(m_char)) else if (isDecimalDigit(m_char))
token = scanNumber(); token = scanNumber();
@ -775,7 +787,7 @@ bool Scanner::isUnicodeLinebreak()
return false; return false;
} }
Token Scanner::scanString() Token Scanner::scanString(bool const _isUnicode)
{ {
char const quote = m_char; char const quote = m_char;
advance(); // consume quote advance(); // consume quote
@ -791,11 +803,13 @@ Token Scanner::scanString()
} }
else else
{ {
// Report error on non-printable characters in string literals. // Report error on non-printable characters in string literals, however
// allow anything for unicode string literals, because their validity will
// be verified later (in the syntax checker).
// //
// We are using a manual range and not isprint() to avoid // We are using a manual range and not isprint() to avoid
// any potential complications with locale. // any potential complications with locale.
if (static_cast<unsigned>(c) <= 0x1f || static_cast<unsigned>(c) >= 0x7f) if (!_isUnicode && (static_cast<unsigned>(c) <= 0x1f || static_cast<unsigned>(c) >= 0x7f))
return setError(ScannerError::IllegalCharacterInString); return setError(ScannerError::IllegalCharacterInString);
addLiteralChar(c); addLiteralChar(c);
} }
@ -804,7 +818,7 @@ Token Scanner::scanString()
return setError(ScannerError::IllegalStringEndQuote); return setError(ScannerError::IllegalStringEndQuote);
literal.complete(); literal.complete();
advance(); // consume quote advance(); // consume quote
return Token::StringLiteral; return _isUnicode ? Token::UnicodeStringLiteral : Token::StringLiteral;
} }
Token Scanner::scanHexString() Token Scanner::scanHexString()

View File

@ -229,7 +229,7 @@ private:
Token scanNumber(char _charSeen = 0); Token scanNumber(char _charSeen = 0);
std::tuple<Token, unsigned, unsigned> scanIdentifierOrKeyword(); std::tuple<Token, unsigned, unsigned> scanIdentifierOrKeyword();
Token scanString(); Token scanString(bool const _isUnicode);
Token scanHexString(); Token scanHexString();
/// Scans a single line comment and returns its corrected end position. /// Scans a single line comment and returns its corrected end position.
size_t scanSingleLineDocComment(); size_t scanSingleLineDocComment();

View File

@ -190,6 +190,7 @@ namespace solidity::langutil
K(Throw, "throw", 0) \ K(Throw, "throw", 0) \
K(Try, "try", 0) \ K(Try, "try", 0) \
K(Type, "type", 0) \ K(Type, "type", 0) \
K(Unicode, "unicode", 0) \
K(Using, "using", 0) \ K(Using, "using", 0) \
K(View, "view", 0) \ K(View, "view", 0) \
K(Virtual, "virtual", 0) \ K(Virtual, "virtual", 0) \
@ -227,6 +228,7 @@ namespace solidity::langutil
K(FalseLiteral, "false", 0) \ K(FalseLiteral, "false", 0) \
T(Number, nullptr, 0) \ T(Number, nullptr, 0) \
T(StringLiteral, nullptr, 0) \ T(StringLiteral, nullptr, 0) \
T(UnicodeStringLiteral, nullptr, 0) \
T(HexStringLiteral, nullptr, 0) \ T(HexStringLiteral, nullptr, 0) \
T(CommentLiteral, nullptr, 0) \ T(CommentLiteral, nullptr, 0) \
\ \

View File

@ -28,6 +28,8 @@
#include <liblangutil/ErrorReporter.h> #include <liblangutil/ErrorReporter.h>
#include <liblangutil/SemVerHandler.h> #include <liblangutil/SemVerHandler.h>
#include <libsolutil/UTF8.h>
#include <boost/algorithm/string.hpp> #include <boost/algorithm/string.hpp>
#include <memory> #include <memory>
@ -37,7 +39,7 @@ using namespace std;
using namespace solidity; using namespace solidity;
using namespace solidity::langutil; using namespace solidity::langutil;
using namespace solidity::frontend; using namespace solidity::frontend;
using namespace solidity::util;
bool SyntaxChecker::checkSyntax(ASTNode const& _astRoot) bool SyntaxChecker::checkSyntax(ASTNode const& _astRoot)
{ {
@ -217,6 +219,13 @@ bool SyntaxChecker::visit(Throw const& _throwStatement)
bool SyntaxChecker::visit(Literal const& _literal) bool SyntaxChecker::visit(Literal const& _literal)
{ {
if ((_literal.token() == Token::UnicodeStringLiteral) && !validateUTF8(_literal.value()))
m_errorReporter.syntaxError(
8452_error,
_literal.location(),
"Invalid UTF-8 sequence found"
);
if (_literal.token() != Token::Number) if (_literal.token() != Token::Number)
return true; return true;

View File

@ -920,6 +920,8 @@ string ASTJsonConverter::literalTokenKind(Token _token)
return "number"; return "number";
case Token::StringLiteral: case Token::StringLiteral:
return "string"; return "string";
case Token::UnicodeStringLiteral:
return "unicodeString";
case Token::HexStringLiteral: case Token::HexStringLiteral:
return "hexString"; return "hexString";
case Token::TrueLiteral: case Token::TrueLiteral:

View File

@ -943,6 +943,8 @@ Token ASTJsonImporter::literalTokenKind(Json::Value const& _node)
tok = Token::Number; tok = Token::Number;
else if (_node["kind"].asString() == "string") else if (_node["kind"].asString() == "string")
tok = Token::StringLiteral; tok = Token::StringLiteral;
else if (_node["kind"].asString() == "unicodeString")
tok = Token::UnicodeStringLiteral;
else if (_node["kind"].asString() == "hexString") else if (_node["kind"].asString() == "hexString")
tok = Token::HexStringLiteral; tok = Token::HexStringLiteral;
else if (_node["kind"].asString() == "bool") else if (_node["kind"].asString() == "bool")

View File

@ -349,6 +349,7 @@ TypePointer TypeProvider::forLiteral(Literal const& _literal)
case Token::Number: case Token::Number:
return rationalNumber(_literal); return rationalNumber(_literal);
case Token::StringLiteral: case Token::StringLiteral:
case Token::UnicodeStringLiteral:
case Token::HexStringLiteral: case Token::HexStringLiteral:
return stringLiteral(_literal.value()); return stringLiteral(_literal.value());
default: default:

View File

@ -1782,6 +1782,7 @@ ASTPointer<Expression> Parser::parsePrimaryExpression()
} }
break; break;
case Token::StringLiteral: case Token::StringLiteral:
case Token::UnicodeStringLiteral:
case Token::HexStringLiteral: case Token::HexStringLiteral:
{ {
string literal = m_scanner->currentLiteral(); string literal = m_scanner->currentLiteral();