mirror of
https://github.com/ethereum/solidity
synced 2023-10-03 13:03:40 +00:00
Add check that regular and unicode string literals are well formatted
This commit is contained in:
parent
6fe8e63eee
commit
6eb60bc8cd
@ -8,6 +8,7 @@ Breaking changes:
|
||||
* Parser: Disallow ``gwei`` as identifier.
|
||||
* Parser: Disallow dot syntax for ``value`` and ``gas``.
|
||||
* Parser: Disallow non-printable characters in string literals.
|
||||
* Parser: Introduce Unicode string literals: ``unicode"😃"``.
|
||||
* Parser: NatSpec comments on variables are only allowed for public state variables.
|
||||
* Parser: Remove the ``finney`` and ``szabo`` denominations.
|
||||
* Parser: Remove the identifier ``now`` (replaced by ``block.timestamp``).
|
||||
|
@ -509,7 +509,7 @@ void Scanner::scanToken()
|
||||
{
|
||||
case '"':
|
||||
case '\'':
|
||||
token = scanString();
|
||||
token = scanString(false);
|
||||
break;
|
||||
case '<':
|
||||
// < <= << <<=
|
||||
@ -684,6 +684,18 @@ void Scanner::scanToken()
|
||||
else
|
||||
token = setError(ScannerError::IllegalToken);
|
||||
}
|
||||
else if (token == Token::Unicode)
|
||||
{
|
||||
// reset
|
||||
m = 0;
|
||||
n = 0;
|
||||
|
||||
// Special quoted unicode string must follow
|
||||
if (m_char == '"' || m_char == '\'')
|
||||
token = scanString(true);
|
||||
else
|
||||
token = setError(ScannerError::IllegalToken);
|
||||
}
|
||||
}
|
||||
else if (isDecimalDigit(m_char))
|
||||
token = scanNumber();
|
||||
@ -775,7 +787,7 @@ bool Scanner::isUnicodeLinebreak()
|
||||
return false;
|
||||
}
|
||||
|
||||
Token Scanner::scanString()
|
||||
Token Scanner::scanString(bool const _isUnicode)
|
||||
{
|
||||
char const quote = m_char;
|
||||
advance(); // consume quote
|
||||
@ -791,11 +803,13 @@ Token Scanner::scanString()
|
||||
}
|
||||
else
|
||||
{
|
||||
// Report error on non-printable characters in string literals.
|
||||
// Report error on non-printable characters in string literals, however
|
||||
// allow anything for unicode string literals, because their validity will
|
||||
// be verified later (in the syntax checker).
|
||||
//
|
||||
// We are using a manual range and not isprint() to avoid
|
||||
// any potential complications with locale.
|
||||
if (static_cast<unsigned>(c) <= 0x1f || static_cast<unsigned>(c) >= 0x7f)
|
||||
if (!_isUnicode && (static_cast<unsigned>(c) <= 0x1f || static_cast<unsigned>(c) >= 0x7f))
|
||||
return setError(ScannerError::IllegalCharacterInString);
|
||||
addLiteralChar(c);
|
||||
}
|
||||
@ -804,7 +818,7 @@ Token Scanner::scanString()
|
||||
return setError(ScannerError::IllegalStringEndQuote);
|
||||
literal.complete();
|
||||
advance(); // consume quote
|
||||
return Token::StringLiteral;
|
||||
return _isUnicode ? Token::UnicodeStringLiteral : Token::StringLiteral;
|
||||
}
|
||||
|
||||
Token Scanner::scanHexString()
|
||||
|
@ -229,7 +229,7 @@ private:
|
||||
Token scanNumber(char _charSeen = 0);
|
||||
std::tuple<Token, unsigned, unsigned> scanIdentifierOrKeyword();
|
||||
|
||||
Token scanString();
|
||||
Token scanString(bool const _isUnicode);
|
||||
Token scanHexString();
|
||||
/// Scans a single line comment and returns its corrected end position.
|
||||
size_t scanSingleLineDocComment();
|
||||
|
@ -190,6 +190,7 @@ namespace solidity::langutil
|
||||
K(Throw, "throw", 0) \
|
||||
K(Try, "try", 0) \
|
||||
K(Type, "type", 0) \
|
||||
K(Unicode, "unicode", 0) \
|
||||
K(Using, "using", 0) \
|
||||
K(View, "view", 0) \
|
||||
K(Virtual, "virtual", 0) \
|
||||
@ -227,6 +228,7 @@ namespace solidity::langutil
|
||||
K(FalseLiteral, "false", 0) \
|
||||
T(Number, nullptr, 0) \
|
||||
T(StringLiteral, nullptr, 0) \
|
||||
T(UnicodeStringLiteral, nullptr, 0) \
|
||||
T(HexStringLiteral, nullptr, 0) \
|
||||
T(CommentLiteral, nullptr, 0) \
|
||||
\
|
||||
|
@ -28,6 +28,8 @@
|
||||
#include <liblangutil/ErrorReporter.h>
|
||||
#include <liblangutil/SemVerHandler.h>
|
||||
|
||||
#include <libsolutil/UTF8.h>
|
||||
|
||||
#include <boost/algorithm/string.hpp>
|
||||
|
||||
#include <memory>
|
||||
@ -37,7 +39,7 @@ using namespace std;
|
||||
using namespace solidity;
|
||||
using namespace solidity::langutil;
|
||||
using namespace solidity::frontend;
|
||||
|
||||
using namespace solidity::util;
|
||||
|
||||
bool SyntaxChecker::checkSyntax(ASTNode const& _astRoot)
|
||||
{
|
||||
@ -217,6 +219,13 @@ bool SyntaxChecker::visit(Throw const& _throwStatement)
|
||||
|
||||
bool SyntaxChecker::visit(Literal const& _literal)
|
||||
{
|
||||
if ((_literal.token() == Token::UnicodeStringLiteral) && !validateUTF8(_literal.value()))
|
||||
m_errorReporter.syntaxError(
|
||||
8452_error,
|
||||
_literal.location(),
|
||||
"Invalid UTF-8 sequence found"
|
||||
);
|
||||
|
||||
if (_literal.token() != Token::Number)
|
||||
return true;
|
||||
|
||||
|
@ -920,6 +920,8 @@ string ASTJsonConverter::literalTokenKind(Token _token)
|
||||
return "number";
|
||||
case Token::StringLiteral:
|
||||
return "string";
|
||||
case Token::UnicodeStringLiteral:
|
||||
return "unicodeString";
|
||||
case Token::HexStringLiteral:
|
||||
return "hexString";
|
||||
case Token::TrueLiteral:
|
||||
|
@ -943,6 +943,8 @@ Token ASTJsonImporter::literalTokenKind(Json::Value const& _node)
|
||||
tok = Token::Number;
|
||||
else if (_node["kind"].asString() == "string")
|
||||
tok = Token::StringLiteral;
|
||||
else if (_node["kind"].asString() == "unicodeString")
|
||||
tok = Token::UnicodeStringLiteral;
|
||||
else if (_node["kind"].asString() == "hexString")
|
||||
tok = Token::HexStringLiteral;
|
||||
else if (_node["kind"].asString() == "bool")
|
||||
|
@ -349,6 +349,7 @@ TypePointer TypeProvider::forLiteral(Literal const& _literal)
|
||||
case Token::Number:
|
||||
return rationalNumber(_literal);
|
||||
case Token::StringLiteral:
|
||||
case Token::UnicodeStringLiteral:
|
||||
case Token::HexStringLiteral:
|
||||
return stringLiteral(_literal.value());
|
||||
default:
|
||||
|
@ -1782,6 +1782,7 @@ ASTPointer<Expression> Parser::parsePrimaryExpression()
|
||||
}
|
||||
break;
|
||||
case Token::StringLiteral:
|
||||
case Token::UnicodeStringLiteral:
|
||||
case Token::HexStringLiteral:
|
||||
{
|
||||
string literal = m_scanner->currentLiteral();
|
||||
|
Loading…
Reference in New Issue
Block a user