Merge pull request #9867 from ethereum/string-literals

Clearer error messages when converting (hex) string literals
This commit is contained in:
chriseth 2020-09-24 12:49:46 +02:00 committed by GitHub
commit 0a3b836f5a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 41 additions and 24 deletions

View File

@ -12,6 +12,7 @@ Compiler Features:
* SMTChecker: Support structs.
* SMTChecker: Support ``type(T).min``, ``type(T).max``, and ``type(I).interfaceId``.
* SMTChecker: Support ``address`` type conversion with literals, e.g. ``address(0)``.
* Type Checker: Report position of first invalid UTF-8 sequence in ``unicode""`` literals.
* Type Checker: More detailed error messages why implicit conversions fail.
* Type Checker: Explain why oversized hex string literals cannot be explicitly converted to a shorter ``bytesNN`` type.
* Yul Optimizer: Prune unused parameters in functions.

View File

@ -219,11 +219,12 @@ bool SyntaxChecker::visit(Throw const& _throwStatement)
bool SyntaxChecker::visit(Literal const& _literal)
{
if ((_literal.token() == Token::UnicodeStringLiteral) && !validateUTF8(_literal.value()))
size_t invalidSequence;
if ((_literal.token() == Token::UnicodeStringLiteral) && !validateUTF8(_literal.value(), invalidSequence))
m_errorReporter.syntaxError(
8452_error,
_literal.location(),
"Invalid UTF-8 sequence found"
"Contains invalid UTF-8 sequence at position " + toString(invalidSequence) + "."
);
if (_literal.token() != Token::Number)

View File

@ -1357,11 +1357,19 @@ BoolResult StringLiteralType::isImplicitlyConvertibleTo(Type const& _convertTo)
return true;
}
else if (auto arrayType = dynamic_cast<ArrayType const*>(&_convertTo))
{
size_t invalidSequence;
if (arrayType->isString() && !util::validateUTF8(value(), invalidSequence))
return BoolResult::err(
"Contains invalid UTF-8 sequence at position " +
util::toString(invalidSequence) +
"."
);
return
arrayType->location() != DataLocation::CallData &&
arrayType->isByteArray() &&
!(arrayType->dataStoredIn(DataLocation::Storage) && arrayType->isPointer()) &&
!(arrayType->isString() && !util::validateUTF8(value()));
!(arrayType->dataStoredIn(DataLocation::Storage) && arrayType->isPointer());
}
else
return false;
}
@ -1382,12 +1390,19 @@ bool StringLiteralType::operator==(Type const& _other) const
std::string StringLiteralType::toString(bool) const
{
size_t invalidSequence;
auto isPrintableASCII = [](string const& s)
{
for (auto c: s)
{
if (static_cast<unsigned>(c) <= 0x1f || static_cast<unsigned>(c) >= 0x7f)
return false;
}
return true;
};
if (!util::validateUTF8(m_value, invalidSequence))
return "literal_string (contains invalid UTF-8 sequence at position " + util::toString(invalidSequence) + ")";
return "literal_string \"" + m_value + "\"";
return isPrintableASCII(m_value) ?
("literal_string \"" + m_value + "\"") :
("literal_string hex\"" + util::toHex(util::asBytes(m_value)) + "\"");
}
TypePointer StringLiteralType::mobileType() const

View File

@ -86,7 +86,7 @@
"typeDescriptions":
{
"typeIdentifier": "t_stringliteral_8b1a944cf13a9a1c08facb2c9e98623ef3254d2ddb48113885c3e8e97fec8db9",
"typeString": "literal_string (contains invalid UTF-8 sequence at position 0)"
"typeString": "literal_string hex\"ff\""
}
},
"nodeType": "VariableDeclarationStatement",

View File

@ -131,7 +131,7 @@
"isPure": true,
"lValueRequested": false,
"token": "hexString",
"type": "literal_string (contains invalid UTF-8 sequence at position 0)"
"type": "literal_string hex\"ff\""
},
"id": 5,
"name": "Literal",

View File

@ -86,7 +86,7 @@
"typeDescriptions":
{
"typeIdentifier": "t_stringliteral_cd7a99177cebb3d14b8cc54e313dbf76867c71cd6fbb9a33ce3870dc80e9992b",
"typeString": "literal_string \"Hello \ud83d\ude03\""
"typeString": "literal_string hex\"48656c6c6f20f09f9883\""
},
"value": "Hello \ud83d\ude03"
},

View File

@ -131,7 +131,7 @@
"isPure": true,
"lValueRequested": false,
"token": "unicodeString",
"type": "literal_string \"Hello \ud83d\ude03\"",
"type": "literal_string hex\"48656c6c6f20f09f9883\"",
"value": "Hello \ud83d\ude03"
},
"id": 5,

View File

@ -2,4 +2,4 @@ contract C {
string s = string("\xa0\x00");
}
// ----
// TypeError 9640: (28-46): Explicit type conversion not allowed from "literal_string (contains invalid UTF-8 sequence at position 0)" to "string memory".
// TypeError 9640: (28-46): Explicit type conversion not allowed from "literal_string hex"a000"" to "string memory". Contains invalid UTF-8 sequence at position 0.

View File

@ -2,4 +2,4 @@ contract C {
string s = hex"a000";
}
// ----
// TypeError 7407: (28-37): Type literal_string (contains invalid UTF-8 sequence at position 0) is not implicitly convertible to expected type string storage ref.
// TypeError 7407: (28-37): Type literal_string hex"a000" is not implicitly convertible to expected type string storage ref. Contains invalid UTF-8 sequence at position 0.

View File

@ -2,4 +2,4 @@ contract C {
string s = "\xa0\x00";
}
// ----
// TypeError 7407: (28-38): Type literal_string (contains invalid UTF-8 sequence at position 0) is not implicitly convertible to expected type string storage ref.
// TypeError 7407: (28-38): Type literal_string hex"a000" is not implicitly convertible to expected type string storage ref. Contains invalid UTF-8 sequence at position 0.

View File

@ -2,5 +2,5 @@ contract C {
string s = unicode"À";
}
// ----
// SyntaxError 8452: (28-38): Invalid UTF-8 sequence found
// TypeError 7407: (28-38): Type literal_string (contains invalid UTF-8 sequence at position 0) is not implicitly convertible to expected type string storage ref.
// SyntaxError 8452: (28-38): Contains invalid UTF-8 sequence at position 0.
// TypeError 7407: (28-38): Type literal_string hex"c0" is not implicitly convertible to expected type string storage ref. Contains invalid UTF-8 sequence at position 0.

View File

@ -4,4 +4,4 @@ contract test {
}
}
// ----
// TypeError 6359: (86-92): Return argument type literal_string (contains invalid UTF-8 sequence at position 0) is not implicitly convertible to expected type (type of first return variable) string memory.
// TypeError 6359: (86-92): Return argument type literal_string hex"c1" is not implicitly convertible to expected type (type of first return variable) string memory. Contains invalid UTF-8 sequence at position 0.

View File

@ -4,4 +4,4 @@ contract C {
}
}
// ----
// TypeError 9640: (76-95): Explicit type conversion not allowed from "literal_string "4V"" to "bytes2". Literal is larger than the type.
// TypeError 9640: (76-95): Explicit type conversion not allowed from "literal_string hex"123456"" to "bytes2". Literal is larger than the type.

View File

@ -14,7 +14,7 @@ contract C {
}
}
// ----
// TypeError 9640: (92-109): Explicit type conversion not allowed from "literal_string "4"" to "bytes1". Literal is larger than the type.
// TypeError 9640: (198-217): Explicit type conversion not allowed from "literal_string "4V"" to "bytes2". Literal is larger than the type.
// TypeError 9640: (310-331): Explicit type conversion not allowed from "literal_string "4Vx"" to "bytes3". Literal is larger than the type.
// TypeError 9640: (430-453): Explicit type conversion not allowed from "literal_string (contains invalid UTF-8 sequence at position 4)" to "bytes4". Literal is larger than the type.
// TypeError 9640: (92-109): Explicit type conversion not allowed from "literal_string hex"1234"" to "bytes1". Literal is larger than the type.
// TypeError 9640: (198-217): Explicit type conversion not allowed from "literal_string hex"123456"" to "bytes2". Literal is larger than the type.
// TypeError 9640: (310-331): Explicit type conversion not allowed from "literal_string hex"12345678"" to "bytes3". Literal is larger than the type.
// TypeError 9640: (430-453): Explicit type conversion not allowed from "literal_string hex"1234567890"" to "bytes4". Literal is larger than the type.