mirror of
https://github.com/ethereum/solidity
synced 2023-10-03 13:03:40 +00:00
Merge pull request #9867 from ethereum/string-literals
Clearer error messages when converting (hex) string literals
This commit is contained in:
commit
0a3b836f5a
@ -12,6 +12,7 @@ Compiler Features:
|
|||||||
* SMTChecker: Support structs.
|
* SMTChecker: Support structs.
|
||||||
* SMTChecker: Support ``type(T).min``, ``type(T).max``, and ``type(I).interfaceId``.
|
* SMTChecker: Support ``type(T).min``, ``type(T).max``, and ``type(I).interfaceId``.
|
||||||
* SMTChecker: Support ``address`` type conversion with literals, e.g. ``address(0)``.
|
* SMTChecker: Support ``address`` type conversion with literals, e.g. ``address(0)``.
|
||||||
|
* Type Checker: Report position of first invalid UTF-8 sequence in ``unicode""`` literals.
|
||||||
* Type Checker: More detailed error messages why implicit conversions fail.
|
* Type Checker: More detailed error messages why implicit conversions fail.
|
||||||
* Type Checker: Explain why oversized hex string literals cannot be explicitly converted to a shorter ``bytesNN`` type.
|
* Type Checker: Explain why oversized hex string literals cannot be explicitly converted to a shorter ``bytesNN`` type.
|
||||||
* Yul Optimizer: Prune unused parameters in functions.
|
* Yul Optimizer: Prune unused parameters in functions.
|
||||||
|
@ -219,11 +219,12 @@ bool SyntaxChecker::visit(Throw const& _throwStatement)
|
|||||||
|
|
||||||
bool SyntaxChecker::visit(Literal const& _literal)
|
bool SyntaxChecker::visit(Literal const& _literal)
|
||||||
{
|
{
|
||||||
if ((_literal.token() == Token::UnicodeStringLiteral) && !validateUTF8(_literal.value()))
|
size_t invalidSequence;
|
||||||
|
if ((_literal.token() == Token::UnicodeStringLiteral) && !validateUTF8(_literal.value(), invalidSequence))
|
||||||
m_errorReporter.syntaxError(
|
m_errorReporter.syntaxError(
|
||||||
8452_error,
|
8452_error,
|
||||||
_literal.location(),
|
_literal.location(),
|
||||||
"Invalid UTF-8 sequence found"
|
"Contains invalid UTF-8 sequence at position " + toString(invalidSequence) + "."
|
||||||
);
|
);
|
||||||
|
|
||||||
if (_literal.token() != Token::Number)
|
if (_literal.token() != Token::Number)
|
||||||
|
@ -1357,11 +1357,19 @@ BoolResult StringLiteralType::isImplicitlyConvertibleTo(Type const& _convertTo)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
else if (auto arrayType = dynamic_cast<ArrayType const*>(&_convertTo))
|
else if (auto arrayType = dynamic_cast<ArrayType const*>(&_convertTo))
|
||||||
|
{
|
||||||
|
size_t invalidSequence;
|
||||||
|
if (arrayType->isString() && !util::validateUTF8(value(), invalidSequence))
|
||||||
|
return BoolResult::err(
|
||||||
|
"Contains invalid UTF-8 sequence at position " +
|
||||||
|
util::toString(invalidSequence) +
|
||||||
|
"."
|
||||||
|
);
|
||||||
return
|
return
|
||||||
arrayType->location() != DataLocation::CallData &&
|
arrayType->location() != DataLocation::CallData &&
|
||||||
arrayType->isByteArray() &&
|
arrayType->isByteArray() &&
|
||||||
!(arrayType->dataStoredIn(DataLocation::Storage) && arrayType->isPointer()) &&
|
!(arrayType->dataStoredIn(DataLocation::Storage) && arrayType->isPointer());
|
||||||
!(arrayType->isString() && !util::validateUTF8(value()));
|
}
|
||||||
else
|
else
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -1382,12 +1390,19 @@ bool StringLiteralType::operator==(Type const& _other) const
|
|||||||
|
|
||||||
std::string StringLiteralType::toString(bool) const
|
std::string StringLiteralType::toString(bool) const
|
||||||
{
|
{
|
||||||
size_t invalidSequence;
|
auto isPrintableASCII = [](string const& s)
|
||||||
|
{
|
||||||
|
for (auto c: s)
|
||||||
|
{
|
||||||
|
if (static_cast<unsigned>(c) <= 0x1f || static_cast<unsigned>(c) >= 0x7f)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
|
||||||
if (!util::validateUTF8(m_value, invalidSequence))
|
return isPrintableASCII(m_value) ?
|
||||||
return "literal_string (contains invalid UTF-8 sequence at position " + util::toString(invalidSequence) + ")";
|
("literal_string \"" + m_value + "\"") :
|
||||||
|
("literal_string hex\"" + util::toHex(util::asBytes(m_value)) + "\"");
|
||||||
return "literal_string \"" + m_value + "\"";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TypePointer StringLiteralType::mobileType() const
|
TypePointer StringLiteralType::mobileType() const
|
||||||
|
@ -86,7 +86,7 @@
|
|||||||
"typeDescriptions":
|
"typeDescriptions":
|
||||||
{
|
{
|
||||||
"typeIdentifier": "t_stringliteral_8b1a944cf13a9a1c08facb2c9e98623ef3254d2ddb48113885c3e8e97fec8db9",
|
"typeIdentifier": "t_stringliteral_8b1a944cf13a9a1c08facb2c9e98623ef3254d2ddb48113885c3e8e97fec8db9",
|
||||||
"typeString": "literal_string (contains invalid UTF-8 sequence at position 0)"
|
"typeString": "literal_string hex\"ff\""
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nodeType": "VariableDeclarationStatement",
|
"nodeType": "VariableDeclarationStatement",
|
||||||
|
@ -131,7 +131,7 @@
|
|||||||
"isPure": true,
|
"isPure": true,
|
||||||
"lValueRequested": false,
|
"lValueRequested": false,
|
||||||
"token": "hexString",
|
"token": "hexString",
|
||||||
"type": "literal_string (contains invalid UTF-8 sequence at position 0)"
|
"type": "literal_string hex\"ff\""
|
||||||
},
|
},
|
||||||
"id": 5,
|
"id": 5,
|
||||||
"name": "Literal",
|
"name": "Literal",
|
||||||
|
@ -86,7 +86,7 @@
|
|||||||
"typeDescriptions":
|
"typeDescriptions":
|
||||||
{
|
{
|
||||||
"typeIdentifier": "t_stringliteral_cd7a99177cebb3d14b8cc54e313dbf76867c71cd6fbb9a33ce3870dc80e9992b",
|
"typeIdentifier": "t_stringliteral_cd7a99177cebb3d14b8cc54e313dbf76867c71cd6fbb9a33ce3870dc80e9992b",
|
||||||
"typeString": "literal_string \"Hello \ud83d\ude03\""
|
"typeString": "literal_string hex\"48656c6c6f20f09f9883\""
|
||||||
},
|
},
|
||||||
"value": "Hello \ud83d\ude03"
|
"value": "Hello \ud83d\ude03"
|
||||||
},
|
},
|
||||||
|
@ -131,7 +131,7 @@
|
|||||||
"isPure": true,
|
"isPure": true,
|
||||||
"lValueRequested": false,
|
"lValueRequested": false,
|
||||||
"token": "unicodeString",
|
"token": "unicodeString",
|
||||||
"type": "literal_string \"Hello \ud83d\ude03\"",
|
"type": "literal_string hex\"48656c6c6f20f09f9883\"",
|
||||||
"value": "Hello \ud83d\ude03"
|
"value": "Hello \ud83d\ude03"
|
||||||
},
|
},
|
||||||
"id": 5,
|
"id": 5,
|
||||||
|
@ -2,4 +2,4 @@ contract C {
|
|||||||
string s = string("\xa0\x00");
|
string s = string("\xa0\x00");
|
||||||
}
|
}
|
||||||
// ----
|
// ----
|
||||||
// TypeError 9640: (28-46): Explicit type conversion not allowed from "literal_string (contains invalid UTF-8 sequence at position 0)" to "string memory".
|
// TypeError 9640: (28-46): Explicit type conversion not allowed from "literal_string hex"a000"" to "string memory". Contains invalid UTF-8 sequence at position 0.
|
||||||
|
@ -2,4 +2,4 @@ contract C {
|
|||||||
string s = hex"a000";
|
string s = hex"a000";
|
||||||
}
|
}
|
||||||
// ----
|
// ----
|
||||||
// TypeError 7407: (28-37): Type literal_string (contains invalid UTF-8 sequence at position 0) is not implicitly convertible to expected type string storage ref.
|
// TypeError 7407: (28-37): Type literal_string hex"a000" is not implicitly convertible to expected type string storage ref. Contains invalid UTF-8 sequence at position 0.
|
||||||
|
@ -2,4 +2,4 @@ contract C {
|
|||||||
string s = "\xa0\x00";
|
string s = "\xa0\x00";
|
||||||
}
|
}
|
||||||
// ----
|
// ----
|
||||||
// TypeError 7407: (28-38): Type literal_string (contains invalid UTF-8 sequence at position 0) is not implicitly convertible to expected type string storage ref.
|
// TypeError 7407: (28-38): Type literal_string hex"a000" is not implicitly convertible to expected type string storage ref. Contains invalid UTF-8 sequence at position 0.
|
||||||
|
@ -2,5 +2,5 @@ contract C {
|
|||||||
string s = unicode"À";
|
string s = unicode"À";
|
||||||
}
|
}
|
||||||
// ----
|
// ----
|
||||||
// SyntaxError 8452: (28-38): Invalid UTF-8 sequence found
|
// SyntaxError 8452: (28-38): Contains invalid UTF-8 sequence at position 0.
|
||||||
// TypeError 7407: (28-38): Type literal_string (contains invalid UTF-8 sequence at position 0) is not implicitly convertible to expected type string storage ref.
|
// TypeError 7407: (28-38): Type literal_string hex"c0" is not implicitly convertible to expected type string storage ref. Contains invalid UTF-8 sequence at position 0.
|
||||||
|
@ -4,4 +4,4 @@ contract test {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
// ----
|
// ----
|
||||||
// TypeError 6359: (86-92): Return argument type literal_string (contains invalid UTF-8 sequence at position 0) is not implicitly convertible to expected type (type of first return variable) string memory.
|
// TypeError 6359: (86-92): Return argument type literal_string hex"c1" is not implicitly convertible to expected type (type of first return variable) string memory. Contains invalid UTF-8 sequence at position 0.
|
||||||
|
@ -4,4 +4,4 @@ contract C {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
// ----
|
// ----
|
||||||
// TypeError 9640: (76-95): Explicit type conversion not allowed from "literal_string "4V"" to "bytes2". Literal is larger than the type.
|
// TypeError 9640: (76-95): Explicit type conversion not allowed from "literal_string hex"123456"" to "bytes2". Literal is larger than the type.
|
||||||
|
@ -14,7 +14,7 @@ contract C {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
// ----
|
// ----
|
||||||
// TypeError 9640: (92-109): Explicit type conversion not allowed from "literal_string "4"" to "bytes1". Literal is larger than the type.
|
// TypeError 9640: (92-109): Explicit type conversion not allowed from "literal_string hex"1234"" to "bytes1". Literal is larger than the type.
|
||||||
// TypeError 9640: (198-217): Explicit type conversion not allowed from "literal_string "4V"" to "bytes2". Literal is larger than the type.
|
// TypeError 9640: (198-217): Explicit type conversion not allowed from "literal_string hex"123456"" to "bytes2". Literal is larger than the type.
|
||||||
// TypeError 9640: (310-331): Explicit type conversion not allowed from "literal_string "4Vx"" to "bytes3". Literal is larger than the type.
|
// TypeError 9640: (310-331): Explicit type conversion not allowed from "literal_string hex"12345678"" to "bytes3". Literal is larger than the type.
|
||||||
// TypeError 9640: (430-453): Explicit type conversion not allowed from "literal_string (contains invalid UTF-8 sequence at position 4)" to "bytes4". Literal is larger than the type.
|
// TypeError 9640: (430-453): Explicit type conversion not allowed from "literal_string hex"1234567890"" to "bytes4". Literal is larger than the type.
|
||||||
|
Loading…
Reference in New Issue
Block a user