Merge pull request #2386 from ethereum/utf8-fix

Fix UTF-8 validation for high code points (U+10000 and above, i.e. four-byte sequences)
This commit is contained in:
chriseth 2017-06-16 16:41:59 +02:00 committed by GitHub
commit 3abadc7122
3 changed files with 12 additions and 1 deletions

View File

@ -10,6 +10,7 @@ Features:
* Inline Assembly: introduce ``keccak256`` as an opcode. ``sha3`` is still a valid alias.
Bugfixes:
* Type Checker: Make UTF8-validation a bit more sloppy to include more valid sequences.
* Fixed crash concerning non-callable types.
* Unused variable warnings no longer issued for variables used inside inline assembly.
* Inline Assembly: Enforce function arguments when parsing functional instructions.

View File

@ -40,7 +40,7 @@ bool validateUTF8(std::string const& _input, size_t& _invalidPosition)
continue;
size_t count = 0;
switch(_input[i] & 0xe0) {
switch(_input[i] & 0xf0) {
case 0xc0: count = 1; break;
case 0xe0: count = 2; break;
case 0xf0: count = 3; break;

View File

@ -2422,6 +2422,16 @@ BOOST_AUTO_TEST_CASE(invalid_utf8_explicit)
CHECK_ERROR(sourceCode, TypeError, "Explicit type conversion not allowed");
}
// Regression test for four-byte UTF-8 sequences: the contract below stores a
// string literal written as the escapes \xf0\x9f\xa6\x84. The bytes
// F0 9F A6 84 are the UTF-8 encoding of code point U+1F984, which lies above
// U+FFFF and therefore needs a lead byte of the form 0xF0.
BOOST_AUTO_TEST_CASE(large_utf8_codepoint)
{
// Raw string literal: the backslashes reach the compiler under test verbatim,
// so the escapes are presumably decoded by the Solidity scanner rather than
// by the C++ compiler -- TODO confirm against the scanner.
char const* sourceCode = R"(
contract C {
string s = "\xf0\x9f\xa6\x84";
}
)";
// CHECK_SUCCESS is defined outside this view; presumably it asserts that the
// source is accepted without any error being reported.
CHECK_SUCCESS(sourceCode);
}
BOOST_AUTO_TEST_CASE(string_index)
{
char const* sourceCode = R"(