diff --git a/Changelog.md b/Changelog.md index b2e080a05..de65421ae 100644 --- a/Changelog.md +++ b/Changelog.md @@ -10,6 +10,7 @@ Features: * Inline Assembly: introduce ``keccak256`` as an opcode. ``sha3`` is still a valid alias. Bugfixes: + * Type Checker: Make UTF8-validation a bit more sloppy to include more valid sequences. * Fixed crash concerning non-callable types. * Unused variable warnings no longer issued for variables used inside inline assembly. * Inline Assembly: Enforce function arguments when parsing functional instructions. diff --git a/libdevcore/UTF8.cpp b/libdevcore/UTF8.cpp index 9fbf4b45a..449ccc5db 100644 --- a/libdevcore/UTF8.cpp +++ b/libdevcore/UTF8.cpp @@ -40,7 +40,7 @@ bool validateUTF8(std::string const& _input, size_t& _invalidPosition) continue; size_t count = 0; - switch(_input[i] & 0xe0) { + switch(_input[i] & 0xf0) { case 0xc0: count = 1; break; case 0xe0: count = 2; break; case 0xf0: count = 3; break; diff --git a/test/libsolidity/SolidityNameAndTypeResolution.cpp b/test/libsolidity/SolidityNameAndTypeResolution.cpp index 70934543b..0b3cb481b 100644 --- a/test/libsolidity/SolidityNameAndTypeResolution.cpp +++ b/test/libsolidity/SolidityNameAndTypeResolution.cpp @@ -2422,6 +2422,16 @@ BOOST_AUTO_TEST_CASE(invalid_utf8_explicit) CHECK_ERROR(sourceCode, TypeError, "Explicit type conversion not allowed"); } +BOOST_AUTO_TEST_CASE(large_utf8_codepoint) +{ + char const* sourceCode = R"( + contract C { + string s = "\xf0\x9f\xa6\x84"; + } + )"; + CHECK_SUCCESS(sourceCode); +} + BOOST_AUTO_TEST_CASE(string_index) { char const* sourceCode = R"(