From 0e5abbd4a9ae5dd2a5daf175fe078f9faa533583 Mon Sep 17 00:00:00 2001 From: Alex Beregszaszi Date: Wed, 23 Sep 2020 13:21:30 +0100 Subject: [PATCH] Display location of invalid UTF-8 sequence in unicode literals in SyntaxChecker --- Changelog.md | 1 + libsolidity/analysis/SyntaxChecker.cpp | 5 +++-- .../libsolidity/syntaxTests/string/invalid_utf8_sequence.sol | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Changelog.md b/Changelog.md index be82d098b..579613841 100644 --- a/Changelog.md +++ b/Changelog.md @@ -12,6 +12,7 @@ Compiler Features: * SMTChecker: Support structs. * SMTChecker: Support ``type(T).min``, ``type(T).max``, and ``type(I).interfaceId``. * SMTChecker: Support ``address`` type conversion with literals, e.g. ``address(0)``. + * Syntax Checker: Report position of first invalid UTF-8 sequence in ``unicode""`` literals. * Type Checker: More detailed error messages why implicit conversions fail. * Type Checker: Explain why oversized hex string literals can not be explicitly converted to a shorter ``bytesNN`` type. * Yul Optimizer: Prune unused parameters in functions. diff --git a/libsolidity/analysis/SyntaxChecker.cpp b/libsolidity/analysis/SyntaxChecker.cpp index 670939321..076592053 100644 --- a/libsolidity/analysis/SyntaxChecker.cpp +++ b/libsolidity/analysis/SyntaxChecker.cpp @@ -219,11 +219,12 @@ bool SyntaxChecker::visit(Throw const& _throwStatement) bool SyntaxChecker::visit(Literal const& _literal) { - if ((_literal.token() == Token::UnicodeStringLiteral) && !validateUTF8(_literal.value())) + size_t invalidSequence; + if ((_literal.token() == Token::UnicodeStringLiteral) && !validateUTF8(_literal.value(), invalidSequence)) m_errorReporter.syntaxError( 8452_error, _literal.location(), - "Invalid UTF-8 sequence found" + "Contains invalid UTF-8 sequence at position " + toString(invalidSequence) + "."
); if (_literal.token() != Token::Number) diff --git a/test/libsolidity/syntaxTests/string/invalid_utf8_sequence.sol b/test/libsolidity/syntaxTests/string/invalid_utf8_sequence.sol index f5d5077f2..7655757a7 100644 --- a/test/libsolidity/syntaxTests/string/invalid_utf8_sequence.sol +++ b/test/libsolidity/syntaxTests/string/invalid_utf8_sequence.sol @@ -2,5 +2,5 @@ contract C { string s = unicode"À"; } // ---- -// SyntaxError 8452: (28-38): Invalid UTF-8 sequence found +// SyntaxError 8452: (28-38): Contains invalid UTF-8 sequence at position 0. // TypeError 7407: (28-38): Type literal_string (contains invalid UTF-8 sequence at position 0) is not implicitly convertible to expected type string storage ref.