From 7f3d437ffeb9710a3e3aadd5c9eefa477bb57d52 Mon Sep 17 00:00:00 2001 From: Mathias Baumann Date: Mon, 25 May 2020 12:22:11 +0200 Subject: [PATCH] Fix caret position for errors with utf source --- liblangutil/SourceReferenceFormatterHuman.cpp | 6 ++++-- libsolutil/UTF8.cpp | 9 +++++++++ libsolutil/UTF8.h | 2 ++ scripts/isolate_tests.py | 2 +- scripts/wasm-rebuild/docker-scripts/isolate_tests.py | 2 +- test/cmdlineTests/message_format_utf16/err | 11 +++++++++++ test/cmdlineTests/message_format_utf16/input.sol | 3 +++ 7 files changed, 31 insertions(+), 4 deletions(-) create mode 100644 test/cmdlineTests/message_format_utf16/err create mode 100644 test/cmdlineTests/message_format_utf16/input.sol diff --git a/liblangutil/SourceReferenceFormatterHuman.cpp b/liblangutil/SourceReferenceFormatterHuman.cpp index ca947314e..cbf24cf35 100644 --- a/liblangutil/SourceReferenceFormatterHuman.cpp +++ b/liblangutil/SourceReferenceFormatterHuman.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include using namespace std; @@ -103,12 +104,13 @@ void SourceReferenceFormatterHuman::printSourceLocation(SourceReference const& _ m_stream << leftpad << ' '; frameColored() << '|'; m_stream << ' '; + for_each( _ref.text.cbegin(), - _ref.text.cbegin() + _ref.startColumn, + _ref.text.cbegin() + numCodepoints(_ref.text.substr(0, _ref.startColumn)), [this](char ch) { m_stream << (ch == '\t' ? 
'\t' : ' '); } ); - diagColored() << string(locationLength, '^'); + diagColored() << string(numCodepoints(_ref.text.substr(_ref.startColumn, locationLength)), '^'); m_stream << '\n'; } else diff --git a/libsolutil/UTF8.cpp b/libsolutil/UTF8.cpp index a7d55af6c..82c87bb44 100644 --- a/libsolutil/UTF8.cpp +++ b/libsolutil/UTF8.cpp @@ -138,4 +138,13 @@ bool validateUTF8(std::string const& _input, size_t& _invalidPosition) return validateUTF8(reinterpret_cast(_input.c_str()), _input.length(), _invalidPosition); } +size_t numCodepoints(std::string const& _utf8EncodedInput) /* Counts code points by counting every byte that is not a UTF-8 continuation byte (bit pattern 10xxxxxx); the input is not validated. */ +{ + size_t codepoint = 0; + for (char c: _utf8EncodedInput) + codepoint += (c & 0xc0) != 0x80; + + return codepoint; +} + } diff --git a/libsolutil/UTF8.h b/libsolutil/UTF8.h index cd84c3982..fb5ee376c 100644 --- a/libsolutil/UTF8.h +++ b/libsolutil/UTF8.h @@ -38,4 +38,6 @@ inline bool validateUTF8(std::string const& _input) return validateUTF8(_input, invalidPos); } +size_t numCodepoints(std::string const& _utf8EncodedInput); + } diff --git a/scripts/isolate_tests.py b/scripts/isolate_tests.py index 768f94801..0f2bc1d79 100755 --- a/scripts/isolate_tests.py +++ b/scripts/isolate_tests.py @@ -13,7 +13,7 @@ import hashlib from os.path import join, isfile def extract_test_cases(path): - lines = open(path, mode='r', encoding='utf8').read().splitlines() + lines = open(path, encoding="utf8", errors='ignore', mode='r').read().splitlines() inside = False delimiter = '' diff --git a/scripts/wasm-rebuild/docker-scripts/isolate_tests.py b/scripts/wasm-rebuild/docker-scripts/isolate_tests.py index eab461bff..0a7571891 100755 --- a/scripts/wasm-rebuild/docker-scripts/isolate_tests.py +++ b/scripts/wasm-rebuild/docker-scripts/isolate_tests.py @@ -8,7 +8,7 @@ from os.path import join, isfile def extract_test_cases(path): - lines = open(path, 'rb').read().splitlines() + lines = open(path, encoding="utf8", errors='ignore', mode='r').read().splitlines() inside = False delimiter = '' diff --git 
a/test/cmdlineTests/message_format_utf16/err b/test/cmdlineTests/message_format_utf16/err new file mode 100644 index 000000000..abe92d2e5 --- /dev/null +++ b/test/cmdlineTests/message_format_utf16/err @@ -0,0 +1,11 @@ +Warning: SPDX license identifier not provided in source file. Before publishing, consider adding a comment containing "SPDX-License-Identifier: " to each source file. Use "SPDX-License-Identifier: UNLICENSED" for non-open-source code. Please see https://spdx.org for more information. +--> message_format_utf16/input.sol + +Warning: Source file does not specify required compiler version! +--> message_format_utf16/input.sol + +Warning: Statement has no effect. + --> message_format_utf16/input.sol:2:58: + | +2 | /* ©©©©ᄅ©©©©© 2017 */ constructor () public { "©©©©ᄅ©©©©©" ; } + | ^^^^^^^^^^^^ diff --git a/test/cmdlineTests/message_format_utf16/input.sol b/test/cmdlineTests/message_format_utf16/input.sol new file mode 100644 index 000000000..2c1f9007a --- /dev/null +++ b/test/cmdlineTests/message_format_utf16/input.sol @@ -0,0 +1,3 @@ +contract Foo { +/* ©©©©ᄅ©©©©© 2017 */ constructor () public { "©©©©ᄅ©©©©©" ; } +}