Merge pull request #9012 from ethereum/wrong-offset-for-utf8

Fix caret position for errors in UTF-8 encoded source
This commit is contained in:
chriseth 2020-05-25 15:15:59 +02:00 committed by GitHub
commit e7f97cf3ac
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 31 additions and 4 deletions

View File

@ -21,6 +21,7 @@
#include <liblangutil/SourceReferenceFormatterHuman.h>
#include <liblangutil/Scanner.h>
#include <liblangutil/Exceptions.h>
#include <libsolutil/UTF8.h>
#include <iomanip>
using namespace std;
@ -103,12 +104,13 @@ void SourceReferenceFormatterHuman::printSourceLocation(SourceReference const& _
m_stream << leftpad << ' ';
frameColored() << '|';
m_stream << ' ';
for_each(
_ref.text.cbegin(),
_ref.text.cbegin() + _ref.startColumn,
_ref.text.cbegin() + numCodepoints(_ref.text.substr(0, _ref.startColumn)),
[this](char ch) { m_stream << (ch == '\t' ? '\t' : ' '); }
);
diagColored() << string(locationLength, '^');
diagColored() << string(numCodepoints(_ref.text.substr(_ref.startColumn, locationLength)), '^');
m_stream << '\n';
}
else

View File

@ -138,4 +138,13 @@ bool validateUTF8(std::string const& _input, size_t& _invalidPosition)
return validateUTF8(reinterpret_cast<unsigned char const*>(_input.c_str()), _input.length(), _invalidPosition);
}
/// @returns the number of code points in @a _utf8EncodedInput, determined by counting
/// every byte that is not a UTF-8 continuation byte (bit pattern 10xxxxxx).
/// The input is not validated; it is only scanned byte by byte.
size_t numCodepoints(std::string const& _utf8EncodedInput)
{
	size_t total = 0;
	for (size_t index = 0; index < _utf8EncodedInput.size(); ++index)
	{
		// Work on the unsigned byte value so the shift below is independent of char signedness.
		unsigned char const byte = static_cast<unsigned char>(_utf8EncodedInput[index]);
		// Continuation bytes have their two most significant bits set to "10".
		bool const isContinuation = (byte >> 6) == 0x2;
		if (!isContinuation)
			++total;
	}
	return total;
}
}

View File

@ -38,4 +38,6 @@ inline bool validateUTF8(std::string const& _input)
return validateUTF8(_input, invalidPos);
}
size_t numCodepoints(std::string const& _utf8EncodedInput);
}

View File

@ -13,7 +13,7 @@ import hashlib
from os.path import join, isfile
def extract_test_cases(path):
lines = open(path, mode='r', encoding='utf8').read().splitlines()
lines = open(path, encoding="utf8", errors='ignore', mode='r').read().splitlines()
inside = False
delimiter = ''

View File

@ -8,7 +8,7 @@ from os.path import join, isfile
def extract_test_cases(path):
lines = open(path, 'rb').read().splitlines()
lines = open(path, encoding="utf8", errors='ignore', mode='rb').read().splitlines()
inside = False
delimiter = ''

View File

@ -0,0 +1,11 @@
Warning: SPDX license identifier not provided in source file. Before publishing, consider adding a comment containing "SPDX-License-Identifier: <SPDX-License>" to each source file. Use "SPDX-License-Identifier: UNLICENSED" for non-open-source code. Please see https://spdx.org for more information.
--> message_format_utf16/input.sol
Warning: Source file does not specify required compiler version!
--> message_format_utf16/input.sol
Warning: Statement has no effect.
--> message_format_utf16/input.sol:2:58:
|
2 | /* ©©©©ᄅ©©©©© 2017 */ constructor () public { "©©©©ᄅ©©©©©" ; }
| ^^^^^^^^^^^^

View File

@ -0,0 +1,3 @@
contract Foo {
/* ©©©©ᄅ©©©©© 2017 */ constructor () public { "©©©©ᄅ©©©©©" ; }
}