namespace dev
{

/// Checks whether @a _input is a structurally well-formed UTF-8 byte sequence.
///
/// Every byte must be either plain ASCII (< 0x80) or the lead byte of a
/// 2-, 3- or 4-byte sequence followed by the matching number of continuation
/// bytes (10xxxxxx). This is a structural check only: overlong encodings,
/// surrogate code points and values above U+10FFFF are not rejected.
///
/// @param _input the bytes to inspect.
/// @param _invalidPosition set only on failure, to the index of the offending
///        byte: the lead byte when it is not a legal lead or when the sequence
///        is cut off by the end of the input, otherwise the first byte that
///        should have been a continuation byte but is not.
/// @returns true if the input is valid, false otherwise.
bool validate(std::string const& _input, size_t& _invalidPosition)
{
	size_t const length = _input.length();

	for (size_t i = 0; i < length; ++i)
	{
		// Work on the unsigned value so that the comparisons below do not
		// depend on whether plain char is signed.
		unsigned char const lead = static_cast<unsigned char>(_input[i]);
		if (lead < 0x80)
			continue;

		// Number of continuation bytes announced by the lead byte. Each
		// sequence length needs its own mask; a single 0xe0 mask cannot
		// tell 1110xxxx (3 bytes) apart from 11110xxx (4 bytes).
		size_t count = 0;
		if ((lead & 0xe0) == 0xc0)
			count = 1;
		else if ((lead & 0xf0) == 0xe0)
			count = 2;
		else if ((lead & 0xf8) == 0xf0)
			count = 3;

		// Stray continuation byte (10xxxxxx) or illegal lead (11111xxx).
		if (count == 0)
		{
			_invalidPosition = i;
			return false;
		}

		// The sequence would run past the end of the input.
		if (i + count >= length)
		{
			_invalidPosition = i;
			return false;
		}

		for (size_t j = 0; j < count; ++j)
		{
			++i;
			if ((static_cast<unsigned char>(_input[i]) & 0xc0) != 0x80)
			{
				// Return right away so the reported position is the byte
				// that actually failed, not wherever a later scan stops.
				_invalidPosition = i;
				return false;
			}
		}
	}

	return true;
}

}
+ + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** @file UTF8.h + * @author Alex Beregszaszi + * @date 2016 + * + * UTF-8 related helpers + */ + +#pragma once + +#include + +namespace dev +{ + +/// Validate an input for UTF8 encoding +/// @returns true if the input is valid; otherwise false, with the position at which validation failed stored in _invalidPosition +bool validate(std::string const& _input, size_t& _invalidPosition); + +} diff --git a/libsolidity/ast/Types.cpp b/libsolidity/ast/Types.cpp index 5630743bd..28f7e1b7e 100644 --- a/libsolidity/ast/Types.cpp +++ b/libsolidity/ast/Types.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -852,6 +853,16 @@ bool StringLiteralType::operator==(const Type& _other) const return m_value == dynamic_cast(_other).m_value; } +std::string StringLiteralType::toString(bool) const +{ + size_t invalidSequence; + + if (!dev::validate(m_value, invalidSequence)) + return "literal_string (contains invalid UTF-8 sequence at position " + dev::toString(invalidSequence) + ")"; + + return "literal_string \"" + m_value + "\""; +} + TypePointer StringLiteralType::mobileType() const { return make_shared(DataLocation::Memory, true); diff --git a/libsolidity/ast/Types.h b/libsolidity/ast/Types.h index 1ee762e53..1282e5d82 100644 --- a/libsolidity/ast/Types.h +++ b/libsolidity/ast/Types.h @@ -419,7 +419,7 @@ public: virtual bool canLiveOutsideStorage() const override { return false; } virtual unsigned 
sizeOnStack() const override { return 0; } - virtual std::string toString(bool) const override { return "literal_string \"" + m_value + "\""; } + virtual std::string toString(bool) const override; virtual TypePointer mobileType() const override; std::string const& value() const { return m_value; }