Merge pull request #826 from axic/utf8-check

AST printer: do not output invalid UTF8 sequences
This commit is contained in:
chriseth 2016-08-10 16:31:27 +02:00 committed by GitHub
commit fc60839105
4 changed files with 128 additions and 1 deletions

81
libdevcore/UTF8.cpp Normal file
View File

@ -0,0 +1,81 @@
/*
This file is part of cpp-ethereum.
cpp-ethereum is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
cpp-ethereum is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
*/
/** @file UTF8.cpp
* @author Alex Beregszaszi
* @date 2016
*
* UTF-8 related helpers
*/
#include "UTF8.h"
namespace dev
{
/// Checks that @a _input is structurally well-formed UTF-8.
///
/// Every byte below 0x80 is accepted as a single-byte character. Any other byte
/// must be a lead byte of a 2-, 3- or 4-byte sequence (bit patterns 110xxxxx,
/// 1110xxxx, 11110xxx) that is followed by the matching number of continuation
/// bytes (10xxxxxx). Only this structural shape is checked: overlong encodings,
/// UTF-16 surrogates and values above U+10FFFF are not rejected.
///
/// @param _input the byte string to check.
/// @param _invalidPosition on failure, receives the index of the offending byte:
///        the lead byte if it is not a valid lead or its sequence is truncated,
///        otherwise the first byte that is not a proper continuation byte.
///        Left untouched when the input is valid.
/// @returns true if the input is valid, false otherwise.
bool validate(std::string const& _input, size_t& _invalidPosition)
{
	size_t const length = _input.length();
	for (size_t i = 0; i < length; ++i)
	{
		// Work on unsigned values so that the comparisons below do not depend
		// on whether plain char is signed on this platform.
		unsigned char const lead = static_cast<unsigned char>(_input[i]);
		if (lead < 0x80)
			continue;

		// Number of continuation bytes announced by the lead byte. Each class
		// needs its own mask; a single 0xe0 mask cannot tell 3- and 4-byte
		// leads apart.
		size_t count = 0;
		if ((lead & 0xe0) == 0xc0)
			count = 1;
		else if ((lead & 0xf0) == 0xe0)
			count = 2;
		else if ((lead & 0xf8) == 0xf0)
			count = 3;
		else
		{
			// Stray continuation byte (0x80..0xbf) or a byte >= 0xf8.
			_invalidPosition = i;
			return false;
		}

		// Sequence would run past the end of the input. i < length holds here,
		// so the subtraction cannot wrap around.
		if (count >= length - i)
		{
			_invalidPosition = i;
			return false;
		}

		for (size_t j = 0; j < count; ++j)
		{
			++i;
			if ((static_cast<unsigned char>(_input[i]) & 0xc0) != 0x80)
			{
				// Return immediately so the reported position is exactly the
				// byte that broke the sequence.
				_invalidPosition = i;
				return false;
			}
		}
	}
	return true;
}
}

35
libdevcore/UTF8.h Normal file
View File

@ -0,0 +1,35 @@
/*
This file is part of cpp-ethereum.
cpp-ethereum is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
cpp-ethereum is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
*/
/** @file UTF8.h
* @author Alex Beregszaszi
* @date 2016
*
* UTF-8 related helpers
*/
#pragma once
#include <string>
namespace dev
{
/// Validate an input for UTF8 encoding
/// @param _input the byte string to check.
/// @param _invalidPosition written only when the check fails; it is meant to
///        identify the position of the first invalid byte in @a _input.
/// @returns true if the input is valid, false if it is not.
bool validate(std::string const& _input, size_t& _invalidPosition);
}

View File

@ -26,6 +26,7 @@
#include <libdevcore/CommonIO.h>
#include <libdevcore/CommonData.h>
#include <libdevcore/SHA3.h>
#include <libdevcore/UTF8.h>
#include <libsolidity/interface/Utils.h>
#include <libsolidity/ast/AST.h>
@ -852,6 +853,16 @@ bool StringLiteralType::operator==(const Type& _other) const
return m_value == dynamic_cast<StringLiteralType const&>(_other).m_value;
}
/// Renders the literal for display. The raw value is only embedded in the
/// result when it passes dev::validate; otherwise the position reported by the
/// validator is printed in its place.
std::string StringLiteralType::toString(bool) const
{
	size_t badOffset = 0;
	bool const wellFormed = dev::validate(m_value, badOffset);
	if (wellFormed)
		return "literal_string \"" + m_value + "\"";

	std::string description = "literal_string (contains invalid UTF-8 sequence at position ";
	description += dev::toString(badOffset);
	description += ")";
	return description;
}
TypePointer StringLiteralType::mobileType() const
{
return make_shared<ArrayType>(DataLocation::Memory, true);

View File

@ -419,7 +419,7 @@ public:
/// Always reports false for this type.
virtual bool canLiveOutsideStorage() const override { return false; }
/// Always reports a stack size of 0 for this type.
virtual unsigned sizeOnStack() const override { return 0; }
/// Inline formatter: wraps m_value in `literal_string "..."` without inspecting its bytes.
/// NOTE(review): this listing looks like a diff hunk without +/- markers; this inline
/// definition appears to be the line replaced by the declaration directly below.
/// Confirm against the real header, since both cannot coexist in one class.
virtual std::string toString(bool) const override { return "literal_string \"" + m_value + "\""; }
/// Declaration only; no body is given at this point.
virtual std::string toString(bool) const override;
/// Declaration only; no body is given at this point.
virtual TypePointer mobileType() const override;
/// @returns a read-only reference to the stored literal (m_value).
std::string const& value() const { return m_value; }