Deterministic YulStringRepository using string hashes.

This commit is contained in:
Daniel Kirchner 2018-11-09 20:21:26 +01:00
parent 09f8ff27fc
commit 74557ceb0e
5 changed files with 71 additions and 44 deletions

View File

@ -106,7 +106,6 @@ void CompilerStack::reset(bool _keepSources)
m_stackState = Empty;
m_sources.clear();
}
yul::YulStringRepository::instance().reset();
m_libraries.clear();
m_evmVersion = EVMVersion();
m_optimize = false;

View File

@ -22,7 +22,7 @@
#include <boost/noncopyable.hpp>
#include <map>
#include <unordered_map>
#include <memory>
#include <vector>
#include <string>
@ -32,72 +32,101 @@ namespace dev
namespace yul
{
/// Repository for YulStrings.
/// Owns the string data for all YulStrings, which can be referenced by a Handle.
/// A Handle consists of an ID (that depends on the insertion order of YulStrings and is potentially
/// non-deterministic) and a deterministic string hash.
class YulStringRepository: boost::noncopyable
{
public:
YulStringRepository()
struct Handle
{
reset();
}
size_t id;
std::uint64_t hash;
};
YulStringRepository():
m_strings{std::make_shared<std::string>()},
m_hashToID{std::make_pair(emptyHash(), 0)}
{}
static YulStringRepository& instance()
{
static YulStringRepository inst;
return inst;
}
size_t stringToId(std::string const& _string)
Handle stringToHandle(std::string const& _string)
{
if (_string.empty())
return 0;
size_t& id = m_ids[_string];
if (id == 0)
return { 0, emptyHash() };
std::uint64_t h = hash(_string);
auto range = m_hashToID.equal_range(h);
for (auto it = range.first; it != range.second; ++it)
if (*m_strings[it->second] == _string)
return Handle{it->second, h};
m_strings.emplace_back(std::make_shared<std::string>(_string));
size_t id = m_strings.size() - 1;
m_hashToID.emplace_hint(range.second, std::make_pair(h, id));
return Handle{id, h};
}
std::string const& idToString(size_t _id) const { return *m_strings.at(_id); }
static std::uint64_t hash(std::string const& v)
{
// FNV hash - can be replaced by a better one, e.g. xxhash64
std::uint64_t hash = emptyHash();
for (auto c: v)
{
m_strings.emplace_back(std::make_shared<std::string>(_string));
id = m_strings.size() - 1;
hash *= 1099511628211u;
hash ^= c;
}
return id;
}
std::string const& idToString(size_t _id) const
{
return *m_strings.at(_id);
}
void reset()
{
m_strings.clear();
m_ids.clear();
m_strings.emplace_back(std::make_shared<std::string>());
m_ids[std::string{}] = 0;
return hash;
}
static constexpr std::uint64_t emptyHash() { return 14695981039346656037u; }
private:
std::vector<std::shared_ptr<std::string>> m_strings;
std::map<std::string, size_t> m_ids;
std::unordered_multimap<std::uint64_t, size_t> m_hashToID;
};
/// Wrapper around handles into the YulString repository.
/// Equality of two YulStrings is determined by comparing their ID.
/// The <-operator depends on the string hash and is not consistent
/// with string comparisons (however, it is still deterministic).
class YulString
{
public:
YulString() = default;
explicit YulString(std::string const& _s): m_id(YulStringRepository::instance().stringToId(_s)) {}
explicit YulString(std::string const& _s): m_handle(YulStringRepository::instance().stringToHandle(_s)) {}
YulString(YulString const&) = default;
YulString(YulString&&) = default;
YulString& operator=(YulString const&) = default;
YulString& operator=(YulString&&) = default;
/// This is not consistent with the string <-operator!
bool operator<(YulString const& _other) const { return m_id < _other.m_id; }
bool operator==(YulString const& _other) const { return m_id == _other.m_id; }
bool operator!=(YulString const& _other) const { return m_id != _other.m_id; }
/// First compares the string hashes. If they are equal
/// it checks for identical IDs (only identical strings have
/// identical IDs and identical strings do not compare as "less").
/// If the hashes are identical and the strings are distinct, it
/// falls back to string comparison.
bool operator<(YulString const& _other) const
{
if (m_handle.hash < _other.m_handle.hash) return true;
if (_other.m_handle.hash < m_handle.hash) return false;
if (m_handle.id == _other.m_handle.id) return false;
return str() < _other.str();
}
/// Equality is determined based on the string ID.
bool operator==(YulString const& _other) const { return m_handle.id == _other.m_handle.id; }
bool operator!=(YulString const& _other) const { return m_handle.id != _other.m_handle.id; }
bool empty() const { return m_id == 0; }
bool empty() const { return m_handle.id == 0; }
std::string const& str() const
{
return YulStringRepository::instance().idToString(m_id);
return YulStringRepository::instance().idToString(m_handle.id);
}
private:
/// ID of the string. Assumes that the empty string has ID zero.
size_t m_id = 0;
/// Handle of the string. Assumes that the empty string has ID zero.
YulStringRepository::Handle m_handle{ 0, YulStringRepository::emptyHash() };
};
}

View File

@ -71,7 +71,7 @@ BOOST_AUTO_TEST_CASE(simple)
BOOST_CHECK_EQUAL(inlinableFunctions("{"
"function g(a:u256) -> b:u256 { b := a }"
"function f() -> x:u256 { x := g(2:u256) }"
"}"), "f,g");
"}"), "g,f");
}
BOOST_AUTO_TEST_CASE(simple_inside_structures)
@ -82,7 +82,7 @@ BOOST_AUTO_TEST_CASE(simple_inside_structures)
"function g(a:u256) -> b:u256 { b := a }"
"function f() -> x:u256 { x := g(2:u256) }"
"}"
"}"), "f,g");
"}"), "g,f");
BOOST_CHECK_EQUAL(inlinableFunctions("{"
"for {"
"function g(a:u256) -> b:u256 { b := a }"
@ -92,7 +92,7 @@ BOOST_AUTO_TEST_CASE(simple_inside_structures)
"{"
"function h() -> y:u256 { y := 2:u256 }"
"}"
"}"), "f,g,h");
"}"), "h,g,f");
}
BOOST_AUTO_TEST_CASE(negative)

View File

@ -90,8 +90,6 @@ YulOptimizerTest::YulOptimizerTest(string const& _filename)
bool YulOptimizerTest::run(ostream& _stream, string const& _linePrefix, bool const _formatted)
{
yul::YulStringRepository::instance().reset();
assembly::AsmPrinter printer{m_yul};
shared_ptr<Block> ast;
shared_ptr<assembly::AsmAnalysisInfo> analysisInfo;

View File

@ -41,18 +41,19 @@
// h_t := 2
// mstore(7, h_t)
// let g_x_1 := 10
// f(1)
// let g_f_x_8 := 1
// mstore(0, g_f_x_8)
// mstore(7, h())
// g(10)
// mstore(1, g_f_x_8)
// mstore(1, x)
// }
// function g(x_1)
// {
// let f_x_8 := 1
// mstore(0, f_x_8)
// let f_h_t
// f_h_t := 2
// mstore(7, f_h_t)
// let f_g_x_1 := 10
// f(1)
// mstore(7, h())
// g(10)
// mstore(1, f_x_8)
// }
// function h() -> t