From 74557ceb0e520838d3e3a580cc30671a9c274ca7 Mon Sep 17 00:00:00 2001 From: Daniel Kirchner Date: Fri, 9 Nov 2018 20:21:26 +0100 Subject: [PATCH] Deterministic YulStringRepository using string hashes. --- libsolidity/interface/CompilerStack.cpp | 1 - libyul/YulString.h | 93 ++++++++++++------- test/libyul/Inliner.cpp | 6 +- test/libyul/YulOptimizerTest.cpp | 2 - .../fullInliner/multi_fun_callback.yul | 13 +-- 5 files changed, 71 insertions(+), 44 deletions(-) diff --git a/libsolidity/interface/CompilerStack.cpp b/libsolidity/interface/CompilerStack.cpp index e0909eb31..7aa0faa73 100644 --- a/libsolidity/interface/CompilerStack.cpp +++ b/libsolidity/interface/CompilerStack.cpp @@ -106,7 +106,6 @@ void CompilerStack::reset(bool _keepSources) m_stackState = Empty; m_sources.clear(); } - yul::YulStringRepository::instance().reset(); m_libraries.clear(); m_evmVersion = EVMVersion(); m_optimize = false; diff --git a/libyul/YulString.h b/libyul/YulString.h index a80152394..ad900a703 100644 --- a/libyul/YulString.h +++ b/libyul/YulString.h @@ -22,7 +22,7 @@ #include -#include +#include #include #include #include @@ -32,72 +32,101 @@ namespace dev namespace yul { +/// Repository for YulStrings. +/// Owns the string data for all YulStrings, which can be referenced by a Handle. +/// A Handle consists of an ID (that depends on the insertion order of YulStrings and is potentially +/// non-deterministic) and a deterministic string hash. class YulStringRepository: boost::noncopyable { public: - YulStringRepository() + struct Handle { - reset(); - } + size_t id; + std::uint64_t hash; + }; + YulStringRepository(): + m_strings{std::make_shared()}, + m_hashToID{std::make_pair(emptyHash(), 0)} + {} static YulStringRepository& instance() { static YulStringRepository inst; return inst; } - size_t stringToId(std::string const& _string) + Handle stringToHandle(std::string const& _string) { if (_string.empty()) - return 0; - size_t& id = m_ids[_string]; - if (id == 0) + return { 0, emptyHash() }; + std::uint64_t h = hash(_string); + auto range = m_hashToID.equal_range(h); + for (auto it = range.first; it != range.second; ++it) + if (*m_strings[it->second] == _string) + return Handle{it->second, h}; + m_strings.emplace_back(std::make_shared(_string)); + size_t id = m_strings.size() - 1; + m_hashToID.emplace_hint(range.second, std::make_pair(h, id)); + return Handle{id, h}; + } + std::string const& idToString(size_t _id) const { return *m_strings.at(_id); } + + static std::uint64_t hash(std::string const& v) + { + // FNV hash - can be replaced by a better one, e.g. xxhash64 + std::uint64_t hash = emptyHash(); + for (auto c: v) { - m_strings.emplace_back(std::make_shared(_string)); - id = m_strings.size() - 1; + hash *= 1099511628211u; + hash ^= c; } - return id; - } - std::string const& idToString(size_t _id) const - { - return *m_strings.at(_id); - } - void reset() - { - m_strings.clear(); - m_ids.clear(); - m_strings.emplace_back(std::make_shared()); - m_ids[std::string{}] = 0; + return hash; } - + static constexpr std::uint64_t emptyHash() { return 14695981039346656037u; } private: std::vector> m_strings; - std::map m_ids; + std::unordered_multimap m_hashToID; }; +/// Wrapper around handles into the YulString repository. +/// Equality of two YulStrings is determined by comparing their ID. +/// The <-operator depends on the string hash and is not consistent +/// with string comparisons (however, it is still deterministic). class YulString { public: YulString() = default; - explicit YulString(std::string const& _s): m_id(YulStringRepository::instance().stringToId(_s)) {} + explicit YulString(std::string const& _s): m_handle(YulStringRepository::instance().stringToHandle(_s)) {} YulString(YulString const&) = default; YulString(YulString&&) = default; YulString& operator=(YulString const&) = default; YulString& operator=(YulString&&) = default; /// This is not consistent with the string <-operator! - bool operator<(YulString const& _other) const { return m_id < _other.m_id; } - bool operator==(YulString const& _other) const { return m_id == _other.m_id; } - bool operator!=(YulString const& _other) const { return m_id != _other.m_id; } + /// First compares the string hashes. If they are equal + /// it checks for identical IDs (only identical strings have + /// identical IDs and identical strings do not compare as "less"). + /// If the hashes are identical and the strings are distinct, it + /// falls back to string comparison. + bool operator<(YulString const& _other) const + { + if (m_handle.hash < _other.m_handle.hash) return true; + if (_other.m_handle.hash < m_handle.hash) return false; + if (m_handle.id == _other.m_handle.id) return false; + return str() < _other.str(); + } + /// Equality is determined based on the string ID. + bool operator==(YulString const& _other) const { return m_handle.id == _other.m_handle.id; } + bool operator!=(YulString const& _other) const { return m_handle.id != _other.m_handle.id; } - bool empty() const { return m_id == 0; } + bool empty() const { return m_handle.id == 0; } std::string const& str() const { - return YulStringRepository::instance().idToString(m_id); + return YulStringRepository::instance().idToString(m_handle.id); } private: - /// ID of the string. Assumes that the empty string has ID zero. - size_t m_id = 0; + /// Handle of the string. Assumes that the empty string has ID zero. + YulStringRepository::Handle m_handle{ 0, YulStringRepository::emptyHash() }; }; } diff --git a/test/libyul/Inliner.cpp b/test/libyul/Inliner.cpp index 4ed52b47b..66810298e 100644 --- a/test/libyul/Inliner.cpp +++ b/test/libyul/Inliner.cpp @@ -71,7 +71,7 @@ BOOST_AUTO_TEST_CASE(simple) BOOST_CHECK_EQUAL(inlinableFunctions("{" "function g(a:u256) -> b:u256 { b := a }" "function f() -> x:u256 { x := g(2:u256) }" - "}"), "f,g"); + "}"), "g,f"); } BOOST_AUTO_TEST_CASE(simple_inside_structures) @@ -82,7 +82,7 @@ BOOST_AUTO_TEST_CASE(simple_inside_structures) "function g(a:u256) -> b:u256 { b := a }" "function f() -> x:u256 { x := g(2:u256) }" "}" - "}"), "f,g"); + "}"), "g,f"); BOOST_CHECK_EQUAL(inlinableFunctions("{" "for {" "function g(a:u256) -> b:u256 { b := a }" @@ -92,7 +92,7 @@ BOOST_AUTO_TEST_CASE(simple_inside_structures) "{" "function h() -> y:u256 { y := 2:u256 }" "}" - "}"), "f,g,h"); + "}"), "h,g,f"); } BOOST_AUTO_TEST_CASE(negative) diff --git a/test/libyul/YulOptimizerTest.cpp b/test/libyul/YulOptimizerTest.cpp index 162b167c9..03cd6446d 100644 --- a/test/libyul/YulOptimizerTest.cpp +++ b/test/libyul/YulOptimizerTest.cpp @@ -90,8 +90,6 @@ YulOptimizerTest::YulOptimizerTest(string const& _filename) bool YulOptimizerTest::run(ostream& _stream, string const& _linePrefix, bool const _formatted) { - yul::YulStringRepository::instance().reset(); - assembly::AsmPrinter printer{m_yul}; shared_ptr ast; shared_ptr analysisInfo; diff --git a/test/libyul/yulOptimizerTests/fullInliner/multi_fun_callback.yul b/test/libyul/yulOptimizerTests/fullInliner/multi_fun_callback.yul index d09877dea..19ac945e5 100644 --- a/test/libyul/yulOptimizerTests/fullInliner/multi_fun_callback.yul +++ b/test/libyul/yulOptimizerTests/fullInliner/multi_fun_callback.yul @@ -41,18 +41,19 @@ // h_t := 2 // mstore(7, h_t) // let g_x_1 := 10 -// f(1) +// let g_f_x_8 := 1 +// mstore(0, g_f_x_8) +// mstore(7, h()) +// g(10) +// mstore(1, g_f_x_8) // mstore(1, x) // } // function g(x_1) // { // let f_x_8 := 1 // mstore(0, f_x_8) -// let f_h_t -// f_h_t := 2 -// mstore(7, f_h_t) -// let f_g_x_1 := 10 -// f(1) +// mstore(7, h()) +// g(10) // mstore(1, f_x_8) // } // function h() -> t