diff --git a/libyul/CMakeLists.txt b/libyul/CMakeLists.txt index ff4d2cdbb..7805dc511 100644 --- a/libyul/CMakeLists.txt +++ b/libyul/CMakeLists.txt @@ -54,6 +54,8 @@ add_library(yul optimiser/ASTWalker.h optimiser/BlockFlattener.cpp optimiser/BlockFlattener.h + optimiser/BlockHasher.cpp + optimiser/BlockHasher.h optimiser/CommonSubexpressionEliminator.cpp optimiser/CommonSubexpressionEliminator.h optimiser/ControlFlowSimplifier.cpp diff --git a/libyul/Utilities.cpp b/libyul/Utilities.cpp index 2cf72f499..b69df0e4f 100644 --- a/libyul/Utilities.cpp +++ b/libyul/Utilities.cpp @@ -85,3 +85,9 @@ bool Less::operator()(Literal const& _lhs, Literal const& _rhs) const else return _lhs.value < _rhs.value; } + +bool SwitchCaseCompareByLiteralValue::operator()(Case const* _lhs, Case const* _rhs) const +{ + yulAssert(_lhs && _rhs, ""); + return Less{}(_lhs->value.get(), _rhs->value.get()); +} diff --git a/libyul/Utilities.h b/libyul/Utilities.h index 827b43aae..0103d74ec 100644 --- a/libyul/Utilities.h +++ b/libyul/Utilities.h @@ -59,4 +59,11 @@ struct Less template<> bool Less::operator()(Literal const& _lhs, Literal const& _rhs) const; extern template struct Less; +// This can only be used for cases within one switch statement and +// relies on the fact that there are no duplicate cases. +struct SwitchCaseCompareByLiteralValue +{ + bool operator()(Case const* _lhsCase, Case const* _rhsCase) const; +}; + } diff --git a/libyul/YulString.h b/libyul/YulString.h index 5e77e0bbd..47fb960a9 100644 --- a/libyul/YulString.h +++ b/libyul/YulString.h @@ -122,6 +122,8 @@ public: return YulStringRepository::instance().idToString(m_handle.id); } + uint64_t hash() const { return m_handle.hash; } + private: /// Handle of the string. Assumes that the empty string has ID zero. 
YulStringRepository::Handle m_handle{ 0, YulStringRepository::emptyHash() }; diff --git a/libyul/optimiser/BlockHasher.cpp b/libyul/optimiser/BlockHasher.cpp new file mode 100644 index 000000000..58959610e --- /dev/null +++ b/libyul/optimiser/BlockHasher.cpp @@ -0,0 +1,195 @@ +/* + This file is part of solidity. + + solidity is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + solidity is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with solidity. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * Optimiser component that calculates hash values for blocks. + */ + +#include +#include +#include +#include + +using namespace std; +using namespace dev; +using namespace yul; + +namespace +{ +static constexpr uint64_t compileTimeLiteralHash(char const* _literal, size_t _N) +{ + return (_N == 0) ? 
BlockHasher::fnvEmptyHash : (static_cast(_literal[0]) * BlockHasher::fnvPrime) ^ compileTimeLiteralHash(_literal + 1, _N - 1); +} + +template +static constexpr uint64_t compileTimeLiteralHash(char const (&_literal)[N]) +{ + return compileTimeLiteralHash(_literal, N); +} +} + +std::map BlockHasher::run(Block const& _block) +{ + std::map result; + BlockHasher blockHasher(result); + blockHasher(_block); + return result; +} + +void BlockHasher::operator()(Literal const& _literal) +{ + hash64(compileTimeLiteralHash("Literal")); + hash64(_literal.value.hash()); + hash64(_literal.type.hash()); + hash8(static_cast(_literal.kind)); +} + +void BlockHasher::operator()(Identifier const& _identifier) +{ + hash64(compileTimeLiteralHash("Identifier")); + auto it = m_variableReferences.find(_identifier.name); + if (it == m_variableReferences.end()) + { + it = m_variableReferences.emplace(_identifier.name, VariableReference { + m_externalIdentifierCount++, + true + }).first; + m_externalReferences.emplace_back(_identifier.name); + } + if (it->second.isExternal) + hash64(compileTimeLiteralHash("external")); + else + hash64(compileTimeLiteralHash("internal")); + hash64(it->second.id); +} + +void BlockHasher::operator()(FunctionalInstruction const& _instr) +{ + hash64(compileTimeLiteralHash("FunctionalInstruction")); + hash8(static_cast>(_instr.instruction)); + hash64(_instr.arguments.size()); + ASTWalker::operator()(_instr); +} + +void BlockHasher::operator()(FunctionCall const& _funCall) +{ + hash64(compileTimeLiteralHash("FunctionCall")); + hash64(_funCall.functionName.name.hash()); + hash64(_funCall.arguments.size()); + ASTWalker::operator()(_funCall); +} + +void BlockHasher::operator()(ExpressionStatement const& _statement) +{ + hash64(compileTimeLiteralHash("ExpressionStatement")); + ASTWalker::operator()(_statement); +} + +void BlockHasher::operator()(Assignment const& _assignment) +{ + hash64(compileTimeLiteralHash("Assignment")); + hash64(_assignment.variableNames.size()); + 
for (auto const& name: _assignment.variableNames) + (*this)(name); + visit(*_assignment.value); +} + +void BlockHasher::operator()(VariableDeclaration const& _varDecl) +{ + hash64(compileTimeLiteralHash("VariableDeclaration")); + hash64(_varDecl.variables.size()); + for (auto const& var: _varDecl.variables) + { + yulAssert(!m_variableReferences.count(var.name), ""); + m_variableReferences[var.name] = VariableReference{ + m_internalIdentifierCount++, + false + }; + } + ASTWalker::operator()(_varDecl); +} + +void BlockHasher::operator()(If const& _if) +{ + hash64(compileTimeLiteralHash("If")); + ASTWalker::operator()(_if); +} + +void BlockHasher::operator()(Switch const& _switch) +{ + hash64(compileTimeLiteralHash("Switch")); + hash64(_switch.cases.size()); + // Instead of sorting we could consider combining + // the case hashes using a commutative operation here. + std::set cases; + for (auto const& _case: _switch.cases) + cases.insert(&_case); + + visit(*_switch.expression); + for (auto const& _case: cases) + { + if (_case->value) + (*this)(*_case->value); + (*this)(_case->body); + } +} + +void BlockHasher::operator()(FunctionDefinition const& _funDef) +{ + hash64(compileTimeLiteralHash("FunctionDefinition")); + ASTWalker::operator()(_funDef); +} + +void BlockHasher::operator()(ForLoop const& _loop) +{ + yulAssert(_loop.pre.statements.empty(), ""); + + hash64(compileTimeLiteralHash("ForLoop")); + ASTWalker::operator()(_loop); +} + +void BlockHasher::operator()(Break const& _break) +{ + hash64(compileTimeLiteralHash("Break")); + ASTWalker::operator()(_break); +} + +void BlockHasher::operator()(Continue const& _continue) +{ + hash64(compileTimeLiteralHash("Continue")); + ASTWalker::operator()(_continue); +} + + +void BlockHasher::operator()(Block const& _block) +{ + hash64(compileTimeLiteralHash("Block")); + hash64(_block.statements.size()); + if (_block.statements.empty()) + return; + + BlockHasher subBlockHasher(m_blockHashes); + for (auto const& statement: 
_block.statements) + subBlockHasher.visit(statement); + + m_blockHashes[&_block] = subBlockHasher.m_hash; + + hash64(subBlockHasher.m_hash); + hash64(subBlockHasher.m_externalReferences.size()); + + for (auto& externalReference: subBlockHasher.m_externalReferences) + (*this)(Identifier{{}, externalReference}); +} diff --git a/libyul/optimiser/BlockHasher.h b/libyul/optimiser/BlockHasher.h new file mode 100644 index 000000000..5f6a26238 --- /dev/null +++ b/libyul/optimiser/BlockHasher.h @@ -0,0 +1,109 @@ +/* + This file is part of solidity. + + solidity is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + solidity is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with solidity. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * Optimiser component that calculates hash values for blocks. + */ +#pragma once + +#include +#include +#include +#include + +namespace yul +{ + +/** + * Optimiser component that calculates hash values for blocks. + * Syntactically equal blocks will have identical hashes and + * blocks with equal hashes will likely be syntactically equal. + * + * The names of internally declared variables are replaced by + * a simple counter, so differing names are not taken into account, + * but only the order of references to declared variables. + * + * Similarly, the names of referenced external variables are not considered, + * but replaced by a (distinct) counter as well. 
+ * + * Prerequisite: Disambiguator, ForLoopInitRewriter + */ +class BlockHasher: public ASTWalker +{ +public: + + using ASTWalker::operator(); + + void operator()(Literal const&) override; + void operator()(Identifier const&) override; + void operator()(FunctionalInstruction const& _instr) override; + void operator()(FunctionCall const& _funCall) override; + void operator()(ExpressionStatement const& _statement) override; + void operator()(Assignment const& _assignment) override; + void operator()(VariableDeclaration const& _varDecl) override; + void operator()(If const& _if) override; + void operator()(Switch const& _switch) override; + void operator()(FunctionDefinition const&) override; + void operator()(ForLoop const&) override; + void operator()(Break const&) override; + void operator()(Continue const&) override; + void operator()(Block const& _block) override; + + static std::map run(Block const& _block); + + static constexpr uint64_t fnvPrime = 1099511628211u; + static constexpr uint64_t fnvEmptyHash = 14695981039346656037u; + +private: + BlockHasher(std::map& _blockHashes): m_blockHashes(_blockHashes) {} + + void hash8(uint8_t _value) + { + m_hash *= fnvPrime; + m_hash ^= _value; + } + void hash16(uint16_t _value) + { + hash8(static_cast(_value & 0xFF)); + hash8(static_cast(_value >> 8)); + } + void hash32(uint32_t _value) + { + hash16(static_cast(_value & 0xFFFF)); + hash16(static_cast(_value >> 16)); + } + void hash64(uint64_t _value) + { + hash32(static_cast(_value & 0xFFFFFFFF)); + hash32(static_cast(_value >> 32)); + } + + std::map& m_blockHashes; + + uint64_t m_hash = fnvEmptyHash; + struct VariableReference + { + size_t id = 0; + bool isExternal = false; + }; + std::map m_variableReferences; + std::vector m_externalReferences; + size_t m_externalIdentifierCount = 0; + size_t m_internalIdentifierCount = 0; +}; + + +} diff --git a/libyul/optimiser/EquivalentFunctionDetector.cpp b/libyul/optimiser/EquivalentFunctionDetector.cpp index 
e4fd7f5c8..2b2f1a6f3 100644 --- a/libyul/optimiser/EquivalentFunctionDetector.cpp +++ b/libyul/optimiser/EquivalentFunctionDetector.cpp @@ -30,8 +30,8 @@ using namespace yul; void EquivalentFunctionDetector::operator()(FunctionDefinition const& _fun) { - RoughHeuristic heuristic(_fun); - auto& candidates = m_candidates[heuristic]; + uint64_t bodyHash = m_blockHashes[&_fun.body]; + auto& candidates = m_candidates[bodyHash]; for (auto const& candidate: candidates) if (SyntacticallyEqual{}.statementEqual(_fun, *candidate)) { @@ -40,23 +40,3 @@ void EquivalentFunctionDetector::operator()(FunctionDefinition const& _fun) } candidates.push_back(&_fun); } - -bool EquivalentFunctionDetector::RoughHeuristic::operator<(EquivalentFunctionDetector::RoughHeuristic const& _rhs) const -{ - if ( - std::make_tuple(m_fun.parameters.size(), m_fun.returnVariables.size()) == - std::make_tuple(_rhs.m_fun.parameters.size(), _rhs.m_fun.returnVariables.size()) - ) - return codeSize() < _rhs.codeSize(); - else - return - std::make_tuple(m_fun.parameters.size(), m_fun.returnVariables.size()) < - std::make_tuple(_rhs.m_fun.parameters.size(), _rhs.m_fun.returnVariables.size()); -} - -size_t EquivalentFunctionDetector::RoughHeuristic::codeSize() const -{ - if (!m_codeSize) - m_codeSize = CodeSize::codeSize(m_fun.body); - return *m_codeSize; -} diff --git a/libyul/optimiser/EquivalentFunctionDetector.h b/libyul/optimiser/EquivalentFunctionDetector.h index 329fd385e..5db3bdb17 100644 --- a/libyul/optimiser/EquivalentFunctionDetector.h +++ b/libyul/optimiser/EquivalentFunctionDetector.h @@ -20,6 +20,7 @@ #pragma once #include +#include #include namespace yul @@ -28,14 +29,14 @@ namespace yul /** * Optimiser component that detects syntactically equivalent functions. 
* - * Prerequisite: Disambiguator + * Prerequisite: Disambiguator, ForLoopInitRewriter */ class EquivalentFunctionDetector: public ASTWalker { public: static std::map run(Block& _block) { - EquivalentFunctionDetector detector{}; + EquivalentFunctionDetector detector{BlockHasher::run(_block)}; detector(_block); return std::move(detector.m_duplicates); } @@ -44,26 +45,10 @@ public: void operator()(FunctionDefinition const& _fun) override; private: - EquivalentFunctionDetector() = default; - /** - * Fast heuristic to detect distinct, resp. potentially equal functions. - * - * Defines a partial order on function definitions. If two functions - * are comparable (one is "less" than the other), they are distinct. - * If not (neither is "less" than the other), they are *potentially* equal. - */ - class RoughHeuristic - { - public: - RoughHeuristic(FunctionDefinition const& _fun): m_fun(_fun) {} - bool operator<(RoughHeuristic const& _rhs) const; - private: - std::size_t codeSize() const; - FunctionDefinition const& m_fun; - mutable boost::optional m_codeSize; - // In case the heuristic doesn't turn out to be good enough, we might want to define a hash function for code blocks. 
- }; - std::map> m_candidates; + EquivalentFunctionDetector(std::map _blockHashes): m_blockHashes(std::move(_blockHashes)) {} + + std::map m_blockHashes; + std::map> m_candidates; std::map m_duplicates; }; diff --git a/libyul/optimiser/SyntacticalEquality.cpp b/libyul/optimiser/SyntacticalEquality.cpp index 53f0b029e..9e425b17e 100644 --- a/libyul/optimiser/SyntacticalEquality.cpp +++ b/libyul/optimiser/SyntacticalEquality.cpp @@ -129,11 +129,8 @@ bool SyntacticallyEqual::statementEqual(If const& _lhs, If const& _rhs) bool SyntacticallyEqual::statementEqual(Switch const& _lhs, Switch const& _rhs) { - static auto const sortCasesByValue = [](Case const* _lhsCase, Case const* _rhsCase) -> bool { - return Less{}(_lhsCase->value.get(), _rhsCase->value.get()); - }; - std::set lhsCases(sortCasesByValue); - std::set rhsCases(sortCasesByValue); + std::set lhsCases; + std::set rhsCases; for (auto const& lhsCase: _lhs.cases) lhsCases.insert(&lhsCase); for (auto const& rhsCase: _rhs.cases)