Merge pull request #6776 from ethereum/equivalentFunctionHashes

Use block hashes in EquivalentFunctionDetector.
This commit is contained in:
chriseth 2019-05-20 14:35:39 +02:00 committed by GitHub
commit 8daa281d3d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 332 additions and 49 deletions

View File

@ -54,6 +54,8 @@ add_library(yul
optimiser/ASTWalker.h
optimiser/BlockFlattener.cpp
optimiser/BlockFlattener.h
optimiser/BlockHasher.cpp
optimiser/BlockHasher.h
optimiser/CommonSubexpressionEliminator.cpp
optimiser/CommonSubexpressionEliminator.h
optimiser/ControlFlowSimplifier.cpp

View File

@ -85,3 +85,9 @@ bool Less<Literal>::operator()(Literal const& _lhs, Literal const& _rhs) const
else
return _lhs.value < _rhs.value;
}
/// Compares two switch cases by the literals they match on.
/// Both case pointers must be non-null (asserted); the value pointers themselves
/// are handed unchanged to Less<Literal*>.
bool SwitchCaseCompareByLiteralValue::operator()(Case const* _lhs, Case const* _rhs) const
{
	yulAssert(_lhs && _rhs, "");

	auto const lhsValue = _lhs->value.get();
	auto const rhsValue = _rhs->value.get();
	return Less<Literal*>{}(lhsValue, rhsValue);
}

View File

@ -59,4 +59,11 @@ struct Less<T*>
template<> bool Less<Literal>::operator()(Literal const& _lhs, Literal const& _rhs) const;
extern template struct Less<Literal>;
/// Comparator for pointers to switch cases, intended for ordered containers
/// of cases (e.g. std::set<Case const*, SwitchCaseCompareByLiteralValue>).
/// This can only be used for cases within one switch statement and
/// relies on the fact that there are no duplicate cases.
struct SwitchCaseCompareByLiteralValue
{
/// @returns true if @a _lhsCase is ordered before @a _rhsCase.
/// The definition compares the cases' literal values via Less<Literal*>
/// and asserts that both pointers are non-null.
bool operator()(Case const* _lhsCase, Case const* _rhsCase) const;
};
}

View File

@ -122,6 +122,8 @@ public:
return YulStringRepository::instance().idToString(m_handle.id);
}
/// @returns the hash value stored in this string's repository handle.
uint64_t hash() const { return m_handle.hash; }
private:
/// Handle of the string. Assumes that the empty string has ID zero.
YulStringRepository::Handle m_handle{ 0, YulStringRepository::emptyHash() };

View File

@ -0,0 +1,195 @@
/*
This file is part of solidity.
solidity is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
solidity is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with solidity. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* Optimiser component that calculates hash values for blocks.
*/
#include <libyul/optimiser/BlockHasher.h>
#include <libyul/optimiser/SyntacticalEquality.h>
#include <libyul/Utilities.h>
#include <libdevcore/CommonData.h>
using namespace std;
using namespace dev;
using namespace yul;
namespace
{

/// Folds the first @a _N characters of @a _literal into @a _hash, one FNV-1 step
/// (multiply by the prime, then xor the byte) per character, front to back.
///
/// Unlike a plain xor of per-character products, this fold depends on the order
/// of the characters and repeated characters do not cancel each other out, so
/// distinct tag strings that are permutations of each other get distinct seeds.
/// Characters are converted through unsigned char so that a signed char is not
/// sign-extended before it is mixed in.
///
/// Written as a single-return recursion so that it is usable in constant expressions.
/// @param _hash running hash state; callers normally rely on the default (the empty hash).
constexpr uint64_t compileTimeLiteralHash(
	char const* _literal,
	size_t _N,
	uint64_t _hash = BlockHasher::fnvEmptyHash
)
{
	return (_N == 0) ?
		_hash :
		compileTimeLiteralHash(
			_literal + 1,
			_N - 1,
			(_hash * BlockHasher::fnvPrime) ^ static_cast<unsigned char>(_literal[0])
		);
}

/// Convenience overload for string literals: hashes the entire array,
/// i.e. including the terminating NUL character (N counts it).
template<size_t N>
constexpr uint64_t compileTimeLiteralHash(char const (&_literal)[N])
{
	return compileTimeLiteralHash(_literal, N);
}

}
/// Entry point: hashes @a _block with a fresh hasher and returns the
/// block-to-hash map that the hasher filled in while walking it.
std::map<Block const*, uint64_t> BlockHasher::run(Block const& _block)
{
	std::map<Block const*, uint64_t> blockHashes;

	BlockHasher hasher{blockHashes};
	hasher(_block);

	return blockHashes;
}
/// Mixes a literal into the running hash: node tag, value hash, type hash
/// and finally the literal kind as a single byte.
void BlockHasher::operator()(Literal const& _literal)
{
	auto const valueHash = _literal.value.hash();
	auto const typeHash = _literal.type.hash();
	auto const kindByte = static_cast<uint8_t>(_literal.kind);

	hash64(compileTimeLiteralHash("Literal"));
	hash64(valueHash);
	hash64(typeHash);
	hash8(kindByte);
}
/// Mixes an identifier into the running hash without using its name.
/// A name not seen before by this hasher is registered as an external reference
/// and gets the next external counter value; it is also appended to the list of
/// external references. What gets hashed is the external/internal marker followed
/// by the counter value assigned to the name.
void BlockHasher::operator()(Identifier const& _identifier)
{
	hash64(compileTimeLiteralHash("Identifier"));

	auto const& name = _identifier.name;
	auto reference = m_variableReferences.find(name);
	if (reference == m_variableReferences.end())
	{
		// First sighting of a name that was not declared in this block: external.
		VariableReference const freshExternal{m_externalIdentifierCount++, true};
		reference = m_variableReferences.emplace(name, freshExternal).first;
		m_externalReferences.emplace_back(name);
	}

	VariableReference const& resolved = reference->second;
	hash64(
		resolved.isExternal ?
		compileTimeLiteralHash("external") :
		compileTimeLiteralHash("internal")
	);
	hash64(resolved.id);
}
/// Mixes a functional instruction into the running hash: node tag, the
/// instruction's numeric value, the argument count, and then whatever the
/// ASTWalker base traversal visits for this node.
void BlockHasher::operator()(FunctionalInstruction const& _instr)
{
	using InstructionValue = std::underlying_type_t<eth::Instruction>;

	hash64(compileTimeLiteralHash("FunctionalInstruction"));

	auto const opcode = static_cast<InstructionValue>(_instr.instruction);
	hash8(opcode);

	auto const argumentCount = _instr.arguments.size();
	hash64(argumentCount);

	ASTWalker::operator()(_instr);
}
/// Mixes a function call into the running hash: node tag, the hash of the
/// called function's name, the argument count, and then whatever the
/// ASTWalker base traversal visits for this node.
void BlockHasher::operator()(FunctionCall const& _funCall)
{
	auto const calleeHash = _funCall.functionName.name.hash();
	auto const argumentCount = _funCall.arguments.size();

	hash64(compileTimeLiteralHash("FunctionCall"));
	hash64(calleeHash);
	hash64(argumentCount);

	ASTWalker::operator()(_funCall);
}
/// Mixes the node tag for an expression statement into the running hash and
/// then lets the ASTWalker base traverse the statement.
void BlockHasher::operator()(ExpressionStatement const& _statement)
{
	constexpr uint64_t nodeTag = compileTimeLiteralHash("ExpressionStatement");
	hash64(nodeTag);

	ASTWalker::operator()(_statement);
}
/// Mixes an assignment into the running hash: node tag, number of assigned
/// variables, each assigned identifier in order, and finally the value expression.
void BlockHasher::operator()(Assignment const& _assignment)
{
	auto const& targets = _assignment.variableNames;

	hash64(compileTimeLiteralHash("Assignment"));
	hash64(targets.size());

	// The targets are hashed through the identifier overload.
	for (auto const& target: targets)
		this->operator()(target);

	visit(*_assignment.value);
}
/// Mixes a variable declaration into the running hash: node tag and number of
/// declared variables. Every declared name is registered as an internal
/// reference with the next internal counter value (it must not be known yet),
/// after which the ASTWalker base traverses the declaration.
void BlockHasher::operator()(VariableDeclaration const& _varDecl)
{
	hash64(compileTimeLiteralHash("VariableDeclaration"));
	hash64(_varDecl.variables.size());

	for (auto const& var: _varDecl.variables)
	{
		yulAssert(!m_variableReferences.count(var.name), "");

		VariableReference internalReference;
		internalReference.id = m_internalIdentifierCount++;
		internalReference.isExternal = false;
		m_variableReferences[var.name] = internalReference;
	}

	ASTWalker::operator()(_varDecl);
}
/// Mixes the node tag for an if statement into the running hash and then
/// lets the ASTWalker base traverse the statement.
void BlockHasher::operator()(If const& _if)
{
	constexpr uint64_t nodeTag = compileTimeLiteralHash("If");
	hash64(nodeTag);

	ASTWalker::operator()(_if);
}
/// Mixes a switch statement into the running hash: node tag, number of cases,
/// the switch expression, and then every case (its literal value if it has one,
/// followed by its body). Cases are visited in the order defined by
/// SwitchCaseCompareByLiteralValue rather than in source order.
void BlockHasher::operator()(Switch const& _switch)
{
	hash64(compileTimeLiteralHash("Switch"));
	hash64(_switch.cases.size());

	// Instead of sorting, combining the case hashes with a commutative
	// operation would be an alternative worth considering.
	std::set<Case const*, SwitchCaseCompareByLiteralValue> sortedCases;
	for (auto const& switchCase: _switch.cases)
		sortedCases.insert(&switchCase);

	visit(*_switch.expression);

	for (Case const* switchCase: sortedCases)
	{
		// A case without a value contributes only its body.
		if (switchCase->value)
			this->operator()(*switchCase->value);
		this->operator()(switchCase->body);
	}
}
/// Mixes the node tag for a function definition into the running hash and
/// then lets the ASTWalker base traverse the definition.
void BlockHasher::operator()(FunctionDefinition const& _funDef)
{
	constexpr uint64_t nodeTag = compileTimeLiteralHash("FunctionDefinition");
	hash64(nodeTag);

	ASTWalker::operator()(_funDef);
}
/// Mixes the node tag for a for loop into the running hash and then lets the
/// ASTWalker base traverse the loop. The loop's init block has to be empty
/// (asserted) - presumably ensured by running ForLoopInitRewriter beforehand.
void BlockHasher::operator()(ForLoop const& _loop)
{
	yulAssert(_loop.pre.statements.empty(), "");

	constexpr uint64_t nodeTag = compileTimeLiteralHash("ForLoop");
	hash64(nodeTag);

	ASTWalker::operator()(_loop);
}
/// Mixes the node tag for a break statement into the running hash and then
/// forwards to the ASTWalker base.
void BlockHasher::operator()(Break const& _break)
{
	constexpr uint64_t nodeTag = compileTimeLiteralHash("Break");
	hash64(nodeTag);

	ASTWalker::operator()(_break);
}
/// Mixes the node tag for a continue statement into the running hash and then
/// forwards to the ASTWalker base.
void BlockHasher::operator()(Continue const& _continue)
{
	constexpr uint64_t nodeTag = compileTimeLiteralHash("Continue");
	hash64(nodeTag);

	ASTWalker::operator()(_continue);
}
/// Mixes a block into the running hash and records the block's own hash.
///
/// The node tag and the statement count are always hashed. For a non-empty
/// block, the statements are hashed by a separate hasher that starts from a
/// clean state (own counters, own variable table) but writes into the same
/// result map. The resulting hash is stored for the block and then mixed into
/// this hasher, followed by the number of names the nested hasher classified
/// as external and those names themselves, each hashed as an identifier of
/// this hasher. Empty blocks get no entry in the result map.
void BlockHasher::operator()(Block const& _block)
{
	auto const& statements = _block.statements;

	hash64(compileTimeLiteralHash("Block"));
	hash64(statements.size());
	if (statements.empty())
		return;

	BlockHasher nested{m_blockHashes};
	for (auto const& statement: statements)
		nested.visit(statement);

	uint64_t const nestedHash = nested.m_hash;
	m_blockHashes[&_block] = nestedHash;

	hash64(nestedHash);
	hash64(nested.m_externalReferences.size());
	// Names the nested block did not declare itself are resolved in this scope.
	for (auto& outerName: nested.m_externalReferences)
		this->operator()(Identifier{{}, outerName});
}

View File

@ -0,0 +1,109 @@
/*
This file is part of solidity.
solidity is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
solidity is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with solidity. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* Optimiser component that calculates hash values for blocks.
*/
#pragma once
#include <libyul/optimiser/ASTWalker.h>
#include <libyul/AsmDataForward.h>
#include <libyul/YulString.h>
#include <libyul/AsmData.h>
namespace yul
{
/**
 * Optimiser component that calculates hash values for blocks.
 * Syntactically equal blocks will have identical hashes and
 * blocks with equal hashes will likely be syntactically equal.
 *
 * The names of internally declared variables are replaced by
 * a simple counter, so differing names are not taken into account,
 * but only the order of references to declared variables.
 *
 * Similarly, the names of referenced external variables are not considered,
 * but replaced by a (distinct) counter as well.
 *
 * Instances cannot be created from outside; use the static run() function.
 *
 * Prerequisite: Disambiguator, ForLoopInitRewriter
 */
class BlockHasher: public ASTWalker
{
public:
	using ASTWalker::operator();

	void operator()(Literal const&) override;
	void operator()(Identifier const&) override;
	void operator()(FunctionalInstruction const& _instr) override;
	void operator()(FunctionCall const& _funCall) override;
	void operator()(ExpressionStatement const& _statement) override;
	void operator()(Assignment const& _assignment) override;
	void operator()(VariableDeclaration const& _varDecl) override;
	void operator()(If const& _if) override;
	void operator()(Switch const& _switch) override;
	void operator()(FunctionDefinition const&) override;
	void operator()(ForLoop const&) override;
	void operator()(Break const&) override;
	void operator()(Continue const&) override;
	void operator()(Block const& _block) override;

	/// Hashes @a _block, including the blocks visited while walking it.
	/// @returns a map from each visited non-empty block to its hash value.
	static std::map<Block const*, uint64_t> run(Block const& _block);

	/// Multiplier used in every hashing step (the 64-bit FNV prime).
	static constexpr uint64_t fnvPrime = 1099511628211u;
	/// Initial state of the hash (the 64-bit FNV offset basis).
	static constexpr uint64_t fnvEmptyHash = 14695981039346656037u;

private:
	/// @param _blockHashes result map the hasher writes to; it is only referenced,
	/// so it has to outlive the hasher. Marked explicit so that a map is never
	/// silently converted into a hasher.
	explicit BlockHasher(std::map<Block const*, uint64_t>& _blockHashes): m_blockHashes(_blockHashes) {}

	/// Single hashing step: multiply the state by the prime, then xor in the byte.
	void hash8(uint8_t _value)
	{
		m_hash *= fnvPrime;
		m_hash ^= _value;
	}
	/// Hashes a 16-bit value byte by byte, least significant byte first.
	void hash16(uint16_t _value)
	{
		hash8(static_cast<uint8_t>(_value & 0xFF));
		hash8(static_cast<uint8_t>(_value >> 8));
	}
	/// Hashes a 32-bit value, lower half first.
	void hash32(uint32_t _value)
	{
		hash16(static_cast<uint16_t>(_value & 0xFFFF));
		hash16(static_cast<uint16_t>(_value >> 16));
	}
	/// Hashes a 64-bit value, lower half first.
	void hash64(uint64_t _value)
	{
		hash32(static_cast<uint32_t>(_value & 0xFFFFFFFF));
		hash32(static_cast<uint32_t>(_value >> 32));
	}

	/// Result map shared by this hasher and the hashers it creates for nested blocks.
	std::map<Block const*, uint64_t>& m_blockHashes;
	/// Running hash of everything visited by this hasher so far.
	uint64_t m_hash = fnvEmptyHash;

	/// Name-independent stand-in for a variable name.
	struct VariableReference
	{
		/// Counter value assigned to the name at its first occurrence.
		size_t id = 0;
		/// True if the name was referenced without having been declared in the hashed block.
		bool isExternal = false;
	};

	/// Stand-ins for all names encountered so far.
	std::map<YulString, VariableReference> m_variableReferences;
	/// Names classified as external, in order of first occurrence.
	std::vector<YulString> m_externalReferences;
	/// Next counter value for an external name.
	size_t m_externalIdentifierCount = 0;
	/// Next counter value for an internally declared name.
	size_t m_internalIdentifierCount = 0;
};
}

View File

@ -30,8 +30,8 @@ using namespace yul;
void EquivalentFunctionDetector::operator()(FunctionDefinition const& _fun)
{
RoughHeuristic heuristic(_fun);
auto& candidates = m_candidates[heuristic];
uint64_t bodyHash = m_blockHashes[&_fun.body];
auto& candidates = m_candidates[bodyHash];
for (auto const& candidate: candidates)
if (SyntacticallyEqual{}.statementEqual(_fun, *candidate))
{
@ -40,23 +40,3 @@ void EquivalentFunctionDetector::operator()(FunctionDefinition const& _fun)
}
candidates.push_back(&_fun);
}
/// Orders functions first by (parameter count, return variable count) and,
/// only if those agree, by the code size of their bodies.
bool EquivalentFunctionDetector::RoughHeuristic::operator<(EquivalentFunctionDetector::RoughHeuristic const& _rhs) const
{
	auto const lhsSignature = std::make_tuple(m_fun.parameters.size(), m_fun.returnVariables.size());
	auto const rhsSignature = std::make_tuple(_rhs.m_fun.parameters.size(), _rhs.m_fun.returnVariables.size());

	if (lhsSignature != rhsSignature)
		return lhsSignature < rhsSignature;

	// Same signature shape: fall back to the (lazily computed) code size.
	return codeSize() < _rhs.codeSize();
}
/// @returns the code size of the function body; computed on first use and cached afterwards.
size_t EquivalentFunctionDetector::RoughHeuristic::codeSize() const
{
	if (m_codeSize)
		return *m_codeSize;

	m_codeSize = CodeSize::codeSize(m_fun.body);
	return *m_codeSize;
}

View File

@ -20,6 +20,7 @@
#pragma once
#include <libyul/optimiser/ASTWalker.h>
#include <libyul/optimiser/BlockHasher.h>
#include <libyul/AsmDataForward.h>
namespace yul
@ -28,14 +29,14 @@ namespace yul
/**
* Optimiser component that detects syntactically equivalent functions.
*
* Prerequisite: Disambiguator
* Prerequisite: Disambiguator, ForLoopInitRewriter
*/
class EquivalentFunctionDetector: public ASTWalker
{
public:
static std::map<YulString, FunctionDefinition const*> run(Block& _block)
{
EquivalentFunctionDetector detector{};
EquivalentFunctionDetector detector{BlockHasher::run(_block)};
detector(_block);
return std::move(detector.m_duplicates);
}
@ -44,26 +45,10 @@ public:
void operator()(FunctionDefinition const& _fun) override;
private:
EquivalentFunctionDetector() = default;
/**
* Fast heuristic to detect distinct, resp. potentially equal functions.
*
* Defines a partial order on function definitions. If two functions
* are comparable (one is "less" than the other), they are distinct.
* If not (neither is "less" than the other), they are *potentially* equal.
*/
class RoughHeuristic
{
public:
RoughHeuristic(FunctionDefinition const& _fun): m_fun(_fun) {}
bool operator<(RoughHeuristic const& _rhs) const;
private:
std::size_t codeSize() const;
FunctionDefinition const& m_fun;
mutable boost::optional<std::size_t> m_codeSize;
// In case the heuristic doesn't turn out to be good enough, we might want to define a hash function for code blocks.
};
std::map<RoughHeuristic, std::vector<FunctionDefinition const*>> m_candidates;
EquivalentFunctionDetector(std::map<Block const*, uint64_t> _blockHashes): m_blockHashes(std::move(_blockHashes)) {}
std::map<Block const*, uint64_t> m_blockHashes;
std::map<uint64_t, std::vector<FunctionDefinition const*>> m_candidates;
std::map<YulString, FunctionDefinition const*> m_duplicates;
};

View File

@ -129,11 +129,8 @@ bool SyntacticallyEqual::statementEqual(If const& _lhs, If const& _rhs)
bool SyntacticallyEqual::statementEqual(Switch const& _lhs, Switch const& _rhs)
{
static auto const sortCasesByValue = [](Case const* _lhsCase, Case const* _rhsCase) -> bool {
return Less<Literal*>{}(_lhsCase->value.get(), _rhsCase->value.get());
};
std::set<Case const*, decltype(sortCasesByValue)> lhsCases(sortCasesByValue);
std::set<Case const*, decltype(sortCasesByValue)> rhsCases(sortCasesByValue);
std::set<Case const*, SwitchCaseCompareByLiteralValue> lhsCases;
std::set<Case const*, SwitchCaseCompareByLiteralValue> rhsCases;
for (auto const& lhsCase: _lhs.cases)
lhsCases.insert(&lhsCase);
for (auto const& rhsCase: _rhs.cases)