From 47aa1c65ae7f3acc576372ec327f346d790bccea Mon Sep 17 00:00:00 2001
From: chriseth
Date: Thu, 10 Nov 2022 11:49:40 +0100
Subject: [PATCH] Re-implement KnowledgeBase using groups of constantly-spaced
 variables.

---
Review notes (ignored by git-am, not part of the commit message):
 - explore(Expression): `sub(x + i, x + j)` now yields the absolute constant
   `i - j`; `add(x + i, x + j)` and `sub(<constant>, x + j)` are no longer
   reported as "x plus constant" because they are not of that form.
 - reset(): every remaining group member is rebased (previously only the new
   representative's offset was modified, once per loop iteration) and the
   group is re-keyed under its new representative.
 - The commit id above and the index line of KnowledgeBase.cpp predate these
   changes.

 libyul/optimiser/DataFlowAnalyzer.cpp      |   4 +-
 libyul/optimiser/KnowledgeBase.cpp         | 151 +++++++++++++++------
 libyul/optimiser/KnowledgeBase.h           |  57 ++++++--
 libyul/optimiser/UnusedStoreEliminator.cpp |   8 +-
 test/libyul/KnowledgeBaseTest.cpp          |   8 +-
 5 files changed, 170 insertions(+), 58 deletions(-)

diff --git a/libyul/optimiser/DataFlowAnalyzer.cpp b/libyul/optimiser/DataFlowAnalyzer.cpp
index 59ba7c8f0..81028e300 100644
--- a/libyul/optimiser/DataFlowAnalyzer.cpp
+++ b/libyul/optimiser/DataFlowAnalyzer.cpp
@@ -50,7 +50,7 @@ DataFlowAnalyzer::DataFlowAnalyzer(
 ):
 	m_dialect(_dialect),
 	m_functionSideEffects(std::move(_functionSideEffects)),
-	m_knowledgeBase(_dialect, [this](YulString _var) { return variableValue(_var); }),
+	m_knowledgeBase([this](YulString _var) { return variableValue(_var); }),
 	m_analyzeStores(_analyzeStores == MemoryAndStorage::Analyze)
 {
 	if (m_analyzeStores)
@@ -76,7 +76,7 @@ void DataFlowAnalyzer::operator()(ExpressionStatement& _statement)
 			cxx20::erase_if(m_state.environment.storage, mapTuple([&](auto&& key, auto&& value) {
 				return
 					!m_knowledgeBase.knownToBeDifferent(vars->first, key) &&
-					!m_knowledgeBase.knownToBeEqual(vars->second, value);
+					vars->second != value;
 			}));
 			m_state.environment.storage[vars->first] = vars->second;
 			return;
diff --git a/libyul/optimiser/KnowledgeBase.cpp b/libyul/optimiser/KnowledgeBase.cpp
index c706d529b..2fd5fb0a9 100644
--- a/libyul/optimiser/KnowledgeBase.cpp
+++ b/libyul/optimiser/KnowledgeBase.cpp
@@ -23,7 +23,6 @@
 
 #include <libyul/AST.h>
 #include <libyul/Utilities.h>
-#include <libyul/optimiser/SimplificationRules.h>
 #include <libyul/optimiser/DataFlowAnalyzer.h>
 
 #include <libsolutil/CommonData.h>
@@ -36,37 +35,24 @@ using namespace solidity::yul;
 
 bool KnowledgeBase::knownToBeDifferent(YulString _a, YulString _b)
 {
-	// Try to use the simplification rules together with the
-	// current values to turn `sub(_a, _b)` into a nonzero constant.
-	// If that fails, try `eq(_a, _b)`.
-
 	if (optional<u256> difference = differenceIfKnownConstant(_a, _b))
 		return difference != 0;
-
-	Expression expr2 = simplify(FunctionCall{{}, {{}, "eq"_yulstring}, util::make_vector<Expression>(Identifier{{}, _a}, Identifier{{}, _b})});
-	if (holds_alternative<Literal>(expr2))
-		return valueOfLiteral(std::get<Literal>(expr2)) == 0;
-
 	return false;
 }
 
 optional<u256> KnowledgeBase::differenceIfKnownConstant(YulString _a, YulString _b)
 {
-	// Try to use the simplification rules together with the
-	// current values to turn `sub(_a, _b)` into a constant.
-
-	Expression expr1 = simplify(FunctionCall{{}, {{}, "sub"_yulstring}, util::make_vector<Expression>(Identifier{{}, _a}, Identifier{{}, _b})});
-	if (Literal const* value = get_if<Literal>(&expr1))
-		return valueOfLiteral(*value);
-
-	return {};
+	VariableOffset offA = explore(_a);
+	VariableOffset offB = explore(_b);
+	if (offA.reference == offB.reference)
+		return offA.offset - offB.offset;
+	else
+		return {};
 }
 
+
 bool KnowledgeBase::knownToBeDifferentByAtLeast32(YulString _a, YulString _b)
 {
-	// Try to use the simplification rules together with the
-	// current values to turn `sub(_a, _b)` into a constant whose absolute value is at least 32.
-
 	if (optional<u256> difference = differenceIfKnownConstant(_a, _b))
 		return difference >= 32 && difference <= u256(0) - 32;
 
@@ -80,29 +66,116 @@ bool KnowledgeBase::knownToBeZero(YulString _a)
 
 optional<u256> KnowledgeBase::valueIfKnownConstant(YulString _a)
 {
-	if (AssignedValue const* value = m_variableValues(_a))
-		if (Literal const* literal = get_if<Literal>(value->value))
-			return valueOfLiteral(*literal);
+	VariableOffset offset = explore(_a);
+	if (offset.reference == YulString{})
+		return offset.offset;
+	else
+		return nullopt;
+}
+
+optional<u256> KnowledgeBase::valueIfKnownConstant(Expression const& _expression)
+{
+	if (Identifier const* ident = get_if<Identifier>(&_expression))
+		return valueIfKnownConstant(ident->name);
+	else if (Literal const* lit = get_if<Literal>(&_expression))
+		return valueOfLiteral(*lit);
+	else
+		return {};
+}
+
+KnowledgeBase::VariableOffset KnowledgeBase::explore(YulString _var)
+{
+	// We query the value first so that the variable is reset if it has changed
+	// since the last call.
+	Expression const* value = valueOf(_var);
+	if (VariableOffset const* varOff = util::valueOrNullptr(m_offsets, _var))
+		return *varOff;
+
+	if (value)
+		if (optional<VariableOffset> offset = explore(*value))
+			return setOffset(_var, *offset);
+	return setOffset(_var, VariableOffset{_var, 0});
+
+}
+
+optional<KnowledgeBase::VariableOffset> KnowledgeBase::explore(Expression const& _value)
+{
+	if (Literal const* literal = std::get_if<Literal>(&_value))
+		return VariableOffset{YulString{}, valueOfLiteral(*literal)};
+	else if (Identifier const* identifier = std::get_if<Identifier>(&_value))
+		return explore(identifier->name);
+	else if (FunctionCall const* f = get_if<FunctionCall>(&_value))
+		if (f->functionName.name == "add"_yulstring || f->functionName.name == "sub"_yulstring)
+			if (optional<VariableOffset> a = explore(f->arguments[0]))
+				if (optional<VariableOffset> b = explore(f->arguments[1]))
+				{
+					bool const isAdd = f->functionName.name == "add"_yulstring;
+					u256 offset = isAdd ?
+						a->offset + b->offset :
+						a->offset - b->offset;
+					if (b->reference == YulString{})
+						// b is constant (this also covers the case where both are constant)
+						return VariableOffset{a->reference, offset};
+					else if (isAdd && a->reference == YulString{})
+						// a is constant; only valid for add, sub would negate the reference
+						return VariableOffset{b->reference, offset};
+					else if (!isAdd && a->reference == b->reference)
+						// The common reference cancels out, the result is a constant
+						return VariableOffset{YulString{}, offset};
+				}
+
 	return {};
 }
 
-Expression KnowledgeBase::simplify(Expression _expression)
+Expression const* KnowledgeBase::valueOf(YulString _var)
 {
-	m_counter = 0;
-	return simplifyRecursively(std::move(_expression));
+	Expression const* lastValue = m_lastKnownValue[_var];
+	AssignedValue const* assignedValue = m_variableValues(_var);
+	Expression const* currentValue = assignedValue ? assignedValue->value : nullptr;
+	if (lastValue != currentValue)
+		reset(_var);
+	m_lastKnownValue[_var] = currentValue;
+	return currentValue;
 }
 
-Expression KnowledgeBase::simplifyRecursively(Expression _expression)
+void KnowledgeBase::reset(YulString _var)
 {
-	if (m_counter++ > 100)
-		return _expression;
-
-	if (holds_alternative<FunctionCall>(_expression))
-		for (Expression& arg: std::get<FunctionCall>(_expression).arguments)
-			arg = simplifyRecursively(arg);
-
-	if (auto match = SimplificationRules::findFirstMatch(_expression, m_dialect, m_variableValues))
-		return simplifyRecursively(match->action().toExpression(debugDataOf(_expression), langutil::EVMVersion()));
-
-	return _expression;
+	m_lastKnownValue.erase(_var);
+	if (VariableOffset const* offset = util::valueOrNullptr(m_offsets, _var))
+	{
+		// Remove var from its group
+		if (offset->reference != YulString{})
+			m_groupMembers[offset->reference].erase(_var);
+		m_offsets.erase(_var);
+	}
+	if (set<YulString>* group = util::valueOrNullptr(m_groupMembers, _var))
+	{
+		// _var was a representative, we might have to find a new one.
+		if (group->empty())
+			m_groupMembers.erase(_var);
+		else
+		{
+			YulString newRepresentative = *group->begin();
+			u256 newOffset = m_offsets[newRepresentative].offset;
+			for (YulString groupMember: *group)
+			{
+				yulAssert(m_offsets[groupMember].reference == _var);
+				m_offsets[groupMember].reference = newRepresentative;
+				m_offsets[groupMember].offset -= newOffset;
+			}
+			// Re-key the group under its new representative.
+			m_groupMembers[newRepresentative] = std::move(*group);
+			m_groupMembers.erase(_var);
+		}
+	}
+}
+
+KnowledgeBase::VariableOffset KnowledgeBase::setOffset(YulString _variable, VariableOffset _value)
+{
+	m_offsets[_variable] = _value;
+	// Constants are not tracked in m_groupMembers because
+	// the "representative" can never be reset.
+	if (_value.reference != YulString{})
+		m_groupMembers[_value.reference].insert(_variable);
+	return _value;
 }
diff --git a/libyul/optimiser/KnowledgeBase.h b/libyul/optimiser/KnowledgeBase.h
index 999d0e312..82c82a7e9 100644
--- a/libyul/optimiser/KnowledgeBase.h
+++ b/libyul/optimiser/KnowledgeBase.h
@@ -38,32 +38,69 @@ struct AssignedValue;
 
 /**
  * Class that can answer questions about values of variables and their relations.
+ *
+ * Requires a callback that returns the current value of the variable.
+ * The value can change any time during the lifetime of the KnowledgeBase,
+ * it will update its internal data structure accordingly.
+ *
+ * This means that the code the KnowledgeBase is used on does not need to be in SSA
+ * form.
+ * The only requirement is that the assigned values are movable expressions.
+ *
+ * Internally, tries to find groups of variables that have a mutual constant
+ * difference and stores these differences always relative to a specific
+ * representative variable of the group.
+ *
+ * There is a special group which is the constant values. Those use the
+ * empty YulString as representative "variable".
  */
 class KnowledgeBase
 {
 public:
-	KnowledgeBase(
-		Dialect const& _dialect,
-		std::function<AssignedValue const*(YulString)> _variableValues
-	):
-		m_dialect(_dialect),
+	KnowledgeBase(std::function<AssignedValue const*(YulString)> _variableValues):
 		m_variableValues(std::move(_variableValues))
 	{}
 
 	bool knownToBeDifferent(YulString _a, YulString _b);
 	std::optional<u256> differenceIfKnownConstant(YulString _a, YulString _b);
 	bool knownToBeDifferentByAtLeast32(YulString _a, YulString _b);
-	bool knownToBeEqual(YulString _a, YulString _b) const { return _a == _b; }
 	bool knownToBeZero(YulString _a);
 	std::optional<u256> valueIfKnownConstant(YulString _a);
+	std::optional<u256> valueIfKnownConstant(Expression const& _expression);
 
 private:
-	Expression simplify(Expression _expression);
-	Expression simplifyRecursively(Expression _expression);
+	/**
+	 * Constant offset relative to a reference variable, or absolute constant if the
+	 * reference variable is the empty YulString.
+	 */
+	struct VariableOffset
+	{
+		YulString reference;
+		u256 offset;
+	};
 
-	Dialect const& m_dialect;
+	VariableOffset explore(YulString _var);
+	std::optional<VariableOffset> explore(Expression const& _value);
+
+	/// Retrieves the current value of a variable and potentially resets the variable if it is not up to date.
+	Expression const* valueOf(YulString _var);
+
+	/// Resets all information about the variable and removes it from its group,
+	/// potentially finding a new representative.
+	void reset(YulString _var);
+
+	VariableOffset setOffset(YulString _variable, VariableOffset _value);
+
+	/// Callback to retrieve the current value of a variable.
 	std::function<AssignedValue const*(YulString)> m_variableValues;
-	size_t m_counter = 0;
+
+	/// Offsets for each variable to one representative per group.
+	/// The empty string is the representative of the constant value zero.
+	std::map<YulString, VariableOffset> m_offsets;
+	/// Last known value of each variable we queried.
+	std::map<YulString, Expression const*> m_lastKnownValue;
+	/// For each representative, variables that use it to offset from.
+	std::map<YulString, std::set<YulString>> m_groupMembers;
 };
 
 }
diff --git a/libyul/optimiser/UnusedStoreEliminator.cpp b/libyul/optimiser/UnusedStoreEliminator.cpp
index 2e96be2e7..d5155ad7e 100644
--- a/libyul/optimiser/UnusedStoreEliminator.cpp
+++ b/libyul/optimiser/UnusedStoreEliminator.cpp
@@ -174,7 +174,7 @@ void UnusedStoreEliminator::visit(Statement const& _statement)
 			initialState = State::Used;
 			auto startOffset = identifierNameIfSSA(funCall->arguments.at(1));
 			auto length = identifierNameIfSSA(funCall->arguments.at(2));
-			KnowledgeBase knowledge(m_dialect, [this](YulString _var) { return util::valueOrNullptr(m_ssaValues, _var); });
+			KnowledgeBase knowledge([this](YulString _var) { return util::valueOrNullptr(m_ssaValues, _var); });
 			if (length && startOffset)
 			{
 				FunctionCall const* lengthCall = get_if<FunctionCall>(m_ssaValues.at(*length).value);
@@ -267,7 +267,7 @@ bool UnusedStoreEliminator::knownUnrelated(
 	UnusedStoreEliminator::Operation const& _op2
 ) const
 {
-	KnowledgeBase knowledge(m_dialect, [this](YulString _var) { return util::valueOrNullptr(m_ssaValues, _var); });
+	KnowledgeBase knowledge([this](YulString _var) { return util::valueOrNullptr(m_ssaValues, _var); });
 
 	if (_op1.location != _op2.location)
 		return true;
@@ -348,7 +348,7 @@ bool UnusedStoreEliminator::knownCovered(
 		return true;
 	if (_covered.location == Location::Memory)
 	{
-		KnowledgeBase knowledge(m_dialect, [this](YulString _var) { return util::valueOrNullptr(m_ssaValues, _var); });
+		KnowledgeBase knowledge([this](YulString _var) { return util::valueOrNullptr(m_ssaValues, _var); });
 
 		if (_covered.length && knowledge.knownToBeZero(*_covered.length))
 			return true;
@@ -359,7 +359,7 @@ bool UnusedStoreEliminator::knownCovered(
 			return false;
 		optional<u256> coveredLength = knowledge.valueIfKnownConstant(*_covered.length);
 		optional<u256> coveringLength = knowledge.valueIfKnownConstant(*_covering.length);
-		if (knowledge.knownToBeEqual(*_covered.start, *_covering.start))
+		if (*_covered.start == *_covering.start)
 			if (coveredLength && coveringLength && *coveredLength <= *coveringLength)
 				return true;
 		optional<u256> coveredStart = knowledge.valueIfKnownConstant(*_covered.start);
diff --git a/test/libyul/KnowledgeBaseTest.cpp b/test/libyul/KnowledgeBaseTest.cpp
index ec2f0313d..7cb5a8ae3 100644
--- a/test/libyul/KnowledgeBaseTest.cpp
+++ b/test/libyul/KnowledgeBaseTest.cpp
@@ -58,7 +58,7 @@ protected:
 		for (auto const& [name, expression]: m_ssaValues.values())
 			m_values[name].value = expression;
 
-		return KnowledgeBase(m_dialect, [this](YulString _var) { return util::valueOrNullptr(m_values, _var); });
+		return KnowledgeBase([this](YulString _var) { return util::valueOrNullptr(m_values, _var); });
 	}
 
 	EVMDialect m_dialect{EVMVersion{}, true};
@@ -83,9 +83,11 @@ BOOST_AUTO_TEST_CASE(basic)
 	BOOST_CHECK(!kb.knownToBeDifferent("a"_yulstring, "b"_yulstring));
 	// This only works if the variable names are the same.
 	// It assumes that SSA+CSE+Simplifier actually replaces the variables.
-	BOOST_CHECK(!kb.knownToBeEqual("a"_yulstring, "b"_yulstring));
 	BOOST_CHECK(!kb.valueIfKnownConstant("a"_yulstring));
 	BOOST_CHECK(kb.valueIfKnownConstant("zero"_yulstring) == u256(0));
+	BOOST_CHECK(kb.differenceIfKnownConstant("a"_yulstring, "b"_yulstring) == u256(0));
+	BOOST_CHECK(kb.differenceIfKnownConstant("a"_yulstring, "c"_yulstring) == u256(0));
+	BOOST_CHECK(kb.valueIfKnownConstant("e"_yulstring) == u256(0));
 }
 
 BOOST_AUTO_TEST_CASE(difference)
@@ -94,7 +96,7 @@ BOOST_AUTO_TEST_CASE(difference)
 		let a := calldataload(0)
 		let b := add(a, 200)
 		let c := add(a, 220)
-		let d := add(c, 12)
+		let d := add(12, c)
 		let e := sub(c, 12)
 	})");
 