Merge pull request #13702 from ethereum/new_knowledge_engine

Re-implement KnowledgeBase using groups of constantly-spaced variables.
This commit is contained in:
chriseth 2023-02-13 20:05:47 +01:00 committed by GitHub
commit 91b14174d8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 260 additions and 88 deletions

View File

@ -50,7 +50,7 @@ DataFlowAnalyzer::DataFlowAnalyzer(
):
m_dialect(_dialect),
m_functionSideEffects(std::move(_functionSideEffects)),
m_knowledgeBase(_dialect, [this](YulString _var) { return variableValue(_var); }),
m_knowledgeBase([this](YulString _var) { return variableValue(_var); }),
m_analyzeStores(_analyzeStores == MemoryAndStorage::Analyze)
{
if (m_analyzeStores)
@ -76,7 +76,7 @@ void DataFlowAnalyzer::operator()(ExpressionStatement& _statement)
cxx20::erase_if(m_state.environment.storage, mapTuple([&](auto&& key, auto&& value) {
return
!m_knowledgeBase.knownToBeDifferent(vars->first, key) &&
!m_knowledgeBase.knownToBeEqual(vars->second, value);
vars->second != value;
}));
m_state.environment.storage[vars->first] = vars->second;
return;

View File

@ -23,7 +23,6 @@
#include <libyul/AST.h>
#include <libyul/Utilities.h>
#include <libyul/optimiser/SimplificationRules.h>
#include <libyul/optimiser/DataFlowAnalyzer.h>
#include <libsolutil/CommonData.h>
@ -34,39 +33,31 @@ using namespace std;
using namespace solidity;
using namespace solidity::yul;
/// Constructor for code in SSA form.
/// Marks the values as SSA and installs a lookup callback that answers queries
/// from a private copy of @a _ssaValues (unknown variables yield nullptr via
/// util::valueOrNullptr), so the knowledge base does not depend on the lifetime
/// of the caller's map.
KnowledgeBase::KnowledgeBase(map<YulString, AssignedValue> const& _ssaValues):
	m_valuesAreSSA(true),
	// Init-capture on purpose: a plain by-copy capture `[_ssaValues]` of a
	// reference-to-const declares a *const* closure member, which cannot be
	// moved. Every transfer of the closure into the std::function would then
	// copy the whole map again. With the init-capture the member is a non-const
	// map that is copied exactly once here and moved afterwards.
	m_variableValues([ssaValues = _ssaValues](YulString _var) { return util::valueOrNullptr(ssaValues, _var); })
{}
bool KnowledgeBase::knownToBeDifferent(YulString _a, YulString _b)
{
// Try to use the simplification rules together with the
// current values to turn `sub(_a, _b)` into a nonzero constant.
// If that fails, try `eq(_a, _b)`.
if (optional<u256> difference = differenceIfKnownConstant(_a, _b))
return difference != 0;
Expression expr2 = simplify(FunctionCall{{}, {{}, "eq"_yulstring}, util::make_vector<Expression>(Identifier{{}, _a}, Identifier{{}, _b})});
if (holds_alternative<Literal>(expr2))
return valueOfLiteral(std::get<Literal>(expr2)) == 0;
return false;
}
optional<u256> KnowledgeBase::differenceIfKnownConstant(YulString _a, YulString _b)
{
// Try to use the simplification rules together with the
// current values to turn `sub(_a, _b)` into a constant.
Expression expr1 = simplify(FunctionCall{{}, {{}, "sub"_yulstring}, util::make_vector<Expression>(Identifier{{}, _a}, Identifier{{}, _b})});
if (Literal const* value = get_if<Literal>(&expr1))
return valueOfLiteral(*value);
return {};
VariableOffset offA = explore(_a);
VariableOffset offB = explore(_b);
if (offA.reference == offB.reference)
return offA.offset - offB.offset;
else
return nullopt;
}
bool KnowledgeBase::knownToBeDifferentByAtLeast32(YulString _a, YulString _b)
{
// Try to use the simplification rules together with the
// current values to turn `sub(_a, _b)` into a constant whose absolute value is at least 32.
if (optional<u256> difference = differenceIfKnownConstant(_a, _b))
return difference >= 32 && difference <= u256(0) - 32;
@ -75,34 +66,147 @@ bool KnowledgeBase::knownToBeDifferentByAtLeast32(YulString _a, YulString _b)
bool KnowledgeBase::knownToBeZero(YulString _a)
{
return valueIfKnownConstant(_a) == u256{};
return valueIfKnownConstant(_a) == 0;
}
optional<u256> KnowledgeBase::valueIfKnownConstant(YulString _a)
{
if (AssignedValue const* value = m_variableValues(_a))
if (Literal const* literal = get_if<Literal>(value->value))
return valueOfLiteral(*literal);
return {};
return explore(_a).absoluteValue();
}
Expression KnowledgeBase::simplify(Expression _expression)
optional<u256> KnowledgeBase::valueIfKnownConstant(Expression const& _expression)
{
m_counter = 0;
return simplifyRecursively(std::move(_expression));
if (Identifier const* ident = get_if<Identifier>(&_expression))
return valueIfKnownConstant(ident->name);
else if (Literal const* lit = get_if<Literal>(&_expression))
return valueOfLiteral(*lit);
else
return nullopt;
}
Expression KnowledgeBase::simplifyRecursively(Expression _expression)
KnowledgeBase::VariableOffset KnowledgeBase::explore(YulString _var)
{
if (m_counter++ > 100)
return _expression;
if (holds_alternative<FunctionCall>(_expression))
for (Expression& arg: std::get<FunctionCall>(_expression).arguments)
arg = simplifyRecursively(arg);
if (auto match = SimplificationRules::findFirstMatch(_expression, m_dialect, m_variableValues))
return simplifyRecursively(match->action().toExpression(debugDataOf(_expression), langutil::EVMVersion()));
return _expression;
Expression const* value = nullptr;
if (m_valuesAreSSA)
{
// In SSA, a once determined offset is always valid, so we first see
// if we already computed it.
if (VariableOffset const* varOff = util::valueOrNullptr(m_offsets, _var))
return *varOff;
value = valueOf(_var);
}
else
{
// For non-SSA, we query the value first so that the variable is reset if it has changed
// since the last call.
value = valueOf(_var);
if (VariableOffset const* varOff = util::valueOrNullptr(m_offsets, _var))
return *varOff;
}
if (value)
if (optional<VariableOffset> offset = explore(*value))
return setOffset(_var, *offset);
return setOffset(_var, VariableOffset{_var, 0});
}
/// Tries to express @a _value as "reference variable + constant offset".
/// An empty reference means the result is an absolute constant.
/// Only literals, identifiers and calls to `add` / `sub` are understood;
/// everything else yields nullopt.
optional<KnowledgeBase::VariableOffset> KnowledgeBase::explore(Expression const& _value)
{
	if (Literal const* constant = get_if<Literal>(&_value))
		return VariableOffset{YulString{}, valueOfLiteral(*constant)};
	if (Identifier const* variable = get_if<Identifier>(&_value))
		return explore(variable->name);

	FunctionCall const* call = get_if<FunctionCall>(&_value);
	if (!call)
		return nullopt;

	bool const isAddition = call->functionName.name == "add"_yulstring;
	bool const isSubtraction = !isAddition && call->functionName.name == "sub"_yulstring;
	if (!isAddition && !isSubtraction)
		return nullopt;

	// The operands are explored left to right, and the right one only if the
	// left one could be resolved.
	optional<VariableOffset> lhs = explore(call->arguments[0]);
	if (!lhs)
		return nullopt;
	optional<VariableOffset> rhs = explore(call->arguments[1]);
	if (!rhs)
		return nullopt;

	if (isAddition)
	{
		u256 const sum = lhs->offset + rhs->offset;
		// At least one summand has to be a constant; the result is then
		// relative to whatever the other summand is relative to.
		if (lhs->isAbsolute())
			return VariableOffset{rhs->reference, sum};
		if (rhs->isAbsolute())
			return VariableOffset{lhs->reference, sum};
		return nullopt;
	}

	u256 const difference = lhs->offset - rhs->offset;
	// Same reference on both sides (this includes two constants): the
	// reference cancels out and an absolute constant remains.
	if (lhs->reference == rhs->reference)
		return VariableOffset{YulString{}, difference};
	// Subtracting a constant keeps the reference of the left operand.
	if (rhs->isAbsolute())
		return VariableOffset{lhs->reference, difference};
	return nullopt;
}
/// Asks the value callback for the expression currently assigned to @a _var
/// (nullptr if there is none). Outside of SSA mode, the result is compared to
/// the value seen on the previous query; if it differs, everything known about
/// the variable is reset before the new value is recorded.
Expression const* KnowledgeBase::valueOf(YulString _var)
{
	Expression const* current = nullptr;
	if (AssignedValue const* assigned = m_variableValues(_var))
		current = assigned->value;

	if (!m_valuesAreSSA)
	{
		// A variable that was never queried counts as having had no value.
		auto const remembered = m_lastKnownValue.find(_var);
		Expression const* previous = remembered == m_lastKnownValue.end() ? nullptr : remembered->second;
		if (previous != current)
			reset(_var);
		m_lastKnownValue[_var] = current;
	}
	return current;
}
/// Forgets everything recorded about @a _var: its last known value, its offset
/// and its membership in a group. If other variables were stored relative to
/// @a _var, one of them becomes the new representative and all offsets of the
/// group are rebased onto it. Must not be called in SSA mode.
void KnowledgeBase::reset(YulString _var)
{
	yulAssert(!m_valuesAreSSA);

	m_lastKnownValue.erase(_var);

	auto const ownOffset = m_offsets.find(_var);
	if (ownOffset != m_offsets.end())
	{
		// Constants are not tracked in any group, so only relative offsets
		// need to be removed from the group of their reference.
		if (!ownOffset->second.isAbsolute())
			m_groupMembers[ownOffset->second.reference].erase(_var);
		m_offsets.erase(ownOffset);
	}

	auto const ownGroup = m_groupMembers.find(_var);
	if (ownGroup == m_groupMembers.end())
		return;

	// _var was a representative, so the remaining members need a new one.
	set<YulString>& members = ownGroup->second;
	if (!members.empty())
	{
		YulString successor = *members.begin();
		yulAssert(successor != _var);
		// shift = successor - _var
		u256 shift = m_offsets[successor].offset;
		for (YulString member: members)
		{
			VariableOffset& entry = m_offsets[member];
			yulAssert(entry.reference == _var);
			// member = _var + entry.offset
			//        = successor - shift + entry.offset,
			// so lowering the offset by shift expresses the same relation
			// in terms of the successor.
			entry.reference = successor;
			entry.offset -= shift;
		}
		m_groupMembers[successor] = std::move(members);
	}
	m_groupMembers.erase(ownGroup);
}
/// Records @a _value as the offset of @a _variable, registers the variable as a
/// member of the group of its reference and hands the stored offset back.
KnowledgeBase::VariableOffset KnowledgeBase::setOffset(YulString _variable, VariableOffset _value)
{
	VariableOffset& stored = m_offsets[_variable];
	stored = _value;
	// Absolute constants stay out of m_groupMembers: their "representative"
	// (the empty name) can never be reset, so the group never has to be rebased.
	if (stored.isAbsolute())
		return stored;
	m_groupMembers[stored.reference].insert(_variable);
	return stored;
}

View File

@ -38,32 +38,91 @@ struct AssignedValue;
/**
* Class that can answer questions about values of variables and their relations.
*
* Requires a callback that returns the current value of the variable.
* The value can change any time during the lifetime of the KnowledgeBase,
* it will update its internal data structure accordingly.
*
* This means that the code the KnowledgeBase is used on does not need to be in SSA
* form.
* The only requirement is that the assigned values are movable expressions.
*
* There is a constructor to provide all SSA values right at the beginning.
* If you use this, the KnowledgeBase will be slightly more efficient.
*
* Internally, tries to find groups of variables that have a mutual constant
* difference and stores these differences always relative to a specific
* representative variable of the group.
*
* Constant values form a special group of their own. They use the
* empty YulString as their representative "variable".
*/
class KnowledgeBase
{
public:
KnowledgeBase(
Dialect const& _dialect,
std::function<AssignedValue const*(YulString)> _variableValues
):
m_dialect(_dialect),
/// Constructor for arbitrary value callback that allows for variable values
/// to change in between calls to functions of this class.
KnowledgeBase(std::function<AssignedValue const*(YulString)> _variableValues):
m_variableValues(std::move(_variableValues))
{}
/// Constructor to use if source code is in SSA form and values are constant.
KnowledgeBase(std::map<YulString, AssignedValue> const& _ssaValues);
bool knownToBeDifferent(YulString _a, YulString _b);
std::optional<u256> differenceIfKnownConstant(YulString _a, YulString _b);
bool knownToBeDifferentByAtLeast32(YulString _a, YulString _b);
bool knownToBeEqual(YulString _a, YulString _b) const { return _a == _b; }
bool knownToBeZero(YulString _a);
std::optional<u256> valueIfKnownConstant(YulString _a);
std::optional<u256> valueIfKnownConstant(Expression const& _expression);
private:
Expression simplify(Expression _expression);
Expression simplifyRecursively(Expression _expression);
/**
* Constant offset relative to a reference variable, or absolute constant if the
* reference variable is the empty YulString.
*/
struct VariableOffset
{
YulString reference;
u256 offset;
Dialect const& m_dialect;
bool isAbsolute() const
{
return reference.empty();
}
std::optional<u256> absoluteValue() const
{
if (isAbsolute())
return offset;
else
return std::nullopt;
}
};
VariableOffset explore(YulString _var);
std::optional<VariableOffset> explore(Expression const& _value);
/// Retrieves the current value of a variable and potentially resets the variable if it is not up to date.
Expression const* valueOf(YulString _var);
/// Resets all information about the variable and removes it from its group,
/// potentially finding a new representative.
void reset(YulString _var);
VariableOffset setOffset(YulString _variable, VariableOffset _value);
/// If true, we can assume that variable values never change and skip some steps.
bool m_valuesAreSSA = false;
/// Callback to retrieve the current value of a variable.
std::function<AssignedValue const*(YulString)> m_variableValues;
size_t m_counter = 0;
/// Offsets for each variable to one representative per group.
/// The empty string is the representative of the constant value zero.
std::map<YulString, VariableOffset> m_offsets;
/// Last known value of each variable we queried.
std::map<YulString, Expression const*> m_lastKnownValue;
/// For each representative, variables that use it to offset from.
std::map<YulString, std::set<YulString>> m_groupMembers;
};
}

View File

@ -92,6 +92,21 @@ void UnusedStoreEliminator::run(OptimiserStepContext& _context, Block& _ast)
remover(_ast);
}
// Out-of-line constructor: forwards the dialect to UnusedStoreBase, stores the
// side-effect maps, the SSA values and the memory flag, and creates the
// KnowledgeBase member from the SSA values.
// NOTE(review): _controlFlowSideEffects is taken by value and then passed on as
// an lvalue; if the member is a map held by value, std::move would avoid a
// second copy — confirm against the member declaration.
UnusedStoreEliminator::UnusedStoreEliminator(
Dialect const& _dialect,
map<YulString, SideEffects> const& _functionSideEffects,
map<YulString, ControlFlowSideEffects> _controlFlowSideEffects,
map<YulString, AssignedValue> const& _ssaValues,
bool _ignoreMemory
):
UnusedStoreBase(_dialect),
m_ignoreMemory(_ignoreMemory),
m_functionSideEffects(_functionSideEffects),
m_controlFlowSideEffects(_controlFlowSideEffects),
m_ssaValues(_ssaValues),
// Initialised from the constructor parameter, not from the m_ssaValues member,
// so this does not depend on the declaration order of the two members.
m_knowledgeBase(_ssaValues)
{}
void UnusedStoreEliminator::operator()(FunctionCall const& _functionCall)
{
UnusedStoreBase::operator()(_functionCall);
@ -174,12 +189,11 @@ void UnusedStoreEliminator::visit(Statement const& _statement)
initialState = State::Used;
auto startOffset = identifierNameIfSSA(funCall->arguments.at(1));
auto length = identifierNameIfSSA(funCall->arguments.at(2));
KnowledgeBase knowledge(m_dialect, [this](YulString _var) { return util::valueOrNullptr(m_ssaValues, _var); });
if (length && startOffset)
{
FunctionCall const* lengthCall = get_if<FunctionCall>(m_ssaValues.at(*length).value);
if (
knowledge.knownToBeZero(*startOffset) &&
m_knowledgeBase.knownToBeZero(*startOffset) &&
lengthCall &&
toEVMInstruction(m_dialect, lengthCall->functionName.name) == Instruction::RETURNDATASIZE
)
@ -267,8 +281,6 @@ bool UnusedStoreEliminator::knownUnrelated(
UnusedStoreEliminator::Operation const& _op2
) const
{
KnowledgeBase knowledge(m_dialect, [this](YulString _var) { return util::valueOrNullptr(m_ssaValues, _var); });
if (_op1.location != _op2.location)
return true;
if (_op1.location == Location::Storage)
@ -278,26 +290,26 @@ bool UnusedStoreEliminator::knownUnrelated(
yulAssert(
_op1.length &&
_op2.length &&
knowledge.valueIfKnownConstant(*_op1.length) == 1 &&
knowledge.valueIfKnownConstant(*_op2.length) == 1
m_knowledgeBase.valueIfKnownConstant(*_op1.length) == 1 &&
m_knowledgeBase.valueIfKnownConstant(*_op2.length) == 1
);
return knowledge.knownToBeDifferent(*_op1.start, *_op2.start);
return m_knowledgeBase.knownToBeDifferent(*_op1.start, *_op2.start);
}
}
else
{
yulAssert(_op1.location == Location::Memory, "");
if (
(_op1.length && knowledge.knownToBeZero(*_op1.length)) ||
(_op2.length && knowledge.knownToBeZero(*_op2.length))
(_op1.length && m_knowledgeBase.knownToBeZero(*_op1.length)) ||
(_op2.length && m_knowledgeBase.knownToBeZero(*_op2.length))
)
return true;
if (_op1.start && _op1.length && _op2.start)
{
optional<u256> length1 = knowledge.valueIfKnownConstant(*_op1.length);
optional<u256> start1 = knowledge.valueIfKnownConstant(*_op1.start);
optional<u256> start2 = knowledge.valueIfKnownConstant(*_op2.start);
optional<u256> length1 = m_knowledgeBase.valueIfKnownConstant(*_op1.length);
optional<u256> start1 = m_knowledgeBase.valueIfKnownConstant(*_op1.start);
optional<u256> start2 = m_knowledgeBase.valueIfKnownConstant(*_op2.start);
if (
(length1 && start1 && start2) &&
*start1 + *length1 >= *start1 && // no overflow
@ -307,9 +319,9 @@ bool UnusedStoreEliminator::knownUnrelated(
}
if (_op2.start && _op2.length && _op1.start)
{
optional<u256> length2 = knowledge.valueIfKnownConstant(*_op2.length);
optional<u256> start2 = knowledge.valueIfKnownConstant(*_op2.start);
optional<u256> start1 = knowledge.valueIfKnownConstant(*_op1.start);
optional<u256> length2 = m_knowledgeBase.valueIfKnownConstant(*_op2.length);
optional<u256> start2 = m_knowledgeBase.valueIfKnownConstant(*_op2.start);
optional<u256> start1 = m_knowledgeBase.valueIfKnownConstant(*_op1.start);
if (
(length2 && start2 && start1) &&
*start2 + *length2 >= *start2 && // no overflow
@ -320,12 +332,12 @@ bool UnusedStoreEliminator::knownUnrelated(
if (_op1.start && _op1.length && _op2.start && _op2.length)
{
optional<u256> length1 = knowledge.valueIfKnownConstant(*_op1.length);
optional<u256> length2 = knowledge.valueIfKnownConstant(*_op2.length);
optional<u256> length1 = m_knowledgeBase.valueIfKnownConstant(*_op1.length);
optional<u256> length2 = m_knowledgeBase.valueIfKnownConstant(*_op2.length);
if (
(length1 && *length1 <= 32) &&
(length2 && *length2 <= 32) &&
knowledge.knownToBeDifferentByAtLeast32(*_op1.start, *_op2.start)
m_knowledgeBase.knownToBeDifferentByAtLeast32(*_op1.start, *_op2.start)
)
return true;
}
@ -348,22 +360,20 @@ bool UnusedStoreEliminator::knownCovered(
return true;
if (_covered.location == Location::Memory)
{
KnowledgeBase knowledge(m_dialect, [this](YulString _var) { return util::valueOrNullptr(m_ssaValues, _var); });
if (_covered.length && knowledge.knownToBeZero(*_covered.length))
if (_covered.length && m_knowledgeBase.knownToBeZero(*_covered.length))
return true;
// Condition (i = cover_i_ng, e = cover_e_d):
// i.start <= e.start && e.start + e.length <= i.start + i.length
if (!_covered.start || !_covering.start || !_covered.length || !_covering.length)
return false;
optional<u256> coveredLength = knowledge.valueIfKnownConstant(*_covered.length);
optional<u256> coveringLength = knowledge.valueIfKnownConstant(*_covering.length);
if (knowledge.knownToBeEqual(*_covered.start, *_covering.start))
optional<u256> coveredLength = m_knowledgeBase.valueIfKnownConstant(*_covered.length);
optional<u256> coveringLength = m_knowledgeBase.valueIfKnownConstant(*_covering.length);
if (*_covered.start == *_covering.start)
if (coveredLength && coveringLength && *coveredLength <= *coveringLength)
return true;
optional<u256> coveredStart = knowledge.valueIfKnownConstant(*_covered.start);
optional<u256> coveringStart = knowledge.valueIfKnownConstant(*_covering.start);
optional<u256> coveredStart = m_knowledgeBase.valueIfKnownConstant(*_covered.start);
optional<u256> coveringStart = m_knowledgeBase.valueIfKnownConstant(*_covering.start);
if (coveredStart && coveringStart && coveredLength && coveringLength)
if (
*coveringStart <= *coveredStart &&

View File

@ -27,6 +27,7 @@
#include <libyul/optimiser/OptimiserStep.h>
#include <libyul/optimiser/Semantics.h>
#include <libyul/optimiser/UnusedStoreBase.h>
#include <libyul/optimiser/KnowledgeBase.h>
#include <libevmasm/SemanticInformation.h>
@ -68,13 +69,7 @@ public:
std::map<YulString, ControlFlowSideEffects> _controlFlowSideEffects,
std::map<YulString, AssignedValue> const& _ssaValues,
bool _ignoreMemory
):
UnusedStoreBase(_dialect),
m_ignoreMemory(_ignoreMemory),
m_functionSideEffects(_functionSideEffects),
m_controlFlowSideEffects(_controlFlowSideEffects),
m_ssaValues(_ssaValues)
{}
);
using UnusedStoreBase::operator();
void operator()(FunctionCall const& _functionCall) override;
@ -121,6 +116,8 @@ private:
std::map<YulString, AssignedValue> const& m_ssaValues;
std::map<Statement const*, Operation> m_storeOperations;
KnowledgeBase mutable m_knowledgeBase;
};
}

View File

@ -46,7 +46,7 @@ contract C {
}
// ----
// test() -> 0x20, 0x14, "[a called][b called]"
// gas irOptimized: 116673
// gas irOptimized: 116660
// gas legacy: 119030
// gas legacyOptimized: 117021
// test2() -> 0x20, 0x14, "[b called][a called]"

View File

@ -58,7 +58,7 @@ protected:
for (auto const& [name, expression]: m_ssaValues.values())
m_values[name].value = expression;
return KnowledgeBase(m_dialect, [this](YulString _var) { return util::valueOrNullptr(m_values, _var); });
return KnowledgeBase([this](YulString _var) { return util::valueOrNullptr(m_values, _var); });
}
EVMDialect m_dialect{EVMVersion{}, true};
@ -83,9 +83,11 @@ BOOST_AUTO_TEST_CASE(basic)
BOOST_CHECK(!kb.knownToBeDifferent("a"_yulstring, "b"_yulstring));
// This only works if the variable names are the same.
// It assumes that SSA+CSE+Simplifier actually replaces the variables.
BOOST_CHECK(!kb.knownToBeEqual("a"_yulstring, "b"_yulstring));
BOOST_CHECK(!kb.valueIfKnownConstant("a"_yulstring));
BOOST_CHECK(kb.valueIfKnownConstant("zero"_yulstring) == u256(0));
BOOST_CHECK(kb.differenceIfKnownConstant("a"_yulstring, "b"_yulstring) == u256(0));
BOOST_CHECK(kb.differenceIfKnownConstant("a"_yulstring, "c"_yulstring) == u256(0));
BOOST_CHECK(kb.valueIfKnownConstant("e"_yulstring) == u256(0));
}
BOOST_AUTO_TEST_CASE(difference)
@ -94,7 +96,7 @@ BOOST_AUTO_TEST_CASE(difference)
let a := calldataload(0)
let b := add(a, 200)
let c := add(a, 220)
let d := add(c, 12)
let d := add(12, c)
let e := sub(c, 12)
})");