diff --git a/libyul/optimiser/DataFlowAnalyzer.cpp b/libyul/optimiser/DataFlowAnalyzer.cpp index b845d2722..d6f96346e 100644 --- a/libyul/optimiser/DataFlowAnalyzer.cpp +++ b/libyul/optimiser/DataFlowAnalyzer.cpp @@ -32,20 +32,37 @@ #include #include +#include #include +#include + +#include +#include +#include +#include +#include + +#include + +#include + using namespace std; using namespace solidity; using namespace solidity::util; using namespace solidity::yul; +using namespace solidity::smtutil; DataFlowAnalyzer::DataFlowAnalyzer( Dialect const& _dialect, - map _functionSideEffects + map _functionSideEffects, + set _ssaVariables ): -m_dialect(_dialect), -m_functionSideEffects(std::move(_functionSideEffects)), -m_knowledgeBase(_dialect, m_value) + m_dialect(_dialect), + m_functionSideEffects(std::move(_functionSideEffects)), + m_knowledgeBase(_dialect, m_value), + m_ssaVariables(std::move(_ssaVariables)), + m_solver(make_unique()) { if (auto const* builtin = _dialect.memoryStoreFunction(YulString{})) m_storeFunctionName[static_cast(StoreLoadLocation::Memory)] = builtin->name; @@ -109,6 +126,24 @@ void DataFlowAnalyzer::operator()(VariableDeclaration& _varDecl) names.emplace(var.name); m_variableScopes.back().variables += names; + // experimental part + if (_varDecl.variables.size() == 1 && _varDecl.value) + { + YulString varName = _varDecl.variables.front().name; + if (m_ssaVariables.count(varName)) + { + bool const inserted = m_variables.insert({varName, m_solver->newVariable("yul_" + varName.str(), defaultSort())}).second; + yulAssert(inserted, ""); + m_solver->addAssertion(m_variables.at(varName) == encodeExpression(*_varDecl.value)); + } + else // TODO can we skip this part? + { + bool const inserted = m_variables.insert({varName, m_solver->newVariable("yul_" + varName.str(), defaultSort())}).second; + yulAssert(inserted, ""); + m_solver->addAssertion(m_variables.at(varName) == newRestrictedVariable()); + } + } + if (_varDecl.value) { clearKnowledgeIfInvalidated(*_varDecl.value); @@ -354,6 +389,7 @@ void DataFlowAnalyzer::clearKnowledgeIfInvalidated(Block const& _block) SideEffectsCollector sideEffects(m_dialect, _block, &m_functionSideEffects); if (sideEffects.invalidatesStorage()) m_storage.clear(); + // TODO, this the part where we don't clear. if (sideEffects.invalidatesMemory()) m_memory.clear(); } @@ -363,8 +399,17 @@ void DataFlowAnalyzer::clearKnowledgeIfInvalidated(Expression const& _expr) SideEffectsCollector sideEffects(m_dialect, _expr, &m_functionSideEffects); if (sideEffects.invalidatesStorage()) m_storage.clear(); + if (sideEffects.invalidatesMemory()) - m_memory.clear(); + { + set keysToErase; + for ([[maybe_unused]] auto const& [key, value]: m_memory.values) + if (invalidatesMemoryLocation(key, _expr)) + keysToErase.insert(key); + + for (auto const& key: keysToErase) + m_memory.eraseKey(key); + } } void DataFlowAnalyzer::joinKnowledge( @@ -433,3 +478,213 @@ std::optional DataFlowAnalyzer::isSimpleLoad( return {}; } + +smtutil::Expression DataFlowAnalyzer::encodeExpression(yul::Expression const& _expression) +{ + return std::visit(GenericVisitor{ + [&](FunctionCall const& _functionCall) + { + if (auto const* dialect = dynamic_cast(&m_dialect)) + if (auto const* builtin = dialect->builtin(_functionCall.functionName.name)) + if (builtin->instruction) + return encodeEVMBuiltin(*builtin->instruction, _functionCall.arguments); + return newRestrictedVariable(); + }, + [&](Identifier const& _identifier) + { + if ( + m_ssaVariables.count(_identifier.name) && + m_variables.count(_identifier.name) + ) + return m_variables.at(_identifier.name); + else + return newRestrictedVariable(); + + }, + [&](Literal const& _literal) + { + return literalValue(_literal); + } + }, _expression); +} + +smtutil::Expression DataFlowAnalyzer::encodeEVMBuiltin( + evmasm::Instruction _instruction, + vector const& _arguments +) +{ + vector arguments = applyMap( + _arguments, + [this](yul::Expression const& _expr) { return encodeExpression(_expr); } + ); + switch (_instruction) + { + // TODO can wrapping be avoided? + case evmasm::Instruction::ADD: + return wrap(arguments.at(0) + arguments.at(1)); + // TODO only allow multiplication if one of the argument is constant + case evmasm::Instruction::MUL: + return wrap(arguments.at(0) * arguments.at(1)); + case evmasm::Instruction::SUB: + return wrap(arguments.at(0) - arguments.at(1)); + // TODO only allow division if the second argument is a non-zero constant + case evmasm::Instruction::DIV: + return smtutil::Expression::ite( + arguments.at(1) == constantValue(0), + constantValue(0), + arguments.at(0) / arguments.at(1) + ); + default: + break; + } + return newRestrictedVariable(); +} + +smtutil::Expression DataFlowAnalyzer::newVariable() +{ + return m_solver->newVariable(uniqueName(), defaultSort()); +} + +smtutil::Expression DataFlowAnalyzer::newRestrictedVariable() +{ + smtutil::Expression var = newVariable(); + m_solver->addAssertion(0 <= var && var < smtutil::Expression(bigint(1) << 256)); + return var; +} + +string DataFlowAnalyzer::uniqueName() +{ + return "expr_" + to_string(m_varCounter++); +} + +shared_ptr DataFlowAnalyzer::defaultSort() const +{ + return SortProvider::intSort(); +} + + +smtutil::Expression DataFlowAnalyzer::constantValue(size_t _value) const +{ + return _value; +} + +smtutil::Expression DataFlowAnalyzer::literalValue(Literal const& _literal) const +{ + return smtutil::Expression(valueOfLiteral(_literal)); +} + + +smtutil::Expression DataFlowAnalyzer::wrap(smtutil::Expression _value) +{ + smtutil::Expression rest = newRestrictedVariable(); + smtutil::Expression multiplier = newVariable(); + m_solver->addAssertion(_value == multiplier * smtutil::Expression(bigint(1) << 256) + rest); + return rest; +} + +bool DataFlowAnalyzer::invalidatesMemoryLocation(YulString const& _name, Expression const& _expression) +{ + if (!holds_alternative(_expression)) + return true; + // TODO hacky fix (?) + if (!m_variables.count(_name)) + return true; + + FunctionCall const& functionCall = get(_expression); + + auto addMemoryConstraints = [&]( + evmasm::Instruction _instruction, + smtutil::Expression _memoryLocation, + vector const& _arguments + ) + { + vector arguments = applyMap( + _arguments, + [this](yul::Expression const& _expr) { return encodeExpression(_expr); } + ); + + switch (_instruction) + { + + case evmasm::Instruction::CALLDATACOPY: + case evmasm::Instruction::CODECOPY: + case evmasm::Instruction::RETURNDATACOPY: + yulAssert(arguments.size() == 3, ""); + m_solver->addAssertion(arguments.at(0) <= _memoryLocation); + m_solver->addAssertion(_memoryLocation < arguments.at(0) + arguments.at(2)); + break; + + case evmasm::Instruction::EXTCODECOPY: + yulAssert(arguments.size() == 4, ""); + m_solver->addAssertion(arguments.at(1) <= _memoryLocation); + m_solver->addAssertion(_memoryLocation < arguments.at(1) + arguments.at(3)); + break; + + // TODO Should mstore and mstore8 be dealt with separately? + case evmasm::Instruction::MSTORE: + yulAssert(arguments.size() == 2, ""); + m_solver->addAssertion(arguments.at(0) <= _memoryLocation); + m_solver->addAssertion(_memoryLocation < arguments.at(0) + constantValue(32)); + break; + + case evmasm::Instruction::MSTORE8: + yulAssert(arguments.size() == 2, ""); + m_solver->addAssertion(arguments.at(0) <= _memoryLocation); + m_solver->addAssertion(_memoryLocation < arguments.at(0) + constantValue(1)); + break; + + case evmasm::Instruction::CALL: + case evmasm::Instruction::CALLCODE: + yulAssert(arguments.size() == 7, ""); + m_solver->addAssertion(arguments.at(5) <= _memoryLocation); + m_solver->addAssertion(_memoryLocation < arguments.at(5) + arguments.at(6)); + break; + + case evmasm::Instruction::STATICCALL: + case evmasm::Instruction::DELEGATECALL: + yulAssert(arguments.size() == 6, ""); + m_solver->addAssertion(arguments.at(4) <= _memoryLocation); + m_solver->addAssertion(_memoryLocation < arguments.at(4) + arguments.at(5)); + break; + + default: + ; + } + }; + + if (auto dialect = dynamic_cast(&m_dialect)) + if (auto builtin = dialect->builtin(functionCall.functionName.name)) + if (builtin->instruction) + { + // TODO at this point, the constraints should be satisfiable. + // ADD an assert about it? + m_solver->push(); + addMemoryConstraints( + *builtin->instruction, + m_variables.at(_name), + functionCall.arguments + ); + + CheckResult result1 = m_solver->check({}).first; + m_solver->pop(); + + m_solver->push(); + addMemoryConstraints( + *builtin->instruction, + m_variables.at(_name) + constantValue(31), + functionCall.arguments + ); + + CheckResult result2 = m_solver->check({}).first; + m_solver->pop(); + + if ( + (result1 == CheckResult::UNSATISFIABLE) && + (result2 == CheckResult::UNSATISFIABLE) + ) + return false; + + } + + return true; +} diff --git a/libyul/optimiser/DataFlowAnalyzer.h b/libyul/optimiser/DataFlowAnalyzer.h index 7a9b36a24..824be97ba 100644 --- a/libyul/optimiser/DataFlowAnalyzer.h +++ b/libyul/optimiser/DataFlowAnalyzer.h @@ -31,9 +31,15 @@ #include +#include +#include + #include +#include #include +#include + namespace solidity::yul { struct Dialect; @@ -85,7 +91,8 @@ public: /// The parameter is mostly used to determine movability of expressions. explicit DataFlowAnalyzer( Dialect const& _dialect, - std::map _functionSideEffects = {} + std::map _functionSideEffects = {}, + std::set _ssaVars = {} ); using ASTModifier::operator(); @@ -189,6 +196,37 @@ protected: Expression const m_zero{Literal{{}, LiteralKind::Number, YulString{"0"}, {}}}; /// List of scopes. std::vector m_variableScopes; + + // Experimental memory resolver + + smtutil::Expression encodeExpression( + Expression const& _expression + ); + + virtual smtutil::Expression encodeEVMBuiltin( + evmasm::Instruction _instruction, + std::vector const& _arguments + ); + + + smtutil::Expression newVariable(); + virtual smtutil::Expression newRestrictedVariable(); + std::string uniqueName(); + + + virtual std::shared_ptr defaultSort() const; + virtual smtutil::Expression wrap(smtutil::Expression _value); + + virtual smtutil::Expression constantValue(size_t _value) const; + virtual smtutil::Expression literalValue(Literal const& _literal) const; + + bool invalidatesMemoryLocation(YulString const& _name, Expression const& _expression); + + std::set const m_ssaVariables; + std::unique_ptr m_solver; + std::map m_variables; + + size_t m_varCounter = 0; }; }