diff --git a/libdevcore/CommonData.h b/libdevcore/CommonData.h index 1a5a627cc..bc71c88c4 100644 --- a/libdevcore/CommonData.h +++ b/libdevcore/CommonData.h @@ -98,6 +98,14 @@ inline std::set operator+(std::set&& _a, U&& _b) ret += std::forward(_b); return ret; } +/// Remove one set from another one. +template +inline std::set& operator-=(std::set& _a, std::set const& _b) +{ + for (auto const& x: _b) + _a.erase(x); + return _a; +} namespace dev { diff --git a/libyul/optimiser/README.md b/libyul/optimiser/README.md index dfd09184c..58cc006b4 100644 --- a/libyul/optimiser/README.md +++ b/libyul/optimiser/README.md @@ -119,8 +119,9 @@ so that other components can more easily work with it. The final representation will be similar to a static-single-assignment (SSA) form, with the difference that it does not make use of explicit "phi" functions which combines the values from different branches of control flow because such a feature does not exist -in the Yul language. Instead, assignments to existing variables are -used. +in the Yul language. Instead, when control flow merges, if a variable is re-assigned +in one of the branches, a new SSA variable is declared to hold its current value, +so that the following expressions still only need to reference SSA variables. 
An example transformation is the following: @@ -139,21 +140,25 @@ as follows: { let _1 := 0 - let a_1 := calldataload(_1) + let a_9 := calldataload(_1) + let a := a_9 let _2 := 0x20 - let b_1 := calldataload(_2) - let b := b_1 + let b_10 := calldataload(_2) + let b := b_10 let _3 := 0 - let _4 := gt(a_1, _3) - if _4 { + let _4 := gt(a_9, _3) + if _4 + { let _5 := 0x20 - let b_2 := mul(b_1, _5) - b := b_2 + let b_11 := mul(b_10, _5) + b := b_11 } - let a_2 := add(a_1, 1) - let _6 := 0x20 - let _7 := add(b, _6) - sstore(a_2, _7) + let b_12 := b + let _6 := 1 + let a_13 := add(a_9, _6) + let _7 := 0x20 + let _8 := add(b_12, _7) + sstore(a_13, _8) } Note that the only variable that is re-assigned in this snippet is ``b``. @@ -240,6 +245,10 @@ reference to ``a`` by ``a_i``. The current value mapping is cleared for a variable ``a`` at the end of each block in which it was assigned to and at the end of the for loop init block if it is assigned inside the for loop body or post block. +If a variable's value is cleared according to the rule above and the variable is declared outside +the block, a new SSA variable will be created at the location where control flow joins. +This includes the beginning of a loop's post/body block and the location right after an +If/Switch/ForLoop/Block statement. After this stage, the Redundant Assign Eliminator is recommended to remove the unnecessary intermediate assignments. diff --git a/libyul/optimiser/SSATransform.cpp b/libyul/optimiser/SSATransform.cpp index 18aa42c24..eede7d45b 100644 --- a/libyul/optimiser/SSATransform.cpp +++ b/libyul/optimiser/SSATransform.cpp @@ -50,8 +50,6 @@ public: private: NameDispenser& m_nameDispenser; - /// This is a set of all variables that are assigned to anywhere in the code. - /// Variables that are only declared but never re-assigned are not touched. 
set const& m_variablesToReplace; }; @@ -130,7 +128,142 @@ void IntroduceSSA::operator()(Block& _block) } /** - * Second step of SSA transform: Replace the references to variables-to-be-replaced + * Second step of SSA transform: Introduces new SSA variables at each control-flow join + * and at the beginning of functions. + */ +class IntroduceControlFlowSSA: public ASTModifier +{ +public: + explicit IntroduceControlFlowSSA( + NameDispenser& _nameDispenser, + set const& _variablesToReplace + ): + m_nameDispenser(_nameDispenser), m_variablesToReplace(_variablesToReplace) + { } + + void operator()(FunctionDefinition& _function) override; + void operator()(ForLoop& _forLoop) override; + void operator()(Switch& _switch) override; + void operator()(Block& _block) override; + +private: + NameDispenser& m_nameDispenser; + set const& m_variablesToReplace; + /// Variables (that are to be replaced) currently in scope. + set m_variablesInScope; + /// Set of variables that do not have a specific value. 
+ set m_variablesToReassign; +}; + +void IntroduceControlFlowSSA::operator()(FunctionDefinition& _function) +{ + set varsInScope; + std::swap(varsInScope, m_variablesInScope); + set toReassign; + std::swap(toReassign, m_variablesToReassign); + + for (auto const& param: _function.parameters) + if (m_variablesToReplace.count(param.name)) + { + m_variablesInScope.insert(param.name); + m_variablesToReassign.insert(param.name); + } + + ASTModifier::operator()(_function); + + m_variablesInScope = std::move(varsInScope); + m_variablesToReassign = std::move(toReassign); +} + +void IntroduceControlFlowSSA::operator()(ForLoop& _for) +{ + (*this)(_for.pre); + + Assignments assignments; + assignments(_for.body); + assignments(_for.post); + + + for (auto const& var: assignments.names()) + if (m_variablesInScope.count(var)) + m_variablesToReassign.insert(var); + + (*this)(_for.body); + (*this)(_for.post); +} + +void IntroduceControlFlowSSA::operator()(Switch& _switch) +{ + yulAssert(m_variablesToReassign.empty(), ""); + + set toReassign; + for (auto& c: _switch.cases) + { + (*this)(c.body); + toReassign += m_variablesToReassign; + } + + m_variablesToReassign += toReassign; +} + +void IntroduceControlFlowSSA::operator()(Block& _block) +{ + set variablesDeclaredHere; + set assignedVariables; + + iterateReplacing( + _block.statements, + [&](Statement& _s) -> boost::optional> + { + vector toPrepend; + for (YulString toReassign: m_variablesToReassign) + { + YulString newName = m_nameDispenser.newName(toReassign); + toPrepend.emplace_back(VariableDeclaration{ + locationOf(_s), + {TypedName{locationOf(_s), newName, {}}}, + make_unique(Identifier{locationOf(_s), toReassign}) + }); + assignedVariables.insert(toReassign); + } + m_variablesToReassign.clear(); + + if (_s.type() == typeid(VariableDeclaration)) + { + VariableDeclaration& varDecl = boost::get(_s); + for (auto const& var: varDecl.variables) + if (m_variablesToReplace.count(var.name)) + { + 
variablesDeclaredHere.insert(var.name); + m_variablesInScope.insert(var.name); + } + } + else if (_s.type() == typeid(Assignment)) + { + Assignment& assignment = boost::get(_s); + for (auto const& var: assignment.variableNames) + if (m_variablesToReplace.count(var.name)) + assignedVariables.insert(var.name); + } + else + visit(_s); + + if (toPrepend.empty()) + return {}; + else + { + toPrepend.emplace_back(std::move(_s)); + return toPrepend; + } + } + ); + m_variablesToReassign += assignedVariables; + m_variablesInScope -= variablesDeclaredHere; + m_variablesToReassign -= variablesDeclaredHere; +} + +/** + * Third step of SSA transform: Replace the references to variables-to-be-replaced * by their current values. */ class PropagateValues: public ASTModifier @@ -166,13 +299,27 @@ void PropagateValues::operator()(VariableDeclaration& _varDecl) if (_varDecl.variables.size() != 1) return; - YulString name = _varDecl.variables.front().name; - if (!m_variablesToReplace.count(name)) - return; - yulAssert(_varDecl.value->type() == typeid(Identifier), ""); - m_currentVariableValues[name] = boost::get(*_varDecl.value).name; - m_clearAtEndOfBlock.insert(name); + YulString variable = _varDecl.variables.front().name; + if (m_variablesToReplace.count(variable)) + { + // `let a := a_1` - regular declaration of non-SSA variable + yulAssert(_varDecl.value->type() == typeid(Identifier), ""); + m_currentVariableValues[variable] = boost::get(*_varDecl.value).name; + m_clearAtEndOfBlock.insert(variable); + } + else if (_varDecl.value && _varDecl.value->type() == typeid(Identifier)) + { + // `let a_1 := a` - assignment to SSA variable after a branch. + YulString value = boost::get(*_varDecl.value).name; + if (m_variablesToReplace.count(value)) + { + // This is safe because `a_1` is not a "variable to replace" and thus + // will not be re-assigned. 
+ m_currentVariableValues[value] = variable; + m_clearAtEndOfBlock.insert(value); + } + } } @@ -186,7 +333,7 @@ void PropagateValues::operator()(Assignment& _assignment) if (!m_variablesToReplace.count(name)) return; - yulAssert(_assignment.value->type() == typeid(Identifier), ""); + yulAssert(_assignment.value && _assignment.value->type() == typeid(Identifier), ""); m_currentVariableValues[name] = boost::get(*_assignment.value).name; m_clearAtEndOfBlock.insert(name); } @@ -231,6 +378,7 @@ void SSATransform::run(Block& _ast, NameDispenser& _nameDispenser) Assignments assignments; assignments(_ast); IntroduceSSA{_nameDispenser, assignments.names()}(_ast); + IntroduceControlFlowSSA{_nameDispenser, assignments.names()}(_ast); PropagateValues{assignments.names()}(_ast); } diff --git a/libyul/optimiser/SSATransform.h b/libyul/optimiser/SSATransform.h index 66f5e3112..949f82a09 100644 --- a/libyul/optimiser/SSATransform.h +++ b/libyul/optimiser/SSATransform.h @@ -23,6 +23,8 @@ #include #include +#include + #include namespace yul @@ -61,8 +63,10 @@ class NameDispenser; * Furthermore, always note the current variable/value assigned to a and replace each * reference to a by this variable. * The current value mapping is cleared for a variable a at the end of each block - * in which it was assigned and just after the for loop init block if it is assigned - * inside the for loop. + * in which it was assigned. We compensate for that by inserting a declaration + * of the form "let a_1 := a" right after the location where control flow joins, so that + * variable references can use the SSA variable. The only exceptions to this rule are + * for-loop conditions, as we cannot insert a variable declaration there. * * After this stage, redundantAssignmentRemover is recommended to remove the unnecessary * intermediate assignments. 
@@ -70,7 +74,17 @@ class NameDispenser; * This stage provides best results if CSE is run right before it, because * then it does not generate excessive amounts of variables. * + * The transform is implemented in three stages. All stages are only concerned + * with variables that are assigned somewhere in the code (excluding declarations). + * The first stage inserts new SSA variables for each declaration and assignment of + * such variables. + * The second stage inserts new SSA variables at control flow joins. + * The last stage replaces references to variables that are assigned to somewhere in the + * code by their current SSA variable. + * * TODO Which transforms are required to keep this idempotent? + * + * Prerequisite: Disambiguator. */ class SSATransform: public ASTModifier { diff --git a/test/libyul/yulOptimizerTests/ssaAndBack/for_loop.yul b/test/libyul/yulOptimizerTests/ssaAndBack/for_loop.yul index c9523237e..e4787540d 100644 --- a/test/libyul/yulOptimizerTests/ssaAndBack/for_loop.yul +++ b/test/libyul/yulOptimizerTests/ssaAndBack/for_loop.yul @@ -24,7 +24,7 @@ // for { } lt(mload(a), mload(b)) { a := mload(b) } // { // let b_4 := mload(a) -// let a_7 := mload(b_4) -// b := mload(a_7) +// a := mload(b_4) +// b := mload(a) // } // } diff --git a/test/libyul/yulOptimizerTests/ssaPlusCleanup/control_structures.yul b/test/libyul/yulOptimizerTests/ssaPlusCleanup/control_structures.yul index 501737088..28520d3c5 100644 --- a/test/libyul/yulOptimizerTests/ssaPlusCleanup/control_structures.yul +++ b/test/libyul/yulOptimizerTests/ssaPlusCleanup/control_structures.yul @@ -16,21 +16,25 @@ // { // function copy(from, to) -> length // { -// let length_1 := mload(from) +// let from_6 := from +// let to_7 := to +// let length_1 := mload(from_6) // length := length_1 -// mstore(to, length_1) -// let from_2 := add(from, 0x20) -// let to_3 := add(to, 0x20) +// mstore(to_7, length_1) +// let from_2 := add(from_6, 0x20) +// let to_3 := add(to_7, 0x20) // let x_4 := 1 // let 
x := x_4 // for { } // lt(x, length_1) // { -// let x_5 := add(x, 0x20) +// let x_9 := x +// let x_5 := add(x_9, 0x20) // x := x_5 // } // { -// mstore(add(to_3, x), mload(add(from_2, x))) +// let x_8 := x +// mstore(add(to_3, x_8), mload(add(from_2, x_8))) // } // } // } diff --git a/test/libyul/yulOptimizerTests/ssaTransform/branches.yul b/test/libyul/yulOptimizerTests/ssaTransform/branches.yul index c2fc3c774..76a459160 100644 --- a/test/libyul/yulOptimizerTests/ssaTransform/branches.yul +++ b/test/libyul/yulOptimizerTests/ssaTransform/branches.yul @@ -20,7 +20,8 @@ // let a_3 := add(a_2, 1) // a := a_3 // } -// let a_4 := add(a, 1) +// let a_5 := a +// let a_4 := add(a_5, 1) // a := a_4 // mstore(a_4, 1) // } diff --git a/test/libyul/yulOptimizerTests/ssaTransform/for_reassign_body.yul b/test/libyul/yulOptimizerTests/ssaTransform/for_reassign_body.yul index 3e7fe480e..6901f9f17 100644 --- a/test/libyul/yulOptimizerTests/ssaTransform/for_reassign_body.yul +++ b/test/libyul/yulOptimizerTests/ssaTransform/for_reassign_body.yul @@ -12,10 +12,17 @@ // { // let a_1 := mload(0) // let a := a_1 -// for { mstore(0, a_1) } a { mstore(0, a) } +// for { mstore(0, a_1) } +// a // { -// let a_2 := add(a, 3) +// let a_4 := a +// mstore(0, a_4) +// } +// { +// let a_3 := a +// let a_2 := add(a_3, 3) // a := a_2 // } -// mstore(0, a) +// let a_5 := a +// mstore(0, a_5) // } diff --git a/test/libyul/yulOptimizerTests/ssaTransform/for_reassign_init.yul b/test/libyul/yulOptimizerTests/ssaTransform/for_reassign_init.yul index eeb1cc182..fcf1a4478 100644 --- a/test/libyul/yulOptimizerTests/ssaTransform/for_reassign_init.yul +++ b/test/libyul/yulOptimizerTests/ssaTransform/for_reassign_init.yul @@ -17,7 +17,14 @@ // a := a_2 // } // a -// { mstore(0, a) } -// { mstore(0, a) } -// mstore(0, a) +// { +// let a_4 := a +// mstore(0, a_4) +// } +// { +// let a_3 := a +// mstore(0, a_3) +// } +// let a_5 := a +// mstore(0, a_5) // } diff --git 
a/test/libyul/yulOptimizerTests/ssaTransform/for_reassign_post.yul b/test/libyul/yulOptimizerTests/ssaTransform/for_reassign_post.yul index bd174c2f0..217a043ad 100644 --- a/test/libyul/yulOptimizerTests/ssaTransform/for_reassign_post.yul +++ b/test/libyul/yulOptimizerTests/ssaTransform/for_reassign_post.yul @@ -15,9 +15,14 @@ // for { mstore(0, a_1) } // a // { -// let a_2 := add(a, 3) +// let a_4 := a +// let a_2 := add(a_4, 3) // a := a_2 // } -// { mstore(0, a) } -// mstore(0, a) +// { +// let a_3 := a +// mstore(0, a_3) +// } +// let a_5 := a +// mstore(0, a_5) // } diff --git a/test/libyul/yulOptimizerTests/ssaTransform/for_simple.yul b/test/libyul/yulOptimizerTests/ssaTransform/for_simple.yul index cac4295d9..cec419633 100644 --- a/test/libyul/yulOptimizerTests/ssaTransform/for_simple.yul +++ b/test/libyul/yulOptimizerTests/ssaTransform/for_simple.yul @@ -26,23 +26,28 @@ // let a_3 := add(a_2, 2) // a := a_3 // } +// let a_9 := a // { -// let a_4 := add(a, 4) +// let a_4 := add(a_9, 4) // a := a_4 // } +// let a_10 := a // for { -// let a_5 := add(a, 3) +// let a_5 := add(a_10, 3) // a := a_5 // } // a // { -// let a_6 := add(a, 6) +// let a_12 := a +// let a_6 := add(a_12, 6) // a := a_6 // } // { -// let a_7 := add(a, 12) +// let a_11 := a +// let a_7 := add(a_11, 12) // a := a_7 // } -// let a_8 := add(a, 8) +// let a_13 := a +// let a_8 := add(a_13, 8) // a := a_8 // } diff --git a/test/libyul/yulOptimizerTests/ssaTransform/function.yul b/test/libyul/yulOptimizerTests/ssaTransform/function.yul index 16f5b03cd..b6e120f40 100644 --- a/test/libyul/yulOptimizerTests/ssaTransform/function.yul +++ b/test/libyul/yulOptimizerTests/ssaTransform/function.yul @@ -12,13 +12,15 @@ // { // function f(a, b) -> c, d // { -// let b_1 := add(b, a) +// let b_5 := b +// let a_6 := a +// let b_1 := add(b_5, a_6) // b := b_1 // let c_2 := add(c, b_1) // c := c_2 // let d_3 := add(d, c_2) // d := d_3 -// let a_4 := add(a, d_3) +// let a_4 := add(a_6, d_3) // a := a_4 // } // } 
diff --git a/test/libyul/yulOptimizerTests/ssaTransform/nested.yul b/test/libyul/yulOptimizerTests/ssaTransform/nested.yul index 7f3af38c2..a48de0002 100644 --- a/test/libyul/yulOptimizerTests/ssaTransform/nested.yul +++ b/test/libyul/yulOptimizerTests/ssaTransform/nested.yul @@ -28,6 +28,7 @@ // let a_6 := 4 // a := a_6 // } -// let a_7 := add(b_4, a) +// let a_8 := a +// let a_7 := add(b_4, a_8) // a := a_7 // } diff --git a/test/libyul/yulOptimizerTests/ssaTransform/nested_reassign.yul b/test/libyul/yulOptimizerTests/ssaTransform/nested_reassign.yul new file mode 100644 index 000000000..7d2d7bc33 --- /dev/null +++ b/test/libyul/yulOptimizerTests/ssaTransform/nested_reassign.yul @@ -0,0 +1,32 @@ +{ + let a + let b + let x + if a { + if b { + x := 2 + } + } + // Should create new SSA variables for x here, + // but not above because end of block + mstore(0, x) +} +// ==== +// step: ssaTransform +// ---- +// { +// let a +// let b +// let x_1 +// let x := x_1 +// if a +// { +// if b +// { +// let x_2 := 2 +// x := x_2 +// } +// } +// let x_3 := x +// mstore(0, x_3) +// } diff --git a/test/libyul/yulOptimizerTests/ssaTransform/switch.yul b/test/libyul/yulOptimizerTests/ssaTransform/switch.yul index 404736923..32d6b339b 100644 --- a/test/libyul/yulOptimizerTests/ssaTransform/switch.yul +++ b/test/libyul/yulOptimizerTests/ssaTransform/switch.yul @@ -20,8 +20,10 @@ // a := a_2 // } // default { -// let a_3 := add(a, 8) +// let a_4 := a +// let a_3 := add(a_4, 8) // a := a_3 // } -// mstore(0, a) +// let a_5 := a +// mstore(0, a_5) // } diff --git a/test/libyul/yulOptimizerTests/ssaTransform/switch_reassign.yul b/test/libyul/yulOptimizerTests/ssaTransform/switch_reassign.yul new file mode 100644 index 000000000..775df55f4 --- /dev/null +++ b/test/libyul/yulOptimizerTests/ssaTransform/switch_reassign.yul @@ -0,0 +1,23 @@ +{ + let a := mload(0) + switch a + case 0 { a := add(a, 4) } + default { } + // should still create an SSA variable for a + mstore(0, a) +} +// ==== +// 
step: ssaTransform +// ---- +// { +// let a_1 := mload(0) +// let a := a_1 +// switch a_1 +// case 0 { +// let a_2 := add(a_1, 4) +// a := a_2 +// } +// default { } +// let a_3 := a +// mstore(0, a_3) +// } diff --git a/test/libyul/yulOptimizerTests/ssaTransform/used.yul b/test/libyul/yulOptimizerTests/ssaTransform/used.yul index ef104512b..c23bc6ee8 100644 --- a/test/libyul/yulOptimizerTests/ssaTransform/used.yul +++ b/test/libyul/yulOptimizerTests/ssaTransform/used.yul @@ -33,7 +33,8 @@ // a := a_4 // mstore(a_4, 0) // } -// mstore(a, 0) +// let a_6 := a +// mstore(a_6, 0) // let a_5 := 4 // a := a_5 // mstore(a_5, 0)