From 83583f3448c767fd05a2a9361a079e63cc23448a Mon Sep 17 00:00:00 2001 From: r0qs Date: Wed, 7 Jun 2023 12:17:10 +0200 Subject: [PATCH] Implementation of Lengauer-Tarjan algorithm to find dominators --- libyul/CMakeLists.txt | 1 + libyul/backends/evm/Dominator.h | 271 +++++++++++++++++++ test/CMakeLists.txt | 1 + test/libyul/DominatorTest.cpp | 465 ++++++++++++++++++++++++++++++++ 4 files changed, 738 insertions(+) create mode 100644 libyul/backends/evm/Dominator.h create mode 100644 test/libyul/DominatorTest.cpp diff --git a/libyul/CMakeLists.txt b/libyul/CMakeLists.txt index 811a6239b..25b6950c7 100644 --- a/libyul/CMakeLists.txt +++ b/libyul/CMakeLists.txt @@ -45,6 +45,7 @@ add_library(yul backends/evm/ControlFlowGraph.h backends/evm/ControlFlowGraphBuilder.cpp backends/evm/ControlFlowGraphBuilder.h + backends/evm/Dominator.h backends/evm/EthAssemblyAdapter.cpp backends/evm/EthAssemblyAdapter.h backends/evm/EVMCodeTransform.cpp diff --git a/libyul/backends/evm/Dominator.h b/libyul/backends/evm/Dominator.h new file mode 100644 index 000000000..ae5ba670c --- /dev/null +++ b/libyul/backends/evm/Dominator.h @@ -0,0 +1,271 @@ + +/* + This file is part of solidity. + + solidity is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + solidity is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with solidity. If not, see . +*/ +// SPDX-License-Identifier: GPL-3.0 +/** + * Dominator analysis of a control flow graph. 
/**
 * Dominator analysis of a control flow graph.
 * The implementation is based on the following paper:
 * https://www.cs.princeton.edu/courses/archive/spr03/cs423/download/dominators.pdf
 * See appendix B pg. 139.
 */
#pragma once

#include <cassert>
#include <cstddef>
#include <limits>
#include <map>
#include <set>
#include <vector>

namespace solidity::yul
{

/**
 * Computes immediate dominators of all vertices reachable from an entry vertex
 * using the (simple link-eval variant of the) Lengauer-Tarjan algorithm.
 *
 * @tparam Vertex vertex type; it is copied into the internal tables and used as a ``std::map``
 *         key, so it has to be copyable and provide a strict weak ordering via ``operator<``.
 * @tparam ForEachSuccessor default-constructible functor invoked as
 *         ``ForEachSuccessor{}(v, callback)``; it has to call ``callback(w)`` once
 *         for every edge (``v``, ``w``) of the graph.
 *
 * Vertices are identified internally by their dfs (pre-order) index; the entry vertex
 * always has index 0.
 */
template<typename Vertex, typename ForEachSuccessor>
class Dominator
{
public:
	/// Runs the analysis.
	/// @param _entry the entry vertex of the graph.
	/// @param _numVertices an upper bound for the number of vertices reachable from ``_entry``
	///        (must be greater than zero).
	Dominator(Vertex const& _entry, size_t _numVertices)
	{
		m_immediateDominator = lengauerTarjanDominator(_entry, _numVertices);
		buildDominatorTree();
	}

	/// @returns the reachable vertices in dfs order, i.e. ``vertices()[i]`` has dfs index ``i``.
	std::vector<Vertex> const& vertices() const
	{
		return m_vertex;
	}

	/// @returns the mapping from each reachable vertex to its dfs index.
	std::map<Vertex, size_t> const& vertexIndices() const
	{
		return m_vertexIndex;
	}

	/// @returns for every dfs index the dfs index of its immediate dominator.
	/// The entry vertex (index 0) is recorded as its own immediate dominator.
	std::vector<size_t> const& immediateDominators() const
	{
		return m_immediateDominator;
	}

	/// @returns the dominator tree: maps a dfs index to the dfs indices of the vertices
	/// it immediately dominates. Indices that dominate nothing have no entry.
	std::map<size_t, std::vector<size_t>> const& dominatorTree() const
	{
		return m_dominatorTree;
	}

	/// Checks whether ``_a`` dominates ``_b`` by walking the chain of immediate
	/// dominators from ``_b`` up to the entry vertex. A vertex dominates itself.
	/// @throws std::out_of_range if ``_a`` or ``_b`` was not reached by the analysis
	/// (previously such a vertex was silently treated as the entry vertex).
	bool dominates(Vertex const& _a, Vertex const& _b) const
	{
		size_t const aIdx = m_vertexIndex.at(_a);
		size_t idx = m_vertexIndex.at(_b);

		if (aIdx == idx)
			return true;

		// Immediate dominators have strictly smaller dfs indices, so this terminates at index 0.
		while (idx != 0)
		{
			idx = m_immediateDominator[idx];
			if (idx == aIdx)
				return true;
		}
		return false;
	}

	/// Finds all strict dominators of ``_v``.
	/// The result starts with the entry vertex, followed by the remaining dominators
	/// ordered from the immediate dominator of ``_v`` upwards.
	/// @note ``_v`` itself is not part of the result; its inclusion is implicit.
	/// @throws std::out_of_range if ``_v`` was not reached by the analysis.
	std::vector<Vertex> dominatorsOf(Vertex const& _v) const
	{
		assert(!m_vertex.empty());
		// The entry node always dominates all other nodes
		std::vector<Vertex> dominators;
		dominators.push_back(m_vertex[0]);

		size_t idx = m_immediateDominator[m_vertexIndex.at(_v)];
		while (idx != 0)
		{
			dominators.emplace_back(m_vertex[idx]);
			idx = m_immediateDominator[idx];
		}
		// Returned by name so that copy elision / implicit move applies.
		return dominators;
	}

	/// (Re)builds the dominator tree from the immediate dominators.
	void buildDominatorTree()
	{
		assert(!m_vertex.empty());
		assert(!m_immediateDominator.empty());

		// Start from scratch so that calling this more than once does not duplicate children.
		m_dominatorTree.clear();
		// Ignoring the entry node since no one dominates it.
		for (size_t i = 1; i < m_immediateDominator.size(); ++i)
			m_dominatorTree[m_immediateDominator[i]].emplace_back(i);
	}

	/// Path compression updates the ancestors of vertices along
	/// the path to the ancestor with the minimum label value.
	/// @param _ancestor ancestor links of the virtual forest (updated in place).
	/// @param _label for each vertex the vertex with minimal semidominator seen so far (updated in place).
	/// @param _semi semidominator dfs indices (read only).
	/// @param _v the vertex whose path is compressed; it must already be linked.
	void compressPath(
		std::vector<size_t>& _ancestor,
		std::vector<size_t>& _label,
		std::vector<size_t> const& _semi,
		size_t _v
	)
	{
		assert(_ancestor[_v] != std::numeric_limits<size_t>::max());
		size_t const u = _ancestor[_v];
		if (_ancestor[u] != std::numeric_limits<size_t>::max())
		{
			compressPath(_ancestor, _label, _semi, u);
			if (_semi[_label[u]] < _semi[_label[_v]])
				_label[_v] = _label[u];
			_ancestor[_v] = _ancestor[u];
		}
	}

	/// Numbers the vertices reachable from ``_entry`` in dfs order (filling the vertex tables
	/// of this object) and computes their immediate dominators.
	/// @param _entry the entry vertex.
	/// @param _numVertices upper bound for the number of reachable vertices (> 0).
	/// @returns the immediate dominator (as dfs index) for each dfs index. The result has one
	/// element per vertex actually reached, which may be fewer than ``_numVertices``.
	std::vector<size_t> lengauerTarjanDominator(Vertex const& _entry, size_t _numVertices)
	{
		assert(_numVertices > 0);
		size_t const undefined = std::numeric_limits<size_t>::max();

		// semi(w): The dfs index of the semidominator of ``w``.
		std::vector<size_t> semi(_numVertices, undefined);
		// parent(w): The index of the vertex which is the parent of ``w`` in the spanning
		// tree generated by the dfs.
		std::vector<size_t> parent(_numVertices, undefined);
		// ancestor(w): The highest ancestor of a vertex ``w`` in the virtual forest used
		// for path compression.
		std::vector<size_t> ancestor(_numVertices, undefined);
		// label(w): The index of the vertex with the minimum semidominator in the path
		// from ``w`` to its ancestor.
		std::vector<size_t> label(_numVertices, 0);
		// predecessors(w): The set of vertices ``v`` such that (``v``, ``w``) is an edge of the graph.
		std::vector<std::set<size_t>> predecessors(_numVertices);
		// bucket(w): the vertices whose semidominator is ``w`` (indexed by dfs index).
		std::vector<std::vector<size_t>> bucket(_numVertices);
		// idom(w): the index of the immediate dominator of ``w``
		std::vector<size_t> idom(_numVertices, undefined);

		// ``eval`` computes the path compression.
		// Finds the ancestor with lowest semidominator dfs number (i.e. index).
		auto eval = [&](size_t _v) -> size_t
		{
			if (ancestor[_v] == undefined)
				return _v;
			compressPath(ancestor, label, semi, _v);
			return label[_v];
		};

		// step 1: number the vertices in dfs pre-order and record parents and predecessors.
		m_vertex.clear();
		m_vertex.reserve(_numVertices);
		m_vertexIndex.clear();
		auto dfs = [&](Vertex const& _v, size_t _parentIdx, auto& _dfs) -> void
		{
			size_t const vIdx = m_vertex.size();
			// More reachable vertices than announced would overflow the work arrays.
			assert(vIdx < _numVertices);
			m_vertex.push_back(_v);
			m_vertexIndex.emplace(_v, vIdx);
			semi[vIdx] = vIdx;
			label[vIdx] = vIdx;
			parent[vIdx] = _parentIdx;
			ForEachSuccessor{}(_v, [&](Vertex const& _w) {
				// Only descend into successors that have no dfs index yet. The index table
				// doubles as the visited set.
				if (m_vertexIndex.count(_w) == 0)
					_dfs(_w, vIdx, _dfs);
				predecessors[m_vertexIndex.at(_w)].insert(vIdx);
			});
		};
		dfs(_entry, undefined, dfs);

		// Only the vertices that were actually reached take part in the remaining steps.
		size_t const numReached = m_vertex.size();

		// Process the vertices in decreasing order of the dfs number
		for (size_t w = numReached; w-- > 0;)
		{
			// step 3
			// NOTE: this is an optimization, i.e. performing the step 3 before step 2.
			// The goal is to process the bucket in the beginning of the loop for the vertex ``w``
			// instead of ``parent[w]`` in the end of the loop as described in the original paper.
			// Inverting those steps ensures that a bucket is only processed once and
			// it does not need to be erased.
			// The optimization proposal is available here:
			// https://jgaa.info/accepted/2006/GeorgiadisTarjanWerneck2006.10.1.pdf pg.77
			for (size_t v: bucket[w])
			{
				size_t const u = eval(v);
				idom[v] = (semi[u] < semi[v]) ? u : w;
			}

			// step 2
			for (size_t v: predecessors[w])
			{
				size_t const u = eval(v);
				if (semi[u] < semi[w])
					semi[w] = semi[u];
			}
			bucket[semi[w]].emplace_back(w);

			// ``link``: add the edge (parent[w], w) to the virtual forest by copying the parent
			// of ``w`` to the ancestor array, which limits the search path upwards.
			// TODO: implement sophisticated link-eval algorithm as shown in pg 132
			// See: https://www.cs.princeton.edu/courses/archive/spr03/cs423/download/dominators.pdf
			ancestor[w] = parent[w];
		}

		// step 4: turn the relative dominators computed in step 3 into immediate dominators.
		idom[0] = 0;
		for (size_t w = 1; w < numReached; ++w)
			if (idom[w] != semi[w])
				idom[w] = idom[idom[w]];

		// Drop the slots of announced but unreachable vertices.
		idom.resize(numReached);
		return idom;
	}

private:
	// Keep the list of vertices in the dfs order.
	// i.e. m_vertex[i]: the vertex whose dfs index is i.
	std::vector<Vertex> m_vertex;
	// Maps Vertex to their dfs index.
	std::map<Vertex, size_t> m_vertexIndex;
	// Immediate dominators by index.
	// Maps a Vertex based on its dfs index (i.e. array index) to its immediate dominator dfs index.
	//
	// e.g. to get the immediate dominator of a Vertex w:
	// idomIdx = m_immediateDominator[m_vertexIndex.at(w)]
	// idomVertex = m_vertex[idomIdx]
	std::vector<size_t> m_immediateDominator;

	// Maps the dfs index of a vertex to the dfs indices of all vertices it immediately dominates.
	// If the vertex does not dominate any other vertex it has no entry in the map.
	std::map<size_t, std::vector<size_t>> m_dominatorTree;
};
}
+ */ +#include + +#include + +using namespace solidity::yul; + +namespace solidity::yul::test +{ + +struct ImmediateDominatorTest +{ + struct Vertex { + std::string data; + std::vector successors; + + bool operator<(Vertex const& _other) const + { + return data < _other.data; + } + }; + + typedef std::pair edge; + + struct ForEachVertexSuccessorTest { + template + void operator()(Vertex _v, Callable&& _callable) const + { + for (auto w: _v.successors) + _callable(*w); + } + }; + + size_t numVertices; + Vertex* entry; + std::map vertices; + std::vector expectedIdom; + std::map expectedDFSIndices; +}; + +class DominatorFixture +{ + typedef ImmediateDominatorTest::Vertex Vertex; +protected: + static ImmediateDominatorTest const* generateGraph( + std::vector _vertices, + std::vector _edges, + std::vector _expectedIdom, + std::map _expectedDFSIndices + ) + { + assert(_edges.size() > 0); + + ImmediateDominatorTest* graph = new ImmediateDominatorTest(); + for (std::string v: _vertices) + graph->vertices.insert(make_pair(v, new Vertex{v, std::vector{}})); + graph->entry = graph->vertices[_vertices[0]]; + + assert(_vertices.size() > 0 && _vertices.size() == graph->vertices.size()); + + graph->numVertices = _vertices.size(); + for (auto [from, to]: _edges) + graph->vertices[from]->successors.push_back(graph->vertices[to]); + + graph->expectedIdom = _expectedIdom; + graph->expectedDFSIndices = _expectedDFSIndices; + return graph; + } +}; + +BOOST_AUTO_TEST_SUITE(Dominators) + +BOOST_FIXTURE_TEST_CASE(immediate_dominator, DominatorFixture) +{ + typedef ImmediateDominatorTest::edge edge; + std::vector inputGraph(9); + + // A + // │ + // ▼ + // ┌───B + // │ │ + // ▼ │ + // C ──┼───┐ + // │ │ │ + // ▼ │ ▼ + // D◄──┘ G + // │ │ + // ▼ ▼ + // E H + // │ │ + // └──►F◄──┘ + inputGraph[0] = generateGraph( + { "A", "B", "C", "D", "E", "F", "G", "H" }, + { + edge("A", "B"), + edge("B", "C"), + edge("B", "D"), + edge("C", "D"), + edge("C", "G"), + edge("D", "E"), + edge("E", "F"), + 
edge("G", "H"), + edge("H", "F") + }, + {0, 0, 1, 1, 3, 1, 2, 6}, + { + {"A", 0}, + {"B", 1}, + {"C", 2}, + {"D", 3}, + {"E", 4}, + {"F", 5}, + {"G", 6}, + {"H", 7} + } + ); + + // ┌────►A──────┐ + // │ │ ▼ + // │ B◄──┘ ┌──D──┐ + // │ │ │ │ + // │ ▼ ▼ ▼ + // └─C◄───┐ E F + // │ │ │ │ + // └───►G◄─┴─────┘ + inputGraph[1] = generateGraph( + { "A", "B", "C", "D", "E", "F", "G" }, + { + edge("A", "B"), + edge("B", "C"), + edge("C", "G"), + edge("C", "A"), + edge("A", "D"), + edge("D", "E"), + edge("D", "F"), + edge("E", "G"), + edge("F", "G"), + edge("G", "C") + }, + {0, 0, 0, 0, 0, 4, 4}, + { + {"A", 0}, + {"B", 1}, + {"C", 2}, + {"G", 3}, + {"D", 4}, + {"E", 5}, + {"F", 6} + } + ); + + // ┌─────────┐ + // │ ▼ + // │ ┌───A───┐ + // │ │ │ + // │ ▼ ▼ + // │ ┌──►C◄───── B──┬──────┐ + // │ │ │ ▲ │ │ + // │ │ │ ┌────┘ │ │ + // │ │ ▼ │ ▼ ▼ + // │ │ D──┘ ┌───►E◄─────I + // │ │ ▲ │ │ │ + // │ │ │ │ ├───┐ │ + // │ │ │ │ │ │ │ + // │ │ │ │ ▼ │ ▼ + // │ └───┼─────┼────F └─►H + // │ │ │ │ │ + // │ │ │ │ │ + // │ │ │ │ │ + // │ └─────┴─G◄─┴──────┘ + // │ │ + // └─────────────┘ + inputGraph[2] = generateGraph( + { "A", "B", "C", "D", "E", "F", "G", "H", "I" }, + { + edge("A", "B"), + edge("A", "C"), + edge("B", "C"), + edge("B", "I"), + edge("B", "E"), + edge("C", "D"), + edge("D", "B"), + edge("E", "H"), + edge("E", "F"), + edge("F", "G"), + edge("F", "C"), + edge("G", "E"), + edge("G", "A"), + edge("G", "D"), + edge("H", "G"), + edge("I", "E"), + edge("I", "H") + }, + {0, 0, 0, 0, 1, 1, 1, 1, 5}, + { + {"A", 0}, + {"B", 1}, + {"C", 2}, + {"D", 3}, + {"I", 4}, + {"E", 5}, + {"H", 6}, + {"G", 7}, + {"F", 8} + } + ); + + // T. Lengauer and R. E. Tarjan pg. 122 fig. 
1 + // ref: https://www.cs.princeton.edu/courses/archive/spr03/cs423/download/dominators.pdf + inputGraph[3] = generateGraph( + { "R", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "L", "K" }, + { + edge("R", "B"), + edge("R", "A"), + edge("R", "C"), + edge("B", "A"), + edge("B", "D"), + edge("B", "E"), + edge("A", "D"), + edge("D", "L"), + edge("L", "H"), + edge("E", "H"), + edge("H", "E"), + edge("H", "K"), + edge("K", "I"), + edge("K", "R"), + edge("C", "F"), + edge("C", "G"), + edge("F", "I"), + edge("G", "I"), + edge("G", "J"), + edge("J", "I"), + edge("I", "K"), + }, + {0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 9, 9, 11}, + { + {"R", 0}, + {"B", 1}, + {"A", 2}, + {"D", 3}, + {"L", 4}, + {"H", 5}, + {"E", 6}, + {"K", 7}, + {"I", 8}, + {"C", 9}, + {"F", 10}, + {"G", 11}, + {"J", 12} + } + ); + + // Extracted from Loukas Georgiadis Dissertation - Linear-Time Algorithms for Dominators and Related Problems + // pg. 12 Fig. 2.2 + // ref: https://www.cs.princeton.edu/techreports/2005/737.pdf + inputGraph[4] = generateGraph( + { "R", "W", "X1", "X2", "X3", "X4", "X5", "X6", "X7", "Y" }, + { + edge("R", "W"), + edge("R", "Y"), + edge("W", "X1"), + edge("Y", "X7"), + edge("X1", "X2"), + edge("X2", "X1"), + edge("X2", "X3"), + edge("X3", "X2"), + edge("X3", "X4"), + edge("X4", "X3"), + edge("X4", "X5"), + edge("X5", "X4"), + edge("X5", "X6"), + edge("X6", "X5"), + edge("X6", "X7"), + edge("X7", "X6") + }, + {0, 0, 0, 0, 0, 0, 0, 0, 0 , 0}, + { + {"R", 0}, + {"W", 1}, + {"X1", 2}, + {"X2", 3}, + {"X3", 4}, + {"X4", 5}, + {"X5", 6}, + {"X6", 7}, + {"X7", 8}, + {"Y", 9} + } + ); + + // Worst-case families for k = 3 + // Example itworst(3) pg. 26 fig. 
2.9 + // ref: https://www.cs.princeton.edu/techreports/2005/737.pdf + inputGraph[5] = generateGraph( + { "R", "W1", "W2", "W3", "X1", "X2", "X3", "Y1", "Y2", "Y3", "Z1", "Z2", "Z3" }, + { + edge("R", "W1"), + edge("R", "X1"), + edge("R", "Z3"), + edge("W1", "W2"), + edge("W2", "W3"), + edge("X1", "X2"), + edge("X2", "X3"), + edge("X3", "Y1"), + edge("Y1", "W1"), + edge("Y1", "W2"), + edge("Y1", "W3"), + edge("Y1", "Y2"), + edge("Y2", "W1"), + edge("Y2", "W2"), + edge("Y2", "W3"), + edge("Y2", "Y3"), + edge("Y3", "W1"), + edge("Y3", "W2"), + edge("Y3", "W3"), + edge("Y3", "Z1"), + edge("Z1", "Z2"), + edge("Z2", "Z1"), + edge("Z2", "Z3"), + edge("Z3", "Z2") + }, + {0, 0, 0, 0, 0, 4, 5, 6, 7, 8, 0, 0, 0}, + { + {"R", 0}, + {"W1", 1}, + {"W2", 2}, + {"W3", 3}, + {"X1", 4}, + {"X2", 5}, + {"X3", 6}, + {"Y1", 7}, + {"Y2", 8}, + {"Y3", 9}, + {"Z1", 10}, + {"Z2", 11}, + {"Z3", 12} + } + ); + + + // Worst-case families for k = 3 + // Example idfsquad(3) pg. 26 fig. 2.9 + // ref: https://www.cs.princeton.edu/techreports/2005/737.pdf + inputGraph[6] = generateGraph( + { "R", "X1", "X2", "X3", "Y1", "Y2", "Y3", "Z1", "Z2", "Z3" }, + { + edge("R", "X1"), + edge("R", "Z1"), + edge("X1", "Y1"), + edge("X1", "X2"), + edge("X2", "X3"), + edge("X2", "Y2"), + edge("X3", "Y3"), + edge("Y1", "Z1"), + edge("Y1", "Z2"), + edge("Z1", "Y1"), + edge("Y2", "Z2"), + edge("Y2", "Z3"), + edge("Z2", "Y2"), + edge("Y3", "Z3"), + edge("Z3", "Y3") + }, + {0, 0, 0, 0, 0, 0, 0, 0, 1, 8}, + { + {"R", 0}, + {"X1", 1}, + {"Y1", 2}, + {"Z1", 3}, + {"Z2", 4}, + {"Y2", 5}, + {"Z3", 6}, + {"Y3", 7}, + {"X2", 8}, + {"X3", 9} + } + ); + + // Worst-case families for k = 3 + // Example ibfsquad(3) pg. 26 fig. 
2.9 + // ref: https://www.cs.princeton.edu/techreports/2005/737.pdf + inputGraph[7] = generateGraph( + { "R", "W", "X1", "X2", "X3", "Y", "Z" }, + { + edge("R", "W"), + edge("R", "Y"), + edge("W", "X1"), + edge("W", "X2"), + edge("W", "X3"), + edge("Y", "Z"), + edge("Z", "X3"), + edge("X3", "X2"), + edge("X2", "X1") + }, + {0, 0, 0, 0, 0, 0, 5}, + { + {"R", 0}, + {"W", 1}, + {"X1", 2}, + {"X2", 3}, + {"X3", 4}, + {"Y", 5}, + {"Z", 6} + } + ); + + // Worst-case families for k = 3 + // Example sncaworst(3) pg. 26 fig. 2.9 + // ref: https://www.cs.princeton.edu/techreports/2005/737.pdf + inputGraph[8] = generateGraph( + { "R", "X1", "X2", "X3", "Y1", "Y2", "Y3" }, + { + edge("R", "X1"), + edge("R", "Y1"), + edge("R", "Y2"), + edge("R", "Y3"), + edge("X1", "X2"), + edge("X2", "X3"), + edge("X3", "Y1"), + edge("X3", "Y2"), + edge("X3", "Y3") + }, + {0, 0, 1, 2, 0, 0, 0}, + { + {"R", 0}, + {"X1", 1}, + {"X2", 2}, + {"X3", 3}, + {"Y1", 4}, + {"Y2", 5}, + {"Y3", 6}, + } + ); + + for (ImmediateDominatorTest const* g: inputGraph) + { + Dominator< + ImmediateDominatorTest::Vertex, + ImmediateDominatorTest::ForEachVertexSuccessorTest + > dom(*g->entry, g->numVertices); + + for (auto [v, idx]: dom.vertexIndices()) + BOOST_CHECK(g->expectedDFSIndices.at(v.data) == idx); + BOOST_TEST(dom.immediateDominators() == g->expectedIdom); + } + +} + +BOOST_AUTO_TEST_SUITE_END() +}