Implementation of Lengauer-Tarjan algorithm to find dominators

This commit is contained in:
r0qs 2023-06-07 12:17:10 +02:00
parent cc7a14a61d
commit 83583f3448
No known key found for this signature in database
GPG Key ID: 61503DBA6667276C
4 changed files with 738 additions and 0 deletions

View File

@ -45,6 +45,7 @@ add_library(yul
backends/evm/ControlFlowGraph.h
backends/evm/ControlFlowGraphBuilder.cpp
backends/evm/ControlFlowGraphBuilder.h
backends/evm/Dominator.h
backends/evm/EthAssemblyAdapter.cpp
backends/evm/EthAssemblyAdapter.h
backends/evm/EVMCodeTransform.cpp

View File

@ -0,0 +1,271 @@
/*
This file is part of solidity.
solidity is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
solidity is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with solidity. If not, see <http://www.gnu.org/licenses/>.
*/
// SPDX-License-Identifier: GPL-3.0
/**
* Dominator analysis of a control flow graph.
* The implementation is based on the following paper:
* https://www.cs.princeton.edu/courses/archive/spr03/cs423/download/dominators.pdf
* See appendix B pg. 139.
*/
#pragma once
#include <libyul/backends/evm/ControlFlowGraph.h>
#include <libsolutil/Visitor.h>
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <deque>
#include <limits>
#include <map>
#include <set>
#include <vector>
namespace solidity::yul
{
/// Dominator analysis of a directed graph using the simple (non-balanced) variant of the
/// Lengauer-Tarjan algorithm.
///
/// Vertices are numbered in depth-first preorder starting at the entry (index 0). All results
/// are expressed in terms of these dfs indices; use ``vertices()`` / ``vertexIndices()`` to
/// translate between indices and vertices. Only vertices reachable from the entry are part of
/// the analysis.
///
/// @tparam Vertex vertex type; must be copyable and provide ``operator<`` (it is used as map key).
/// @tparam ForEachSuccessor default-constructible functor invoked as
///         ``ForEachSuccessor{}(vertex, callback)``; it has to call ``callback(successor)`` once
///         for every outgoing edge of ``vertex``.
template<typename Vertex, typename ForEachSuccessor>
class Dominator
{
public:
	/// Runs the analysis on the graph reachable from ``_entry``.
	/// @param _entry the entry vertex of the graph.
	/// @param _numVertices expected number of vertices (must be positive). It is only used to
	///        pre-allocate storage; the actual vertex count is determined by the traversal.
	Dominator(Vertex const& _entry, size_t _numVertices)
	{
		m_immediateDominator = lengauerTarjanDominator(_entry, _numVertices);
		buildDominatorTree();
	}

	/// @returns the reachable vertices in dfs order, i.e. element ``i`` has dfs index ``i``.
	std::vector<Vertex> vertices() const
	{
		return m_vertex;
	}

	/// @returns the mapping from each reachable vertex to its dfs index.
	std::map<Vertex, size_t> vertexIndices() const
	{
		return m_vertexIndex;
	}

	/// @returns for every dfs index the dfs index of its immediate dominator.
	/// The entry (index 0) is recorded as its own immediate dominator.
	std::vector<size_t> immediateDominators() const
	{
		return m_immediateDominator;
	}

	/// @returns the dominator tree: dfs index -> dfs indices of the vertices it immediately
	/// dominates. Vertices that do not immediately dominate anything have no entry.
	std::map<size_t, std::vector<size_t>> dominatorTree() const
	{
		return m_dominatorTree;
	}

	/// Checks whether ``_a`` dominates ``_b`` by walking the immediate dominator chain
	/// from ``_b`` up to the entry. Every vertex dominates itself.
	/// @throws std::out_of_range if ``_a`` or ``_b`` was not reached by the analysis.
	bool dominates(Vertex const& _a, Vertex const& _b) const
	{
		size_t const aIdx = m_vertexIndex.at(_a);
		size_t idx = m_vertexIndex.at(_b);
		if (aIdx == idx)
			return true;
		// The entry has index 0 and is the root of the dominator tree, so the walk ends there.
		while (idx != 0)
		{
			idx = m_immediateDominator[idx];
			if (idx == aIdx)
				return true;
		}
		return false;
	}

	/// Finds all strict dominators of ``_v``.
	/// The result starts with the entry vertex, followed by the immediate dominator of ``_v``,
	/// its immediate dominator and so on (the entry is not repeated at the end).
	/// @note ``_v`` itself is not part of the result, its inclusion is implicit.
	/// @throws std::out_of_range if ``_v`` was not reached by the analysis.
	std::vector<Vertex> dominatorsOf(Vertex const& _v) const
	{
		assert(!m_vertex.empty());
		// The entry node always dominates all other nodes.
		std::vector<Vertex> dominators;
		dominators.push_back(m_vertex[0]);
		for (
			size_t idx = m_immediateDominator[m_vertexIndex.at(_v)];
			idx != 0;
			idx = m_immediateDominator[idx]
		)
			dominators.push_back(m_vertex[idx]);
		// Returned by name so that copy elision / implicit move applies.
		return dominators;
	}

	/// (Re)builds the dominator tree from the immediate dominators.
	void buildDominatorTree()
	{
		assert(!m_vertex.empty());
		assert(!m_immediateDominator.empty());
		// Start from scratch so that calling this more than once does not duplicate children.
		m_dominatorTree.clear();
		// Ignoring the entry node since no one dominates it.
		for (size_t i = 1; i < m_immediateDominator.size(); ++i)
			m_dominatorTree[m_immediateDominator[i]].push_back(i);
	}

	/// Path compression: shortens the ancestor chain of ``_v`` in the virtual forest and
	/// updates ``_label[_v]`` to the vertex with the minimum semidominator on the compressed path.
	/// @pre ``_v`` has already been linked, i.e. it has an ancestor.
	void compressPath(
		std::vector<size_t>& _ancestor,
		std::vector<size_t>& _label,
		std::vector<size_t>& _semi,
		size_t _v
	)
	{
		assert(_ancestor[_v] != c_undefined);
		size_t const u = _ancestor[_v];
		// If ``u`` is the root of its tree there is nothing to compress.
		if (_ancestor[u] != c_undefined)
		{
			compressPath(_ancestor, _label, _semi, u);
			if (_semi[_label[u]] < _semi[_label[_v]])
				_label[_v] = _label[u];
			_ancestor[_v] = _ancestor[u];
		}
	}

	/// Numbers the vertices reachable from ``_entry`` in dfs preorder (filling the vertex list and
	/// the vertex-to-index map) and computes their immediate dominators.
	/// Based on T. Lengauer and R. E. Tarjan, appendix B pg. 139 of
	/// https://www.cs.princeton.edu/courses/archive/spr03/cs423/download/dominators.pdf
	/// @param _entry the entry vertex of the graph.
	/// @param numVertices expected number of vertices (must be positive), used as allocation hint.
	/// @returns the immediate dominator (as dfs index) of every reachable vertex, indexed by dfs index.
	std::vector<size_t> lengauerTarjanDominator(Vertex const& _entry, size_t numVertices)
	{
		// A graph that has an entry vertex has at least one vertex.
		assert(numVertices > 0);

		m_vertex.clear();
		m_vertexIndex.clear();
		m_vertex.reserve(numVertices);

		// parent(w): The index of the vertex which is the parent of ``w`` in the spanning
		// tree generated by the dfs.
		std::vector<size_t> parent;
		parent.reserve(numVertices);
		// predecessors(w): The vertices ``v`` such that (``v``, ``w``) is an edge of the graph.
		// Parallel edges may show up more than once, which is harmless for the minimum below.
		std::vector<std::vector<size_t>> predecessors;
		predecessors.reserve(numVertices);

		// step 1: number the vertices in dfs preorder and record parents and predecessors.
		// A vertex is unvisited exactly as long as it has no entry in ``m_vertexIndex``.
		auto dfs = [&](Vertex const& _v, size_t _parent, auto& _dfs) -> void {
			size_t const vIdx = m_vertex.size();
			m_vertex.push_back(_v);
			m_vertexIndex.emplace(_v, vIdx);
			parent.push_back(_parent);
			predecessors.emplace_back();
			ForEachSuccessor{}(_v, [&](Vertex const& _w) {
				size_t wIdx = 0;
				auto const known = m_vertexIndex.find(_w);
				if (known == m_vertexIndex.end())
				{
					// ``_w`` will receive the next free index.
					wIdx = m_vertex.size();
					_dfs(_w, vIdx, _dfs);
				}
				else
					wIdx = known->second;
				predecessors[wIdx].push_back(vIdx);
			});
		};
		dfs(_entry, c_undefined, dfs);

		size_t const reached = m_vertex.size();
		// semi(w): The dfs index of the semidominator of ``w``.
		std::vector<size_t> semi(reached);
		// label(w): The index of the vertex with the minimum semidominator on the (compressed)
		// path from ``w`` to the root of its tree in the virtual forest.
		std::vector<size_t> label(reached);
		for (size_t i = 0; i < reached; ++i)
		{
			semi[i] = i;
			label[i] = i;
		}
		// ancestor(w): The ancestor of ``w`` in the virtual forest; undefined while ``w`` is a root.
		std::vector<size_t> ancestor(reached, c_undefined);
		// bucket(w): the vertices whose semidominator is ``w``.
		std::vector<std::vector<size_t>> bucket(reached);
		// idom(w): the index of the immediate dominator of ``w``.
		std::vector<size_t> idom(reached, c_undefined);

		// ``eval`` finds the vertex with the lowest semidominator on the path from ``_v`` to the
		// root of its tree (root excluded), compressing the path on the way.
		// TODO: implement sophisticated link-eval algorithm as shown in pg 132
		// See: https://www.cs.princeton.edu/courses/archive/spr03/cs423/download/dominators.pdf
		auto eval = [&](size_t _v) -> size_t
		{
			if (ancestor[_v] == c_undefined)
				return _v;
			compressPath(ancestor, label, semi, _v);
			return label[_v];
		};

		// Process the vertices in decreasing order of the dfs number.
		for (size_t w = reached; w-- > 0;)
		{
			// step 3
			// NOTE: this is an optimization, i.e. performing the step 3 before step 2.
			// The bucket of ``w`` is processed at the beginning of the iteration for ``w`` instead
			// of processing the bucket of ``parent[w]`` at the end as in the original paper.
			// This ensures that a bucket is only processed once and does not need to be erased.
			// See: https://jgaa.info/accepted/2006/GeorgiadisTarjanWerneck2006.10.1.pdf pg.77
			for (size_t v: bucket[w])
			{
				size_t const u = eval(v);
				idom[v] = (semi[u] < semi[v]) ? u : w;
			}

			// The entry has no parent and its semidominator cannot improve.
			if (w == 0)
				break;

			// step 2
			for (size_t v: predecessors[w])
			{
				size_t const u = eval(v);
				if (semi[u] < semi[w])
					semi[w] = semi[u];
			}
			bucket[semi[w]].push_back(w);
			// ``link``: add the edge (parent[w], w) to the virtual forest.
			ancestor[w] = parent[w];
		}

		// step 4
		idom[0] = 0;
		for (size_t w = 1; w < reached; ++w)
			if (idom[w] != semi[w])
				idom[w] = idom[idom[w]];
		return idom;
	}

private:
	/// Marker for "no such vertex" (no parent / no ancestor / not yet computed).
	static constexpr size_t c_undefined = std::numeric_limits<size_t>::max();

	// Keep the list of vertices in the dfs order.
	// i.e. m_vertex[i]: the vertex whose dfs index is i.
	std::vector<Vertex> m_vertex;
	// Maps Vertex to their dfs index.
	std::map<Vertex, size_t> m_vertexIndex;
	// Immediate dominators by index.
	// Maps a Vertex based on its dfs index (i.e. array index) to its immediate dominator dfs index.
	//
	// e.g. to get the immediate dominator of a Vertex w:
	// idomIdx = m_immediateDominator[m_vertexIndex[w]]
	// idomVertex = m_vertex[idomIdx]
	std::vector<size_t> m_immediateDominator;
	// Maps the dfs index of a vertex to the dfs indices of all vertices it immediately dominates.
	// If the vertex does not dominate any other vertex it has no entry in the map.
	std::map<size_t, std::vector<size_t>> m_dominatorTree;
};
}

View File

@ -138,6 +138,7 @@ set(libyul_sources
libyul/ControlFlowGraphTest.h
libyul/ControlFlowSideEffectsTest.cpp
libyul/ControlFlowSideEffectsTest.h
libyul/DominatorTest.cpp
libyul/EVMCodeTransformTest.cpp
libyul/EVMCodeTransformTest.h
libyul/FunctionSideEffects.cpp

View File

@ -0,0 +1,465 @@
/*
This file is part of solidity.
solidity is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
solidity is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with solidity. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* Unit tests for the algorithm to find dominators from a graph.
*/
#include <libyul/backends/evm/Dominator.h>
#include <boost/test/unit_test.hpp>
using namespace solidity::yul;
namespace solidity::yul::test
{
/// Description of a single dominator test input: the graph itself plus the results
/// the dominator analysis is expected to produce for it.
struct ImmediateDominatorTest
{
	/// Graph vertex identified by its name. The successors are non-owning pointers to
	/// other vertices of the same graph.
	struct Vertex
	{
		std::string data;
		std::vector<Vertex*> successors;

		/// Orders vertices by name so that they can be used as keys of ordered containers.
		bool operator<(Vertex const& _other) const
		{
			return data < _other.data;
		}
	};

	/// Directed edge given as (source name, target name).
	using edge = std::pair<std::string, std::string>;

	/// Successor enumeration functor: calls ``_callable`` once per successor of ``_v``,
	/// in the order in which the edges were added.
	struct ForEachVertexSuccessorTest
	{
		// The vertex is taken by reference: a by-value parameter would deep-copy the name
		// and the successor list on every single enumeration.
		template<typename Callable>
		void operator()(Vertex const& _v, Callable&& _callable) const
		{
			for (Vertex* successor: _v.successors)
				_callable(*successor);
		}
	};

	/// Number of vertices of the graph.
	size_t numVertices = 0;
	/// Entry vertex of the graph; points to one of the values of ``vertices``.
	Vertex* entry = nullptr;
	/// All vertices of the graph by name.
	std::map<std::string, Vertex*> vertices;
	/// Expected immediate dominator (as dfs index) for each dfs index.
	std::vector<size_t> expectedIdom;
	/// Expected dfs index for each vertex name.
	std::map<std::string, size_t> expectedDFSIndices;
};
/// Test fixture providing a helper to build test graphs from vertex names and edges.
class DominatorFixture
{
	typedef ImmediateDominatorTest::Vertex Vertex;
protected:
	/// Builds a test graph together with its expected results.
	/// @param _vertices names of all vertices; must be non-empty and free of duplicates.
	///        The first name denotes the entry vertex.
	/// @param _edges directed edges as (source name, target name); must be non-empty and
	///        may only mention names listed in ``_vertices``.
	/// @param _expectedIdom expected immediate dominators, stored unchanged in the result.
	/// @param _expectedDFSIndices expected dfs index per vertex name, stored unchanged in the result.
	/// @returns a heap-allocated graph description. Neither the description nor its vertices
	///          are released by this fixture, so the caller is responsible for them.
	/// @throws std::out_of_range if an edge refers to an unknown vertex name.
	static ImmediateDominatorTest const* generateGraph(
		std::vector<std::string> _vertices,
		std::vector<ImmediateDominatorTest::edge> _edges,
		std::vector<size_t> _expectedIdom,
		std::map<std::string, size_t> _expectedDFSIndices
	)
	{
		// Validate the input before anything is indexed or allocated.
		assert(!_vertices.empty());
		assert(!_edges.empty());

		ImmediateDominatorTest* graph = new ImmediateDominatorTest();
		for (std::string const& name: _vertices)
			// Allocate only for new names so that a duplicate does not leak a vertex.
			if (graph->vertices.count(name) == 0)
				graph->vertices.emplace(name, new Vertex{name, std::vector<Vertex*>{}});
		// Fails if ``_vertices`` contained duplicates.
		assert(_vertices.size() == graph->vertices.size());

		graph->entry = graph->vertices.at(_vertices.front());
		graph->numVertices = _vertices.size();
		// ``at`` reports unknown names instead of inserting a null vertex and dereferencing it.
		for (auto const& [from, to]: _edges)
			graph->vertices.at(from)->successors.push_back(graph->vertices.at(to));

		graph->expectedIdom = std::move(_expectedIdom);
		graph->expectedDFSIndices = std::move(_expectedDFSIndices);
		return graph;
	}
};
BOOST_AUTO_TEST_SUITE(Dominators)
// Runs the dominator analysis on a set of hand-written graphs and checks both the dfs
// numbering of the vertices and the computed immediate dominators against the expected values.
BOOST_FIXTURE_TEST_CASE(immediate_dominator, DominatorFixture)
{
	using edge = ImmediateDominatorTest::edge;
	std::vector<ImmediateDominatorTest const*> inputGraph(9);

	//     A
	//     │
	//     ▼
	// ┌───B
	// │   │
	// ▼   │
	// C ──┼───┐
	// │   │   │
	// ▼   │   ▼
	// D◄──┘   G
	// │       │
	// ▼       ▼
	// E       H
	// │       │
	// └──►F◄──┘
	inputGraph[0] = generateGraph(
		{ "A", "B", "C", "D", "E", "F", "G", "H" },
		{
			edge("A", "B"),
			edge("B", "C"),
			edge("B", "D"),
			edge("C", "D"),
			edge("C", "G"),
			edge("D", "E"),
			edge("E", "F"),
			edge("G", "H"),
			edge("H", "F")
		},
		{0, 0, 1, 1, 3, 1, 2, 6},
		{
			{"A", 0},
			{"B", 1},
			{"C", 2},
			{"D", 3},
			{"E", 4},
			{"F", 5},
			{"G", 6},
			{"H", 7}
		}
	);

	// ┌────►A──────┐
	// │     │      ▼
	// │ B◄──┘   ┌──D──┐
	// │ │       │     │
	// │ ▼       ▼     ▼
	// └─C◄───┐  E     F
	//   │    │  │     │
	//   └───►G◄─┴─────┘
	inputGraph[1] = generateGraph(
		{ "A", "B", "C", "D", "E", "F", "G" },
		{
			edge("A", "B"),
			edge("B", "C"),
			edge("C", "G"),
			edge("C", "A"),
			edge("A", "D"),
			edge("D", "E"),
			edge("D", "F"),
			edge("E", "G"),
			edge("F", "G"),
			edge("G", "C")
		},
		{0, 0, 0, 0, 0, 4, 4},
		{
			{"A", 0},
			{"B", 1},
			{"C", 2},
			{"G", 3},
			{"D", 4},
			{"E", 5},
			{"F", 6}
		}
	);

	// ┌─────────┐
	// │         ▼
	// │     ┌───A───┐
	// │     │       │
	// │     ▼       ▼
	// │ ┌──►C◄───── B──┬──────┐
	// │ │   │       ▲  │      │
	// │ │   │  ┌────┘  │      │
	// │ │   ▼  │       ▼      ▼
	// │ │   D──┘  ┌───►E◄─────I
	// │ │   ▲     │    │      │
	// │ │   │     │    ├───┐  │
	// │ │   │     │    │   │  │
	// │ │   │     │    ▼   │  ▼
	// │ └───┼─────┼────F   └─►H
	// │     │     │    │      │
	// │     │     │    │      │
	// │     │     │    │      │
	// │     └─────┴─G◄─┴──────┘
	// │             │
	// └─────────────┘
	inputGraph[2] = generateGraph(
		{ "A", "B", "C", "D", "E", "F", "G", "H", "I" },
		{
			edge("A", "B"),
			edge("A", "C"),
			edge("B", "C"),
			edge("B", "I"),
			edge("B", "E"),
			edge("C", "D"),
			edge("D", "B"),
			edge("E", "H"),
			edge("E", "F"),
			edge("F", "G"),
			edge("F", "C"),
			edge("G", "E"),
			edge("G", "A"),
			edge("G", "D"),
			edge("H", "G"),
			edge("I", "E"),
			edge("I", "H")
		},
		{0, 0, 0, 0, 1, 1, 1, 1, 5},
		{
			{"A", 0},
			{"B", 1},
			{"C", 2},
			{"D", 3},
			{"I", 4},
			{"E", 5},
			{"H", 6},
			{"G", 7},
			{"F", 8}
		}
	);

	// T. Lengauer and R. E. Tarjan pg. 122 fig. 1
	// ref: https://www.cs.princeton.edu/courses/archive/spr03/cs423/download/dominators.pdf
	inputGraph[3] = generateGraph(
		{ "R", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "L", "K" },
		{
			edge("R", "B"),
			edge("R", "A"),
			edge("R", "C"),
			edge("B", "A"),
			edge("B", "D"),
			edge("B", "E"),
			edge("A", "D"),
			edge("D", "L"),
			edge("L", "H"),
			edge("E", "H"),
			edge("H", "E"),
			edge("H", "K"),
			edge("K", "I"),
			edge("K", "R"),
			edge("C", "F"),
			edge("C", "G"),
			edge("F", "I"),
			edge("G", "I"),
			edge("G", "J"),
			edge("J", "I"),
			edge("I", "K"),
		},
		{0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 9, 9, 11},
		{
			{"R", 0},
			{"B", 1},
			{"A", 2},
			{"D", 3},
			{"L", 4},
			{"H", 5},
			{"E", 6},
			{"K", 7},
			{"I", 8},
			{"C", 9},
			{"F", 10},
			{"G", 11},
			{"J", 12}
		}
	);

	// Extracted from Loukas Georgiadis Dissertation - Linear-Time Algorithms for Dominators and Related Problems
	// pg. 12 Fig. 2.2
	// ref: https://www.cs.princeton.edu/techreports/2005/737.pdf
	inputGraph[4] = generateGraph(
		{ "R", "W", "X1", "X2", "X3", "X4", "X5", "X6", "X7", "Y" },
		{
			edge("R", "W"),
			edge("R", "Y"),
			edge("W", "X1"),
			edge("Y", "X7"),
			edge("X1", "X2"),
			edge("X2", "X1"),
			edge("X2", "X3"),
			edge("X3", "X2"),
			edge("X3", "X4"),
			edge("X4", "X3"),
			edge("X4", "X5"),
			edge("X5", "X4"),
			edge("X5", "X6"),
			edge("X6", "X5"),
			edge("X6", "X7"),
			edge("X7", "X6")
		},
		{0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
		{
			{"R", 0},
			{"W", 1},
			{"X1", 2},
			{"X2", 3},
			{"X3", 4},
			{"X4", 5},
			{"X5", 6},
			{"X6", 7},
			{"X7", 8},
			{"Y", 9}
		}
	);

	// Worst-case families for k = 3
	// Example itworst(3) pg. 26 fig. 2.9
	// ref: https://www.cs.princeton.edu/techreports/2005/737.pdf
	inputGraph[5] = generateGraph(
		{ "R", "W1", "W2", "W3", "X1", "X2", "X3", "Y1", "Y2", "Y3", "Z1", "Z2", "Z3" },
		{
			edge("R", "W1"),
			edge("R", "X1"),
			edge("R", "Z3"),
			edge("W1", "W2"),
			edge("W2", "W3"),
			edge("X1", "X2"),
			edge("X2", "X3"),
			edge("X3", "Y1"),
			edge("Y1", "W1"),
			edge("Y1", "W2"),
			edge("Y1", "W3"),
			edge("Y1", "Y2"),
			edge("Y2", "W1"),
			edge("Y2", "W2"),
			edge("Y2", "W3"),
			edge("Y2", "Y3"),
			edge("Y3", "W1"),
			edge("Y3", "W2"),
			edge("Y3", "W3"),
			edge("Y3", "Z1"),
			edge("Z1", "Z2"),
			edge("Z2", "Z1"),
			edge("Z2", "Z3"),
			edge("Z3", "Z2")
		},
		{0, 0, 0, 0, 0, 4, 5, 6, 7, 8, 0, 0, 0},
		{
			{"R", 0},
			{"W1", 1},
			{"W2", 2},
			{"W3", 3},
			{"X1", 4},
			{"X2", 5},
			{"X3", 6},
			{"Y1", 7},
			{"Y2", 8},
			{"Y3", 9},
			{"Z1", 10},
			{"Z2", 11},
			{"Z3", 12}
		}
	);

	// Worst-case families for k = 3
	// Example idfsquad(3) pg. 26 fig. 2.9
	// ref: https://www.cs.princeton.edu/techreports/2005/737.pdf
	inputGraph[6] = generateGraph(
		{ "R", "X1", "X2", "X3", "Y1", "Y2", "Y3", "Z1", "Z2", "Z3" },
		{
			edge("R", "X1"),
			edge("R", "Z1"),
			edge("X1", "Y1"),
			edge("X1", "X2"),
			edge("X2", "X3"),
			edge("X2", "Y2"),
			edge("X3", "Y3"),
			edge("Y1", "Z1"),
			edge("Y1", "Z2"),
			edge("Z1", "Y1"),
			edge("Y2", "Z2"),
			edge("Y2", "Z3"),
			edge("Z2", "Y2"),
			edge("Y3", "Z3"),
			edge("Z3", "Y3")
		},
		{0, 0, 0, 0, 0, 0, 0, 0, 1, 8},
		{
			{"R", 0},
			{"X1", 1},
			{"Y1", 2},
			{"Z1", 3},
			{"Z2", 4},
			{"Y2", 5},
			{"Z3", 6},
			{"Y3", 7},
			{"X2", 8},
			{"X3", 9}
		}
	);

	// Worst-case families for k = 3
	// Example ibfsquad(3) pg. 26 fig. 2.9
	// ref: https://www.cs.princeton.edu/techreports/2005/737.pdf
	inputGraph[7] = generateGraph(
		{ "R", "W", "X1", "X2", "X3", "Y", "Z" },
		{
			edge("R", "W"),
			edge("R", "Y"),
			edge("W", "X1"),
			edge("W", "X2"),
			edge("W", "X3"),
			edge("Y", "Z"),
			edge("Z", "X3"),
			edge("X3", "X2"),
			edge("X2", "X1")
		},
		{0, 0, 0, 0, 0, 0, 5},
		{
			{"R", 0},
			{"W", 1},
			{"X1", 2},
			{"X2", 3},
			{"X3", 4},
			{"Y", 5},
			{"Z", 6}
		}
	);

	// Worst-case families for k = 3
	// Example sncaworst(3) pg. 26 fig. 2.9
	// ref: https://www.cs.princeton.edu/techreports/2005/737.pdf
	inputGraph[8] = generateGraph(
		{ "R", "X1", "X2", "X3", "Y1", "Y2", "Y3" },
		{
			edge("R", "X1"),
			edge("R", "Y1"),
			edge("R", "Y2"),
			edge("R", "Y3"),
			edge("X1", "X2"),
			edge("X2", "X3"),
			edge("X3", "Y1"),
			edge("X3", "Y2"),
			edge("X3", "Y3")
		},
		{0, 0, 1, 2, 0, 0, 0},
		{
			{"R", 0},
			{"X1", 1},
			{"X2", 2},
			{"X3", 3},
			{"Y1", 4},
			{"Y2", 5},
			{"Y3", 6},
		}
	);

	for (ImmediateDominatorTest const* g: inputGraph)
	{
		{
			Dominator<
				ImmediateDominatorTest::Vertex,
				ImmediateDominatorTest::ForEachVertexSuccessorTest
			> dom(*g->entry, g->numVertices);

			// Bind by reference to avoid copying every vertex (name + successor list).
			for (auto const& [v, idx]: dom.vertexIndices())
				BOOST_CHECK(g->expectedDFSIndices.at(v.data) == idx);
			BOOST_TEST(dom.immediateDominators() == g->expectedIdom);
		}

		// The graph and its vertices were allocated with ``new`` by ``generateGraph`` and are
		// owned by this test, so release them once the analysis object is gone.
		for (auto const& nameAndVertex: g->vertices)
			delete nameAndVertex.second;
		delete g;
	}
}
BOOST_AUTO_TEST_SUITE_END()
}