solidity/libyul/backends/evm/Dominator.h


/*
	This file is part of solidity.

	solidity is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	solidity is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with solidity.  If not, see <http://www.gnu.org/licenses/>.
*/
// SPDX-License-Identifier: GPL-3.0
/**
 * Dominator analysis of a control flow graph.
 * The implementation is based on the following paper:
 * https://www.cs.princeton.edu/courses/archive/spr03/cs423/download/dominators.pdf
 * See appendix B pg. 139.
 */
#pragma once

#include <libyul/backends/evm/ControlFlowGraph.h>
#include <libsolutil/Visitor.h>

#include <cstddef>
#include <deque>
#include <limits>
#include <map>
#include <set>
#include <vector>

namespace solidity::yul
{

// Dominator analysis of a directed graph rooted at an entry vertex, based on the simple
// (non-balanced link/eval) variant of the Lengauer-Tarjan algorithm.
//
// Vertices are identified internally by their depth-first-search (dfs) pre-order number.
// The entry vertex always has number 0 and only vertices that are reachable from the entry
// are numbered.
//
// Vertex: type of a graph vertex. It is copied and used as a ``std::map`` key.
// ForEachSuccessor: default-constructible functor invoked as ``ForEachSuccessor{}(v, callback)``.
// It is expected to invoke ``callback(w)`` once for every edge (``v``, ``w``) of the graph.
template<typename Vertex, typename ForEachSuccessor>
class Dominator
{
public:

	// Runs the analysis on the graph reachable from ``_entry``.
	// ``_numVertices`` is an upper bound for the number of vertices reachable from ``_entry``
	// (must be greater than zero).
	Dominator(Vertex _entry, size_t _numVertices)
	{
		m_immediateDominator = lengauerTarjanDominator(_entry, _numVertices);
		buildDominatorTree();
	}

	// Returns the reachable vertices in dfs order, i.e. element ``i`` is the vertex with dfs index ``i``.
	std::vector<Vertex> vertices() const
	{
		return m_vertex;
	}

	// Returns the mapping from each reachable vertex to its dfs index.
	std::map<Vertex, size_t> vertexIndices() const
	{
		return m_vertexIndex;
	}

	// Returns the immediate dominators by dfs index, i.e. element ``i`` is the dfs index of the
	// immediate dominator of the vertex with dfs index ``i``. The entry maps to itself.
	std::vector<size_t> immediateDominators() const
	{
		return m_immediateDominator;
	}

	// Returns the dominator tree: dfs index of a vertex -> dfs indices of the vertices it
	// immediately dominates. Vertices that do not immediately dominate anything have no entry.
	std::map<size_t, std::vector<size_t>> dominatorTree() const
	{
		return m_dominatorTree;
	}

	// Checks whether ``_a`` dominates ``_b`` by walking the chain of immediate dominators
	// from ``_b`` towards the entry node. A vertex dominates itself.
	// Both vertices must have been reached by the analysis.
	bool dominates(Vertex _a, Vertex _b) const
	{
		size_t const aIdx = indexOf(_a);
		size_t idx = indexOf(_b);

		// The immediate dominator of a vertex is a proper ancestor of it in the dfs spanning tree
		// and therefore has a strictly smaller dfs index. The walk can stop as soon as the
		// index is no longer greater than the one of ``_a``: either we found ``_a`` or we passed it.
		while (idx > aIdx)
			idx = m_immediateDominator[idx];
		return idx == aIdx;
	}

	// Finds all dominators of the vertex ``_v``.
	// The result starts with the entry node, followed by the remaining strict dominators of ``_v``
	// ordered from its immediate dominator upwards.
	// @note for a vertex ``_v``, the inclusion of ``_v`` in the set of its own dominators is implicit,
	// i.e. ``_v`` itself is not part of the result (unless it is the entry node).
	std::vector<Vertex> dominatorsOf(Vertex _v) const
	{
		solAssert(!m_vertex.empty());
		// The entry node always dominates all other nodes.
		std::vector<Vertex> dominators{m_vertex[0]};

		for (
			size_t idomIdx = m_immediateDominator[indexOf(_v)];
			idomIdx != 0;
			idomIdx = m_immediateDominator[idomIdx]
		)
			dominators.emplace_back(m_vertex[idomIdx]);

		return dominators;
	}

	// (Re)builds the dominator tree from the immediate dominators.
	void buildDominatorTree()
	{
		solAssert(!m_vertex.empty());
		solAssert(!m_immediateDominator.empty());

		// Drop results of a previous run so that calling this twice does not duplicate children.
		m_dominatorTree.clear();

		// Ignoring the entry node since no one dominates it.
		for (size_t i = 1; i < m_immediateDominator.size(); ++i)
			m_dominatorTree[m_immediateDominator[i]].emplace_back(i);
	}

	// Path compression updates the ancestors of vertices along
	// the path to the ancestor with the minimum label value.
	// ``_v`` must already be linked, i.e. it must have an ancestor.
	void compressPath(
		std::vector<size_t> &_ancestor,
		std::vector<size_t> &_label,
		std::vector<size_t> &_semi,
		size_t _v
	)
	{
		solAssert(_ancestor[_v] != c_undefined);
		size_t const u = _ancestor[_v];
		// Nothing to compress if the ancestor is the root of its tree in the virtual forest.
		if (_ancestor[u] != c_undefined)
		{
			compressPath(_ancestor, _label, _semi, u);
			if (_semi[_label[u]] < _semi[_label[_v]])
				_label[_v] = _label[u];
			_ancestor[_v] = _ancestor[u];
		}
	}

	// Numbers the vertices reachable from ``_entry`` in dfs order (filling the vertex list and the
	// vertex index map of this object) and computes their immediate dominators.
	// ``numVertices`` is an upper bound for the number of reachable vertices and must be non-zero.
	// Returns the immediate dominators by dfs index, one element per reached vertex.
	std::vector<size_t> lengauerTarjanDominator(Vertex _entry, size_t numVertices)
	{
		solAssert(numVertices > 0);

		// Start from a clean numbering; the dfs below appends the vertices in the order it reaches them.
		m_vertex.clear();
		m_vertex.reserve(numVertices);
		m_vertexIndex.clear();

		// semi(w): The dfs index of the semidominator of ``w``.
		std::vector<size_t> semi(numVertices, c_undefined);
		// parent(w): The index of the vertex which is the parent of ``w`` in the spanning
		// tree generated by the dfs.
		std::vector<size_t> parent(numVertices, c_undefined);
		// ancestor(w): The ancestor of ``w`` in the virtual forest maintained by link/eval;
		// shortened by path compression.
		std::vector<size_t> ancestor(numVertices, c_undefined);
		// label(w): The index of the vertex with the minimum semidominator on the already
		// compressed part of the forest path above ``w``.
		std::vector<size_t> label(numVertices, 0);
		// predecessors(w): The set of vertices ``v`` such that (``v``, ``w``) is an edge of the graph.
		std::vector<std::set<size_t>> predecessors(numVertices);
		// bucket(w): The vertices whose semidominator is ``w``.
		std::vector<std::vector<size_t>> bucket(numVertices);
		// idom(w): The index of the immediate dominator of ``w``.
		std::vector<size_t> idom(numVertices, c_undefined);

		// ``eval`` finds the vertex with the lowest semidominator dfs index on the forest path
		// above ``_v`` (compressing that path on the way). Roots of the forest evaluate to themselves.
		// TODO: implement sophisticated link-eval algorithm as shown in pg 132
		// See: https://www.cs.princeton.edu/courses/archive/spr03/cs423/download/dominators.pdf
		auto eval = [&](size_t _v) -> size_t
		{
			if (ancestor[_v] == c_undefined)
				return _v;
			compressPath(ancestor, label, semi, _v);
			return label[_v];
		};

		// step 1: number the vertices in dfs pre-order, record the spanning tree and all edges.
		auto dfs = [&](Vertex const& _v, size_t _parentIdx, auto const& _self) -> void
		{
			size_t const vIdx = m_vertex.size();
			// Reaching more vertices than announced would overflow all index-based arrays.
			solAssert(vIdx < numVertices);
			m_vertex.emplace_back(_v);
			m_vertexIndex.emplace(_v, vIdx);
			semi[vIdx] = vIdx;
			label[vIdx] = vIdx;
			parent[vIdx] = _parentIdx;
			ForEachSuccessor{}(_v, [&](Vertex _w) {
				// Only descend into successors that have not been numbered yet.
				if (m_vertexIndex.count(_w) == 0)
					_self(_w, vIdx, _self);
				predecessors[m_vertexIndex.at(_w)].insert(vIdx);
			});
		};
		dfs(_entry, c_undefined, dfs);

		size_t const numReached = m_vertex.size();

		// Process the vertices in decreasing order of the dfs number.
		for (size_t w = numReached; w-- > 0;)
		{
			// step 3
			// NOTE: this is an optimization, i.e. performing the step 3 before step 2.
			// The goal is to process the bucket in the beginning of the loop for the vertex ``w``
			// instead of ``parent[w]`` in the end of the loop as described in the original paper.
			// Inverting those steps ensures that a bucket is only processed once and
			// it does not need to be erased.
			// The optimization proposal is available here: https://jgaa.info/accepted/2006/GeorgiadisTarjanWerneck2006.10.1.pdf pg.77
			for (size_t v: bucket[w])
			{
				size_t const u = eval(v);
				idom[v] = (semi[u] < semi[v]) ? u : w;
			}

			// The entry has neither a semidominator nor a parent; only its bucket had to be processed.
			if (w == 0)
				break;

			// step 2
			for (size_t v: predecessors[w])
			{
				size_t const u = eval(v);
				if (semi[u] < semi[w])
					semi[w] = semi[u];
			}
			bucket[semi[w]].emplace_back(w);
			// ``link``: add the spanning tree edge (parent[w], w) to the virtual forest.
			ancestor[w] = parent[w];
		}

		// step 4: resolve the deferred immediate dominators in increasing dfs order.
		idom[0] = 0;
		for (size_t w = 1; w < numReached; ++w)
			if (idom[w] != semi[w])
				idom[w] = idom[idom[w]];

		// Only reached vertices carry meaningful information.
		idom.resize(numReached);
		return idom;
	}
private:
	// Marker for "no vertex" in the index-based helper arrays.
	static constexpr size_t c_undefined = std::numeric_limits<size_t>::max();

	// Returns the dfs index of ``_v``, which must have been reached by the analysis.
	// Uses a checked lookup so that querying an unknown vertex neither modifies the index map
	// nor silently aliases the entry node.
	size_t indexOf(Vertex const& _v) const
	{
		auto const it = m_vertexIndex.find(_v);
		solAssert(it != m_vertexIndex.end());
		return it->second;
	}

	// Keep the list of vertices in the dfs order.
	// i.e. m_vertex[i]: the vertex whose dfs index is i.
	std::vector<Vertex> m_vertex;
	// Maps Vertex to their dfs index.
	std::map<Vertex, size_t> m_vertexIndex;
	// Immediate dominators by index.
	// Maps a Vertex based on its dfs index (i.e. array index) to its immediate dominator dfs index.
	//
	// e.g. to get the immediate dominator of a Vertex w:
	// idomIdx = m_immediateDominator[m_vertexIndex[w]]
	// idomVertex = m_vertex[idomIdx]
	std::vector<size_t> m_immediateDominator;

	// Maps the dfs index of a Vertex to the dfs indices of all vertices that it immediately dominates.
	// If the vertex does not immediately dominate any other vertex it has no entry in the map.
	std::map<size_t, std::vector<size_t>> m_dominatorTree;
};
}