Move assembly related files to libevmasm and Params.h/.cpp to libevmcore.

2023-10-03 13:03:40 +00:00 · 2015-04-24 17:35:16 +02:00 · 2015-04-24 17:35:16 +02:00 · b9d7387e7a
commit b9d7387e7a
15 changed files with 3077 additions and 0 deletions
--- a/Assembly.cpp
+++ b/Assembly.cpp
@ -0,0 +1,485 @@
+/*
+	This file is part of cpp-ethereum.
+
+	cpp-ethereum is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	cpp-ethereum is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file Assembly.cpp
+ * @author Gav Wood <i@gavwood.com>
+ * @date 2014
+ */
+
+#include "Assembly.h"
+#include <fstream>
+#include <libdevcore/Log.h>
+#include <libevmasm/CommonSubexpressionEliminator.h>
+#include <libevmasm/ControlFlowGraph.h>
+#include <json/json.h>
+using namespace std;
+using namespace dev;
+using namespace dev::eth;
+
+/// Appends the items, data, strings and sub-assemblies of @a _a to this assembly.
+/// Tag ids found in the items of @a _a are shifted by the number of tags already used
+/// here; sub-assembly indices (PushSub / PushSubSize) are shifted by the number of
+/// sub-assemblies already stored here, because the sub-assemblies of @a _a are placed
+/// behind the existing ones in m_subs.
+void Assembly::append(Assembly const& _a)
+{
+	auto newDeposit = m_deposit + _a.deposit();
+	// Index the first sub-assembly of _a will get once it has been copied into m_subs.
+	size_t const subOffset = m_subs.size();
+	// Iterate by value on purpose: each item is adjusted before it is appended.
+	for (AssemblyItem i: _a.m_items)
+	{
+		if (i.type() == Tag || i.type() == PushTag)
+			i.setData(i.data() + m_usedTags);
+		else if (i.type() == PushSub || i.type() == PushSubSize)
+			i.setData(i.data() + subOffset);
+		append(i);
+	}
+	// append(AssemblyItem) already changed m_deposit item by item; set it to the value
+	// computed up front from the total deposit of _a.
+	m_deposit = newDeposit;
+	m_usedTags += _a.m_usedTags;
+	for (auto const& i: _a.m_data)
+		m_data.insert(i);
+	for (auto const& i: _a.m_strings)
+		m_strings.insert(i);
+	for (auto const& i: _a.m_subs)
+		m_subs.push_back(i);
+
+	assert(!_a.m_baseDeposit);
+	assert(!_a.m_totalDeposit);
+}
+
+/// Appends @a _a and afterwards one POP for every unit by which the deposit of @a _a
+/// exceeds @a _deposit.
+/// @throws InvalidDeposit if @a _deposit is larger than the deposit of @a _a.
+void Assembly::append(Assembly const& _a, int _deposit)
+{
+	if (_deposit > _a.m_deposit)
+		BOOST_THROW_EXCEPTION(InvalidDeposit());
+
+	append(_a);
+	for (int surplus = _a.m_deposit - _deposit; surplus > 0; --surplus)
+		append(Instruction::POP);
+}
+
+/// @returns everything stream() writes for this assembly with default arguments, as a string.
+string Assembly::out() const
+{
+	ostringstream text;
+	stream(text);
+	return text.str();
+}
+
+/// @returns the size computed from all items and data, using the smallest address
+/// length (in bytes) that is large enough to represent that size itself.
+unsigned Assembly::bytesRequired() const
+{
+	// One byte plus the size of every data blob; independent of the address length.
+	unsigned fixedPart = 1;
+	for (auto const& entry: m_data)
+		fixedPart += entry.second.size();
+
+	unsigned addressLength = 1;
+	while (true)
+	{
+		unsigned total = fixedPart;
+		for (AssemblyItem const& item: m_items)
+			total += item.bytesRequired(addressLength);
+		// Stop as soon as the total fits into the assumed address length.
+		if (dev::bytesRequired(total) <= addressLength)
+			return total;
+		++addressLength;
+	}
+}
+
+/// Looks up the source text that @a _location refers to.
+/// @param _sourceCodes map from source name to source text.
+/// @param _location range [start, end) inside the source named by _location.sourceName.
+/// @returns the referenced text, cut at the first line break (followed by "..." in that
+/// case), or an empty string if the location is empty or invalid, the source is unknown
+/// or the start lies behind the end of the source.
+string Assembly::getLocationFromSources(StringMap const& _sourceCodes, SourceLocation const& _location) const
+{
+	if (_location.isEmpty() || _sourceCodes.empty() || _location.start >= _location.end || _location.start < 0)
+		return "";
+
+	auto it = _sourceCodes.find(*_location.sourceName);
+	if (it == _sourceCodes.end())
+		return "";
+
+	string const& source = it->second;
+	if (size_t(_location.start) >= source.size())
+		return "";
+
+	// substr clamps the length, so an end position past the source is harmless.
+	string cut = source.substr(_location.start, _location.end - _location.start);
+	auto newLinePos = cut.find('\n');
+	if (newLinePos != string::npos)
+		cut = cut.substr(0, newLinePos) + "...";
+
+	// Return the local directly: wrapping it in move() would prevent copy elision.
+	return cut;
+}
+
+/// Writes a human-readable listing of this assembly to @a _out.
+/// Every line starts with @a _prefix. A ".code:" section with one line per item (a tag
+/// produces two lines) is followed, if there is any data or sub-assembly, by a ".data:"
+/// section in which the sub-assemblies are streamed recursively with a longer prefix.
+/// @param _sourceCodes sources used to print the source snippet an item originates from.
+/// @returns @a _out.
+/// @throws InvalidOpcode for an item type that is not handled below.
+ostream& Assembly::streamAsm(ostream& _out, string const& _prefix, StringMap const& _sourceCodes) const
+{
+	_out << _prefix << ".code:" << endl;
+	for (AssemblyItem const& i: m_items)
+	{
+		_out << _prefix;
+		switch (i.type())
+		{
+		case Operation:
+			_out << "  " << instructionInfo(i.instruction()).name  << "\t" << i.getJumpTypeAsString();
+			break;
+		case Push:
+			_out << "  PUSH " << i.data();
+			break;
+		case PushString:
+			_out << "  PUSH \"" << m_strings.at((h256)i.data()) << "\"";
+			break;
+		case PushTag:
+			_out << "  PUSH [tag" << i.data() << "]";
+			break;
+		case PushSub:
+			_out << "  PUSH [$" << h256(i.data()).abridged() << "]";
+			break;
+		case PushSubSize:
+			_out << "  PUSH #[$" << h256(i.data()).abridged() << "]";
+			break;
+		case PushProgramSize:
+			_out << "  PUSHSIZE";
+			break;
+		case Tag:
+			// The label goes on its own line, the JUMPDEST on the next (prefixed) one.
+			_out << "tag" << i.data() << ": " << endl << _prefix << "  JUMPDEST";
+			break;
+		case PushData:
+			// Note: hex is a sticky manipulator; it stays set on _out for later numbers.
+			_out << "  PUSH [" << hex << (unsigned)i.data() << "]";
+			break;
+		default:
+			BOOST_THROW_EXCEPTION(InvalidOpcode());
+		}
+		// Append the source snippet (may be empty) that the item was generated from.
+		_out << "\t\t" << getLocationFromSources(_sourceCodes, i.getLocation()) << endl;
+	}
+
+	if (!m_data.empty() || !m_subs.empty())
+	{
+		_out << _prefix << ".data:" << endl;
+		// Data entries whose key is smaller than the number of sub-assemblies are skipped;
+		// assemble() stores the assembled sub-assemblies under exactly those keys.
+		for (auto const& i: m_data)
+			if (u256(i.first) >= m_subs.size())
+				_out << _prefix << "  " << hex << (unsigned)(u256)i.first << ": " << toHex(i.second) << endl;
+		for (size_t i = 0; i < m_subs.size(); ++i)
+		{
+			_out << _prefix << "  " << hex << i << ": " << endl;
+			m_subs[i].stream(_out, _prefix + "  ", _sourceCodes);
+		}
+	}
+	return _out;
+}
+
+/// Builds the JSON object describing a single assembly item.
+/// "name", "begin" and "end" are always set; "value" and "jumpType" are only set
+/// if the respective argument is a non-empty string.
+Json::Value Assembly::createJsonValue(string _name, int _begin, int _end, string _value, string _jumpType) const
+{
+	Json::Value entry;
+	entry["name"] = _name;
+	entry["begin"] = _begin;
+	entry["end"] = _end;
+
+	bool const hasValue = !_value.empty();
+	bool const hasJumpType = !_jumpType.empty();
+	if (hasValue)
+		entry["value"] = _value;
+	if (hasJumpType)
+		entry["jumpType"] = _jumpType;
+
+	return entry;
+}
+
+/// @returns @a _value formatted through a stream with the hex manipulator set.
+string toStringInHex(u256 _value)
+{
+	ostringstream formatted;
+	formatted << hex << _value;
+	return formatted.str();
+}
+
+/// Builds the JSON representation of this assembly.
+/// The result has a ".code" array with one entry per item (two for a tag) and, if
+/// there is any data or sub-assembly, a ".data" object holding the hex-encoded data
+/// entries and the JSON of the sub-assemblies, keyed by their hexadecimal index.
+/// The JSON is written to @a _out only if the ".data" section exists.
+/// @returns the JSON value that was built.
+/// @throws InvalidOpcode for an item type that is not handled below.
+Json::Value Assembly::streamAsmJson(ostream& _out, StringMap const& _sourceCodes) const
+{
+	Json::Value root;
+
+	Json::Value collection(Json::arrayValue);
+	for (AssemblyItem const& i: m_items)
+	{
+		switch (i.type())
+		{
+		case Operation:
+			// NOTE(review): the jump type ends up in the "value" field here (fourth
+			// argument), not in "jumpType" - confirm this is what consumers expect.
+			collection.append(
+				createJsonValue(instructionInfo(i.instruction()).name, i.getLocation().start, i.getLocation().end, i.getJumpTypeAsString()));
+			break;
+		case Push:
+			collection.append(
+				createJsonValue("PUSH", i.getLocation().start, i.getLocation().end, toStringInHex(i.data()), i.getJumpTypeAsString()));
+			break;
+		case PushString:
+			collection.append(
+				createJsonValue("PUSH tag", i.getLocation().start, i.getLocation().end, m_strings.at((h256)i.data())));
+			break;
+		case PushTag:
+			collection.append(
+				createJsonValue("PUSH [tag]", i.getLocation().start, i.getLocation().end, toStringInHex(i.data())));
+			break;
+		case PushSub:
+			collection.append(
+				createJsonValue("PUSH [$]", i.getLocation().start, i.getLocation().end, dev::toString(h256(i.data()))));
+			break;
+		case PushSubSize:
+			collection.append(
+				createJsonValue("PUSH #[$]", i.getLocation().start, i.getLocation().end, dev::toString(h256(i.data()))));
+			break;
+		case PushProgramSize:
+			collection.append(
+				createJsonValue("PUSHSIZE", i.getLocation().start, i.getLocation().end));
+			break;
+		case Tag:
+			// A tag is emitted as the label followed by the jump destination opcode.
+			collection.append(
+				createJsonValue("tag", i.getLocation().start, i.getLocation().end, string(i.data())));
+			collection.append(
+				createJsonValue("JUMPDEST", i.getLocation().start, i.getLocation().end));
+			break;
+		case PushData:
+			collection.append(createJsonValue("PUSH hex", i.getLocation().start, i.getLocation().end, toStringInHex(i.data())));
+			break;
+		default:
+			BOOST_THROW_EXCEPTION(InvalidOpcode());
+		}
+	}
+
+	root[".code"] = collection;
+
+	if (!m_data.empty() || !m_subs.empty())
+	{
+		Json::Value data;
+		// Data entries whose key is smaller than the number of sub-assemblies are skipped;
+		// assemble() stores the assembled sub-assemblies under exactly those keys.
+		for (auto const& i: m_data)
+			if (u256(i.first) >= m_subs.size())
+				data[toStringInHex((u256)i.first)] = toHex(i.second);
+
+		for (size_t i = 0; i < m_subs.size(); ++i)
+		{
+			std::stringstream hexStr;
+			hexStr << hex << i;
+			data[hexStr.str()] = m_subs[i].stream(_out, "", _sourceCodes, true);
+		}
+		root[".data"] = data;
+		// NOTE(review): nothing is written to _out for an assembly without data and
+		// sub-assemblies; callers then have to use the return value.
+		_out << root;
+	}
+	return root;
+}
+
+/// Streams this assembly to @a _out, either as text or as JSON.
+/// @returns the JSON value in JSON mode and a default-constructed Json::Value otherwise.
+Json::Value Assembly::stream(ostream& _out, string const& _prefix, StringMap const& _sourceCodes, bool _inJsonFormat) const
+{
+	if (!_inJsonFormat)
+	{
+		streamAsm(_out, _prefix, _sourceCodes);
+		return Json::Value();
+	}
+	// The prefix is not used for the JSON representation.
+	return streamAsmJson(_out, _sourceCodes);
+}
+
+/// Appends a copy of @a _i, adds its deposit to the running deposit and, if the item
+/// carries no source location, assigns it the current source location (if one is set).
+/// @returns a reference to the last item of this assembly.
+AssemblyItem const& Assembly::append(AssemblyItem const& _i)
+{
+	m_deposit += _i.deposit();
+	m_items.push_back(_i);
+
+	AssemblyItem& added = m_items.back();
+	bool const needsLocation = added.getLocation().isEmpty();
+	if (needsLocation && !m_currentSourceLocation.isEmpty())
+		added.setLocation(m_currentSourceLocation);
+
+	return back();
+}
+
+/// Inserts a copy of @a _i in front of all existing items.
+/// The running deposit is left untouched.
+void Assembly::injectStart(AssemblyItem const& _i)
+{
+	auto const front = m_items.begin();
+	m_items.insert(front, _i);
+}
+
+/// @returns true iff both sequences have the same length and every item of @a _a
+/// matches (AssemblyItem::match) the item of @a _b at the same position.
+inline bool matches(AssemblyItemsConstRef _a, AssemblyItemsConstRef _b)
+{
+	if (_a.size() != _b.size())
+		return false;
+
+	unsigned index = 0;
+	while (index < _a.size() && _a[index].match(_b[index]))
+		++index;
+	// The loop only runs to the end if no pair failed to match.
+	return !(index < _a.size());
+}
+
+/// Log channel used by the optimiser; it is named "OPT" and has verbosity 12.
+struct OptimiserChannel: public LogChannel { static const char* name() { return "OPT"; } static const int verbosity = 12; };
+/// Creates a log output stream for the optimiser channel; use as `copt << ...;`.
+#define copt dev::LogOutputStream<OptimiserChannel, true>()
+
+/// Optimises the items of this assembly and, afterwards, of all sub-assemblies.
+/// Control flow analysis and common subexpression elimination are repeated until a
+/// full round does not shrink the code any more.
+/// @param _enable if false, nothing is done.
+/// @returns a reference to this assembly.
+Assembly& Assembly::optimise(bool _enable)
+{
+	if (!_enable)
+		return *this;
+
+	unsigned total = 0;
+	// count is the number of successful replacements in the current round; the loop
+	// ends after the first round without any replacement.
+	for (unsigned count = 1; count > 0; total += count)
+	{
+		copt << toString(*this);
+		count = 0;
+
+		copt << "Performing control flow analysis...";
+		{
+			ControlFlowGraph cfg(m_items);
+			AssemblyItems optItems = cfg.optimisedItems();
+			// Only accept the result if it is strictly shorter.
+			if (optItems.size() < m_items.size())
+			{
+				copt << "Old size: " << m_items.size() << ", new size: " << optItems.size();
+				m_items = move(optItems);
+				count++;
+			}
+		}
+
+		copt << "Performing common subexpression elimination...";
+		for (auto iter = m_items.begin(); iter != m_items.end();)
+		{
+			CommonSubexpressionEliminator eliminator;
+			auto orig = iter;
+			// feedItems consumes items starting at iter and returns the position where it stopped.
+			iter = eliminator.feedItems(iter, m_items.end());
+			AssemblyItems optItems;
+			bool shouldReplace = false;
+			try
+			{
+				optItems = eliminator.getOptimizedItems();
+				shouldReplace = (optItems.size() < size_t(iter - orig));
+			}
+			catch (StackTooDeepException const&)
+			{
+				// This might happen if the opcode reconstruction is not as efficient
+				// as the hand-crafted code.
+			}
+
+			if (shouldReplace)
+			{
+				copt << "Old size: " << (iter - orig) << ", new size: " << optItems.size();
+				count++;
+				// Overwrite the start of the consumed range with the shorter sequence
+				// and erase the items that are left over.
+				for (auto moveIter = optItems.begin(); moveIter != optItems.end(); ++orig, ++moveIter)
+					*orig = move(*moveIter);
+				iter = m_items.erase(orig, iter);
+			}
+		}
+	}
+
+	copt << total << " optimisations done.";
+
+	for (auto& sub: m_subs)
+		sub.optimise(true);
+
+	return *this;
+}
+
+/// Translates the items of this assembly into a byte sequence.
+/// Sub-assemblies are assembled first and stored in m_data under their index. The
+/// items are then emitted in order, with placeholders for tag, data and program size
+/// references; the placeholders are filled in once the final positions are known.
+/// Referenced data is placed behind the code, separated from it by a zero byte.
+/// @throws InvalidOpcode for an item type that is not handled below.
+bytes Assembly::assemble() const
+{
+	bytes ret;
+
+	// Size estimate taken before the sub-assemblies are added to m_data below.
+	unsigned totalBytes = bytesRequired();
+	vector<unsigned> tagPos(m_usedTags);
+	// Maps the position of a tag placeholder in ret to the id of the referenced tag.
+	map<unsigned, unsigned> tagRef;
+	// Maps a data key to the positions of all placeholders referencing that data.
+	multimap<h256, unsigned> dataRef;
+	vector<unsigned> sizeRef; ///< Pointers to code locations where the size of the program is inserted
+	unsigned bytesPerTag = dev::bytesRequired(totalBytes);
+	// Opcode of the PUSH instruction that pushes bytesPerTag bytes.
+	byte tagPush = (byte)Instruction::PUSH1 - 1 + bytesPerTag;
+
+	// m_data is mutable, so the assembled sub-assemblies can be stored in it here.
+	for (size_t i = 0; i < m_subs.size(); ++i)
+		m_data[u256(i)] = m_subs[i].assemble();
+
+	unsigned bytesRequiredIncludingData = bytesRequired();
+	unsigned bytesPerDataRef = dev::bytesRequired(bytesRequiredIncludingData);
+	byte dataRefPush = (byte)Instruction::PUSH1 - 1 + bytesPerDataRef;
+	ret.reserve(bytesRequiredIncludingData);
+	// m_data must not change from here on
+
+	for (AssemblyItem const& i: m_items)
+		switch (i.type())
+		{
+		case Operation:
+			ret.push_back((byte)i.data());
+			break;
+		case PushString:
+		{
+			// PUSH32 followed by at most the first 32 characters, zero-padded to 32 bytes.
+			ret.push_back((byte)Instruction::PUSH32);
+			unsigned ii = 0;
+			for (auto j: m_strings.at((h256)i.data()))
+				if (++ii > 32)
+					break;
+				else
+					ret.push_back((byte)j);
+			while (ii++ < 32)
+				ret.push_back(0);
+			break;
+		}
+		case Push:
+		{
+			// Use the smallest PUSH that can hold the value (at least one byte).
+			byte b = max<unsigned>(1, dev::bytesRequired(i.data()));
+			ret.push_back((byte)Instruction::PUSH1 - 1 + b);
+			ret.resize(ret.size() + b);
+			bytesRef byr(&ret.back() + 1 - b, b);
+			toBigEndian(i.data(), byr);
+			break;
+		}
+		case PushTag:
+		{
+			// Placeholder; filled with the tag position after all items are emitted.
+			ret.push_back(tagPush);
+			tagRef[ret.size()] = (unsigned)i.data();
+			ret.resize(ret.size() + bytesPerTag);
+			break;
+		}
+		case PushData: case PushSub:
+		{
+			// Placeholder; filled with the data position when the data is appended.
+			ret.push_back(dataRefPush);
+			dataRef.insert(make_pair((h256)i.data(), ret.size()));
+			ret.resize(ret.size() + bytesPerDataRef);
+			break;
+		}
+		case PushSubSize:
+		{
+			// The size of the data entry is known already and is pushed directly.
+			auto s = m_data[i.data()].size();
+			byte b = max<unsigned>(1, dev::bytesRequired(s));
+			ret.push_back((byte)Instruction::PUSH1 - 1 + b);
+			ret.resize(ret.size() + b);
+			bytesRef byr(&ret.back() + 1 - b, b);
+			toBigEndian(s, byr);
+			break;
+		}
+		case PushProgramSize:
+		{
+			// Placeholder; filled with the final size of ret at the very end.
+			ret.push_back(dataRefPush);
+			sizeRef.push_back(ret.size());
+			ret.resize(ret.size() + bytesPerDataRef);
+			break;
+		}
+		case Tag:
+			tagPos[(unsigned)i.data()] = ret.size();
+			ret.push_back((byte)Instruction::JUMPDEST);
+			break;
+		default:
+			BOOST_THROW_EXCEPTION(InvalidOpcode());
+		}
+
+	// Fill in the tag placeholders.
+	for (auto const& i: tagRef)
+	{
+		bytesRef r(ret.data() + i.first, bytesPerTag);
+		toBigEndian(tagPos[i.second], r);
+	}
+
+	if (!m_data.empty())
+	{
+		// A zero byte separates the code from the data.
+		ret.push_back(0);
+		for (auto const& i: m_data)
+		{
+			auto its = dataRef.equal_range(i.first);
+			// Data that is never referenced is not appended.
+			if (its.first != its.second)
+			{
+				// Point every reference to the position the data is about to get.
+				for (auto it = its.first; it != its.second; ++it)
+				{
+					bytesRef r(ret.data() + it->second, bytesPerDataRef);
+					toBigEndian(ret.size(), r);
+				}
+				for (auto b: i.second)
+					ret.push_back(b);
+			}
+		}
+	}
+	// Fill in the program size placeholders with the final size.
+	for (unsigned pos: sizeRef)
+	{
+		bytesRef r(ret.data() + pos, bytesPerDataRef);
+		toBigEndian(ret.size(), r);
+	}
+	return ret;
+}
--- a/Assembly.h
+++ b/Assembly.h
@ -0,0 +1,132 @@
+/*
+	This file is part of cpp-ethereum.
+
+	cpp-ethereum is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	cpp-ethereum is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file Assembly.h
+ * @author Gav Wood <i@gavwood.com>
+ * @date 2014
+ */
+
+#pragma once
+
+#include <iostream>
+#include <sstream>
+#include <libdevcore/Common.h>
+#include <libdevcore/Assertions.h>
+#include <libevmcore/Instruction.h>
+#include <libevmasm/SourceLocation.h>
+#include <libevmasm/AssemblyItem.h>
+#include "Exceptions.h"
+#include <json/json.h>
+
+namespace Json
+{
+class Value;
+}
+namespace dev
+{
+namespace eth
+{
+
+/// A sequence of assembly items together with the data, strings and sub-assemblies
+/// the items refer to. The class also keeps a running "deposit", the sum of the
+/// deposits of all appended items.
+class Assembly
+{
+public:
+	Assembly() {}
+
+	/// @returns a Tag / PushTag item with a tag id that was not handed out before.
+	AssemblyItem newTag() { return AssemblyItem(Tag, m_usedTags++); }
+	AssemblyItem newPushTag() { return AssemblyItem(PushTag, m_usedTags++); }
+	/// Stores @a _data under a key derived from std::hash of its contents.
+	/// NOTE(review): different data with the same std::hash value would overwrite each other.
+	AssemblyItem newData(bytes const& _data) { h256 h = (u256)std::hash<std::string>()(asString(_data)); m_data[h] = _data; return AssemblyItem(PushData, h); }
+	/// Stores a copy of @a _sub and @returns a PushSub item carrying its index.
+	AssemblyItem newSub(Assembly const& _sub) { m_subs.push_back(_sub); return AssemblyItem(PushSub, m_subs.size() - 1); }
+	/// Stores @a _data under a key derived from std::hash of the string.
+	AssemblyItem newPushString(std::string const& _data) { h256 h = (u256)std::hash<std::string>()(_data); m_strings[h] = _data; return AssemblyItem(PushString, h); }
+	AssemblyItem newPushSubSize(u256 const& _subId) { return AssemblyItem(PushSubSize, _subId); }
+
+	/// Appends a new tag and @returns a copy of it.
+	AssemblyItem append() { return append(newTag()); }
+	void append(Assembly const& _a);
+	void append(Assembly const& _a, int _deposit);
+	AssemblyItem const& append(AssemblyItem const& _i);
+	AssemblyItem const& append(std::string const& _data) { return append(newPushString(_data)); }
+	AssemblyItem const& append(bytes const& _data) { return append(newData(_data)); }
+	/// Registers @a _a as sub-assembly and appends an item pushing its size.
+	/// @returns the PushSub item of the new sub-assembly (which is not appended).
+	AssemblyItem appendSubSize(Assembly const& _a) { auto ret = newSub(_a); append(newPushSubSize(ret.data())); return ret; }
+	/// Pushes the final size of the current assembly itself. Use this when the code is modified
+	/// after compilation and CODESIZE is not an option.
+	void appendProgramSize() { append(AssemblyItem(PushProgramSize)); }
+
+	/// Append a push of a tag followed by JUMP / JUMPI. Without argument a new tag is
+	/// created. @returns a copy of the appended push-tag item.
+	AssemblyItem appendJump() { auto ret = append(newPushTag()); append(Instruction::JUMP); return ret; }
+	AssemblyItem appendJumpI() { auto ret = append(newPushTag()); append(Instruction::JUMPI); return ret; }
+	AssemblyItem appendJump(AssemblyItem const& _tag) { auto ret = append(_tag.pushTag()); append(Instruction::JUMP); return ret; }
+	AssemblyItem appendJumpI(AssemblyItem const& _tag) { auto ret = append(_tag.pushTag()); append(Instruction::JUMPI); return ret; }
+	/// Forwards to the append() overload that accepts @a _d.
+	template <class T> Assembly& operator<<(T const& _d) { append(_d); return *this; }
+	AssemblyItems const& getItems() const { return m_items; }
+	/// @returns the last item; must not be called on an assembly without items.
+	AssemblyItem const& back() const { return m_items.back(); }
+	/// @returns the string of the last item if that item is a PushString, otherwise an empty string.
+	std::string backString() const { return m_items.size() && m_items.back().type() == PushString ? m_strings.at((h256)m_items.back().data()) : std::string(); }
+
+	// Deposit bookkeeping for alternative code paths. onePath() remembers the current
+	// deposit as base, otherPath() records the deposit reached and resets to the base,
+	// donePath() throws InvalidDeposit if a path ends with a deposit different from the
+	// recorded one.
+	// NOTE(review): asserts() comes from libdevcore/Assertions.h; presumably it evaluates
+	// to true when the condition does NOT hold - confirm there.
+	void onePath() { if (asserts(!m_totalDeposit && !m_baseDeposit)) BOOST_THROW_EXCEPTION(InvalidDeposit()); m_baseDeposit = m_deposit; m_totalDeposit = INT_MAX; }
+	void otherPath() { donePath(); m_totalDeposit = m_deposit; m_deposit = m_baseDeposit; }
+	void donePaths() { donePath(); m_totalDeposit = m_baseDeposit = 0; }
+	/// ignored() saves the current deposit, endIgnored() restores it.
+	void ignored() { m_baseDeposit = m_deposit; }
+	void endIgnored() { m_deposit = m_baseDeposit; m_baseDeposit = 0; }
+
+	/// Appends POP instructions as long as the deposit is larger than @a _deposit.
+	void popTo(int _deposit) { while (m_deposit > _deposit) append(Instruction::POP); }
+
+	/// Inserts @a _i in front of all items.
+	void injectStart(AssemblyItem const& _i);
+	/// @returns the output of stream() as a string.
+	std::string out() const;
+	int deposit() const { return m_deposit; }
+	void adjustDeposit(int _adjustment) { m_deposit += _adjustment; if (asserts(m_deposit >= 0)) BOOST_THROW_EXCEPTION(InvalidDeposit()); }
+	void setDeposit(int _deposit) { m_deposit = _deposit; if (asserts(m_deposit >= 0)) BOOST_THROW_EXCEPTION(InvalidDeposit()); }
+
+	/// Changes the source location used for each appended item.
+	void setSourceLocation(SourceLocation const& _location) { m_currentSourceLocation = _location; }
+
+	/// @returns the byte sequence generated from the items, data and sub-assemblies.
+	bytes assemble() const;
+	/// Optimises this assembly and its sub-assemblies if @a _enable is true.
+	Assembly& optimise(bool _enable);
+	/// Streams the assembly to @a _out as text or, if @a _inJsonFormat is set, as JSON.
+	/// @returns the JSON value in JSON mode, a default-constructed value otherwise.
+	Json::Value stream(
+		std::ostream& _out,
+		std::string const& _prefix = "",
+		const StringMap &_sourceCodes = StringMap(),
+		bool _inJsonFormat = false
+	) const;
+
+protected:
+	/// @returns the (shortened) source text @a _location refers to or an empty string.
+	std::string getLocationFromSources(StringMap const& _sourceCodes, SourceLocation const& _location) const;
+	/// @throws InvalidDeposit if a deposit was recorded for another path and differs from the current one.
+	void donePath() { if (m_totalDeposit != INT_MAX && m_totalDeposit != m_deposit) BOOST_THROW_EXCEPTION(InvalidDeposit()); }
+	/// @returns the size computed from all items and data.
+	unsigned bytesRequired() const;
+
+private:
+	Json::Value streamAsmJson(std::ostream& _out, const StringMap &_sourceCodes) const;
+	std::ostream& streamAsm(std::ostream& _out, std::string const& _prefix, StringMap const& _sourceCodes) const;
+	Json::Value createJsonValue(std::string _name, int _begin, int _end, std::string _value = std::string(), std::string _jumpType = std::string()) const;
+
+protected:
+	/// Number of tag ids handed out so far; also the next free tag id.
+	unsigned m_usedTags = 0;
+	AssemblyItems m_items;
+	/// Data blobs by key; mutable because the const function assemble() stores the
+	/// assembled sub-assemblies in it.
+	mutable std::map<h256, bytes> m_data;
+	std::vector<Assembly> m_subs;
+	std::map<h256, std::string> m_strings;
+
+	/// Sum of the deposits of the appended items (unless changed explicitly).
+	int m_deposit = 0;
+	int m_baseDeposit = 0;
+	int m_totalDeposit = 0;
+
+	/// Location assigned to appended items that do not carry a location themselves.
+	SourceLocation m_currentSourceLocation;
+};
+
+/// Streams @a _a to @a _out using the default (textual) format of Assembly::stream.
+inline std::ostream& operator<<(std::ostream& _out, Assembly const& _a)
+{
+	// The JSON value returned by stream() is not needed here.
+	static_cast<void>(_a.stream(_out));
+	return _out;
+}
+
+}
+}
--- a/AssemblyItem.cpp
+++ b/AssemblyItem.cpp
@ -0,0 +1,135 @@
+/*
+	This file is part of cpp-ethereum.
+
+	cpp-ethereum is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	cpp-ethereum is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file AssemblyItem.cpp
+ * @author Gav Wood <i@gavwood.com>
+ * @date 2014
+ */
+
+#include "AssemblyItem.h"
+#include <fstream>
+
+using namespace std;
+using namespace dev;
+using namespace dev::eth;
+
+/// @returns an upper bound for the number of bytes this item occupies in the assembled
+/// code if a tag or data reference takes @a _addressLength bytes.
+/// @throws InvalidOpcode for an item type without a known size.
+unsigned AssemblyItem::bytesRequired(unsigned _addressLength) const
+{
+	// A plain opcode, or the JUMPDEST a tag is turned into.
+	if (m_type == Operation || m_type == Tag)
+		return 1;
+	if (m_type == PushString)
+		return 33;
+	if (m_type == Push)
+		return 1 + max<unsigned>(1, dev::bytesRequired(m_data));
+	// worst case: a 16MB program
+	if (m_type == PushSubSize || m_type == PushProgramSize)
+		return 4;
+	// One byte for the push opcode plus the address itself.
+	if (m_type == PushTag || m_type == PushData || m_type == PushSub)
+		return 1 + _addressLength;
+
+	BOOST_THROW_EXCEPTION(InvalidOpcode());
+}
+
+/// @returns the deposit of this item: `ret - args` of the instruction for an
+/// operation, 1 for every kind of push and 0 for everything else.
+int AssemblyItem::deposit() const
+{
+	if (m_type == Operation)
+		return instructionInfo(instruction()).ret - instructionInfo(instruction()).args;
+
+	bool const isPush =
+		m_type == Push ||
+		m_type == PushString ||
+		m_type == PushTag ||
+		m_type == PushData ||
+		m_type == PushSub ||
+		m_type == PushSubSize ||
+		m_type == PushProgramSize;
+	// Tags and all remaining types have a deposit of zero.
+	return isPush ? 1 : 0;
+}
+
+/// @returns "[in]" for a jump into a function, "[out]" for a jump out of a function
+/// and an empty string for every other jump type.
+string AssemblyItem::getJumpTypeAsString() const
+{
+	if (m_jumpType == JumpType::IntoFunction)
+		return "[in]";
+	if (m_jumpType == JumpType::OutOfFunction)
+		return "[out]";
+	// JumpType::Ordinary and anything else.
+	return "";
+}
+
+/// Writes a short textual form of @a _item to @a _out, starting with a space.
+/// Note that several cases set the sticky hex manipulator on @a _out, which stays
+/// in effect for numbers written to the stream afterwards.
+/// @throws InvalidOpcode for an item type that is not handled below.
+ostream& dev::eth::operator<<(ostream& _out, AssemblyItem const& _item)
+{
+	switch (_item.type())
+	{
+	case Operation:
+		_out << " " << instructionInfo(_item.instruction()).name;
+		// The jump type is only printed for jump instructions.
+		if (_item.instruction() == eth::Instruction::JUMP || _item.instruction() == eth::Instruction::JUMPI)
+			_out << "\t" << _item.getJumpTypeAsString();
+		break;
+	case Push:
+		_out << " PUSH " << hex << _item.data();
+		break;
+	case PushString:
+		// NOTE(review): unlike the other cases there is no space between the label
+		// and the value - confirm whether that is intended.
+		_out << " PushString"  << hex << (unsigned)_item.data();
+		break;
+	case PushTag:
+		_out << " PushTag " << _item.data();
+		break;
+	case Tag:
+		_out << " Tag " << _item.data();
+		break;
+	case PushData:
+		_out << " PushData " << hex << (unsigned)_item.data();
+		break;
+	case PushSub:
+		_out << " PushSub " << hex << h256(_item.data()).abridged();
+		break;
+	case PushSubSize:
+		_out << " PushSubSize " << hex << h256(_item.data()).abridged();
+		break;
+	case PushProgramSize:
+		_out << " PushProgramSize";
+		break;
+	case UndefinedItem:
+		_out << " ???";
+		break;
+	default:
+		BOOST_THROW_EXCEPTION(InvalidOpcode());
+	}
+	return _out;
+}
+
+/// Writes all items of @a _i to @a _out, one after the other, without separator.
+ostream& dev::eth::operator<<(ostream& _out, AssemblyItemsConstRef _i)
+{
+	for (auto const& item: _i)
+	{
+		_out << item;
+	}
+	return _out;
+}
--- a/AssemblyItem.h
+++ b/AssemblyItem.h
@ -0,0 +1,100 @@
+/*
+	This file is part of cpp-ethereum.
+
+	cpp-ethereum is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	cpp-ethereum is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file AssemblyItem.h
+ * @author Gav Wood <i@gavwood.com>
+ * @date 2014
+ */
+
+#pragma once
+
+#include <iostream>
+#include <sstream>
+#include <libdevcore/Common.h>
+#include <libdevcore/Assertions.h>
+#include <libevmcore/Instruction.h>
+#include <libevmasm/SourceLocation.h>
+#include "Exceptions.h"
+
+namespace dev
+{
+namespace eth
+{
+
+/// Kinds of assembly items. UndefinedItem is the first enumerator (value 0);
+/// AssemblyItem::match() treats an item of this type as matching any item.
+enum AssemblyItemType { UndefinedItem, Operation, Push, PushString, PushTag, PushSub, PushSubSize, PushProgramSize, Tag, PushData };
+
+class Assembly;
+
+/// A single element of an assembly: a type, a data word whose meaning depends on the
+/// type, the source location the element originates from and a jump type.
+class AssemblyItem
+{
+public:
+	enum class JumpType { Ordinary, IntoFunction, OutOfFunction };
+
+	/// Creates an item that pushes the constant @a _push.
+	AssemblyItem(u256 _push, SourceLocation const& _location = SourceLocation()):
+		AssemblyItem(Push, _push, _location)
+	{
+	}
+
+	/// Creates an item for the instruction @a _i.
+	AssemblyItem(Instruction _i, SourceLocation const& _location = SourceLocation()):
+		AssemblyItem(Operation, byte(_i), _location)
+	{
+	}
+
+	/// Creates an item of the given type with the given data.
+	AssemblyItem(AssemblyItemType _type, u256 _data = 0, SourceLocation const& _location = SourceLocation()):
+		m_type(_type),
+		m_data(_data),
+		m_location(_location)
+	{
+	}
+
+	/// @returns a Tag item with the data of this item; this item has to be a Tag or PushTag.
+	AssemblyItem tag() const
+	{
+		assertThrow(m_type == PushTag || m_type == Tag, Exception, "");
+		return AssemblyItem(Tag, m_data);
+	}
+
+	/// @returns a PushTag item with the data of this item; this item has to be a Tag or PushTag.
+	AssemblyItem pushTag() const
+	{
+		assertThrow(m_type == PushTag || m_type == Tag, Exception, "");
+		return AssemblyItem(PushTag, m_data);
+	}
+
+	AssemblyItemType type() const
+	{
+		return m_type;
+	}
+
+	u256 const& data() const
+	{
+		return m_data;
+	}
+
+	void setType(AssemblyItemType const _type)
+	{
+		m_type = _type;
+	}
+
+	void setData(u256 const& _data)
+	{
+		m_data = _data;
+	}
+
+	/// @returns the instruction of this item (only valid if type() == Operation)
+	Instruction instruction() const
+	{
+		return Instruction(byte(m_data));
+	}
+
+	/// @returns true iff the type and data of the items are equal.
+	bool operator==(AssemblyItem const& _other) const
+	{
+		if (m_type != _other.m_type)
+			return false;
+		return m_data == _other.m_data;
+	}
+
+	bool operator!=(AssemblyItem const& _other) const
+	{
+		return !(*this == _other);
+	}
+
+	/// @returns an upper bound for the number of bytes required by this item, assuming that
+	/// the value of a jump tag takes @a _addressLength bytes.
+	unsigned bytesRequired(unsigned _addressLength) const;
+	int deposit() const;
+
+	/// @returns true if @a _i is an UndefinedItem, or if the types are equal and, for
+	/// operations, the data is equal as well.
+	bool match(AssemblyItem const& _i) const
+	{
+		if (_i.m_type == UndefinedItem)
+			return true;
+		if (m_type != _i.m_type)
+			return false;
+		return m_type != Operation || m_data == _i.m_data;
+	}
+
+	void setLocation(SourceLocation const& _location)
+	{
+		m_location = _location;
+	}
+
+	SourceLocation const& getLocation() const
+	{
+		return m_location;
+	}
+
+	void setJumpType(JumpType _jumpType)
+	{
+		m_jumpType = _jumpType;
+	}
+
+	JumpType getJumpType() const
+	{
+		return m_jumpType;
+	}
+
+	/// @returns "[in]", "[out]" or an empty string, depending on the jump type.
+	std::string getJumpTypeAsString() const;
+
+private:
+	AssemblyItemType m_type;
+	u256 m_data;
+	SourceLocation m_location;
+	JumpType m_jumpType = JumpType::Ordinary;
+};
+
+/// Owning sequence of assembly items.
+using AssemblyItems = std::vector<AssemblyItem>;
+/// Non-owning reference to a sequence of constant assembly items.
+using AssemblyItemsConstRef = vector_ref<AssemblyItem const>;
+
+/// Stream a single item or all items of a sequence to @a _out.
+std::ostream& operator<<(std::ostream& _out, AssemblyItem const& _item);
+std::ostream& operator<<(std::ostream& _out, AssemblyItemsConstRef _i);
+/// Convenience overload that forwards to the AssemblyItemsConstRef version.
+inline std::ostream& operator<<(std::ostream& _out, AssemblyItems const& _i) { return operator<<(_out, AssemblyItemsConstRef(&_i)); }
+
+}
+}
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -0,0 +1,33 @@
+# Build script for the evmasm library.
+cmake_policy(SET CMP0015 NEW)
+# Policy CMP0042 was introduced in CMake 3.0.
+# The version check can be removed once CMake 3.0 or newer is used on Unix.
+if (${CMAKE_MAJOR_VERSION} GREATER 2)
+	# The OLD behaviour of the policy does not use MACOSX_RPATH.
+	cmake_policy(SET CMP0042 OLD)
+endif()
+set(CMAKE_AUTOMOC OFF)
+
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSTATICLIB")
+
+# All source files of this directory are part of the library.
+aux_source_directory(. SRC_LIST)
+
+include_directories(BEFORE ${JSONCPP_INCLUDE_DIRS})
+# The parent directory is added so that includes like <libevmasm/...> resolve.
+include_directories(BEFORE ..)
+include_directories(${Boost_INCLUDE_DIRS})
+
+# Name of the library target (the variable is called EXECUTABLE nevertheless).
+set(EXECUTABLE evmasm)
+
+file(GLOB HEADERS "*.h")
+
+# Build a static or a shared library, depending on ETH_STATIC.
+if (ETH_STATIC)
+	add_library(${EXECUTABLE} STATIC ${SRC_LIST} ${HEADERS})
+else()
+	add_library(${EXECUTABLE} SHARED ${SRC_LIST} ${HEADERS})
+endif()
+
+target_link_libraries(${EXECUTABLE} evmcore)
+target_link_libraries(${EXECUTABLE} devcrypto)
+
+# Install the library and its headers (into include/evmasm).
+install( TARGETS ${EXECUTABLE} RUNTIME DESTINATION bin ARCHIVE DESTINATION lib LIBRARY DESTINATION lib )
+install( FILES ${HEADERS} DESTINATION include/${EXECUTABLE} )
+
--- a/CommonSubexpressionEliminator.cpp
+++ b/CommonSubexpressionEliminator.cpp
@ -0,0 +1,672 @@
+/*
+	This file is part of cpp-ethereum.
+
+	cpp-ethereum is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	cpp-ethereum is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * @file CommonSubexpressionEliminator.cpp
+ * @author Christian <c@ethdev.com>
+ * @date 2015
+ * Optimizer step for common subexpression elimination and stack reorganisation.
+ */
+
+#include <functional>
+#include <boost/range/adaptor/reversed.hpp>
+#include <libdevcrypto/SHA3.h>
+#include <libevmasm/CommonSubexpressionEliminator.h>
+#include <libevmasm/AssemblyItem.h>
+
+using namespace std;
+using namespace dev;
+using namespace dev::eth;
+
+/// Generates the optimized item sequence for the block analysed so far.
+/// The breaking item (if it survives optimizeBreakingItem()) is appended at the end.
+vector<AssemblyItem> CommonSubexpressionEliminator::getOptimizedItems()
+{
+	optimizeBreakingItem();
+
+	// Lowest stack height that was touched (or one above the final height if nothing was).
+	int lowestHeight = m_stackHeight + 1;
+	if (!m_stackElements.empty() && m_stackElements.begin()->first < lowestHeight)
+		lowestHeight = m_stackElements.begin()->first;
+
+	// Layout before the block: only heights up to zero exist initially.
+	map<int, Id> initialStackContents;
+	for (int h = lowestHeight; h <= 0; ++h)
+		initialStackContents[h] = initialStackElement(h, SourceLocation());
+	// Layout that has to be established by the generated code.
+	map<int, Id> targetStackContents;
+	for (int h = lowestHeight; h <= m_stackHeight; ++h)
+		targetStackContents[h] = stackElement(h, SourceLocation());
+
+	// Debug info:
+	//stream(cout, initialStackContents, targetStackContents);
+
+	CSECodeGenerator generator(m_expressionClasses, m_storeOperations);
+	AssemblyItems items = generator.generateCode(initialStackContents, targetStackContents);
+	if (m_breakingItem)
+		items.push_back(*m_breakingItem);
+	return items;
+}
+
+/// Writes a textual dump of the analysis state (final stack height, all equivalence classes
+/// and the two given stack layouts) to @a _out and returns @a _out.
+ostream& CommonSubexpressionEliminator::stream(
+	ostream& _out,
+	map<int, Id> _initialStack,
+	map<int, Id> _targetStack
+) const
+{
+	// Prints one class as "  <id>: <item>[@<sequence number>](<argument>,...)".
+	auto printClass = [this, &_out](Id _id)
+	{
+		auto const& expression = m_expressionClasses.representative(_id);
+		_out << "  " << dec << _id << ": " << *expression.item;
+		if (expression.sequenceNumber)
+			_out << "@" << dec << expression.sequenceNumber;
+		_out << "(";
+		for (Id argument: expression.arguments)
+			_out << dec << argument << ",";
+		_out << ")" << endl;
+	};
+	// Prints every entry of a stack layout as "  <height>: " followed by its class.
+	auto printStack = [&_out, &printClass](map<int, Id> const& _stack)
+	{
+		for (auto const& heightAndClass: _stack)
+		{
+			_out << "  " << dec << heightAndClass.first << ": ";
+			printClass(heightAndClass.second);
+		}
+	};
+
+	_out << "Optimizer analysis:" << endl;
+	_out << "Final stack height: " << dec << m_stackHeight << endl;
+	_out << "Equivalence classes: " << endl;
+	Id classId = 0;
+	while (classId < m_expressionClasses.size())
+	{
+		printClass(classId);
+		++classId;
+	}
+
+	_out << "Initial stack: " << endl;
+	printStack(_initialStack);
+	_out << "Target stack: " << endl;
+	printStack(_targetStack);
+
+	return _out;
+}
+
+/// Updates the symbolic stack, storage and memory knowledge with the effect of @a _item.
+/// @param _copyItem forwarded to ExpressionClasses::find for items that create a new class.
+void CommonSubexpressionEliminator::feedItem(AssemblyItem const& _item, bool _copyItem)
+{
+	if (_item.type() != Operation)
+	{
+		// Anything that is not an instruction simply pushes a single value.
+		assertThrow(_item.deposit() == 1, InvalidDeposit, "");
+		setStackElement(++m_stackHeight, m_expressionClasses.find(_item, {}, _copyItem));
+		return;
+	}
+
+	Instruction instruction = _item.instruction();
+	InstructionInfo info = instructionInfo(instruction);
+	SourceLocation const& location = _item.getLocation();
+	if (SemanticInformation::isDupInstruction(_item))
+	{
+		// DUPn: the new top is the class n - 1 slots below the current top.
+		int const sourceHeight = m_stackHeight - int(instruction) + int(Instruction::DUP1);
+		setStackElement(m_stackHeight + 1, stackElement(sourceHeight, location));
+	}
+	else if (SemanticInformation::isSwapInstruction(_item))
+	{
+		// SWAPn: exchange the top with the slot n below it.
+		int const otherHeight = m_stackHeight - 1 - int(instruction) + int(Instruction::SWAP1);
+		swapStackElements(m_stackHeight, otherHeight, location);
+	}
+	else if (instruction != Instruction::POP)
+	{
+		// Collect the argument classes, topmost stack element first.
+		vector<Id> arguments(info.args);
+		for (int i = 0; i < info.args; ++i)
+			arguments[i] = stackElement(m_stackHeight - i, location);
+
+		switch (_item.instruction())
+		{
+		case Instruction::SSTORE:
+			storeInStorage(arguments[0], arguments[1], location);
+			break;
+		case Instruction::SLOAD:
+			setStackElement(
+				m_stackHeight + _item.deposit(),
+				loadFromStorage(arguments[0], location)
+			);
+			break;
+		case Instruction::MSTORE:
+			storeInMemory(arguments[0], arguments[1], location);
+			break;
+		case Instruction::MLOAD:
+			setStackElement(
+				m_stackHeight + _item.deposit(),
+				loadFromMemory(arguments[0], location)
+			);
+			break;
+		case Instruction::SHA3:
+			setStackElement(
+				m_stackHeight + _item.deposit(),
+				applySha3(arguments.at(0), arguments.at(1), location)
+			);
+			break;
+		default:
+			// Generic instruction: its result is the class of (instruction, arguments).
+			setStackElement(
+				m_stackHeight + _item.deposit(),
+				m_expressionClasses.find(_item, arguments, _copyItem)
+			);
+			break;
+		}
+	}
+	// POP (and every other instruction) only changes the height from here on.
+	m_stackHeight += _item.deposit();
+}
+
+/// Simplifies a JUMPI that ends the block if its condition is known at this point:
+/// it becomes an unconditional JUMP if the condition is known to be non-zero and is dropped
+/// entirely if the negated condition is known to be non-zero.
+void CommonSubexpressionEliminator::optimizeBreakingItem()
+{
+	if (!m_breakingItem)
+		return;
+	if (*m_breakingItem != AssemblyItem(Instruction::JUMPI))
+		return;
+
+	SourceLocation const& location = m_breakingItem->getLocation();
+	AssemblyItem::JumpType jumpType = m_breakingItem->getJumpType();
+
+	// The condition is the element directly below the top of the stack.
+	Id const condition = stackElement(m_stackHeight - 1, location);
+	Id const zero = m_expressionClasses.find(u256(0));
+	if (!m_expressionClasses.knownToBeDifferent(condition, zero))
+	{
+		Id const negatedCondition = m_expressionClasses.find(Instruction::ISZERO, {condition});
+		if (m_expressionClasses.knownToBeDifferent(negatedCondition, zero))
+		{
+			// Jump is never taken: remove both of its arguments and the jump itself.
+			AssemblyItem const pop(Instruction::POP, location);
+			feedItem(pop, true);
+			feedItem(pop, true);
+			m_breakingItem = nullptr;
+		}
+		return;
+	}
+
+	// Jump is always taken: remove the condition (SWAP1 POP) and jump unconditionally.
+	feedItem(AssemblyItem(Instruction::SWAP1, location), true);
+	feedItem(AssemblyItem(Instruction::POP, location), true);
+
+	AssemblyItem jump(Instruction::JUMP, location);
+	jump.setJumpType(jumpType);
+	m_breakingItem = m_expressionClasses.storeItem(jump);
+}
+
+/// Records that the stack slot at @a _stackHeight now holds equivalence class @a _class,
+/// creating the slot if it was not tracked yet.
+void CommonSubexpressionEliminator::setStackElement(int _stackHeight, Id _class)
+{
+	Id& slot = m_stackElements[_stackHeight];
+	slot = _class;
+}
+
+/// Exchanges the equivalence classes stored at the two given stack heights.
+/// Both heights have to differ; slots not tracked yet are created first.
+void CommonSubexpressionEliminator::swapStackElements(
+	int _stackHeightA,
+	int _stackHeightB,
+	SourceLocation const& _location
+)
+{
+	assertThrow(_stackHeightA != _stackHeightB, OptimizerException, "Swap on same stack elements.");
+	// Make sure both slots exist (this may create initial stack elements).
+	stackElement(_stackHeightA, _location);
+	stackElement(_stackHeightB, _location);
+
+	Id& first = m_stackElements[_stackHeightA];
+	Id& second = m_stackElements[_stackHeightB];
+	swap(first, second);
+}
+
+/// @returns the equivalence class currently stored at @a _stackHeight. If the slot was never
+/// assigned, it is filled with the initial stack element for that height first.
+ExpressionClasses::Id CommonSubexpressionEliminator::stackElement(
+	int _stackHeight,
+	SourceLocation const& _location
+)
+{
+	auto const existing = m_stackElements.find(_stackHeight);
+	if (existing != m_stackElements.end())
+		return existing->second;
+	// Not assigned yet, so it refers to something that was on the stack before the block.
+	Id const fresh = initialStackElement(_stackHeight, _location);
+	m_stackElements[_stackHeight] = fresh;
+	return fresh;
+}
+
+/// @returns the equivalence class that stands for the element present at @a _stackHeight
+/// before the block started. @a _stackHeight must be in the range (-16, 0].
+ExpressionClasses::Id CommonSubexpressionEliminator::initialStackElement(
+	int _stackHeight,
+	SourceLocation const& _location
+)
+{
+	assertThrow(_stackHeight <= 0, OptimizerException, "Initial stack element of positive height requested.");
+	assertThrow(_stackHeight > -16, StackTooDeepException, "");
+	// Pre-existing elements are modelled by a special DUP item: height 0 maps to DUP1,
+	// height -1 to DUP2 and so on.
+	int const dupNumber = 1 - _stackHeight;
+	AssemblyItem const placeholder(dupInstruction(dupNumber), _location);
+	return m_expressionClasses.find(placeholder);
+}
+
+/// Models an SSTORE of @a _value to @a _slot: forgets storage knowledge that might be
+/// overwritten, records the store operation and remembers the new content of the slot.
+void CommonSubexpressionEliminator::storeInStorage(Id _slot, Id _value, SourceLocation const& _location)
+{
+	auto const known = m_storageContent.find(_slot);
+	if (known != m_storageContent.end() && known->second == _value)
+		// the slot is already known to hold this value, so the store is omitted
+		return;
+	++m_sequenceNumber;
+	// Prune the knowledge in place: an entry survives only if its slot is known to differ from
+	// _slot or if its stored value equals _value (then the store cannot change what we know).
+	for (auto entry = m_storageContent.begin(); entry != m_storageContent.end();)
+	{
+		bool const survives =
+			m_expressionClasses.knownToBeDifferent(entry->first, _slot) ||
+			entry->second == _value;
+		if (survives)
+			++entry;
+		else
+			entry = m_storageContent.erase(entry);
+	}
+
+	AssemblyItem storeItem(Instruction::SSTORE, _location);
+	Id const storeClass = m_expressionClasses.find(storeItem, {_slot, _value}, true, m_sequenceNumber);
+	m_storeOperations.push_back(StoreOperation(StoreOperation::Storage, _slot, m_sequenceNumber, storeClass));
+	m_storageContent[_slot] = _value;
+	// second increment, so that writes get sequence numbers of their own
+	++m_sequenceNumber;
+}
+
+/// @returns the class known to be stored at @a _slot in storage. If nothing is known, a new
+/// SLOAD class tagged with the current sequence number is created and remembered.
+ExpressionClasses::Id CommonSubexpressionEliminator::loadFromStorage(Id _slot, SourceLocation const& _location)
+{
+	auto const known = m_storageContent.find(_slot);
+	if (known != m_storageContent.end())
+		return known->second;
+
+	AssemblyItem loadItem(Instruction::SLOAD, _location);
+	Id const loaded = m_expressionClasses.find(loadItem, {_slot}, true, m_sequenceNumber);
+	m_storageContent[_slot] = loaded;
+	return loaded;
+}
+
+/// Models an MSTORE of @a _value to @a _slot: forgets memory knowledge that might be
+/// overwritten, records the store operation and remembers the new content of the slot.
+void CommonSubexpressionEliminator::storeInMemory(Id _slot, Id _value, SourceLocation const& _location)
+{
+	auto const known = m_memoryContent.find(_slot);
+	if (known != m_memoryContent.end() && known->second == _value)
+		// the slot is already known to hold this value, so the store is omitted
+		return;
+	++m_sequenceNumber;
+	// Prune the knowledge in place: only addresses known to differ from _slot by at least 32
+	// are kept.
+	for (auto entry = m_memoryContent.begin(); entry != m_memoryContent.end();)
+	{
+		if (m_expressionClasses.knownToBeDifferentBy32(entry->first, _slot))
+			++entry;
+		else
+			entry = m_memoryContent.erase(entry);
+	}
+
+	AssemblyItem storeItem(Instruction::MSTORE, _location);
+	Id const storeClass = m_expressionClasses.find(storeItem, {_slot, _value}, true, m_sequenceNumber);
+	m_storeOperations.push_back(StoreOperation(StoreOperation::Memory, _slot, m_sequenceNumber, storeClass));
+	m_memoryContent[_slot] = _value;
+	// second increment, so that writes get sequence numbers of their own
+	++m_sequenceNumber;
+}
+
+/// @returns the class known to be stored at @a _slot in memory. If nothing is known, a new
+/// MLOAD class tagged with the current sequence number is created and remembered.
+ExpressionClasses::Id CommonSubexpressionEliminator::loadFromMemory(Id _slot, SourceLocation const& _location)
+{
+	auto const known = m_memoryContent.find(_slot);
+	if (known != m_memoryContent.end())
+		return known->second;
+
+	AssemblyItem loadItem(Instruction::MLOAD, _location);
+	Id const loaded = m_expressionClasses.find(loadItem, {_slot}, true, m_sequenceNumber);
+	m_memoryContent[_slot] = loaded;
+	return loaded;
+}
+
+/// Finds or creates the class for SHA3 applied to the memory range [@a _start, @a _start + @a _length).
+/// For a constant length of at most 128 the hashed memory words are looked up individually, so
+/// that equal inputs share one class and fully constant inputs are hashed right here.
+CommonSubexpressionEliminator::Id CommonSubexpressionEliminator::applySha3(
+	Id _start,
+	Id _length,
+	SourceLocation const& _location
+)
+{
+	AssemblyItem sha3Item(Instruction::SHA3, _location);
+	u256 const* knownLength = m_expressionClasses.knownConstant(_length);
+	if (!knownLength || *knownLength > 128)
+		// length unknown or too large for the word-wise analysis
+		return m_expressionClasses.find(sha3Item, {_start, _length}, true, m_sequenceNumber);
+
+	// One memory load per 32 byte word of the hashed range.
+	vector<Id> words;
+	for (u256 offset = 0; offset < *knownLength; offset += 32)
+	{
+		Id const address = m_expressionClasses.find(
+			AssemblyItem(Instruction::ADD, _location),
+			{_start, m_expressionClasses.find(offset)}
+		);
+		words.push_back(loadFromMemory(address, _location));
+	}
+	auto const cached = m_knownSha3Hashes.find(words);
+	if (cached != m_knownSha3Hashes.end())
+		return cached->second;
+
+	bool allConstant = true;
+	for (Id word: words)
+		if (!m_expressionClasses.knownConstant(word))
+		{
+			allConstant = false;
+			break;
+		}
+
+	Id hashClass;
+	if (allConstant)
+	{
+		// Every word is a known constant, so the hash can be computed at compile time.
+		bytes data;
+		for (Id word: words)
+			data += toBigEndian(*m_expressionClasses.knownConstant(word));
+		// cut off the part of the last word that lies beyond the requested length
+		data.resize(size_t(*knownLength));
+		hashClass = m_expressionClasses.find(AssemblyItem(u256(sha3(data)), _location));
+	}
+	else
+		hashClass = m_expressionClasses.find(sha3Item, {_start, _length}, true, m_sequenceNumber);
+	m_knownSha3Hashes[words] = hashClass;
+	return hashClass;
+}
+
+/// Groups the given store operations by (target, slot), keeping their relative order inside
+/// each group.
+CSECodeGenerator::CSECodeGenerator(
+	ExpressionClasses& _expressionClasses,
+	vector<CSECodeGenerator::StoreOperation> const& _storeOperations
+):
+	m_expressionClasses(_expressionClasses)
+{
+	for (StoreOperation const& operation: _storeOperations)
+	{
+		auto const key = make_pair(operation.target, operation.slot);
+		m_storeOperations[key].push_back(operation);
+	}
+}
+
+/// Generates items that transform @a _initialStack into @a _targetStackContents and perform
+/// all recorded store operations.
+/// Throws OptimizerException if the resulting stack height is not the expected one.
+AssemblyItems CSECodeGenerator::generateCode(
+	map<int, Id> const& _initialStack,
+	map<int, Id> const& _targetStackContents
+)
+{
+	m_stack = _initialStack;
+	// The lowest position of a class becomes its canonical position (insert keeps the first one).
+	for (auto const& heightAndClass: m_stack)
+		m_classPositions.insert(make_pair(heightAndClass.second, heightAndClass.first));
+
+	// @todo: provide information about the positions of copies of class elements
+
+	// Build the dependency graph, starting from the final write to each slot of storage and
+	// memory and from the target stack contents.
+	for (auto const& slotAndStores: m_storeOperations)
+		addDependencies(slotAndStores.second.back().expression);
+	for (auto const& target: _targetStackContents)
+	{
+		m_finalClasses.insert(target.second);
+		addDependencies(target.second);
+	}
+
+	// Collect every needed expression that carries a sequence number, ordered by that number.
+	set<pair<unsigned, Id>> sequencedExpressions;
+	auto collectIfSequenced = [&](Id _id)
+	{
+		unsigned const sequenceNumber = m_expressionClasses.representative(_id).sequenceNumber;
+		if (sequenceNumber)
+			sequencedExpressions.insert(make_pair(sequenceNumber, _id));
+	};
+	for (auto const& dependency: m_neededBy)
+	{
+		collectIfSequenced(dependency.first);
+		collectIfSequenced(dependency.second);
+	}
+
+	// Execute the needed storage and memory operations in their original order.
+	for (auto const& sequenced: sequencedExpressions)
+		if (m_classPositions.count(sequenced.second) == 0)
+			generateClassElement(sequenced.second, true);
+
+	// Produce the target stack elements and move them into place.
+	for (auto const& target: _targetStackContents)
+	{
+		int const targetHeight = target.first;
+		Id const targetClass = target.second;
+		int const position = generateClassElement(targetClass);
+		assertThrow(position != c_invalidPosition, OptimizerException, "");
+		if (position != targetHeight)
+		{
+			SourceLocation const& location = m_expressionClasses.representative(targetClass).item->getLocation();
+			if (position < targetHeight)
+				// it is already at its target, we need another copy
+				appendDup(position, location);
+			else
+				appendOrRemoveSwap(position, location);
+			appendOrRemoveSwap(targetHeight, location);
+		}
+	}
+
+	// Pop everything on top of the stack that is not needed anymore.
+	while (removeStackTopIfPossible())
+	{
+	}
+
+	// Validate the final height:
+	//  - with a target stack it is the height of its topmost element,
+	//  - with only an initial stack everything was removed,
+	//  - with neither of them the height does not change.
+	int expectedHeight = 0;
+	if (!_targetStackContents.empty())
+		expectedHeight = _targetStackContents.rbegin()->first;
+	else if (!_initialStack.empty())
+		expectedHeight = _initialStack.begin()->first - 1;
+	assertThrow(expectedHeight == m_stackHeight, OptimizerException, "Incorrect final stack height.");
+	return m_generatedItems;
+}
+
+/// Recursively records in m_neededBy every class that is needed to compute @a _c.
+/// Besides the arguments of its representative expression, an SLOAD, MLOAD or SHA3 expression
+/// additionally depends on the latest earlier store to every slot that is not known to be
+/// independent of the location it reads from.
+void CSECodeGenerator::addDependencies(Id _c)
+{
+	// m_neededBy is keyed by the needed class, so this is true once some other class has
+	// registered _c as one of its dependencies.
+	if (m_neededBy.count(_c))
+		return; // we already computed the dependencies for _c
+	// Taken by value. NOTE(review): m_expressionClasses.find() is called further below;
+	// presumably it can invalidate references to representatives -- confirm before changing
+	// this into a reference.
+	ExpressionClasses::Expression expr = m_expressionClasses.representative(_c);
+	for (Id argument: expr.arguments)
+	{
+		addDependencies(argument);
+		m_neededBy.insert(make_pair(argument, _c));
+	}
+	if (expr.item->type() == Operation && (
+		expr.item->instruction() == Instruction::SLOAD ||
+		expr.item->instruction() == Instruction::MLOAD ||
+		expr.item->instruction() == Instruction::SHA3
+	))
+	{
+		// this loads an unknown value from storage or memory and thus, in addition to its
+		// arguments, depends on all store operations to addresses where we do not know that
+		// they are different that occur before this load
+		// (MLOAD and SHA3 are both matched against memory stores.)
+		StoreOperation::Target target = expr.item->instruction() == Instruction::SLOAD ?
+			StoreOperation::Storage : StoreOperation::Memory;
+		Id slotToLoadFrom = expr.arguments.at(0);
+		for (auto const& p: m_storeOperations)
+		{
+			if (p.first.first != target)
+				continue;
+			Id slot = p.first.second;
+			StoreOperations const& storeOps = p.second;
+			// storeOps is expected to be sorted by sequence number, so front() is the earliest
+			// store to this slot; if even that one comes after the load, the slot is irrelevant.
+			if (storeOps.front().sequenceNumber > expr.sequenceNumber)
+				continue;
+			bool knownToBeIndependent = false;
+			switch (expr.item->instruction())
+			{
+			case Instruction::SLOAD:
+				knownToBeIndependent = m_expressionClasses.knownToBeDifferent(slot, slotToLoadFrom);
+				break;
+			case Instruction::MLOAD:
+				knownToBeIndependent = m_expressionClasses.knownToBeDifferentBy32(slot, slotToLoadFrom);
+				break;
+			case Instruction::SHA3:
+			{
+				// o is the offset of the store relative to the start of the hashed range
+				// (slot - start), l is the length of the hashed range; both only if constant.
+				Id length = expr.arguments.at(1);
+				AssemblyItem offsetInstr(Instruction::SUB, expr.item->getLocation());
+				Id offsetToStart = m_expressionClasses.find(offsetInstr, {slot, slotToLoadFrom});
+				u256 const* o = m_expressionClasses.knownConstant(offsetToStart);
+				u256 const* l = m_expressionClasses.knownConstant(length);
+				if (l && *l == 0)
+					// nothing is hashed at all
+					knownToBeIndependent = true;
+				else if (o)
+				{
+					// We could get problems here if both *o and *l are larger than 2**254
+					// but it is probably ok for the optimizer to produce wrong code for such cases
+					// which cannot be executed anyway because of the non-payable price.
+					if (u2s(*o) <= -32)
+						// the store starts at least 32 bytes before the hashed range
+						knownToBeIndependent = true;
+					else if (l && u2s(*o) >= 0 && *o >= *l)
+						// the store starts at or behind the end of the hashed range
+						knownToBeIndependent = true;
+				}
+				break;
+			}
+			default:
+				break;
+			}
+			if (knownToBeIndependent)
+				continue;
+
+			// note that store and load never have the same sequence number
+			// Select the last store to this slot whose sequence number is below that of the load.
+			Id latestStore = storeOps.front().expression;
+			for (auto it = ++storeOps.begin(); it != storeOps.end(); ++it)
+				if (it->sequenceNumber < expr.sequenceNumber)
+					latestStore = it->expression;
+			addDependencies(latestStore);
+			m_neededBy.insert(make_pair(latestStore, _c));
+		}
+	}
+}
+
+/// Appends the items needed to compute class @a _c, unless it already has a position.
+/// Arguments are generated recursively first and then arranged on top of the stack.
+/// @param _allowSequenced if false, an expression with a non-zero sequence number must not be
+/// generated here (an OptimizerException is raised).
+/// @returns the stack position of the result or c_invalidPosition if the expression does not
+/// leave a value on the stack.
+int CSECodeGenerator::generateClassElement(Id _c, bool _allowSequenced)
+{
+	// do some cleanup
+	removeStackTopIfPossible();
+
+	// Already generated (or part of the initial stack): reuse the known position.
+	if (m_classPositions.count(_c))
+	{
+		assertThrow(
+			m_classPositions[_c] != c_invalidPosition,
+			OptimizerException,
+			"Element already removed but still needed."
+		);
+		return m_classPositions[_c];
+	}
+	ExpressionClasses::Expression const& expr = m_expressionClasses.representative(_c);
+	assertThrow(
+		_allowSequenced || expr.sequenceNumber == 0,
+		OptimizerException,
+		"Sequence constrained operation requested out of sequence."
+	);
+	// Generate the arguments, last argument first. Nested requests never allow sequenced
+	// operations (_allowSequenced defaults to false).
+	vector<Id> const& arguments = expr.arguments;
+	for (Id arg: boost::adaptors::reverse(arguments))
+		generateClassElement(arg);
+
+	SourceLocation const& location = expr.item->getLocation();
+	// The arguments are somewhere on the stack now, so it remains to move them at the correct place.
+	// This is quite difficult as sometimes, the values also have to removed in this process
+	// (if canBeRemoved() returns true) and the two arguments can be equal. For now, this is
+	// implemented for every single case for combinations of up to two arguments manually.
+	// Goal of this section (verified by the assertion loop below): arguments[0] is on top of
+	// the stack and arguments[1] directly below it.
+	if (arguments.size() == 1)
+	{
+		// Swap the argument up if nobody else needs it, otherwise work on a copy.
+		if (canBeRemoved(arguments[0], _c))
+			appendOrRemoveSwap(classElementPosition(arguments[0]), location);
+		else
+			appendDup(classElementPosition(arguments[0]), location);
+	}
+	else if (arguments.size() == 2)
+	{
+		if (canBeRemoved(arguments[1], _c))
+		{
+			// The second argument may be consumed: bring it to the top first.
+			appendOrRemoveSwap(classElementPosition(arguments[1]), location);
+			if (arguments[0] == arguments[1])
+				appendDup(m_stackHeight, location);
+			else if (canBeRemoved(arguments[0], _c))
+			{
+				appendOrRemoveSwap(m_stackHeight - 1, location);
+				appendOrRemoveSwap(classElementPosition(arguments[0]), location);
+			}
+			else
+				appendDup(classElementPosition(arguments[0]), location);
+		}
+		else
+		{
+			// The second argument has to stay available, so only a copy of it may be consumed.
+			if (arguments[0] == arguments[1])
+			{
+				appendDup(classElementPosition(arguments[0]), location);
+				appendDup(m_stackHeight, location);
+			}
+			else if (canBeRemoved(arguments[0], _c))
+			{
+				appendOrRemoveSwap(classElementPosition(arguments[0]), location);
+				appendDup(classElementPosition(arguments[1]), location);
+				appendOrRemoveSwap(m_stackHeight - 1, location);
+			}
+			else
+			{
+				appendDup(classElementPosition(arguments[1]), location);
+				appendDup(classElementPosition(arguments[0]), location);
+			}
+		}
+	}
+	else
+		// Reached for zero arguments (passes) and for more than two arguments (fails).
+		assertThrow(
+			arguments.size() <= 2,
+			OptimizerException,
+			"Opcodes with more than two arguments not implemented yet."
+		);
+	for (size_t i = 0; i < arguments.size(); ++i)
+		assertThrow(m_stack[m_stackHeight - i] == arguments[i], OptimizerException, "Expected arguments not present." );
+
+	while (SemanticInformation::isCommutativeOperation(*expr.item) &&
+			!m_generatedItems.empty() &&
+			m_generatedItems.back() == AssemblyItem(Instruction::SWAP1))
+		// this will not append a swap but remove the one that is already there
+		appendOrRemoveSwap(m_stackHeight - 1, location);
+	// Arguments that are not needed by anything else are gone after the operation.
+	for (auto arg: arguments)
+		if (canBeRemoved(arg, _c))
+			m_classPositions[arg] = c_invalidPosition;
+	// The operation consumes the topmost arguments.size() stack slots; appendItem adjusts
+	// m_stackHeight by the deposit of the item.
+	for (size_t i = 0; i < arguments.size(); ++i)
+		m_stack.erase(m_stackHeight - i);
+	appendItem(*expr.item);
+	if (expr.item->type() != Operation || instructionInfo(expr.item->instruction()).ret == 1)
+	{
+		// The result is the new top of the stack.
+		m_stack[m_stackHeight] = _c;
+		return m_classPositions[_c] = m_stackHeight;
+	}
+	else
+	{
+		// No value is produced; the class is recorded as generated but without a position.
+		assertThrow(
+			instructionInfo(expr.item->instruction()).ret == 0,
+			OptimizerException,
+			"Invalid number of return values."
+		);
+		return m_classPositions[_c] = c_invalidPosition;
+	}
+}
+
+/// @returns the recorded stack position of class @a _id.
+/// Throws OptimizerException if the class has no position or was already removed.
+int CSECodeGenerator::classElementPosition(Id _id) const
+{
+	auto const entry = m_classPositions.find(_id);
+	assertThrow(
+		entry != m_classPositions.end() && entry->second != c_invalidPosition,
+		OptimizerException,
+		"Element requested but is not present."
+	);
+	return entry->second;
+}
+
+/// @returns true if @a _element is neither part of the final stack nor needed by a class
+/// (other than @a _result) that has not been generated yet.
+bool CSECodeGenerator::canBeRemoved(Id _element, Id _result)
+{
+	if (m_finalClasses.count(_element) != 0)
+		return false;
+
+	// Walk over all classes that need _element. A class counts as generated as soon as it
+	// has an entry in m_classPositions, which also holds for classes deleted in the meantime.
+	for (
+		auto user = m_neededBy.lower_bound(_element);
+		user != m_neededBy.end() && user->first == _element;
+		++user
+	)
+	{
+		if (user->second == _result)
+			continue;
+		if (m_classPositions.count(user->second) == 0)
+			return false;
+	}
+	return true;
+}
+
+/// Appends a POP if the topmost stack element is not needed anymore.
+/// @returns true if an element was removed.
+bool CSECodeGenerator::removeStackTopIfPossible()
+{
+	if (m_stack.empty())
+		return false;
+	assertThrow(m_stack.count(m_stackHeight) > 0, OptimizerException, "");
+	Id const topClass = m_stack.at(m_stackHeight);
+	if (canBeRemoved(topClass))
+	{
+		// The POP is recorded directly, so the height is adjusted by hand.
+		m_generatedItems.push_back(AssemblyItem(Instruction::POP));
+		m_stack.erase(m_stackHeight);
+		--m_stackHeight;
+		return true;
+	}
+	return false;
+}
+
+/// Appends the DUP instruction that copies the element at @a _fromPosition to the top of the
+/// stack. Throws StackTooDeepException if the element is more than 16 slots away.
+void CSECodeGenerator::appendDup(int _fromPosition, SourceLocation const& _location)
+{
+	assertThrow(_fromPosition != c_invalidPosition, OptimizerException, "");
+	// DUP1 copies the top itself, DUP2 the element below it and so on.
+	int const dupNumber = 1 + m_stackHeight - _fromPosition;
+	assertThrow(dupNumber <= 16, StackTooDeepException, "Stack too deep.");
+	assertThrow(1 <= dupNumber, OptimizerException, "Invalid stack access.");
+	appendItem(AssemblyItem(dupInstruction(dupNumber), _location));
+	// appendItem has already increased m_stackHeight, so this fills the new top slot.
+	Id const duplicated = m_stack[_fromPosition];
+	m_stack[m_stackHeight] = duplicated;
+}
+
+/// Appends the SWAP instruction that exchanges the top of the stack with the element at
+/// @a _fromPosition (nothing happens if that is the top already). If this yields two identical
+/// swaps in a row, both are removed from the generated items again.
+void CSECodeGenerator::appendOrRemoveSwap(int _fromPosition, SourceLocation const& _location)
+{
+	assertThrow(_fromPosition != c_invalidPosition, OptimizerException, "");
+	if (_fromPosition == m_stackHeight)
+		return;
+	int const swapNumber = m_stackHeight - _fromPosition;
+	assertThrow(swapNumber <= 16, StackTooDeepException, "Stack too deep.");
+	assertThrow(1 <= swapNumber, OptimizerException, "Invalid stack access.");
+	appendItem(AssemblyItem(swapInstruction(swapNumber), _location));
+
+	Id& topSlot = m_stack[m_stackHeight];
+	Id& otherSlot = m_stack[_fromPosition];
+	// A class may live at several stack positions; only its "canonical" position recorded
+	// in m_classPositions is updated. The second update has to see the result of the first.
+	int& topCanonical = m_classPositions[topSlot];
+	if (topCanonical == m_stackHeight)
+		topCanonical = _fromPosition;
+	int& otherCanonical = m_classPositions[otherSlot];
+	if (otherCanonical == _fromPosition)
+		otherCanonical = m_stackHeight;
+	swap(topSlot, otherSlot);
+
+	// Two identical swaps directly after each other cancel out.
+	bool const cancelsPrevious =
+		m_generatedItems.size() >= 2 &&
+		SemanticInformation::isSwapInstruction(m_generatedItems.back()) &&
+		*(m_generatedItems.end() - 2) == m_generatedItems.back();
+	if (cancelsPrevious)
+	{
+		m_generatedItems.pop_back();
+		m_generatedItems.pop_back();
+	}
+}
+
+/// Records @a _item as generated and applies its deposit to the tracked stack height.
+void CSECodeGenerator::appendItem(AssemblyItem const& _item)
+{
+	m_generatedItems.push_back(_item);
+	m_stackHeight = m_stackHeight + _item.deposit();
+}
--- a/CommonSubexpressionEliminator.h
+++ b/CommonSubexpressionEliminator.h
@ -0,0 +1,233 @@
+/*
+	This file is part of cpp-ethereum.
+
+	cpp-ethereum is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	cpp-ethereum is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * @file CommonSubexpressionEliminator.h
+ * @author Christian <c@ethdev.com>
+ * @date 2015
+ * Optimizer step for common subexpression elimination and stack reorganisation.
+ */
+
+#pragma once
+
+#include <vector>
+#include <map>
+#include <set>
+#include <tuple>
+#include <ostream>
+#include <libdevcore/CommonIO.h>
+#include <libdevcore/Exceptions.h>
+#include <libevmasm/ExpressionClasses.h>
+#include <libevmasm/SemanticInformation.h>
+
+namespace dev
+{
+namespace eth
+{
+
+class AssemblyItem;
+using AssemblyItems = std::vector<AssemblyItem>;
+
+/**
+ * Optimizer step that performs common subexpression elimination and stack reorganisation,
+ * i.e. it tries to infer equality among expressions and compute the values of two expressions
+ * known to be equal only once.
+ *
+ * The general workings are that for each assembly item that is fed into the eliminator, an
+ * equivalence class is derived from the operation and the equivalence class of its arguments.
+ * DUPi, SWAPi and some arithmetic instructions are used to infer equivalences while these
+ * classes are determined.
+ *
+ * When the list of optimized items is requested, they are generated in a bottom-up fashion,
+ * adding code for equivalence classes that were not yet computed.
+ */
+class CommonSubexpressionEliminator
+{
+public:
+	using Id = ExpressionClasses::Id;
+	/// Record of a single write to storage or memory.
+	struct StoreOperation
+	{
+		enum Target { Memory, Storage };
+		StoreOperation(
+			Target _target,
+			Id _slot,
+			unsigned _sequenceNumber,
+			Id _expression
+		): target(_target), slot(_slot), sequenceNumber(_sequenceNumber), expression(_expression) {}
+		/// Whether memory or storage is written to.
+		Target target;
+		/// Equivalence class of the address that is written to.
+		Id slot;
+		/// Sequence number at which the write happens.
+		unsigned sequenceNumber;
+		/// Equivalence class of the store instruction itself.
+		Id expression;
+	};
+
+	/// Feeds AssemblyItems into the eliminator and @returns the iterator pointing at the first
+	/// item that must be fed into a new instance of the eliminator.
+	/// @note A pointer to the item that ends the block is kept (see m_breakingItem), so the
+	/// range has to stay valid until getOptimizedItems() has been called.
+	template <class _AssemblyItemIterator>
+	_AssemblyItemIterator feedItems(_AssemblyItemIterator _iterator, _AssemblyItemIterator _end);
+
+	/// @returns the resulting items after optimization.
+	AssemblyItems getOptimizedItems();
+
+	/// Streams debugging information to @a _out.
+	std::ostream& stream(
+		std::ostream& _out,
+		std::map<int, Id> _initialStack = std::map<int, Id>(),
+		std::map<int, Id> _targetStack = std::map<int, Id>()
+	) const;
+
+private:
+	/// Feeds the item into the system for analysis.
+	void feedItem(AssemblyItem const& _item, bool _copyItem = false);
+
+	/// Tries to optimize the item that breaks the basic block at the end.
+	void optimizeBreakingItem();
+
+	/// Assigns a new equivalence class to the next sequence number of the given stack element.
+	void setStackElement(int _stackHeight, Id _class);
+	/// Swaps the given stack elements in their next sequence number.
+	void swapStackElements(int _stackHeightA, int _stackHeightB, SourceLocation const& _location);
+	/// Retrieves the current equivalence class for the given stack element (or generates a new
+	/// one if it does not exist yet).
+	Id stackElement(int _stackHeight, SourceLocation const& _location);
+	/// @returns the equivalence class id of the special initial stack element at the given height
+	/// (must not be positive).
+	Id initialStackElement(int _stackHeight, SourceLocation const& _location);
+
+	/// Increments the sequence number, deletes all storage information that might be overwritten
+	/// and stores the new value at the given slot.
+	void storeInStorage(Id _slot, Id _value, SourceLocation const& _location);
+	/// Retrieves the current value at the given slot in storage or creates a new special sload class.
+	Id loadFromStorage(Id _slot, SourceLocation const& _location);
+	/// Increments the sequence number, deletes all memory information that might be overwritten
+	/// and stores the new value at the given slot.
+	void storeInMemory(Id _slot, Id _value, SourceLocation const& _location);
+	/// Retrieves the current value at the given slot in memory or creates a new special mload class.
+	Id loadFromMemory(Id _slot, SourceLocation const& _location);
+	/// Finds or creates a new expression that applies the sha3 hash function to the contents in memory.
+	Id applySha3(Id _start, Id _length, SourceLocation const& _location);
+
+	/// Current stack height, can be negative.
+	int m_stackHeight = 0;
+	/// Current stack layout, mapping stack height -> equivalence class
+	std::map<int, Id> m_stackElements;
+	/// Current sequence number, this is incremented with each modification to storage or memory.
+	unsigned m_sequenceNumber = 1;
+	/// Knowledge about storage content.
+	std::map<Id, Id> m_storageContent;
+	/// Knowledge about memory content. Keys are memory addresses, note that the values overlap
+	/// and are not contained here if they are not completely known.
+	std::map<Id, Id> m_memoryContent;
+	/// Keeps record of all sha3 hashes that are computed.
+	std::map<std::vector<Id>, Id> m_knownSha3Hashes;
+	/// Keeps information about which storage or memory slots were written to at which sequence
+	/// number with what instruction.
+	std::vector<StoreOperation> m_storeOperations;
+	/// Structure containing the classes of equivalent expressions.
+	ExpressionClasses m_expressionClasses;
+
+	/// The item that breaks the basic block, can be nullptr.
+	/// It is usually appended to the block but can be optimized in some cases.
+	/// Not owned: it points either into the range passed to feedItems() or to an item
+	/// returned by ExpressionClasses::storeItem().
+	AssemblyItem const* m_breakingItem = nullptr;
+};
+
+/**
+ * Unit that generates code from current stack layout, target stack layout and information about
+ * the equivalence classes.
+ */
+class CSECodeGenerator
+{
+public:
+	using StoreOperation = CommonSubexpressionEliminator::StoreOperation;
+	using StoreOperations = std::vector<StoreOperation>;
+	using Id = ExpressionClasses::Id;
+
+	/// Initializes the code generator with the given classes and store operations.
+	/// The store operations have to be sorted by sequence number in ascending order.
+	CSECodeGenerator(ExpressionClasses& _expressionClasses, StoreOperations const& _storeOperations);
+
+	/// @returns the assembly items generated from the given requirements
+	/// @param _initialStack current contents of the stack (up to stack height of zero)
+	/// @param _targetStackContents final contents of the stack, by stack height relative to initial
+	/// @note should only be called once on each object.
+	AssemblyItems generateCode(
+		std::map<int, Id> const& _initialStack,
+		std::map<int, Id> const& _targetStackContents
+	);
+
+private:
+	/// Recursively discovers all dependencies of @a _c and records them in @a m_neededBy.
+	void addDependencies(Id _c);
+
+	/// Produce code that generates the given element if it is not yet present.
+	/// @returns the stack position of the element or c_invalidPosition if it does not actually
+	/// generate a value on the stack.
+	/// @param _allowSequenced indicates that sequence-constrained operations are allowed
+	int generateClassElement(Id _c, bool _allowSequenced = false);
+	/// @returns the position of the representative of the given id on the stack.
+	/// @note throws an exception if it is not on the stack.
+	int classElementPosition(Id _id) const;
+
+	/// @returns true if @a _element can be removed - in general or, if given, while computing @a _result.
+	bool canBeRemoved(Id _element, Id _result = Id(-1));
+
+	/// Appends code to remove the topmost stack element if it can be removed.
+	bool removeStackTopIfPossible();
+
+	/// Appends a dup instruction to m_generatedItems to retrieve the element at the given stack position.
+	void appendDup(int _fromPosition, SourceLocation const& _location);
+	/// Appends a swap instruction to m_generatedItems to retrieve the element at the given stack position.
+	/// @note this might also remove the last item if it is exactly the same swap instruction.
+	void appendOrRemoveSwap(int _fromPosition, SourceLocation const& _location);
+	/// Appends the given assembly item.
+	void appendItem(AssemblyItem const& _item);
+
+	/// Marker for "not (or no longer) on the stack".
+	static const int c_invalidPosition = -0x7fffffff;
+
+	AssemblyItems m_generatedItems;
+	/// Current height of the stack relative to the start.
+	int m_stackHeight = 0;
+	/// If (b, a) is in m_neededBy then b is needed to compute a.
+	std::multimap<Id, Id> m_neededBy;
+	/// Current content of the stack.
+	std::map<int, Id> m_stack;
+	/// Current positions of equivalence classes, equal to c_invalidPosition if already deleted.
+	std::map<Id, int> m_classPositions;
+
+	/// The actual equivalence class items and how to compute them.
+	ExpressionClasses& m_expressionClasses;
+	/// Keeps information about which storage or memory slots were written to by which operations.
+	/// The operations are sorted ascendingly by sequence number.
+	std::map<std::pair<StoreOperation::Target, Id>, StoreOperations> m_storeOperations;
+	/// The set of equivalence classes that should be present on the stack at the end.
+	std::set<Id> m_finalClasses;
+};
+
+/// Feeds items into the analysis until the range ends or an item that breaks the CSE analysis
+/// block is found. Such an item is not analysed; a pointer to it is remembered instead.
+/// @returns the iterator behind the last item that was consumed.
+template <class _AssemblyItemIterator>
+_AssemblyItemIterator CommonSubexpressionEliminator::feedItems(
+	_AssemblyItemIterator _iterator,
+	_AssemblyItemIterator _end
+)
+{
+	while (_iterator != _end && !SemanticInformation::breaksCSEAnalysisBlock(*_iterator))
+	{
+		feedItem(*_iterator);
+		++_iterator;
+	}
+	if (_iterator == _end)
+		return _iterator;
+	// The breaking item belongs to this block, the next block starts behind it.
+	m_breakingItem = &(*_iterator);
+	++_iterator;
+	return _iterator;
+}
+
+}
+}
--- a/ControlFlowGraph.cpp
+++ b/ControlFlowGraph.cpp
@ -0,0 +1,260 @@
+/*
+	This file is part of cpp-ethereum.
+
+	cpp-ethereum is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	cpp-ethereum is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * @file ControlFlowGraph.cpp
+ * @author Christian <c@ethdev.com>
+ * @date 2015
+ * Control flow analysis for the optimizer.
+ */
+
+#include <libevmasm/ControlFlowGraph.h>
+#include <map>
+#include <libevmasm/Exceptions.h>
+#include <libevmasm/AssemblyItem.h>
+#include <libevmasm/SemanticInformation.h>
+
+using namespace std;
+using namespace dev;
+using namespace dev::eth;
+
+BlockId::BlockId(u256 const& _id):
+	m_id(_id)
+{
+	// Only values below the id reserved for the initial block are usable as tag-based ids.
+	assertThrow(_id < initial().m_id, OptimizerException, "Tag number too large.");
+}
+
+AssemblyItems ControlFlowGraph::optimisedItems()
+{
+	// Non-empty input runs through all passes in order; empty input is returned unchanged.
+	if (!m_items.empty())
+	{
+		findLargestTag();
+		splitBlocks();
+		resolveNextLinks();
+		removeUnusedBlocks();
+		setPrevLinks();
+
+		return rebuildCode();
+	}
+
+	return m_items;
+}
+
+void ControlFlowGraph::findLargestTag()
+{
+	// Determine the largest tag number that is defined or pushed anywhere in the items.
+	m_lastUsedId = 0;
+	for (AssemblyItem const& item: m_items)
+	{
+		if (item.type() != Tag && item.type() != PushTag)
+			continue;
+		// Constructing a BlockId asserts that the tag number can be converted.
+		BlockId(item.data());
+		unsigned const tagNumber = unsigned(item.data());
+		if (m_lastUsedId < tagNumber)
+			m_lastUsedId = tagNumber;
+	}
+}
+
+/// Partitions m_items into basic blocks and stores them in m_blocks (which is cleared first).
+/// A block is closed in front of every Tag item and directly behind every item that alters
+/// control flow. A block that starts with a Tag uses the tag number as its id, the first block
+/// uses BlockId::initial() and all other blocks receive freshly generated ids.
+void ControlFlowGraph::splitBlocks()
+{
+	m_blocks.clear();
+	BlockId id = BlockId::initial();
+	m_blocks[id].begin = 0;
+	for (size_t index = 0; index < m_items.size(); ++index)
+	{
+		AssemblyItem const& item = m_items.at(index);
+		if (item.type() == Tag)
+		{
+			// A tag always starts a new block, so close the block that is currently open (if any).
+			if (id)
+				m_blocks[id].end = index;
+			id = BlockId::invalid();
+		}
+		if (!id)
+		{
+			// No block is open: start one at this item, named after the tag if there is one.
+			id = item.type() == Tag ? BlockId(item.data()) : generateNewId();
+			m_blocks[id].begin = index;
+		}
+		if (item.type() == PushTag)
+			m_blocks[id].pushedTags.push_back(BlockId(item.data()));
+		if (SemanticInformation::altersControlFlow(item))
+		{
+			// The control flow altering item is still part of the block, hence index + 1.
+			m_blocks[id].end = index + 1;
+			if (item == Instruction::JUMP)
+				m_blocks[id].endType = BasicBlock::EndType::JUMP;
+			else if (item == Instruction::JUMPI)
+				m_blocks[id].endType = BasicBlock::EndType::JUMPI;
+			else
+				m_blocks[id].endType = BasicBlock::EndType::STOP;
+			id = BlockId::invalid();
+		}
+	}
+	if (id)
+	{
+		// The last block runs up to the end of the items; it has no successor to hand over to,
+		// so its default end type HANDOVER is turned into STOP.
+		m_blocks[id].end = m_items.size();
+		if (m_blocks[id].endType == BasicBlock::EndType::HANDOVER)
+			m_blocks[id].endType = BasicBlock::EndType::STOP;
+	}
+}
+
+void ControlFlowGraph::resolveNextLinks()
+{
+	// Index all non-empty blocks by the position of their first item.
+	map<unsigned, BlockId> blockByBeginPos;
+	for (auto const& idAndBlock: m_blocks)
+	{
+		BasicBlock const& candidate = idAndBlock.second;
+		if (candidate.begin != candidate.end)
+			blockByBeginPos[candidate.begin] = idAndBlock.first;
+	}
+
+	for (auto& idAndBlock: m_blocks)
+	{
+		BasicBlock& block = idAndBlock.second;
+		bool const continuesBehindEnd =
+			block.endType == BasicBlock::EndType::JUMPI ||
+			block.endType == BasicBlock::EndType::HANDOVER;
+		if (!continuesBehindEnd)
+			continue;
+		// Execution can continue right behind this block, so the block starting there is its successor.
+		assertThrow(
+			blockByBeginPos.count(block.end),
+			OptimizerException,
+			"Successor block not found."
+		);
+		block.next = blockByBeginPos.at(block.end);
+	}
+}
+
+void ControlFlowGraph::removeUnusedBlocks()
+{
+	// Worklist traversal starting at the initial block: a block is needed if its tag is pushed
+	// by a needed block or if it is the direct successor of a needed block.
+	set<BlockId> neededBlocks{BlockId::initial()};
+	vector<BlockId> worklist{BlockId::initial()};
+	auto markNeeded = [&](BlockId _id)
+	{
+		if (neededBlocks.insert(_id).second)
+			worklist.push_back(_id);
+	};
+
+	while (!worklist.empty())
+	{
+		BasicBlock const& block = m_blocks.at(worklist.back());
+		worklist.pop_back();
+		for (BlockId tag: block.pushedTags)
+			markNeeded(tag);
+		if (block.next)
+			markNeeded(block.next);
+	}
+
+	// Drop every block that was not reached.
+	auto it = m_blocks.begin();
+	while (it != m_blocks.end())
+	{
+		if (neededBlocks.count(it->first))
+			++it;
+		else
+			it = m_blocks.erase(it);
+	}
+}
+
+/// Sets the prev links of all blocks and turns unconditional jumps into plain handovers
+/// where the jump target does not yet have a predecessor.
+void ControlFlowGraph::setPrevLinks()
+{
+	// First pass: every block that can continue into its successor (JUMPI or HANDOVER)
+	// becomes the predecessor of that successor. A successor may only have one predecessor.
+	for (auto& idAndBlock: m_blocks)
+	{
+		BasicBlock& block = idAndBlock.second;
+		switch (block.endType)
+		{
+		case BasicBlock::EndType::JUMPI:
+		case BasicBlock::EndType::HANDOVER:
+			assertThrow(
+				!m_blocks.at(block.next).prev,
+				OptimizerException,
+				"Successor already has predecessor."
+			);
+			m_blocks[block.next].prev = idAndBlock.first;
+			break;
+		default:
+			break;
+		}
+	}
+	// If block ends with jump to not yet linked block, link them removing the jump
+	for (auto& idAndBlock: m_blocks)
+	{
+		BlockId blockId = idAndBlock.first;
+		BasicBlock& block = idAndBlock.second;
+		// Only blocks that end in "PUSHTAG JUMP" are candidates, so at least two items are needed.
+		if (block.endType != BasicBlock::EndType::JUMP || block.end - block.begin < 2)
+			continue;
+		AssemblyItem const& push = m_items.at(block.end - 2);
+		if (push.type() != PushTag)
+			continue;
+		BlockId nextId(push.data());
+		// The target already has a predecessor and thus cannot be placed behind this block.
+		if (m_blocks.at(nextId).prev)
+			continue;
+		// Follow the next links starting at the target; if they lead back to this block,
+		// linking the two would create a cycle.
+		bool hasLoop = false;
+		for (BlockId id = nextId; id && !hasLoop; id = m_blocks.at(id).next)
+			hasLoop = (id == blockId);
+		if (hasLoop)
+			continue;
+
+		m_blocks[nextId].prev = blockId;
+		block.next = nextId;
+		// Cut the trailing PUSHTAG and JUMP items off the block.
+		block.end -= 2;
+		assertThrow(
+			!block.pushedTags.empty() && block.pushedTags.back() == nextId,
+			OptimizerException,
+			"Last pushed tag not at end of pushed list."
+		);
+		// The removed PUSHTAG no longer counts as a reference to the target.
+		block.pushedTags.pop_back();
+		block.endType = BasicBlock::EndType::HANDOVER;
+	}
+}
+
+/// Concatenates the items of all blocks in m_blocks into a new item list.
+/// Blocks connected through prev / next links are emitted as one contiguous chain, starting
+/// with the chain that contains the initial block. A tag at the start of a block is dropped
+/// if the block is entered by handover and the tag is never pushed.
+AssemblyItems ControlFlowGraph::rebuildCode()
+{
+	// Number of times each tag is pushed by the remaining blocks.
+	map<BlockId, unsigned> pushes;
+	for (auto const& idAndBlock: m_blocks)
+		for (BlockId ref: idAndBlock.second.pushedTags)
+			pushes[ref]++;
+
+	// Blocks that still have to be emitted. Iterate by reference: the entries contain vectors.
+	set<BlockId> blocksToAdd;
+	for (auto const& idAndBlock: m_blocks)
+		blocksToAdd.insert(idAndBlock.first);
+	AssemblyItems code;
+
+	for (
+		BlockId blockId = BlockId::initial();
+		blockId;
+		blockId = blocksToAdd.empty() ? BlockId::invalid() : *blocksToAdd.begin()
+	)
+	{
+		// The very first item of the code is treated as if it was reached by handover.
+		bool previousHandedOver = (blockId == BlockId::initial());
+		// Rewind to the head of the chain this block belongs to.
+		while (m_blocks.at(blockId).prev)
+			blockId = m_blocks.at(blockId).prev;
+		// Emit the whole chain by following the next links.
+		for (; blockId; blockId = m_blocks.at(blockId).next)
+		{
+			BasicBlock const& block = m_blocks.at(blockId);
+			blocksToAdd.erase(blockId);
+
+			auto begin = m_items.begin() + block.begin;
+			auto end = m_items.begin() + block.end;
+			if (begin == end)
+				continue;
+			// If block starts with unused tag, skip it.
+			if (previousHandedOver && !pushes[blockId] && begin->type() == Tag)
+				++begin;
+			previousHandedOver = (block.endType == BasicBlock::EndType::HANDOVER);
+			copy(begin, end, back_inserter(code));
+		}
+	}
+
+	return code;
+}
+
+BlockId ControlFlowGraph::generateNewId()
+{
+	// Take the next unused number; it has to stay below the id reserved for the initial block.
+	++m_lastUsedId;
+	BlockId freshId(m_lastUsedId);
+	assertThrow(freshId < BlockId::initial(), OptimizerException, "Out of block IDs.");
+	return freshId;
+}
--- a/ControlFlowGraph.h
+++ b/ControlFlowGraph.h
@ -0,0 +1,108 @@
+/*
+	This file is part of cpp-ethereum.
+
+	cpp-ethereum is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	cpp-ethereum is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * @file ControlFlowGraph.h
+ * @author Christian <c@ethdev.com>
+ * @date 2015
+ * Control flow analysis for the optimizer.
+ */
+
+#pragma once
+
+#include <vector>
+#include <libdevcore/Common.h>
+#include <libdevcore/Assertions.h>
+
+namespace dev
+{
+namespace eth
+{
+
+class AssemblyItem;
+using AssemblyItems = std::vector<AssemblyItem>;
+
+/**
+ * Identifier for a block, coincides with the tag number of an AssemblyItem but adds a special
+ * ID for the initial block.
+ */
+class BlockId
+{
+public:
+	/// Creates the invalid id.
+	BlockId(): m_id(invalid().m_id) {}
+	explicit BlockId(unsigned _id): m_id(_id) {}
+	explicit BlockId(u256 const& _id);
+	/// @returns the id reserved for the block at the very beginning of the items.
+	static BlockId initial() { return BlockId(-2); }
+	/// @returns the id that does not refer to any block.
+	static BlockId invalid() { return BlockId(-1); }
+
+	bool operator==(BlockId const& _other) const { return m_id == _other.m_id; }
+	bool operator!=(BlockId const& _other) const { return !(*this == _other); }
+	bool operator<(BlockId const& _other) const { return m_id < _other.m_id; }
+	/// @returns true for every id except invalid().
+	explicit operator bool() const { return m_id != invalid().m_id; }
+
+private:
+	unsigned m_id;
+};
+
+/**
+ * Control flow block inside which instruction counter is always incremented by one
+ * (except for possibly the last instruction).
+ */
+struct BasicBlock
+{
+	/// Start index into assembly item list.
+	unsigned begin = 0;
+	/// End index (excluded) into assembly item list.
+	unsigned end = 0;
+	/// Tags pushed inside this block, with multiplicity.
+	std::vector<BlockId> pushedTags;
+	/// ID of the block that always follows this one (either JUMP or flow into new block),
+	/// or BlockId::invalid() otherwise
+	BlockId next = BlockId::invalid();
+	/// ID of the block that has to precede this one.
+	BlockId prev = BlockId::invalid();
+
+	/// How the block ends: with a JUMP or JUMPI instruction, with another item that alters
+	/// control flow or the end of the code (STOP), or by running into the following block
+	/// (HANDOVER, the default).
+	enum class EndType { JUMP, JUMPI, STOP, HANDOVER };
+	EndType endType = EndType::HANDOVER;
+};
+
+/**
+ * Splits a list of assembly items into basic blocks, removes the blocks that cannot be
+ * reached from the initial block and reassembles the remaining blocks into a new item list.
+ */
+class ControlFlowGraph
+{
+public:
+	/// Initializes the control flow graph.
+	/// @a _items has to persist across the usage of this class.
+	ControlFlowGraph(AssemblyItems const& _items): m_items(_items) {}
+	/// @returns the collection of optimised items, should be called only once.
+	AssemblyItems optimisedItems();
+
+private:
+	/// Stores the largest tag number occurring in the items in m_lastUsedId.
+	void findLargestTag();
+	/// Fills m_blocks by cutting the items into basic blocks.
+	void splitBlocks();
+	/// Sets the next link of every block that can continue into the block behind it.
+	void resolveNextLinks();
+	/// Removes all blocks that are not reachable from the initial block.
+	void removeUnusedBlocks();
+	/// Sets the prev links and replaces jumps to blocks without predecessor by handovers.
+	void setPrevLinks();
+	/// @returns the items of the remaining blocks, chained according to the prev / next links.
+	AssemblyItems rebuildCode();
+
+	/// @returns a block id that is not used as tag number in the items.
+	BlockId generateNewId();
+
+	/// Largest number used as block id so far.
+	unsigned m_lastUsedId = 0;
+	/// The items to analyse, only a reference is stored.
+	AssemblyItems const& m_items;
+	/// The basic blocks by their id.
+	std::map<BlockId, BasicBlock> m_blocks;
+};
+
+
+}
+}
--- a/Exceptions.h
+++ b/Exceptions.h
@ -0,0 +1,36 @@
+/*
+	This file is part of cpp-ethereum.
+
+	cpp-ethereum is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	cpp-ethereum is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file Exceptions.h
+ * @author Christian <c@ethdev.com>
+ * @date 2014
+ */
+
+#pragma once
+
+#include <libdevcore/Exceptions.h>
+
+namespace dev
+{
+namespace eth
+{
+
+/// Root of the exception hierarchy declared in this header.
+struct AssemblyException: virtual Exception {};
+/// Exception type raised by the optimizer components (e.g. via assertThrow in ControlFlowGraph.cpp).
+struct OptimizerException: virtual AssemblyException {};
+/// Specialisation of OptimizerException.
+/// NOTE(review): presumably signals that a stack element is too deep to be reached - confirm at the throw sites.
+struct StackTooDeepException: virtual OptimizerException {};
+
+}
+}
--- a/ExpressionClasses.cpp
+++ b/ExpressionClasses.cpp
@ -0,0 +1,438 @@
+/*
+	This file is part of cpp-ethereum.
+
+	cpp-ethereum is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	cpp-ethereum is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * @file ExpressionClasses.cpp
+ * @author Christian <c@ethdev.com>
+ * @date 2015
+ * Container for equivalence classes of expressions for use in common subexpression elimination.
+ */
+
+#include <libevmasm/ExpressionClasses.h>
+#include <utility>
+#include <tuple>
+#include <functional>
+#include <boost/range/adaptor/reversed.hpp>
+#include <boost/noncopyable.hpp>
+#include <libevmasm/Assembly.h>
+#include <libevmasm/CommonSubexpressionEliminator.h>
+
+using namespace std;
+using namespace dev;
+using namespace dev::eth;
+
+
+bool ExpressionClasses::Expression::operator<(ExpressionClasses::Expression const& _other) const
+{
+	// Lexicographic comparison of (item type, item data, arguments, sequence number),
+	// using only operator< on the components.
+	auto const lhsType = item->type();
+	auto const rhsType = _other.item->type();
+	if (lhsType < rhsType)
+		return true;
+	if (rhsType < lhsType)
+		return false;
+
+	if (item->data() < _other.item->data())
+		return true;
+	if (_other.item->data() < item->data())
+		return false;
+
+	if (arguments < _other.arguments)
+		return true;
+	if (_other.arguments < arguments)
+		return false;
+
+	return sequenceNumber < _other.sequenceNumber;
+}
+
+ExpressionClasses::Id ExpressionClasses::find(
+	AssemblyItem const& _item,
+	Ids const& _arguments,
+	bool _copyItem,
+	unsigned _sequenceNumber
+)
+{
+	Expression candidate;
+	candidate.id = Id(-1);
+	candidate.item = &_item;
+	candidate.arguments = _arguments;
+	candidate.sequenceNumber = _sequenceNumber;
+
+	// Commutative operations are looked up with their arguments in sorted order.
+	if (SemanticInformation::isCommutativeOperation(_item))
+		sort(candidate.arguments.begin(), candidate.arguments.end());
+
+	// An expression that was seen before already has its class.
+	auto const known = m_expressions.find(candidate);
+	if (known != m_expressions.end())
+		return known->id;
+
+	// The expression will be stored, so it must not point to the caller's item if a copy was requested.
+	if (_copyItem)
+		candidate.item = storeItem(_item);
+
+	// Either simplification yields an existing class or the expression founds a new one.
+	Id const simplified = tryToSimplify(candidate);
+	if (simplified >= m_representatives.size())
+	{
+		candidate.id = m_representatives.size();
+		m_representatives.push_back(candidate);
+	}
+	else
+		candidate.id = simplified;
+	m_expressions.insert(candidate);
+	return candidate.id;
+}
+
+bool ExpressionClasses::knownToBeDifferent(ExpressionClasses::Id _a, ExpressionClasses::Id _b)
+{
+	// The classes differ for sure if "_a - _b" simplifies to something known to be non-zero.
+	Id const difference = find(Instruction::SUB, {_a, _b});
+	return knownNonZero(difference);
+}
+
+bool ExpressionClasses::knownToBeDifferentBy32(ExpressionClasses::Id _a, ExpressionClasses::Id _b)
+{
+	// "_a - _b" has to simplify to a constant that is at least 32 away from zero.
+	u256 const* difference = knownConstant(find(Instruction::SUB, {_a, _b}));
+	if (!difference)
+		return false;
+	// The forbidden interval ["-31", 31] is shifted to [0, 62] by adding 31.
+	return *difference + 31 > u256(62);
+}
+
+bool ExpressionClasses::knownZero(Id _c)
+{
+	// The class is known to be zero if its representative is the constant zero.
+	Pattern const zero = Pattern(u256(0));
+	return zero.matches(representative(_c), *this);
+}
+
+bool ExpressionClasses::knownNonZero(Id _c)
+{
+	// The class is known to be non-zero if "ISZERO(_c)" simplifies to the constant zero.
+	Id const isZero = find(Instruction::ISZERO, {_c});
+	Pattern const zero = Pattern(u256(0));
+	return zero.matches(representative(isZero), *this);
+}
+
+u256 const* ExpressionClasses::knownConstant(Id _c)
+{
+	// Match the representative against "any Push"; the match group gives access to the matched item's data.
+	map<unsigned, Expression const*> matchGroups;
+	Pattern constantPattern(Push);
+	constantPattern.setMatchGroup(1, matchGroups);
+	bool const isConstant = constantPattern.matches(representative(_c), *this);
+	return isConstant ? &constantPattern.d() : nullptr;
+}
+
+AssemblyItem const* ExpressionClasses::storeItem(AssemblyItem const& _item)
+{
+	// Keep a heap-allocated copy alive in m_spareAssemblyItems and hand out a plain pointer to it.
+	shared_ptr<AssemblyItem> copyOfItem = make_shared<AssemblyItem>(_item);
+	AssemblyItem const* stored = copyOfItem.get();
+	m_spareAssemblyItems.push_back(std::move(copyOfItem));
+	return stored;
+}
+
+string ExpressionClasses::fullDAGToString(ExpressionClasses::Id _id) const
+{
+	// Output format: "<id>:<item>(<argument>,<argument>,...)", arguments printed recursively.
+	Expression const& representativeExpr = representative(_id);
+	stringstream out;
+	out << dec << representativeExpr.id;
+	out << ":" << *representativeExpr.item;
+	out << "(";
+	for (Id argument: representativeExpr.arguments)
+	{
+		out << fullDAGToString(argument);
+		out << ",";
+	}
+	out << ")";
+	return out.str();
+}
+
+/**
+ * Container for the simplification rules together with the match groups that the
+ * patterns inside the rules refer to.
+ */
+class Rules: public boost::noncopyable
+{
+public:
+	Rules();
+	/// Forgets all expressions bound to match groups, to be called before matching a rule.
+	void resetMatchGroups() { m_matchGroups.clear(); }
+	/// @returns the list of (pattern, action) pairs.
+	/// Returned by reference so that callers iterating over the rules do not copy them.
+	vector<pair<Pattern, function<Pattern()>>> const& rules() const { return m_rules; }
+
+private:
+	using Expression = ExpressionClasses::Expression;
+	/// Expressions currently bound to the match groups, referenced by the patterns in m_rules.
+	map<unsigned, Expression const*> m_matchGroups;
+	vector<pair<Pattern, function<Pattern()>>> m_rules;
+};
+
+/// Builds the list of simplification rules. Each rule consists of a pattern and an action that
+/// is evaluated after the pattern matched and that produces the replacement pattern.
+/// Rules are tried in order and the first matching rule is applied.
+Rules::Rules()
+{
+	// Multiple occurrences of one of these inside one rule must match the same equivalence class.
+	// Constants.
+	Pattern A(Push);
+	Pattern B(Push);
+	Pattern C(Push);
+	// Anything.
+	Pattern X;
+	Pattern Y;
+	Pattern Z;
+	A.setMatchGroup(1, m_matchGroups);
+	B.setMatchGroup(2, m_matchGroups);
+	C.setMatchGroup(3, m_matchGroups);
+	X.setMatchGroup(4, m_matchGroups);
+	Y.setMatchGroup(5, m_matchGroups);
+	Z.setMatchGroup(6, m_matchGroups);
+
+	m_rules = vector<pair<Pattern, function<Pattern()>>>{
+		// arithmetics on constants
+		{{Instruction::ADD, {A, B}}, [=]{ return A.d() + B.d(); }},
+		{{Instruction::MUL, {A, B}}, [=]{ return A.d() * B.d(); }},
+		{{Instruction::SUB, {A, B}}, [=]{ return A.d() - B.d(); }},
+		{{Instruction::DIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : A.d() / B.d(); }},
+		{{Instruction::SDIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(u2s(A.d()) / u2s(B.d())); }},
+		{{Instruction::MOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : A.d() % B.d(); }},
+		{{Instruction::SMOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(u2s(A.d()) % u2s(B.d())); }},
+		{{Instruction::EXP, {A, B}}, [=]{ return u256(boost::multiprecision::powm(bigint(A.d()), bigint(B.d()), bigint(1) << 256)); }},
+		{{Instruction::NOT, {A}}, [=]{ return ~A.d(); }},
+		{{Instruction::LT, {A, B}}, [=]() { return A.d() < B.d() ? u256(1) : 0; }},
+		{{Instruction::GT, {A, B}}, [=]() -> u256 { return A.d() > B.d() ? 1 : 0; }},
+		{{Instruction::SLT, {A, B}}, [=]() -> u256 { return u2s(A.d()) < u2s(B.d()) ? 1 : 0; }},
+		{{Instruction::SGT, {A, B}}, [=]() -> u256 { return u2s(A.d()) > u2s(B.d()) ? 1 : 0; }},
+		{{Instruction::EQ, {A, B}}, [=]() -> u256 { return A.d() == B.d() ? 1 : 0; }},
+		{{Instruction::ISZERO, {A}}, [=]() -> u256 { return A.d() == 0 ? 1 : 0; }},
+		{{Instruction::AND, {A, B}}, [=]{ return A.d() & B.d(); }},
+		{{Instruction::OR, {A, B}}, [=]{ return A.d() | B.d(); }},
+		{{Instruction::XOR, {A, B}}, [=]{ return A.d() ^ B.d(); }},
+		{{Instruction::BYTE, {A, B}}, [=]{ return A.d() >= 32 ? 0 : (B.d() >> unsigned(8 * (31 - A.d()))) & 0xff; }},
+		// ADDMOD and MULMOD compute the intermediate result without wrapping at 2**256, hence bigint.
+		// Note: a second, identical MULMOD pattern with the action "A * B" used to follow here; it could
+		// never be reached because the first matching rule wins, and it ignored the modulus.
+		{{Instruction::ADDMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) + bigint(B.d())) % C.d()); }},
+		{{Instruction::MULMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) * bigint(B.d())) % C.d()); }},
+		{{Instruction::SIGNEXTEND, {A, B}}, [=]() -> u256 {
+			if (A.d() >= 31)
+				return B.d();
+			unsigned testBit = unsigned(A.d()) * 8 + 7;
+			u256 mask = (u256(1) << testBit) - 1;
+			return u256(boost::multiprecision::bit_test(B.d(), testBit) ? B.d() | ~mask : B.d() & mask);
+		}},
+
+		// invariants involving known constants
+		{{Instruction::ADD, {X, 0}}, [=]{ return X; }},
+		{{Instruction::MUL, {X, 1}}, [=]{ return X; }},
+		{{Instruction::DIV, {X, 1}}, [=]{ return X; }},
+		{{Instruction::SDIV, {X, 1}}, [=]{ return X; }},
+		{{Instruction::OR, {X, 0}}, [=]{ return X; }},
+		{{Instruction::XOR, {X, 0}}, [=]{ return X; }},
+		{{Instruction::AND, {X, ~u256(0)}}, [=]{ return X; }},
+		{{Instruction::MUL, {X, 0}}, [=]{ return u256(0); }},
+		{{Instruction::DIV, {X, 0}}, [=]{ return u256(0); }},
+		{{Instruction::MOD, {X, 0}}, [=]{ return u256(0); }},
+		{{Instruction::MOD, {0, X}}, [=]{ return u256(0); }},
+		{{Instruction::AND, {X, 0}}, [=]{ return u256(0); }},
+		{{Instruction::OR, {X, ~u256(0)}}, [=]{ return ~u256(0); }},
+		// operations involving an expression and itself
+		{{Instruction::AND, {X, X}}, [=]{ return X; }},
+		{{Instruction::OR, {X, X}}, [=]{ return X; }},
+		{{Instruction::SUB, {X, X}}, [=]{ return u256(0); }},
+		{{Instruction::EQ, {X, X}}, [=]{ return u256(1); }},
+		{{Instruction::LT, {X, X}}, [=]{ return u256(0); }},
+		{{Instruction::SLT, {X, X}}, [=]{ return u256(0); }},
+		{{Instruction::GT, {X, X}}, [=]{ return u256(0); }},
+		{{Instruction::SGT, {X, X}}, [=]{ return u256(0); }},
+		{{Instruction::MOD, {X, X}}, [=]{ return u256(0); }},
+
+		{{Instruction::NOT, {{Instruction::NOT, {X}}}}, [=]{ return X; }},
+	};
+	// Associative operations
+	for (auto const& opFun: vector<pair<Instruction,function<u256(u256 const&,u256 const&)>>>{
+		{Instruction::ADD, plus<u256>()},
+		{Instruction::MUL, multiplies<u256>()},
+		{Instruction::AND, bit_and<u256>()},
+		{Instruction::OR, bit_or<u256>()},
+		{Instruction::XOR, bit_xor<u256>()}
+	})
+	{
+		auto op = opFun.first;
+		auto fun = opFun.second;
+		// Moving constants to the outside, order matters here!
+		// we need actions that return expressions (or patterns?) here, and we need also reversed rules
+		// (X+A)+B -> X+(A+B)
+		m_rules += vector<pair<Pattern, function<Pattern()>>>{{
+			{op, {{op, {X, A}}, B}},
+			[=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; }
+		}, {
+		// (X+A)+Y -> (X+Y)+A
+			{op, {{op, {X, A}}, Y}},
+			[=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; }
+		}, {
+		// For now, we still need explicit commutativity for the inner pattern
+		// (A+X)+B -> X+(A+B)
+			{op, {{op, {A, X}}, B}},
+			[=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; }
+		}, {
+		// (A+X)+Y -> (X+Y)+A
+			{op, {{op, {A, X}}, Y}},
+			[=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; }
+		}};
+	}
+	// move constants across subtractions
+	m_rules += vector<pair<Pattern, function<Pattern()>>>{
+		{
+			// X - A -> X + (-A)
+			{Instruction::SUB, {X, A}},
+			[=]() -> Pattern { return {Instruction::ADD, {X, 0 - A.d()}}; }
+		}, {
+			// (X + A) - Y -> (X - Y) + A
+			{Instruction::SUB, {{Instruction::ADD, {X, A}}, Y}},
+			[=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, A}}; }
+		}, {
+			// (A + X) - Y -> (X - Y) + A
+			{Instruction::SUB, {{Instruction::ADD, {A, X}}, Y}},
+			[=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, A}}; }
+		}, {
+			// X - (Y + A) -> (X - Y) + (-A)
+			{Instruction::SUB, {X, {Instruction::ADD, {Y, A}}}},
+			[=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, 0 - A.d()}}; }
+		}, {
+			// X - (A + Y) -> (X - Y) + (-A)
+			{Instruction::SUB, {X, {Instruction::ADD, {A, Y}}}},
+			[=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, 0 - A.d()}}; }
+		}
+	};
+}
+
+ExpressionClasses::Id ExpressionClasses::tryToSimplify(Expression const& _expr, bool _secondRun)
+{
+	static Rules rules;
+
+	// Only operations can be simplified.
+	if (_expr.item->type() != Operation)
+		return -1;
+
+	// Apply the first rule whose pattern matches.
+	for (auto const& rule: rules.rules())
+	{
+		rules.resetMatchGroups();
+		if (!rule.first.matches(_expr, *this))
+			continue;
+		// Debug info
+		//cout << "Simplifying " << *_expr.item << "(";
+		//for (Id arg: _expr.arguments)
+		//	cout << fullDAGToString(arg) << ", ";
+		//cout << ")" << endl;
+		//cout << "with rule " << rule.first.toString() << endl;
+		//ExpressionTemplate t(rule.second());
+		//cout << "to " << rule.second().toString() << endl;
+		return rebuildExpression(ExpressionTemplate(rule.second(), _expr.item->getLocation()));
+	}
+
+	// Binary commutative operations get one more attempt with their arguments exchanged.
+	bool const retrySwapped =
+		!_secondRun &&
+		_expr.arguments.size() == 2 &&
+		SemanticInformation::isCommutativeOperation(*_expr.item);
+	if (!retrySwapped)
+		return -1;
+
+	Expression swapped = _expr;
+	swap(swapped.arguments[0], swapped.arguments[1]);
+	return tryToSimplify(swapped, true);
+}
+
+ExpressionClasses::Id ExpressionClasses::rebuildExpression(ExpressionTemplate const& _template)
+{
+	if (!_template.hasId)
+	{
+		// Build the argument classes first, then look up (or create) the class of the item applied to them.
+		Ids argumentIds;
+		for (ExpressionTemplate const& argumentTemplate: _template.arguments)
+			argumentIds.push_back(rebuildExpression(argumentTemplate));
+		return find(_template.item, argumentIds);
+	}
+	// The template refers to an expression that already has a class.
+	return _template.id;
+}
+
+
+/// Creates a pattern of type Operation that requires the data of the matched item to be equal
+/// to @a _instruction and that matches the arguments against @a _arguments.
+Pattern::Pattern(Instruction _instruction, std::vector<Pattern> const& _arguments):
+	m_type(Operation),
+	m_requireDataMatch(true),
+	m_data(_instruction),
+	m_arguments(_arguments)
+{
+}
+
+void Pattern::setMatchGroup(unsigned _group, map<unsigned, Expression const*>& _matchGroups)
+{
+	// Only the address of the map is stored, it has to outlive this pattern and its copies.
+	m_matchGroups = &_matchGroups;
+	m_matchGroup = _group;
+}
+
+bool Pattern::matches(Expression const& _expr, ExpressionClasses const& _classes) const
+{
+	if (!matchesBaseItem(*_expr.item))
+		return false;
+	if (m_matchGroup)
+	{
+		// The first match binds the group, later matches have to refer to the same class.
+		auto const bound = m_matchGroups->find(m_matchGroup);
+		if (bound == m_matchGroups->end())
+			(*m_matchGroups)[m_matchGroup] = &_expr;
+		else if (bound->second->id != _expr.id)
+			return false;
+	}
+	assertThrow(m_arguments.size() == 0 || _expr.arguments.size() == m_arguments.size(), OptimizerException, "");
+	// Each argument pattern has to match the representative of the corresponding argument class.
+	for (size_t argIndex = 0; argIndex < m_arguments.size(); ++argIndex)
+	{
+		Expression const& argument = _classes.representative(_expr.arguments[argIndex]);
+		if (!m_arguments[argIndex].matches(argument, _classes))
+			return false;
+	}
+	return true;
+}
+
+AssemblyItem Pattern::toAssemblyItem(SourceLocation const& _location) const
+{
+	// The item carries the type and data of this pattern and the given source location.
+	AssemblyItem itemFromPattern(m_type, m_data, _location);
+	return itemFromPattern;
+}
+
+string Pattern::toString() const
+{
+	stringstream out;
+	// Head of the pattern, depending on the item type.
+	if (m_type == Operation)
+		out << instructionInfo(Instruction(unsigned(m_data))).name;
+	else if (m_type == Push)
+		out << "PUSH " << hex << m_data;
+	else if (m_type == UndefinedItem)
+		out << "ANY";
+	else
+		out << "t=" << dec << m_type << " d=" << hex << m_data;
+	// Marker for patterns that do not compare the data.
+	if (!m_requireDataMatch)
+		out << " ~";
+	if (m_matchGroup)
+		out << "[" << dec << m_matchGroup << "]";
+	// Argument patterns, recursively.
+	out << "(";
+	for (Pattern const& argument: m_arguments)
+		out << argument.toString() << ", ";
+	out << ")";
+	return out.str();
+}
+
+bool Pattern::matchesBaseItem(AssemblyItem const& _item) const
+{
+	// A pattern without type matches any item.
+	if (m_type == UndefinedItem)
+		return true;
+	// Otherwise the type has to agree and, if requested, the data as well.
+	bool const typeMatches = (m_type == _item.type());
+	return typeMatches && (!m_requireDataMatch || m_data == _item.data());
+}
+
+Pattern::Expression const& Pattern::matchGroupValue() const
+{
+	// The pattern has to be part of a match group that is bound to an expression.
+	assertThrow(m_matchGroup > 0, OptimizerException, "");
+	assertThrow(!!m_matchGroups, OptimizerException, "");
+	Expression const* matched = (*m_matchGroups)[m_matchGroup];
+	assertThrow(matched, OptimizerException, "");
+	return *matched;
+}
+
+
+ExpressionTemplate::ExpressionTemplate(Pattern const& _pattern, SourceLocation const& _location)
+{
+	// A pattern in a match group stands for the matched expression, any other pattern for a new item.
+	hasId = (_pattern.matchGroup() != 0);
+	if (hasId)
+		id = _pattern.id();
+	else
+		item = _pattern.toAssemblyItem(_location);
+	// The arguments are converted recursively.
+	for (auto const& argumentPattern: _pattern.arguments())
+		arguments.emplace_back(argumentPattern, _location);
+}
+
+string ExpressionTemplate::toString() const
+{
+	stringstream out;
+	// Either the id of the matched expression or the item, followed by the arguments in parentheses.
+	if (!hasId)
+		out << item;
+	else
+		out << id;
+	out << "(";
+	for (auto const& argument: arguments)
+	{
+		out << argument.toString();
+	}
+	out << ")";
+	return out.str();
+}
--- a/ExpressionClasses.h
+++ b/ExpressionClasses.h
@ -0,0 +1,181 @@
+/*
+	This file is part of cpp-ethereum.
+
+	cpp-ethereum is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	cpp-ethereum is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * @file ExpressionClasses.h
+ * @author Christian <c@ethdev.com>
+ * @date 2015
+ * Container for equivalence classes of expressions for use in common subexpression elimination.
+ */
+
+#pragma once
+
+#include <vector>
+#include <map>
+#include <memory>
+#include <libdevcore/Common.h>
+#include <libevmasm/AssemblyItem.h>
+
+namespace dev
+{
+namespace eth
+{
+
+class Pattern;
+struct ExpressionTemplate;
+
+/**
+ * Collection of classes of equivalent expressions that can also determine the class of an expression.
+ * Identifiers are contiguously assigned to new classes starting from zero.
+ */
+class ExpressionClasses
+{
+public:
+	using Id = unsigned;
+	using Ids = std::vector<Id>;
+
+	/// An assembly item applied to argument classes, together with the class it belongs to.
+	struct Expression
+	{
+		/// Class of this expression.
+		Id id;
+		/// The item, not owned by the expression.
+		AssemblyItem const* item;
+		/// Classes of the arguments.
+		Ids arguments;
+		unsigned sequenceNumber; ///< Storage modification sequence, only used for SLOAD/SSTORE instructions.
+		/// Behaves as if this was a tuple of (item->type(), item->data(), arguments, sequenceNumber).
+		bool operator<(Expression const& _other) const;
+	};
+
+	/// Retrieves the id of the expression equivalence class resulting from the given item applied to the
+	/// given classes, might also create a new one.
+	/// @param _copyItem if true, copies the assembly item to an internal storage instead of just
+	/// keeping a pointer.
+	/// The @a _sequenceNumber indicates the current storage or memory access sequence.
+	Id find(
+		AssemblyItem const& _item,
+		Ids const& _arguments = {},
+		bool _copyItem = true,
+		unsigned _sequenceNumber = 0
+	);
+	/// @returns the canonical representative of an expression class.
+	Expression const& representative(Id _id) const { return m_representatives.at(_id); }
+	/// @returns the number of classes.
+	Id size() const { return m_representatives.size(); }
+
+	/// @returns true if the values of the given classes are known to be different (on every input).
+	/// @note that this function might still return false for some different inputs.
+	bool knownToBeDifferent(Id _a, Id _b);
+	/// Similar to @a knownToBeDifferent but require that abs(_a - b) >= 32.
+	bool knownToBeDifferentBy32(Id _a, Id _b);
+	/// @returns true if the value of the given class is known to be zero.
+	/// @note that this is not the negation of knownNonZero
+	bool knownZero(Id _c);
+	/// @returns true if the value of the given class is known to be nonzero.
+	/// @note that this is not the negation of knownZero
+	bool knownNonZero(Id _c);
+	/// @returns a pointer to the value if the given class is known to be a constant,
+	/// and a nullptr otherwise.
+	u256 const* knownConstant(Id _c);
+
+	/// Stores a copy of the given AssemblyItem and returns a pointer to the copy that is valid for
+	/// the lifetime of the ExpressionClasses object.
+	AssemblyItem const* storeItem(AssemblyItem const& _item);
+
+	/// @returns a textual form of the given class and, recursively, of all its arguments.
+	std::string fullDAGToString(Id _id) const;
+
+private:
+	/// Tries to simplify the given expression.
+	/// @returns its class if it is possible or Id(-1) otherwise.
+	/// @param _secondRun is set to true for the second run where arguments of commutative expressions are reversed
+	Id tryToSimplify(Expression const& _expr, bool _secondRun = false);
+
+	/// Rebuilds an expression from a (matched) pattern.
+	Id rebuildExpression(ExpressionTemplate const& _template);
+
+	/// NOTE(review): no definition of this function is present in ExpressionClasses.cpp (the rules
+	/// are built by the file-local class Rules there) - looks like a leftover declaration, confirm
+	/// before removing.
+	std::vector<std::pair<Pattern, std::function<Pattern()>>> createRules() const;
+
+	/// Expression equivalence class representatives - we only store one item of an equivalence.
+	std::vector<Expression> m_representatives;
+	/// All expression ever encountered.
+	std::set<Expression> m_expressions;
+	/// Owns the item copies created by storeItem.
+	std::vector<std::shared_ptr<AssemblyItem>> m_spareAssemblyItems;
+};
+
+/**
+ * Pattern to match against an expression.
+ * Also stores matched expressions to retrieve them later, for constructing new expressions using
+ * ExpressionTemplate.
+ */
+class Pattern
+{
+public:
+	using Expression = ExpressionClasses::Expression;
+	using Id = ExpressionClasses::Id;
+
+	// Matches a specific constant value.
+	Pattern(unsigned _value): Pattern(u256(_value)) {}
+	// Matches a specific constant value.
+	Pattern(u256 const& _value): m_type(Push), m_requireDataMatch(true), m_data(_value) {}
+	// Matches a specific assembly item type or anything if not given.
+	Pattern(AssemblyItemType _type = UndefinedItem): m_type(_type) {}
+	// Matches a given instruction with given arguments
+	Pattern(Instruction _instruction, std::vector<Pattern> const& _arguments = {});
+	/// Sets this pattern to be part of the match group with the identifier @a _group.
+	/// Inside one rule, all patterns in the same match group have to match expressions from the
+	/// same expression equivalence class.
+	/// Only a pointer to @a _matchGroups is kept, so the map has to outlive the pattern.
+	void setMatchGroup(unsigned _group, std::map<unsigned, Expression const*>& _matchGroups);
+	/// @returns the identifier of the match group or zero if the pattern is not part of one.
+	unsigned matchGroup() const { return m_matchGroup; }
+	/// @returns true if @a _expr (and recursively the representatives of its arguments) match
+	/// this pattern. Binds the match group of the pattern to @a _expr if it is not bound yet.
+	bool matches(Expression const& _expr, ExpressionClasses const& _classes) const;
+
+	/// @returns an assembly item with the type and data of this pattern and the given location.
+	AssemblyItem toAssemblyItem(SourceLocation const& _location) const;
+	/// @returns a copy of the argument patterns.
+	std::vector<Pattern> arguments() const { return m_arguments; }
+
+	/// @returns the id of the matched expression if this pattern is part of a match group.
+	Id id() const { return matchGroupValue().id; }
+	/// @returns the data of the matched expression if this pattern is part of a match group.
+	u256 const& d() const { return matchGroupValue().item->data(); }
+
+	std::string toString() const;
+
+private:
+	/// @returns true if the type and (if required) the data of @a _item fit this pattern,
+	/// arguments and match groups are not considered.
+	bool matchesBaseItem(AssemblyItem const& _item) const;
+	/// @returns the expression bound to the match group of this pattern, throws
+	/// OptimizerException if there is none.
+	Expression const& matchGroupValue() const;
+
+	AssemblyItemType m_type;
+	/// If true, the data of a matched item has to be equal to m_data.
+	bool m_requireDataMatch = false;
+	u256 m_data = 0;
+	std::vector<Pattern> m_arguments;
+	/// Identifier of the match group, zero means "no match group".
+	unsigned m_matchGroup = 0;
+	/// Bound expressions by match group, shared by all patterns registered with the same map.
+	std::map<unsigned, Expression const*>* m_matchGroups = nullptr;
+};
+
+/**
+ * Template for a new expression that can be built from matched patterns.
+ */
+struct ExpressionTemplate
+{
+	using Expression = ExpressionClasses::Expression;
+	using Id = ExpressionClasses::Id;
+	/// Converts a pattern (and recursively its arguments) into a template. Patterns that are
+	/// part of a match group contribute the id of the matched expression, all others an
+	/// assembly item carrying @a _location.
+	explicit ExpressionTemplate(Pattern const& _pattern, SourceLocation const& _location);
+	std::string toString() const;
+	/// True if @a id is to be used, false if @a item is to be used.
+	bool hasId = false;
+	/// Id of the matched expression, if available.
+	Id id = Id(-1);
+	// Otherwise, assembly item.
+	AssemblyItem item = UndefinedItem;
+	std::vector<ExpressionTemplate> arguments;
+};
+
+}
+}
--- a/SemanticInformation.cpp
+++ b/SemanticInformation.cpp
@ -0,0 +1,124 @@
+/*
+	This file is part of cpp-ethereum.
+
+	cpp-ethereum is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	cpp-ethereum is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * @file SemanticInformation.cpp
+ * @author Christian <c@ethdev.com>
+ * @date 2015
+ * Helper to provide semantic information about assembly items.
+ */
+
+#include <libevmasm/SemanticInformation.h>
+#include <libevmasm/AssemblyItem.h>
+
+using namespace std;
+using namespace dev;
+using namespace dev::eth;
+
+/// Decides whether @a _item ends the current block of common subexpression analysis.
+/// Push-like items, DUP/SWAP, SSTORE and MSTORE never end a block; tags, undefined or
+/// unknown item types, GAS, PC and MSIZE always do. Any other operation ends the block if
+/// it has side effects or takes more than two arguments.
+bool SemanticInformation::breaksCSEAnalysisBlock(AssemblyItem const& _item)
+{
+	// First stage: everything that is not an operation is decided by the item type alone.
+	switch (_item.type())
+	{
+	case Push:
+	case PushString:
+	case PushTag:
+	case PushSub:
+	case PushSubSize:
+	case PushProgramSize:
+	case PushData:
+		return false;
+	case Operation:
+		break;
+	case UndefinedItem:
+	case Tag:
+	default:
+		return true;
+	}
+
+	// Pure stack shuffling is always handled inside a block.
+	if (isDupInstruction(_item) || isSwapInstruction(_item))
+		return false;
+
+	Instruction const opcode = _item.instruction();
+	switch (opcode)
+	{
+	case Instruction::GAS:
+	case Instruction::PC:
+		return true; // GAS and PC assume a specific order of opcodes
+	case Instruction::MSIZE:
+		return true; // msize is modified already by memory access, avoid that for now
+	case Instruction::SSTORE:
+	case Instruction::MSTORE:
+		return false;
+	default:
+		break;
+	}
+
+	//@todo: We do not handle the following memory instructions for now:
+	// calldatacopy, codecopy, extcodecopy, mstore8,
+	// msize (note that msize also depends on memory read access)
+	InstructionInfo const details = instructionInfo(opcode);
+	// the second requirement will be lifted once it is implemented
+	return details.sideEffects || details.args > 2;
+}
+
+/// @returns true if @a _item is one of the operations ADD, MUL, EQ, AND, OR or XOR.
+bool SemanticInformation::isCommutativeOperation(AssemblyItem const& _item)
+{
+	if (_item.type() != Operation)
+		return false;
+
+	Instruction const opcode = _item.instruction();
+	return
+		opcode == Instruction::ADD ||
+		opcode == Instruction::MUL ||
+		opcode == Instruction::EQ ||
+		opcode == Instruction::AND ||
+		opcode == Instruction::OR ||
+		opcode == Instruction::XOR;
+}
+
+/// @returns true if @a _item is an operation whose instruction lies in DUP1..DUP16.
+bool SemanticInformation::isDupInstruction(AssemblyItem const& _item)
+{
+	if (_item.type() == Operation)
+	{
+		Instruction const opcode = _item.instruction();
+		return opcode >= Instruction::DUP1 && opcode <= Instruction::DUP16;
+	}
+	return false;
+}
+
+/// @returns true if @a _item is an operation whose instruction lies in SWAP1..SWAP16.
+bool SemanticInformation::isSwapInstruction(AssemblyItem const& _item)
+{
+	if (_item.type() == Operation)
+	{
+		Instruction const opcode = _item.instruction();
+		return opcode >= Instruction::SWAP1 && opcode <= Instruction::SWAP16;
+	}
+	return false;
+}
+
+/// @returns true if @a _item compares equal to a JUMP or a JUMPI operation item.
+bool SemanticInformation::isJumpInstruction(AssemblyItem const& _item)
+{
+	if (_item == AssemblyItem(Instruction::JUMP))
+		return true;
+	return _item == AssemblyItem(Instruction::JUMPI);
+}
+
+/// @returns true if @a _item is one of the operations JUMP, JUMPI, RETURN, SUICIDE or STOP.
+bool SemanticInformation::altersControlFlow(AssemblyItem const& _item)
+{
+	if (_item.type() != Operation)
+		return false;
+
+	// note that CALL, CALLCODE and CREATE do not really alter the control flow, because we
+	// continue on the next instruction (unless an exception happens which can always happen)
+	Instruction const opcode = _item.instruction();
+	return
+		opcode == Instruction::JUMP ||
+		opcode == Instruction::JUMPI ||
+		opcode == Instruction::RETURN ||
+		opcode == Instruction::SUICIDE ||
+		opcode == Instruction::STOP;
+}
--- a/SemanticInformation.h
+++ b/SemanticInformation.h
@ -0,0 +1,51 @@
+/*
+	This file is part of cpp-ethereum.
+
+	cpp-ethereum is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	cpp-ethereum is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * @file SemanticInformation.h
+ * @author Christian <c@ethdev.com>
+ * @date 2015
+ * Helper to provide semantic information about assembly items.
+ */
+
+#pragma once
+
+
+namespace dev
+{
+namespace eth
+{
+
+class AssemblyItem;
+
+/**
+ * Helper functions to provide context-independent information about assembly items.
+ */
+struct SemanticInformation
+{
+	/// @returns true if the given item starts a new block for common subexpression analysis.
+	static bool breaksCSEAnalysisBlock(AssemblyItem const& _item);
+	/// @returns true if the item is a two-argument operation whose value does not depend on the
+	/// order of its arguments.
+	static bool isCommutativeOperation(AssemblyItem const& _item);
+	/// @returns true if the item is an operation in the range DUP1 to DUP16.
+	static bool isDupInstruction(AssemblyItem const& _item);
+	/// @returns true if the item is an operation in the range SWAP1 to SWAP16.
+	static bool isSwapInstruction(AssemblyItem const& _item);
+	/// @returns true if the item compares equal to a JUMP or JUMPI operation item.
+	static bool isJumpInstruction(AssemblyItem const& _item);
+	/// @returns true if the item is one of the operations JUMP, JUMPI, RETURN, SUICIDE or STOP.
+	/// CALL, CALLCODE and CREATE are not included.
+	static bool altersControlFlow(AssemblyItem const& _item);
+};
+
+}
+}
--- a/SourceLocation.h
+++ b/SourceLocation.h
@ -0,0 +1,89 @@
+/*
+    This file is part of cpp-ethereum.
+
+    cpp-ethereum is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    cpp-ethereum is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * @author Lefteris Karapetsas <lefteris@ethdev.com>
+ * @date 2015
+ * Represents a location in a source file
+ */
+
+#pragma once
+
+#include <memory>
+#include <ostream>
+#include <string>
+#include <tuple>
+#include <utility>
+
+namespace dev
+{
+
+/**
+ * Representation of an interval of source positions.
+ * The interval includes start and excludes end.
+ * A default-constructed location has start == end == -1 and no source name; isEmpty()
+ * reports true for it.
+ */
+struct SourceLocation
+{
+	/// Creates the empty location (start == end == -1, sourceName is null).
+	SourceLocation(): start(-1), end(-1) { }
+	/// Creates the interval [@a _start, @a _end) in the source named by @a _sourceName.
+	/// The name pointer is taken by value and moved into place to avoid an extra
+	/// reference count update.
+	SourceLocation(int _start, int _end, std::shared_ptr<std::string const> _sourceName):
+		start(_start), end(_end), sourceName(std::move(_sourceName)) { }
+
+	// Copy and move operations are intentionally left to the compiler: the implicit ones
+	// copy/move all three members, and not declaring a copy constructor keeps the implicit
+	// move constructor and move assignment available.
+
+	/// Compares the interval bounds only.
+	/// NOTE(review): sourceName is not taken into account here although operator< does
+	/// compare it - confirm that this asymmetry is intended.
+	bool operator==(SourceLocation const& _other) const { return start == _other.start && end == _other.end;}
+	bool operator!=(SourceLocation const& _other) const { return !operator==(_other); }
+	/// Ordering usable for sorted containers; defined below.
+	inline bool operator<(SourceLocation const& _other) const;
+	/// @returns true if both locations are non-empty, have equal source names and
+	/// @a _other lies completely inside this interval.
+	inline bool contains(SourceLocation const& _other) const;
+	/// @returns true if both locations are non-empty, have equal source names and the
+	/// two intervals overlap.
+	inline bool intersects(SourceLocation const& _other) const;
+
+	/// @returns true if both start and end are -1.
+	bool isEmpty() const { return start == -1 && end == -1; }
+
+	int start; ///< First position of the interval (inclusive).
+	int end; ///< Position just past the interval (exclusive).
+	std::shared_ptr<std::string const> sourceName; ///< Name of the source; may be null.
+};
+
+/// Stream output for SourceLocation (used e.g. in boost exceptions).
+/// Writes "NO_LOCATION_SPECIFIED" for an empty location and "<name>[<start>,<end>)"
+/// otherwise; the name part is omitted if the location has no source name.
+inline std::ostream& operator<<(std::ostream& _out, SourceLocation const& _location)
+{
+	if (_location.isEmpty())
+		return _out << "NO_LOCATION_SPECIFIED";
+	// isEmpty() only looks at start and end, so sourceName can still be null here and
+	// must not be dereferenced unconditionally.
+	if (_location.sourceName)
+		_out << *_location.sourceName;
+	return _out << "[" << _location.start << "," << _location.end << ")";
+}
+
+/// Strict weak ordering over source locations.
+/// Locations without a source name sort before all locations that have one. Among
+/// locations of the same kind the order is lexicographic by (source name, start, end),
+/// where the name component is skipped if a name is missing.
+bool SourceLocation::operator<(SourceLocation const& _other) const
+{
+	// If a name is missing it cannot be dereferenced: order by presence of the name first
+	// and fall back to the interval, so that distinct unnamed locations stay distinguishable.
+	if (!sourceName || !_other.sourceName)
+		return std::make_tuple(int(!!sourceName), start, end) < std::make_tuple(int(!!_other.sourceName), _other.start, _other.end);
+	// std::tie compares through references and thus avoids copying both names.
+	return std::tie(*sourceName, start, end) < std::tie(*_other.sourceName, _other.start, _other.end);
+}
+
+/// @returns true if both locations are non-empty, carry equal source names and the
+/// interval of @a _other lies completely inside this one.
+bool SourceLocation::contains(SourceLocation const& _other) const
+{
+	bool const comparable =
+		!isEmpty() &&
+		!_other.isEmpty() &&
+		sourceName &&
+		_other.sourceName &&
+		*sourceName == *_other.sourceName;
+	return comparable && start <= _other.start && _other.end <= end;
+}
+
+/// @returns true if both locations are non-empty, carry equal source names and the two
+/// half-open intervals share at least one position.
+bool SourceLocation::intersects(SourceLocation const& _other) const
+{
+	bool const comparable =
+		!isEmpty() &&
+		!_other.isEmpty() &&
+		sourceName &&
+		_other.sourceName &&
+		*sourceName == *_other.sourceName;
+	return comparable && _other.start < end && start < _other.end;
+}
+
+}