diff --git a/CMakeLists.txt b/CMakeLists.txt
index a02b779e8..18566bc72 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -26,10 +26,13 @@ include(EthUtils)
include(EthOptions)
configure_project(TESTS)
+add_subdirectory(libevmasm) # assembly library sources added by this change
add_subdirectory(libsolidity)
add_subdirectory(solc)
if (NOT EMSCRIPTEN)
+ add_subdirectory(liblll) # only configured for non-Emscripten builds
add_subdirectory(test)
+ add_subdirectory(lllc) # only configured for non-Emscripten builds
endif()
# TODO installation and packaging rules
diff --git a/libevmasm/Assembly.cpp b/libevmasm/Assembly.cpp
new file mode 100644
index 000000000..7277865e0
--- /dev/null
+++ b/libevmasm/Assembly.cpp
@@ -0,0 +1,545 @@
+/*
+ This file is part of cpp-ethereum.
+
+ cpp-ethereum is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ cpp-ethereum is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file Assembly.cpp
+ * @author Gav Wood
+ * @date 2014
+ */
+
+#include "Assembly.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+using namespace std;
+using namespace dev;
+using namespace dev::eth;
+
+/// Appends every item of @a _a to this assembly and merges its data, strings,
+/// sub-assemblies and library references. Tag numbers of copied Tag/PushTag items are
+/// shifted by this assembly's current tag count and sub indices of PushSub/PushSubSize
+/// items by the current number of sub-assemblies, so they stay unique after the merge.
+void Assembly::append(Assembly const& _a)
+{
+    auto depositAfterMerge = m_deposit + _a.deposit();
+    for (AssemblyItem item: _a.m_items)
+    {
+        switch (item.type())
+        {
+        case Tag:
+        case PushTag:
+            item.setData(item.data() + m_usedTags);
+            break;
+        case PushSub:
+        case PushSubSize:
+            item.setData(item.data() + m_subs.size());
+            break;
+        default:
+            break;
+        }
+        append(item);
+    }
+    // The per-item appends above already adjusted m_deposit; overwrite it with the value
+    // computed up front.
+    m_deposit = depositAfterMerge;
+    m_usedTags += _a.m_usedTags;
+    // Range insert keeps entries that already exist in this assembly.
+    m_data.insert(_a.m_data.begin(), _a.m_data.end());
+    m_strings.insert(_a.m_strings.begin(), _a.m_strings.end());
+    m_subs += _a.m_subs;
+    m_libraries.insert(_a.m_libraries.begin(), _a.m_libraries.end());
+
+    assert(!_a.m_baseDeposit);
+    assert(!_a.m_totalDeposit);
+}
+
+/// Appends @a _a and then emits one POP per unit by which the deposit of @a _a exceeds
+/// @a _deposit.
+/// @throws InvalidDeposit if @a _deposit is larger than the deposit of @a _a.
+void Assembly::append(Assembly const& _a, int _deposit)
+{
+    if (_deposit > _a.m_deposit)
+        BOOST_THROW_EXCEPTION(InvalidDeposit());
+
+    append(_a);
+    for (; _deposit < _a.m_deposit; ++_deposit)
+        append(Instruction::POP);
+}
+
+/// @returns the text that stream() writes when called with its default arguments.
+string Assembly::out() const
+{
+    stringstream buffer;
+    stream(buffer);
+    return buffer.str();
+}
+
+/// @returns an upper bound for the size of the assembled code plus data. The size of an
+/// item can depend on the address width, and the address width depends on the total size,
+/// so the width is increased from one byte until the resulting size fits into it.
+unsigned Assembly::bytesRequired() const
+{
+    // One extra byte plus the size of all data blobs; this part does not depend on the
+    // address width, so it is summed once.
+    unsigned fixedBytes = 1;
+    for (auto const& blob: m_data)
+        fixedBytes += blob.second.size();
+
+    unsigned addressWidth = 1;
+    while (true)
+    {
+        unsigned total = fixedBytes;
+        for (AssemblyItem const& item: m_items)
+            total += item.bytesRequired(addressWidth);
+        if (dev::bytesRequired(total) <= addressWidth)
+            return total;
+        ++addressWidth;
+    }
+}
+
+/// @returns the source text covered by @a _location, cut at the first line break (with
+/// "..." appended in that case). An empty string is returned when the location is empty or
+/// has an invalid range, when it carries no source name, when the named source is not in
+/// @a _sourceCodes, or when the start offset lies at or beyond the end of that source.
+string Assembly::locationFromSources(StringMap const& _sourceCodes, SourceLocation const& _location) const
+{
+    if (_location.isEmpty() || _sourceCodes.empty() || _location.start >= _location.end || _location.start < 0)
+        return "";
+    // sourceName is dereferenced below, so bail out if it is unset.
+    if (!_location.sourceName)
+        return "";
+
+    auto it = _sourceCodes.find(*_location.sourceName);
+    if (it == _sourceCodes.end())
+        return "";
+
+    string const& source = it->second;
+    if (size_t(_location.start) >= source.size())
+        return "";
+
+    // substr clamps the length, so an end offset past the source is harmless.
+    string cut = source.substr(_location.start, _location.end - _location.start);
+    auto newLinePos = cut.find('\n');
+    if (newLinePos != string::npos)
+        cut = cut.substr(0, newLinePos) + "...";
+
+    return cut;
+}
+
+ostream& Assembly::streamAsm(ostream& _out, string const& _prefix, StringMap const& _sourceCodes) const // Writes a textual listing (".code:" section, then an optional ".data:" section) to _out; every emitted line starts with _prefix. Returns _out.
+{
+ _out << _prefix << ".code:" << endl;
+ for (AssemblyItem const& i: m_items)
+ {
+ _out << _prefix;
+ switch (i.type())
+ {
+ case Operation:
+ _out << " " << instructionInfo(i.instruction()).name << "\t" << i.getJumpTypeAsString();
+ break;
+ case Push:
+ _out << " PUSH " << hex << i.data(); // hex stays active on _out until a later dec
+ break;
+ case PushString:
+ _out << " PUSH \"" << m_strings.at((h256)i.data()) << "\""; // item data is the key into m_strings
+ break;
+ case PushTag:
+ if (i.data() == 0) // tag 0 is the reserved error tag
+ _out << " PUSH [ErrorTag]";
+ else
+ _out << " PUSH [tag" << dec << i.data() << "]";
+ break;
+ case PushSub:
+ _out << " PUSH [$" << h256(i.data()).abridgedMiddle() << "]";
+ break;
+ case PushSubSize:
+ _out << " PUSH #[$" << h256(i.data()).abridgedMiddle() << "]";
+ break;
+ case PushProgramSize:
+ _out << " PUSHSIZE";
+ break;
+ case PushLibraryAddress:
+ _out << " PUSHLIB \"" << m_libraries.at(h256(i.data())) << "\""; // item data is the key into m_libraries
+ break;
+ case Tag:
+ _out << "tag" << dec << i.data() << ": " << endl << _prefix << " JUMPDEST"; // label line followed by the JUMPDEST it stands for
+ break;
+ case PushData:
+ _out << " PUSH [" << hex << (unsigned)i.data() << "]"; // only the value truncated to unsigned is printed
+ break;
+ default:
+ BOOST_THROW_EXCEPTION(InvalidOpcode()); // item type not handled by this listing
+ }
+ _out << "\t\t" << locationFromSources(_sourceCodes, i.location()) << endl; // source snippet for the item; empty when unavailable
+ }
+
+ if (!m_data.empty() || !m_subs.empty())
+ {
+ _out << _prefix << ".data:" << endl;
+ for (auto const& i: m_data)
+ if (u256(i.first) >= m_subs.size()) // NOTE(review): presumably skips keys that would collide with sub-assembly indices - confirm
+ _out << _prefix << " " << hex << (unsigned)(u256)i.first << ": " << dev::toHex(i.second) << endl;
+ for (size_t i = 0; i < m_subs.size(); ++i)
+ {
+ _out << _prefix << " " << hex << i << ": " << endl;
+ m_subs[i].stream(_out, _prefix + " ", _sourceCodes); // sub-assemblies are listed recursively with an extended prefix
+ }
+ }
+ return _out;
+}
+
+/// Builds the JSON object for one listing entry. "name", "begin" and "end" are always set;
+/// "value" and "jumpType" are only added when the respective argument is non-empty.
+Json::Value Assembly::createJsonValue(string _name, int _begin, int _end, string _value, string _jumpType) const
+{
+    Json::Value entry;
+    entry["name"] = _name;
+    entry["begin"] = _begin;
+    entry["end"] = _end;
+
+    bool const hasValue = !_value.empty();
+    if (hasValue)
+        entry["value"] = _value;
+
+    bool const hasJumpType = !_jumpType.empty();
+    if (hasJumpType)
+        entry["jumpType"] = _jumpType;
+
+    return entry;
+}
+
+/// @returns @a _value as formatted by a string stream with the hex manipulator applied.
+string toStringInHex(u256 _value)
+{
+    std::stringstream formatter;
+    formatter << hex;
+    formatter << _value;
+    return formatter.str();
+}
+
+Json::Value Assembly::streamAsmJson(ostream& _out, StringMap const& _sourceCodes) const // Builds and returns a JSON description of this assembly: a ".code" array of entries and, if present, a ".data" object. _sourceCodes is only forwarded to sub-assemblies.
+{
+ Json::Value root;
+
+ Json::Value collection(Json::arrayValue);
+ for (AssemblyItem const& i: m_items)
+ {
+ switch (i.type())
+ {
+ case Operation:
+ collection.append(
+ createJsonValue(instructionInfo(i.instruction()).name, i.location().start, i.location().end, i.getJumpTypeAsString())); // NOTE(review): the jump type is passed in the _value position, so it is emitted under "value" - confirm this is intended
+ break;
+ case Push:
+ collection.append(
+ createJsonValue("PUSH", i.location().start, i.location().end, toStringInHex(i.data()), i.getJumpTypeAsString()));
+ break;
+ case PushString:
+ collection.append(
+ createJsonValue("PUSH tag", i.location().start, i.location().end, m_strings.at((h256)i.data()))); // NOTE(review): string pushes are named "PUSH tag" - confirm this is intended
+ break;
+ case PushTag:
+ if (i.data() == 0) // tag 0 is the reserved error tag
+ collection.append(
+ createJsonValue("PUSH [ErrorTag]", i.location().start, i.location().end, ""));
+ else
+ collection.append(
+ createJsonValue("PUSH [tag]", i.location().start, i.location().end, string(i.data())));
+ break;
+ case PushSub:
+ collection.append(
+ createJsonValue("PUSH [$]", i.location().start, i.location().end, dev::toString(h256(i.data()))));
+ break;
+ case PushSubSize:
+ collection.append(
+ createJsonValue("PUSH #[$]", i.location().start, i.location().end, dev::toString(h256(i.data()))));
+ break;
+ case PushProgramSize:
+ collection.append(
+ createJsonValue("PUSHSIZE", i.location().start, i.location().end));
+ break;
+ case PushLibraryAddress:
+ collection.append(
+ createJsonValue("PUSHLIB", i.location().start, i.location().end, m_libraries.at(h256(i.data())))
+ );
+ break;
+ case Tag:
+ collection.append(
+ createJsonValue("tag", i.location().start, i.location().end, string(i.data()))); // a Tag item yields two entries: the label ...
+ collection.append(
+ createJsonValue("JUMPDEST", i.location().start, i.location().end)); // ... and the JUMPDEST
+ break;
+ case PushData:
+ collection.append(createJsonValue("PUSH data", i.location().start, i.location().end, toStringInHex(i.data())));
+ break;
+ default:
+ BOOST_THROW_EXCEPTION(InvalidOpcode()); // item type not handled by this listing
+ }
+ }
+
+ root[".code"] = collection;
+
+ if (!m_data.empty() || !m_subs.empty())
+ {
+ Json::Value data;
+ for (auto const& i: m_data)
+ if (u256(i.first) >= m_subs.size()) // NOTE(review): presumably skips keys that would collide with sub-assembly indices - confirm
+ data[toStringInHex((u256)i.first)] = toHex(i.second);
+
+ for (size_t i = 0; i < m_subs.size(); ++i)
+ {
+ std::stringstream hexStr;
+ hexStr << hex << i;
+ data[hexStr.str()] = m_subs[i].stream(_out, "", _sourceCodes, true); // recursion; the sub-assembly may itself write to _out (see the write below)
+ }
+ root[".data"] = data;
+ _out << root; // written only inside this branch, i.e. when there is data or a sub-assembly - NOTE(review): confirm this is intended
+ }
+ return root;
+}
+
+/// Dispatches to the textual or the JSON listing. In text mode the listing is written to
+/// @a _out and an empty Json::Value is returned; in JSON mode the result of streamAsmJson()
+/// is returned and @a _prefix is not used.
+Json::Value Assembly::stream(ostream& _out, string const& _prefix, StringMap const& _sourceCodes, bool _inJsonFormat) const
+{
+    if (!_inJsonFormat)
+    {
+        streamAsm(_out, _prefix, _sourceCodes);
+        return Json::Value();
+    }
+    return streamAsmJson(_out, _sourceCodes);
+}
+
+/// Appends a copy of @a _i, adds its deposit to the running stack deposit and, if the copy
+/// has no source location while a current one is set, assigns the current source location.
+/// @returns a reference to the stored item.
+AssemblyItem const& Assembly::append(AssemblyItem const& _i)
+{
+    m_deposit += _i.deposit();
+    m_items.push_back(_i);
+
+    AssemblyItem& stored = m_items.back();
+    bool const inheritLocation = stored.location().isEmpty() && !m_currentSourceLocation.isEmpty();
+    if (inheritLocation)
+        stored.setLocation(m_currentSourceLocation);
+    return stored;
+}
+
+/// Registers @a _identifier in the library table under its sha3 hash and returns a
+/// PushLibraryAddress item whose data is that hash.
+AssemblyItem Assembly::newPushLibraryAddress(string const& _identifier)
+{
+    h256 key(dev::sha3(_identifier));
+    m_libraries[key] = _identifier;
+    AssemblyItem pushItem(PushLibraryAddress, key);
+    return pushItem;
+}
+
+/// Inserts @a _i in front of all existing items. Unlike append(), the stack deposit and
+/// the item's source location are left untouched.
+void Assembly::injectStart(AssemblyItem const& _i)
+{
+    auto const front = m_items.begin();
+    m_items.insert(front, _i);
+}
+
+struct OptimiserChannel: public LogChannel { static const char* name() { return "OPT"; } static const int verbosity = 12; }; // log channel named "OPT" with verbosity 12
+#define copt dev::LogOutputStream() // stream used for the optimiser's log output; NOTE(review): OptimiserChannel is not referenced here - the template arguments of LogOutputStream appear to be missing, confirm against the original source
+
+Assembly& Assembly::optimise(bool _enable, bool _isCreation, size_t _runs) // Repeats block deduplication and common subexpression elimination until a pass changes nothing, then runs constant optimisation and finally optimises all sub-assemblies. Does nothing when _enable is false. Returns *this.
+{
+ if (!_enable)
+ return *this;
+
+ unsigned total = 0;
+ for (unsigned count = 1; count > 0; total += count) // loop while the previous pass reported at least one change; total sums the per-pass counts
+ {
+ copt << toString(*this);
+ count = 0;
+
+ copt << "Performing optimisation...";
+ // This only modifies PushTags, we have to run again to actually remove code.
+ BlockDeduplicator dedup(m_items);
+ if (dedup.deduplicate())
+ count++;
+
+ {
+ ControlFlowGraph cfg(m_items);
+ AssemblyItems optimisedItems; // candidate replacement for m_items, built block by block
+ for (BasicBlock const& block: cfg.optimisedBlocks())
+ {
+ assertThrow(!!block.startState, OptimizerException, "");
+ CommonSubexpressionEliminator eliminator(*block.startState);
+ auto iter = m_items.begin() + block.begin;
+ auto const end = m_items.begin() + block.end;
+ while (iter < end)
+ {
+ auto orig = iter; // start of the chunk consumed by this feedItems call
+ iter = eliminator.feedItems(iter, end);
+ bool shouldReplace = false;
+ AssemblyItems optimisedChunk;
+ try
+ {
+ optimisedChunk = eliminator.getOptimizedItems();
+ shouldReplace = (optimisedChunk.size() < size_t(iter - orig)); // accept only a strictly shorter chunk
+ }
+ catch (StackTooDeepException const&)
+ {
+ // This might happen if the opcode reconstruction is not as efficient
+ // as the hand-crafted code.
+ }
+ catch (ItemNotAvailableException const&)
+ {
+ // This might happen if e.g. associativity and commutativity rules
+ // reorganise the expression tree, but not all leaves are available.
+ }
+
+ if (shouldReplace)
+ {
+ copt << "Old size: " << (iter - orig) << ", new size: " << optimisedChunk.size();
+ count++;
+ optimisedItems += optimisedChunk;
+ }
+ else
+ copy(orig, iter, back_inserter(optimisedItems)); // keep the original chunk unchanged
+ }
+ }
+
+ if (optimisedItems.size() < m_items.size()) // adopt the rebuilt sequence only if it is shorter overall
+ {
+ m_items = move(optimisedItems);
+ count++;
+ }
+ }
+ }
+
+ total += ConstantOptimisationMethod::optimiseConstants(
+ _isCreation,
+ _isCreation ? 1 : _runs, // creation code is passed a run estimate of 1
+ *this,
+ m_items
+ );
+
+ copt << total << " optimisations done.";
+
+ for (auto& sub: m_subs)
+ sub.optimise(true, false, _runs); // sub-assemblies are always optimised, as non-creation code
+
+ return *this;
+}
+
+LinkerObject const& Assembly::assemble() const // Translates the items into bytecode, appends referenced sub-assemblies and data, patches all placeholders and returns the result, which is cached in m_assembledObject.
+{
+ if (!m_assembledObject.bytecode.empty()) // non-empty bytecode means a previous call already assembled this object
+ return m_assembledObject;
+
+ LinkerObject& ret = m_assembledObject; // m_assembledObject is mutable, so it can be filled from this const method
+
+ unsigned totalBytes = bytesRequired();
+ vector tagPos(m_usedTags); // bytecode offset of each tag, indexed by tag number; NOTE(review): the element types of these five containers appear to be missing in this text - confirm against the original source
+ map tagRef; // offset of a tag placeholder -> tag number
+ multimap dataRef; // data key -> offsets of the placeholders referring to it
+ multimap subRef; // sub-assembly index -> offsets of the placeholders referring to it
+ vector sizeRef; ///< Pointers to code locations where the size of the program is inserted
+ unsigned bytesPerTag = dev::bytesRequired(totalBytes);
+ byte tagPush = (byte)Instruction::PUSH1 - 1 + bytesPerTag; // PUSH opcode whose operand is bytesPerTag bytes wide
+
+ unsigned bytesRequiredIncludingData = bytesRequired();
+ for (auto const& sub: m_subs)
+ bytesRequiredIncludingData += sub.assemble().bytecode.size(); // assembles every sub-assembly, referenced or not
+
+ unsigned bytesPerDataRef = dev::bytesRequired(bytesRequiredIncludingData);
+ byte dataRefPush = (byte)Instruction::PUSH1 - 1 + bytesPerDataRef; // PUSH opcode whose operand is bytesPerDataRef bytes wide
+ ret.bytecode.reserve(bytesRequiredIncludingData);
+
+ for (AssemblyItem const& i: m_items)
+ {
+ // store position of the invalid jump destination
+ if (i.type() != Tag && tagPos[0] == 0)
+ tagPos[0] = ret.bytecode.size();
+
+ switch (i.type())
+ {
+ case Operation:
+ ret.bytecode.push_back((byte)i.data());
+ break;
+ case PushString:
+ {
+ ret.bytecode.push_back((byte)Instruction::PUSH32);
+ unsigned ii = 0;
+ for (auto j: m_strings.at((h256)i.data())) // copy at most the first 32 characters
+ if (++ii > 32)
+ break;
+ else
+ ret.bytecode.push_back((byte)j);
+ while (ii++ < 32) // pad shorter strings with zero bytes up to 32
+ ret.bytecode.push_back(0);
+ break;
+ }
+ case Push:
+ {
+ byte b = max(1, dev::bytesRequired(i.data())); // operand width, at least one byte
+ ret.bytecode.push_back((byte)Instruction::PUSH1 - 1 + b);
+ ret.bytecode.resize(ret.bytecode.size() + b);
+ bytesRef byr(&ret.bytecode.back() + 1 - b, b); // view on the b bytes just appended
+ toBigEndian(i.data(), byr);
+ break;
+ }
+ case PushTag:
+ {
+ ret.bytecode.push_back(tagPush);
+ tagRef[ret.bytecode.size()] = (unsigned)i.data(); // remember the placeholder; it is patched after the item loop
+ ret.bytecode.resize(ret.bytecode.size() + bytesPerTag);
+ break;
+ }
+ case PushData:
+ ret.bytecode.push_back(dataRefPush);
+ dataRef.insert(make_pair((h256)i.data(), ret.bytecode.size())); // patched when the data is appended below
+ ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
+ break;
+ case PushSub:
+ ret.bytecode.push_back(dataRefPush);
+ subRef.insert(make_pair(size_t(i.data()), ret.bytecode.size())); // patched when the sub-assembly is appended below
+ ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
+ break;
+ case PushSubSize:
+ {
+ auto s = m_subs.at(size_t(i.data())).assemble().bytecode.size();
+ i.setPushedValue(u256(s)); // records the resolved size on the item itself
+ byte b = max(1, dev::bytesRequired(s));
+ ret.bytecode.push_back((byte)Instruction::PUSH1 - 1 + b);
+ ret.bytecode.resize(ret.bytecode.size() + b);
+ bytesRef byr(&ret.bytecode.back() + 1 - b, b);
+ toBigEndian(s, byr);
+ break;
+ }
+ case PushProgramSize:
+ {
+ ret.bytecode.push_back(dataRefPush);
+ sizeRef.push_back(ret.bytecode.size()); // patched with the final size at the very end
+ ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
+ break;
+ }
+ case PushLibraryAddress:
+ ret.bytecode.push_back(byte(Instruction::PUSH20));
+ ret.linkReferences[ret.bytecode.size()] = m_libraries.at(i.data()); // the 20 operand bytes are left zero and recorded as a link reference
+ ret.bytecode.resize(ret.bytecode.size() + 20);
+ break;
+ case Tag:
+ tagPos[(unsigned)i.data()] = ret.bytecode.size(); // NOTE(review): written before the check below and without a bounds check on the tag number
+ assertThrow(i.data() != 0, AssemblyException, "");
+ ret.bytecode.push_back((byte)Instruction::JUMPDEST);
+ break;
+ default:
+ BOOST_THROW_EXCEPTION(InvalidOpcode());
+ }
+ }
+ for (auto const& i: tagRef)
+ {
+ bytesRef r(ret.bytecode.data() + i.first, bytesPerTag);
+ auto tag = i.second;
+ if (tag >= tagPos.size()) // unknown tag numbers are redirected to tag 0
+ tag = 0;
+ if (tag == 0)
+ assertThrow(tagPos[tag] != 0, AssemblyException, "");
+
+ toBigEndian(tagPos[tag], r);
+ }
+
+ if (!dataRef.empty() && !subRef.empty())
+ ret.bytecode.push_back(0); // NOTE(review): presumably a separator byte between code and appended data - confirm the condition
+ for (size_t i = 0; i < m_subs.size(); ++i)
+ {
+ auto references = subRef.equal_range(i);
+ if (references.first == references.second) // sub-assemblies that are never referenced are not appended
+ continue;
+ for (auto ref = references.first; ref != references.second; ++ref)
+ {
+ bytesRef r(ret.bytecode.data() + ref->second, bytesPerDataRef);
+ toBigEndian(ret.bytecode.size(), r); // the sub-assembly starts at the current end of the bytecode
+ }
+ ret.append(m_subs[i].assemble());
+ }
+ for (auto const& dataItem: m_data)
+ {
+ auto references = dataRef.equal_range(dataItem.first);
+ if (references.first == references.second) // data that is never referenced is not appended
+ continue;
+ for (auto ref = references.first; ref != references.second; ++ref)
+ {
+ bytesRef r(ret.bytecode.data() + ref->second, bytesPerDataRef);
+ toBigEndian(ret.bytecode.size(), r); // the data starts at the current end of the bytecode
+ }
+ ret.bytecode += dataItem.second;
+ }
+ for (unsigned pos: sizeRef)
+ {
+ bytesRef r(ret.bytecode.data() + pos, bytesPerDataRef);
+ toBigEndian(ret.bytecode.size(), r); // final size including appended sub-assemblies and data
+ }
+ return ret;
+}
diff --git a/libevmasm/Assembly.h b/libevmasm/Assembly.h
new file mode 100644
index 000000000..28328277d
--- /dev/null
+++ b/libevmasm/Assembly.h
@@ -0,0 +1,151 @@
+/*
+ This file is part of cpp-ethereum.
+
+ cpp-ethereum is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ cpp-ethereum is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file Assembly.h
+ * @author Gav Wood
+ * @date 2014
+ */
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "Exceptions.h"
+#include
+
+namespace Json
+{
+class Value;
+}
+namespace dev
+{
+namespace eth
+{
+
+/// A sequence of assembly items together with the data blobs, strings, sub-assemblies and
+/// library identifiers those items refer to. The class tracks the net stack deposit of the
+/// appended items and can print, optimise and assemble itself.
+class Assembly
+{
+public:
+    Assembly() {}
+
+    /// Item factories. newTag() and newPushTag() each consume a fresh tag number.
+    AssemblyItem newTag() { return AssemblyItem(Tag, m_usedTags++); }
+    AssemblyItem newPushTag() { return AssemblyItem(PushTag, m_usedTags++); }
+    /// Stores @a _data under its sha3 hash and returns a PushData item referring to it.
+    AssemblyItem newData(bytes const& _data) { h256 h(sha3(asString(_data))); m_data[h] = _data; return AssemblyItem(PushData, h); }
+    /// Stores a copy of @a _sub and returns a PushSub item holding its index.
+    AssemblyItem newSub(Assembly const& _sub) { m_subs.push_back(_sub); return AssemblyItem(PushSub, m_subs.size() - 1); }
+    Assembly const& sub(size_t _sub) const { return m_subs.at(_sub); }
+    Assembly& sub(size_t _sub) { return m_subs.at(_sub); }
+    /// Stores @a _data under its sha3 hash and returns a PushString item referring to it.
+    AssemblyItem newPushString(std::string const& _data) { h256 h(sha3(_data)); m_strings[h] = _data; return AssemblyItem(PushString, h); }
+    AssemblyItem newPushSubSize(u256 const& _subId) { return AssemblyItem(PushSubSize, _subId); }
+    AssemblyItem newPushLibraryAddress(std::string const& _identifier);
+
+    /// Appends a fresh tag and returns it.
+    AssemblyItem append() { return append(newTag()); }
+    void append(Assembly const& _a);
+    void append(Assembly const& _a, int _deposit);
+    AssemblyItem const& append(AssemblyItem const& _i);
+    AssemblyItem const& append(std::string const& _data) { return append(newPushString(_data)); }
+    AssemblyItem const& append(bytes const& _data) { return append(newData(_data)); }
+    /// Adds @a _a as a sub-assembly, appends a push of its size and returns the PushSub item.
+    AssemblyItem appendSubSize(Assembly const& _a) { auto ret = newSub(_a); append(newPushSubSize(ret.data())); return ret; }
+    /// Pushes the final size of the current assembly itself. Use this when the code is modified
+    /// after compilation and CODESIZE is not an option.
+    void appendProgramSize() { append(AssemblyItem(PushProgramSize)); }
+    void appendLibraryAddress(std::string const& _identifier) { append(newPushLibraryAddress(_identifier)); }
+
+    /// Jump helpers: each appends a tag push followed by JUMP or JUMPI and returns the
+    /// appended tag push. The overloads without argument use a fresh tag.
+    AssemblyItem appendJump() { auto ret = append(newPushTag()); append(Instruction::JUMP); return ret; }
+    AssemblyItem appendJumpI() { auto ret = append(newPushTag()); append(Instruction::JUMPI); return ret; }
+    AssemblyItem appendJump(AssemblyItem const& _tag) { auto ret = append(_tag.pushTag()); append(Instruction::JUMP); return ret; }
+    AssemblyItem appendJumpI(AssemblyItem const& _tag) { auto ret = append(_tag.pushTag()); append(Instruction::JUMPI); return ret; }
+    /// @returns a push of the reserved tag 0.
+    AssemblyItem errorTag() { return AssemblyItem(PushTag, 0); }
+
+    /// Appends @a _d using whichever append() overload accepts it.
+    template <class T> Assembly& operator<<(T const& _d) { append(_d); return *this; }
+    AssemblyItems const& items() const { return m_items; }
+    AssemblyItem const& back() const { return m_items.back(); }
+    /// @returns the string of the last item if it is a PushString, otherwise an empty string.
+    std::string backString() const { return m_items.size() && m_items.back().type() == PushString ? m_strings.at((h256)m_items.back().data()) : std::string(); }
+
+    /// Deposit bookkeeping for alternative code paths; donePath() throws InvalidDeposit when
+    /// the deposits of the paths disagree.
+    void onePath() { if (asserts(!m_totalDeposit && !m_baseDeposit)) BOOST_THROW_EXCEPTION(InvalidDeposit()); m_baseDeposit = m_deposit; m_totalDeposit = INT_MAX; }
+    void otherPath() { donePath(); m_totalDeposit = m_deposit; m_deposit = m_baseDeposit; }
+    void donePaths() { donePath(); m_totalDeposit = m_baseDeposit = 0; }
+    /// ignored() remembers the current deposit, endIgnored() restores it.
+    void ignored() { m_baseDeposit = m_deposit; }
+    void endIgnored() { m_deposit = m_baseDeposit; m_baseDeposit = 0; }
+
+    /// Appends POP instructions until the deposit is no larger than @a _deposit.
+    void popTo(int _deposit) { while (m_deposit > _deposit) append(Instruction::POP); }
+
+    void injectStart(AssemblyItem const& _i);
+    std::string out() const;
+    int deposit() const { return m_deposit; }
+    void adjustDeposit(int _adjustment) { m_deposit += _adjustment; if (asserts(m_deposit >= 0)) BOOST_THROW_EXCEPTION(InvalidDeposit()); }
+    void setDeposit(int _deposit) { m_deposit = _deposit; if (asserts(m_deposit >= 0)) BOOST_THROW_EXCEPTION(InvalidDeposit()); }
+
+    /// Changes the source location used for each appended item.
+    void setSourceLocation(SourceLocation const& _location) { m_currentSourceLocation = _location; }
+
+    /// Assembles the assembly into bytecode. The assembly should not be modified after this call.
+    LinkerObject const& assemble() const;
+    bytes const& data(h256 const& _i) const { return m_data.at(_i); }
+
+    /// Modify (if @a _enable is set) and return the current assembly such that creation and
+    /// execution gas usage is optimised. @a _isCreation should be true for the top-level assembly.
+    /// @a _runs specifies an estimate on how often each opcode in this assembly will be executed,
+    /// i.e. use a small value to optimise for size and a large value to optimise for runtime.
+    Assembly& optimise(bool _enable, bool _isCreation = true, size_t _runs = 200);
+    /// Writes a listing of the assembly to @a _out, either as text or, if @a _inJsonFormat
+    /// is set, as JSON; in the latter case the JSON value is also returned.
+    Json::Value stream(
+        std::ostream& _out,
+        std::string const& _prefix = "",
+        StringMap const& _sourceCodes = StringMap(),
+        bool _inJsonFormat = false
+    ) const;
+
+protected:
+    std::string locationFromSources(StringMap const& _sourceCodes, SourceLocation const& _location) const;
+    void donePath() { if (m_totalDeposit != INT_MAX && m_totalDeposit != m_deposit) BOOST_THROW_EXCEPTION(InvalidDeposit()); }
+    unsigned bytesRequired() const;
+
+private:
+    Json::Value streamAsmJson(std::ostream& _out, StringMap const& _sourceCodes) const;
+    std::ostream& streamAsm(std::ostream& _out, std::string const& _prefix, StringMap const& _sourceCodes) const;
+    Json::Value createJsonValue(std::string _name, int _begin, int _end, std::string _value = std::string(), std::string _jumpType = std::string()) const;
+
+protected:
+    // 0 is reserved for exception
+    unsigned m_usedTags = 1;
+    AssemblyItems m_items;
+    std::map<h256, bytes> m_data; ///< Data blobs, keyed by the hash stored in PushData items.
+    std::vector<Assembly> m_subs; ///< Sub-assemblies, addressed by index from PushSub items.
+    std::map<h256, std::string> m_strings; ///< Strings, keyed by the hash stored in PushString items.
+    std::map<h256, std::string> m_libraries; ///< Identifiers of libraries to be linked.
+
+    mutable LinkerObject m_assembledObject; ///< Result of assemble(), filled on first use.
+
+    int m_deposit = 0;
+    int m_baseDeposit = 0;
+    int m_totalDeposit = 0;
+
+    SourceLocation m_currentSourceLocation;
+};
+
+/// Writes the textual listing of @a _a (stream() with default arguments) to @a _out and
+/// returns @a _out so that output can be chained.
+inline std::ostream& operator<<(std::ostream& _out, Assembly const& _a)
+{
+    std::ostream& target = _out;
+    _a.stream(target);
+    return target;
+}
+
+}
+}
diff --git a/libevmasm/AssemblyItem.cpp b/libevmasm/AssemblyItem.cpp
new file mode 100644
index 000000000..d70510646
--- /dev/null
+++ b/libevmasm/AssemblyItem.cpp
@@ -0,0 +1,134 @@
+/*
+ This file is part of cpp-ethereum.
+
+ cpp-ethereum is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ cpp-ethereum is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file AssemblyItem.cpp
+ * @author Gav Wood
+ * @date 2014
+ */
+
+#include "AssemblyItem.h"
+#include
+
+using namespace std;
+using namespace dev;
+using namespace dev::eth;
+
+/// @returns an upper bound for the number of bytes this item occupies in the bytecode,
+/// assuming that tag, data and sub references take @a _addressLength bytes.
+/// @throws InvalidOpcode for item types that have no byte representation.
+unsigned AssemblyItem::bytesRequired(unsigned _addressLength) const
+{
+    switch (m_type)
+    {
+    case Operation:
+    case Tag: // 1 byte for the JUMPDEST
+        return 1;
+    case PushString:
+        return 33;
+    case Push:
+        // Opcode plus operand; the operand is at least one byte wide. The explicit template
+        // argument is needed because the two arguments have different integer types.
+        return 1 + max<unsigned>(1, dev::bytesRequired(m_data));
+    case PushSubSize:
+    case PushProgramSize:
+        return 4; // worst case: a 16MB program
+    case PushTag:
+    case PushData:
+    case PushSub:
+        return 1 + _addressLength;
+    case PushLibraryAddress:
+        return 21;
+    default:
+        break;
+    }
+    BOOST_THROW_EXCEPTION(InvalidOpcode());
+}
+
+/// @returns the net change of the stack height caused by this item: results minus
+/// arguments for an operation, one for every kind of push and zero for anything else.
+int AssemblyItem::deposit() const
+{
+    if (m_type == Operation)
+        return instructionInfo(instruction()).ret - instructionInfo(instruction()).args;
+
+    bool const pushesOneValue =
+        m_type == Push ||
+        m_type == PushString ||
+        m_type == PushTag ||
+        m_type == PushData ||
+        m_type == PushSub ||
+        m_type == PushSubSize ||
+        m_type == PushProgramSize ||
+        m_type == PushLibraryAddress;
+    return pushesOneValue ? 1 : 0;
+}
+
+/// @returns "[in]" for a jump into a function, "[out]" for a jump out of a function and
+/// an empty string for every other jump type.
+string AssemblyItem::getJumpTypeAsString() const
+{
+    if (m_jumpType == JumpType::IntoFunction)
+        return "[in]";
+    if (m_jumpType == JumpType::OutOfFunction)
+        return "[out]";
+    return "";
+}
+
+ostream& dev::eth::operator<<(ostream& _out, AssemblyItem const& _item) // Writes a short textual form of a single item to _out and returns _out. The hex manipulator applied in some branches is not reset by this function.
+{
+ switch (_item.type())
+ {
+ case Operation:
+ _out << " " << instructionInfo(_item.instruction()).name;
+ if (_item.instruction() == eth::Instruction::JUMP || _item.instruction() == eth::Instruction::JUMPI) // the jump type is printed for jumps only
+ _out << "\t" << _item.getJumpTypeAsString();
+ break;
+ case Push:
+ _out << " PUSH " << hex << _item.data();
+ break;
+ case PushString:
+ _out << " PushString" << hex << (unsigned)_item.data(); // NOTE(review): no space between the label and the value, unlike the other branches - confirm this is intended
+ break;
+ case PushTag:
+ _out << " PushTag " << _item.data(); // printed in whatever base _out currently uses
+ break;
+ case Tag:
+ _out << " Tag " << _item.data(); // printed in whatever base _out currently uses
+ break;
+ case PushData:
+ _out << " PushData " << hex << (unsigned)_item.data(); // only the value truncated to unsigned is printed
+ break;
+ case PushSub:
+ _out << " PushSub " << hex << h256(_item.data()).abridgedMiddle();
+ break;
+ case PushSubSize:
+ _out << " PushSubSize " << hex << h256(_item.data()).abridgedMiddle();
+ break;
+ case PushProgramSize:
+ _out << " PushProgramSize";
+ break;
+ case PushLibraryAddress:
+ _out << " PushLibraryAddress " << hex << h256(_item.data()).abridgedMiddle();
+ break;
+ case UndefinedItem:
+ _out << " ???";
+ break;
+ default:
+ BOOST_THROW_EXCEPTION(InvalidOpcode()); // value outside the known item types
+ }
+ return _out;
+}
diff --git a/libevmasm/AssemblyItem.h b/libevmasm/AssemblyItem.h
new file mode 100644
index 000000000..795b5a8a2
--- /dev/null
+++ b/libevmasm/AssemblyItem.h
@@ -0,0 +1,123 @@
+/*
+ This file is part of cpp-ethereum.
+
+ cpp-ethereum is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ cpp-ethereum is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file AssemblyItem.h
+ * @author Gav Wood
+ * @date 2014
+ */
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include "Exceptions.h"
+
+namespace dev
+{
+namespace eth
+{
+
+enum AssemblyItemType { // Kinds of items an assembly can contain; the meaning of an item's data depends on its kind.
+ UndefinedItem, // acts as a wildcard in AssemblyItem::match()
+ Operation, // a single instruction; the data holds the opcode
+ Push, // push of the value held in the data
+ PushString, // push of a string; the data is the key under which the assembly stores it
+ PushTag, // push of the position of the tag whose number is the data
+ PushSub, // push referring to the sub-assembly whose index is the data
+ PushSubSize, // push of the size of the sub-assembly whose index is the data
+ PushProgramSize, // push of the final size of the assembled program
+ Tag, // jump destination; the data is the tag number
+ PushData, // push referring to a data blob; the data is the key under which the assembly stores it
+ PushLibraryAddress ///< Push a currently unknown address of another (library) contract.
+};
+
+class Assembly;
+
+/// One element of an assembly: a type tag plus a data word whose meaning depends on the
+/// type, together with an optional source location and jump annotation.
+class AssemblyItem
+{
+public:
+    enum class JumpType { Ordinary, IntoFunction, OutOfFunction };
+
+    /// Creates a Push item for the value @a _push.
+    AssemblyItem(u256 _push, SourceLocation const& _location = SourceLocation()):
+        AssemblyItem(Push, _push, _location) { }
+    /// Creates an Operation item for the instruction @a _i.
+    AssemblyItem(Instruction _i, SourceLocation const& _location = SourceLocation()):
+        AssemblyItem(Operation, byte(_i), _location) { }
+    /// Creates an item of the given type with the given raw data.
+    AssemblyItem(AssemblyItemType _type, u256 _data = 0, SourceLocation const& _location = SourceLocation()):
+        m_type(_type),
+        m_data(_data),
+        m_location(_location)
+    {
+    }
+
+    /// Conversions between the Tag and the PushTag form of the same tag number. Both throw
+    /// if this item is neither a Tag nor a PushTag. The source location is not carried over.
+    AssemblyItem tag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(Tag, m_data); }
+    AssemblyItem pushTag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(PushTag, m_data); }
+
+    AssemblyItemType type() const { return m_type; }
+    u256 const& data() const { return m_data; }
+    void setType(AssemblyItemType const _type) { m_type = _type; }
+    void setData(u256 const& _data) { m_data = _data; }
+
+    /// @returns the instruction of this item (only valid if type() == Operation)
+    Instruction instruction() const { return Instruction(byte(m_data)); }
+
+    /// @returns true if the type and data of the items are equal.
+    bool operator==(AssemblyItem const& _other) const { return m_type == _other.m_type && m_data == _other.m_data; }
+    bool operator!=(AssemblyItem const& _other) const { return !operator==(_other); }
+    /// Less-than operator compatible with operator==.
+    bool operator<(AssemblyItem const& _other) const { return std::tie(m_type, m_data) < std::tie(_other.m_type, _other.m_data); }
+
+    /// @returns an upper bound for the number of bytes required by this item, assuming that
+    /// the value of a jump tag takes @a _addressLength bytes.
+    unsigned bytesRequired(unsigned _addressLength) const;
+    /// @returns the net change of the stack height caused by this item.
+    int deposit() const;
+
+    /// @returns true if @a _i is an UndefinedItem (wildcard) or has the same type as this
+    /// item; for operations the data has to be equal as well.
+    bool match(AssemblyItem const& _i) const { return _i.m_type == UndefinedItem || (m_type == _i.m_type && (m_type != Operation || m_data == _i.m_data)); }
+    void setLocation(SourceLocation const& _location) { m_location = _location; }
+    SourceLocation const& location() const { return m_location; }
+
+    void setJumpType(JumpType _jumpType) { m_jumpType = _jumpType; }
+    JumpType getJumpType() const { return m_jumpType; }
+    std::string getJumpTypeAsString() const;
+
+    /// Records the value determined for this item during assembly. Callable on const items
+    /// because the stored pointer is mutable.
+    void setPushedValue(u256 const& _value) const { m_pushedValue = std::make_shared<u256>(_value); }
+    /// @returns the recorded value or a null pointer if none has been set.
+    u256 const* pushedValue() const { return m_pushedValue.get(); }
+
+private:
+    AssemblyItemType m_type;
+    u256 m_data;
+    SourceLocation m_location;
+    JumpType m_jumpType = JumpType::Ordinary;
+    /// Pushed value for operations with data to be determined during assembly stage,
+    /// e.g. PushSubSize, PushTag, PushSub, etc.
+    mutable std::shared_ptr<u256> m_pushedValue;
+};
+
+/// Ordered sequence of assembly items.
+using AssemblyItems = std::vector<AssemblyItem>;
+
+/// Writes a short textual form of a single item to @a _out.
+std::ostream& operator<<(std::ostream& _out, AssemblyItem const& _item);
+/// Writes all items of @a _items to @a _out in order, using the single-item operator.
+inline std::ostream& operator<<(std::ostream& _out, AssemblyItems const& _items)
+{
+    for (AssemblyItem const& item: _items)
+        _out << item;
+    return _out;
+}
+
+}
+}
diff --git a/libevmasm/BlockDeduplicator.cpp b/libevmasm/BlockDeduplicator.cpp
new file mode 100644
index 000000000..d930ea22b
--- /dev/null
+++ b/libevmasm/BlockDeduplicator.cpp
@@ -0,0 +1,126 @@
+/*
+ This file is part of cpp-ethereum.
+
+ cpp-ethereum is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ cpp-ethereum is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * @file BlockDeduplicator.cpp
+ * @author Christian
+ * @date 2015
+ * Unifies basic blocks that share content.
+ */
+
+#include
+#include
+#include
+#include
+
+using namespace std;
+using namespace dev;
+using namespace dev::eth;
+
+
+/// Repeatedly looks for blocks (item sequences starting at a Tag) that compare equivalent
+/// under the comparator below and redirects every PushTag that refers to such a duplicate
+/// to the equivalent block that was registered first. Passes are repeated until one of them
+/// does not change any item.
+/// @returns true if at least one PushTag was rewritten; false otherwise, which includes the
+/// case where the reserved virtual tag already occurs in the items.
+bool BlockDeduplicator::deduplicate()
+{
+	// Compares indices based on the suffix that starts there, ignoring tags and stopping at
+	// opcodes that stop the control flow.
+
+	// Virtual tag that signifies "the current block" and which is used to optimise loops.
+	// We abort if this virtual tag actually exists.
+	AssemblyItem pushSelf(PushTag, u256(-4));
+	if (
+		std::count(m_items.cbegin(), m_items.cend(), pushSelf.tag()) ||
+		std::count(m_items.cbegin(), m_items.cend(), pushSelf.pushTag())
+	)
+		return false;
+
+	// Strict weak ordering on item indices; two indices are equivalent if the blocks
+	// starting there consist of the same items (after unifying self-references).
+	function<bool(size_t, size_t)> comparator = [&](size_t _i, size_t _j)
+	{
+		if (_i == _j)
+			return false;
+
+		// To compare recursive loops, we have to already unify PushTag opcodes of the
+		// block's own tag.
+		AssemblyItem pushFirstTag(pushSelf);
+		AssemblyItem pushSecondTag(pushSelf);
+
+		if (_i < m_items.size() && m_items.at(_i).type() == Tag)
+			pushFirstTag = m_items.at(_i).pushTag();
+		if (_j < m_items.size() && m_items.at(_j).type() == Tag)
+			pushSecondTag = m_items.at(_j).pushTag();
+
+		BlockIterator first(m_items.begin() + _i, m_items.end(), &pushFirstTag, &pushSelf);
+		BlockIterator second(m_items.begin() + _j, m_items.end(), &pushSecondTag, &pushSelf);
+		BlockIterator end(m_items.end(), m_items.end());
+
+		// The leading tag itself is not part of the compared content.
+		if (first != end && (*first).type() == Tag)
+			++first;
+		if (second != end && (*second).type() == Tag)
+			++second;
+
+		return std::lexicographical_compare(first, end, second, end);
+	};
+
+	// Counts the passes that changed something; the final, unchanged pass is not counted.
+	size_t iterations = 0;
+	for (; ; ++iterations)
+	{
+		//@todo this should probably be optimized.
+		set<size_t, function<bool(size_t, size_t)>> blocksSeen(comparator);
+		// Maps the tag of a duplicate block to the tag of the block it is replaced by.
+		map<u256, u256> tagReplacement;
+		for (size_t i = 0; i < m_items.size(); ++i)
+		{
+			if (m_items.at(i).type() != Tag)
+				continue;
+			auto it = blocksSeen.find(i);
+			if (it == blocksSeen.end())
+				blocksSeen.insert(i);
+			else
+				tagReplacement[m_items.at(i).data()] = m_items.at(*it).data();
+		}
+
+		bool changed = false;
+		for (AssemblyItem& item: m_items)
+			if (item.type() == PushTag && tagReplacement.count(item.data()))
+			{
+				changed = true;
+				item.setData(tagReplacement.at(item.data()));
+			}
+		if (!changed)
+			break;
+	}
+	return iterations > 0;
+}
+
+/// Moves to the next item of the current block. If the current item alters the control
+/// flow and is not JUMPI, the iterator jumps directly to the end; otherwise it advances by
+/// one item and then skips any Tag items. Incrementing an iterator that is already at the
+/// end does nothing.
+BlockDeduplicator::BlockIterator& BlockDeduplicator::BlockIterator::operator++()
+{
+	if (it != end)
+	{
+		bool const terminatesBlock =
+			SemanticInformation::altersControlFlow(*it) &&
+			!(*it == AssemblyItem(eth::Instruction::JUMPI));
+		if (terminatesBlock)
+			it = end;
+		else
+			do
+				++it;
+			while (it != end && it->type() == Tag);
+	}
+	return *this;
+}
+
+/// @returns the current item, or the configured replacement if both replacement pointers
+/// are set and the current item equals the item to be replaced.
+AssemblyItem const& BlockDeduplicator::BlockIterator::operator*() const
+{
+	bool const substitute = replaceItem && replaceWith && *it == *replaceItem;
+	return substitute ? *replaceWith : *it;
+}
diff --git a/libevmasm/BlockDeduplicator.h b/libevmasm/BlockDeduplicator.h
new file mode 100644
index 000000000..c48835fd4
--- /dev/null
+++ b/libevmasm/BlockDeduplicator.h
@@ -0,0 +1,77 @@
+/*
+ This file is part of cpp-ethereum.
+
+ cpp-ethereum is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ cpp-ethereum is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * @file BlockDeduplicator.h
+ * @author Christian
+ * @date 2015
+ * Unifies basic blocks that share content.
+ */
+
+#pragma once
+
+#include
+#include
+#include
+
+namespace dev
+{
+namespace eth
+{
+
+class AssemblyItem;
+using AssemblyItems = std::vector<AssemblyItem>;
+
+/**
+ * Optimizer class to be used to unify blocks that share content.
+ * Modifies the passed vector in place.
+ */
+class BlockDeduplicator
+{
+public:
+	/// @param _items the items to work on; only a reference is stored, so the vector has
+	/// to outlive this object.
+	BlockDeduplicator(AssemblyItems& _items): m_items(_items) {}
+	/// @returns true if something was changed
+	bool deduplicate();
+
+private:
+	/// Iterator that skips tags and skips to the end if (all branches of) the control
+	/// flow does not continue to the next instruction.
+	/// If the arguments are supplied to the constructor, replaces items on the fly.
+	struct BlockIterator: std::iterator<std::forward_iterator_tag, AssemblyItem const>
+	{
+	public:
+		/// @param _it position to start at
+		/// @param _end end of the underlying item sequence
+		/// @param _replaceItem item that is substituted on dereference (optional)
+		/// @param _replaceWith item returned in place of @a _replaceItem (optional)
+		BlockIterator(
+			AssemblyItems::const_iterator _it,
+			AssemblyItems::const_iterator _end,
+			AssemblyItem const* _replaceItem = nullptr,
+			AssemblyItem const* _replaceWith = nullptr
+		):
+			it(_it), end(_end), replaceItem(_replaceItem), replaceWith(_replaceWith) {}
+		BlockIterator& operator++();
+		/// Iterators compare equal if they point to the same position; the end and the
+		/// replacement settings are not taken into account.
+		bool operator==(BlockIterator const& _other) const { return it == _other.it; }
+		bool operator!=(BlockIterator const& _other) const { return it != _other.it; }
+		AssemblyItem const& operator*() const;
+		AssemblyItems::const_iterator it;
+		AssemblyItems::const_iterator end;
+		AssemblyItem const* replaceItem;
+		AssemblyItem const* replaceWith;
+	};
+
+	/// The item sequence that is modified in place by deduplicate().
+	AssemblyItems& m_items;
+};
+
+}
+}
diff --git a/libevmasm/CMakeLists.txt b/libevmasm/CMakeLists.txt
new file mode 100644
index 000000000..424644cad
--- /dev/null
+++ b/libevmasm/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Build definition for the evmasm library target.
+
+# Append the STATICLIB preprocessor definition to the C++ compiler flags.
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSTATICLIB")
+
+# Collect all source files of this directory into SRC_LIST.
+aux_source_directory(. SRC_LIST)
+
+# Name of the target; note that despite the variable name this is used with add_library below.
+set(EXECUTABLE evmasm)
+
+# Headers are listed with the target and installed below.
+file(GLOB HEADERS "*.h")
+
+# Put the parent directory first on the include path.
+include_directories(BEFORE ..)
+add_library(${EXECUTABLE} ${SRC_LIST} ${HEADERS})
+# eth_use is a project-provided command; presumably it wires up the Eth::evmcore dependency - TODO confirm.
+eth_use(${EXECUTABLE} REQUIRED Eth::evmcore)
+
+# Install the built target and its headers.
+install( TARGETS ${EXECUTABLE} RUNTIME DESTINATION bin ARCHIVE DESTINATION lib LIBRARY DESTINATION lib )
+install( FILES ${HEADERS} DESTINATION include/${EXECUTABLE} )
+
diff --git a/libevmasm/CommonSubexpressionEliminator.cpp b/libevmasm/CommonSubexpressionEliminator.cpp
new file mode 100644
index 000000000..0797dd294
--- /dev/null
+++ b/libevmasm/CommonSubexpressionEliminator.cpp
@@ -0,0 +1,506 @@
+/*
+ This file is part of cpp-ethereum.
+
+ cpp-ethereum is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ cpp-ethereum is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * @file CommonSubexpressionEliminator.cpp
+ * @author Christian
+ * @date 2015
+ * Optimizer step for common subexpression elimination and stack reorganisation.
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+using namespace std;
+using namespace dev;
+using namespace dev::eth;
+
+/// Generates an item sequence that transforms the initial state into the current state,
+/// followed by the (possibly simplified) breaking item if there is one.
+/// The guard object declared below resets this object for the next run: the breaking item
+/// and the recorded store operations are cleared and both the initial and the current state
+/// are set to the current state with the breaking item fed into it.
+/// @returns the generated items.
+vector<AssemblyItem> CommonSubexpressionEliminator::getOptimizedItems()
+{
+	optimizeBreakingItem();
+
+	// State to continue from afterwards: the current state plus the effect of the breaking item.
+	KnownState nextInitialState = m_state;
+	if (m_breakingItem)
+		nextInitialState.feedItem(*m_breakingItem);
+	KnownState nextState = nextInitialState;
+
+	// NOTE(review): presumably ScopeGuard runs the lambda when this function is left,
+	// whether normally or by exception - confirm against its definition.
+	ScopeGuard reset([&]()
+	{
+		m_breakingItem = nullptr;
+		m_storeOperations.clear();
+		m_initialState = move(nextInitialState);
+		m_state = move(nextState);
+	});
+
+	// Stack contents (height -> expression class) before and after the analysed items.
+	map<int, Id> initialStackContents;
+	map<int, Id> targetStackContents;
+	// Lowest stack height that has to be considered.
+	int minHeight = m_state.stackHeight() + 1;
+	if (!m_state.stackElements().empty())
+		minHeight = min(minHeight, m_state.stackElements().begin()->first);
+	for (int height = minHeight; height <= m_initialState.stackHeight(); ++height)
+		initialStackContents[height] = m_initialState.stackElement(height, SourceLocation());
+	for (int height = minHeight; height <= m_state.stackHeight(); ++height)
+		targetStackContents[height] = m_state.stackElement(height, SourceLocation());
+
+	AssemblyItems items = CSECodeGenerator(m_state.expressionClasses(), m_storeOperations).generateCode(
+		m_initialState.sequenceNumber(),
+		m_initialState.stackHeight(),
+		initialStackContents,
+		targetStackContents
+	);
+	if (m_breakingItem)
+		items.push_back(*m_breakingItem);
+
+	return items;
+}
+
+/// Feeds @a _item into the current state and records the resulting store operation if the
+/// state reports a valid one. @a _copyItem is passed on to the state unchanged.
+void CommonSubexpressionEliminator::feedItem(AssemblyItem const& _item, bool _copyItem)
+{
+	StoreOperation storeOp = m_state.feedItem(_item, _copyItem);
+	if (!storeOp.isValid())
+		return;
+	m_storeOperations.push_back(storeOp);
+}
+
+/// Simplifies the breaking item where the known state allows it:
+///  - JUMPI with a condition known to be non-zero becomes SWAP1 POP followed by a JUMP
+///    that keeps the original jump type,
+///  - JUMPI with a condition known to be zero becomes POP POP without a breaking item,
+///  - RETURN with a size known to be zero becomes POP POP followed by STOP.
+/// Does nothing if there is no breaking item or none of the cases applies.
+void CommonSubexpressionEliminator::optimizeBreakingItem()
+{
+	if (!m_breakingItem)
+		return;
+
+	ExpressionClasses& classes = m_state.expressionClasses();
+	SourceLocation const& itemLocation = m_breakingItem->location();
+
+	if (*m_breakingItem == AssemblyItem(Instruction::JUMPI))
+	{
+		AssemblyItem::JumpType jumpType = m_breakingItem->getJumpType();
+
+		// The condition is the second element from the top of the stack.
+		Id condition = m_state.stackElement(m_state.stackHeight() - 1, itemLocation);
+		if (classes.knownNonZero(condition))
+		{
+			// Always taken: drop the condition and jump unconditionally.
+			feedItem(AssemblyItem(Instruction::SWAP1, itemLocation), true);
+			feedItem(AssemblyItem(Instruction::POP, itemLocation), true);
+
+			AssemblyItem jump(Instruction::JUMP, itemLocation);
+			jump.setJumpType(jumpType);
+			m_breakingItem = classes.storeItem(jump);
+			return;
+		}
+		if (classes.knownZero(condition))
+		{
+			// Never taken: drop both arguments, no breaking item remains.
+			AssemblyItem pop(Instruction::POP, itemLocation);
+			feedItem(pop, true);
+			feedItem(pop, true);
+			m_breakingItem = nullptr;
+		}
+		return;
+	}
+
+	if (!(*m_breakingItem == AssemblyItem(Instruction::RETURN)))
+		return;
+
+	// The size is the second element from the top of the stack.
+	Id size = m_state.stackElement(m_state.stackHeight() - 1, itemLocation);
+	if (!classes.knownZero(size))
+		return;
+	feedItem(AssemblyItem(Instruction::POP, itemLocation), true);
+	feedItem(AssemblyItem(Instruction::POP, itemLocation), true);
+	AssemblyItem stop(Instruction::STOP, itemLocation);
+	m_breakingItem = classes.storeItem(stop);
+}
+
+/// Stores a reference to @a _expressionClasses and groups the given store operations by
+/// their (target, slot) pair, keeping the order in which they were passed.
+CSECodeGenerator::CSECodeGenerator(
+	ExpressionClasses& _expressionClasses,
+	vector<StoreOperation> const& _storeOperations
+):
+	m_expressionClasses(_expressionClasses)
+{
+	for (auto const& store: _storeOperations)
+		m_storeOperations[make_pair(store.target, store.slot)].push_back(store);
+}
+
+/// Generates items that transform the initial stack into the target stack and perform the
+/// needed sequenced (storage and memory) operations in order.
+/// @param _initialSequenceNumber sequence number of the initial state; needed expressions
+/// with a smaller non-zero sequence number cause a StackTooDeepException
+/// @param _initialStackHeight stack height before the generated items
+/// @param _initialStack stack height -> expression class before the generated items
+/// @param _targetStackContents stack height -> expression class to be established
+/// @returns the generated items
+/// Throws OptimizerException if the resulting stack height is not the expected one.
+AssemblyItems CSECodeGenerator::generateCode(
+	unsigned _initialSequenceNumber,
+	int _initialStackHeight,
+	map<int, Id> const& _initialStack,
+	map<int, Id> const& _targetStackContents
+)
+{
+	m_stackHeight = _initialStackHeight;
+	m_stack = _initialStack;
+	m_targetStack = _targetStackContents;
+	for (auto const& item: m_stack)
+		m_classPositions[item.second].insert(item.first);
+
+	// generate the dependency graph starting from final storage and memory writes and target stack contents
+	for (auto const& p: m_storeOperations)
+		addDependencies(p.second.back().expression);
+	for (auto const& targetItem: m_targetStack)
+	{
+		m_finalClasses.insert(targetItem.second);
+		addDependencies(targetItem.second);
+	}
+
+	// store all needed sequenced expressions
+	set<pair<unsigned, Id>> sequencedExpressions;
+	for (auto const& p: m_neededBy)
+		for (auto id: {p.first, p.second})
+			if (unsigned seqNr = m_expressionClasses.representative(id).sequenceNumber)
+			{
+				if (seqNr < _initialSequenceNumber)
+					// Invalid sequenced operation.
+					// @todo quick fix for now. Proper fix needs to choose representative with higher
+					// sequence number during dependency analysis.
+					BOOST_THROW_EXCEPTION(StackTooDeepException());
+				sequencedExpressions.insert(make_pair(seqNr, id));
+			}
+
+	// Perform all operations on storage and memory in order, if they are needed.
+	for (auto const& seqAndId: sequencedExpressions)
+		if (!m_classPositions.count(seqAndId.second))
+			generateClassElement(seqAndId.second, true);
+
+	// generate the target stack elements
+	for (auto const& targetItem: m_targetStack)
+	{
+		if (m_stack.count(targetItem.first) && m_stack.at(targetItem.first) == targetItem.second)
+			continue; // already there
+		generateClassElement(targetItem.second);
+		assertThrow(!m_classPositions[targetItem.second].empty(), OptimizerException, "");
+		if (m_classPositions[targetItem.second].count(targetItem.first))
+			continue;
+		SourceLocation sourceLocation;
+		if (m_expressionClasses.representative(targetItem.second).item)
+			sourceLocation = m_expressionClasses.representative(targetItem.second).item->location();
+		int position = classElementPosition(targetItem.second);
+		if (position < targetItem.first)
+			// it is already at its target, we need another copy
+			appendDup(position, sourceLocation);
+		else
+			appendOrRemoveSwap(position, sourceLocation);
+		appendOrRemoveSwap(targetItem.first, sourceLocation);
+	}
+
+	// remove surplus elements
+	while (removeStackTopIfPossible())
+	{
+		// no-op
+	}
+
+	// check validity
+	int finalHeight = 0;
+	if (!m_targetStack.empty())
+		// have target stack, so its height should be the final height
+		finalHeight = (--m_targetStack.end())->first;
+	else if (!_initialStack.empty())
+		// no target stack, only erase the initial stack
+		finalHeight = _initialStack.begin()->first - 1;
+	else
+		// neither initial nor target stack, no change in height
+		finalHeight = _initialStackHeight;
+	assertThrow(finalHeight == m_stackHeight, OptimizerException, "Incorrect final stack height.");
+
+	return m_generatedItems;
+}
+
+/// Recursively records in m_neededBy which expression classes are needed to compute the
+/// class @a _c: its arguments and, for SLOAD, MLOAD and SHA3, the latest preceding store
+/// operations that are not known to be independent of the location that is read.
+/// Classes that already have a stack position or already appear as a key in m_neededBy are
+/// skipped.
+/// Throws OptimizerException if the representative of @a _c has no item and
+/// ItemNotAvailableException if its item is of type UndefinedItem.
+void CSECodeGenerator::addDependencies(Id _c)
+{
+	if (m_classPositions.count(_c))
+		return; // it is already on the stack
+	if (m_neededBy.count(_c))
+		return; // we already computed the dependencies for _c
+	// Taken by value. NOTE(review): presumably because m_expressionClasses.find() below may
+	// add classes and thereby invalidate references - confirm.
+	ExpressionClasses::Expression expr = m_expressionClasses.representative(_c);
+	// The item is dereferenced right below, so fail in a controlled way if it is missing.
+	assertThrow(expr.item, OptimizerException, "Non-generated expression without item.");
+	if (expr.item->type() == UndefinedItem)
+		BOOST_THROW_EXCEPTION(
+			// If this exception happens, we need to find a different way to generate the
+			// compound expression.
+			ItemNotAvailableException() << errinfo_comment("Undefined item requested but not available.")
+		);
+	for (Id argument: expr.arguments)
+	{
+		addDependencies(argument);
+		m_neededBy.insert(make_pair(argument, _c));
+	}
+	if (expr.item && expr.item->type() == Operation && (
+		expr.item->instruction() == Instruction::SLOAD ||
+		expr.item->instruction() == Instruction::MLOAD ||
+		expr.item->instruction() == Instruction::SHA3
+	))
+	{
+		// this loads an unknown value from storage or memory and thus, in addition to its
+		// arguments, depends on all store operations to addresses where we do not know that
+		// they are different that occur before this load
+		StoreOperation::Target target = expr.item->instruction() == Instruction::SLOAD ?
+			StoreOperation::Storage : StoreOperation::Memory;
+		Id slotToLoadFrom = expr.arguments.at(0);
+		for (auto const& p: m_storeOperations)
+		{
+			if (p.first.first != target)
+				continue;
+			Id slot = p.first.second;
+			StoreOperations const& storeOps = p.second;
+			// All stores to this slot happen after the load.
+			if (storeOps.front().sequenceNumber > expr.sequenceNumber)
+				continue;
+			bool knownToBeIndependent = false;
+			switch (expr.item->instruction())
+			{
+			case Instruction::SLOAD:
+				knownToBeIndependent = m_expressionClasses.knownToBeDifferent(slot, slotToLoadFrom);
+				break;
+			case Instruction::MLOAD:
+				knownToBeIndependent = m_expressionClasses.knownToBeDifferentBy32(slot, slotToLoadFrom);
+				break;
+			case Instruction::SHA3:
+			{
+				Id length = expr.arguments.at(1);
+				AssemblyItem offsetInstr(Instruction::SUB, expr.item->location());
+				// Distance of the store slot from the start of the hashed range.
+				Id offsetToStart = m_expressionClasses.find(offsetInstr, {slot, slotToLoadFrom});
+				u256 const* o = m_expressionClasses.knownConstant(offsetToStart);
+				u256 const* l = m_expressionClasses.knownConstant(length);
+				if (l && *l == 0)
+					knownToBeIndependent = true;
+				else if (o)
+				{
+					// We could get problems here if both *o and *l are larger than 2**254
+					// but it is probably ok for the optimizer to produce wrong code for such cases
+					// which cannot be executed anyway because of the non-payable price.
+					if (u2s(*o) <= -32)
+						knownToBeIndependent = true;
+					else if (l && u2s(*o) >= 0 && *o >= *l)
+						knownToBeIndependent = true;
+				}
+				break;
+			}
+			default:
+				break;
+			}
+			if (knownToBeIndependent)
+				continue;
+
+			// note that store and load never have the same sequence number
+			Id latestStore = storeOps.front().expression;
+			for (auto it = ++storeOps.begin(); it != storeOps.end(); ++it)
+				if (it->sequenceNumber < expr.sequenceNumber)
+					latestStore = it->expression;
+			addDependencies(latestStore);
+			m_neededBy.insert(make_pair(latestStore, _c));
+		}
+	}
+}
+
+/// Generates items that compute the expression class @a _c unless it has already been
+/// generated. Arguments are generated recursively and moved or copied to the stack top;
+/// afterwards the item of the class itself is appended.
+/// @param _allowSequenced if false, a class with a non-zero sequence number triggers an
+/// OptimizerException
+/// Throws OptimizerException on internal inconsistencies and for items with more than two
+/// arguments.
+void CSECodeGenerator::generateClassElement(Id _c, bool _allowSequenced)
+{
+	// Sanity check: no recorded position may lie above the current stack height.
+	// Iterate by reference, the check does not need copies of the position sets.
+	for (auto const& it: m_classPositions)
+		for (auto p: it.second)
+			if (p > m_stackHeight)
+				assertThrow(false, OptimizerException, "");
+	// do some cleanup
+	removeStackTopIfPossible();
+
+	if (m_classPositions.count(_c))
+	{
+		assertThrow(
+			!m_classPositions[_c].empty(),
+			OptimizerException,
+			"Element already removed but still needed."
+		);
+		return;
+	}
+	ExpressionClasses::Expression const& expr = m_expressionClasses.representative(_c);
+	assertThrow(
+		_allowSequenced || expr.sequenceNumber == 0,
+		OptimizerException,
+		"Sequence constrained operation requested out of sequence."
+	);
+	assertThrow(expr.item, OptimizerException, "Non-generated expression without item.");
+	assertThrow(
+		expr.item->type() != UndefinedItem,
+		OptimizerException,
+		"Undefined item requested but not available."
+	);
+	vector<Id> const& arguments = expr.arguments;
+	for (Id arg: boost::adaptors::reverse(arguments))
+		generateClassElement(arg);
+
+	SourceLocation const& itemLocation = expr.item->location();
+	// The arguments are somewhere on the stack now, so it remains to move them at the correct place.
+	// This is quite difficult as sometimes, the values also have to be removed in this process
+	// (if canBeRemoved() returns true) and the two arguments can be equal. For now, this is
+	// implemented for every single case for combinations of up to two arguments manually.
+	if (arguments.size() == 1)
+	{
+		if (canBeRemoved(arguments[0], _c))
+			appendOrRemoveSwap(classElementPosition(arguments[0]), itemLocation);
+		else
+			appendDup(classElementPosition(arguments[0]), itemLocation);
+	}
+	else if (arguments.size() == 2)
+	{
+		if (canBeRemoved(arguments[1], _c))
+		{
+			appendOrRemoveSwap(classElementPosition(arguments[1]), itemLocation);
+			if (arguments[0] == arguments[1])
+				appendDup(m_stackHeight, itemLocation);
+			else if (canBeRemoved(arguments[0], _c))
+			{
+				appendOrRemoveSwap(m_stackHeight - 1, itemLocation);
+				appendOrRemoveSwap(classElementPosition(arguments[0]), itemLocation);
+			}
+			else
+				appendDup(classElementPosition(arguments[0]), itemLocation);
+		}
+		else
+		{
+			if (arguments[0] == arguments[1])
+			{
+				appendDup(classElementPosition(arguments[0]), itemLocation);
+				appendDup(m_stackHeight, itemLocation);
+			}
+			else if (canBeRemoved(arguments[0], _c))
+			{
+				appendOrRemoveSwap(classElementPosition(arguments[0]), itemLocation);
+				appendDup(classElementPosition(arguments[1]), itemLocation);
+				appendOrRemoveSwap(m_stackHeight - 1, itemLocation);
+			}
+			else
+			{
+				appendDup(classElementPosition(arguments[1]), itemLocation);
+				appendDup(classElementPosition(arguments[0]), itemLocation);
+			}
+		}
+	}
+	else
+		assertThrow(
+			arguments.size() <= 2,
+			OptimizerException,
+			"Opcodes with more than two arguments not implemented yet."
+		);
+	// The first argument has to be on top of the stack, the second one directly below.
+	for (size_t i = 0; i < arguments.size(); ++i)
+		assertThrow(m_stack[m_stackHeight - i] == arguments[i], OptimizerException, "Expected arguments not present." );
+
+	while (SemanticInformation::isCommutativeOperation(*expr.item) &&
+			!m_generatedItems.empty() &&
+			m_generatedItems.back() == AssemblyItem(Instruction::SWAP1))
+		// this will not append a swap but remove the one that is already there
+		appendOrRemoveSwap(m_stackHeight - 1, itemLocation);
+	// The arguments are consumed by the item appended below.
+	for (size_t i = 0; i < arguments.size(); ++i)
+	{
+		m_classPositions[m_stack[m_stackHeight - i]].erase(m_stackHeight - i);
+		m_stack.erase(m_stackHeight - i);
+	}
+	appendItem(*expr.item);
+	if (expr.item->type() != Operation || instructionInfo(expr.item->instruction()).ret == 1)
+	{
+		m_stack[m_stackHeight] = _c;
+		m_classPositions[_c].insert(m_stackHeight);
+	}
+	else
+	{
+		assertThrow(
+			instructionInfo(expr.item->instruction()).ret == 0,
+			OptimizerException,
+			"Invalid number of return values."
+		);
+		m_classPositions[_c]; // ensure it is created to mark the expression as generated
+	}
+}
+
+/// @returns the largest stack position currently recorded for the class @a _id.
+/// Throws OptimizerException if no position is recorded for it.
+int CSECodeGenerator::classElementPosition(Id _id) const
+{
+	assertThrow(
+		m_classPositions.count(_id) && !m_classPositions.at(_id).empty(),
+		OptimizerException,
+		"Element requested but is not present."
+	);
+	auto const& positions = m_classPositions.at(_id);
+	return *max_element(positions.begin(), positions.end());
+}
+
+/// @returns true if the copy of @a _element at stack position @a _fromPosition may be
+/// consumed or dropped while computing @a _result.
+/// If @a _fromPosition is c_invalidPosition, the position returned by
+/// classElementPosition() is used instead.
+bool CSECodeGenerator::canBeRemoved(Id _element, Id _result, int _fromPosition)
+{
+	if (_fromPosition == c_invalidPosition)
+		_fromPosition = classElementPosition(_element);
+
+	bool const hasSeveralCopies = m_classPositions.at(_element).size() > 1;
+
+	if (m_finalClasses.count(_element))
+	{
+		// Part of the target stack: only a surplus copy that does not sit at a target
+		// position for this element may go.
+		if (!hasSeveralCopies)
+			return false;
+		return !m_targetStack.count(_fromPosition) || m_targetStack[_fromPosition] != _element;
+	}
+
+	if (hasSeveralCopies)
+		return true;
+
+	// Last copy: it has to stay if some class other than _result still needs it and has
+	// not been computed yet. Note that m_classPositions also includes classes that were
+	// deleted in the meantime.
+	auto users = m_neededBy.equal_range(_element);
+	for (auto user = users.first; user != users.second; ++user)
+	{
+		bool const isResult = user->second == _result;
+		if (!isResult && !m_classPositions.count(user->second))
+			return false;
+	}
+	return true;
+}
+
+/// Appends a POP and updates the bookkeeping if the topmost stack element can be removed.
+/// @returns true if an element was removed, false if the stack is empty or the top element
+/// is still needed.
+/// Throws OptimizerException if there is no entry for the current stack height.
+bool CSECodeGenerator::removeStackTopIfPossible()
+{
+	if (m_stack.empty())
+		return false;
+	assertThrow(m_stack.count(m_stackHeight) > 0, OptimizerException, "");
+	Id topClass = m_stack[m_stackHeight];
+	if (canBeRemoved(topClass, Id(-1), m_stackHeight))
+	{
+		m_classPositions[topClass].erase(m_stackHeight);
+		m_stack.erase(m_stackHeight);
+		appendItem(AssemblyItem(Instruction::POP));
+		return true;
+	}
+	return false;
+}
+
+/// Appends a DUP instruction that copies the element at stack position @a _fromPosition to
+/// the top of the stack and records the new copy.
+/// Throws StackTooDeepException if the element is more than 16 slots deep and
+/// OptimizerException for an invalid position.
+void CSECodeGenerator::appendDup(int _fromPosition, SourceLocation const& _location)
+{
+	assertThrow(_fromPosition != c_invalidPosition, OptimizerException, "");
+	// DUP1 copies the topmost element, so the depth is counted starting at one.
+	int dupDepth = m_stackHeight - _fromPosition + 1;
+	assertThrow(dupDepth <= 16, StackTooDeepException, "Stack too deep, try removing local variables.");
+	assertThrow(1 <= dupDepth, OptimizerException, "Invalid stack access.");
+	appendItem(AssemblyItem(dupInstruction(dupDepth), _location));
+	// m_stackHeight now refers to the newly created top element.
+	Id duplicated = m_stack[_fromPosition];
+	m_stack[m_stackHeight] = duplicated;
+	m_classPositions[duplicated].insert(m_stackHeight);
+}
+
+/// Appends a SWAP instruction that exchanges the top of the stack with the element at
+/// @a _fromPosition and updates the bookkeeping. If this results in two identical swap
+/// instructions at the end of the generated items, both are removed again.
+/// Does nothing if @a _fromPosition already is the top of the stack.
+/// Throws StackTooDeepException if the element is more than 16 slots below the top and
+/// OptimizerException for an invalid position.
+void CSECodeGenerator::appendOrRemoveSwap(int _fromPosition, SourceLocation const& _location)
+{
+	assertThrow(_fromPosition != c_invalidPosition, OptimizerException, "");
+	if (_fromPosition == m_stackHeight)
+		return;
+	int swapDepth = m_stackHeight - _fromPosition;
+	assertThrow(swapDepth <= 16, StackTooDeepException, "Stack too deep, try removing local variables.");
+	assertThrow(1 <= swapDepth, OptimizerException, "Invalid stack access.");
+	appendItem(AssemblyItem(swapInstruction(swapDepth), _location));
+
+	Id& topClass = m_stack[m_stackHeight];
+	Id& otherClass = m_stack[_fromPosition];
+	if (topClass != otherClass)
+	{
+		// Exchange the recorded positions first, then the stack contents themselves.
+		m_classPositions[topClass].erase(m_stackHeight);
+		m_classPositions[topClass].insert(_fromPosition);
+		m_classPositions[otherClass].erase(_fromPosition);
+		m_classPositions[otherClass].insert(m_stackHeight);
+		swap(topClass, otherClass);
+	}
+
+	// Two identical swaps in a row cancel each other out.
+	bool const cancelsPrevious =
+		m_generatedItems.size() >= 2 &&
+		SemanticInformation::isSwapInstruction(m_generatedItems.back()) &&
+		*(m_generatedItems.end() - 2) == m_generatedItems.back();
+	if (cancelsPrevious)
+		m_generatedItems.erase(m_generatedItems.end() - 2, m_generatedItems.end());
+}
+
+/// Appends @a _item to the generated items and adjusts the tracked stack height by the
+/// item's deposit.
+void CSECodeGenerator::appendItem(AssemblyItem const& _item)
+{
+	int heightChange = _item.deposit();
+	m_generatedItems.push_back(_item);
+	m_stackHeight += heightChange;
+}
diff --git a/libevmasm/CommonSubexpressionEliminator.h b/libevmasm/CommonSubexpressionEliminator.h
new file mode 100644
index 000000000..f6c43c57a
--- /dev/null
+++ b/libevmasm/CommonSubexpressionEliminator.h
@@ -0,0 +1,183 @@
+/*
+ This file is part of cpp-ethereum.
+
+ cpp-ethereum is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ cpp-ethereum is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * @file CommonSubexpressionEliminator.h
+ * @author Christian
+ * @date 2015
+ * Optimizer step for common subexpression elimination and stack reorganisation.
+ */
+
+#pragma once
+
+#include
+#include