From b9d7387e7abbb1f4fa7f7c32a3757386e75e5650 Mon Sep 17 00:00:00 2001 From: chriseth Date: Fri, 24 Apr 2015 17:35:16 +0200 Subject: [PATCH] Move assembly related files to libevmasm and Params.h/.cpp to libevmcore. --- Assembly.cpp | 485 +++++++++++++++++++++ Assembly.h | 132 ++++++ AssemblyItem.cpp | 135 ++++++ AssemblyItem.h | 100 +++++ CMakeLists.txt | 33 ++ CommonSubexpressionEliminator.cpp | 672 ++++++++++++++++++++++++++++++ CommonSubexpressionEliminator.h | 233 +++++++++++ ControlFlowGraph.cpp | 260 ++++++++++++ ControlFlowGraph.h | 108 +++++ Exceptions.h | 36 ++ ExpressionClasses.cpp | 438 +++++++++++++++++++ ExpressionClasses.h | 181 ++++++++ SemanticInformation.cpp | 124 ++++++ SemanticInformation.h | 51 +++ SourceLocation.h | 89 ++++ 15 files changed, 3077 insertions(+) create mode 100644 Assembly.cpp create mode 100644 Assembly.h create mode 100644 AssemblyItem.cpp create mode 100644 AssemblyItem.h create mode 100644 CMakeLists.txt create mode 100644 CommonSubexpressionEliminator.cpp create mode 100644 CommonSubexpressionEliminator.h create mode 100644 ControlFlowGraph.cpp create mode 100644 ControlFlowGraph.h create mode 100644 Exceptions.h create mode 100644 ExpressionClasses.cpp create mode 100644 ExpressionClasses.h create mode 100644 SemanticInformation.cpp create mode 100644 SemanticInformation.h create mode 100644 SourceLocation.h diff --git a/Assembly.cpp b/Assembly.cpp new file mode 100644 index 000000000..6cc09a4bc --- /dev/null +++ b/Assembly.cpp @@ -0,0 +1,485 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** @file Assembly.cpp + * @author Gav Wood + * @date 2014 + */ + +#include "Assembly.h" +#include +#include +#include +#include +#include +using namespace std; +using namespace dev; +using namespace dev::eth; + +void Assembly::append(Assembly const& _a) +{ + auto newDeposit = m_deposit + _a.deposit(); + for (AssemblyItem i: _a.m_items) + { + if (i.type() == Tag || i.type() == PushTag) + i.setData(i.data() + m_usedTags); + else if (i.type() == PushSub || i.type() == PushSubSize) + i.setData(i.data() + m_usedTags); + append(i); + } + m_deposit = newDeposit; + m_usedTags += _a.m_usedTags; + for (auto const& i: _a.m_data) + m_data.insert(i); + for (auto const& i: _a.m_strings) + m_strings.insert(i); + for (auto const& i: _a.m_subs) + m_subs.push_back(i); + + assert(!_a.m_baseDeposit); + assert(!_a.m_totalDeposit); +} + +void Assembly::append(Assembly const& _a, int _deposit) +{ + if (_deposit > _a.m_deposit) + BOOST_THROW_EXCEPTION(InvalidDeposit()); + else + { + append(_a); + while (_deposit++ < _a.m_deposit) + append(Instruction::POP); + } +} + +string Assembly::out() const +{ + stringstream ret; + stream(ret); + return ret.str(); +} + +unsigned Assembly::bytesRequired() const +{ + for (unsigned br = 1;; ++br) + { + unsigned ret = 1; + for (auto const& i: m_data) + ret += i.second.size(); + + for (AssemblyItem const& i: m_items) + ret += i.bytesRequired(br); + if (dev::bytesRequired(ret) <= br) + return ret; + } +} + +string Assembly::getLocationFromSources(StringMap const& _sourceCodes, SourceLocation const& _location) const +{ + if (_location.isEmpty() || _sourceCodes.empty() || _location.start >= _location.end || _location.start < 0) + return ""; + + auto it = _sourceCodes.find(*_location.sourceName); + if (it == _sourceCodes.end()) + return ""; + + string const& source = it->second; + if (size_t(_location.start) >= source.size()) + return ""; + + string cut = source.substr(_location.start, _location.end - _location.start); + auto newLinePos = cut.find_first_of("\n"); + if (newLinePos != string::npos) + cut = cut.substr(0, newLinePos) + "..."; + + return move(cut); +} + +ostream& Assembly::streamAsm(ostream& _out, string const& _prefix, StringMap const& _sourceCodes) const +{ + _out << _prefix << ".code:" << endl; + for (AssemblyItem const& i: m_items) + { + _out << _prefix; + switch (i.type()) + { + case Operation: + _out << " " << instructionInfo(i.instruction()).name << "\t" << i.getJumpTypeAsString(); + break; + case Push: + _out << " PUSH " << i.data(); + break; + case PushString: + _out << " PUSH \"" << m_strings.at((h256)i.data()) << "\""; + break; + case PushTag: + _out << " PUSH [tag" << i.data() << "]"; + break; + case PushSub: + _out << " PUSH [$" << h256(i.data()).abridged() << "]"; + break; + case PushSubSize: + _out << " PUSH #[$" << h256(i.data()).abridged() << "]"; + break; + case PushProgramSize: + _out << " PUSHSIZE"; + break; + case Tag: + _out << "tag" << i.data() << ": " << endl << _prefix << " JUMPDEST"; + break; + case PushData: + _out << " PUSH [" << hex << (unsigned)i.data() << "]"; + break; + default: + BOOST_THROW_EXCEPTION(InvalidOpcode()); + } + _out << "\t\t" << getLocationFromSources(_sourceCodes, i.getLocation()) << endl; + } + + if (!m_data.empty() || !m_subs.empty()) + { + _out << _prefix << ".data:" << endl; + for (auto const& i: m_data) + if (u256(i.first) >= m_subs.size()) + _out << _prefix << " " << hex << (unsigned)(u256)i.first << ": " << toHex(i.second) << endl; + for (size_t i = 0; i < m_subs.size(); ++i) + { + _out << _prefix << " " << hex << i << ": " << endl; + m_subs[i].stream(_out, _prefix + " ", _sourceCodes); + } + } + return _out; +} + +Json::Value Assembly::createJsonValue(string _name, int _begin, int _end, string _value, string _jumpType) const +{ + Json::Value value; + value["name"] = _name; + value["begin"] = _begin; + value["end"] = _end; + if (!_value.empty()) + value["value"] = _value; + if (!_jumpType.empty()) + value["jumpType"] = _jumpType; + return value; +} + +string toStringInHex(u256 _value) +{ + std::stringstream hexStr; + hexStr << hex << _value; + return hexStr.str(); +} + +Json::Value Assembly::streamAsmJson(ostream& _out, StringMap const& _sourceCodes) const +{ + Json::Value root; + + Json::Value collection(Json::arrayValue); + for (AssemblyItem const& i: m_items) + { + switch (i.type()) + { + case Operation: + collection.append( + createJsonValue(instructionInfo(i.instruction()).name, i.getLocation().start, i.getLocation().end, i.getJumpTypeAsString())); + break; + case Push: + collection.append( + createJsonValue("PUSH", i.getLocation().start, i.getLocation().end, toStringInHex(i.data()), i.getJumpTypeAsString())); + break; + case PushString: + collection.append( + createJsonValue("PUSH tag", i.getLocation().start, i.getLocation().end, m_strings.at((h256)i.data()))); + break; + case PushTag: + collection.append( + createJsonValue("PUSH [tag]", i.getLocation().start, i.getLocation().end, toStringInHex(i.data()))); + break; + case PushSub: + collection.append( + createJsonValue("PUSH [$]", i.getLocation().start, i.getLocation().end, dev::toString(h256(i.data())))); + break; + case PushSubSize: + collection.append( + createJsonValue("PUSH #[$]", i.getLocation().start, i.getLocation().end, dev::toString(h256(i.data())))); + break; + case PushProgramSize: + collection.append( + createJsonValue("PUSHSIZE", i.getLocation().start, i.getLocation().end)); + break; + case Tag: + { + collection.append( + createJsonValue("tag", i.getLocation().start, i.getLocation().end, string(i.data()))); + collection.append( + createJsonValue("JUMDEST", i.getLocation().start, i.getLocation().end)); + } + break; + case PushData: + { + Json::Value pushData; + pushData["name"] = "PUSH hex"; + collection.append(createJsonValue("PUSH hex", i.getLocation().start, i.getLocation().end, toStringInHex(i.data()))); + } + break; + default: + BOOST_THROW_EXCEPTION(InvalidOpcode()); + } + } + + root[".code"] = collection; + + if (!m_data.empty() || !m_subs.empty()) + { + Json::Value data; + for (auto const& i: m_data) + if (u256(i.first) >= m_subs.size()) + data[toStringInHex((u256)i.first)] = toHex(i.second); + + for (size_t i = 0; i < m_subs.size(); ++i) + { + std::stringstream hexStr; + hexStr << hex << i; + data[hexStr.str()] = m_subs[i].stream(_out, "", _sourceCodes, true); + } + root[".data"] = data; + _out << root; + } + return root; +} + +Json::Value Assembly::stream(ostream& _out, string const& _prefix, StringMap const& _sourceCodes, bool _inJsonFormat) const +{ + if (_inJsonFormat) + return streamAsmJson(_out, _sourceCodes); + else + { + streamAsm(_out, _prefix, _sourceCodes); + return Json::Value(); + } +} + +AssemblyItem const& Assembly::append(AssemblyItem const& _i) +{ + m_deposit += _i.deposit(); + m_items.push_back(_i); + if (m_items.back().getLocation().isEmpty() && !m_currentSourceLocation.isEmpty()) + m_items.back().setLocation(m_currentSourceLocation); + return back(); +} + +void Assembly::injectStart(AssemblyItem const& _i) +{ + m_items.insert(m_items.begin(), _i); +} + +inline bool matches(AssemblyItemsConstRef _a, AssemblyItemsConstRef _b) +{ + if (_a.size() != _b.size()) + return false; + for (unsigned i = 0; i < _a.size(); ++i) + if (!_a[i].match(_b[i])) + return false; + return true; +} + +struct OptimiserChannel: public LogChannel { static const char* name() { return "OPT"; } static const int verbosity = 12; }; +#define copt dev::LogOutputStream() + +Assembly& Assembly::optimise(bool _enable) +{ + if (!_enable) + return *this; + std::vector>> rules; + // jump to next instruction + rules.push_back({ { PushTag, Instruction::JUMP, Tag }, [](AssemblyItemsConstRef m) -> AssemblyItems { if (m[0].data() == m[2].data()) return {m[2]}; else return m.toVector(); }}); + + unsigned total = 0; + for (unsigned count = 1; count > 0; total += count) + { + copt << toString(*this); + count = 0; + + copt << "Performing control flow analysis..."; + { + ControlFlowGraph cfg(m_items); + AssemblyItems optItems = cfg.optimisedItems(); + if (optItems.size() < m_items.size()) + { + copt << "Old size: " << m_items.size() << ", new size: " << optItems.size(); + m_items = move(optItems); + count++; + } + } + + copt << "Performing common subexpression elimination..."; + for (auto iter = m_items.begin(); iter != m_items.end();) + { + CommonSubexpressionEliminator eliminator; + auto orig = iter; + iter = eliminator.feedItems(iter, m_items.end()); + AssemblyItems optItems; + bool shouldReplace = false; + try + { + optItems = eliminator.getOptimizedItems(); + shouldReplace = (optItems.size() < size_t(iter - orig)); + } + catch (StackTooDeepException const&) + { + // This might happen if the opcode reconstruction is not as efficient + // as the hand-crafted code. + } + + if (shouldReplace) + { + copt << "Old size: " << (iter - orig) << ", new size: " << optItems.size(); + count++; + for (auto moveIter = optItems.begin(); moveIter != optItems.end(); ++orig, ++moveIter) + *orig = move(*moveIter); + iter = m_items.erase(orig, iter); + } + } + } + + copt << total << " optimisations done."; + + for (auto& sub: m_subs) + sub.optimise(true); + + return *this; +} + +bytes Assembly::assemble() const +{ + bytes ret; + + unsigned totalBytes = bytesRequired(); + vector tagPos(m_usedTags); + map tagRef; + multimap dataRef; + vector sizeRef; ///< Pointers to code locations where the size of the program is inserted + unsigned bytesPerTag = dev::bytesRequired(totalBytes); + byte tagPush = (byte)Instruction::PUSH1 - 1 + bytesPerTag; + + for (size_t i = 0; i < m_subs.size(); ++i) + m_data[u256(i)] = m_subs[i].assemble(); + + unsigned bytesRequiredIncludingData = bytesRequired(); + unsigned bytesPerDataRef = dev::bytesRequired(bytesRequiredIncludingData); + byte dataRefPush = (byte)Instruction::PUSH1 - 1 + bytesPerDataRef; + ret.reserve(bytesRequiredIncludingData); + // m_data must not change from here on + + for (AssemblyItem const& i: m_items) + switch (i.type()) + { + case Operation: + ret.push_back((byte)i.data()); + break; + case PushString: + { + ret.push_back((byte)Instruction::PUSH32); + unsigned ii = 0; + for (auto j: m_strings.at((h256)i.data())) + if (++ii > 32) + break; + else + ret.push_back((byte)j); + while (ii++ < 32) + ret.push_back(0); + break; + } + case Push: + { + byte b = max(1, dev::bytesRequired(i.data())); + ret.push_back((byte)Instruction::PUSH1 - 1 + b); + ret.resize(ret.size() + b); + bytesRef byr(&ret.back() + 1 - b, b); + toBigEndian(i.data(), byr); + break; + } + case PushTag: + { + ret.push_back(tagPush); + tagRef[ret.size()] = (unsigned)i.data(); + ret.resize(ret.size() + bytesPerTag); + break; + } + case PushData: case PushSub: + { + ret.push_back(dataRefPush); + dataRef.insert(make_pair((h256)i.data(), ret.size())); + ret.resize(ret.size() + bytesPerDataRef); + break; + } + case PushSubSize: + { + auto s = m_data[i.data()].size(); + byte b = max(1, dev::bytesRequired(s)); + ret.push_back((byte)Instruction::PUSH1 - 1 + b); + ret.resize(ret.size() + b); + bytesRef byr(&ret.back() + 1 - b, b); + toBigEndian(s, byr); + break; + } + case PushProgramSize: + { + ret.push_back(dataRefPush); + sizeRef.push_back(ret.size()); + ret.resize(ret.size() + bytesPerDataRef); + break; + } + case Tag: + tagPos[(unsigned)i.data()] = ret.size(); + ret.push_back((byte)Instruction::JUMPDEST); + break; + default: + BOOST_THROW_EXCEPTION(InvalidOpcode()); + } + + for (auto const& i: tagRef) + { + bytesRef r(ret.data() + i.first, bytesPerTag); + toBigEndian(tagPos[i.second], r); + } + + if (!m_data.empty()) + { + ret.push_back(0); + for (auto const& i: m_data) + { + auto its = dataRef.equal_range(i.first); + if (its.first != its.second) + { + for (auto it = its.first; it != its.second; ++it) + { + bytesRef r(ret.data() + it->second, bytesPerDataRef); + toBigEndian(ret.size(), r); + } + for (auto b: i.second) + ret.push_back(b); + } + } + } + for (unsigned pos: sizeRef) + { + bytesRef r(ret.data() + pos, bytesPerDataRef); + toBigEndian(ret.size(), r); + } + return ret; +} diff --git a/Assembly.h b/Assembly.h new file mode 100644 index 000000000..b4850f7d0 --- /dev/null +++ b/Assembly.h @@ -0,0 +1,132 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** @file Assembly.h + * @author Gav Wood + * @date 2014 + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include "Exceptions.h" +#include + +namespace Json +{ +class Value; +} +namespace dev +{ +namespace eth +{ + +class Assembly +{ +public: + Assembly() {} + + AssemblyItem newTag() { return AssemblyItem(Tag, m_usedTags++); } + AssemblyItem newPushTag() { return AssemblyItem(PushTag, m_usedTags++); } + AssemblyItem newData(bytes const& _data) { h256 h = (u256)std::hash()(asString(_data)); m_data[h] = _data; return AssemblyItem(PushData, h); } + AssemblyItem newSub(Assembly const& _sub) { m_subs.push_back(_sub); return AssemblyItem(PushSub, m_subs.size() - 1); } + AssemblyItem newPushString(std::string const& _data) { h256 h = (u256)std::hash()(_data); m_strings[h] = _data; return AssemblyItem(PushString, h); } + AssemblyItem newPushSubSize(u256 const& _subId) { return AssemblyItem(PushSubSize, _subId); } + + AssemblyItem append() { return append(newTag()); } + void append(Assembly const& _a); + void append(Assembly const& _a, int _deposit); + AssemblyItem const& append(AssemblyItem const& _i); + AssemblyItem const& append(std::string const& _data) { return append(newPushString(_data)); } + AssemblyItem const& append(bytes const& _data) { return append(newData(_data)); } + AssemblyItem appendSubSize(Assembly const& _a) { auto ret = newSub(_a); append(newPushSubSize(ret.data())); return ret; } + /// Pushes the final size of the current assembly itself. Use this when the code is modified + /// after compilation and CODESIZE is not an option. + void appendProgramSize() { append(AssemblyItem(PushProgramSize)); } + + AssemblyItem appendJump() { auto ret = append(newPushTag()); append(Instruction::JUMP); return ret; } + AssemblyItem appendJumpI() { auto ret = append(newPushTag()); append(Instruction::JUMPI); return ret; } + AssemblyItem appendJump(AssemblyItem const& _tag) { auto ret = append(_tag.pushTag()); append(Instruction::JUMP); return ret; } + AssemblyItem appendJumpI(AssemblyItem const& _tag) { auto ret = append(_tag.pushTag()); append(Instruction::JUMPI); return ret; } + template Assembly& operator<<(T const& _d) { append(_d); return *this; } + AssemblyItems const& getItems() const { return m_items; } + AssemblyItem const& back() const { return m_items.back(); } + std::string backString() const { return m_items.size() && m_items.back().type() == PushString ? m_strings.at((h256)m_items.back().data()) : std::string(); } + + void onePath() { if (asserts(!m_totalDeposit && !m_baseDeposit)) BOOST_THROW_EXCEPTION(InvalidDeposit()); m_baseDeposit = m_deposit; m_totalDeposit = INT_MAX; } + void otherPath() { donePath(); m_totalDeposit = m_deposit; m_deposit = m_baseDeposit; } + void donePaths() { donePath(); m_totalDeposit = m_baseDeposit = 0; } + void ignored() { m_baseDeposit = m_deposit; } + void endIgnored() { m_deposit = m_baseDeposit; m_baseDeposit = 0; } + + void popTo(int _deposit) { while (m_deposit > _deposit) append(Instruction::POP); } + + void injectStart(AssemblyItem const& _i); + std::string out() const; + int deposit() const { return m_deposit; } + void adjustDeposit(int _adjustment) { m_deposit += _adjustment; if (asserts(m_deposit >= 0)) BOOST_THROW_EXCEPTION(InvalidDeposit()); } + void setDeposit(int _deposit) { m_deposit = _deposit; if (asserts(m_deposit >= 0)) BOOST_THROW_EXCEPTION(InvalidDeposit()); } + + /// Changes the source location used for each appended item. + void setSourceLocation(SourceLocation const& _location) { m_currentSourceLocation = _location; } + + bytes assemble() const; + Assembly& optimise(bool _enable); + Json::Value stream( + std::ostream& _out, + std::string const& _prefix = "", + const StringMap &_sourceCodes = StringMap(), + bool _inJsonFormat = false + ) const; + +protected: + std::string getLocationFromSources(StringMap const& _sourceCodes, SourceLocation const& _location) const; + void donePath() { if (m_totalDeposit != INT_MAX && m_totalDeposit != m_deposit) BOOST_THROW_EXCEPTION(InvalidDeposit()); } + unsigned bytesRequired() const; + +private: + Json::Value streamAsmJson(std::ostream& _out, const StringMap &_sourceCodes) const; + std::ostream& streamAsm(std::ostream& _out, std::string const& _prefix, StringMap const& _sourceCodes) const; + Json::Value createJsonValue(std::string _name, int _begin, int _end, std::string _value = std::string(), std::string _jumpType = std::string()) const; + +protected: + unsigned m_usedTags = 0; + AssemblyItems m_items; + mutable std::map m_data; + std::vector m_subs; + std::map m_strings; + + int m_deposit = 0; + int m_baseDeposit = 0; + int m_totalDeposit = 0; + + SourceLocation m_currentSourceLocation; +}; + +inline std::ostream& operator<<(std::ostream& _out, Assembly const& _a) +{ + _a.stream(_out); + return _out; +} + +} +} diff --git a/AssemblyItem.cpp b/AssemblyItem.cpp new file mode 100644 index 000000000..a4485a144 --- /dev/null +++ b/AssemblyItem.cpp @@ -0,0 +1,135 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** @file Assembly.cpp + * @author Gav Wood + * @date 2014 + */ + +#include "AssemblyItem.h" +#include + +using namespace std; +using namespace dev; +using namespace dev::eth; + +unsigned AssemblyItem::bytesRequired(unsigned _addressLength) const +{ + switch (m_type) + { + case Operation: + case Tag: // 1 byte for the JUMPDEST + return 1; + case PushString: + return 33; + case Push: + return 1 + max(1, dev::bytesRequired(m_data)); + case PushSubSize: + case PushProgramSize: + return 4; // worst case: a 16MB program + case PushTag: + case PushData: + case PushSub: + return 1 + _addressLength; + default: + break; + } + BOOST_THROW_EXCEPTION(InvalidOpcode()); +} + +int AssemblyItem::deposit() const +{ + switch (m_type) + { + case Operation: + return instructionInfo(instruction()).ret - instructionInfo(instruction()).args; + case Push: + case PushString: + case PushTag: + case PushData: + case PushSub: + case PushSubSize: + case PushProgramSize: + return 1; + case Tag: + return 0; + default:; + } + return 0; +} + +string AssemblyItem::getJumpTypeAsString() const +{ + switch (m_jumpType) + { + case JumpType::IntoFunction: + return "[in]"; + case JumpType::OutOfFunction: + return "[out]"; + case JumpType::Ordinary: + default: + return ""; + } +} + +ostream& dev::eth::operator<<(ostream& _out, AssemblyItem const& _item) +{ + switch (_item.type()) + { + case Operation: + _out << " " << instructionInfo(_item.instruction()).name; + if (_item.instruction() == eth::Instruction::JUMP || _item.instruction() == eth::Instruction::JUMPI) + _out << "\t" << _item.getJumpTypeAsString(); + break; + case Push: + _out << " PUSH " << hex << _item.data(); + break; + case PushString: + _out << " PushString" << hex << (unsigned)_item.data(); + break; + case PushTag: + _out << " PushTag " << _item.data(); + break; + case Tag: + _out << " Tag " << _item.data(); + break; + case PushData: + _out << " PushData " << hex << (unsigned)_item.data(); + break; + case PushSub: + _out << " PushSub " << hex << h256(_item.data()).abridged(); + break; + case PushSubSize: + _out << " PushSubSize " << hex << h256(_item.data()).abridged(); + break; + case PushProgramSize: + _out << " PushProgramSize"; + break; + case UndefinedItem: + _out << " ???"; + break; + default: + BOOST_THROW_EXCEPTION(InvalidOpcode()); + } + return _out; +} + +ostream& dev::eth::operator<<(ostream& _out, AssemblyItemsConstRef _i) +{ + for (AssemblyItem const& i: _i) + _out << i; + return _out; +} diff --git a/AssemblyItem.h b/AssemblyItem.h new file mode 100644 index 000000000..6f2a65de9 --- /dev/null +++ b/AssemblyItem.h @@ -0,0 +1,100 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** @file Assembly.h + * @author Gav Wood + * @date 2014 + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include "Exceptions.h" + +namespace dev +{ +namespace eth +{ + +enum AssemblyItemType { UndefinedItem, Operation, Push, PushString, PushTag, PushSub, PushSubSize, PushProgramSize, Tag, PushData }; + +class Assembly; + +class AssemblyItem +{ +public: + enum class JumpType { Ordinary, IntoFunction, OutOfFunction }; + + AssemblyItem(u256 _push, SourceLocation const& _location = SourceLocation()): + AssemblyItem(Push, _push, _location) { } + AssemblyItem(Instruction _i, SourceLocation const& _location = SourceLocation()): + AssemblyItem(Operation, byte(_i), _location) { } + AssemblyItem(AssemblyItemType _type, u256 _data = 0, SourceLocation const& _location = SourceLocation()): + m_type(_type), + m_data(_data), + m_location(_location) + { + } + + AssemblyItem tag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(Tag, m_data); } + AssemblyItem pushTag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(PushTag, m_data); } + + AssemblyItemType type() const { return m_type; } + u256 const& data() const { return m_data; } + void setType(AssemblyItemType const _type) { m_type = _type; } + void setData(u256 const& _data) { m_data = _data; } + + /// @returns the instruction of this item (only valid if type() == Operation) + Instruction instruction() const { return Instruction(byte(m_data)); } + + /// @returns true iff the type and data of the items are equal. + bool operator==(AssemblyItem const& _other) const { return m_type == _other.m_type && m_data == _other.m_data; } + bool operator!=(AssemblyItem const& _other) const { return !operator==(_other); } + + /// @returns an upper bound for the number of bytes required by this item, assuming that + /// the value of a jump tag takes @a _addressLength bytes. + unsigned bytesRequired(unsigned _addressLength) const; + int deposit() const; + + bool match(AssemblyItem const& _i) const { return _i.m_type == UndefinedItem || (m_type == _i.m_type && (m_type != Operation || m_data == _i.m_data)); } + void setLocation(SourceLocation const& _location) { m_location = _location; } + SourceLocation const& getLocation() const { return m_location; } + + void setJumpType(JumpType _jumpType) { m_jumpType = _jumpType; } + JumpType getJumpType() const { return m_jumpType; } + std::string getJumpTypeAsString() const; + +private: + AssemblyItemType m_type; + u256 m_data; + SourceLocation m_location; + JumpType m_jumpType = JumpType::Ordinary; +}; + +using AssemblyItems = std::vector; +using AssemblyItemsConstRef = vector_ref; + +std::ostream& operator<<(std::ostream& _out, AssemblyItem const& _item); +std::ostream& operator<<(std::ostream& _out, AssemblyItemsConstRef _i); +inline std::ostream& operator<<(std::ostream& _out, AssemblyItems const& _i) { return operator<<(_out, AssemblyItemsConstRef(&_i)); } + +} +} diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 000000000..f8150806f --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,33 @@ +cmake_policy(SET CMP0015 NEW) +# this policy was introduced in cmake 3.0 +# remove if, once 3.0 will be used on unix +if (${CMAKE_MAJOR_VERSION} GREATER 2) + # old policy do not use MACOSX_RPATH + cmake_policy(SET CMP0042 OLD) +endif() +set(CMAKE_AUTOMOC OFF) + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSTATICLIB") + +aux_source_directory(. SRC_LIST) + +include_directories(BEFORE ${JSONCPP_INCLUDE_DIRS}) +include_directories(BEFORE ..) +include_directories(${Boost_INCLUDE_DIRS}) + +set(EXECUTABLE evmasm) + +file(GLOB HEADERS "*.h") + +if (ETH_STATIC) + add_library(${EXECUTABLE} STATIC ${SRC_LIST} ${HEADERS}) +else() + add_library(${EXECUTABLE} SHARED ${SRC_LIST} ${HEADERS}) +endif() + +target_link_libraries(${EXECUTABLE} evmcore) +target_link_libraries(${EXECUTABLE} devcrypto) + +install( TARGETS ${EXECUTABLE} RUNTIME DESTINATION bin ARCHIVE DESTINATION lib LIBRARY DESTINATION lib ) +install( FILES ${HEADERS} DESTINATION include/${EXECUTABLE} ) + diff --git a/CommonSubexpressionEliminator.cpp b/CommonSubexpressionEliminator.cpp new file mode 100644 index 000000000..63524d6f3 --- /dev/null +++ b/CommonSubexpressionEliminator.cpp @@ -0,0 +1,672 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** + * @file CommonSubexpressionEliminator.cpp + * @author Christian + * @date 2015 + * Optimizer step for common subexpression elimination and stack reorganisation. + */ + +#include +#include +#include +#include +#include + +using namespace std; +using namespace dev; +using namespace dev::eth; + +vector CommonSubexpressionEliminator::getOptimizedItems() +{ + optimizeBreakingItem(); + + map initialStackContents; + map targetStackContents; + int minHeight = m_stackHeight + 1; + if (!m_stackElements.empty()) + minHeight = min(minHeight, m_stackElements.begin()->first); + for (int height = minHeight; height <= 0; ++height) + initialStackContents[height] = initialStackElement(height, SourceLocation()); + for (int height = minHeight; height <= m_stackHeight; ++height) + targetStackContents[height] = stackElement(height, SourceLocation()); + + // Debug info: + //stream(cout, initialStackContents, targetStackContents); + + AssemblyItems items = CSECodeGenerator(m_expressionClasses, m_storeOperations).generateCode( + initialStackContents, + targetStackContents + ); + if (m_breakingItem) + items.push_back(*m_breakingItem); + return items; +} + +ostream& CommonSubexpressionEliminator::stream( + ostream& _out, + map _initialStack, + map _targetStack +) const +{ + auto streamExpressionClass = [this](ostream& _out, Id _id) + { + auto const& expr = m_expressionClasses.representative(_id); + _out << " " << dec << _id << ": " << *expr.item; + if (expr.sequenceNumber) + _out << "@" << dec << expr.sequenceNumber; + _out << "("; + for (Id arg: expr.arguments) + _out << dec << arg << ","; + _out << ")" << endl; + }; + + _out << "Optimizer analysis:" << endl; + _out << "Final stack height: " << dec << m_stackHeight << endl; + _out << "Equivalence classes: " << endl; + for (Id eqClass = 0; eqClass < m_expressionClasses.size(); ++eqClass) + streamExpressionClass(_out, eqClass); + + _out << "Initial stack: " << endl; + for (auto const& it: _initialStack) + { + _out << " " << dec << it.first << ": "; + streamExpressionClass(_out, it.second); + } + _out << "Target stack: " << endl; + for (auto const& it: _targetStack) + { + _out << " " << dec << it.first << ": "; + streamExpressionClass(_out, it.second); + } + + return _out; +} + +void CommonSubexpressionEliminator::feedItem(AssemblyItem const& _item, bool _copyItem) +{ + if (_item.type() != Operation) + { + assertThrow(_item.deposit() == 1, InvalidDeposit, ""); + setStackElement(++m_stackHeight, m_expressionClasses.find(_item, {}, _copyItem)); + } + else + { + Instruction instruction = _item.instruction(); + InstructionInfo info = instructionInfo(instruction); + if (SemanticInformation::isDupInstruction(_item)) + setStackElement( + m_stackHeight + 1, + stackElement( + m_stackHeight - int(instruction) + int(Instruction::DUP1), + _item.getLocation() + ) + ); + else if (SemanticInformation::isSwapInstruction(_item)) + swapStackElements( + m_stackHeight, + m_stackHeight - 1 - int(instruction) + int(Instruction::SWAP1), + _item.getLocation() + ); + else if (instruction != Instruction::POP) + { + vector arguments(info.args); + for (int i = 0; i < info.args; ++i) + arguments[i] = stackElement(m_stackHeight - i, _item.getLocation()); + if (_item.instruction() == Instruction::SSTORE) + storeInStorage(arguments[0], arguments[1], _item.getLocation()); + else if (_item.instruction() == Instruction::SLOAD) + setStackElement( + m_stackHeight + _item.deposit(), + loadFromStorage(arguments[0], _item.getLocation()) + ); + else if (_item.instruction() == Instruction::MSTORE) + storeInMemory(arguments[0], arguments[1], _item.getLocation()); + else if (_item.instruction() == Instruction::MLOAD) + setStackElement( + m_stackHeight + _item.deposit(), + loadFromMemory(arguments[0], _item.getLocation()) + ); + else if (_item.instruction() == Instruction::SHA3) + setStackElement( + m_stackHeight + _item.deposit(), + applySha3(arguments.at(0), arguments.at(1), _item.getLocation()) + ); + else + setStackElement( + m_stackHeight + _item.deposit(), + m_expressionClasses.find(_item, arguments, _copyItem) + ); + } + m_stackHeight += _item.deposit(); + } +} + +void CommonSubexpressionEliminator::optimizeBreakingItem() +{ + if (!m_breakingItem || *m_breakingItem != AssemblyItem(Instruction::JUMPI)) + return; + + SourceLocation const& location = m_breakingItem->getLocation(); + AssemblyItem::JumpType jumpType = m_breakingItem->getJumpType(); + + Id condition = stackElement(m_stackHeight - 1, location); + Id zero = m_expressionClasses.find(u256(0)); + if (m_expressionClasses.knownToBeDifferent(condition, zero)) + { + feedItem(AssemblyItem(Instruction::SWAP1, location), true); + feedItem(AssemblyItem(Instruction::POP, location), true); + + AssemblyItem item(Instruction::JUMP, location); + item.setJumpType(jumpType); + m_breakingItem = m_expressionClasses.storeItem(item); + return; + } + Id negatedCondition = m_expressionClasses.find(Instruction::ISZERO, {condition}); + if (m_expressionClasses.knownToBeDifferent(negatedCondition, zero)) + { + AssemblyItem it(Instruction::POP, location); + feedItem(it, true); + feedItem(it, true); + m_breakingItem = nullptr; + } +} + +void CommonSubexpressionEliminator::setStackElement(int _stackHeight, Id _class) +{ + m_stackElements[_stackHeight] = _class; +} + +void CommonSubexpressionEliminator::swapStackElements( + int _stackHeightA, + int _stackHeightB, + SourceLocation const& _location +) +{ + assertThrow(_stackHeightA != _stackHeightB, OptimizerException, "Swap on same stack elements."); + // ensure they are created + stackElement(_stackHeightA, _location); + stackElement(_stackHeightB, _location); + + swap(m_stackElements[_stackHeightA], m_stackElements[_stackHeightB]); +} + +ExpressionClasses::Id CommonSubexpressionEliminator::stackElement( + int _stackHeight, + SourceLocation const& _location +) +{ + if (m_stackElements.count(_stackHeight)) + return m_stackElements.at(_stackHeight); + // Stack element not found (not assigned yet), create new equivalence class. + return m_stackElements[_stackHeight] = initialStackElement(_stackHeight, _location); +} + +ExpressionClasses::Id CommonSubexpressionEliminator::initialStackElement( + int _stackHeight, + SourceLocation const& _location +) +{ + assertThrow(_stackHeight <= 0, OptimizerException, "Initial stack element of positive height requested."); + assertThrow(_stackHeight > -16, StackTooDeepException, ""); + // This is a special assembly item that refers to elements pre-existing on the initial stack. + return m_expressionClasses.find(AssemblyItem(dupInstruction(1 - _stackHeight), _location)); +} + +void CommonSubexpressionEliminator::storeInStorage(Id _slot, Id _value, SourceLocation const& _location) +{ + if (m_storageContent.count(_slot) && m_storageContent[_slot] == _value) + // do not execute the storage if we know that the value is already there + return; + m_sequenceNumber++; + decltype(m_storageContent) storageContents; + // Copy over all values (i.e. retain knowledge about them) where we know that this store + // operation will not destroy the knowledge. Specifically, we copy storage locations we know + // are different from _slot or locations where we know that the stored value is equal to _value. + for (auto const& storageItem: m_storageContent) + if (m_expressionClasses.knownToBeDifferent(storageItem.first, _slot) || storageItem.second == _value) + storageContents.insert(storageItem); + m_storageContent = move(storageContents); + + AssemblyItem item(Instruction::SSTORE, _location); + Id id = m_expressionClasses.find(item, {_slot, _value}, true, m_sequenceNumber); + m_storeOperations.push_back(StoreOperation(StoreOperation::Storage, _slot, m_sequenceNumber, id)); + m_storageContent[_slot] = _value; + // increment a second time so that we get unique sequence numbers for writes + m_sequenceNumber++; +} + +ExpressionClasses::Id CommonSubexpressionEliminator::loadFromStorage(Id _slot, SourceLocation const& _location) +{ + if (m_storageContent.count(_slot)) + return m_storageContent.at(_slot); + + AssemblyItem item(Instruction::SLOAD, _location); + return m_storageContent[_slot] = m_expressionClasses.find(item, {_slot}, true, m_sequenceNumber); +} + +void CommonSubexpressionEliminator::storeInMemory(Id _slot, Id _value, SourceLocation const& _location) +{ + if (m_memoryContent.count(_slot) && m_memoryContent[_slot] == _value) + // do not execute the store if we know that the value is already there + return; + m_sequenceNumber++; + decltype(m_memoryContent) memoryContents; + // copy over values at points where we know that they are different from _slot by at least 32 + for (auto const& memoryItem: m_memoryContent) + if (m_expressionClasses.knownToBeDifferentBy32(memoryItem.first, _slot)) + memoryContents.insert(memoryItem); + m_memoryContent = move(memoryContents); + + AssemblyItem item(Instruction::MSTORE, _location); + Id id = m_expressionClasses.find(item, {_slot, _value}, true, m_sequenceNumber); + m_storeOperations.push_back(StoreOperation(StoreOperation::Memory, _slot, m_sequenceNumber, id)); + m_memoryContent[_slot] = _value; + // increment a second time so that we get unique sequence numbers for writes + m_sequenceNumber++; +} + +ExpressionClasses::Id CommonSubexpressionEliminator::loadFromMemory(Id _slot, SourceLocation const& _location) +{ + if (m_memoryContent.count(_slot)) + return m_memoryContent.at(_slot); + + AssemblyItem item(Instruction::MLOAD, _location); + return m_memoryContent[_slot] = m_expressionClasses.find(item, {_slot}, true, m_sequenceNumber); +} + +CommonSubexpressionEliminator::Id CommonSubexpressionEliminator::applySha3( + Id _start, + Id _length, + SourceLocation const& _location +) +{ + AssemblyItem sha3Item(Instruction::SHA3, _location); + // Special logic if length is a short constant, otherwise we cannot tell. + u256 const* l = m_expressionClasses.knownConstant(_length); + // unknown or too large length + if (!l || *l > 128) + return m_expressionClasses.find(sha3Item, {_start, _length}, true, m_sequenceNumber); + + vector arguments; + for (u256 i = 0; i < *l; i += 32) + { + Id slot = m_expressionClasses.find( + AssemblyItem(Instruction::ADD, _location), + {_start, m_expressionClasses.find(i)} + ); + arguments.push_back(loadFromMemory(slot, _location)); + } + if (m_knownSha3Hashes.count(arguments)) + return m_knownSha3Hashes.at(arguments); + Id v; + // If all arguments are known constants, compute the sha3 here + if (all_of(arguments.begin(), arguments.end(), [this](Id _a) { return !!m_expressionClasses.knownConstant(_a); })) + { + bytes data; + for (Id a: arguments) + data += toBigEndian(*m_expressionClasses.knownConstant(a)); + data.resize(size_t(*l)); + v = m_expressionClasses.find(AssemblyItem(u256(sha3(data)), _location)); + } + else + v = m_expressionClasses.find(sha3Item, {_start, _length}, true, m_sequenceNumber); + return m_knownSha3Hashes[arguments] = v; +} + +CSECodeGenerator::CSECodeGenerator( + ExpressionClasses& _expressionClasses, + vector const& _storeOperations +): + m_expressionClasses(_expressionClasses) +{ + for (auto const& store: _storeOperations) + m_storeOperations[make_pair(store.target, store.slot)].push_back(store); +} + +AssemblyItems CSECodeGenerator::generateCode( + map const& _initialStack, + map const& _targetStackContents +) +{ + m_stack = _initialStack; + for (auto const& item: m_stack) + if (!m_classPositions.count(item.second)) + m_classPositions[item.second] = item.first; + + // @todo: provide information about the positions of copies of class elements + + // generate the dependency graph starting from final storage and memory writes and target stack contents + for (auto const& p: m_storeOperations) + addDependencies(p.second.back().expression); + for (auto const& targetItem: _targetStackContents) + { + m_finalClasses.insert(targetItem.second); + addDependencies(targetItem.second); + } + + // store all needed sequenced expressions + set> sequencedExpressions; + for (auto const& p: m_neededBy) + for (auto id: {p.first, p.second}) + if (unsigned seqNr = m_expressionClasses.representative(id).sequenceNumber) + sequencedExpressions.insert(make_pair(seqNr, id)); + + // Perform all operations on storage and memory in order, if they are needed. + for (auto const& seqAndId: sequencedExpressions) + if (!m_classPositions.count(seqAndId.second)) + generateClassElement(seqAndId.second, true); + + // generate the target stack elements + for (auto const& targetItem: _targetStackContents) + { + int position = generateClassElement(targetItem.second); + assertThrow(position != c_invalidPosition, OptimizerException, ""); + if (position == targetItem.first) + continue; + SourceLocation const& location = m_expressionClasses.representative(targetItem.second).item->getLocation(); + if (position < targetItem.first) + // it is already at its target, we need another copy + appendDup(position, location); + else + appendOrRemoveSwap(position, location); + appendOrRemoveSwap(targetItem.first, location); + } + + // remove surplus elements + while (removeStackTopIfPossible()) + { + // no-op + } + + // check validity + int finalHeight = 0; + if (!_targetStackContents.empty()) + // have target stack, so its height should be the final height + finalHeight = (--_targetStackContents.end())->first; + else if (!_initialStack.empty()) + // no target stack, only erase the initial stack + finalHeight = _initialStack.begin()->first - 1; + else + // neither initial no target stack, no change in height + finalHeight = 0; + assertThrow(finalHeight == m_stackHeight, OptimizerException, "Incorrect final stack height."); + return m_generatedItems; +} + +void CSECodeGenerator::addDependencies(Id _c) +{ + if (m_neededBy.count(_c)) + return; // we already computed the dependencies for _c + ExpressionClasses::Expression expr = m_expressionClasses.representative(_c); + for (Id argument: expr.arguments) + { + addDependencies(argument); + m_neededBy.insert(make_pair(argument, _c)); + } + if (expr.item->type() == Operation && ( + expr.item->instruction() == Instruction::SLOAD || + expr.item->instruction() == Instruction::MLOAD || + expr.item->instruction() == Instruction::SHA3 + )) + { + // this loads an unknown value from storage or memory and thus, in addition to its + // arguments, depends on all store operations to addresses where we do not know that + // they are different that occur before this load + StoreOperation::Target target = expr.item->instruction() == Instruction::SLOAD ? + StoreOperation::Storage : StoreOperation::Memory; + Id slotToLoadFrom = expr.arguments.at(0); + for (auto const& p: m_storeOperations) + { + if (p.first.first != target) + continue; + Id slot = p.first.second; + StoreOperations const& storeOps = p.second; + if (storeOps.front().sequenceNumber > expr.sequenceNumber) + continue; + bool knownToBeIndependent = false; + switch (expr.item->instruction()) + { + case Instruction::SLOAD: + knownToBeIndependent = m_expressionClasses.knownToBeDifferent(slot, slotToLoadFrom); + break; + case Instruction::MLOAD: + knownToBeIndependent = m_expressionClasses.knownToBeDifferentBy32(slot, slotToLoadFrom); + break; + case Instruction::SHA3: + { + Id length = expr.arguments.at(1); + AssemblyItem offsetInstr(Instruction::SUB, expr.item->getLocation()); + Id offsetToStart = m_expressionClasses.find(offsetInstr, {slot, slotToLoadFrom}); + u256 const* o = m_expressionClasses.knownConstant(offsetToStart); + u256 const* l = m_expressionClasses.knownConstant(length); + if (l && *l == 0) + knownToBeIndependent = true; + else if (o) + { + // We could get problems here if both *o and *l are larger than 2**254 + // but it is probably ok for the optimizer to produce wrong code for such cases + // which cannot be executed anyway because of the non-payable price. + if (u2s(*o) <= -32) + knownToBeIndependent = true; + else if (l && u2s(*o) >= 0 && *o >= *l) + knownToBeIndependent = true; + } + break; + } + default: + break; + } + if (knownToBeIndependent) + continue; + + // note that store and load never have the same sequence number + Id latestStore = storeOps.front().expression; + for (auto it = ++storeOps.begin(); it != storeOps.end(); ++it) + if (it->sequenceNumber < expr.sequenceNumber) + latestStore = it->expression; + addDependencies(latestStore); + m_neededBy.insert(make_pair(latestStore, _c)); + } + } +} + +int CSECodeGenerator::generateClassElement(Id _c, bool _allowSequenced) +{ + // do some cleanup + removeStackTopIfPossible(); + + if (m_classPositions.count(_c)) + { + assertThrow( + m_classPositions[_c] != c_invalidPosition, + OptimizerException, + "Element already removed but still needed." + ); + return m_classPositions[_c]; + } + ExpressionClasses::Expression const& expr = m_expressionClasses.representative(_c); + assertThrow( + _allowSequenced || expr.sequenceNumber == 0, + OptimizerException, + "Sequence constrained operation requested out of sequence." + ); + vector const& arguments = expr.arguments; + for (Id arg: boost::adaptors::reverse(arguments)) + generateClassElement(arg); + + SourceLocation const& location = expr.item->getLocation(); + // The arguments are somewhere on the stack now, so it remains to move them at the correct place. + // This is quite difficult as sometimes, the values also have to removed in this process + // (if canBeRemoved() returns true) and the two arguments can be equal. For now, this is + // implemented for every single case for combinations of up to two arguments manually. + if (arguments.size() == 1) + { + if (canBeRemoved(arguments[0], _c)) + appendOrRemoveSwap(classElementPosition(arguments[0]), location); + else + appendDup(classElementPosition(arguments[0]), location); + } + else if (arguments.size() == 2) + { + if (canBeRemoved(arguments[1], _c)) + { + appendOrRemoveSwap(classElementPosition(arguments[1]), location); + if (arguments[0] == arguments[1]) + appendDup(m_stackHeight, location); + else if (canBeRemoved(arguments[0], _c)) + { + appendOrRemoveSwap(m_stackHeight - 1, location); + appendOrRemoveSwap(classElementPosition(arguments[0]), location); + } + else + appendDup(classElementPosition(arguments[0]), location); + } + else + { + if (arguments[0] == arguments[1]) + { + appendDup(classElementPosition(arguments[0]), location); + appendDup(m_stackHeight, location); + } + else if (canBeRemoved(arguments[0], _c)) + { + appendOrRemoveSwap(classElementPosition(arguments[0]), location); + appendDup(classElementPosition(arguments[1]), location); + appendOrRemoveSwap(m_stackHeight - 1, location); + } + else + { + appendDup(classElementPosition(arguments[1]), location); + appendDup(classElementPosition(arguments[0]), location); + } + } + } + else + assertThrow( + arguments.size() <= 2, + OptimizerException, + "Opcodes with more than two arguments not implemented yet." + ); + for (size_t i = 0; i < arguments.size(); ++i) + assertThrow(m_stack[m_stackHeight - i] == arguments[i], OptimizerException, "Expected arguments not present." ); + + while (SemanticInformation::isCommutativeOperation(*expr.item) && + !m_generatedItems.empty() && + m_generatedItems.back() == AssemblyItem(Instruction::SWAP1)) + // this will not append a swap but remove the one that is already there + appendOrRemoveSwap(m_stackHeight - 1, location); + for (auto arg: arguments) + if (canBeRemoved(arg, _c)) + m_classPositions[arg] = c_invalidPosition; + for (size_t i = 0; i < arguments.size(); ++i) + m_stack.erase(m_stackHeight - i); + appendItem(*expr.item); + if (expr.item->type() != Operation || instructionInfo(expr.item->instruction()).ret == 1) + { + m_stack[m_stackHeight] = _c; + return m_classPositions[_c] = m_stackHeight; + } + else + { + assertThrow( + instructionInfo(expr.item->instruction()).ret == 0, + OptimizerException, + "Invalid number of return values." + ); + return m_classPositions[_c] = c_invalidPosition; + } +} + +int CSECodeGenerator::classElementPosition(Id _id) const +{ + assertThrow( + m_classPositions.count(_id) && m_classPositions.at(_id) != c_invalidPosition, + OptimizerException, + "Element requested but is not present." + ); + return m_classPositions.at(_id); +} + +bool CSECodeGenerator::canBeRemoved(Id _element, Id _result) +{ + // Returns false if _element is finally needed or is needed by a class that has not been + // computed yet. Note that m_classPositions also includes classes that were deleted in the meantime. + if (m_finalClasses.count(_element)) + return false; + + auto range = m_neededBy.equal_range(_element); + for (auto it = range.first; it != range.second; ++it) + if (it->second != _result && !m_classPositions.count(it->second)) + return false; + return true; +} + +bool CSECodeGenerator::removeStackTopIfPossible() +{ + if (m_stack.empty()) + return false; + assertThrow(m_stack.count(m_stackHeight) > 0, OptimizerException, ""); + Id top = m_stack[m_stackHeight]; + if (!canBeRemoved(top)) + return false; + m_generatedItems.push_back(AssemblyItem(Instruction::POP)); + m_stack.erase(m_stackHeight); + m_stackHeight--; + return true; +} + +void CSECodeGenerator::appendDup(int _fromPosition, SourceLocation const& _location) +{ + assertThrow(_fromPosition != c_invalidPosition, OptimizerException, ""); + int instructionNum = 1 + m_stackHeight - _fromPosition; + assertThrow(instructionNum <= 16, StackTooDeepException, "Stack too deep."); + assertThrow(1 <= instructionNum, OptimizerException, "Invalid stack access."); + appendItem(AssemblyItem(dupInstruction(instructionNum), _location)); + m_stack[m_stackHeight] = m_stack[_fromPosition]; +} + +void CSECodeGenerator::appendOrRemoveSwap(int _fromPosition, SourceLocation const& _location) +{ + assertThrow(_fromPosition != c_invalidPosition, OptimizerException, ""); + if (_fromPosition == m_stackHeight) + return; + int instructionNum = m_stackHeight - _fromPosition; + assertThrow(instructionNum <= 16, StackTooDeepException, "Stack too deep."); + assertThrow(1 <= instructionNum, OptimizerException, "Invalid stack access."); + appendItem(AssemblyItem(swapInstruction(instructionNum), _location)); + // The value of a class can be present in multiple locations on the stack. We only update the + // "canonical" one that is tracked by m_classPositions + if (m_classPositions[m_stack[m_stackHeight]] == m_stackHeight) + m_classPositions[m_stack[m_stackHeight]] = _fromPosition; + if (m_classPositions[m_stack[_fromPosition]] == _fromPosition) + m_classPositions[m_stack[_fromPosition]] = m_stackHeight; + swap(m_stack[m_stackHeight], m_stack[_fromPosition]); + if (m_generatedItems.size() >= 2 && + SemanticInformation::isSwapInstruction(m_generatedItems.back()) && + *(m_generatedItems.end() - 2) == m_generatedItems.back()) + { + m_generatedItems.pop_back(); + m_generatedItems.pop_back(); + } +} + +void CSECodeGenerator::appendItem(AssemblyItem const& _item) +{ + m_generatedItems.push_back(_item); + m_stackHeight += _item.deposit(); +} diff --git a/CommonSubexpressionEliminator.h b/CommonSubexpressionEliminator.h new file mode 100644 index 000000000..6156bc81a --- /dev/null +++ b/CommonSubexpressionEliminator.h @@ -0,0 +1,233 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** + * @file CommonSubexpressionEliminator.h + * @author Christian + * @date 2015 + * Optimizer step for common subexpression elimination and stack reorganisation. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace dev +{ +namespace eth +{ + +class AssemblyItem; +using AssemblyItems = std::vector; + +/** + * Optimizer step that performs common subexpression elimination and stack reorganisation, + * i.e. it tries to infer equality among expressions and compute the values of two expressions + * known to be equal only once. + * + * The general workings are that for each assembly item that is fed into the eliminator, an + * equivalence class is derived from the operation and the equivalence class of its arguments. + * DUPi, SWAPi and some arithmetic instructions are used to infer equivalences while these + * classes are determined. + * + * When the list of optimized items is requested, they are generated in a bottom-up fashion, + * adding code for equivalence classes that were not yet computed. + */ +class CommonSubexpressionEliminator +{ +public: + using Id = ExpressionClasses::Id; + struct StoreOperation + { + enum Target { Memory, Storage }; + StoreOperation( + Target _target, + Id _slot, + unsigned _sequenceNumber, + Id _expression + ): target(_target), slot(_slot), sequenceNumber(_sequenceNumber), expression(_expression) {} + Target target; + Id slot; + unsigned sequenceNumber; + Id expression; + }; + + /// Feeds AssemblyItems into the eliminator and @returns the iterator pointing at the first + /// item that must be fed into a new instance of the eliminator. + template + _AssemblyItemIterator feedItems(_AssemblyItemIterator _iterator, _AssemblyItemIterator _end); + + /// @returns the resulting items after optimization. + AssemblyItems getOptimizedItems(); + + /// Streams debugging information to @a _out. + std::ostream& stream( + std::ostream& _out, + std::map _initialStack = std::map(), + std::map _targetStack = std::map() + ) const; + +private: + /// Feeds the item into the system for analysis. + void feedItem(AssemblyItem const& _item, bool _copyItem = false); + + /// Tries to optimize the item that breaks the basic block at the end. + void optimizeBreakingItem(); + + /// Simplifies the given item using + /// Assigns a new equivalence class to the next sequence number of the given stack element. + void setStackElement(int _stackHeight, Id _class); + /// Swaps the given stack elements in their next sequence number. + void swapStackElements(int _stackHeightA, int _stackHeightB, SourceLocation const& _location); + /// Retrieves the current equivalence class fo the given stack element (or generates a new + /// one if it does not exist yet). + Id stackElement(int _stackHeight, SourceLocation const& _location); + /// @returns the equivalence class id of the special initial stack element at the given height + /// (must not be positive). + Id initialStackElement(int _stackHeight, SourceLocation const& _location); + + /// Increments the sequence number, deletes all storage information that might be overwritten + /// and stores the new value at the given slot. + void storeInStorage(Id _slot, Id _value, SourceLocation const& _location); + /// Retrieves the current value at the given slot in storage or creates a new special sload class. + Id loadFromStorage(Id _slot, SourceLocation const& _location); + /// Increments the sequence number, deletes all memory information that might be overwritten + /// and stores the new value at the given slot. + void storeInMemory(Id _slot, Id _value, SourceLocation const& _location); + /// Retrieves the current value at the given slot in memory or creates a new special mload class. + Id loadFromMemory(Id _slot, SourceLocation const& _location); + /// Finds or creates a new expression that applies the sha3 hash function to the contents in memory. + Id applySha3(Id _start, Id _length, SourceLocation const& _location); + + /// Current stack height, can be negative. + int m_stackHeight = 0; + /// Current stack layout, mapping stack height -> equivalence class + std::map m_stackElements; + /// Current sequence number, this is incremented with each modification to storage or memory. + unsigned m_sequenceNumber = 1; + /// Knowledge about storage content. + std::map m_storageContent; + /// Knowledge about memory content. Keys are memory addresses, note that the values overlap + /// and are not contained here if they are not completely known. + std::map m_memoryContent; + /// Keeps record of all sha3 hashes that are computed. + std::map, Id> m_knownSha3Hashes; + /// Keeps information about which storage or memory slots were written to at which sequence + /// number with what instruction. + std::vector m_storeOperations; + /// Structure containing the classes of equivalent expressions. + ExpressionClasses m_expressionClasses; + + /// The item that breaks the basic block, can be nullptr. + /// It is usually appended to the block but can be optimized in some cases. + AssemblyItem const* m_breakingItem = nullptr; +}; + +/** + * Unit that generates code from current stack layout, target stack layout and information about + * the equivalence classes. + */ +class CSECodeGenerator +{ +public: + using StoreOperation = CommonSubexpressionEliminator::StoreOperation; + using StoreOperations = std::vector; + using Id = ExpressionClasses::Id; + + /// Initializes the code generator with the given classes and store operations. + /// The store operations have to be sorted by sequence number in ascending order. + CSECodeGenerator(ExpressionClasses& _expressionClasses, StoreOperations const& _storeOperations); + + /// @returns the assembly items generated from the given requirements + /// @param _initialStack current contents of the stack (up to stack height of zero) + /// @param _targetStackContents final contents of the stack, by stack height relative to initial + /// @note should only be called once on each object. + AssemblyItems generateCode( + std::map const& _initialStack, + std::map const& _targetStackContents + ); + +private: + /// Recursively discovers all dependencies to @a m_requests. + void addDependencies(Id _c); + + /// Produce code that generates the given element if it is not yet present. + /// @returns the stack position of the element or c_invalidPosition if it does not actually + /// generate a value on the stack. + /// @param _allowSequenced indicates that sequence-constrained operations are allowed + int generateClassElement(Id _c, bool _allowSequenced = false); + /// @returns the position of the representative of the given id on the stack. + /// @note throws an exception if it is not on the stack. + int classElementPosition(Id _id) const; + + /// @returns true if @a _element can be removed - in general or, if given, while computing @a _result. + bool canBeRemoved(Id _element, Id _result = Id(-1)); + + /// Appends code to remove the topmost stack element if it can be removed. + bool removeStackTopIfPossible(); + + /// Appends a dup instruction to m_generatedItems to retrieve the element at the given stack position. + void appendDup(int _fromPosition, SourceLocation const& _location); + /// Appends a swap instruction to m_generatedItems to retrieve the element at the given stack position. + /// @note this might also remove the last item if it exactly the same swap instruction. + void appendOrRemoveSwap(int _fromPosition, SourceLocation const& _location); + /// Appends the given assembly item. + void appendItem(AssemblyItem const& _item); + + static const int c_invalidPosition = -0x7fffffff; + + AssemblyItems m_generatedItems; + /// Current height of the stack relative to the start. + int m_stackHeight = 0; + /// If (b, a) is in m_requests then b is needed to compute a. + std::multimap m_neededBy; + /// Current content of the stack. + std::map m_stack; + /// Current positions of equivalence classes, equal to c_invalidPosition if already deleted. + std::map m_classPositions; + + /// The actual eqivalence class items and how to compute them. + ExpressionClasses& m_expressionClasses; + /// Keeps information about which storage or memory slots were written to by which operations. + /// The operations are sorted ascendingly by sequence number. + std::map, StoreOperations> m_storeOperations; + /// The set of equivalence classes that should be present on the stack at the end. + std::set m_finalClasses; +}; + +template +_AssemblyItemIterator CommonSubexpressionEliminator::feedItems( + _AssemblyItemIterator _iterator, + _AssemblyItemIterator _end +) +{ + for (; _iterator != _end && !SemanticInformation::breaksCSEAnalysisBlock(*_iterator); ++_iterator) + feedItem(*_iterator); + if (_iterator != _end) + m_breakingItem = &(*_iterator++); + return _iterator; +} + +} +} diff --git a/ControlFlowGraph.cpp b/ControlFlowGraph.cpp new file mode 100644 index 000000000..cc4367e64 --- /dev/null +++ b/ControlFlowGraph.cpp @@ -0,0 +1,260 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** + * @file ControlFlowGraph.cpp + * @author Christian + * @date 2015 + * Control flow analysis for the optimizer. + */ + +#include +#include +#include +#include +#include + +using namespace std; +using namespace dev; +using namespace dev::eth; + +BlockId::BlockId(u256 const& _id): m_id(_id) +{ + assertThrow( _id < initial().m_id, OptimizerException, "Tag number too large."); +} + +AssemblyItems ControlFlowGraph::optimisedItems() +{ + if (m_items.empty()) + return m_items; + + findLargestTag(); + splitBlocks(); + resolveNextLinks(); + removeUnusedBlocks(); + setPrevLinks(); + + return rebuildCode(); +} + +void ControlFlowGraph::findLargestTag() +{ + m_lastUsedId = 0; + for (auto const& item: m_items) + if (item.type() == Tag || item.type() == PushTag) + { + // Assert that it can be converted. + BlockId(item.data()); + m_lastUsedId = max(unsigned(item.data()), m_lastUsedId); + } +} + +void ControlFlowGraph::splitBlocks() +{ + m_blocks.clear(); + BlockId id = BlockId::initial(); + m_blocks[id].begin = 0; + for (size_t index = 0; index < m_items.size(); ++index) + { + AssemblyItem const& item = m_items.at(index); + if (item.type() == Tag) + { + if (id) + m_blocks[id].end = index; + id = BlockId::invalid(); + } + if (!id) + { + id = item.type() == Tag ? BlockId(item.data()) : generateNewId(); + m_blocks[id].begin = index; + } + if (item.type() == PushTag) + m_blocks[id].pushedTags.push_back(BlockId(item.data())); + if (SemanticInformation::altersControlFlow(item)) + { + m_blocks[id].end = index + 1; + if (item == Instruction::JUMP) + m_blocks[id].endType = BasicBlock::EndType::JUMP; + else if (item == Instruction::JUMPI) + m_blocks[id].endType = BasicBlock::EndType::JUMPI; + else + m_blocks[id].endType = BasicBlock::EndType::STOP; + id = BlockId::invalid(); + } + } + if (id) + { + m_blocks[id].end = m_items.size(); + if (m_blocks[id].endType == BasicBlock::EndType::HANDOVER) + m_blocks[id].endType = BasicBlock::EndType::STOP; + } +} + +void ControlFlowGraph::resolveNextLinks() +{ + map blockByBeginPos; + for (auto const& idAndBlock: m_blocks) + if (idAndBlock.second.begin != idAndBlock.second.end) + blockByBeginPos[idAndBlock.second.begin] = idAndBlock.first; + + for (auto& idAndBlock: m_blocks) + { + BasicBlock& block = idAndBlock.second; + switch (block.endType) + { + case BasicBlock::EndType::JUMPI: + case BasicBlock::EndType::HANDOVER: + assertThrow( + blockByBeginPos.count(block.end), + OptimizerException, + "Successor block not found." + ); + block.next = blockByBeginPos.at(block.end); + break; + default: + break; + } + } +} + +void ControlFlowGraph::removeUnusedBlocks() +{ + vector blocksToProcess{BlockId::initial()}; + set neededBlocks{BlockId::initial()}; + while (!blocksToProcess.empty()) + { + BasicBlock const& block = m_blocks.at(blocksToProcess.back()); + blocksToProcess.pop_back(); + for (BlockId tag: block.pushedTags) + if (!neededBlocks.count(tag)) + { + neededBlocks.insert(tag); + blocksToProcess.push_back(tag); + } + if (block.next && !neededBlocks.count(block.next)) + { + neededBlocks.insert(block.next); + blocksToProcess.push_back(block.next); + } + } + for (auto it = m_blocks.begin(); it != m_blocks.end();) + if (neededBlocks.count(it->first)) + ++it; + else + m_blocks.erase(it++); +} + +void ControlFlowGraph::setPrevLinks() +{ + for (auto& idAndBlock: m_blocks) + { + BasicBlock& block = idAndBlock.second; + switch (block.endType) + { + case BasicBlock::EndType::JUMPI: + case BasicBlock::EndType::HANDOVER: + assertThrow( + !m_blocks.at(block.next).prev, + OptimizerException, + "Successor already has predecessor." + ); + m_blocks[block.next].prev = idAndBlock.first; + break; + default: + break; + } + } + // If block ends with jump to not yet linked block, link them removing the jump + for (auto& idAndBlock: m_blocks) + { + BlockId blockId = idAndBlock.first; + BasicBlock& block = idAndBlock.second; + if (block.endType != BasicBlock::EndType::JUMP || block.end - block.begin < 2) + continue; + AssemblyItem const& push = m_items.at(block.end - 2); + if (push.type() != PushTag) + continue; + BlockId nextId(push.data()); + if (m_blocks.at(nextId).prev) + continue; + bool hasLoop = false; + for (BlockId id = nextId; id && !hasLoop; id = m_blocks.at(id).next) + hasLoop = (id == blockId); + if (hasLoop) + continue; + + m_blocks[nextId].prev = blockId; + block.next = nextId; + block.end -= 2; + assertThrow( + !block.pushedTags.empty() && block.pushedTags.back() == nextId, + OptimizerException, + "Last pushed tag not at end of pushed list." + ); + block.pushedTags.pop_back(); + block.endType = BasicBlock::EndType::HANDOVER; + } +} + +AssemblyItems ControlFlowGraph::rebuildCode() +{ + map pushes; + for (auto& idAndBlock: m_blocks) + for (BlockId ref: idAndBlock.second.pushedTags) + pushes[ref]++; + + set blocksToAdd; + for (auto it: m_blocks) + blocksToAdd.insert(it.first); + set blocksAdded; + AssemblyItems code; + + for ( + BlockId blockId = BlockId::initial(); + blockId; + blockId = blocksToAdd.empty() ? BlockId::invalid() : *blocksToAdd.begin() + ) + { + bool previousHandedOver = (blockId == BlockId::initial()); + while (m_blocks.at(blockId).prev) + blockId = m_blocks.at(blockId).prev; + for (; blockId; blockId = m_blocks.at(blockId).next) + { + BasicBlock const& block = m_blocks.at(blockId); + blocksToAdd.erase(blockId); + blocksAdded.insert(blockId); + + auto begin = m_items.begin() + block.begin; + auto end = m_items.begin() + block.end; + if (begin == end) + continue; + // If block starts with unused tag, skip it. + if (previousHandedOver && !pushes[blockId] && begin->type() == Tag) + ++begin; + previousHandedOver = (block.endType == BasicBlock::EndType::HANDOVER); + copy(begin, end, back_inserter(code)); + } + } + + return code; +} + +BlockId ControlFlowGraph::generateNewId() +{ + BlockId id = BlockId(++m_lastUsedId); + assertThrow(id < BlockId::initial(), OptimizerException, "Out of block IDs."); + return id; +} diff --git a/ControlFlowGraph.h b/ControlFlowGraph.h new file mode 100644 index 000000000..5d16df327 --- /dev/null +++ b/ControlFlowGraph.h @@ -0,0 +1,108 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** + * @file ControlFlowGraph.h + * @author Christian + * @date 2015 + * Control flow analysis for the optimizer. + */ + +#pragma once + +#include +#include +#include + +namespace dev +{ +namespace eth +{ + +class AssemblyItem; +using AssemblyItems = std::vector; + +/** + * Identifier for a block, coincides with the tag number of an AssemblyItem but adds a special + * ID for the inital block. + */ +class BlockId +{ +public: + BlockId() { *this = invalid(); } + explicit BlockId(unsigned _id): m_id(_id) {} + explicit BlockId(u256 const& _id); + static BlockId initial() { return BlockId(-2); } + static BlockId invalid() { return BlockId(-1); } + + bool operator==(BlockId const& _other) const { return m_id == _other.m_id; } + bool operator!=(BlockId const& _other) const { return m_id != _other.m_id; } + bool operator<(BlockId const& _other) const { return m_id < _other.m_id; } + explicit operator bool() const { return *this != invalid(); } + +private: + unsigned m_id; +}; + +/** + * Control flow block inside which instruction counter is always incremented by one + * (except for possibly the last instruction). + */ +struct BasicBlock +{ + /// Start index into assembly item list. + unsigned begin = 0; + /// End index (excluded) inte assembly item list. + unsigned end = 0; + /// Tags pushed inside this block, with multiplicity. + std::vector pushedTags; + /// ID of the block that always follows this one (either JUMP or flow into new block), + /// or BlockId::invalid() otherwise + BlockId next = BlockId::invalid(); + /// ID of the block that has to precede this one. + BlockId prev = BlockId::invalid(); + + enum class EndType { JUMP, JUMPI, STOP, HANDOVER }; + EndType endType = EndType::HANDOVER; +}; + +class ControlFlowGraph +{ +public: + /// Initializes the control flow graph. + /// @a _items has to persist across the usage of this class. + ControlFlowGraph(AssemblyItems const& _items): m_items(_items) {} + /// @returns the collection of optimised items, should be called only once. + AssemblyItems optimisedItems(); + +private: + void findLargestTag(); + void splitBlocks(); + void resolveNextLinks(); + void removeUnusedBlocks(); + void setPrevLinks(); + AssemblyItems rebuildCode(); + + BlockId generateNewId(); + + unsigned m_lastUsedId = 0; + AssemblyItems const& m_items; + std::map m_blocks; +}; + + +} +} diff --git a/Exceptions.h b/Exceptions.h new file mode 100644 index 000000000..7cc190e41 --- /dev/null +++ b/Exceptions.h @@ -0,0 +1,36 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** @file Exceptions.h + * @author Christian + * @date 2014 + */ + +#pragma once + +#include + +namespace dev +{ +namespace eth +{ + +struct AssemblyException: virtual Exception {}; +struct OptimizerException: virtual AssemblyException {}; +struct StackTooDeepException: virtual OptimizerException {}; + +} +} diff --git a/ExpressionClasses.cpp b/ExpressionClasses.cpp new file mode 100644 index 000000000..1e60a7fe8 --- /dev/null +++ b/ExpressionClasses.cpp @@ -0,0 +1,438 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** + * @file ExpressionClasses.cpp + * @author Christian + * @date 2015 + * Container for equivalence classes of expressions for use in common subexpression elimination. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace dev; +using namespace dev::eth; + + +bool ExpressionClasses::Expression::operator<(ExpressionClasses::Expression const& _other) const +{ + auto type = item->type(); + auto otherType = _other.item->type(); + return std::tie(type, item->data(), arguments, sequenceNumber) < + std::tie(otherType, _other.item->data(), _other.arguments, _other.sequenceNumber); +} + +ExpressionClasses::Id ExpressionClasses::find( + AssemblyItem const& _item, + Ids const& _arguments, + bool _copyItem, + unsigned _sequenceNumber +) +{ + Expression exp; + exp.id = Id(-1); + exp.item = &_item; + exp.arguments = _arguments; + exp.sequenceNumber = _sequenceNumber; + + if (SemanticInformation::isCommutativeOperation(_item)) + sort(exp.arguments.begin(), exp.arguments.end()); + + auto it = m_expressions.find(exp); + if (it != m_expressions.end()) + return it->id; + + if (_copyItem) + exp.item = storeItem(_item); + + ExpressionClasses::Id id = tryToSimplify(exp); + if (id < m_representatives.size()) + exp.id = id; + else + { + exp.id = m_representatives.size(); + m_representatives.push_back(exp); + } + m_expressions.insert(exp); + return exp.id; +} + +bool ExpressionClasses::knownToBeDifferent(ExpressionClasses::Id _a, ExpressionClasses::Id _b) +{ + // Try to simplify "_a - _b" and return true iff the value is a non-zero constant. + return knownNonZero(find(Instruction::SUB, {_a, _b})); +} + +bool ExpressionClasses::knownToBeDifferentBy32(ExpressionClasses::Id _a, ExpressionClasses::Id _b) +{ + // Try to simplify "_a - _b" and return true iff the value is at least 32 away from zero. + u256 const* v = knownConstant(find(Instruction::SUB, {_a, _b})); + // forbidden interval is ["-31", 31] + return v && *v + 31 > u256(62); +} + +bool ExpressionClasses::knownZero(Id _c) +{ + return Pattern(u256(0)).matches(representative(_c), *this); +} + +bool ExpressionClasses::knownNonZero(Id _c) +{ + return Pattern(u256(0)).matches(representative(find(Instruction::ISZERO, {_c})), *this); +} + +u256 const* ExpressionClasses::knownConstant(Id _c) +{ + map matchGroups; + Pattern constant(Push); + constant.setMatchGroup(1, matchGroups); + if (!constant.matches(representative(_c), *this)) + return nullptr; + return &constant.d(); +} + +AssemblyItem const* ExpressionClasses::storeItem(AssemblyItem const& _item) +{ + m_spareAssemblyItems.push_back(make_shared(_item)); + return m_spareAssemblyItems.back().get(); +} + +string ExpressionClasses::fullDAGToString(ExpressionClasses::Id _id) const +{ + Expression const& expr = representative(_id); + stringstream str; + str << dec << expr.id << ":" << *expr.item << "("; + for (Id arg: expr.arguments) + str << fullDAGToString(arg) << ","; + str << ")"; + return str.str(); +} + +class Rules: public boost::noncopyable +{ +public: + Rules(); + void resetMatchGroups() { m_matchGroups.clear(); } + vector>> rules() const { return m_rules; } + +private: + using Expression = ExpressionClasses::Expression; + map m_matchGroups; + vector>> m_rules; +}; + +Rules::Rules() +{ + // Multiple occurences of one of these inside one rule must match the same equivalence class. + // Constants. + Pattern A(Push); + Pattern B(Push); + Pattern C(Push); + // Anything. + Pattern X; + Pattern Y; + Pattern Z; + A.setMatchGroup(1, m_matchGroups); + B.setMatchGroup(2, m_matchGroups); + C.setMatchGroup(3, m_matchGroups); + X.setMatchGroup(4, m_matchGroups); + Y.setMatchGroup(5, m_matchGroups); + Z.setMatchGroup(6, m_matchGroups); + + m_rules = vector>>{ + // arithmetics on constants + {{Instruction::ADD, {A, B}}, [=]{ return A.d() + B.d(); }}, + {{Instruction::MUL, {A, B}}, [=]{ return A.d() * B.d(); }}, + {{Instruction::SUB, {A, B}}, [=]{ return A.d() - B.d(); }}, + {{Instruction::DIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : A.d() / B.d(); }}, + {{Instruction::SDIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(u2s(A.d()) / u2s(B.d())); }}, + {{Instruction::MOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : A.d() % B.d(); }}, + {{Instruction::SMOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(u2s(A.d()) % u2s(B.d())); }}, + {{Instruction::EXP, {A, B}}, [=]{ return u256(boost::multiprecision::powm(bigint(A.d()), bigint(B.d()), bigint(1) << 256)); }}, + {{Instruction::NOT, {A}}, [=]{ return ~A.d(); }}, + {{Instruction::LT, {A, B}}, [=]() { return A.d() < B.d() ? u256(1) : 0; }}, + {{Instruction::GT, {A, B}}, [=]() -> u256 { return A.d() > B.d() ? 1 : 0; }}, + {{Instruction::SLT, {A, B}}, [=]() -> u256 { return u2s(A.d()) < u2s(B.d()) ? 1 : 0; }}, + {{Instruction::SGT, {A, B}}, [=]() -> u256 { return u2s(A.d()) > u2s(B.d()) ? 1 : 0; }}, + {{Instruction::EQ, {A, B}}, [=]() -> u256 { return A.d() == B.d() ? 1 : 0; }}, + {{Instruction::ISZERO, {A}}, [=]() -> u256 { return A.d() == 0 ? 1 : 0; }}, + {{Instruction::AND, {A, B}}, [=]{ return A.d() & B.d(); }}, + {{Instruction::OR, {A, B}}, [=]{ return A.d() | B.d(); }}, + {{Instruction::XOR, {A, B}}, [=]{ return A.d() ^ B.d(); }}, + {{Instruction::BYTE, {A, B}}, [=]{ return A.d() >= 32 ? 0 : (B.d() >> unsigned(8 * (31 - A.d()))) & 0xff; }}, + {{Instruction::ADDMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) + bigint(B.d())) % C.d()); }}, + {{Instruction::MULMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) * bigint(B.d())) % C.d()); }}, + {{Instruction::MULMOD, {A, B, C}}, [=]{ return A.d() * B.d(); }}, + {{Instruction::SIGNEXTEND, {A, B}}, [=]() -> u256 { + if (A.d() >= 31) + return B.d(); + unsigned testBit = unsigned(A.d()) * 8 + 7; + u256 mask = (u256(1) << testBit) - 1; + return u256(boost::multiprecision::bit_test(B.d(), testBit) ? B.d() | ~mask : B.d() & mask); + }}, + + // invariants involving known constants + {{Instruction::ADD, {X, 0}}, [=]{ return X; }}, + {{Instruction::MUL, {X, 1}}, [=]{ return X; }}, + {{Instruction::DIV, {X, 1}}, [=]{ return X; }}, + {{Instruction::SDIV, {X, 1}}, [=]{ return X; }}, + {{Instruction::OR, {X, 0}}, [=]{ return X; }}, + {{Instruction::XOR, {X, 0}}, [=]{ return X; }}, + {{Instruction::AND, {X, ~u256(0)}}, [=]{ return X; }}, + {{Instruction::MUL, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::DIV, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::MOD, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::MOD, {0, X}}, [=]{ return u256(0); }}, + {{Instruction::AND, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::OR, {X, ~u256(0)}}, [=]{ return ~u256(0); }}, + // operations involving an expression and itself + {{Instruction::AND, {X, X}}, [=]{ return X; }}, + {{Instruction::OR, {X, X}}, [=]{ return X; }}, + {{Instruction::SUB, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::EQ, {X, X}}, [=]{ return u256(1); }}, + {{Instruction::LT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::SLT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::GT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::SGT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::MOD, {X, X}}, [=]{ return u256(0); }}, + + {{Instruction::NOT, {{Instruction::NOT, {X}}}}, [=]{ return X; }}, + }; + // Associative operations + for (auto const& opFun: vector>>{ + {Instruction::ADD, plus()}, + {Instruction::MUL, multiplies()}, + {Instruction::AND, bit_and()}, + {Instruction::OR, bit_or()}, + {Instruction::XOR, bit_xor()} + }) + { + auto op = opFun.first; + auto fun = opFun.second; + // Moving constants to the outside, order matters here! + // we need actions that return expressions (or patterns?) here, and we need also reversed rules + // (X+A)+B -> X+(A+B) + m_rules += vector>>{{ + {op, {{op, {X, A}}, B}}, + [=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; } + }, { + // X+(Y+A) -> (X+Y)+A + {op, {{op, {X, A}}, Y}}, + [=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; } + }, { + // For now, we still need explicit commutativity for the inner pattern + {op, {{op, {A, X}}, B}}, + [=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; } + }, { + {op, {{op, {A, X}}, Y}}, + [=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; } + }}; + } + // move constants across subtractions + m_rules += vector>>{ + { + // X - A -> X + (-A) + {Instruction::SUB, {X, A}}, + [=]() -> Pattern { return {Instruction::ADD, {X, 0 - A.d()}}; } + }, { + // (X + A) - Y -> (X - Y) + A + {Instruction::SUB, {{Instruction::ADD, {X, A}}, Y}}, + [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, A}}; } + }, { + // (A + X) - Y -> (X - Y) + A + {Instruction::SUB, {{Instruction::ADD, {A, X}}, Y}}, + [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, A}}; } + }, { + // X - (Y + A) -> (X - Y) + (-A) + {Instruction::SUB, {X, {Instruction::ADD, {Y, A}}}}, + [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, 0 - A.d()}}; } + }, { + // X - (A + Y) -> (X - Y) + (-A) + {Instruction::SUB, {X, {Instruction::ADD, {A, Y}}}}, + [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, 0 - A.d()}}; } + } + }; +} + +ExpressionClasses::Id ExpressionClasses::tryToSimplify(Expression const& _expr, bool _secondRun) +{ + static Rules rules; + + if (_expr.item->type() != Operation) + return -1; + + for (auto const& rule: rules.rules()) + { + rules.resetMatchGroups(); + if (rule.first.matches(_expr, *this)) + { + // Debug info + //cout << "Simplifying " << *_expr.item << "("; + //for (Id arg: _expr.arguments) + // cout << fullDAGToString(arg) << ", "; + //cout << ")" << endl; + //cout << "with rule " << rule.first.toString() << endl; + //ExpressionTemplate t(rule.second()); + //cout << "to " << rule.second().toString() << endl; + return rebuildExpression(ExpressionTemplate(rule.second(), _expr.item->getLocation())); + } + } + + if (!_secondRun && _expr.arguments.size() == 2 && SemanticInformation::isCommutativeOperation(*_expr.item)) + { + Expression expr = _expr; + swap(expr.arguments[0], expr.arguments[1]); + return tryToSimplify(expr, true); + } + + return -1; +} + +ExpressionClasses::Id ExpressionClasses::rebuildExpression(ExpressionTemplate const& _template) +{ + if (_template.hasId) + return _template.id; + + Ids arguments; + for (ExpressionTemplate const& t: _template.arguments) + arguments.push_back(rebuildExpression(t)); + return find(_template.item, arguments); +} + + +Pattern::Pattern(Instruction _instruction, std::vector const& _arguments): + m_type(Operation), + m_requireDataMatch(true), + m_data(_instruction), + m_arguments(_arguments) +{ +} + +void Pattern::setMatchGroup(unsigned _group, map& _matchGroups) +{ + m_matchGroup = _group; + m_matchGroups = &_matchGroups; +} + +bool Pattern::matches(Expression const& _expr, ExpressionClasses const& _classes) const +{ + if (!matchesBaseItem(*_expr.item)) + return false; + if (m_matchGroup) + { + if (!m_matchGroups->count(m_matchGroup)) + (*m_matchGroups)[m_matchGroup] = &_expr; + else if ((*m_matchGroups)[m_matchGroup]->id != _expr.id) + return false; + } + assertThrow(m_arguments.size() == 0 || _expr.arguments.size() == m_arguments.size(), OptimizerException, ""); + for (size_t i = 0; i < m_arguments.size(); ++i) + if (!m_arguments[i].matches(_classes.representative(_expr.arguments[i]), _classes)) + return false; + return true; +} + +AssemblyItem Pattern::toAssemblyItem(SourceLocation const& _location) const +{ + return AssemblyItem(m_type, m_data, _location); +} + +string Pattern::toString() const +{ + stringstream s; + switch (m_type) + { + case Operation: + s << instructionInfo(Instruction(unsigned(m_data))).name; + break; + case Push: + s << "PUSH " << hex << m_data; + break; + case UndefinedItem: + s << "ANY"; + break; + default: + s << "t=" << dec << m_type << " d=" << hex << m_data; + break; + } + if (!m_requireDataMatch) + s << " ~"; + if (m_matchGroup) + s << "[" << dec << m_matchGroup << "]"; + s << "("; + for (Pattern const& p: m_arguments) + s << p.toString() << ", "; + s << ")"; + return s.str(); +} + +bool Pattern::matchesBaseItem(AssemblyItem const& _item) const +{ + if (m_type == UndefinedItem) + return true; + if (m_type != _item.type()) + return false; + if (m_requireDataMatch && m_data != _item.data()) + return false; + return true; +} + +Pattern::Expression const& Pattern::matchGroupValue() const +{ + assertThrow(m_matchGroup > 0, OptimizerException, ""); + assertThrow(!!m_matchGroups, OptimizerException, ""); + assertThrow((*m_matchGroups)[m_matchGroup], OptimizerException, ""); + return *(*m_matchGroups)[m_matchGroup]; +} + + +ExpressionTemplate::ExpressionTemplate(Pattern const& _pattern, SourceLocation const& _location) +{ + if (_pattern.matchGroup()) + { + hasId = true; + id = _pattern.id(); + } + else + { + hasId = false; + item = _pattern.toAssemblyItem(_location); + } + for (auto const& arg: _pattern.arguments()) + arguments.push_back(ExpressionTemplate(arg, _location)); +} + +string ExpressionTemplate::toString() const +{ + stringstream s; + if (hasId) + s << id; + else + s << item; + s << "("; + for (auto const& arg: arguments) + s << arg.toString(); + s << ")"; + return s.str(); +} diff --git a/ExpressionClasses.h b/ExpressionClasses.h new file mode 100644 index 000000000..2f720f606 --- /dev/null +++ b/ExpressionClasses.h @@ -0,0 +1,181 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** + * @file ExpressionClasses.h + * @author Christian + * @date 2015 + * Container for equivalence classes of expressions for use in common subexpression elimination. + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace dev +{ +namespace eth +{ + +class Pattern; +struct ExpressionTemplate; + +/** + * Collection of classes of equivalent expressions that can also determine the class of an expression. + * Identifiers are contiguously assigned to new classes starting from zero. + */ +class ExpressionClasses +{ +public: + using Id = unsigned; + using Ids = std::vector; + + struct Expression + { + Id id; + AssemblyItem const* item; + Ids arguments; + unsigned sequenceNumber; ///< Storage modification sequence, only used for SLOAD/SSTORE instructions. + /// Behaves as if this was a tuple of (item->type(), item->data(), arguments, sequenceNumber). + bool operator<(Expression const& _other) const; + }; + + /// Retrieves the id of the expression equivalence class resulting from the given item applied to the + /// given classes, might also create a new one. + /// @param _copyItem if true, copies the assembly item to an internal storage instead of just + /// keeping a pointer. + /// The @a _sequenceNumber indicates the current storage or memory access sequence. + Id find( + AssemblyItem const& _item, + Ids const& _arguments = {}, + bool _copyItem = true, + unsigned _sequenceNumber = 0 + ); + /// @returns the canonical representative of an expression class. + Expression const& representative(Id _id) const { return m_representatives.at(_id); } + /// @returns the number of classes. + Id size() const { return m_representatives.size(); } + + /// @returns true if the values of the given classes are known to be different (on every input). + /// @note that this function might still return false for some different inputs. + bool knownToBeDifferent(Id _a, Id _b); + /// Similar to @a knownToBeDifferent but require that abs(_a - b) >= 32. + bool knownToBeDifferentBy32(Id _a, Id _b); + /// @returns true if the value of the given class is known to be zero. + /// @note that this is not the negation of knownNonZero + bool knownZero(Id _c); + /// @returns true if the value of the given class is known to be nonzero. + /// @note that this is not the negation of knownZero + bool knownNonZero(Id _c); + /// @returns a pointer to the value if the given class is known to be a constant, + /// and a nullptr otherwise. + u256 const* knownConstant(Id _c); + + /// Stores a copy of the given AssemblyItem and returns a pointer to the copy that is valid for + /// the lifetime of the ExpressionClasses object. + AssemblyItem const* storeItem(AssemblyItem const& _item); + + std::string fullDAGToString(Id _id) const; + +private: + /// Tries to simplify the given expression. + /// @returns its class if it possible or Id(-1) otherwise. + /// @param _secondRun is set to true for the second run where arguments of commutative expressions are reversed + Id tryToSimplify(Expression const& _expr, bool _secondRun = false); + + /// Rebuilds an expression from a (matched) pattern. + Id rebuildExpression(ExpressionTemplate const& _template); + + std::vector>> createRules() const; + + /// Expression equivalence class representatives - we only store one item of an equivalence. + std::vector m_representatives; + /// All expression ever encountered. + std::set m_expressions; + std::vector> m_spareAssemblyItems; +}; + +/** + * Pattern to match against an expression. + * Also stores matched expressions to retrieve them later, for constructing new expressions using + * ExpressionTemplate. + */ +class Pattern +{ +public: + using Expression = ExpressionClasses::Expression; + using Id = ExpressionClasses::Id; + + // Matches a specific constant value. + Pattern(unsigned _value): Pattern(u256(_value)) {} + // Matches a specific constant value. + Pattern(u256 const& _value): m_type(Push), m_requireDataMatch(true), m_data(_value) {} + // Matches a specific assembly item type or anything if not given. + Pattern(AssemblyItemType _type = UndefinedItem): m_type(_type) {} + // Matches a given instruction with given arguments + Pattern(Instruction _instruction, std::vector const& _arguments = {}); + /// Sets this pattern to be part of the match group with the identifier @a _group. + /// Inside one rule, all patterns in the same match group have to match expressions from the + /// same expression equivalence class. + void setMatchGroup(unsigned _group, std::map& _matchGroups); + unsigned matchGroup() const { return m_matchGroup; } + bool matches(Expression const& _expr, ExpressionClasses const& _classes) const; + + AssemblyItem toAssemblyItem(SourceLocation const& _location) const; + std::vector arguments() const { return m_arguments; } + + /// @returns the id of the matched expression if this pattern is part of a match group. + Id id() const { return matchGroupValue().id; } + /// @returns the data of the matched expression if this pattern is part of a match group. + u256 const& d() const { return matchGroupValue().item->data(); } + + std::string toString() const; + +private: + bool matchesBaseItem(AssemblyItem const& _item) const; + Expression const& matchGroupValue() const; + + AssemblyItemType m_type; + bool m_requireDataMatch = false; + u256 m_data = 0; + std::vector m_arguments; + unsigned m_matchGroup = 0; + std::map* m_matchGroups = nullptr; +}; + +/** + * Template for a new expression that can be built from matched patterns. + */ +struct ExpressionTemplate +{ + using Expression = ExpressionClasses::Expression; + using Id = ExpressionClasses::Id; + explicit ExpressionTemplate(Pattern const& _pattern, SourceLocation const& _location); + std::string toString() const; + bool hasId = false; + /// Id of the matched expression, if available. + Id id = Id(-1); + // Otherwise, assembly item. + AssemblyItem item = UndefinedItem; + std::vector arguments; +}; + +} +} diff --git a/SemanticInformation.cpp b/SemanticInformation.cpp new file mode 100644 index 000000000..83d59efc7 --- /dev/null +++ b/SemanticInformation.cpp @@ -0,0 +1,124 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** + * @file SemanticInformation.cpp + * @author Christian + * @date 2015 + * Helper to provide semantic information about assembly items. + */ + +#include +#include + +using namespace std; +using namespace dev; +using namespace dev::eth; + +bool SemanticInformation::breaksCSEAnalysisBlock(AssemblyItem const& _item) +{ + switch (_item.type()) + { + default: + case UndefinedItem: + case Tag: + return true; + case Push: + case PushString: + case PushTag: + case PushSub: + case PushSubSize: + case PushProgramSize: + case PushData: + return false; + case Operation: + { + if (isSwapInstruction(_item) || isDupInstruction(_item)) + return false; + if (_item.instruction() == Instruction::GAS || _item.instruction() == Instruction::PC) + return true; // GAS and PC assume a specific order of opcodes + if (_item.instruction() == Instruction::MSIZE) + return true; // msize is modified already by memory access, avoid that for now + InstructionInfo info = instructionInfo(_item.instruction()); + if (_item.instruction() == Instruction::SSTORE) + return false; + if (_item.instruction() == Instruction::MSTORE) + return false; + //@todo: We do not handle the following memory instructions for now: + // calldatacopy, codecopy, extcodecopy, mstore8, + // msize (note that msize also depends on memory read access) + + // the second requirement will be lifted once it is implemented + return info.sideEffects || info.args > 2; + } + } +} + +bool SemanticInformation::isCommutativeOperation(AssemblyItem const& _item) +{ + if (_item.type() != Operation) + return false; + switch (_item.instruction()) + { + case Instruction::ADD: + case Instruction::MUL: + case Instruction::EQ: + case Instruction::AND: + case Instruction::OR: + case Instruction::XOR: + return true; + default: + return false; + } +} + +bool SemanticInformation::isDupInstruction(AssemblyItem const& _item) +{ + if (_item.type() != Operation) + return false; + return Instruction::DUP1 <= _item.instruction() && _item.instruction() <= Instruction::DUP16; +} + +bool SemanticInformation::isSwapInstruction(AssemblyItem const& _item) +{ + if (_item.type() != Operation) + return false; + return Instruction::SWAP1 <= _item.instruction() && _item.instruction() <= Instruction::SWAP16; +} + +bool SemanticInformation::isJumpInstruction(AssemblyItem const& _item) +{ + return _item == AssemblyItem(Instruction::JUMP) || _item == AssemblyItem(Instruction::JUMPI); +} + +bool SemanticInformation::altersControlFlow(AssemblyItem const& _item) +{ + if (_item.type() != Operation) + return false; + switch (_item.instruction()) + { + // note that CALL, CALLCODE and CREATE do not really alter the control flow, because we + // continue on the next instruction (unless an exception happens which can always happen) + case Instruction::JUMP: + case Instruction::JUMPI: + case Instruction::RETURN: + case Instruction::SUICIDE: + case Instruction::STOP: + return true; + default: + return false; + } +} diff --git a/SemanticInformation.h b/SemanticInformation.h new file mode 100644 index 000000000..27aa6f1a4 --- /dev/null +++ b/SemanticInformation.h @@ -0,0 +1,51 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** + * @file SemanticInformation.h + * @author Christian + * @date 2015 + * Helper to provide semantic information about assembly items. + */ + +#pragma once + + +namespace dev +{ +namespace eth +{ + +class AssemblyItem; + +/** + * Helper functions to provide context-independent information about assembly items. + */ +struct SemanticInformation +{ + /// @returns true if the given items starts a new block for common subexpression analysis. + static bool breaksCSEAnalysisBlock(AssemblyItem const& _item); + /// @returns true if the item is a two-argument operation whose value does not depend on the + /// order of its arguments. + static bool isCommutativeOperation(AssemblyItem const& _item); + static bool isDupInstruction(AssemblyItem const& _item); + static bool isSwapInstruction(AssemblyItem const& _item); + static bool isJumpInstruction(AssemblyItem const& _item); + static bool altersControlFlow(AssemblyItem const& _item); +}; + +} +} diff --git a/SourceLocation.h b/SourceLocation.h new file mode 100644 index 000000000..35e3c0318 --- /dev/null +++ b/SourceLocation.h @@ -0,0 +1,89 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** + * @author Lefteris Karapetsas + * @date 2015 + * Represents a location in a source file + */ + +#pragma once + +#include +#include +#include +#include + +namespace dev +{ + +/** + * Representation of an interval of source positions. + * The interval includes start and excludes end. + */ +struct SourceLocation +{ + SourceLocation(int _start, int _end, std::shared_ptr _sourceName): + start(_start), end(_end), sourceName(_sourceName) { } + SourceLocation(): start(-1), end(-1) { } + + SourceLocation(SourceLocation const& _other): + start(_other.start), end(_other.end), sourceName(_other.sourceName) {} + SourceLocation& operator=(SourceLocation const& _other) { start = _other.start; end = _other.end; sourceName = _other.sourceName; return *this;} + + bool operator==(SourceLocation const& _other) const { return start == _other.start && end == _other.end;} + bool operator!=(SourceLocation const& _other) const { return !operator==(_other); } + inline bool operator<(SourceLocation const& _other) const; + inline bool contains(SourceLocation const& _other) const; + inline bool intersects(SourceLocation const& _other) const; + + bool isEmpty() const { return start == -1 && end == -1; } + + int start; + int end; + std::shared_ptr sourceName; +}; + +/// Stream output for Location (used e.g. in boost exceptions). +inline std::ostream& operator<<(std::ostream& _out, SourceLocation const& _location) +{ + if (_location.isEmpty()) + return _out << "NO_LOCATION_SPECIFIED"; + return _out << *_location.sourceName << "[" << _location.start << "," << _location.end << ")"; +} + +bool SourceLocation::operator<(SourceLocation const& _other) const +{ + if (!sourceName || !_other.sourceName) + return int(!!sourceName) < int(!!_other.sourceName); + return make_tuple(*sourceName, start, end) < make_tuple(*_other.sourceName, _other.start, _other.end); +} + +bool SourceLocation::contains(SourceLocation const& _other) const +{ + if (isEmpty() || _other.isEmpty() || !sourceName || !_other.sourceName || *sourceName != *_other.sourceName) + return false; + return start <= _other.start && _other.end <= end; +} + +bool SourceLocation::intersects(SourceLocation const& _other) const +{ + if (isEmpty() || _other.isEmpty() || !sourceName || !_other.sourceName || *sourceName != *_other.sourceName) + return false; + return _other.start < end && start < _other.end; +} + +}