From b9d7387e7abbb1f4fa7f7c32a3757386e75e5650 Mon Sep 17 00:00:00 2001 From: chriseth Date: Fri, 24 Apr 2015 17:35:16 +0200 Subject: [PATCH 01/67] Move assembly related files to libevmasm and Params.h/.cpp to libevmcore. --- Assembly.cpp | 485 +++++++++++++++++++++ Assembly.h | 132 ++++++ AssemblyItem.cpp | 135 ++++++ AssemblyItem.h | 100 +++++ CMakeLists.txt | 33 ++ CommonSubexpressionEliminator.cpp | 672 ++++++++++++++++++++++++++++++ CommonSubexpressionEliminator.h | 233 +++++++++++ ControlFlowGraph.cpp | 260 ++++++++++++ ControlFlowGraph.h | 108 +++++ Exceptions.h | 36 ++ ExpressionClasses.cpp | 438 +++++++++++++++++++ ExpressionClasses.h | 181 ++++++++ SemanticInformation.cpp | 124 ++++++ SemanticInformation.h | 51 +++ SourceLocation.h | 89 ++++ 15 files changed, 3077 insertions(+) create mode 100644 Assembly.cpp create mode 100644 Assembly.h create mode 100644 AssemblyItem.cpp create mode 100644 AssemblyItem.h create mode 100644 CMakeLists.txt create mode 100644 CommonSubexpressionEliminator.cpp create mode 100644 CommonSubexpressionEliminator.h create mode 100644 ControlFlowGraph.cpp create mode 100644 ControlFlowGraph.h create mode 100644 Exceptions.h create mode 100644 ExpressionClasses.cpp create mode 100644 ExpressionClasses.h create mode 100644 SemanticInformation.cpp create mode 100644 SemanticInformation.h create mode 100644 SourceLocation.h diff --git a/Assembly.cpp b/Assembly.cpp new file mode 100644 index 000000000..6cc09a4bc --- /dev/null +++ b/Assembly.cpp @@ -0,0 +1,485 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file Assembly.cpp
+ * @author Gav Wood
+ * @date 2014
+ */
+
+#include "Assembly.h"
+#include
+#include
+#include
+#include
+#include
+using namespace std;
+using namespace dev;
+using namespace dev::eth;
+
+/// Appends the items, data, strings and sub-assemblies of @a _a to this assembly.
+/// Tag ids of the copied items are shifted by the number of tags already in use here,
+/// sub-assembly ids by the number of sub-assemblies already present here.
+void Assembly::append(Assembly const& _a)
+{
+	auto newDeposit = m_deposit + _a.deposit();
+	for (AssemblyItem i: _a.m_items)
+	{
+		if (i.type() == Tag || i.type() == PushTag)
+			i.setData(i.data() + m_usedTags);
+		else if (i.type() == PushSub || i.type() == PushSubSize)
+			// Sub ids are indices into m_subs and the subs of _a are pushed behind
+			// the existing ones below, so the offset is the current number of subs.
+			i.setData(i.data() + m_subs.size());
+		append(i);
+	}
+	m_deposit = newDeposit;
+	m_usedTags += _a.m_usedTags;
+	for (auto const& i: _a.m_data)
+		m_data.insert(i);
+	for (auto const& i: _a.m_strings)
+		m_strings.insert(i);
+	for (auto const& i: _a.m_subs)
+		m_subs.push_back(i);
+
+	assert(!_a.m_baseDeposit);
+	assert(!_a.m_totalDeposit);
+}
+
+/// Appends @a _a and then POPs until only @a _deposit of its stack items remain.
+/// @throws InvalidDeposit if @a _deposit is larger than the deposit of @a _a.
+void Assembly::append(Assembly const& _a, int _deposit)
+{
+	if (_deposit > _a.m_deposit)
+		BOOST_THROW_EXCEPTION(InvalidDeposit());
+	else
+	{
+		append(_a);
+		while (_deposit++ < _a.m_deposit)
+			append(Instruction::POP);
+	}
+}
+
+/// @returns the textual (non-JSON) representation produced by stream().
+string Assembly::out() const
+{
+	stringstream ret;
+	stream(ret);
+	return ret.str();
+}
+
+/// @returns an upper bound for the size of the assembled code. The address length
+/// `br` is increased until the resulting size can itself be encoded in `br` bytes.
+unsigned Assembly::bytesRequired() const
+{
+	for (unsigned br = 1;; ++br)
+	{
+		unsigned ret = 1;
+		for (auto const& i: m_data)
+			ret += i.second.size();
+
+		for (AssemblyItem const& i: m_items)
+			ret += i.bytesRequired(br);
+		if (dev::bytesRequired(ret) <= br)
+			return ret;
+	}
+}
+
+/// @returns the source text referenced by @a _location, cut at the first line break
+/// (then suffixed with "..."), or an empty string if the location or source is unusable.
+string Assembly::getLocationFromSources(StringMap const& _sourceCodes, SourceLocation const& _location) const
+{
+	if (_location.isEmpty() || _sourceCodes.empty() || _location.start >= _location.end || _location.start < 0)
+		return "";
+
+	auto it = _sourceCodes.find(*_location.sourceName);
+	if (it == _sourceCodes.end())
+		return "";
+
+	string const& source = it->second;
+	if (size_t(_location.start) >= source.size())
+		return "";
+
+	string cut = source.substr(_location.start, _location.end - _location.start);
+	auto newLinePos = cut.find_first_of("\n");
+	if (newLinePos != string::npos)
+		cut = cut.substr(0, newLinePos) + "...";
+
+	// Return the local by name: an explicit move() would prevent copy elision.
+	return cut;
+}
+
+/// Writes a human readable listing of the code, the data and all sub-assemblies to @a _out,
+/// every line prefixed with @a _prefix.
+ostream& Assembly::streamAsm(ostream& _out, string const& _prefix, StringMap const& _sourceCodes) const
+{
+	_out << _prefix << ".code:" << endl;
+	for (AssemblyItem const& i: m_items)
+	{
+		_out << _prefix;
+		switch (i.type())
+		{
+		case Operation:
+			_out << " " << instructionInfo(i.instruction()).name << "\t" << i.getJumpTypeAsString();
+			break;
+		case Push:
+			_out << " PUSH " << i.data();
+			break;
+		case PushString:
+			_out << " PUSH \"" << m_strings.at((h256)i.data()) << "\"";
+			break;
+		case PushTag:
+			_out << " PUSH [tag" << i.data() << "]";
+			break;
+		case PushSub:
+			_out << " PUSH [$" << h256(i.data()).abridged() << "]";
+			break;
+		case PushSubSize:
+			_out << " PUSH #[$" << h256(i.data()).abridged() << "]";
+			break;
+		case PushProgramSize:
+			_out << " PUSHSIZE";
+			break;
+		case Tag:
+			_out << "tag" << i.data() << ": " << endl << _prefix << " JUMPDEST";
+			break;
+		case PushData:
+			_out << " PUSH [" << hex << (unsigned)i.data() << "]";
+			break;
+		default:
+			BOOST_THROW_EXCEPTION(InvalidOpcode());
+		}
+		_out << "\t\t" << getLocationFromSources(_sourceCodes, i.getLocation()) << endl;
+	}
+
+	if (!m_data.empty() || !m_subs.empty())
+	{
+		_out << _prefix << ".data:" << endl;
+		for (auto const& i: m_data)
+			if (u256(i.first) >= m_subs.size())
+				_out << _prefix << " " << hex << (unsigned)(u256)i.first << ": " << toHex(i.second) << endl;
+		for (size_t i = 0; i < m_subs.size(); ++i)
+		{
+			_out << _prefix << " " << hex << i << ": " << endl;
+			m_subs[i].stream(_out, _prefix + " ", _sourceCodes);
+		}
+	}
+	return _out;
+}
+
+/// Builds the JSON object for a single item; "value" and "jumpType" are only set if non-empty.
+Json::Value Assembly::createJsonValue(string _name, int _begin, int _end, string _value, string _jumpType) const
+{
+	Json::Value value;
+	value["name"] = _name;
+	value["begin"] = _begin;
+	value["end"] = _end;
+	if (!_value.empty())
+		value["value"] = _value;
+	if (!_jumpType.empty())
+		value["jumpType"] = _jumpType;
+	return value;
+}
+
+/// @returns @a _value formatted as hexadecimal number (no prefix).
+string toStringInHex(u256 _value)
+{
+	std::stringstream hexStr;
+	hexStr << hex << _value;
+	return hexStr.str();
+}
+
+/// Builds the JSON representation of this assembly (".code" and, if present, ".data").
+/// The JSON is additionally written to @a _out, but only if there is data or a sub-assembly.
+Json::Value Assembly::streamAsmJson(ostream& _out, StringMap const& _sourceCodes) const
+{
+	Json::Value root;
+
+	Json::Value collection(Json::arrayValue);
+	for (AssemblyItem const& i: m_items)
+	{
+		switch (i.type())
+		{
+		case Operation:
+			collection.append(
+				createJsonValue(instructionInfo(i.instruction()).name, i.getLocation().start, i.getLocation().end, i.getJumpTypeAsString()));
+			break;
+		case Push:
+			collection.append(
+				createJsonValue("PUSH", i.getLocation().start, i.getLocation().end, toStringInHex(i.data()), i.getJumpTypeAsString()));
+			break;
+		case PushString:
+			collection.append(
+				createJsonValue("PUSH tag", i.getLocation().start, i.getLocation().end, m_strings.at((h256)i.data())));
+			break;
+		case PushTag:
+			collection.append(
+				createJsonValue("PUSH [tag]", i.getLocation().start, i.getLocation().end, toStringInHex(i.data())));
+			break;
+		case PushSub:
+			collection.append(
+				createJsonValue("PUSH [$]", i.getLocation().start, i.getLocation().end, dev::toString(h256(i.data()))));
+			break;
+		case PushSubSize:
+			collection.append(
+				createJsonValue("PUSH #[$]", i.getLocation().start, i.getLocation().end, dev::toString(h256(i.data()))));
+			break;
+		case PushProgramSize:
+			collection.append(
+				createJsonValue("PUSHSIZE", i.getLocation().start, i.getLocation().end));
+			break;
+		case Tag:
+			{
+				// A tag is emitted as the tag itself followed by the JUMPDEST it assembles to.
+				collection.append(
+					createJsonValue("tag", i.getLocation().start, i.getLocation().end, string(i.data())));
+				collection.append(
+					createJsonValue("JUMPDEST", i.getLocation().start, i.getLocation().end));
+			}
+			break;
+		case PushData:
+			{
+				collection.append(createJsonValue("PUSH hex", i.getLocation().start, i.getLocation().end, toStringInHex(i.data())));
+			}
+			break;
+		default:
+			BOOST_THROW_EXCEPTION(InvalidOpcode());
+		}
+	}
+
+	root[".code"] = collection;
+
+	if (!m_data.empty() || !m_subs.empty())
+	{
+		Json::Value data;
+		for (auto const& i: m_data)
+			if (u256(i.first) >= m_subs.size())
+				data[toStringInHex((u256)i.first)] = toHex(i.second);
+
+		for (size_t i = 0; i < m_subs.size(); ++i)
+		{
+			std::stringstream hexStr;
+			hexStr << hex << i;
+			data[hexStr.str()] = m_subs[i].stream(_out, "", _sourceCodes, true);
+		}
+		root[".data"] = data;
+		_out << root;
+	}
+	return root;
+}
+
+/// Streams the assembly either as JSON (@a _inJsonFormat) or as text listing.
+/// @returns the JSON value in JSON mode and an empty Json::Value otherwise.
+Json::Value Assembly::stream(ostream& _out, string const& _prefix, StringMap const& _sourceCodes, bool _inJsonFormat) const
+{
+	if (_inJsonFormat)
+		return streamAsmJson(_out, _sourceCodes);
+	else
+	{
+		streamAsm(_out, _prefix, _sourceCodes);
+		return Json::Value();
+	}
+}
+
+/// Appends a single item, updates the deposit and assigns the current source location
+/// to the stored item if it does not carry one itself.
+AssemblyItem const& Assembly::append(AssemblyItem const& _i)
+{
+	m_deposit += _i.deposit();
+	m_items.push_back(_i);
+	if (m_items.back().getLocation().isEmpty() && !m_currentSourceLocation.isEmpty())
+		m_items.back().setLocation(m_currentSourceLocation);
+	return back();
+}
+
+/// Inserts @a _i in front of all other items; the deposit is left unchanged.
+void Assembly::injectStart(AssemblyItem const& _i)
+{
+	m_items.insert(m_items.begin(), _i);
+}
+
+/// @returns true if both sequences have the same length and match item by item.
+inline bool matches(AssemblyItemsConstRef _a, AssemblyItemsConstRef _b)
+{
+	if (_a.size() != _b.size())
+		return false;
+	for (unsigned i = 0; i < _a.size(); ++i)
+		if (!_a[i].match(_b[i]))
+			return false;
+	return true;
+}
+
+struct OptimiserChannel: public LogChannel { static const char* name() { return "OPT"; } static const int verbosity = 12; };
+#define copt dev::LogOutputStream()
+
+/// Repeatedly runs control flow analysis and common subexpression elimination on the items
+/// until neither of them shrinks the code any further, then optimises all sub-assemblies.
+/// Does nothing if @a _enable is false.
+Assembly& Assembly::optimise(bool _enable)
+{
+	if (!_enable)
+		return *this;
+	std::vector>> rules;
+	// jump to next instruction
+	rules.push_back({ { PushTag, Instruction::JUMP, Tag }, [](AssemblyItemsConstRef m) -> AssemblyItems { if (m[0].data() == m[2].data()) return {m[2]}; else return m.toVector(); }});
+
+	unsigned total = 0;
+	for (unsigned count = 1; count > 0; total += count)
+	{
+		copt << toString(*this);
+		count = 0;
+
+		copt << "Performing control flow analysis...";
+		{
+			ControlFlowGraph cfg(m_items);
+			AssemblyItems optItems = cfg.optimisedItems();
+			if (optItems.size() < m_items.size())
+			{
+				copt << "Old size: " << m_items.size() << ", new size: " << optItems.size();
+				m_items = move(optItems);
+				count++;
+			}
+		}
+
+		copt << "Performing common subexpression elimination...";
+		for (auto iter = m_items.begin(); iter != m_items.end();)
+		{
+			CommonSubexpressionEliminator eliminator;
+			auto orig = iter;
+			iter = eliminator.feedItems(iter, m_items.end());
+			AssemblyItems optItems;
+			bool shouldReplace = false;
+			try
+			{
+				optItems = eliminator.getOptimizedItems();
+				shouldReplace = (optItems.size() < size_t(iter - orig));
+			}
+			catch (StackTooDeepException const&)
+			{
+				// This might happen if the opcode reconstruction is not as efficient
+				// as the hand-crafted code.
+			}
+
+			if (shouldReplace)
+			{
+				copt << "Old size: " << (iter - orig) << ", new size: " << optItems.size();
+				count++;
+				// Overwrite the start of the old range and erase what is left of it.
+				for (auto moveIter = optItems.begin(); moveIter != optItems.end(); ++orig, ++moveIter)
+					*orig = move(*moveIter);
+				iter = m_items.erase(orig, iter);
+			}
+		}
+	}
+
+	copt << total << " optimisations done.";
+
+	for (auto& sub: m_subs)
+		sub.optimise(true);
+
+	return *this;
+}
+
+/// Assembles the items into bytecode. Sub-assemblies are assembled first and stored in
+/// m_data under their index; tag, data and program size references are emitted as
+/// placeholders and filled in once the positions are known.
+bytes Assembly::assemble() const
+{
+	bytes ret;
+
+	unsigned totalBytes = bytesRequired();
+	vector tagPos(m_usedTags);
+	map tagRef;
+	multimap dataRef;
+	vector sizeRef; ///< Pointers to code locations where the size of the program is inserted
+	unsigned bytesPerTag = dev::bytesRequired(totalBytes);
+	byte tagPush = (byte)Instruction::PUSH1 - 1 + bytesPerTag;
+
+	for (size_t i = 0; i < m_subs.size(); ++i)
+		m_data[u256(i)] = m_subs[i].assemble();
+
+	unsigned bytesRequiredIncludingData = bytesRequired();
+	unsigned bytesPerDataRef = dev::bytesRequired(bytesRequiredIncludingData);
+	byte dataRefPush = (byte)Instruction::PUSH1 - 1 + bytesPerDataRef;
+	ret.reserve(bytesRequiredIncludingData);
+	// m_data must not change from here on
+
+	for (AssemblyItem const& i: m_items)
+		switch (i.type())
+		{
+		case Operation:
+			ret.push_back((byte)i.data());
+			break;
+		case PushString:
+		{
+			// Strings are emitted as PUSH32: cut after 32 characters, zero-padded otherwise.
+			ret.push_back((byte)Instruction::PUSH32);
+			unsigned ii = 0;
+			for (auto j: m_strings.at((h256)i.data()))
+				if (++ii > 32)
+					break;
+				else
+					ret.push_back((byte)j);
+			while (ii++ < 32)
+				ret.push_back(0);
+			break;
+		}
+		case Push:
+		{
+			byte b = max(1, dev::bytesRequired(i.data()));
+			ret.push_back((byte)Instruction::PUSH1 - 1 + b);
+			ret.resize(ret.size() + b);
+			bytesRef byr(&ret.back() + 1 - b, b);
+			toBigEndian(i.data(), byr);
+			break;
+		}
+		case PushTag:
+		{
+			ret.push_back(tagPush);
+			tagRef[ret.size()] = (unsigned)i.data();
+			ret.resize(ret.size() + bytesPerTag);
+			break;
+		}
+		case PushData: case PushSub:
+		{
+			ret.push_back(dataRefPush);
+			dataRef.insert(make_pair((h256)i.data(), ret.size()));
+			ret.resize(ret.size() + bytesPerDataRef);
+			break;
+		}
+		case PushSubSize:
+		{
+			auto s = m_data[i.data()].size();
+			byte b = max(1, dev::bytesRequired(s));
+			ret.push_back((byte)Instruction::PUSH1 - 1 + b);
+			ret.resize(ret.size() + b);
+			bytesRef byr(&ret.back() + 1 - b, b);
+			toBigEndian(s, byr);
+			break;
+		}
+		case PushProgramSize:
+		{
+			ret.push_back(dataRefPush);
+			sizeRef.push_back(ret.size());
+			ret.resize(ret.size() + bytesPerDataRef);
+			break;
+		}
+		case Tag:
+			tagPos[(unsigned)i.data()] = ret.size();
+			ret.push_back((byte)Instruction::JUMPDEST);
+			break;
+		default:
+			BOOST_THROW_EXCEPTION(InvalidOpcode());
+		}
+
+	// Fill in the tag positions.
+	for (auto const& i: tagRef)
+	{
+		bytesRef r(ret.data() + i.first, bytesPerTag);
+		toBigEndian(tagPos[i.second], r);
+	}
+
+	if (!m_data.empty())
+	{
+		ret.push_back(0);
+		// Only data that is actually referenced is appended to the code.
+		for (auto const& i: m_data)
+		{
+			auto its = dataRef.equal_range(i.first);
+			if (its.first != its.second)
+			{
+				for (auto it = its.first; it != its.second; ++it)
+				{
+					bytesRef r(ret.data() + it->second, bytesPerDataRef);
+					toBigEndian(ret.size(), r);
+				}
+				for (auto b: i.second)
+					ret.push_back(b);
+			}
+		}
+	}
+	// Fill in the final program size.
+	for (unsigned pos: sizeRef)
+	{
+		bytesRef r(ret.data() + pos, bytesPerDataRef);
+		toBigEndian(ret.size(), r);
+	}
+	return ret;
+}
diff --git a/Assembly.h b/Assembly.h
new file mode 100644
index 000000000..b4850f7d0
--- /dev/null
+++ b/Assembly.h
@@ -0,0 +1,132 @@
+/*
+	This file is part of cpp-ethereum.
+
+	cpp-ethereum is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	cpp-ethereum is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file Assembly.h
+ * @author Gav Wood
+ * @date 2014
+ */
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "Exceptions.h"
+#include
+
+namespace Json
+{
+class Value;
+}
+namespace dev
+{
+namespace eth
+{
+
+/// A sequence of assembly items together with the data, strings and sub-assemblies the
+/// items refer to. Keeps track of the stack deposit of the items appended so far.
+class Assembly
+{
+public:
+	Assembly() {}
+
+	/// Item factories. newTag() and newPushTag() each allocate a fresh tag id; newData() and
+	/// newPushString() store their argument under a key derived from std::hash of its content;
+	/// newSub() stores a copy of @a _sub and refers to it by its index in m_subs.
+	AssemblyItem newTag() { return AssemblyItem(Tag, m_usedTags++); }
+	AssemblyItem newPushTag() { return AssemblyItem(PushTag, m_usedTags++); }
+	AssemblyItem newData(bytes const& _data) { h256 h = (u256)std::hash()(asString(_data)); m_data[h] = _data; return AssemblyItem(PushData, h); }
+	AssemblyItem newSub(Assembly const& _sub) { m_subs.push_back(_sub); return AssemblyItem(PushSub, m_subs.size() - 1); }
+	AssemblyItem newPushString(std::string const& _data) { h256 h = (u256)std::hash()(_data); m_strings[h] = _data; return AssemblyItem(PushString, h); }
+	AssemblyItem newPushSubSize(u256 const& _subId) { return AssemblyItem(PushSubSize, _subId); }
+
+	/// Appends a new tag and returns it.
+	AssemblyItem append() { return append(newTag()); }
+	void append(Assembly const& _a);
+	void append(Assembly const& _a, int _deposit);
+	AssemblyItem const& append(AssemblyItem const& _i);
+	AssemblyItem const& append(std::string const& _data) { return append(newPushString(_data)); }
+	AssemblyItem const& append(bytes const& _data) { return append(newData(_data)); }
+	/// Registers @a _a as sub-assembly and appends a push of its size.
+	/// @returns the PushSub item referring to the new sub-assembly (which is not appended itself).
+	AssemblyItem appendSubSize(Assembly const& _a) { auto ret = newSub(_a); append(newPushSubSize(ret.data())); return ret; }
+	/// Pushes the final size of the current assembly itself. Use this when the code is modified
+	/// after compilation and CODESIZE is not an option.
+	void appendProgramSize() { append(AssemblyItem(PushProgramSize)); }
+
+	/// Append a (conditional) jump, either to a freshly allocated tag or to @a _tag.
+	/// @returns the appended PushTag item.
+	AssemblyItem appendJump() { auto ret = append(newPushTag()); append(Instruction::JUMP); return ret; }
+	AssemblyItem appendJumpI() { auto ret = append(newPushTag()); append(Instruction::JUMPI); return ret; }
+	AssemblyItem appendJump(AssemblyItem const& _tag) { auto ret = append(_tag.pushTag()); append(Instruction::JUMP); return ret; }
+	AssemblyItem appendJumpI(AssemblyItem const& _tag) { auto ret = append(_tag.pushTag()); append(Instruction::JUMPI); return ret; }
+	template Assembly& operator<<(T const& _d) { append(_d); return *this; }
+	AssemblyItems const& getItems() const { return m_items; }
+	AssemblyItem const& back() const { return m_items.back(); }
+	/// @returns the string pushed by the last item, or an empty string if the last item is not a PushString.
+	std::string backString() const { return m_items.size() && m_items.back().type() == PushString ? m_strings.at((h256)m_items.back().data()) : std::string(); }
+
+	/// Deposit bookkeeping for alternative code paths: onePath() remembers the current deposit
+	/// as base, otherPath() records the deposit reached by the finished path and resets the
+	/// deposit to the base, donePaths() ends the bookkeeping. otherPath() and donePaths() throw
+	/// InvalidDeposit (via donePath()) if the paths did not end with the same deposit.
+	void onePath() { if (asserts(!m_totalDeposit && !m_baseDeposit)) BOOST_THROW_EXCEPTION(InvalidDeposit()); m_baseDeposit = m_deposit; m_totalDeposit = INT_MAX; }
+	void otherPath() { donePath(); m_totalDeposit = m_deposit; m_deposit = m_baseDeposit; }
+	void donePaths() { donePath(); m_totalDeposit = m_baseDeposit = 0; }
+	/// ignored() saves the current deposit, endIgnored() restores it.
+	void ignored() { m_baseDeposit = m_deposit; }
+	void endIgnored() { m_deposit = m_baseDeposit; m_baseDeposit = 0; }
+
+	/// Appends POP instructions until the deposit is not larger than @a _deposit.
+	void popTo(int _deposit) { while (m_deposit > _deposit) append(Instruction::POP); }
+
+	void injectStart(AssemblyItem const& _i);
+	std::string out() const;
+	int deposit() const { return m_deposit; }
+	void adjustDeposit(int _adjustment) { m_deposit += _adjustment; if (asserts(m_deposit >= 0)) BOOST_THROW_EXCEPTION(InvalidDeposit()); }
+	void setDeposit(int _deposit) { m_deposit = _deposit; if (asserts(m_deposit >= 0)) BOOST_THROW_EXCEPTION(InvalidDeposit()); }
+
+	/// Changes the source location used for each appended item.
+	void setSourceLocation(SourceLocation const& _location) { m_currentSourceLocation = _location; }
+
+	bytes assemble() const;
+	Assembly& optimise(bool _enable);
+	/// Streams the assembly to @a _out, as text listing or (if @a _inJsonFormat) as JSON.
+	Json::Value stream(
+		std::ostream& _out,
+		std::string const& _prefix = "",
+		const StringMap &_sourceCodes = StringMap(),
+		bool _inJsonFormat = false
+	) const;
+
+protected:
+	std::string getLocationFromSources(StringMap const& _sourceCodes, SourceLocation const& _location) const;
+	/// @throws InvalidDeposit if a previous path was recorded and ended with a different deposit.
+	void donePath() { if (m_totalDeposit != INT_MAX && m_totalDeposit != m_deposit) BOOST_THROW_EXCEPTION(InvalidDeposit()); }
+	unsigned bytesRequired() const;
+
+private:
+	Json::Value streamAsmJson(std::ostream& _out, const StringMap &_sourceCodes) const;
+	std::ostream& streamAsm(std::ostream& _out, std::string const& _prefix, StringMap const& _sourceCodes) const;
+	Json::Value createJsonValue(std::string _name, int _begin, int _end, std::string _value = std::string(), std::string _jumpType = std::string()) const;
+
+protected:
+	unsigned m_usedTags = 0; ///< Number of tag ids handed out so far.
+	AssemblyItems m_items;
+	mutable std::map m_data; ///< mutable: assemble() const stores the assembled sub-assemblies here.
+	std::vector m_subs;
+	std::map m_strings;
+
+	int m_deposit = 0;
+	int m_baseDeposit = 0;
+	int m_totalDeposit = 0;
+
+	SourceLocation m_currentSourceLocation;
+};
+
+inline std::ostream& operator<<(std::ostream& _out, Assembly const& _a)
+{
+	_a.stream(_out);
+	return _out;
+}
+
+}
+}
diff --git a/AssemblyItem.cpp b/AssemblyItem.cpp
new file mode 100644
index 000000000..a4485a144
--- /dev/null
+++ b/AssemblyItem.cpp
@@ -0,0 +1,135 @@
+/*
+	This file is part of cpp-ethereum.
+
+	cpp-ethereum is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	cpp-ethereum is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file AssemblyItem.cpp
+ * @author Gav Wood
+ * @date 2014
+ */
+
+#include "AssemblyItem.h"
+#include
+
+using namespace std;
+using namespace dev;
+using namespace dev::eth;
+
+/// @returns an upper bound for the number of bytes this item assembles to, assuming that
+/// tag and data references take @a _addressLength bytes.
+/// @throws InvalidOpcode for item types that cannot be assembled.
+unsigned AssemblyItem::bytesRequired(unsigned _addressLength) const
+{
+	switch (m_type)
+	{
+	case Operation:
+	case Tag: // 1 byte for the JUMPDEST
+		return 1;
+	case PushString:
+		return 33;
+	case Push:
+		return 1 + max(1, dev::bytesRequired(m_data));
+	case PushSubSize:
+	case PushProgramSize:
+		return 4; // worst case: a 16MB program
+	case PushTag:
+	case PushData:
+	case PushSub:
+		return 1 + _addressLength;
+	default:
+		break;
+	}
+	BOOST_THROW_EXCEPTION(InvalidOpcode());
+}
+
+/// @returns the change in stack height caused by this item: results minus arguments for
+/// operations, 1 for all kinds of pushes and 0 for everything else.
+int AssemblyItem::deposit() const
+{
+	switch (m_type)
+	{
+	case Operation:
+		return instructionInfo(instruction()).ret - instructionInfo(instruction()).args;
+	case Push:
+	case PushString:
+	case PushTag:
+	case PushData:
+	case PushSub:
+	case PushSubSize:
+	case PushProgramSize:
+		return 1;
+	case Tag:
+		return 0;
+	default:;
+	}
+	return 0;
+}
+
+/// @returns "[in]" / "[out]" for jumps into / out of functions and an empty string otherwise.
+string AssemblyItem::getJumpTypeAsString() const
+{
+	switch (m_jumpType)
+	{
+	case JumpType::IntoFunction:
+		return "[in]";
+	case JumpType::OutOfFunction:
+		return "[out]";
+	case JumpType::Ordinary:
+	default:
+		return "";
+	}
+}
+
+/// Debug output of a single item.
+/// @throws InvalidOpcode for unknown item types.
+ostream& dev::eth::operator<<(ostream& _out, AssemblyItem const& _item)
+{
+	switch (_item.type())
+	{
+	case Operation:
+		_out << " " << instructionInfo(_item.instruction()).name;
+		if (_item.instruction() == eth::Instruction::JUMP || _item.instruction() == eth::Instruction::JUMPI)
+			_out << "\t" << _item.getJumpTypeAsString();
+		break;
+	case Push:
+		_out << " PUSH " << hex << _item.data();
+		break;
+	case PushString:
+		// Separate the value from the name like all other item kinds do.
+		_out << " PushString " << hex << (unsigned)_item.data();
+		break;
+	case PushTag:
+		_out << " PushTag " << _item.data();
+		break;
+	case Tag:
+		_out << " Tag " << _item.data();
+		break;
+	case PushData:
+		_out << " PushData " << hex << (unsigned)_item.data();
+		break;
+	case PushSub:
+		_out << " PushSub " << hex << h256(_item.data()).abridged();
+		break;
+	case PushSubSize:
+		_out << " PushSubSize " << hex << h256(_item.data()).abridged();
+		break;
+	case PushProgramSize:
+		_out << " PushProgramSize";
+		break;
+	case UndefinedItem:
+		_out << " ???";
+		break;
+	default:
+		BOOST_THROW_EXCEPTION(InvalidOpcode());
+	}
+	return _out;
+}
+
+/// Debug output of a sequence of items, one after the other without separator.
+ostream& dev::eth::operator<<(ostream& _out, AssemblyItemsConstRef _i)
+{
+	for (AssemblyItem const& i: _i)
+		_out << i;
+	return _out;
+}
diff --git a/AssemblyItem.h b/AssemblyItem.h
new file mode 100644
index 000000000..6f2a65de9
--- /dev/null
+++ b/AssemblyItem.h
@@ -0,0 +1,100 @@
+/*
+	This file is part of cpp-ethereum.
+
+	cpp-ethereum is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	cpp-ethereum is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file AssemblyItem.h
+ * @author Gav Wood
+ * @date 2014
+ */
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include "Exceptions.h"
+
+namespace dev
+{
+namespace eth
+{
+
+enum AssemblyItemType { UndefinedItem, Operation, Push, PushString, PushTag, PushSub, PushSubSize, PushProgramSize, Tag, PushData };
+
+class Assembly;
+
+/// A single element of an assembly: a type, a data word whose meaning depends on the type,
+/// a source location and a jump type.
+class AssemblyItem
+{
+public:
+	enum class JumpType { Ordinary, IntoFunction, OutOfFunction };
+
+	/// Creates a Push item for the value @a _push.
+	AssemblyItem(u256 _push, SourceLocation const& _location = SourceLocation()):
+		AssemblyItem(Push, _push, _location) { }
+	/// Creates an Operation item for the instruction @a _i.
+	AssemblyItem(Instruction _i, SourceLocation const& _location = SourceLocation()):
+		AssemblyItem(Operation, byte(_i), _location) { }
+	AssemblyItem(AssemblyItemType _type, u256 _data = 0, SourceLocation const& _location = SourceLocation()):
+		m_type(_type),
+		m_data(_data),
+		m_location(_location)
+	{
+	}
+
+	/// @returns the Tag / PushTag item for the tag id of this item, which has to be of type
+	/// PushTag or Tag. The returned item carries a default source location.
+	AssemblyItem tag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(Tag, m_data); }
+	AssemblyItem pushTag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(PushTag, m_data); }
+
+	AssemblyItemType type() const { return m_type; }
+	u256 const& data() const { return m_data; }
+	void setType(AssemblyItemType const _type) { m_type = _type; }
+	void setData(u256 const& _data) { m_data = _data; }
+
+	/// @returns the instruction of this item (only valid if type() == Operation)
+	Instruction instruction() const { return Instruction(byte(m_data)); }
+
+	/// @returns true iff the type and data of the items are equal.
+	bool operator==(AssemblyItem const& _other) const { return m_type == _other.m_type && m_data == _other.m_data; }
+	bool operator!=(AssemblyItem const& _other) const { return !operator==(_other); }
+
+	/// @returns an upper bound for the number of bytes required by this item, assuming that
+	/// the value of a jump tag takes @a _addressLength bytes.
+	unsigned bytesRequired(unsigned _addressLength) const;
+	int deposit() const;
+
+	/// @returns true if @a _i is of type UndefinedItem (which matches anything) or has the same
+	/// type as this item and, for operations, also the same data.
+	bool match(AssemblyItem const& _i) const { return _i.m_type == UndefinedItem || (m_type == _i.m_type && (m_type != Operation || m_data == _i.m_data)); }
+	void setLocation(SourceLocation const& _location) { m_location = _location; }
+	SourceLocation const& getLocation() const { return m_location; }
+
+	void setJumpType(JumpType _jumpType) { m_jumpType = _jumpType; }
+	JumpType getJumpType() const { return m_jumpType; }
+	std::string getJumpTypeAsString() const;
+
+private:
+	AssemblyItemType m_type;
+	u256 m_data;
+	SourceLocation m_location;
+	JumpType m_jumpType = JumpType::Ordinary;
+};
+
+using AssemblyItems = std::vector;
+using AssemblyItemsConstRef = vector_ref;
+
+std::ostream& operator<<(std::ostream& _out, AssemblyItem const& _item);
+std::ostream& operator<<(std::ostream& _out, AssemblyItemsConstRef _i);
+inline std::ostream& operator<<(std::ostream& _out, AssemblyItems const& _i) { return operator<<(_out, AssemblyItemsConstRef(&_i)); }
+
+}
+}
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 000000000..f8150806f
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,33 @@
+cmake_policy(SET CMP0015 NEW)
+# this policy was introduced in cmake 3.0
+# remove if, once 3.0 will be used on unix
+if (${CMAKE_MAJOR_VERSION} GREATER 2)
+	# old policy do not use MACOSX_RPATH
+	cmake_policy(SET CMP0042 OLD)
+endif()
+set(CMAKE_AUTOMOC OFF)
+
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSTATICLIB")
+
+# all source files of this directory are part of the library
+aux_source_directory(. SRC_LIST)
+
+include_directories(BEFORE ${JSONCPP_INCLUDE_DIRS})
+include_directories(BEFORE ..)
+include_directories(${Boost_INCLUDE_DIRS})
+
+# name of the library target (the variable is called EXECUTABLE nevertheless)
+set(EXECUTABLE evmasm)
+
+file(GLOB HEADERS "*.h")
+
+# static or shared library, depending on ETH_STATIC
+if (ETH_STATIC)
+	add_library(${EXECUTABLE} STATIC ${SRC_LIST} ${HEADERS})
+else()
+	add_library(${EXECUTABLE} SHARED ${SRC_LIST} ${HEADERS})
+endif()
+
+target_link_libraries(${EXECUTABLE} evmcore)
+target_link_libraries(${EXECUTABLE} devcrypto)
+
+# install the library and its headers (headers go to include/evmasm)
+install( TARGETS ${EXECUTABLE} RUNTIME DESTINATION bin ARCHIVE DESTINATION lib LIBRARY DESTINATION lib )
+install( FILES ${HEADERS} DESTINATION include/${EXECUTABLE} )
+
diff --git a/CommonSubexpressionEliminator.cpp b/CommonSubexpressionEliminator.cpp
new file mode 100644
index 000000000..63524d6f3
--- /dev/null
+++ b/CommonSubexpressionEliminator.cpp
@@ -0,0 +1,672 @@
+/*
+	This file is part of cpp-ethereum.
+
+	cpp-ethereum is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	cpp-ethereum is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * @file CommonSubexpressionEliminator.cpp
+ * @author Christian
+ * @date 2015
+ * Optimizer step for common subexpression elimination and stack reorganisation.
+ */ + +#include +#include +#include +#include +#include + +using namespace std; +using namespace dev; +using namespace dev::eth; + +vector CommonSubexpressionEliminator::getOptimizedItems() +{ + optimizeBreakingItem(); + + map initialStackContents; + map targetStackContents; + int minHeight = m_stackHeight + 1; + if (!m_stackElements.empty()) + minHeight = min(minHeight, m_stackElements.begin()->first); + for (int height = minHeight; height <= 0; ++height) + initialStackContents[height] = initialStackElement(height, SourceLocation()); + for (int height = minHeight; height <= m_stackHeight; ++height) + targetStackContents[height] = stackElement(height, SourceLocation()); + + // Debug info: + //stream(cout, initialStackContents, targetStackContents); + + AssemblyItems items = CSECodeGenerator(m_expressionClasses, m_storeOperations).generateCode( + initialStackContents, + targetStackContents + ); + if (m_breakingItem) + items.push_back(*m_breakingItem); + return items; +} + +ostream& CommonSubexpressionEliminator::stream( + ostream& _out, + map _initialStack, + map _targetStack +) const +{ + auto streamExpressionClass = [this](ostream& _out, Id _id) + { + auto const& expr = m_expressionClasses.representative(_id); + _out << " " << dec << _id << ": " << *expr.item; + if (expr.sequenceNumber) + _out << "@" << dec << expr.sequenceNumber; + _out << "("; + for (Id arg: expr.arguments) + _out << dec << arg << ","; + _out << ")" << endl; + }; + + _out << "Optimizer analysis:" << endl; + _out << "Final stack height: " << dec << m_stackHeight << endl; + _out << "Equivalence classes: " << endl; + for (Id eqClass = 0; eqClass < m_expressionClasses.size(); ++eqClass) + streamExpressionClass(_out, eqClass); + + _out << "Initial stack: " << endl; + for (auto const& it: _initialStack) + { + _out << " " << dec << it.first << ": "; + streamExpressionClass(_out, it.second); + } + _out << "Target stack: " << endl; + for (auto const& it: _targetStack) + { + _out << " " << dec << 
it.first << ": "; + streamExpressionClass(_out, it.second); + } + + return _out; +} + +void CommonSubexpressionEliminator::feedItem(AssemblyItem const& _item, bool _copyItem) +{ + if (_item.type() != Operation) + { + assertThrow(_item.deposit() == 1, InvalidDeposit, ""); + setStackElement(++m_stackHeight, m_expressionClasses.find(_item, {}, _copyItem)); + } + else + { + Instruction instruction = _item.instruction(); + InstructionInfo info = instructionInfo(instruction); + if (SemanticInformation::isDupInstruction(_item)) + setStackElement( + m_stackHeight + 1, + stackElement( + m_stackHeight - int(instruction) + int(Instruction::DUP1), + _item.getLocation() + ) + ); + else if (SemanticInformation::isSwapInstruction(_item)) + swapStackElements( + m_stackHeight, + m_stackHeight - 1 - int(instruction) + int(Instruction::SWAP1), + _item.getLocation() + ); + else if (instruction != Instruction::POP) + { + vector arguments(info.args); + for (int i = 0; i < info.args; ++i) + arguments[i] = stackElement(m_stackHeight - i, _item.getLocation()); + if (_item.instruction() == Instruction::SSTORE) + storeInStorage(arguments[0], arguments[1], _item.getLocation()); + else if (_item.instruction() == Instruction::SLOAD) + setStackElement( + m_stackHeight + _item.deposit(), + loadFromStorage(arguments[0], _item.getLocation()) + ); + else if (_item.instruction() == Instruction::MSTORE) + storeInMemory(arguments[0], arguments[1], _item.getLocation()); + else if (_item.instruction() == Instruction::MLOAD) + setStackElement( + m_stackHeight + _item.deposit(), + loadFromMemory(arguments[0], _item.getLocation()) + ); + else if (_item.instruction() == Instruction::SHA3) + setStackElement( + m_stackHeight + _item.deposit(), + applySha3(arguments.at(0), arguments.at(1), _item.getLocation()) + ); + else + setStackElement( + m_stackHeight + _item.deposit(), + m_expressionClasses.find(_item, arguments, _copyItem) + ); + } + m_stackHeight += _item.deposit(); + } +} + +void 
CommonSubexpressionEliminator::optimizeBreakingItem() +{ + if (!m_breakingItem || *m_breakingItem != AssemblyItem(Instruction::JUMPI)) + return; + + SourceLocation const& location = m_breakingItem->getLocation(); + AssemblyItem::JumpType jumpType = m_breakingItem->getJumpType(); + + Id condition = stackElement(m_stackHeight - 1, location); + Id zero = m_expressionClasses.find(u256(0)); + if (m_expressionClasses.knownToBeDifferent(condition, zero)) + { + feedItem(AssemblyItem(Instruction::SWAP1, location), true); + feedItem(AssemblyItem(Instruction::POP, location), true); + + AssemblyItem item(Instruction::JUMP, location); + item.setJumpType(jumpType); + m_breakingItem = m_expressionClasses.storeItem(item); + return; + } + Id negatedCondition = m_expressionClasses.find(Instruction::ISZERO, {condition}); + if (m_expressionClasses.knownToBeDifferent(negatedCondition, zero)) + { + AssemblyItem it(Instruction::POP, location); + feedItem(it, true); + feedItem(it, true); + m_breakingItem = nullptr; + } +} +/** Records _class as the content of the stack slot at _stackHeight. */ +void CommonSubexpressionEliminator::setStackElement(int _stackHeight, Id _class) +{ + m_stackElements[_stackHeight] = _class; +} +/** Exchanges the classes stored at the two stack heights, creating them first if necessary; throws if both heights are equal. */ +void CommonSubexpressionEliminator::swapStackElements( + int _stackHeightA, + int _stackHeightB, + SourceLocation const& _location +) +{ + assertThrow(_stackHeightA != _stackHeightB, OptimizerException, "Swap on same stack elements."); + // ensure they are created + stackElement(_stackHeightA, _location); + stackElement(_stackHeightB, _location); + + swap(m_stackElements[_stackHeightA], m_stackElements[_stackHeightB]); +} +/** Returns the class at _stackHeight and registers the initial stack element there if nothing has been assigned yet. */ +ExpressionClasses::Id CommonSubexpressionEliminator::stackElement( + int _stackHeight, + SourceLocation const& _location +) +{ + if (m_stackElements.count(_stackHeight)) + return m_stackElements.at(_stackHeight); + // Stack element not found (not assigned yet), create new equivalence class. 
+ return m_stackElements[_stackHeight] = initialStackElement(_stackHeight, _location); +} +/** Returns the class of the DUP item that stands for the pre-existing stack slot at _stackHeight; the height must lie between -15 and 0. */ +ExpressionClasses::Id CommonSubexpressionEliminator::initialStackElement( + int _stackHeight, + SourceLocation const& _location +) +{ + assertThrow(_stackHeight <= 0, OptimizerException, "Initial stack element of positive height requested."); + assertThrow(_stackHeight > -16, StackTooDeepException, ""); + // This is a special assembly item that refers to elements pre-existing on the initial stack. + return m_expressionClasses.find(AssemblyItem(dupInstruction(1 - _stackHeight), _location)); +} +/** Records an SSTORE of _value to _slot unless that value is already known to be stored there, and drops storage knowledge the write may invalidate. */ +void CommonSubexpressionEliminator::storeInStorage(Id _slot, Id _value, SourceLocation const& _location) +{ + if (m_storageContent.count(_slot) && m_storageContent[_slot] == _value) + // do not execute the storage if we know that the value is already there + return; + m_sequenceNumber++; + decltype(m_storageContent) storageContents; + // Copy over all values (i.e. retain knowledge about them) where we know that this store + // operation will not destroy the knowledge. Specifically, we copy storage locations we know + // are different from _slot or locations where we know that the stored value is equal to _value. 
+ for (auto const& storageItem: m_storageContent) + if (m_expressionClasses.knownToBeDifferent(storageItem.first, _slot) || storageItem.second == _value) + storageContents.insert(storageItem); + m_storageContent = move(storageContents); + + AssemblyItem item(Instruction::SSTORE, _location); + Id id = m_expressionClasses.find(item, {_slot, _value}, true, m_sequenceNumber); + m_storeOperations.push_back(StoreOperation(StoreOperation::Storage, _slot, m_sequenceNumber, id)); + m_storageContent[_slot] = _value; + // increment a second time so that we get unique sequence numbers for writes + m_sequenceNumber++; +} +/** Returns the known content of storage slot _slot or registers and remembers a new SLOAD class for it. */ +ExpressionClasses::Id CommonSubexpressionEliminator::loadFromStorage(Id _slot, SourceLocation const& _location) +{ + if (m_storageContent.count(_slot)) + return m_storageContent.at(_slot); + + AssemblyItem item(Instruction::SLOAD, _location); + return m_storageContent[_slot] = m_expressionClasses.find(item, {_slot}, true, m_sequenceNumber); +} +/** Records an MSTORE of _value to _slot unless that value is already known to be stored there, and keeps only memory knowledge at least 32 bytes away from _slot. */ +void CommonSubexpressionEliminator::storeInMemory(Id _slot, Id _value, SourceLocation const& _location) +{ + if (m_memoryContent.count(_slot) && m_memoryContent[_slot] == _value) + // do not execute the store if we know that the value is already there + return; + m_sequenceNumber++; + decltype(m_memoryContent) memoryContents; + // copy over values at points where we know that they are different from _slot by at least 32 + for (auto const& memoryItem: m_memoryContent) + if (m_expressionClasses.knownToBeDifferentBy32(memoryItem.first, _slot)) + memoryContents.insert(memoryItem); + m_memoryContent = move(memoryContents); + + AssemblyItem item(Instruction::MSTORE, _location); + Id id = m_expressionClasses.find(item, {_slot, _value}, true, m_sequenceNumber); + m_storeOperations.push_back(StoreOperation(StoreOperation::Memory, _slot, m_sequenceNumber, id)); + m_memoryContent[_slot] = _value; + // increment a second time so that we get unique sequence numbers for writes + m_sequenceNumber++; +} +/** Returns the known content of memory slot _slot or registers and remembers a new MLOAD class for it. */ +ExpressionClasses::Id 
CommonSubexpressionEliminator::loadFromMemory(Id _slot, SourceLocation const& _location) +{ + if (m_memoryContent.count(_slot)) + return m_memoryContent.at(_slot); + + AssemblyItem item(Instruction::MLOAD, _location); + return m_memoryContent[_slot] = m_expressionClasses.find(item, {_slot}, true, m_sequenceNumber); +} +/** Returns a class for SHA3 over the memory range given by _start and _length; for constant lengths up to 128 the result is cached by the loaded memory words and computed directly if all of them are constants. */ +CommonSubexpressionEliminator::Id CommonSubexpressionEliminator::applySha3( + Id _start, + Id _length, + SourceLocation const& _location +) +{ + AssemblyItem sha3Item(Instruction::SHA3, _location); + // Special logic if length is a short constant, otherwise we cannot tell. + u256 const* l = m_expressionClasses.knownConstant(_length); + // unknown or too large length + if (!l || *l > 128) + return m_expressionClasses.find(sha3Item, {_start, _length}, true, m_sequenceNumber); + + vector arguments; + for (u256 i = 0; i < *l; i += 32) + { + Id slot = m_expressionClasses.find( + AssemblyItem(Instruction::ADD, _location), + {_start, m_expressionClasses.find(i)} + ); + arguments.push_back(loadFromMemory(slot, _location)); + } + if (m_knownSha3Hashes.count(arguments)) + return m_knownSha3Hashes.at(arguments); + Id v; + // If all arguments are known constants, compute the sha3 here + if (all_of(arguments.begin(), arguments.end(), [this](Id _a) { return !!m_expressionClasses.knownConstant(_a); })) + { + bytes data; + for (Id a: arguments) + data += toBigEndian(*m_expressionClasses.knownConstant(a)); + data.resize(size_t(*l)); + v = m_expressionClasses.find(AssemblyItem(u256(sha3(data)), _location)); + } + else + v = m_expressionClasses.find(sha3Item, {_start, _length}, true, m_sequenceNumber); + return m_knownSha3Hashes[arguments] = v; +} +/** Stores a reference to the expression classes and groups the given store operations by (target, slot), keeping their order. */ +CSECodeGenerator::CSECodeGenerator( + ExpressionClasses& _expressionClasses, + vector const& _storeOperations +): + m_expressionClasses(_expressionClasses) +{ + for (auto const& store: _storeOperations) + m_storeOperations[make_pair(store.target, store.slot)].push_back(store); +} +/** Generates the items that perform all needed store operations and transform _initialStack into _targetStackContents; throws if the final stack height does not match. */ +AssemblyItems 
CSECodeGenerator::generateCode( + map const& _initialStack, + map const& _targetStackContents +) +{ + m_stack = _initialStack; + for (auto const& item: m_stack) + if (!m_classPositions.count(item.second)) + m_classPositions[item.second] = item.first; + + // @todo: provide information about the positions of copies of class elements + + // generate the dependency graph starting from final storage and memory writes and target stack contents + for (auto const& p: m_storeOperations) + addDependencies(p.second.back().expression); + for (auto const& targetItem: _targetStackContents) + { + m_finalClasses.insert(targetItem.second); + addDependencies(targetItem.second); + } + + // store all needed sequenced expressions + set> sequencedExpressions; + for (auto const& p: m_neededBy) + for (auto id: {p.first, p.second}) + if (unsigned seqNr = m_expressionClasses.representative(id).sequenceNumber) + sequencedExpressions.insert(make_pair(seqNr, id)); + + // Perform all operations on storage and memory in order, if they are needed. 
+ for (auto const& seqAndId: sequencedExpressions) + if (!m_classPositions.count(seqAndId.second)) + generateClassElement(seqAndId.second, true); + + // generate the target stack elements + for (auto const& targetItem: _targetStackContents) + { + int position = generateClassElement(targetItem.second); + assertThrow(position != c_invalidPosition, OptimizerException, ""); + if (position == targetItem.first) + continue; + SourceLocation const& location = m_expressionClasses.representative(targetItem.second).item->getLocation(); + if (position < targetItem.first) + // it is already at its target, we need another copy + appendDup(position, location); + else + appendOrRemoveSwap(position, location); + appendOrRemoveSwap(targetItem.first, location); + } + + // remove surplus elements + while (removeStackTopIfPossible()) + { + // no-op + } + + // check validity + int finalHeight = 0; + if (!_targetStackContents.empty()) + // have target stack, so its height should be the final height + finalHeight = (--_targetStackContents.end())->first; + else if (!_initialStack.empty()) + // no target stack, only erase the initial stack + finalHeight = _initialStack.begin()->first - 1; + else + // neither initial nor target stack, no change in height + finalHeight = 0; + assertThrow(finalHeight == m_stackHeight, OptimizerException, "Incorrect final stack height."); + return m_generatedItems; +} +/** Recursively records in m_neededBy which classes are needed to compute _c; loads and SHA3 additionally depend on the latest earlier store that is not known to be independent. */ +void CSECodeGenerator::addDependencies(Id _c) +{ + if (m_neededBy.count(_c)) + return; // we already computed the dependencies for _c + ExpressionClasses::Expression expr = m_expressionClasses.representative(_c); + for (Id argument: expr.arguments) + { + addDependencies(argument); + m_neededBy.insert(make_pair(argument, _c)); + } + if (expr.item->type() == Operation && ( + expr.item->instruction() == Instruction::SLOAD || + expr.item->instruction() == Instruction::MLOAD || + expr.item->instruction() == Instruction::SHA3 + )) + { + // this loads an unknown value from storage or memory and thus, 
in addition to its + // arguments, depends on all store operations to addresses where we do not know that + // they are different that occur before this load + StoreOperation::Target target = expr.item->instruction() == Instruction::SLOAD ? + StoreOperation::Storage : StoreOperation::Memory; + Id slotToLoadFrom = expr.arguments.at(0); + for (auto const& p: m_storeOperations) + { + if (p.first.first != target) + continue; + Id slot = p.first.second; + StoreOperations const& storeOps = p.second; + if (storeOps.front().sequenceNumber > expr.sequenceNumber) + continue; + bool knownToBeIndependent = false; + switch (expr.item->instruction()) + { + case Instruction::SLOAD: + knownToBeIndependent = m_expressionClasses.knownToBeDifferent(slot, slotToLoadFrom); + break; + case Instruction::MLOAD: + knownToBeIndependent = m_expressionClasses.knownToBeDifferentBy32(slot, slotToLoadFrom); + break; + case Instruction::SHA3: + { + Id length = expr.arguments.at(1); + AssemblyItem offsetInstr(Instruction::SUB, expr.item->getLocation()); + Id offsetToStart = m_expressionClasses.find(offsetInstr, {slot, slotToLoadFrom}); + u256 const* o = m_expressionClasses.knownConstant(offsetToStart); + u256 const* l = m_expressionClasses.knownConstant(length); + if (l && *l == 0) + knownToBeIndependent = true; + else if (o) + { + // We could get problems here if both *o and *l are larger than 2**254 + // but it is probably ok for the optimizer to produce wrong code for such cases + // which cannot be executed anyway because of the non-payable price. 
+ if (u2s(*o) <= -32) + knownToBeIndependent = true; + else if (l && u2s(*o) >= 0 && *o >= *l) + knownToBeIndependent = true; + } + break; + } + default: + break; + } + if (knownToBeIndependent) + continue; + + // note that store and load never have the same sequence number + Id latestStore = storeOps.front().expression; + for (auto it = ++storeOps.begin(); it != storeOps.end(); ++it) + if (it->sequenceNumber < expr.sequenceNumber) + latestStore = it->expression; + addDependencies(latestStore); + m_neededBy.insert(make_pair(latestStore, _c)); + } + } +} +/** Emits the code that computes class _c (arguments first) unless a position is already recorded for it, and returns its stack position or c_invalidPosition if it leaves no value on the stack. */ +int CSECodeGenerator::generateClassElement(Id _c, bool _allowSequenced) +{ + // do some cleanup + removeStackTopIfPossible(); + + if (m_classPositions.count(_c)) + { + assertThrow( + m_classPositions[_c] != c_invalidPosition, + OptimizerException, + "Element already removed but still needed." + ); + return m_classPositions[_c]; + } + ExpressionClasses::Expression const& expr = m_expressionClasses.representative(_c); + assertThrow( + _allowSequenced || expr.sequenceNumber == 0, + OptimizerException, + "Sequence constrained operation requested out of sequence." + ); + vector const& arguments = expr.arguments; + for (Id arg: boost::adaptors::reverse(arguments)) + generateClassElement(arg); + + SourceLocation const& location = expr.item->getLocation(); + // The arguments are somewhere on the stack now, so it remains to move them at the correct place. + // This is quite difficult as sometimes, the values also have to be removed in this process + // (if canBeRemoved() returns true) and the two arguments can be equal. For now, this is + // implemented for every single case for combinations of up to two arguments manually. 
+ if (arguments.size() == 1) + { + if (canBeRemoved(arguments[0], _c)) + appendOrRemoveSwap(classElementPosition(arguments[0]), location); + else + appendDup(classElementPosition(arguments[0]), location); + } + else if (arguments.size() == 2) + { + if (canBeRemoved(arguments[1], _c)) + { + appendOrRemoveSwap(classElementPosition(arguments[1]), location); + if (arguments[0] == arguments[1]) + appendDup(m_stackHeight, location); + else if (canBeRemoved(arguments[0], _c)) + { + appendOrRemoveSwap(m_stackHeight - 1, location); + appendOrRemoveSwap(classElementPosition(arguments[0]), location); + } + else + appendDup(classElementPosition(arguments[0]), location); + } + else + { + if (arguments[0] == arguments[1]) + { + appendDup(classElementPosition(arguments[0]), location); + appendDup(m_stackHeight, location); + } + else if (canBeRemoved(arguments[0], _c)) + { + appendOrRemoveSwap(classElementPosition(arguments[0]), location); + appendDup(classElementPosition(arguments[1]), location); + appendOrRemoveSwap(m_stackHeight - 1, location); + } + else + { + appendDup(classElementPosition(arguments[1]), location); + appendDup(classElementPosition(arguments[0]), location); + } + } + } + else + assertThrow( + arguments.size() <= 2, + OptimizerException, + "Opcodes with more than two arguments not implemented yet." + ); + for (size_t i = 0; i < arguments.size(); ++i) + assertThrow(m_stack[m_stackHeight - i] == arguments[i], OptimizerException, "Expected arguments not present." 
); + + while (SemanticInformation::isCommutativeOperation(*expr.item) && + !m_generatedItems.empty() && + m_generatedItems.back() == AssemblyItem(Instruction::SWAP1)) + // this will not append a swap but remove the one that is already there + appendOrRemoveSwap(m_stackHeight - 1, location); + for (auto arg: arguments) + if (canBeRemoved(arg, _c)) + m_classPositions[arg] = c_invalidPosition; + for (size_t i = 0; i < arguments.size(); ++i) + m_stack.erase(m_stackHeight - i); + appendItem(*expr.item); + if (expr.item->type() != Operation || instructionInfo(expr.item->instruction()).ret == 1) + { + m_stack[m_stackHeight] = _c; + return m_classPositions[_c] = m_stackHeight; + } + else + { + assertThrow( + instructionInfo(expr.item->instruction()).ret == 0, + OptimizerException, + "Invalid number of return values." + ); + return m_classPositions[_c] = c_invalidPosition; + } +} +/** Returns the recorded stack position of _id; throws if none is recorded or it equals c_invalidPosition. */ +int CSECodeGenerator::classElementPosition(Id _id) const +{ + assertThrow( + m_classPositions.count(_id) && m_classPositions.at(_id) != c_invalidPosition, + OptimizerException, + "Element requested but is not present." + ); + return m_classPositions.at(_id); +} +/** Tells whether _element may be dropped from the stack: it must not be a final class and every class needing it, other than _result, must already have a recorded position. */ +bool CSECodeGenerator::canBeRemoved(Id _element, Id _result) +{ + // Returns false if _element is finally needed or is needed by a class that has not been + // computed yet. Note that m_classPositions also includes classes that were deleted in the meantime. 
+ if (m_finalClasses.count(_element)) + return false; + + auto range = m_neededBy.equal_range(_element); + for (auto it = range.first; it != range.second; ++it) + if (it->second != _result && !m_classPositions.count(it->second)) + return false; + return true; +} +/** Appends a POP for the topmost stack element if it can be removed and reports whether it did so. */ +bool CSECodeGenerator::removeStackTopIfPossible() +{ + if (m_stack.empty()) + return false; + assertThrow(m_stack.count(m_stackHeight) > 0, OptimizerException, ""); + Id top = m_stack[m_stackHeight]; + if (!canBeRemoved(top)) + return false; + m_generatedItems.push_back(AssemblyItem(Instruction::POP)); + m_stack.erase(m_stackHeight); + m_stackHeight--; + return true; +} +/** Appends the DUP instruction that copies the element at _fromPosition to the stack top; throws if it is more than 16 slots deep. */ +void CSECodeGenerator::appendDup(int _fromPosition, SourceLocation const& _location) +{ + assertThrow(_fromPosition != c_invalidPosition, OptimizerException, ""); + int instructionNum = 1 + m_stackHeight - _fromPosition; + assertThrow(instructionNum <= 16, StackTooDeepException, "Stack too deep."); + assertThrow(1 <= instructionNum, OptimizerException, "Invalid stack access."); + appendItem(AssemblyItem(dupInstruction(instructionNum), _location)); + m_stack[m_stackHeight] = m_stack[_fromPosition]; +} +/** Swaps the stack top with the element at _fromPosition (nothing happens if that is the top) and removes the new swap together with an identical swap directly preceding it. */ +void CSECodeGenerator::appendOrRemoveSwap(int _fromPosition, SourceLocation const& _location) +{ + assertThrow(_fromPosition != c_invalidPosition, OptimizerException, ""); + if (_fromPosition == m_stackHeight) + return; + int instructionNum = m_stackHeight - _fromPosition; + assertThrow(instructionNum <= 16, StackTooDeepException, "Stack too deep."); + assertThrow(1 <= instructionNum, OptimizerException, "Invalid stack access."); + appendItem(AssemblyItem(swapInstruction(instructionNum), _location)); + // The value of a class can be present in multiple locations on the stack. 
We only update the + // "canonical" one that is tracked by m_classPositions + if (m_classPositions[m_stack[m_stackHeight]] == m_stackHeight) + m_classPositions[m_stack[m_stackHeight]] = _fromPosition; + if (m_classPositions[m_stack[_fromPosition]] == _fromPosition) + m_classPositions[m_stack[_fromPosition]] = m_stackHeight; + swap(m_stack[m_stackHeight], m_stack[_fromPosition]); + if (m_generatedItems.size() >= 2 && + SemanticInformation::isSwapInstruction(m_generatedItems.back()) && + *(m_generatedItems.end() - 2) == m_generatedItems.back()) + { + m_generatedItems.pop_back(); + m_generatedItems.pop_back(); + } +} +/** Appends _item to the generated code and adjusts the tracked stack height by its deposit. */ +void CSECodeGenerator::appendItem(AssemblyItem const& _item) +{ + m_generatedItems.push_back(_item); + m_stackHeight += _item.deposit(); +} diff --git a/CommonSubexpressionEliminator.h b/CommonSubexpressionEliminator.h new file mode 100644 index 000000000..6156bc81a --- /dev/null +++ b/CommonSubexpressionEliminator.h @@ -0,0 +1,233 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** + * @file CommonSubexpressionEliminator.h + * @author Christian + * @date 2015 + * Optimizer step for common subexpression elimination and stack reorganisation. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace dev +{ +namespace eth +{ + +class AssemblyItem; +using AssemblyItems = std::vector; + +/** + * Optimizer step that performs common subexpression elimination and stack reorganisation, + * i.e. it tries to infer equality among expressions and compute the values of two expressions + * known to be equal only once. + * + * The general workings are that for each assembly item that is fed into the eliminator, an + * equivalence class is derived from the operation and the equivalence class of its arguments. + * DUPi, SWAPi and some arithmetic instructions are used to infer equivalences while these + * classes are determined. + * + * When the list of optimized items is requested, they are generated in a bottom-up fashion, + * adding code for equivalence classes that were not yet computed. + */ +class CommonSubexpressionEliminator +{ +public: + using Id = ExpressionClasses::Id; + struct StoreOperation + { + enum Target { Memory, Storage }; + StoreOperation( + Target _target, + Id _slot, + unsigned _sequenceNumber, + Id _expression + ): target(_target), slot(_slot), sequenceNumber(_sequenceNumber), expression(_expression) {} + Target target; + Id slot; + unsigned sequenceNumber; + Id expression; + }; + + /// Feeds AssemblyItems into the eliminator and @returns the iterator pointing at the first + /// item that must be fed into a new instance of the eliminator. + template + _AssemblyItemIterator feedItems(_AssemblyItemIterator _iterator, _AssemblyItemIterator _end); + + /// @returns the resulting items after optimization. + AssemblyItems getOptimizedItems(); + + /// Streams debugging information to @a _out. + std::ostream& stream( + std::ostream& _out, + std::map _initialStack = std::map(), + std::map _targetStack = std::map() + ) const; + +private: + /// Feeds the item into the system for analysis. 
+ void feedItem(AssemblyItem const& _item, bool _copyItem = false); + + /// Tries to optimize the item that breaks the basic block at the end. + void optimizeBreakingItem(); + + /// Simplifies the given item using + /// Assigns a new equivalence class to the next sequence number of the given stack element. + void setStackElement(int _stackHeight, Id _class); + /// Swaps the given stack elements in their next sequence number. + void swapStackElements(int _stackHeightA, int _stackHeightB, SourceLocation const& _location); + /// Retrieves the current equivalence class of the given stack element (or generates a new + /// one if it does not exist yet). + Id stackElement(int _stackHeight, SourceLocation const& _location); + /// @returns the equivalence class id of the special initial stack element at the given height + /// (must not be positive). + Id initialStackElement(int _stackHeight, SourceLocation const& _location); + + /// Increments the sequence number, deletes all storage information that might be overwritten + /// and stores the new value at the given slot. + void storeInStorage(Id _slot, Id _value, SourceLocation const& _location); + /// Retrieves the current value at the given slot in storage or creates a new special sload class. + Id loadFromStorage(Id _slot, SourceLocation const& _location); + /// Increments the sequence number, deletes all memory information that might be overwritten + /// and stores the new value at the given slot. + void storeInMemory(Id _slot, Id _value, SourceLocation const& _location); + /// Retrieves the current value at the given slot in memory or creates a new special mload class. + Id loadFromMemory(Id _slot, SourceLocation const& _location); + /// Finds or creates a new expression that applies the sha3 hash function to the contents in memory. + Id applySha3(Id _start, Id _length, SourceLocation const& _location); + + /// Current stack height, can be negative. 
+ int m_stackHeight = 0; + /// Current stack layout, mapping stack height -> equivalence class + std::map m_stackElements; + /// Current sequence number, this is incremented with each modification to storage or memory. + unsigned m_sequenceNumber = 1; + /// Knowledge about storage content. + std::map m_storageContent; + /// Knowledge about memory content. Keys are memory addresses, note that the values overlap + /// and are not contained here if they are not completely known. + std::map m_memoryContent; + /// Keeps record of all sha3 hashes that are computed. + std::map, Id> m_knownSha3Hashes; + /// Keeps information about which storage or memory slots were written to at which sequence + /// number with what instruction. + std::vector m_storeOperations; + /// Structure containing the classes of equivalent expressions. + ExpressionClasses m_expressionClasses; + + /// The item that breaks the basic block, can be nullptr. + /// It is usually appended to the block but can be optimized in some cases. + AssemblyItem const* m_breakingItem = nullptr; +}; + +/** + * Unit that generates code from current stack layout, target stack layout and information about + * the equivalence classes. + */ +class CSECodeGenerator +{ +public: + using StoreOperation = CommonSubexpressionEliminator::StoreOperation; + using StoreOperations = std::vector; + using Id = ExpressionClasses::Id; + + /// Initializes the code generator with the given classes and store operations. + /// The store operations have to be sorted by sequence number in ascending order. + CSECodeGenerator(ExpressionClasses& _expressionClasses, StoreOperations const& _storeOperations); + + /// @returns the assembly items generated from the given requirements + /// @param _initialStack current contents of the stack (up to stack height of zero) + /// @param _targetStackContents final contents of the stack, by stack height relative to initial + /// @note should only be called once on each object. 
+ AssemblyItems generateCode( + std::map const& _initialStack, + std::map const& _targetStackContents + ); + +private: + /// Recursively discovers all dependencies and records them in @a m_neededBy. + void addDependencies(Id _c); + + /// Produce code that generates the given element if it is not yet present. + /// @returns the stack position of the element or c_invalidPosition if it does not actually + /// generate a value on the stack. + /// @param _allowSequenced indicates that sequence-constrained operations are allowed + int generateClassElement(Id _c, bool _allowSequenced = false); + /// @returns the position of the representative of the given id on the stack. + /// @note throws an exception if it is not on the stack. + int classElementPosition(Id _id) const; + + /// @returns true if @a _element can be removed - in general or, if given, while computing @a _result. + bool canBeRemoved(Id _element, Id _result = Id(-1)); + + /// Appends code to remove the topmost stack element if it can be removed. + bool removeStackTopIfPossible(); + + /// Appends a dup instruction to m_generatedItems to retrieve the element at the given stack position. + void appendDup(int _fromPosition, SourceLocation const& _location); + /// Appends a swap instruction to m_generatedItems to retrieve the element at the given stack position. + /// @note this might also remove the last item if it is exactly the same swap instruction. + void appendOrRemoveSwap(int _fromPosition, SourceLocation const& _location); + /// Appends the given assembly item. + void appendItem(AssemblyItem const& _item); + + static const int c_invalidPosition = -0x7fffffff; + + AssemblyItems m_generatedItems; + /// Current height of the stack relative to the start. + int m_stackHeight = 0; + /// If (b, a) is in m_neededBy then b is needed to compute a. + std::multimap m_neededBy; + /// Current content of the stack. + std::map m_stack; + /// Current positions of equivalence classes, equal to c_invalidPosition if already deleted. 
+ std::map m_classPositions; + + /// The actual equivalence class items and how to compute them. + ExpressionClasses& m_expressionClasses; + /// Keeps information about which storage or memory slots were written to by which operations. + /// The operations are sorted ascendingly by sequence number. + std::map, StoreOperations> m_storeOperations; + /// The set of equivalence classes that should be present on the stack at the end. + std::set m_finalClasses; +}; +/** Feeds items until one breaks the CSE analysis block; that item is remembered as the breaking item and the returned iterator points past it. */ +template +_AssemblyItemIterator CommonSubexpressionEliminator::feedItems( + _AssemblyItemIterator _iterator, + _AssemblyItemIterator _end +) +{ + for (; _iterator != _end && !SemanticInformation::breaksCSEAnalysisBlock(*_iterator); ++_iterator) + feedItem(*_iterator); + if (_iterator != _end) + m_breakingItem = &(*_iterator++); + return _iterator; +} + +} +} diff --git a/ControlFlowGraph.cpp b/ControlFlowGraph.cpp new file mode 100644 index 000000000..cc4367e64 --- /dev/null +++ b/ControlFlowGraph.cpp @@ -0,0 +1,260 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** + * @file ControlFlowGraph.cpp + * @author Christian + * @date 2015 + * Control flow analysis for the optimizer. 
+ */ + +#include +#include +#include +#include +#include + +using namespace std; +using namespace dev; +using namespace dev::eth; +/** Wraps a tag number as block id; throws if it is not smaller than the id of the initial block. */ +BlockId::BlockId(u256 const& _id): m_id(_id) +{ + assertThrow( _id < initial().m_id, OptimizerException, "Tag number too large."); +} +/** Returns the items unchanged if there are none; otherwise splits them into blocks, links the blocks, removes unused ones and rebuilds the code. */ +AssemblyItems ControlFlowGraph::optimisedItems() +{ + if (m_items.empty()) + return m_items; + + findLargestTag(); + splitBlocks(); + resolveNextLinks(); + removeUnusedBlocks(); + setPrevLinks(); + + return rebuildCode(); +} +/** Sets m_lastUsedId to the largest tag number used by a Tag or PushTag item (zero if there is none). */ +void ControlFlowGraph::findLargestTag() +{ + m_lastUsedId = 0; + for (auto const& item: m_items) + if (item.type() == Tag || item.type() == PushTag) + { + // Assert that it can be converted. + BlockId(item.data()); + m_lastUsedId = max(unsigned(item.data()), m_lastUsedId); + } +} +/** Partitions m_items into blocks that begin at tags and end at items altering control flow, recording the pushed tags and the end type of each block. */ +void ControlFlowGraph::splitBlocks() +{ + m_blocks.clear(); + BlockId id = BlockId::initial(); + m_blocks[id].begin = 0; + for (size_t index = 0; index < m_items.size(); ++index) + { + AssemblyItem const& item = m_items.at(index); + if (item.type() == Tag) + { + if (id) + m_blocks[id].end = index; + id = BlockId::invalid(); + } + if (!id) + { + id = item.type() == Tag ? 
BlockId(item.data()) : generateNewId(); + m_blocks[id].begin = index; + } + if (item.type() == PushTag) + m_blocks[id].pushedTags.push_back(BlockId(item.data())); + if (SemanticInformation::altersControlFlow(item)) + { + m_blocks[id].end = index + 1; + if (item == Instruction::JUMP) + m_blocks[id].endType = BasicBlock::EndType::JUMP; + else if (item == Instruction::JUMPI) + m_blocks[id].endType = BasicBlock::EndType::JUMPI; + else + m_blocks[id].endType = BasicBlock::EndType::STOP; + id = BlockId::invalid(); + } + } + if (id) + { + m_blocks[id].end = m_items.size(); + if (m_blocks[id].endType == BasicBlock::EndType::HANDOVER) + m_blocks[id].endType = BasicBlock::EndType::STOP; + } +} +/** Sets the next link of every block ending in JUMPI or HANDOVER to the non-empty block that begins where it ends; throws if there is none. */ +void ControlFlowGraph::resolveNextLinks() +{ + map blockByBeginPos; + for (auto const& idAndBlock: m_blocks) + if (idAndBlock.second.begin != idAndBlock.second.end) + blockByBeginPos[idAndBlock.second.begin] = idAndBlock.first; + + for (auto& idAndBlock: m_blocks) + { + BasicBlock& block = idAndBlock.second; + switch (block.endType) + { + case BasicBlock::EndType::JUMPI: + case BasicBlock::EndType::HANDOVER: + assertThrow( + blockByBeginPos.count(block.end), + OptimizerException, + "Successor block not found." 
+ ); + block.next = blockByBeginPos.at(block.end); + break; + default: + break; + } + } +} +/** Erases all blocks that cannot be reached from the initial block via pushed tags or next links. */ +void ControlFlowGraph::removeUnusedBlocks() +{ + vector blocksToProcess{BlockId::initial()}; + set neededBlocks{BlockId::initial()}; + while (!blocksToProcess.empty()) + { + BasicBlock const& block = m_blocks.at(blocksToProcess.back()); + blocksToProcess.pop_back(); + for (BlockId tag: block.pushedTags) + if (!neededBlocks.count(tag)) + { + neededBlocks.insert(tag); + blocksToProcess.push_back(tag); + } + if (block.next && !neededBlocks.count(block.next)) + { + neededBlocks.insert(block.next); + blocksToProcess.push_back(block.next); + } + } + for (auto it = m_blocks.begin(); it != m_blocks.end();) + if (neededBlocks.count(it->first)) + ++it; + else + m_blocks.erase(it++); +} +/** Sets prev links matching the next links and turns a trailing push-tag plus JUMP into a handover if the target block has no predecessor yet and no loop would result. */ +void ControlFlowGraph::setPrevLinks() +{ + for (auto& idAndBlock: m_blocks) + { + BasicBlock& block = idAndBlock.second; + switch (block.endType) + { + case BasicBlock::EndType::JUMPI: + case BasicBlock::EndType::HANDOVER: + assertThrow( + !m_blocks.at(block.next).prev, + OptimizerException, + "Successor already has predecessor." 
+ ); + m_blocks[block.next].prev = idAndBlock.first; + break; + default: + break; + } + } + // If block ends with jump to not yet linked block, link them removing the jump + for (auto& idAndBlock: m_blocks) + { + BlockId blockId = idAndBlock.first; + BasicBlock& block = idAndBlock.second; + if (block.endType != BasicBlock::EndType::JUMP || block.end - block.begin < 2) + continue; + AssemblyItem const& push = m_items.at(block.end - 2); + if (push.type() != PushTag) + continue; + BlockId nextId(push.data()); + if (m_blocks.at(nextId).prev) + continue; + bool hasLoop = false; + for (BlockId id = nextId; id && !hasLoop; id = m_blocks.at(id).next) + hasLoop = (id == blockId); + if (hasLoop) + continue; + + m_blocks[nextId].prev = blockId; + block.next = nextId; + block.end -= 2; + assertThrow( + !block.pushedTags.empty() && block.pushedTags.back() == nextId, + OptimizerException, + "Last pushed tag not at end of pushed list." + ); + block.pushedTags.pop_back(); + block.endType = BasicBlock::EndType::HANDOVER; + } +} +/** Concatenates the items of all remaining blocks along their prev/next chains, starting with the chain of the initial block, and drops a leading tag that is never pushed if the previous block handed over. */ +AssemblyItems ControlFlowGraph::rebuildCode() +{ + map pushes; + for (auto& idAndBlock: m_blocks) + for (BlockId ref: idAndBlock.second.pushedTags) + pushes[ref]++; + + set blocksToAdd; + for (auto it: m_blocks) + blocksToAdd.insert(it.first); + set blocksAdded; + AssemblyItems code; + + for ( + BlockId blockId = BlockId::initial(); + blockId; + blockId = blocksToAdd.empty() ? BlockId::invalid() : *blocksToAdd.begin() + ) + { + bool previousHandedOver = (blockId == BlockId::initial()); + while (m_blocks.at(blockId).prev) + blockId = m_blocks.at(blockId).prev; + for (; blockId; blockId = m_blocks.at(blockId).next) + { + BasicBlock const& block = m_blocks.at(blockId); + blocksToAdd.erase(blockId); + blocksAdded.insert(blockId); + + auto begin = m_items.begin() + block.begin; + auto end = m_items.begin() + block.end; + if (begin == end) + continue; + // If block starts with unused tag, skip it. 
+ if (previousHandedOver && !pushes[blockId] && begin->type() == Tag) + ++begin; + previousHandedOver = (block.endType == BasicBlock::EndType::HANDOVER); + copy(begin, end, back_inserter(code)); + } + } + + return code; +} +/** Returns a block id built from the incremented m_lastUsedId; throws when no ids are left. */ +BlockId ControlFlowGraph::generateNewId() +{ + BlockId id = BlockId(++m_lastUsedId); + assertThrow(id < BlockId::initial(), OptimizerException, "Out of block IDs."); + return id; +} diff --git a/ControlFlowGraph.h b/ControlFlowGraph.h new file mode 100644 index 000000000..5d16df327 --- /dev/null +++ b/ControlFlowGraph.h @@ -0,0 +1,108 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** + * @file ControlFlowGraph.h + * @author Christian + * @date 2015 + * Control flow analysis for the optimizer. + */ + +#pragma once + +#include +#include +#include + +namespace dev +{ +namespace eth +{ + +class AssemblyItem; +using AssemblyItems = std::vector; + +/** + * Identifier for a block, coincides with the tag number of an AssemblyItem but adds a special + * ID for the initial block. 
+ */ +class BlockId +{ +public: + BlockId() { *this = invalid(); } + explicit BlockId(unsigned _id): m_id(_id) {} + explicit BlockId(u256 const& _id); + static BlockId initial() { return BlockId(-2); } + static BlockId invalid() { return BlockId(-1); } + + bool operator==(BlockId const& _other) const { return m_id == _other.m_id; } + bool operator!=(BlockId const& _other) const { return m_id != _other.m_id; } + bool operator<(BlockId const& _other) const { return m_id < _other.m_id; } + explicit operator bool() const { return *this != invalid(); } + +private: + unsigned m_id; +}; + +/** + * Control flow block inside which instruction counter is always incremented by one + * (except for possibly the last instruction). + */ +struct BasicBlock +{ + /// Start index into assembly item list. + unsigned begin = 0; + /// End index (excluded) inte assembly item list. + unsigned end = 0; + /// Tags pushed inside this block, with multiplicity. + std::vector pushedTags; + /// ID of the block that always follows this one (either JUMP or flow into new block), + /// or BlockId::invalid() otherwise + BlockId next = BlockId::invalid(); + /// ID of the block that has to precede this one. + BlockId prev = BlockId::invalid(); + + enum class EndType { JUMP, JUMPI, STOP, HANDOVER }; + EndType endType = EndType::HANDOVER; +}; + +class ControlFlowGraph +{ +public: + /// Initializes the control flow graph. + /// @a _items has to persist across the usage of this class. + ControlFlowGraph(AssemblyItems const& _items): m_items(_items) {} + /// @returns the collection of optimised items, should be called only once. 
+ AssemblyItems optimisedItems(); + +private: + void findLargestTag(); + void splitBlocks(); + void resolveNextLinks(); + void removeUnusedBlocks(); + void setPrevLinks(); + AssemblyItems rebuildCode(); + + BlockId generateNewId(); + + unsigned m_lastUsedId = 0; + AssemblyItems const& m_items; + std::map m_blocks; +}; + + +} +} diff --git a/Exceptions.h b/Exceptions.h new file mode 100644 index 000000000..7cc190e41 --- /dev/null +++ b/Exceptions.h @@ -0,0 +1,36 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** @file Exceptions.h + * @author Christian + * @date 2014 + */ + +#pragma once + +#include + +namespace dev +{ +namespace eth +{ + +struct AssemblyException: virtual Exception {}; +struct OptimizerException: virtual AssemblyException {}; +struct StackTooDeepException: virtual OptimizerException {}; + +} +} diff --git a/ExpressionClasses.cpp b/ExpressionClasses.cpp new file mode 100644 index 000000000..1e60a7fe8 --- /dev/null +++ b/ExpressionClasses.cpp @@ -0,0 +1,438 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** + * @file ExpressionClasses.cpp + * @author Christian + * @date 2015 + * Container for equivalence classes of expressions for use in common subexpression elimination. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace dev; +using namespace dev::eth; + + +bool ExpressionClasses::Expression::operator<(ExpressionClasses::Expression const& _other) const +{ + auto type = item->type(); + auto otherType = _other.item->type(); + return std::tie(type, item->data(), arguments, sequenceNumber) < + std::tie(otherType, _other.item->data(), _other.arguments, _other.sequenceNumber); +} + +ExpressionClasses::Id ExpressionClasses::find( + AssemblyItem const& _item, + Ids const& _arguments, + bool _copyItem, + unsigned _sequenceNumber +) +{ + Expression exp; + exp.id = Id(-1); + exp.item = &_item; + exp.arguments = _arguments; + exp.sequenceNumber = _sequenceNumber; + + if (SemanticInformation::isCommutativeOperation(_item)) + sort(exp.arguments.begin(), exp.arguments.end()); + + auto it = m_expressions.find(exp); + if (it != m_expressions.end()) + return it->id; + + if (_copyItem) + exp.item = storeItem(_item); + + ExpressionClasses::Id id = tryToSimplify(exp); + if (id < m_representatives.size()) + exp.id = id; + else + { + exp.id = m_representatives.size(); + m_representatives.push_back(exp); + } + m_expressions.insert(exp); + return exp.id; +} + +bool ExpressionClasses::knownToBeDifferent(ExpressionClasses::Id _a, ExpressionClasses::Id _b) +{ + // Try to simplify "_a - _b" and return true iff the value is a non-zero constant. 
+ return knownNonZero(find(Instruction::SUB, {_a, _b})); +} + +bool ExpressionClasses::knownToBeDifferentBy32(ExpressionClasses::Id _a, ExpressionClasses::Id _b) +{ + // Try to simplify "_a - _b" and return true iff the value is at least 32 away from zero. + u256 const* v = knownConstant(find(Instruction::SUB, {_a, _b})); + // forbidden interval is ["-31", 31] + return v && *v + 31 > u256(62); +} + +bool ExpressionClasses::knownZero(Id _c) +{ + return Pattern(u256(0)).matches(representative(_c), *this); +} + +bool ExpressionClasses::knownNonZero(Id _c) +{ + return Pattern(u256(0)).matches(representative(find(Instruction::ISZERO, {_c})), *this); +} + +u256 const* ExpressionClasses::knownConstant(Id _c) +{ + map matchGroups; + Pattern constant(Push); + constant.setMatchGroup(1, matchGroups); + if (!constant.matches(representative(_c), *this)) + return nullptr; + return &constant.d(); +} + +AssemblyItem const* ExpressionClasses::storeItem(AssemblyItem const& _item) +{ + m_spareAssemblyItems.push_back(make_shared(_item)); + return m_spareAssemblyItems.back().get(); +} + +string ExpressionClasses::fullDAGToString(ExpressionClasses::Id _id) const +{ + Expression const& expr = representative(_id); + stringstream str; + str << dec << expr.id << ":" << *expr.item << "("; + for (Id arg: expr.arguments) + str << fullDAGToString(arg) << ","; + str << ")"; + return str.str(); +} + +class Rules: public boost::noncopyable +{ +public: + Rules(); + void resetMatchGroups() { m_matchGroups.clear(); } + vector>> rules() const { return m_rules; } + +private: + using Expression = ExpressionClasses::Expression; + map m_matchGroups; + vector>> m_rules; +}; + +Rules::Rules() +{ + // Multiple occurences of one of these inside one rule must match the same equivalence class. + // Constants. + Pattern A(Push); + Pattern B(Push); + Pattern C(Push); + // Anything. 
+ Pattern X; + Pattern Y; + Pattern Z; + A.setMatchGroup(1, m_matchGroups); + B.setMatchGroup(2, m_matchGroups); + C.setMatchGroup(3, m_matchGroups); + X.setMatchGroup(4, m_matchGroups); + Y.setMatchGroup(5, m_matchGroups); + Z.setMatchGroup(6, m_matchGroups); + + m_rules = vector>>{ + // arithmetics on constants + {{Instruction::ADD, {A, B}}, [=]{ return A.d() + B.d(); }}, + {{Instruction::MUL, {A, B}}, [=]{ return A.d() * B.d(); }}, + {{Instruction::SUB, {A, B}}, [=]{ return A.d() - B.d(); }}, + {{Instruction::DIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : A.d() / B.d(); }}, + {{Instruction::SDIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(u2s(A.d()) / u2s(B.d())); }}, + {{Instruction::MOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : A.d() % B.d(); }}, + {{Instruction::SMOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(u2s(A.d()) % u2s(B.d())); }}, + {{Instruction::EXP, {A, B}}, [=]{ return u256(boost::multiprecision::powm(bigint(A.d()), bigint(B.d()), bigint(1) << 256)); }}, + {{Instruction::NOT, {A}}, [=]{ return ~A.d(); }}, + {{Instruction::LT, {A, B}}, [=]() { return A.d() < B.d() ? u256(1) : 0; }}, + {{Instruction::GT, {A, B}}, [=]() -> u256 { return A.d() > B.d() ? 1 : 0; }}, + {{Instruction::SLT, {A, B}}, [=]() -> u256 { return u2s(A.d()) < u2s(B.d()) ? 1 : 0; }}, + {{Instruction::SGT, {A, B}}, [=]() -> u256 { return u2s(A.d()) > u2s(B.d()) ? 1 : 0; }}, + {{Instruction::EQ, {A, B}}, [=]() -> u256 { return A.d() == B.d() ? 1 : 0; }}, + {{Instruction::ISZERO, {A}}, [=]() -> u256 { return A.d() == 0 ? 1 : 0; }}, + {{Instruction::AND, {A, B}}, [=]{ return A.d() & B.d(); }}, + {{Instruction::OR, {A, B}}, [=]{ return A.d() | B.d(); }}, + {{Instruction::XOR, {A, B}}, [=]{ return A.d() ^ B.d(); }}, + {{Instruction::BYTE, {A, B}}, [=]{ return A.d() >= 32 ? 0 : (B.d() >> unsigned(8 * (31 - A.d()))) & 0xff; }}, + {{Instruction::ADDMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 
0 : u256((bigint(A.d()) + bigint(B.d())) % C.d()); }}, + {{Instruction::MULMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) * bigint(B.d())) % C.d()); }}, + {{Instruction::MULMOD, {A, B, C}}, [=]{ return A.d() * B.d(); }}, + {{Instruction::SIGNEXTEND, {A, B}}, [=]() -> u256 { + if (A.d() >= 31) + return B.d(); + unsigned testBit = unsigned(A.d()) * 8 + 7; + u256 mask = (u256(1) << testBit) - 1; + return u256(boost::multiprecision::bit_test(B.d(), testBit) ? B.d() | ~mask : B.d() & mask); + }}, + + // invariants involving known constants + {{Instruction::ADD, {X, 0}}, [=]{ return X; }}, + {{Instruction::MUL, {X, 1}}, [=]{ return X; }}, + {{Instruction::DIV, {X, 1}}, [=]{ return X; }}, + {{Instruction::SDIV, {X, 1}}, [=]{ return X; }}, + {{Instruction::OR, {X, 0}}, [=]{ return X; }}, + {{Instruction::XOR, {X, 0}}, [=]{ return X; }}, + {{Instruction::AND, {X, ~u256(0)}}, [=]{ return X; }}, + {{Instruction::MUL, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::DIV, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::MOD, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::MOD, {0, X}}, [=]{ return u256(0); }}, + {{Instruction::AND, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::OR, {X, ~u256(0)}}, [=]{ return ~u256(0); }}, + // operations involving an expression and itself + {{Instruction::AND, {X, X}}, [=]{ return X; }}, + {{Instruction::OR, {X, X}}, [=]{ return X; }}, + {{Instruction::SUB, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::EQ, {X, X}}, [=]{ return u256(1); }}, + {{Instruction::LT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::SLT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::GT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::SGT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::MOD, {X, X}}, [=]{ return u256(0); }}, + + {{Instruction::NOT, {{Instruction::NOT, {X}}}}, [=]{ return X; }}, + }; + // Associative operations + for (auto const& opFun: vector>>{ + {Instruction::ADD, plus()}, + {Instruction::MUL, 
multiplies()}, + {Instruction::AND, bit_and()}, + {Instruction::OR, bit_or()}, + {Instruction::XOR, bit_xor()} + }) + { + auto op = opFun.first; + auto fun = opFun.second; + // Moving constants to the outside, order matters here! + // we need actions that return expressions (or patterns?) here, and we need also reversed rules + // (X+A)+B -> X+(A+B) + m_rules += vector>>{{ + {op, {{op, {X, A}}, B}}, + [=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; } + }, { + // X+(Y+A) -> (X+Y)+A + {op, {{op, {X, A}}, Y}}, + [=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; } + }, { + // For now, we still need explicit commutativity for the inner pattern + {op, {{op, {A, X}}, B}}, + [=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; } + }, { + {op, {{op, {A, X}}, Y}}, + [=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; } + }}; + } + // move constants across subtractions + m_rules += vector>>{ + { + // X - A -> X + (-A) + {Instruction::SUB, {X, A}}, + [=]() -> Pattern { return {Instruction::ADD, {X, 0 - A.d()}}; } + }, { + // (X + A) - Y -> (X - Y) + A + {Instruction::SUB, {{Instruction::ADD, {X, A}}, Y}}, + [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, A}}; } + }, { + // (A + X) - Y -> (X - Y) + A + {Instruction::SUB, {{Instruction::ADD, {A, X}}, Y}}, + [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, A}}; } + }, { + // X - (Y + A) -> (X - Y) + (-A) + {Instruction::SUB, {X, {Instruction::ADD, {Y, A}}}}, + [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, 0 - A.d()}}; } + }, { + // X - (A + Y) -> (X - Y) + (-A) + {Instruction::SUB, {X, {Instruction::ADD, {A, Y}}}}, + [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, 0 - A.d()}}; } + } + }; +} + +ExpressionClasses::Id ExpressionClasses::tryToSimplify(Expression const& _expr, bool _secondRun) +{ + static Rules rules; + + if (_expr.item->type() != Operation) + return -1; + + for (auto const& rule: rules.rules()) + { + 
rules.resetMatchGroups(); + if (rule.first.matches(_expr, *this)) + { + // Debug info + //cout << "Simplifying " << *_expr.item << "("; + //for (Id arg: _expr.arguments) + // cout << fullDAGToString(arg) << ", "; + //cout << ")" << endl; + //cout << "with rule " << rule.first.toString() << endl; + //ExpressionTemplate t(rule.second()); + //cout << "to " << rule.second().toString() << endl; + return rebuildExpression(ExpressionTemplate(rule.second(), _expr.item->getLocation())); + } + } + + if (!_secondRun && _expr.arguments.size() == 2 && SemanticInformation::isCommutativeOperation(*_expr.item)) + { + Expression expr = _expr; + swap(expr.arguments[0], expr.arguments[1]); + return tryToSimplify(expr, true); + } + + return -1; +} + +ExpressionClasses::Id ExpressionClasses::rebuildExpression(ExpressionTemplate const& _template) +{ + if (_template.hasId) + return _template.id; + + Ids arguments; + for (ExpressionTemplate const& t: _template.arguments) + arguments.push_back(rebuildExpression(t)); + return find(_template.item, arguments); +} + + +Pattern::Pattern(Instruction _instruction, std::vector const& _arguments): + m_type(Operation), + m_requireDataMatch(true), + m_data(_instruction), + m_arguments(_arguments) +{ +} + +void Pattern::setMatchGroup(unsigned _group, map& _matchGroups) +{ + m_matchGroup = _group; + m_matchGroups = &_matchGroups; +} + +bool Pattern::matches(Expression const& _expr, ExpressionClasses const& _classes) const +{ + if (!matchesBaseItem(*_expr.item)) + return false; + if (m_matchGroup) + { + if (!m_matchGroups->count(m_matchGroup)) + (*m_matchGroups)[m_matchGroup] = &_expr; + else if ((*m_matchGroups)[m_matchGroup]->id != _expr.id) + return false; + } + assertThrow(m_arguments.size() == 0 || _expr.arguments.size() == m_arguments.size(), OptimizerException, ""); + for (size_t i = 0; i < m_arguments.size(); ++i) + if (!m_arguments[i].matches(_classes.representative(_expr.arguments[i]), _classes)) + return false; + return true; +} + 
+AssemblyItem Pattern::toAssemblyItem(SourceLocation const& _location) const +{ + return AssemblyItem(m_type, m_data, _location); +} + +string Pattern::toString() const +{ + stringstream s; + switch (m_type) + { + case Operation: + s << instructionInfo(Instruction(unsigned(m_data))).name; + break; + case Push: + s << "PUSH " << hex << m_data; + break; + case UndefinedItem: + s << "ANY"; + break; + default: + s << "t=" << dec << m_type << " d=" << hex << m_data; + break; + } + if (!m_requireDataMatch) + s << " ~"; + if (m_matchGroup) + s << "[" << dec << m_matchGroup << "]"; + s << "("; + for (Pattern const& p: m_arguments) + s << p.toString() << ", "; + s << ")"; + return s.str(); +} + +bool Pattern::matchesBaseItem(AssemblyItem const& _item) const +{ + if (m_type == UndefinedItem) + return true; + if (m_type != _item.type()) + return false; + if (m_requireDataMatch && m_data != _item.data()) + return false; + return true; +} + +Pattern::Expression const& Pattern::matchGroupValue() const +{ + assertThrow(m_matchGroup > 0, OptimizerException, ""); + assertThrow(!!m_matchGroups, OptimizerException, ""); + assertThrow((*m_matchGroups)[m_matchGroup], OptimizerException, ""); + return *(*m_matchGroups)[m_matchGroup]; +} + + +ExpressionTemplate::ExpressionTemplate(Pattern const& _pattern, SourceLocation const& _location) +{ + if (_pattern.matchGroup()) + { + hasId = true; + id = _pattern.id(); + } + else + { + hasId = false; + item = _pattern.toAssemblyItem(_location); + } + for (auto const& arg: _pattern.arguments()) + arguments.push_back(ExpressionTemplate(arg, _location)); +} + +string ExpressionTemplate::toString() const +{ + stringstream s; + if (hasId) + s << id; + else + s << item; + s << "("; + for (auto const& arg: arguments) + s << arg.toString(); + s << ")"; + return s.str(); +} diff --git a/ExpressionClasses.h b/ExpressionClasses.h new file mode 100644 index 000000000..2f720f606 --- /dev/null +++ b/ExpressionClasses.h @@ -0,0 +1,181 @@ +/* + This file is part 
of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** + * @file ExpressionClasses.h + * @author Christian + * @date 2015 + * Container for equivalence classes of expressions for use in common subexpression elimination. + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace dev +{ +namespace eth +{ + +class Pattern; +struct ExpressionTemplate; + +/** + * Collection of classes of equivalent expressions that can also determine the class of an expression. + * Identifiers are contiguously assigned to new classes starting from zero. + */ +class ExpressionClasses +{ +public: + using Id = unsigned; + using Ids = std::vector; + + struct Expression + { + Id id; + AssemblyItem const* item; + Ids arguments; + unsigned sequenceNumber; ///< Storage modification sequence, only used for SLOAD/SSTORE instructions. + /// Behaves as if this was a tuple of (item->type(), item->data(), arguments, sequenceNumber). + bool operator<(Expression const& _other) const; + }; + + /// Retrieves the id of the expression equivalence class resulting from the given item applied to the + /// given classes, might also create a new one. + /// @param _copyItem if true, copies the assembly item to an internal storage instead of just + /// keeping a pointer. + /// The @a _sequenceNumber indicates the current storage or memory access sequence. 
+ Id find( + AssemblyItem const& _item, + Ids const& _arguments = {}, + bool _copyItem = true, + unsigned _sequenceNumber = 0 + ); + /// @returns the canonical representative of an expression class. + Expression const& representative(Id _id) const { return m_representatives.at(_id); } + /// @returns the number of classes. + Id size() const { return m_representatives.size(); } + + /// @returns true if the values of the given classes are known to be different (on every input). + /// @note that this function might still return false for some different inputs. + bool knownToBeDifferent(Id _a, Id _b); + /// Similar to @a knownToBeDifferent but require that abs(_a - b) >= 32. + bool knownToBeDifferentBy32(Id _a, Id _b); + /// @returns true if the value of the given class is known to be zero. + /// @note that this is not the negation of knownNonZero + bool knownZero(Id _c); + /// @returns true if the value of the given class is known to be nonzero. + /// @note that this is not the negation of knownZero + bool knownNonZero(Id _c); + /// @returns a pointer to the value if the given class is known to be a constant, + /// and a nullptr otherwise. + u256 const* knownConstant(Id _c); + + /// Stores a copy of the given AssemblyItem and returns a pointer to the copy that is valid for + /// the lifetime of the ExpressionClasses object. + AssemblyItem const* storeItem(AssemblyItem const& _item); + + std::string fullDAGToString(Id _id) const; + +private: + /// Tries to simplify the given expression. + /// @returns its class if it possible or Id(-1) otherwise. + /// @param _secondRun is set to true for the second run where arguments of commutative expressions are reversed + Id tryToSimplify(Expression const& _expr, bool _secondRun = false); + + /// Rebuilds an expression from a (matched) pattern. 
+ Id rebuildExpression(ExpressionTemplate const& _template); + + std::vector>> createRules() const; + + /// Expression equivalence class representatives - we only store one item of an equivalence. + std::vector m_representatives; + /// All expression ever encountered. + std::set m_expressions; + std::vector> m_spareAssemblyItems; +}; + +/** + * Pattern to match against an expression. + * Also stores matched expressions to retrieve them later, for constructing new expressions using + * ExpressionTemplate. + */ +class Pattern +{ +public: + using Expression = ExpressionClasses::Expression; + using Id = ExpressionClasses::Id; + + // Matches a specific constant value. + Pattern(unsigned _value): Pattern(u256(_value)) {} + // Matches a specific constant value. + Pattern(u256 const& _value): m_type(Push), m_requireDataMatch(true), m_data(_value) {} + // Matches a specific assembly item type or anything if not given. + Pattern(AssemblyItemType _type = UndefinedItem): m_type(_type) {} + // Matches a given instruction with given arguments + Pattern(Instruction _instruction, std::vector const& _arguments = {}); + /// Sets this pattern to be part of the match group with the identifier @a _group. + /// Inside one rule, all patterns in the same match group have to match expressions from the + /// same expression equivalence class. + void setMatchGroup(unsigned _group, std::map& _matchGroups); + unsigned matchGroup() const { return m_matchGroup; } + bool matches(Expression const& _expr, ExpressionClasses const& _classes) const; + + AssemblyItem toAssemblyItem(SourceLocation const& _location) const; + std::vector arguments() const { return m_arguments; } + + /// @returns the id of the matched expression if this pattern is part of a match group. + Id id() const { return matchGroupValue().id; } + /// @returns the data of the matched expression if this pattern is part of a match group. 
+ u256 const& d() const { return matchGroupValue().item->data(); } + + std::string toString() const; + +private: + bool matchesBaseItem(AssemblyItem const& _item) const; + Expression const& matchGroupValue() const; + + AssemblyItemType m_type; + bool m_requireDataMatch = false; + u256 m_data = 0; + std::vector m_arguments; + unsigned m_matchGroup = 0; + std::map* m_matchGroups = nullptr; +}; + +/** + * Template for a new expression that can be built from matched patterns. + */ +struct ExpressionTemplate +{ + using Expression = ExpressionClasses::Expression; + using Id = ExpressionClasses::Id; + explicit ExpressionTemplate(Pattern const& _pattern, SourceLocation const& _location); + std::string toString() const; + bool hasId = false; + /// Id of the matched expression, if available. + Id id = Id(-1); + // Otherwise, assembly item. + AssemblyItem item = UndefinedItem; + std::vector arguments; +}; + +} +} diff --git a/SemanticInformation.cpp b/SemanticInformation.cpp new file mode 100644 index 000000000..83d59efc7 --- /dev/null +++ b/SemanticInformation.cpp @@ -0,0 +1,124 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** + * @file SemanticInformation.cpp + * @author Christian + * @date 2015 + * Helper to provide semantic information about assembly items. 
+ */ + +#include +#include + +using namespace std; +using namespace dev; +using namespace dev::eth; + +bool SemanticInformation::breaksCSEAnalysisBlock(AssemblyItem const& _item) +{ + switch (_item.type()) + { + default: + case UndefinedItem: + case Tag: + return true; + case Push: + case PushString: + case PushTag: + case PushSub: + case PushSubSize: + case PushProgramSize: + case PushData: + return false; + case Operation: + { + if (isSwapInstruction(_item) || isDupInstruction(_item)) + return false; + if (_item.instruction() == Instruction::GAS || _item.instruction() == Instruction::PC) + return true; // GAS and PC assume a specific order of opcodes + if (_item.instruction() == Instruction::MSIZE) + return true; // msize is modified already by memory access, avoid that for now + InstructionInfo info = instructionInfo(_item.instruction()); + if (_item.instruction() == Instruction::SSTORE) + return false; + if (_item.instruction() == Instruction::MSTORE) + return false; + //@todo: We do not handle the following memory instructions for now: + // calldatacopy, codecopy, extcodecopy, mstore8, + // msize (note that msize also depends on memory read access) + + // the second requirement will be lifted once it is implemented + return info.sideEffects || info.args > 2; + } + } +} + +bool SemanticInformation::isCommutativeOperation(AssemblyItem const& _item) +{ + if (_item.type() != Operation) + return false; + switch (_item.instruction()) + { + case Instruction::ADD: + case Instruction::MUL: + case Instruction::EQ: + case Instruction::AND: + case Instruction::OR: + case Instruction::XOR: + return true; + default: + return false; + } +} + +bool SemanticInformation::isDupInstruction(AssemblyItem const& _item) +{ + if (_item.type() != Operation) + return false; + return Instruction::DUP1 <= _item.instruction() && _item.instruction() <= Instruction::DUP16; +} + +bool SemanticInformation::isSwapInstruction(AssemblyItem const& _item) +{ + if (_item.type() != Operation) + 
return false; + return Instruction::SWAP1 <= _item.instruction() && _item.instruction() <= Instruction::SWAP16; +} + +bool SemanticInformation::isJumpInstruction(AssemblyItem const& _item) +{ + return _item == AssemblyItem(Instruction::JUMP) || _item == AssemblyItem(Instruction::JUMPI); +} + +bool SemanticInformation::altersControlFlow(AssemblyItem const& _item) +{ + if (_item.type() != Operation) + return false; + switch (_item.instruction()) + { + // note that CALL, CALLCODE and CREATE do not really alter the control flow, because we + // continue on the next instruction (unless an exception happens which can always happen) + case Instruction::JUMP: + case Instruction::JUMPI: + case Instruction::RETURN: + case Instruction::SUICIDE: + case Instruction::STOP: + return true; + default: + return false; + } +} diff --git a/SemanticInformation.h b/SemanticInformation.h new file mode 100644 index 000000000..27aa6f1a4 --- /dev/null +++ b/SemanticInformation.h @@ -0,0 +1,51 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** + * @file SemanticInformation.h + * @author Christian + * @date 2015 + * Helper to provide semantic information about assembly items. + */ + +#pragma once + + +namespace dev +{ +namespace eth +{ + +class AssemblyItem; + +/** + * Helper functions to provide context-independent information about assembly items. 
+ */ +struct SemanticInformation +{ + /// @returns true if the given items starts a new block for common subexpression analysis. + static bool breaksCSEAnalysisBlock(AssemblyItem const& _item); + /// @returns true if the item is a two-argument operation whose value does not depend on the + /// order of its arguments. + static bool isCommutativeOperation(AssemblyItem const& _item); + static bool isDupInstruction(AssemblyItem const& _item); + static bool isSwapInstruction(AssemblyItem const& _item); + static bool isJumpInstruction(AssemblyItem const& _item); + static bool altersControlFlow(AssemblyItem const& _item); +}; + +} +} diff --git a/SourceLocation.h b/SourceLocation.h new file mode 100644 index 000000000..35e3c0318 --- /dev/null +++ b/SourceLocation.h @@ -0,0 +1,89 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** + * @author Lefteris Karapetsas + * @date 2015 + * Represents a location in a source file + */ + +#pragma once + +#include +#include +#include +#include + +namespace dev +{ + +/** + * Representation of an interval of source positions. + * The interval includes start and excludes end. 
+ */ +struct SourceLocation +{ + SourceLocation(int _start, int _end, std::shared_ptr _sourceName): + start(_start), end(_end), sourceName(_sourceName) { } + SourceLocation(): start(-1), end(-1) { } + + SourceLocation(SourceLocation const& _other): + start(_other.start), end(_other.end), sourceName(_other.sourceName) {} + SourceLocation& operator=(SourceLocation const& _other) { start = _other.start; end = _other.end; sourceName = _other.sourceName; return *this;} + + bool operator==(SourceLocation const& _other) const { return start == _other.start && end == _other.end;} + bool operator!=(SourceLocation const& _other) const { return !operator==(_other); } + inline bool operator<(SourceLocation const& _other) const; + inline bool contains(SourceLocation const& _other) const; + inline bool intersects(SourceLocation const& _other) const; + + bool isEmpty() const { return start == -1 && end == -1; } + + int start; + int end; + std::shared_ptr sourceName; +}; + +/// Stream output for Location (used e.g. in boost exceptions). 
+inline std::ostream& operator<<(std::ostream& _out, SourceLocation const& _location) +{ + if (_location.isEmpty()) + return _out << "NO_LOCATION_SPECIFIED"; + return _out << *_location.sourceName << "[" << _location.start << "," << _location.end << ")"; +} + +bool SourceLocation::operator<(SourceLocation const& _other) const +{ + if (!sourceName || !_other.sourceName) + return int(!!sourceName) < int(!!_other.sourceName); + return make_tuple(*sourceName, start, end) < make_tuple(*_other.sourceName, _other.start, _other.end); +} + +bool SourceLocation::contains(SourceLocation const& _other) const +{ + if (isEmpty() || _other.isEmpty() || !sourceName || !_other.sourceName || *sourceName != *_other.sourceName) + return false; + return start <= _other.start && _other.end <= end; +} + +bool SourceLocation::intersects(SourceLocation const& _other) const +{ + if (isEmpty() || _other.isEmpty() || !sourceName || !_other.sourceName || *sourceName != *_other.sourceName) + return false; + return _other.start < end && start < _other.end; +} + +} From 4d62c463d143c93f7938db5b8f7d01d33aa1a698 Mon Sep 17 00:00:00 2001 From: chriseth Date: Wed, 6 May 2015 10:43:59 +0200 Subject: [PATCH 02/67] Structural gas estimator. --- GasMeter.cpp | 104 +++++++++++++++++++++++++++++++++++++++++++++++++++ GasMeter.h | 67 +++++++++++++++++++++++++++++++++ 2 files changed, 171 insertions(+) create mode 100644 GasMeter.cpp create mode 100644 GasMeter.h diff --git a/GasMeter.cpp b/GasMeter.cpp new file mode 100644 index 000000000..e5fb0e09a --- /dev/null +++ b/GasMeter.cpp @@ -0,0 +1,104 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** @file GasMeter.cpp + * @author Christian + * @date 2015 + */ + +#include "GasMeter.h" +#include + +using namespace std; +using namespace dev; +using namespace dev::eth; + +GasMeter::GasConsumption& GasMeter::GasConsumption::operator+=(GasConsumption const& _other) +{ + isInfinite = isInfinite || _other.isInfinite; + if (isInfinite) + return *this; + bigint v = bigint(value) + _other.value; + if (v > std::numeric_limits::max()) + isInfinite = true; + else + value = u256(v); + return *this; +} + +GasMeter::GasConsumption GasMeter::estimateMax(AssemblyItem const& _item) +{ + switch (_item.type()) { + case Push: + case PushTag: + return runGas(Instruction::PUSH1); + case Tag: + return runGas(Instruction::JUMPDEST); + case Operation: + { + GasConsumption gas = runGas(_item.instruction()); + switch (_item.instruction()) + { + case Instruction::SSTORE: + // @todo logic can be improved + gas += c_sstoreSetGas; + break; + case Instruction::SLOAD: + gas += c_sloadGas; + break; + case Instruction::MSTORE: + case Instruction::MSTORE8: + case Instruction::MLOAD: + case Instruction::RETURN: + case Instruction::SHA3: + case Instruction::CALLDATACOPY: + case Instruction::CODECOPY: + case Instruction::EXTCODECOPY: + case Instruction::LOG0: + case Instruction::LOG1: + case Instruction::LOG2: + case Instruction::LOG3: + case Instruction::LOG4: + case Instruction::CALL: + case Instruction::CALLCODE: + case Instruction::CREATE: + case Instruction::EXP: + // @todo logic can be improved + gas = GasConsumption::infinite(); + break; + default: + break; + } + return gas; + break; + } + default: + break; + } + + return 
GasConsumption::infinite(); +} + +GasMeter::GasConsumption GasMeter::runGas(Instruction _instruction) +{ + if (_instruction == Instruction::JUMPDEST) + return GasConsumption(1); + + int tier = instructionInfo(_instruction).gasPriceTier; + return tier == InvalidTier ? GasConsumption::infinite() : c_tierStepGas[tier]; +} + + diff --git a/GasMeter.h b/GasMeter.h new file mode 100644 index 000000000..63dbc1380 --- /dev/null +++ b/GasMeter.h @@ -0,0 +1,67 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** @file GasMeter.cpp + * @author Christian + * @date 2015 + */ + +#pragma once + +#include +#include + +namespace dev +{ +namespace eth +{ + +/** + * Class that helps computing the maximum gas consumption for instructions. + */ +class GasMeter +{ +public: + struct GasConsumption + { + GasConsumption(u256 _value = 0, bool _infinite = false): value(_value), isInfinite(_infinite) {} + static GasConsumption infinite() { return GasConsumption(0, true); } + + GasConsumption& operator+=(GasConsumption const& _otherS); + std::ostream& operator<<(std::ostream& _str) const; + + u256 value; + bool isInfinite; + }; + + /// Returns an upper bound on the gas consumed by the given instruction. 
+ GasConsumption estimateMax(AssemblyItem const& _item); + +private: + static GasConsumption runGas(Instruction _instruction); +}; + +inline std::ostream& operator<<(std::ostream& _str, GasMeter::GasConsumption const& _consumption) +{ + if (_consumption.isInfinite) + return _str << "inf"; + else + return _str << _consumption.value; +} + + +} +} From 9106d72a02aa52b0c48db2eef7e4f9df213500b5 Mon Sep 17 00:00:00 2001 From: chriseth Date: Wed, 29 Apr 2015 18:16:05 +0200 Subject: [PATCH 03/67] Split known state from common subexpression eliminator. --- Assembly.cpp | 3 +- CommonSubexpressionEliminator.cpp | 267 ++-------------------------- CommonSubexpressionEliminator.h | 59 +------ KnownState.cpp | 278 ++++++++++++++++++++++++++++++ KnownState.h | 149 ++++++++++++++++ 5 files changed, 451 insertions(+), 305 deletions(-) create mode 100644 KnownState.cpp create mode 100644 KnownState.h diff --git a/Assembly.cpp b/Assembly.cpp index 6cc09a4bc..c7253622e 100644 --- a/Assembly.cpp +++ b/Assembly.cpp @@ -329,7 +329,8 @@ Assembly& Assembly::optimise(bool _enable) copt << "Performing common subexpression elimination..."; for (auto iter = m_items.begin(); iter != m_items.end();) { - CommonSubexpressionEliminator eliminator; + KnownState state; + CommonSubexpressionEliminator eliminator(state); auto orig = iter; iter = eliminator.feedItems(iter, m_items.end()); AssemblyItems optItems; diff --git a/CommonSubexpressionEliminator.cpp b/CommonSubexpressionEliminator.cpp index 63524d6f3..645a426d9 100644 --- a/CommonSubexpressionEliminator.cpp +++ b/CommonSubexpressionEliminator.cpp @@ -37,18 +37,19 @@ vector CommonSubexpressionEliminator::getOptimizedItems() map initialStackContents; map targetStackContents; - int minHeight = m_stackHeight + 1; - if (!m_stackElements.empty()) - minHeight = min(minHeight, m_stackElements.begin()->first); + int minHeight = m_state.stackHeight() + 1; + if (!m_state.stackElements().empty()) + minHeight = min(minHeight, 
m_state.stackElements().begin()->first); for (int height = minHeight; height <= 0; ++height) - initialStackContents[height] = initialStackElement(height, SourceLocation()); - for (int height = minHeight; height <= m_stackHeight; ++height) - targetStackContents[height] = stackElement(height, SourceLocation()); + //@todo this is not nice as it is here - should be "unknownStackElement" - but is it really unknown? + initialStackContents[height] = m_state.initialStackElement(height, SourceLocation()); + for (int height = minHeight; height <= m_state.stackHeight(); ++height) + targetStackContents[height] = m_state.stackElement(height, SourceLocation()); // Debug info: //stream(cout, initialStackContents, targetStackContents); - AssemblyItems items = CSECodeGenerator(m_expressionClasses, m_storeOperations).generateCode( + AssemblyItems items = CSECodeGenerator(m_state.expressionClasses(), m_storeOperations).generateCode( initialStackContents, targetStackContents ); @@ -57,103 +58,11 @@ vector CommonSubexpressionEliminator::getOptimizedItems() return items; } -ostream& CommonSubexpressionEliminator::stream( - ostream& _out, - map _initialStack, - map _targetStack -) const -{ - auto streamExpressionClass = [this](ostream& _out, Id _id) - { - auto const& expr = m_expressionClasses.representative(_id); - _out << " " << dec << _id << ": " << *expr.item; - if (expr.sequenceNumber) - _out << "@" << dec << expr.sequenceNumber; - _out << "("; - for (Id arg: expr.arguments) - _out << dec << arg << ","; - _out << ")" << endl; - }; - - _out << "Optimizer analysis:" << endl; - _out << "Final stack height: " << dec << m_stackHeight << endl; - _out << "Equivalence classes: " << endl; - for (Id eqClass = 0; eqClass < m_expressionClasses.size(); ++eqClass) - streamExpressionClass(_out, eqClass); - - _out << "Initial stack: " << endl; - for (auto const& it: _initialStack) - { - _out << " " << dec << it.first << ": "; - streamExpressionClass(_out, it.second); - } - _out << "Target stack: " 
<< endl; - for (auto const& it: _targetStack) - { - _out << " " << dec << it.first << ": "; - streamExpressionClass(_out, it.second); - } - - return _out; -} - void CommonSubexpressionEliminator::feedItem(AssemblyItem const& _item, bool _copyItem) { - if (_item.type() != Operation) - { - assertThrow(_item.deposit() == 1, InvalidDeposit, ""); - setStackElement(++m_stackHeight, m_expressionClasses.find(_item, {}, _copyItem)); - } - else - { - Instruction instruction = _item.instruction(); - InstructionInfo info = instructionInfo(instruction); - if (SemanticInformation::isDupInstruction(_item)) - setStackElement( - m_stackHeight + 1, - stackElement( - m_stackHeight - int(instruction) + int(Instruction::DUP1), - _item.getLocation() - ) - ); - else if (SemanticInformation::isSwapInstruction(_item)) - swapStackElements( - m_stackHeight, - m_stackHeight - 1 - int(instruction) + int(Instruction::SWAP1), - _item.getLocation() - ); - else if (instruction != Instruction::POP) - { - vector arguments(info.args); - for (int i = 0; i < info.args; ++i) - arguments[i] = stackElement(m_stackHeight - i, _item.getLocation()); - if (_item.instruction() == Instruction::SSTORE) - storeInStorage(arguments[0], arguments[1], _item.getLocation()); - else if (_item.instruction() == Instruction::SLOAD) - setStackElement( - m_stackHeight + _item.deposit(), - loadFromStorage(arguments[0], _item.getLocation()) - ); - else if (_item.instruction() == Instruction::MSTORE) - storeInMemory(arguments[0], arguments[1], _item.getLocation()); - else if (_item.instruction() == Instruction::MLOAD) - setStackElement( - m_stackHeight + _item.deposit(), - loadFromMemory(arguments[0], _item.getLocation()) - ); - else if (_item.instruction() == Instruction::SHA3) - setStackElement( - m_stackHeight + _item.deposit(), - applySha3(arguments.at(0), arguments.at(1), _item.getLocation()) - ); - else - setStackElement( - m_stackHeight + _item.deposit(), - m_expressionClasses.find(_item, arguments, _copyItem) - ); - } - 
m_stackHeight += _item.deposit(); - } + StoreOperation op = m_state.feedItem(_item, _copyItem); + if (op.isValid()) + m_storeOperations.push_back(op); } void CommonSubexpressionEliminator::optimizeBreakingItem() @@ -164,20 +73,20 @@ void CommonSubexpressionEliminator::optimizeBreakingItem() SourceLocation const& location = m_breakingItem->getLocation(); AssemblyItem::JumpType jumpType = m_breakingItem->getJumpType(); - Id condition = stackElement(m_stackHeight - 1, location); - Id zero = m_expressionClasses.find(u256(0)); - if (m_expressionClasses.knownToBeDifferent(condition, zero)) + Id condition = m_state.stackElement(m_state.stackHeight() - 1, location); + Id zero = m_state.expressionClasses().find(u256(0)); + if (m_state.expressionClasses().knownToBeDifferent(condition, zero)) { feedItem(AssemblyItem(Instruction::SWAP1, location), true); feedItem(AssemblyItem(Instruction::POP, location), true); AssemblyItem item(Instruction::JUMP, location); item.setJumpType(jumpType); - m_breakingItem = m_expressionClasses.storeItem(item); + m_breakingItem = m_state.expressionClasses().storeItem(item); return; } - Id negatedCondition = m_expressionClasses.find(Instruction::ISZERO, {condition}); - if (m_expressionClasses.knownToBeDifferent(negatedCondition, zero)) + Id negatedCondition = m_state.expressionClasses().find(Instruction::ISZERO, {condition}); + if (m_state.expressionClasses().knownToBeDifferent(negatedCondition, zero)) { AssemblyItem it(Instruction::POP, location); feedItem(it, true); @@ -186,148 +95,6 @@ void CommonSubexpressionEliminator::optimizeBreakingItem() } } -void CommonSubexpressionEliminator::setStackElement(int _stackHeight, Id _class) -{ - m_stackElements[_stackHeight] = _class; -} - -void CommonSubexpressionEliminator::swapStackElements( - int _stackHeightA, - int _stackHeightB, - SourceLocation const& _location -) -{ - assertThrow(_stackHeightA != _stackHeightB, OptimizerException, "Swap on same stack elements."); - // ensure they are created - 
stackElement(_stackHeightA, _location); - stackElement(_stackHeightB, _location); - - swap(m_stackElements[_stackHeightA], m_stackElements[_stackHeightB]); -} - -ExpressionClasses::Id CommonSubexpressionEliminator::stackElement( - int _stackHeight, - SourceLocation const& _location -) -{ - if (m_stackElements.count(_stackHeight)) - return m_stackElements.at(_stackHeight); - // Stack element not found (not assigned yet), create new equivalence class. - return m_stackElements[_stackHeight] = initialStackElement(_stackHeight, _location); -} - -ExpressionClasses::Id CommonSubexpressionEliminator::initialStackElement( - int _stackHeight, - SourceLocation const& _location -) -{ - assertThrow(_stackHeight <= 0, OptimizerException, "Initial stack element of positive height requested."); - assertThrow(_stackHeight > -16, StackTooDeepException, ""); - // This is a special assembly item that refers to elements pre-existing on the initial stack. - return m_expressionClasses.find(AssemblyItem(dupInstruction(1 - _stackHeight), _location)); -} - -void CommonSubexpressionEliminator::storeInStorage(Id _slot, Id _value, SourceLocation const& _location) -{ - if (m_storageContent.count(_slot) && m_storageContent[_slot] == _value) - // do not execute the storage if we know that the value is already there - return; - m_sequenceNumber++; - decltype(m_storageContent) storageContents; - // Copy over all values (i.e. retain knowledge about them) where we know that this store - // operation will not destroy the knowledge. Specifically, we copy storage locations we know - // are different from _slot or locations where we know that the stored value is equal to _value. 
- for (auto const& storageItem: m_storageContent) - if (m_expressionClasses.knownToBeDifferent(storageItem.first, _slot) || storageItem.second == _value) - storageContents.insert(storageItem); - m_storageContent = move(storageContents); - - AssemblyItem item(Instruction::SSTORE, _location); - Id id = m_expressionClasses.find(item, {_slot, _value}, true, m_sequenceNumber); - m_storeOperations.push_back(StoreOperation(StoreOperation::Storage, _slot, m_sequenceNumber, id)); - m_storageContent[_slot] = _value; - // increment a second time so that we get unique sequence numbers for writes - m_sequenceNumber++; -} - -ExpressionClasses::Id CommonSubexpressionEliminator::loadFromStorage(Id _slot, SourceLocation const& _location) -{ - if (m_storageContent.count(_slot)) - return m_storageContent.at(_slot); - - AssemblyItem item(Instruction::SLOAD, _location); - return m_storageContent[_slot] = m_expressionClasses.find(item, {_slot}, true, m_sequenceNumber); -} - -void CommonSubexpressionEliminator::storeInMemory(Id _slot, Id _value, SourceLocation const& _location) -{ - if (m_memoryContent.count(_slot) && m_memoryContent[_slot] == _value) - // do not execute the store if we know that the value is already there - return; - m_sequenceNumber++; - decltype(m_memoryContent) memoryContents; - // copy over values at points where we know that they are different from _slot by at least 32 - for (auto const& memoryItem: m_memoryContent) - if (m_expressionClasses.knownToBeDifferentBy32(memoryItem.first, _slot)) - memoryContents.insert(memoryItem); - m_memoryContent = move(memoryContents); - - AssemblyItem item(Instruction::MSTORE, _location); - Id id = m_expressionClasses.find(item, {_slot, _value}, true, m_sequenceNumber); - m_storeOperations.push_back(StoreOperation(StoreOperation::Memory, _slot, m_sequenceNumber, id)); - m_memoryContent[_slot] = _value; - // increment a second time so that we get unique sequence numbers for writes - m_sequenceNumber++; -} - -ExpressionClasses::Id 
CommonSubexpressionEliminator::loadFromMemory(Id _slot, SourceLocation const& _location) -{ - if (m_memoryContent.count(_slot)) - return m_memoryContent.at(_slot); - - AssemblyItem item(Instruction::MLOAD, _location); - return m_memoryContent[_slot] = m_expressionClasses.find(item, {_slot}, true, m_sequenceNumber); -} - -CommonSubexpressionEliminator::Id CommonSubexpressionEliminator::applySha3( - Id _start, - Id _length, - SourceLocation const& _location -) -{ - AssemblyItem sha3Item(Instruction::SHA3, _location); - // Special logic if length is a short constant, otherwise we cannot tell. - u256 const* l = m_expressionClasses.knownConstant(_length); - // unknown or too large length - if (!l || *l > 128) - return m_expressionClasses.find(sha3Item, {_start, _length}, true, m_sequenceNumber); - - vector arguments; - for (u256 i = 0; i < *l; i += 32) - { - Id slot = m_expressionClasses.find( - AssemblyItem(Instruction::ADD, _location), - {_start, m_expressionClasses.find(i)} - ); - arguments.push_back(loadFromMemory(slot, _location)); - } - if (m_knownSha3Hashes.count(arguments)) - return m_knownSha3Hashes.at(arguments); - Id v; - // If all arguments are known constants, compute the sha3 here - if (all_of(arguments.begin(), arguments.end(), [this](Id _a) { return !!m_expressionClasses.knownConstant(_a); })) - { - bytes data; - for (Id a: arguments) - data += toBigEndian(*m_expressionClasses.knownConstant(a)); - data.resize(size_t(*l)); - v = m_expressionClasses.find(AssemblyItem(u256(sha3(data)), _location)); - } - else - v = m_expressionClasses.find(sha3Item, {_start, _length}, true, m_sequenceNumber); - return m_knownSha3Hashes[arguments] = v; -} - CSECodeGenerator::CSECodeGenerator( ExpressionClasses& _expressionClasses, vector const& _storeOperations diff --git a/CommonSubexpressionEliminator.h b/CommonSubexpressionEliminator.h index 6156bc81a..2ed926401 100644 --- a/CommonSubexpressionEliminator.h +++ b/CommonSubexpressionEliminator.h @@ -32,6 +32,7 @@ #include 
#include #include +#include namespace dev { @@ -58,20 +59,9 @@ class CommonSubexpressionEliminator { public: using Id = ExpressionClasses::Id; - struct StoreOperation - { - enum Target { Memory, Storage }; - StoreOperation( - Target _target, - Id _slot, - unsigned _sequenceNumber, - Id _expression - ): target(_target), slot(_slot), sequenceNumber(_sequenceNumber), expression(_expression) {} - Target target; - Id slot; - unsigned sequenceNumber; - Id expression; - }; + using StoreOperation = KnownState::StoreOperation; + + CommonSubexpressionEliminator(KnownState const& _state): m_state(_state) {} /// Feeds AssemblyItems into the eliminator and @returns the iterator pointing at the first /// item that must be fed into a new instance of the eliminator. @@ -95,49 +85,10 @@ private: /// Tries to optimize the item that breaks the basic block at the end. void optimizeBreakingItem(); - /// Simplifies the given item using - /// Assigns a new equivalence class to the next sequence number of the given stack element. - void setStackElement(int _stackHeight, Id _class); - /// Swaps the given stack elements in their next sequence number. - void swapStackElements(int _stackHeightA, int _stackHeightB, SourceLocation const& _location); - /// Retrieves the current equivalence class fo the given stack element (or generates a new - /// one if it does not exist yet). - Id stackElement(int _stackHeight, SourceLocation const& _location); - /// @returns the equivalence class id of the special initial stack element at the given height - /// (must not be positive). - Id initialStackElement(int _stackHeight, SourceLocation const& _location); - - /// Increments the sequence number, deletes all storage information that might be overwritten - /// and stores the new value at the given slot. - void storeInStorage(Id _slot, Id _value, SourceLocation const& _location); - /// Retrieves the current value at the given slot in storage or creates a new special sload class. 
- Id loadFromStorage(Id _slot, SourceLocation const& _location); - /// Increments the sequence number, deletes all memory information that might be overwritten - /// and stores the new value at the given slot. - void storeInMemory(Id _slot, Id _value, SourceLocation const& _location); - /// Retrieves the current value at the given slot in memory or creates a new special mload class. - Id loadFromMemory(Id _slot, SourceLocation const& _location); - /// Finds or creates a new expression that applies the sha3 hash function to the contents in memory. - Id applySha3(Id _start, Id _length, SourceLocation const& _location); - - /// Current stack height, can be negative. - int m_stackHeight = 0; - /// Current stack layout, mapping stack height -> equivalence class - std::map m_stackElements; - /// Current sequence number, this is incremented with each modification to storage or memory. - unsigned m_sequenceNumber = 1; - /// Knowledge about storage content. - std::map m_storageContent; - /// Knowledge about memory content. Keys are memory addresses, note that the values overlap - /// and are not contained here if they are not completely known. - std::map m_memoryContent; - /// Keeps record of all sha3 hashes that are computed. - std::map, Id> m_knownSha3Hashes; + KnownState m_state; /// Keeps information about which storage or memory slots were written to at which sequence /// number with what instruction. std::vector m_storeOperations; - /// Structure containing the classes of equivalent expressions. - ExpressionClasses m_expressionClasses; /// The item that breaks the basic block, can be nullptr. /// It is usually appended to the block but can be optimized in some cases. diff --git a/KnownState.cpp b/KnownState.cpp new file mode 100644 index 000000000..244270fb6 --- /dev/null +++ b/KnownState.cpp @@ -0,0 +1,278 @@ +/* + This file is part of cpp-ethereum. 
+ + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** + * @file KnownState.cpp + * @author Christian + * @date 2015 + * Contains knowledge about the state of the virtual machine at a specific instruction. + */ + +#include "KnownState.h" +#include +#include +#include + +using namespace std; +using namespace dev; +using namespace dev::eth; + +ostream& KnownState::stream( + ostream& _out, + map _initialStack, + map _targetStack +) const +{ + auto streamExpressionClass = [this](ostream& _out, Id _id) + { + auto const& expr = m_expressionClasses->representative(_id); + _out << " " << dec << _id << ": " << *expr.item; + if (expr.sequenceNumber) + _out << "@" << dec << expr.sequenceNumber; + _out << "("; + for (Id arg: expr.arguments) + _out << dec << arg << ","; + _out << ")" << endl; + }; + + _out << "Optimizer analysis:" << endl; + _out << "Final stack height: " << dec << m_stackHeight << endl; + _out << "Equivalence classes: " << endl; + for (Id eqClass = 0; eqClass < m_expressionClasses->size(); ++eqClass) + streamExpressionClass(_out, eqClass); + + _out << "Initial stack: " << endl; + for (auto const& it: _initialStack) + { + _out << " " << dec << it.first << ": "; + streamExpressionClass(_out, it.second); + } + _out << "Target stack: " << endl; + for (auto const& it: _targetStack) + { + _out << " " << dec << it.first << ": "; + streamExpressionClass(_out, it.second); + } + + return _out; +} + 
+KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool _copyItem) +{ + StoreOperation op; + if (_item.type() != Operation) + { + assertThrow(_item.deposit() == 1, InvalidDeposit, ""); + setStackElement(++m_stackHeight, m_expressionClasses->find(_item, {}, _copyItem)); + } + else + { + Instruction instruction = _item.instruction(); + InstructionInfo info = instructionInfo(instruction); + if (SemanticInformation::isDupInstruction(_item)) + setStackElement( + m_stackHeight + 1, + stackElement( + m_stackHeight - int(instruction) + int(Instruction::DUP1), + _item.getLocation() + ) + ); + else if (SemanticInformation::isSwapInstruction(_item)) + swapStackElements( + m_stackHeight, + m_stackHeight - 1 - int(instruction) + int(Instruction::SWAP1), + _item.getLocation() + ); + else if (instruction != Instruction::POP) + { + vector arguments(info.args); + for (int i = 0; i < info.args; ++i) + arguments[i] = stackElement(m_stackHeight - i, _item.getLocation()); + if (_item.instruction() == Instruction::SSTORE) + op = storeInStorage(arguments[0], arguments[1], _item.getLocation()); + else if (_item.instruction() == Instruction::SLOAD) + setStackElement( + m_stackHeight + _item.deposit(), + loadFromStorage(arguments[0], _item.getLocation()) + ); + else if (_item.instruction() == Instruction::MSTORE) + op = storeInMemory(arguments[0], arguments[1], _item.getLocation()); + else if (_item.instruction() == Instruction::MLOAD) + setStackElement( + m_stackHeight + _item.deposit(), + loadFromMemory(arguments[0], _item.getLocation()) + ); + else if (_item.instruction() == Instruction::SHA3) + setStackElement( + m_stackHeight + _item.deposit(), + applySha3(arguments.at(0), arguments.at(1), _item.getLocation()) + ); + else + setStackElement( + m_stackHeight + _item.deposit(), + m_expressionClasses->find(_item, arguments, _copyItem) + ); + } + m_stackHeight += _item.deposit(); + } + return op; +} + +ExpressionClasses::Id KnownState::stackElement(int _stackHeight, 
SourceLocation const& _location) +{ + if (m_stackElements.count(_stackHeight)) + return m_stackElements.at(_stackHeight); + // Stack element not found (not assigned yet), create new equivalence class. + return m_stackElements[_stackHeight] = initialStackElement(_stackHeight, _location); +} + +ExpressionClasses::Id KnownState::initialStackElement( + int _stackHeight, + SourceLocation const& _location +) +{ + assertThrow(_stackHeight <= 0, OptimizerException, "Initial stack element of positive height requested."); + assertThrow(_stackHeight > -16, StackTooDeepException, ""); + // This is a special assembly item that refers to elements pre-existing on the initial stack. + return m_expressionClasses->find(AssemblyItem(dupInstruction(1 - _stackHeight), _location)); +} + +void KnownState::setStackElement(int _stackHeight, Id _class) +{ + m_stackElements[_stackHeight] = _class; +} + +void KnownState::swapStackElements( + int _stackHeightA, + int _stackHeightB, + SourceLocation const& _location +) +{ + assertThrow(_stackHeightA != _stackHeightB, OptimizerException, "Swap on same stack elements."); + // ensure they are created + stackElement(_stackHeightA, _location); + stackElement(_stackHeightB, _location); + + swap(m_stackElements[_stackHeightA], m_stackElements[_stackHeightB]); +} + +KnownState::StoreOperation KnownState::storeInStorage( + Id _slot, + Id _value, + SourceLocation const& _location) +{ + if (m_storageContent.count(_slot) && m_storageContent[_slot] == _value) + // do not execute the storage if we know that the value is already there + return StoreOperation(); + m_sequenceNumber++; + decltype(m_storageContent) storageContents; + // Copy over all values (i.e. retain knowledge about them) where we know that this store + // operation will not destroy the knowledge. Specifically, we copy storage locations we know + // are different from _slot or locations where we know that the stored value is equal to _value. 
+ for (auto const& storageItem: m_storageContent) + if (m_expressionClasses->knownToBeDifferent(storageItem.first, _slot) || storageItem.second == _value) + storageContents.insert(storageItem); + m_storageContent = move(storageContents); + + AssemblyItem item(Instruction::SSTORE, _location); + Id id = m_expressionClasses->find(item, {_slot, _value}, true, m_sequenceNumber); + StoreOperation operation(StoreOperation::Storage, _slot, m_sequenceNumber, id); + m_storageContent[_slot] = _value; + // increment a second time so that we get unique sequence numbers for writes + m_sequenceNumber++; + + return operation; +} + +ExpressionClasses::Id KnownState::loadFromStorage(Id _slot, SourceLocation const& _location) +{ + if (m_storageContent.count(_slot)) + return m_storageContent.at(_slot); + + AssemblyItem item(Instruction::SLOAD, _location); + return m_storageContent[_slot] = m_expressionClasses->find(item, {_slot}, true, m_sequenceNumber); +} + +KnownState::StoreOperation KnownState::storeInMemory(Id _slot, Id _value, SourceLocation const& _location) +{ + if (m_memoryContent.count(_slot) && m_memoryContent[_slot] == _value) + // do not execute the store if we know that the value is already there + return StoreOperation(); + m_sequenceNumber++; + decltype(m_memoryContent) memoryContents; + // copy over values at points where we know that they are different from _slot by at least 32 + for (auto const& memoryItem: m_memoryContent) + if (m_expressionClasses->knownToBeDifferentBy32(memoryItem.first, _slot)) + memoryContents.insert(memoryItem); + m_memoryContent = move(memoryContents); + + AssemblyItem item(Instruction::MSTORE, _location); + Id id = m_expressionClasses->find(item, {_slot, _value}, true, m_sequenceNumber); + StoreOperation operation(StoreOperation(StoreOperation::Memory, _slot, m_sequenceNumber, id)); + m_memoryContent[_slot] = _value; + // increment a second time so that we get unique sequence numbers for writes + m_sequenceNumber++; + return operation; +} + 
+ExpressionClasses::Id KnownState::loadFromMemory(Id _slot, SourceLocation const& _location) +{ + if (m_memoryContent.count(_slot)) + return m_memoryContent.at(_slot); + + AssemblyItem item(Instruction::MLOAD, _location); + return m_memoryContent[_slot] = m_expressionClasses->find(item, {_slot}, true, m_sequenceNumber); +} + +KnownState::Id KnownState::applySha3( + Id _start, + Id _length, + SourceLocation const& _location +) +{ + AssemblyItem sha3Item(Instruction::SHA3, _location); + // Special logic if length is a short constant, otherwise we cannot tell. + u256 const* l = m_expressionClasses->knownConstant(_length); + // unknown or too large length + if (!l || *l > 128) + return m_expressionClasses->find(sha3Item, {_start, _length}, true, m_sequenceNumber); + + vector arguments; + for (u256 i = 0; i < *l; i += 32) + { + Id slot = m_expressionClasses->find( + AssemblyItem(Instruction::ADD, _location), + {_start, m_expressionClasses->find(i)} + ); + arguments.push_back(loadFromMemory(slot, _location)); + } + if (m_knownSha3Hashes.count(arguments)) + return m_knownSha3Hashes.at(arguments); + Id v; + // If all arguments are known constants, compute the sha3 here + if (all_of(arguments.begin(), arguments.end(), [this](Id _a) { return !!m_expressionClasses->knownConstant(_a); })) + { + bytes data; + for (Id a: arguments) + data += toBigEndian(*m_expressionClasses->knownConstant(a)); + data.resize(size_t(*l)); + v = m_expressionClasses->find(AssemblyItem(u256(sha3(data)), _location)); + } + else + v = m_expressionClasses->find(sha3Item, {_start, _length}, true, m_sequenceNumber); + return m_knownSha3Hashes[arguments] = v; +} + diff --git a/KnownState.h b/KnownState.h new file mode 100644 index 000000000..c6dfcee6b --- /dev/null +++ b/KnownState.h @@ -0,0 +1,149 @@ +/* + This file is part of cpp-ethereum. 
+ + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** + * @file KnownState.h + * @author Christian + * @date 2015 + * Contains knowledge about the state of the virtual machine at a specific instruction. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace dev +{ +namespace eth +{ + +class AssemblyItem; +using AssemblyItems = std::vector; + +/** + * Class to infer and store knowledge about the state of the virtual machine at a specific + * instruction. + * + * The general workings are that for each assembly item that is fed, an equivalence class is + * derived from the operation and the equivalence class of its arguments. DUPi, SWAPi and some + * arithmetic instructions are used to infer equivalences while these classes are determined. 
+ */ +class KnownState +{ +public: + using Id = ExpressionClasses::Id; + struct StoreOperation + { + enum Target { Invalid, Memory, Storage }; + StoreOperation(): target(Invalid), sequenceNumber(-1) {} + StoreOperation( + Target _target, + Id _slot, + unsigned _sequenceNumber, + Id _expression + ): target(_target), slot(_slot), sequenceNumber(_sequenceNumber), expression(_expression) {} + bool isValid() const { return target != Invalid; } + Target target; + Id slot; + unsigned sequenceNumber; + Id expression; + }; + + KnownState(): m_expressionClasses(std::make_shared()) {} + + /// Streams debugging information to @a _out. + std::ostream& stream( + std::ostream& _out, + std::map _initialStack = std::map(), + std::map _targetStack = std::map() + ) const; + + /// Feeds the item into the system for analysis. + /// @returns a possible store operation + StoreOperation feedItem(AssemblyItem const& _item, bool _copyItem = false); + + /// Resets any knowledge about storage. + void resetStorage() { m_storageContent.clear(); } + /// Resets any knowledge about storage. + void resetMemory() { m_memoryContent.clear(); } + /// Resets any knowledge about the current stack. + void resetStack() { m_stackElements.clear(); m_stackHeight = 0; } + /// Resets any knowledge. + void reset() { resetStorage(); resetMemory(); resetStack(); } + + ///@todo the sequence numbers in two copies of this class should never be the same. + /// might be doable using two-dimensional sequence numbers, where the first value is incremented + /// for each copy + + /// Retrieves the current equivalence class fo the given stack element (or generates a new + /// one if it does not exist yet). + Id stackElement(int _stackHeight, SourceLocation const& _location); + /// @returns the equivalence class id of the special initial stack element at the given height + /// (must not be positive). 
+ Id initialStackElement(int _stackHeight, SourceLocation const& _location); + + int stackHeight() const { return m_stackHeight; } + std::map const& stackElements() const { return m_stackElements; } + ExpressionClasses& expressionClasses() const { return *m_expressionClasses; } + +private: + /// Assigns a new equivalence class to the next sequence number of the given stack element. + void setStackElement(int _stackHeight, Id _class); + /// Swaps the given stack elements in their next sequence number. + void swapStackElements(int _stackHeightA, int _stackHeightB, SourceLocation const& _location); + + /// Increments the sequence number, deletes all storage information that might be overwritten + /// and stores the new value at the given slot. + /// @returns the store operation, which might be invalid if storage was not modified + StoreOperation storeInStorage(Id _slot, Id _value, SourceLocation const& _location); + /// Retrieves the current value at the given slot in storage or creates a new special sload class. + Id loadFromStorage(Id _slot, SourceLocation const& _location); + /// Increments the sequence number, deletes all memory information that might be overwritten + /// and stores the new value at the given slot. + /// @returns the store operation, which might be invalid if memory was not modified + StoreOperation storeInMemory(Id _slot, Id _value, SourceLocation const& _location); + /// Retrieves the current value at the given slot in memory or creates a new special mload class. + Id loadFromMemory(Id _slot, SourceLocation const& _location); + /// Finds or creates a new expression that applies the sha3 hash function to the contents in memory. + Id applySha3(Id _start, Id _length, SourceLocation const& _location); + + /// Current stack height, can be negative. 
+ int m_stackHeight = 0; + /// Current stack layout, mapping stack height -> equivalence class + std::map m_stackElements; + /// Current sequence number, this is incremented with each modification to storage or memory. + unsigned m_sequenceNumber = 1; + /// Knowledge about storage content. + std::map m_storageContent; + /// Knowledge about memory content. Keys are memory addresses, note that the values overlap + /// and are not contained here if they are not completely known. + std::map m_memoryContent; + /// Keeps record of all sha3 hashes that are computed. + std::map, Id> m_knownSha3Hashes; + /// Structure containing the classes of equivalent expressions. + std::shared_ptr m_expressionClasses; +}; + +} +} From 3ebb7d99c4e24d7bc963c419790c9f0081cc47a1 Mon Sep 17 00:00:00 2001 From: chriseth Date: Thu, 30 Apr 2015 11:40:43 +0200 Subject: [PATCH 04/67] More flexible way to approach unknown stack elements. --- ExpressionClasses.cpp | 36 +++++++++++++++++++++++++++--------- ExpressionClasses.h | 8 ++++++-- KnownState.cpp | 2 +- 3 files changed, 34 insertions(+), 12 deletions(-) diff --git a/ExpressionClasses.cpp b/ExpressionClasses.cpp index 1e60a7fe8..8d0785d37 100644 --- a/ExpressionClasses.cpp +++ b/ExpressionClasses.cpp @@ -37,6 +37,7 @@ using namespace dev::eth; bool ExpressionClasses::Expression::operator<(ExpressionClasses::Expression const& _other) const { + assertThrow(!!item && !!_other.item, OptimizerException, ""); auto type = item->type(); auto otherType = _other.item->type(); return std::tie(type, item->data(), arguments, sequenceNumber) < @@ -78,6 +79,15 @@ ExpressionClasses::Id ExpressionClasses::find( return exp.id; } +ExpressionClasses::Id ExpressionClasses::newId() +{ + // Note that we cannot insert it in m_expressions because this requires item to be set. 
+ Expression exp; + exp.id = m_representatives.size(); + m_representatives.push_back(exp); + return exp.id; +} + bool ExpressionClasses::knownToBeDifferent(ExpressionClasses::Id _a, ExpressionClasses::Id _b) { // Try to simplify "_a - _b" and return true iff the value is a non-zero constant. @@ -122,10 +132,16 @@ string ExpressionClasses::fullDAGToString(ExpressionClasses::Id _id) const { Expression const& expr = representative(_id); stringstream str; - str << dec << expr.id << ":" << *expr.item << "("; - for (Id arg: expr.arguments) - str << fullDAGToString(arg) << ","; - str << ")"; + str << dec << expr.id << ":"; + if (expr.item) + { + str << *expr.item << "("; + for (Id arg: expr.arguments) + str << fullDAGToString(arg) << ","; + str << ")"; + } + else + str << " UNIQUE"; return str.str(); } @@ -279,7 +295,7 @@ ExpressionClasses::Id ExpressionClasses::tryToSimplify(Expression const& _expr, { static Rules rules; - if (_expr.item->type() != Operation) + if (!_expr.item || _expr.item->type() != Operation) return -1; for (auto const& rule: rules.rules()) @@ -337,7 +353,7 @@ void Pattern::setMatchGroup(unsigned _group, map& _ bool Pattern::matches(Expression const& _expr, ExpressionClasses const& _classes) const { - if (!matchesBaseItem(*_expr.item)) + if (!matchesBaseItem(_expr.item)) return false; if (m_matchGroup) { @@ -387,13 +403,15 @@ string Pattern::toString() const return s.str(); } -bool Pattern::matchesBaseItem(AssemblyItem const& _item) const +bool Pattern::matchesBaseItem(AssemblyItem const* _item) const { if (m_type == UndefinedItem) return true; - if (m_type != _item.type()) + if (!_item) return false; - if (m_requireDataMatch && m_data != _item.data()) + if (m_type != _item->type()) + return false; + if (m_requireDataMatch && m_data != _item->data()) return false; return true; } diff --git a/ExpressionClasses.h b/ExpressionClasses.h index 2f720f606..5d32c0f71 100644 --- a/ExpressionClasses.h +++ b/ExpressionClasses.h @@ -50,7 +50,7 @@ public: struct 
Expression { Id id; - AssemblyItem const* item; + AssemblyItem const* item = nullptr; Ids arguments; unsigned sequenceNumber; ///< Storage modification sequence, only used for SLOAD/SSTORE instructions. /// Behaves as if this was a tuple of (item->type(), item->data(), arguments, sequenceNumber). @@ -68,6 +68,10 @@ public: bool _copyItem = true, unsigned _sequenceNumber = 0 ); + /// @returns a new unique class id which does not and will never have a representative containing + /// an AssemblyItem, i.e. its value cannot be generated, instead it has to be assumed to be + /// already present. + Id newId(); /// @returns the canonical representative of an expression class. Expression const& representative(Id _id) const { return m_representatives.at(_id); } /// @returns the number of classes. @@ -149,7 +153,7 @@ public: std::string toString() const; private: - bool matchesBaseItem(AssemblyItem const& _item) const; + bool matchesBaseItem(AssemblyItem const* _item) const; Expression const& matchGroupValue() const; AssemblyItemType m_type; diff --git a/KnownState.cpp b/KnownState.cpp index 244270fb6..e83810d43 100644 --- a/KnownState.cpp +++ b/KnownState.cpp @@ -136,7 +136,7 @@ ExpressionClasses::Id KnownState::stackElement(int _stackHeight, SourceLocation if (m_stackElements.count(_stackHeight)) return m_stackElements.at(_stackHeight); // Stack element not found (not assigned yet), create new equivalence class. - return m_stackElements[_stackHeight] = initialStackElement(_stackHeight, _location); + return m_stackElements[_stackHeight] = m_expressionClasses->newId(); } ExpressionClasses::Id KnownState::initialStackElement( From 867101e40981db56d8b72fd363e4f9e376991284 Mon Sep 17 00:00:00 2001 From: chriseth Date: Thu, 30 Apr 2015 14:41:55 +0200 Subject: [PATCH 05/67] Common subexpression elimination ready for using pre-known state. 
--- CommonSubexpressionEliminator.cpp | 8 +++++--- CommonSubexpressionEliminator.h | 6 ++++-- ExpressionClasses.cpp | 9 --------- ExpressionClasses.h | 4 ---- KnownState.cpp | 10 +++++----- 5 files changed, 14 insertions(+), 23 deletions(-) diff --git a/CommonSubexpressionEliminator.cpp b/CommonSubexpressionEliminator.cpp index 645a426d9..4b85eba40 100644 --- a/CommonSubexpressionEliminator.cpp +++ b/CommonSubexpressionEliminator.cpp @@ -40,9 +40,8 @@ vector CommonSubexpressionEliminator::getOptimizedItems() int minHeight = m_state.stackHeight() + 1; if (!m_state.stackElements().empty()) minHeight = min(minHeight, m_state.stackElements().begin()->first); - for (int height = minHeight; height <= 0; ++height) - //@todo this is not nice as it is here - should be "unknownStackElement" - but is it really unknown? - initialStackContents[height] = m_state.initialStackElement(height, SourceLocation()); + for (int height = minHeight; height <= m_initialState.stackHeight(); ++height) + initialStackContents[height] = m_initialState.stackElement(height, SourceLocation()); for (int height = minHeight; height <= m_state.stackHeight(); ++height) targetStackContents[height] = m_state.stackElement(height, SourceLocation()); @@ -50,6 +49,7 @@ vector CommonSubexpressionEliminator::getOptimizedItems() //stream(cout, initialStackContents, targetStackContents); AssemblyItems items = CSECodeGenerator(m_state.expressionClasses(), m_storeOperations).generateCode( + m_initialState.stackHeight(), initialStackContents, targetStackContents ); @@ -106,10 +106,12 @@ CSECodeGenerator::CSECodeGenerator( } AssemblyItems CSECodeGenerator::generateCode( + int _initialStackHeight, map const& _initialStack, map const& _targetStackContents ) { + m_stackHeight = _initialStackHeight; m_stack = _initialStack; for (auto const& item: m_stack) if (!m_classPositions.count(item.second)) diff --git a/CommonSubexpressionEliminator.h b/CommonSubexpressionEliminator.h index 2ed926401..6e1ba40b3 100644 --- 
a/CommonSubexpressionEliminator.h +++ b/CommonSubexpressionEliminator.h @@ -61,7 +61,7 @@ public: using Id = ExpressionClasses::Id; using StoreOperation = KnownState::StoreOperation; - CommonSubexpressionEliminator(KnownState const& _state): m_state(_state) {} + CommonSubexpressionEliminator(KnownState const& _state): m_initialState(_state), m_state(_state) {} /// Feeds AssemblyItems into the eliminator and @returns the iterator pointing at the first /// item that must be fed into a new instance of the eliminator. @@ -85,6 +85,7 @@ private: /// Tries to optimize the item that breaks the basic block at the end. void optimizeBreakingItem(); + KnownState m_initialState; KnownState m_state; /// Keeps information about which storage or memory slots were written to at which sequence /// number with what instruction. @@ -115,6 +116,7 @@ public: /// @param _targetStackContents final contents of the stack, by stack height relative to initial /// @note should only be called once on each object. AssemblyItems generateCode( + int _initialStackHeight, std::map const& _initialStack, std::map const& _targetStackContents ); @@ -150,7 +152,7 @@ private: AssemblyItems m_generatedItems; /// Current height of the stack relative to the start. - int m_stackHeight = 0; + int m_stackHeight; /// If (b, a) is in m_requests then b is needed to compute a. std::multimap m_neededBy; /// Current content of the stack. diff --git a/ExpressionClasses.cpp b/ExpressionClasses.cpp index 8d0785d37..e62f75264 100644 --- a/ExpressionClasses.cpp +++ b/ExpressionClasses.cpp @@ -79,15 +79,6 @@ ExpressionClasses::Id ExpressionClasses::find( return exp.id; } -ExpressionClasses::Id ExpressionClasses::newId() -{ - // Note that we cannot insert it in m_expressions because this requires item to be set. 
- Expression exp; - exp.id = m_representatives.size(); - m_representatives.push_back(exp); - return exp.id; -} - bool ExpressionClasses::knownToBeDifferent(ExpressionClasses::Id _a, ExpressionClasses::Id _b) { // Try to simplify "_a - _b" and return true iff the value is a non-zero constant. diff --git a/ExpressionClasses.h b/ExpressionClasses.h index 5d32c0f71..c83520300 100644 --- a/ExpressionClasses.h +++ b/ExpressionClasses.h @@ -68,10 +68,6 @@ public: bool _copyItem = true, unsigned _sequenceNumber = 0 ); - /// @returns a new unique class id which does not and will never have a representative containing - /// an AssemblyItem, i.e. its value cannot be generated, instead it has to be assumed to be - /// already present. - Id newId(); /// @returns the canonical representative of an expression class. Expression const& representative(Id _id) const { return m_representatives.at(_id); } /// @returns the number of classes. diff --git a/KnownState.cpp b/KnownState.cpp index e83810d43..02c6ee136 100644 --- a/KnownState.cpp +++ b/KnownState.cpp @@ -135,8 +135,10 @@ ExpressionClasses::Id KnownState::stackElement(int _stackHeight, SourceLocation { if (m_stackElements.count(_stackHeight)) return m_stackElements.at(_stackHeight); - // Stack element not found (not assigned yet), create new equivalence class. - return m_stackElements[_stackHeight] = m_expressionClasses->newId(); + // Stack element not found (not assigned yet), create new unknown equivalence class. + //@todo check that we do not infer incorrect equivalences when the stack is cleared partially + //in between. 
+ return m_stackElements[_stackHeight] = initialStackElement(_stackHeight, _location); } ExpressionClasses::Id KnownState::initialStackElement( @@ -144,10 +146,8 @@ ExpressionClasses::Id KnownState::initialStackElement( SourceLocation const& _location ) { - assertThrow(_stackHeight <= 0, OptimizerException, "Initial stack element of positive height requested."); - assertThrow(_stackHeight > -16, StackTooDeepException, ""); // This is a special assembly item that refers to elements pre-existing on the initial stack. - return m_expressionClasses->find(AssemblyItem(dupInstruction(1 - _stackHeight), _location)); + return m_expressionClasses->find(AssemblyItem(UndefinedItem, u256(_stackHeight), _location)); } void KnownState::setStackElement(int _stackHeight, Id _class) From a2e3bcbd0c45a79a9709dc8a69858765ab904805 Mon Sep 17 00:00:00 2001 From: chriseth Date: Thu, 30 Apr 2015 15:31:16 +0200 Subject: [PATCH 06/67] Make KnownState work with all instructions. --- ExpressionClasses.cpp | 19 ++++++++++----- KnownState.cpp | 7 ++++++ SemanticInformation.cpp | 54 +++++++++++++++++++++++++++++++++++++++++ SemanticInformation.h | 9 +++++++ 4 files changed, 83 insertions(+), 6 deletions(-) diff --git a/ExpressionClasses.cpp b/ExpressionClasses.cpp index e62f75264..cfbeba7fa 100644 --- a/ExpressionClasses.cpp +++ b/ExpressionClasses.cpp @@ -57,12 +57,15 @@ ExpressionClasses::Id ExpressionClasses::find( exp.arguments = _arguments; exp.sequenceNumber = _sequenceNumber; - if (SemanticInformation::isCommutativeOperation(_item)) - sort(exp.arguments.begin(), exp.arguments.end()); + if (SemanticInformation::isDeterministic(_item)) + { + if (SemanticInformation::isCommutativeOperation(_item)) + sort(exp.arguments.begin(), exp.arguments.end()); - auto it = m_expressions.find(exp); - if (it != m_expressions.end()) - return it->id; + auto it = m_expressions.find(exp); + if (it != m_expressions.end()) + return it->id; + } if (_copyItem) exp.item = storeItem(_item); @@ -286,7 +289,11 @@ 
ExpressionClasses::Id ExpressionClasses::tryToSimplify(Expression const& _expr, { static Rules rules; - if (!_expr.item || _expr.item->type() != Operation) + if ( + !_expr.item || + _expr.item->type() != Operation || + !SemanticInformation::isDeterministic(*_expr.item) + ) return -1; for (auto const& rule: rules.rules()) diff --git a/KnownState.cpp b/KnownState.cpp index 02c6ee136..632777c82 100644 --- a/KnownState.cpp +++ b/KnownState.cpp @@ -101,6 +101,7 @@ KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool vector arguments(info.args); for (int i = 0; i < info.args; ++i) arguments[i] = stackElement(m_stackHeight - i, _item.getLocation()); + if (_item.instruction() == Instruction::SSTORE) op = storeInStorage(arguments[0], arguments[1], _item.getLocation()); else if (_item.instruction() == Instruction::SLOAD) @@ -121,10 +122,16 @@ KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool applySha3(arguments.at(0), arguments.at(1), _item.getLocation()) ); else + { + if (SemanticInformation::invalidatesMemory(_item.instruction())) + resetMemory(); + if (SemanticInformation::invalidatesStorage(_item.instruction())) + resetStorage(); setStackElement( m_stackHeight + _item.deposit(), m_expressionClasses->find(_item, arguments, _copyItem) ); + } } m_stackHeight += _item.deposit(); } diff --git a/SemanticInformation.cpp b/SemanticInformation.cpp index 83d59efc7..40c36f9e3 100644 --- a/SemanticInformation.cpp +++ b/SemanticInformation.cpp @@ -122,3 +122,57 @@ bool SemanticInformation::altersControlFlow(AssemblyItem const& _item) return false; } } + + +bool SemanticInformation::isDeterministic(AssemblyItem const& _item) +{ + if (_item.type() != Operation) + return true; + assertThrow(!altersControlFlow(_item), OptimizerException, ""); + + switch (_item.instruction()) + { + case Instruction::CALL: + case Instruction::CALLCODE: + case Instruction::CREATE: + case Instruction::GAS: + case Instruction::PC: + case 
Instruction::MSIZE: // depends on previous writes and reads, not only on content + case Instruction::BALANCE: // depends on previous calls + case Instruction::EXTCODESIZE: + return false; + default: + return true; + } +} + +bool SemanticInformation::invalidatesMemory(Instruction _instruction) +{ + switch (_instruction) + { + case Instruction::CALLDATACOPY: + case Instruction::CODECOPY: + case Instruction::EXTCODECOPY: + case Instruction::MSTORE: + case Instruction::MSTORE8: + case Instruction::CALL: + case Instruction::CALLCODE: + return true; + default: + return false; + } +} + +bool SemanticInformation::invalidatesStorage(Instruction _instruction) +{ + switch (_instruction) + { + case Instruction::CALL: + case Instruction::CALLCODE: + case Instruction::CREATE: + case Instruction::SSTORE: + return true; + default: + return false; + } +} diff --git a/SemanticInformation.h b/SemanticInformation.h index 27aa6f1a4..b14ddb65a 100644 --- a/SemanticInformation.h +++ b/SemanticInformation.h @@ -23,6 +23,7 @@ #pragma once +#include namespace dev { @@ -45,6 +46,14 @@ struct SemanticInformation static bool isSwapInstruction(AssemblyItem const& _item); static bool isJumpInstruction(AssemblyItem const& _item); static bool altersControlFlow(AssemblyItem const& _item); + /// @returns false if the value put on the stack by _item depends on anything else than + /// the information in the current block header, memory, storage or stack. + /// @note should not be called for instructions that alter the control flow. + static bool isDeterministic(AssemblyItem const& _item); + /// @returns true if the given instruction modifies memory. + static bool invalidatesMemory(Instruction _instruction); + /// @returns true if the given instruction modifies storage (even indirectly). 
+ static bool invalidatesStorage(Instruction _instruction); }; } From 9d7eb49f35f801b53960135b7c353fa64cea7439 Mon Sep 17 00:00:00 2001 From: chriseth Date: Mon, 4 May 2015 10:15:41 +0200 Subject: [PATCH 07/67] Gather knowledge about the state during control flow analysis. --- Assembly.cpp | 7 +++- ControlFlowGraph.cpp | 91 ++++++++++++++++++++++++++++++++++++++++- ControlFlowGraph.h | 22 +++++++--- KnownState.cpp | 75 +++++++++++++++++++++++++-------- KnownState.h | 30 ++++++++++---- SemanticInformation.cpp | 1 - SemanticInformation.h | 1 - 7 files changed, 192 insertions(+), 35 deletions(-) diff --git a/Assembly.cpp b/Assembly.cpp index c7253622e..1c5391168 100644 --- a/Assembly.cpp +++ b/Assembly.cpp @@ -314,6 +314,10 @@ Assembly& Assembly::optimise(bool _enable) copt << toString(*this); count = 0; + //@todo CFG interface should be a generator, that returns an item and a pointer to a + // knownstate, which has to replace the current state if it is not null. + // Feed these items to the CSE, but also store them and replace the stored version + // if the items generated by the CSE are shorter. (or even use less gas?) copt << "Performing control flow analysis..."; { ControlFlowGraph cfg(m_items); @@ -329,7 +333,8 @@ Assembly& Assembly::optimise(bool _enable) copt << "Performing common subexpression elimination..."; for (auto iter = m_items.begin(); iter != m_items.end();) { - KnownState state; + //@todo use only a single state / expression classes instance. 
+ KnownState state(make_shared()); CommonSubexpressionEliminator eliminator(state); auto orig = iter; iter = eliminator.feedItems(iter, m_items.end()); diff --git a/ControlFlowGraph.cpp b/ControlFlowGraph.cpp index cc4367e64..0b0c757d6 100644 --- a/ControlFlowGraph.cpp +++ b/ControlFlowGraph.cpp @@ -23,9 +23,11 @@ #include #include +#include #include #include #include +#include using namespace std; using namespace dev; @@ -46,6 +48,7 @@ AssemblyItems ControlFlowGraph::optimisedItems() resolveNextLinks(); removeUnusedBlocks(); setPrevLinks(); + gatherKnowledge(); return rebuildCode(); } @@ -209,6 +212,77 @@ void ControlFlowGraph::setPrevLinks() } } +void ControlFlowGraph::gatherKnowledge() +{ + // @todo actually we know that memory is filled with zeros at the beginning, + // we could make use of that. + shared_ptr emptyState = make_shared(); + ExpressionClasses& expr = emptyState->expressionClasses(); + bool unknownJumpEncountered = false; + + vector>> workQueue({make_pair(BlockId::initial(), emptyState->copy())}); + while (!workQueue.empty()) + { + //@todo we might have to do something like incrementing the sequence number for each JUMPDEST + assertThrow(!!workQueue.back().first, OptimizerException, ""); + BasicBlock& block = m_blocks.at(workQueue.back().first); + shared_ptr state = workQueue.back().second; + workQueue.pop_back(); + if (block.startState) + { + state->reduceToCommonKnowledge(*block.startState); + if (*state == *block.startState) + continue; + } + + block.startState = state->copy(); + //@todo we might know the return address for the first pass, but not anymore for the second, + // -> store knowledge about tags as a union. + + // Feed all items except for the final jump yet because it will erase the target tag. 
+ unsigned pc = block.begin; + while (pc < block.end && !SemanticInformation::altersControlFlow(m_items.at(pc))) + state->feedItem(m_items.at(pc++)); + + if ( + block.endType == BasicBlock::EndType::JUMP || + block.endType == BasicBlock::EndType::JUMPI + ) + { + assertThrow(block.begin <= pc && pc == block.end - 1, OptimizerException, ""); + //@todo in the case of JUMPI, add knowledge about the condition to the state + // (for both values of the condition) + BlockId nextBlock = expressionClassToBlockId( + state->stackElement(state->stackHeight(), SourceLocation()), + expr + ); + state->feedItem(m_items.at(pc++)); + if (nextBlock) + workQueue.push_back(make_pair(nextBlock, state->copy())); + else if (!unknownJumpEncountered) + { + // We do not know where this jump goes, so we have to reset the states of all + // JUMPDESTs. + unknownJumpEncountered = true; + for (auto const& it: m_blocks) + if (it.second.begin < it.second.end && m_items[it.second.begin].type() == Tag) + workQueue.push_back(make_pair(it.first, emptyState->copy())); + } + } + else if (block.begin <= pc && pc < block.end) + state->feedItem(m_items.at(pc++)); + assertThrow(block.end <= block.begin || pc == block.end, OptimizerException, ""); + + block.endState = state; + + if ( + block.endType == BasicBlock::EndType::HANDOVER || + block.endType == BasicBlock::EndType::JUMPI + ) + workQueue.push_back(make_pair(block.next, state->copy())); + } +} + AssemblyItems ControlFlowGraph::rebuildCode() { map pushes; @@ -233,7 +307,7 @@ AssemblyItems ControlFlowGraph::rebuildCode() blockId = m_blocks.at(blockId).prev; for (; blockId; blockId = m_blocks.at(blockId).next) { - BasicBlock const& block = m_blocks.at(blockId); + BasicBlock& block = m_blocks.at(blockId); blocksToAdd.erase(blockId); blocksAdded.insert(blockId); @@ -243,7 +317,10 @@ AssemblyItems ControlFlowGraph::rebuildCode() continue; // If block starts with unused tag, skip it. 
if (previousHandedOver && !pushes[blockId] && begin->type() == Tag) + { ++begin; + ++block.begin; + } previousHandedOver = (block.endType == BasicBlock::EndType::HANDOVER); copy(begin, end, back_inserter(code)); } @@ -252,6 +329,18 @@ AssemblyItems ControlFlowGraph::rebuildCode() return code; } +BlockId ControlFlowGraph::expressionClassToBlockId( + ExpressionClasses::Id _id, + ExpressionClasses& _exprClasses +) +{ + ExpressionClasses::Expression expr = _exprClasses.representative(_id); + if (expr.item && expr.item->type() == PushTag) + return BlockId(expr.item->data()); + else + return BlockId::invalid(); +} + BlockId ControlFlowGraph::generateNewId() { BlockId id = BlockId(++m_lastUsedId); diff --git a/ControlFlowGraph.h b/ControlFlowGraph.h index 5d16df327..4310d6642 100644 --- a/ControlFlowGraph.h +++ b/ControlFlowGraph.h @@ -24,16 +24,17 @@ #pragma once #include +#include #include #include +#include namespace dev { namespace eth { -class AssemblyItem; -using AssemblyItems = std::vector; +class KnownState; /** * Identifier for a block, coincides with the tag number of an AssemblyItem but adds a special @@ -69,14 +70,20 @@ struct BasicBlock unsigned end = 0; /// Tags pushed inside this block, with multiplicity. std::vector pushedTags; - /// ID of the block that always follows this one (either JUMP or flow into new block), - /// or BlockId::invalid() otherwise + /// ID of the block that always follows this one (either non-branching part of JUMPI or flow + /// into new block), or BlockId::invalid() otherwise BlockId next = BlockId::invalid(); - /// ID of the block that has to precede this one. + /// ID of the block that has to precede this one (because control flows into it). BlockId prev = BlockId::invalid(); enum class EndType { JUMP, JUMPI, STOP, HANDOVER }; EndType endType = EndType::HANDOVER; + + /// Knowledge about the state when this block is entered. Intersection of all possible ways + /// to enter this block. 
+ std::shared_ptr startState; + /// Knowledge about the state at the end of this block. + std::shared_ptr endState; }; class ControlFlowGraph @@ -93,9 +100,14 @@ private: void splitBlocks(); void resolveNextLinks(); void removeUnusedBlocks(); + void gatherKnowledge(); void setPrevLinks(); AssemblyItems rebuildCode(); + /// @returns the corresponding BlockId if _id is a pushed jump tag, + /// and an invalid BlockId otherwise. + BlockId expressionClassToBlockId(ExpressionClasses::Id _id, ExpressionClasses& _exprClasses); + BlockId generateNewId(); unsigned m_lastUsedId = 0; diff --git a/KnownState.cpp b/KnownState.cpp index 632777c82..7ff0143e1 100644 --- a/KnownState.cpp +++ b/KnownState.cpp @@ -30,16 +30,18 @@ using namespace std; using namespace dev; using namespace dev::eth; -ostream& KnownState::stream( - ostream& _out, - map _initialStack, - map _targetStack -) const +ostream& KnownState::stream(ostream& _out) const { auto streamExpressionClass = [this](ostream& _out, Id _id) { auto const& expr = m_expressionClasses->representative(_id); - _out << " " << dec << _id << ": " << *expr.item; + _out << " " << dec << _id << ": "; + if (!expr.item) + _out << " no item"; + else if (expr.item->type() == UndefinedItem) + _out << " unknown " << int(expr.item->data()); + else + _out << *expr.item; if (expr.sequenceNumber) _out << "@" << dec << expr.sequenceNumber; _out << "("; @@ -48,22 +50,32 @@ ostream& KnownState::stream( _out << ")" << endl; }; - _out << "Optimizer analysis:" << endl; - _out << "Final stack height: " << dec << m_stackHeight << endl; + _out << "=== State ===" << endl; + _out << "Stack height: " << dec << m_stackHeight << endl; _out << "Equivalence classes: " << endl; for (Id eqClass = 0; eqClass < m_expressionClasses->size(); ++eqClass) streamExpressionClass(_out, eqClass); - _out << "Initial stack: " << endl; - for (auto const& it: _initialStack) + _out << "Stack: " << endl; + for (auto const& it: m_stackElements) { _out << " " << dec << it.first << ": 
"; streamExpressionClass(_out, it.second); } - _out << "Target stack: " << endl; - for (auto const& it: _targetStack) + _out << "Storage: " << endl; + for (auto const& it: m_storageContent) { - _out << " " << dec << it.first << ": "; + _out << " "; + streamExpressionClass(_out, it.first); + _out << ": "; + streamExpressionClass(_out, it.second); + } + _out << "Memory: " << endl; + for (auto const& it: m_memoryContent) + { + _out << " "; + streamExpressionClass(_out, it.first); + _out << ": "; streamExpressionClass(_out, it.second); } @@ -73,7 +85,11 @@ ostream& KnownState::stream( KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool _copyItem) { StoreOperation op; - if (_item.type() != Operation) + if (_item.type() == Tag) + { + // can be ignored + } + else if (_item.type() != Operation) { assertThrow(_item.deposit() == 1, InvalidDeposit, ""); setStackElement(++m_stackHeight, m_expressionClasses->find(_item, {}, _copyItem)); @@ -127,17 +143,40 @@ KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool resetMemory(); if (SemanticInformation::invalidatesStorage(_item.instruction())) resetStorage(); - setStackElement( - m_stackHeight + _item.deposit(), - m_expressionClasses->find(_item, arguments, _copyItem) - ); + assertThrow(info.ret <= 1, InvalidDeposit, ""); + if (info.ret == 1) + setStackElement( + m_stackHeight + _item.deposit(), + m_expressionClasses->find(_item, arguments, _copyItem) + ); } } + for (int p = m_stackHeight; p > m_stackHeight + _item.deposit(); --p) + m_stackElements.erase(p); m_stackHeight += _item.deposit(); } return op; } +void KnownState::reduceToCommonKnowledge(KnownState const& /*_other*/) +{ + //@todo + *this = KnownState(m_expressionClasses); +} + +bool KnownState::operator==(const KnownState& _other) const +{ + //@todo + return ( + m_stackElements.empty() && + _other.m_stackElements.empty() && + m_storageContent.empty() && + _other.m_storageContent.empty() && + m_memoryContent.empty() && 
+ _other.m_memoryContent.empty() + ); +} + ExpressionClasses::Id KnownState::stackElement(int _stackHeight, SourceLocation const& _location) { if (m_stackElements.count(_stackHeight)) diff --git a/KnownState.h b/KnownState.h index c6dfcee6b..f7a3dd675 100644 --- a/KnownState.h +++ b/KnownState.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -70,14 +71,14 @@ public: Id expression; }; - KnownState(): m_expressionClasses(std::make_shared()) {} + explicit KnownState( + std::shared_ptr _expressionClasses = std::make_shared() + ): m_expressionClasses(_expressionClasses) + { + } /// Streams debugging information to @a _out. - std::ostream& stream( - std::ostream& _out, - std::map _initialStack = std::map(), - std::map _targetStack = std::map() - ) const; + std::ostream& stream(std::ostream& _out) const; /// Feeds the item into the system for analysis. /// @returns a possible store operation @@ -92,6 +93,20 @@ public: /// Resets any knowledge. void reset() { resetStorage(); resetMemory(); resetStack(); } + /// Manually increments the storage and memory sequence number. + void incrementSequenceNumber() { m_sequenceNumber += 2; } + + /// Replaces the state by the intersection with _other, i.e. only equal knowledge is retained. + /// If the stack heighht is different, the smaller one is used and the stack is compared + /// relatively. + void reduceToCommonKnowledge(KnownState const& _other); + + /// @returns a shared pointer to a copy of this state. + std::shared_ptr copy() const { return std::make_shared(*this); } + + /// @returns true if the knowledge about the state of both objects is (known to be) equal. + bool operator==(KnownState const& _other) const; + ///@todo the sequence numbers in two copies of this class should never be the same. 
/// might be doable using two-dimensional sequence numbers, where the first value is incremented /// for each copy @@ -99,8 +114,7 @@ public: /// Retrieves the current equivalence class fo the given stack element (or generates a new /// one if it does not exist yet). Id stackElement(int _stackHeight, SourceLocation const& _location); - /// @returns the equivalence class id of the special initial stack element at the given height - /// (must not be positive). + /// @returns the equivalence class id of the special initial stack element at the given height. Id initialStackElement(int _stackHeight, SourceLocation const& _location); int stackHeight() const { return m_stackHeight; } diff --git a/SemanticInformation.cpp b/SemanticInformation.cpp index 40c36f9e3..056162b5f 100644 --- a/SemanticInformation.cpp +++ b/SemanticInformation.cpp @@ -128,7 +128,6 @@ bool SemanticInformation::isDeterministic(AssemblyItem const& _item) { if (_item.type() != Operation) return true; - assertThrow(!altersControlFlow(_item), OptimizerException, ""); switch (_item.instruction()) { diff --git a/SemanticInformation.h b/SemanticInformation.h index b14ddb65a..094f45912 100644 --- a/SemanticInformation.h +++ b/SemanticInformation.h @@ -48,7 +48,6 @@ struct SemanticInformation static bool altersControlFlow(AssemblyItem const& _item); /// @returns false if the value put on the stack by _item depends on anything else than /// the information in the current block header, memory, storage or stack. - /// @note should not be called for instructions that alter the control flow. static bool isDeterministic(AssemblyItem const& _item); /// @returns true if the given instruction modifies memory. static bool invalidatesMemory(Instruction _instruction); From 85673ff00cc8c0c21209080fb327c7deda69883a Mon Sep 17 00:00:00 2001 From: chriseth Date: Tue, 5 May 2015 17:03:07 +0200 Subject: [PATCH 08/67] Remove unused old optimizer rule. 
--- Assembly.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/Assembly.cpp b/Assembly.cpp index 1c5391168..aec06aef6 100644 --- a/Assembly.cpp +++ b/Assembly.cpp @@ -304,9 +304,6 @@ Assembly& Assembly::optimise(bool _enable) { if (!_enable) return *this; - std::vector>> rules; - // jump to next instruction - rules.push_back({ { PushTag, Instruction::JUMP, Tag }, [](AssemblyItemsConstRef m) -> AssemblyItems { if (m[0].data() == m[2].data()) return {m[2]}; else return m.toVector(); }}); unsigned total = 0; for (unsigned count = 1; count > 0; total += count) From bebe76828a6c8ccfc3e61a066a43530f715aeee9 Mon Sep 17 00:00:00 2001 From: chriseth Date: Tue, 5 May 2015 17:45:58 +0200 Subject: [PATCH 09/67] CFG returns vector of blocks instead of assembly items. --- Assembly.cpp | 5 ++++- ControlFlowGraph.cpp | 28 ++++++++++++---------------- ControlFlowGraph.h | 14 +++++++++----- 3 files changed, 25 insertions(+), 22 deletions(-) diff --git a/Assembly.cpp b/Assembly.cpp index aec06aef6..9530ded49 100644 --- a/Assembly.cpp +++ b/Assembly.cpp @@ -318,7 +318,10 @@ Assembly& Assembly::optimise(bool _enable) copt << "Performing control flow analysis..."; { ControlFlowGraph cfg(m_items); - AssemblyItems optItems = cfg.optimisedItems(); + AssemblyItems optItems; + for (BasicBlock const& block: cfg.optimisedBlocks()) + copy(m_items.begin() + block.begin, m_items.begin() + block.end, + back_inserter(optItems)); if (optItems.size() < m_items.size()) { copt << "Old size: " << m_items.size() << ", new size: " << optItems.size(); diff --git a/ControlFlowGraph.cpp b/ControlFlowGraph.cpp index 0b0c757d6..2e28317a3 100644 --- a/ControlFlowGraph.cpp +++ b/ControlFlowGraph.cpp @@ -38,10 +38,10 @@ BlockId::BlockId(u256 const& _id): m_id(_id) assertThrow( _id < initial().m_id, OptimizerException, "Tag number too large."); } -AssemblyItems ControlFlowGraph::optimisedItems() +BasicBlocks ControlFlowGraph::optimisedBlocks() { if (m_items.empty()) - return m_items; + return BasicBlocks(); 
findLargestTag(); splitBlocks(); @@ -216,17 +216,17 @@ void ControlFlowGraph::gatherKnowledge() { // @todo actually we know that memory is filled with zeros at the beginning, // we could make use of that. - shared_ptr emptyState = make_shared(); + KnownStatePointer emptyState = make_shared(); ExpressionClasses& expr = emptyState->expressionClasses(); bool unknownJumpEncountered = false; - vector>> workQueue({make_pair(BlockId::initial(), emptyState->copy())}); + vector> workQueue({make_pair(BlockId::initial(), emptyState->copy())}); while (!workQueue.empty()) { //@todo we might have to do something like incrementing the sequence number for each JUMPDEST assertThrow(!!workQueue.back().first, OptimizerException, ""); BasicBlock& block = m_blocks.at(workQueue.back().first); - shared_ptr state = workQueue.back().second; + KnownStatePointer state = workQueue.back().second; workQueue.pop_back(); if (block.startState) { @@ -283,7 +283,7 @@ void ControlFlowGraph::gatherKnowledge() } } -AssemblyItems ControlFlowGraph::rebuildCode() +BasicBlocks ControlFlowGraph::rebuildCode() { map pushes; for (auto& idAndBlock: m_blocks) @@ -294,7 +294,7 @@ AssemblyItems ControlFlowGraph::rebuildCode() for (auto it: m_blocks) blocksToAdd.insert(it.first); set blocksAdded; - AssemblyItems code; + BasicBlocks blocks; for ( BlockId blockId = BlockId::initial(); @@ -311,22 +311,18 @@ AssemblyItems ControlFlowGraph::rebuildCode() blocksToAdd.erase(blockId); blocksAdded.insert(blockId); - auto begin = m_items.begin() + block.begin; - auto end = m_items.begin() + block.end; - if (begin == end) + if (block.begin == block.end) continue; // If block starts with unused tag, skip it. 
- if (previousHandedOver && !pushes[blockId] && begin->type() == Tag) - { - ++begin; + if (previousHandedOver && !pushes[blockId] && m_items[block.begin].type() == Tag) ++block.begin; - } + if (block.begin < block.end) + blocks.push_back(block); previousHandedOver = (block.endType == BasicBlock::EndType::HANDOVER); - copy(begin, end, back_inserter(code)); } } - return code; + return blocks; } BlockId ControlFlowGraph::expressionClassToBlockId( diff --git a/ControlFlowGraph.h b/ControlFlowGraph.h index 4310d6642..3366dc45f 100644 --- a/ControlFlowGraph.h +++ b/ControlFlowGraph.h @@ -35,6 +35,7 @@ namespace eth { class KnownState; +using KnownStatePointer = std::shared_ptr; /** * Identifier for a block, coincides with the tag number of an AssemblyItem but adds a special @@ -81,19 +82,22 @@ struct BasicBlock /// Knowledge about the state when this block is entered. Intersection of all possible ways /// to enter this block. - std::shared_ptr startState; + KnownStatePointer startState; /// Knowledge about the state at the end of this block. - std::shared_ptr endState; + KnownStatePointer endState; }; +using BasicBlocks = std::vector; + class ControlFlowGraph { public: /// Initializes the control flow graph. /// @a _items has to persist across the usage of this class. ControlFlowGraph(AssemblyItems const& _items): m_items(_items) {} - /// @returns the collection of optimised items, should be called only once. - AssemblyItems optimisedItems(); + /// @returns vector of basic blocks in the order they should be used in the final code. + /// Should be called only once. + BasicBlocks optimisedBlocks(); private: void findLargestTag(); @@ -102,7 +106,7 @@ private: void removeUnusedBlocks(); void gatherKnowledge(); void setPrevLinks(); - AssemblyItems rebuildCode(); + BasicBlocks rebuildCode(); /// @returns the corresponding BlockId if _id is a pushed jump tag, /// and an invalid BlockId otherwise. 
From 1dfcb4735011dfaa143d6592713ec6b4bf097934 Mon Sep 17 00:00:00 2001 From: chriseth Date: Fri, 8 May 2015 18:07:56 +0200 Subject: [PATCH 10/67] Use range-based erase. --- KnownState.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/KnownState.cpp b/KnownState.cpp index 7ff0143e1..41ac4802b 100644 --- a/KnownState.cpp +++ b/KnownState.cpp @@ -151,8 +151,10 @@ KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool ); } } - for (int p = m_stackHeight; p > m_stackHeight + _item.deposit(); --p) - m_stackElements.erase(p); + m_stackElements.erase( + m_stackElements.upper_bound(m_stackHeight + _item.deposit()), + m_stackElements.end() + ); m_stackHeight += _item.deposit(); } return op; From b07331fbb301cfc08c53da0b81856c1360dcc963 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Mon, 11 May 2015 11:38:11 +0200 Subject: [PATCH 11/67] CMake: set default RUNTIME_OUTPUT_DIRECTORY property to "bin" This commit changes output directory for runtime components (executables and DLLs) to "bin" directory. That allows running executables on Windows without need of install step. 
Closes ethereum/cpp-ethereum#1821 --- CMakeLists.txt | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f8150806f..eb8fea95c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,15 +19,10 @@ set(EXECUTABLE evmasm) file(GLOB HEADERS "*.h") -if (ETH_STATIC) - add_library(${EXECUTABLE} STATIC ${SRC_LIST} ${HEADERS}) -else() - add_library(${EXECUTABLE} SHARED ${SRC_LIST} ${HEADERS}) -endif() +add_library(${EXECUTABLE} ${SRC_LIST} ${HEADERS}) target_link_libraries(${EXECUTABLE} evmcore) target_link_libraries(${EXECUTABLE} devcrypto) install( TARGETS ${EXECUTABLE} RUNTIME DESTINATION bin ARCHIVE DESTINATION lib LIBRARY DESTINATION lib ) install( FILES ${HEADERS} DESTINATION include/${EXECUTABLE} ) - From 9d3f0de31bb82217a5fc5f2daf933d21b18774a0 Mon Sep 17 00:00:00 2001 From: chriseth Date: Wed, 6 May 2015 19:15:14 +0200 Subject: [PATCH 12/67] Reuse state during common subexpression elimination. --- Assembly.cpp | 78 ++++++++++++++----------------- CommonSubexpressionEliminator.cpp | 51 ++++++++++++++------ CommonSubexpressionEliminator.h | 14 ++---- ControlFlowGraph.cpp | 22 +++++++-- 4 files changed, 93 insertions(+), 72 deletions(-) diff --git a/Assembly.cpp b/Assembly.cpp index 9530ded49..abcd44516 100644 --- a/Assembly.cpp +++ b/Assembly.cpp @@ -311,54 +311,45 @@ Assembly& Assembly::optimise(bool _enable) copt << toString(*this); count = 0; - //@todo CFG interface should be a generator, that returns an item and a pointer to a - // knownstate, which has to replace the current state if it is not null. - // Feed these items to the CSE, but also store them and replace the stored version - // if the items generated by the CSE are shorter. (or even use less gas?) 
- copt << "Performing control flow analysis..."; + copt << "Performing optimisation..."; { ControlFlowGraph cfg(m_items); - AssemblyItems optItems; + AssemblyItems optimisedItems; for (BasicBlock const& block: cfg.optimisedBlocks()) - copy(m_items.begin() + block.begin, m_items.begin() + block.end, - back_inserter(optItems)); - if (optItems.size() < m_items.size()) { - copt << "Old size: " << m_items.size() << ", new size: " << optItems.size(); - m_items = move(optItems); - count++; - } - } + assertThrow(!!block.startState, OptimizerException, ""); + CommonSubexpressionEliminator eliminator(*block.startState); + auto iter = m_items.begin() + block.begin; + auto const end = m_items.begin() + block.end; + while (iter < end) + { + auto orig = iter; + iter = eliminator.feedItems(iter, end); + bool shouldReplace = false; + AssemblyItems optimisedChunk; + try + { + optimisedChunk = eliminator.getOptimizedItems(); + shouldReplace = (optimisedChunk.size() < size_t(iter - orig)); + } + catch (StackTooDeepException const&) + { + // This might happen if the opcode reconstruction is not as efficient + // as the hand-crafted code. + } - copt << "Performing common subexpression elimination..."; - for (auto iter = m_items.begin(); iter != m_items.end();) - { - //@todo use only a single state / expression classes instance. - KnownState state(make_shared()); - CommonSubexpressionEliminator eliminator(state); - auto orig = iter; - iter = eliminator.feedItems(iter, m_items.end()); - AssemblyItems optItems; - bool shouldReplace = false; - try - { - optItems = eliminator.getOptimizedItems(); - shouldReplace = (optItems.size() < size_t(iter - orig)); - } - catch (StackTooDeepException const&) - { - // This might happen if the opcode reconstruction is not as efficient - // as the hand-crafted code. 
- } - - if (shouldReplace) - { - copt << "Old size: " << (iter - orig) << ", new size: " << optItems.size(); - count++; - for (auto moveIter = optItems.begin(); moveIter != optItems.end(); ++orig, ++moveIter) - *orig = move(*moveIter); - iter = m_items.erase(orig, iter); + if (shouldReplace) + { + copt << "Old size: " << (iter - orig) << ", new size: " << optimisedChunk.size(); + count++; + optimisedItems += optimisedChunk; + } + else + copy(orig, iter, back_inserter(optimisedItems)); + } } + if (optimisedItems.size() < m_items.size()) + m_items = move(optimisedItems); } } @@ -461,7 +452,8 @@ bytes Assembly::assemble() const for (auto const& i: tagRef) { bytesRef r(ret.data() + i.first, bytesPerTag); - toBigEndian(tagPos[i.second], r); + //@todo in the failure case, we could use the position of the invalid jumpdest + toBigEndian(i.second < tagPos.size() ? tagPos[i.second] : (1 << (8 * bytesPerTag)) - 1, r); } if (!m_data.empty()) diff --git a/CommonSubexpressionEliminator.cpp b/CommonSubexpressionEliminator.cpp index 4b85eba40..5beb7966f 100644 --- a/CommonSubexpressionEliminator.cpp +++ b/CommonSubexpressionEliminator.cpp @@ -45,16 +45,22 @@ vector CommonSubexpressionEliminator::getOptimizedItems() for (int height = minHeight; height <= m_state.stackHeight(); ++height) targetStackContents[height] = m_state.stackElement(height, SourceLocation()); - // Debug info: - //stream(cout, initialStackContents, targetStackContents); - AssemblyItems items = CSECodeGenerator(m_state.expressionClasses(), m_storeOperations).generateCode( m_initialState.stackHeight(), initialStackContents, targetStackContents ); if (m_breakingItem) + { items.push_back(*m_breakingItem); + m_state.feedItem(*m_breakingItem); + } + + // cleanup + m_initialState = m_state; + m_breakingItem = nullptr; + m_storeOperations.clear(); + return items; } @@ -113,6 +119,7 @@ AssemblyItems CSECodeGenerator::generateCode( { m_stackHeight = _initialStackHeight; m_stack = _initialStack; + m_targetStack = 
_targetStackContents; for (auto const& item: m_stack) if (!m_classPositions.count(item.second)) m_classPositions[item.second] = item.first; @@ -122,7 +129,7 @@ AssemblyItems CSECodeGenerator::generateCode( // generate the dependency graph starting from final storage and memory writes and target stack contents for (auto const& p: m_storeOperations) addDependencies(p.second.back().expression); - for (auto const& targetItem: _targetStackContents) + for (auto const& targetItem: m_targetStack) { m_finalClasses.insert(targetItem.second); addDependencies(targetItem.second); @@ -141,8 +148,10 @@ AssemblyItems CSECodeGenerator::generateCode( generateClassElement(seqAndId.second, true); // generate the target stack elements - for (auto const& targetItem: _targetStackContents) + for (auto const& targetItem: m_targetStack) { + if (m_stack.count(targetItem.first) && m_stack.at(targetItem.first) == targetItem.second) + continue; // already there int position = generateClassElement(targetItem.second); assertThrow(position != c_invalidPosition, OptimizerException, ""); if (position == targetItem.first) @@ -164,21 +173,24 @@ AssemblyItems CSECodeGenerator::generateCode( // check validity int finalHeight = 0; - if (!_targetStackContents.empty()) + if (!m_targetStack.empty()) // have target stack, so its height should be the final height - finalHeight = (--_targetStackContents.end())->first; + finalHeight = (--m_targetStack.end())->first; else if (!_initialStack.empty()) // no target stack, only erase the initial stack finalHeight = _initialStack.begin()->first - 1; else // neither initial no target stack, no change in height - finalHeight = 0; + finalHeight = _initialStackHeight; assertThrow(finalHeight == m_stackHeight, OptimizerException, "Incorrect final stack height."); + return m_generatedItems; } void CSECodeGenerator::addDependencies(Id _c) { + if (m_classPositions.count(_c)) + return; // it is already on the stack if (m_neededBy.count(_c)) return; // we already computed the 
dependencies for _c ExpressionClasses::Expression expr = m_expressionClasses.representative(_c); @@ -340,7 +352,7 @@ int CSECodeGenerator::generateClassElement(Id _c, bool _allowSequenced) // this will not append a swap but remove the one that is already there appendOrRemoveSwap(m_stackHeight - 1, location); for (auto arg: arguments) - if (canBeRemoved(arg, _c)) + if (m_classPositions[arg] != c_invalidPosition && canBeRemoved(arg, _c)) m_classPositions[arg] = c_invalidPosition; for (size_t i = 0; i < arguments.size(); ++i) m_stack.erase(m_stackHeight - i); @@ -371,13 +383,22 @@ int CSECodeGenerator::classElementPosition(Id _id) const return m_classPositions.at(_id); } -bool CSECodeGenerator::canBeRemoved(Id _element, Id _result) +bool CSECodeGenerator::canBeRemoved(Id _element, Id _result, int _fromPosition) { - // Returns false if _element is finally needed or is needed by a class that has not been - // computed yet. Note that m_classPositions also includes classes that were deleted in the meantime. - if (m_finalClasses.count(_element)) - return false; + // Default for _fromPosition is the canonical position of the element. + if (_fromPosition == c_invalidPosition) + _fromPosition = classElementPosition(_element); + bool isCopy = _fromPosition != classElementPosition(_element); + if (m_finalClasses.count(_element)) + // It is part of the target stack. It can be removed if it is a copy that is not in the target position. + return isCopy && (!m_targetStack.count(_fromPosition) || m_targetStack[_fromPosition] != _element); + else if (isCopy) + // It is only a copy, can be removed. + return true; + + // Can be removed unless it is needed by a class that has not been computed yet. + // Note that m_classPositions also includes classes that were deleted in the meantime. 
auto range = m_neededBy.equal_range(_element); for (auto it = range.first; it != range.second; ++it) if (it->second != _result && !m_classPositions.count(it->second)) @@ -391,7 +412,7 @@ bool CSECodeGenerator::removeStackTopIfPossible() return false; assertThrow(m_stack.count(m_stackHeight) > 0, OptimizerException, ""); Id top = m_stack[m_stackHeight]; - if (!canBeRemoved(top)) + if (!canBeRemoved(top, Id(-1), m_stackHeight)) return false; m_generatedItems.push_back(AssemblyItem(Instruction::POP)); m_stack.erase(m_stackHeight); diff --git a/CommonSubexpressionEliminator.h b/CommonSubexpressionEliminator.h index 6e1ba40b3..2a9a31255 100644 --- a/CommonSubexpressionEliminator.h +++ b/CommonSubexpressionEliminator.h @@ -71,13 +71,6 @@ public: /// @returns the resulting items after optimization. AssemblyItems getOptimizedItems(); - /// Streams debugging information to @a _out. - std::ostream& stream( - std::ostream& _out, - std::map _initialStack = std::map(), - std::map _targetStack = std::map() - ) const; - private: /// Feeds the item into the system for analysis. void feedItem(AssemblyItem const& _item, bool _copyItem = false); @@ -134,8 +127,9 @@ private: /// @note throws an exception if it is not on the stack. int classElementPosition(Id _id) const; - /// @returns true if @a _element can be removed - in general or, if given, while computing @a _result. - bool canBeRemoved(Id _element, Id _result = Id(-1)); + /// @returns true if the copy of @a _element can be removed from stack position _fromPosition + /// - in general or, if given, while computing @a _result. + bool canBeRemoved(Id _element, Id _result = Id(-1), int _fromPosition = c_invalidPosition); /// Appends code to remove the topmost stack element if it can be removed. bool removeStackTopIfPossible(); @@ -167,6 +161,7 @@ private: std::map, StoreOperations> m_storeOperations; /// The set of equivalence classes that should be present on the stack at the end. 
std::set m_finalClasses; + std::map m_targetStack; }; template @@ -175,6 +170,7 @@ _AssemblyItemIterator CommonSubexpressionEliminator::feedItems( _AssemblyItemIterator _end ) { + assertThrow(!m_breakingItem, OptimizerException, "Invalid use of CommonSubexpressionEliminator."); for (; _iterator != _end && !SemanticInformation::breaksCSEAnalysisBlock(*_iterator); ++_iterator) feedItem(*_iterator); if (_iterator != _end) diff --git a/ControlFlowGraph.cpp b/ControlFlowGraph.cpp index 2e28317a3..7ed56ff1a 100644 --- a/ControlFlowGraph.cpp +++ b/ControlFlowGraph.cpp @@ -142,7 +142,7 @@ void ControlFlowGraph::removeUnusedBlocks() BasicBlock const& block = m_blocks.at(blocksToProcess.back()); blocksToProcess.pop_back(); for (BlockId tag: block.pushedTags) - if (!neededBlocks.count(tag)) + if (!neededBlocks.count(tag) && m_blocks.count(tag)) { neededBlocks.insert(tag); blocksToProcess.push_back(tag); @@ -191,12 +191,12 @@ void ControlFlowGraph::setPrevLinks() if (push.type() != PushTag) continue; BlockId nextId(push.data()); - if (m_blocks.at(nextId).prev) + if (m_blocks.count(nextId) && m_blocks.at(nextId).prev) continue; bool hasLoop = false; - for (BlockId id = nextId; id && !hasLoop; id = m_blocks.at(id).next) + for (BlockId id = nextId; id && m_blocks.count(id) && !hasLoop; id = m_blocks.at(id).next) hasLoop = (id == blockId); - if (hasLoop) + if (hasLoop || !m_blocks.count(nextId)) continue; m_blocks[nextId].prev = blockId; @@ -225,6 +225,8 @@ void ControlFlowGraph::gatherKnowledge() { //@todo we might have to do something like incrementing the sequence number for each JUMPDEST assertThrow(!!workQueue.back().first, OptimizerException, ""); + if (!m_blocks.count(workQueue.back().first)) + continue; // too bad, we do not know the tag, probably an invalid jump BasicBlock& block = m_blocks.at(workQueue.back().first); KnownStatePointer state = workQueue.back().second; workQueue.pop_back(); @@ -281,6 +283,15 @@ void ControlFlowGraph::gatherKnowledge() ) 
workQueue.push_back(make_pair(block.next, state->copy())); } + + // Remove all blocks we never visited here. This might happen because a tag is pushed but + // never used for a JUMP. + // Note that this invalidates some contents of pushedTags + for (auto it = m_blocks.begin(); it != m_blocks.end();) + if (!it->second.startState) + m_blocks.erase(it++); + else + it++; } BasicBlocks ControlFlowGraph::rebuildCode() @@ -288,7 +299,8 @@ BasicBlocks ControlFlowGraph::rebuildCode() map pushes; for (auto& idAndBlock: m_blocks) for (BlockId ref: idAndBlock.second.pushedTags) - pushes[ref]++; + if (m_blocks.count(ref)) + pushes[ref]++; set blocksToAdd; for (auto it: m_blocks) From 2fbcb5b9c81e7922e7cc58a4d75da12ec600e536 Mon Sep 17 00:00:00 2001 From: chriseth Date: Thu, 7 May 2015 18:31:21 +0200 Subject: [PATCH 13/67] Store alternative stack locations during code generation. --- CommonSubexpressionEliminator.cpp | 80 ++++++++++++++++--------------- CommonSubexpressionEliminator.h | 8 ++-- 2 files changed, 45 insertions(+), 43 deletions(-) diff --git a/CommonSubexpressionEliminator.cpp b/CommonSubexpressionEliminator.cpp index 5beb7966f..e369c9dbc 100644 --- a/CommonSubexpressionEliminator.cpp +++ b/CommonSubexpressionEliminator.cpp @@ -121,10 +121,7 @@ AssemblyItems CSECodeGenerator::generateCode( m_stack = _initialStack; m_targetStack = _targetStackContents; for (auto const& item: m_stack) - if (!m_classPositions.count(item.second)) - m_classPositions[item.second] = item.first; - - // @todo: provide information about the positions of copies of class elements + m_classPositions[item.second].insert(item.first); // generate the dependency graph starting from final storage and memory writes and target stack contents for (auto const& p: m_storeOperations) @@ -152,11 +149,12 @@ AssemblyItems CSECodeGenerator::generateCode( { if (m_stack.count(targetItem.first) && m_stack.at(targetItem.first) == targetItem.second) continue; // already there - int position = 
generateClassElement(targetItem.second); - assertThrow(position != c_invalidPosition, OptimizerException, ""); - if (position == targetItem.first) + generateClassElement(targetItem.second); + assertThrow(!m_classPositions[targetItem.second].empty(), OptimizerException, ""); + if (m_classPositions[targetItem.second].count(targetItem.first)) continue; SourceLocation const& location = m_expressionClasses.representative(targetItem.second).item->getLocation(); + int position = classElementPosition(targetItem.second); if (position < targetItem.first) // it is already at its target, we need another copy appendDup(position, location); @@ -266,19 +264,23 @@ void CSECodeGenerator::addDependencies(Id _c) } } -int CSECodeGenerator::generateClassElement(Id _c, bool _allowSequenced) +void CSECodeGenerator::generateClassElement(Id _c, bool _allowSequenced) { + for (auto it: m_classPositions) + for (auto p: it.second) + if (p > m_stackHeight) + assertThrow(false, OptimizerException, ""); // do some cleanup removeStackTopIfPossible(); if (m_classPositions.count(_c)) { assertThrow( - m_classPositions[_c] != c_invalidPosition, + !m_classPositions[_c].empty(), OptimizerException, "Element already removed but still needed." 
); - return m_classPositions[_c]; + return; } ExpressionClasses::Expression const& expr = m_expressionClasses.representative(_c); assertThrow( @@ -351,16 +353,16 @@ int CSECodeGenerator::generateClassElement(Id _c, bool _allowSequenced) m_generatedItems.back() == AssemblyItem(Instruction::SWAP1)) // this will not append a swap but remove the one that is already there appendOrRemoveSwap(m_stackHeight - 1, location); - for (auto arg: arguments) - if (m_classPositions[arg] != c_invalidPosition && canBeRemoved(arg, _c)) - m_classPositions[arg] = c_invalidPosition; for (size_t i = 0; i < arguments.size(); ++i) + { + m_classPositions[m_stack[m_stackHeight - i]].erase(m_stackHeight - i); m_stack.erase(m_stackHeight - i); + } appendItem(*expr.item); if (expr.item->type() != Operation || instructionInfo(expr.item->instruction()).ret == 1) { m_stack[m_stackHeight] = _c; - return m_classPositions[_c] = m_stackHeight; + m_classPositions[_c].insert(m_stackHeight); } else { @@ -369,18 +371,18 @@ int CSECodeGenerator::generateClassElement(Id _c, bool _allowSequenced) OptimizerException, "Invalid number of return values." ); - return m_classPositions[_c] = c_invalidPosition; + m_classPositions[_c]; // ensure it is created to mark the expression as generated } } int CSECodeGenerator::classElementPosition(Id _id) const { assertThrow( - m_classPositions.count(_id) && m_classPositions.at(_id) != c_invalidPosition, + m_classPositions.count(_id) && !m_classPositions.at(_id).empty(), OptimizerException, "Element requested but is not present." 
); - return m_classPositions.at(_id); + return *max_element(m_classPositions.at(_id).begin(), m_classPositions.at(_id).end()); } bool CSECodeGenerator::canBeRemoved(Id _element, Id _result, int _fromPosition) @@ -389,20 +391,19 @@ bool CSECodeGenerator::canBeRemoved(Id _element, Id _result, int _fromPosition) if (_fromPosition == c_invalidPosition) _fromPosition = classElementPosition(_element); - bool isCopy = _fromPosition != classElementPosition(_element); + bool haveCopy = m_classPositions.at(_element).size() > 1; if (m_finalClasses.count(_element)) // It is part of the target stack. It can be removed if it is a copy that is not in the target position. - return isCopy && (!m_targetStack.count(_fromPosition) || m_targetStack[_fromPosition] != _element); - else if (isCopy) - // It is only a copy, can be removed. - return true; - - // Can be removed unless it is needed by a class that has not been computed yet. - // Note that m_classPositions also includes classes that were deleted in the meantime. - auto range = m_neededBy.equal_range(_element); - for (auto it = range.first; it != range.second; ++it) - if (it->second != _result && !m_classPositions.count(it->second)) - return false; + return haveCopy && (!m_targetStack.count(_fromPosition) || m_targetStack[_fromPosition] != _element); + else if (!haveCopy) + { + // Can be removed unless it is needed by a class that has not been computed yet. + // Note that m_classPositions also includes classes that were deleted in the meantime. 
+ auto range = m_neededBy.equal_range(_element); + for (auto it = range.first; it != range.second; ++it) + if (it->second != _result && !m_classPositions.count(it->second)) + return false; + } return true; } @@ -414,9 +415,9 @@ bool CSECodeGenerator::removeStackTopIfPossible() Id top = m_stack[m_stackHeight]; if (!canBeRemoved(top, Id(-1), m_stackHeight)) return false; - m_generatedItems.push_back(AssemblyItem(Instruction::POP)); + m_classPositions[m_stack[m_stackHeight]].erase(m_stackHeight); m_stack.erase(m_stackHeight); - m_stackHeight--; + appendItem(AssemblyItem(Instruction::POP)); return true; } @@ -428,6 +429,7 @@ void CSECodeGenerator::appendDup(int _fromPosition, SourceLocation const& _locat assertThrow(1 <= instructionNum, OptimizerException, "Invalid stack access."); appendItem(AssemblyItem(dupInstruction(instructionNum), _location)); m_stack[m_stackHeight] = m_stack[_fromPosition]; + m_classPositions[m_stack[m_stackHeight]].insert(m_stackHeight); } void CSECodeGenerator::appendOrRemoveSwap(int _fromPosition, SourceLocation const& _location) @@ -439,13 +441,15 @@ void CSECodeGenerator::appendOrRemoveSwap(int _fromPosition, SourceLocation cons assertThrow(instructionNum <= 16, StackTooDeepException, "Stack too deep."); assertThrow(1 <= instructionNum, OptimizerException, "Invalid stack access."); appendItem(AssemblyItem(swapInstruction(instructionNum), _location)); - // The value of a class can be present in multiple locations on the stack. 
We only update the - // "canonical" one that is tracked by m_classPositions - if (m_classPositions[m_stack[m_stackHeight]] == m_stackHeight) - m_classPositions[m_stack[m_stackHeight]] = _fromPosition; - if (m_classPositions[m_stack[_fromPosition]] == _fromPosition) - m_classPositions[m_stack[_fromPosition]] = m_stackHeight; - swap(m_stack[m_stackHeight], m_stack[_fromPosition]); + + if (m_stack[m_stackHeight] != m_stack[_fromPosition]) + { + m_classPositions[m_stack[m_stackHeight]].erase(m_stackHeight); + m_classPositions[m_stack[m_stackHeight]].insert(_fromPosition); + m_classPositions[m_stack[_fromPosition]].erase(_fromPosition); + m_classPositions[m_stack[_fromPosition]].insert(m_stackHeight); + swap(m_stack[m_stackHeight], m_stack[_fromPosition]); + } if (m_generatedItems.size() >= 2 && SemanticInformation::isSwapInstruction(m_generatedItems.back()) && *(m_generatedItems.end() - 2) == m_generatedItems.back()) diff --git a/CommonSubexpressionEliminator.h b/CommonSubexpressionEliminator.h index 2a9a31255..a35e31d90 100644 --- a/CommonSubexpressionEliminator.h +++ b/CommonSubexpressionEliminator.h @@ -119,10 +119,8 @@ private: void addDependencies(Id _c); /// Produce code that generates the given element if it is not yet present. - /// @returns the stack position of the element or c_invalidPosition if it does not actually - /// generate a value on the stack. /// @param _allowSequenced indicates that sequence-constrained operations are allowed - int generateClassElement(Id _c, bool _allowSequenced = false); + void generateClassElement(Id _c, bool _allowSequenced = false); /// @returns the position of the representative of the given id on the stack. /// @note throws an exception if it is not on the stack. int classElementPosition(Id _id) const; @@ -151,8 +149,8 @@ private: std::multimap m_neededBy; /// Current content of the stack. std::map m_stack; - /// Current positions of equivalence classes, equal to c_invalidPosition if already deleted. 
- std::map m_classPositions; + /// Current positions of equivalence classes, equal to the empty set if already deleted. + std::map> m_classPositions; /// The actual eqivalence class items and how to compute them. ExpressionClasses& m_expressionClasses; From 2870281fe8cd70a27d69cbdc6ab97b6d48c11409 Mon Sep 17 00:00:00 2001 From: chriseth Date: Mon, 11 May 2015 16:40:28 +0200 Subject: [PATCH 14/67] Compute state intersection. --- KnownState.cpp | 47 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/KnownState.cpp b/KnownState.cpp index 41ac4802b..d6fbde2d9 100644 --- a/KnownState.cpp +++ b/KnownState.cpp @@ -160,23 +160,46 @@ KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool return op; } -void KnownState::reduceToCommonKnowledge(KnownState const& /*_other*/) +/// Helper function for KnownState::reduceToCommonKnowledge, removes everything from +/// _this which is not in or not equal to the value in _other. +template void intersect( + _Mapping& _this, + _Mapping const& _other, + function<_KeyType(_KeyType)> const& _keyTrans = [](_KeyType _k) { return _k; } +) { - //@todo - *this = KnownState(m_expressionClasses); + for (auto it = _this.begin(); it != _this.end();) + if (_other.count(_keyTrans(it->first)) && _other.at(_keyTrans(it->first)) == it->second) + ++it; + else + it = _this.erase(it); +} + +void KnownState::reduceToCommonKnowledge(KnownState const& _other) +{ + int stackDiff = m_stackHeight - _other.m_stackHeight; + function stackKeyTransform = [=](int _key) -> int { return _key - stackDiff; }; + intersect(m_stackElements, _other.m_stackElements, stackKeyTransform); + // Use the smaller stack height. Essential to terminate in case of loops. 
+ if (m_stackHeight > _other.m_stackHeight) + { + map shiftedStack; + for (auto const& stackElement: m_stackElements) + shiftedStack[stackElement.first - stackDiff] = stackElement.second; + m_stackElements = move(shiftedStack); + m_stackHeight = _other.m_stackHeight; + } + + intersect(m_storageContent, _other.m_storageContent); + intersect(m_memoryContent, _other.m_memoryContent); } bool KnownState::operator==(const KnownState& _other) const { - //@todo - return ( - m_stackElements.empty() && - _other.m_stackElements.empty() && - m_storageContent.empty() && - _other.m_storageContent.empty() && - m_memoryContent.empty() && - _other.m_memoryContent.empty() - ); + return m_storageContent == _other.m_storageContent && + m_memoryContent == _other.m_memoryContent && + m_stackHeight == _other.m_stackHeight && + m_stackElements == _other.m_stackElements; } ExpressionClasses::Id KnownState::stackElement(int _stackHeight, SourceLocation const& _location) From aafa354a958bae0070bc3ce846b121ad048c6f01 Mon Sep 17 00:00:00 2001 From: chriseth Date: Mon, 11 May 2015 19:44:45 +0200 Subject: [PATCH 15/67] Use returning erase variant. --- ControlFlowGraph.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ControlFlowGraph.cpp b/ControlFlowGraph.cpp index 7ed56ff1a..cc68b2af8 100644 --- a/ControlFlowGraph.cpp +++ b/ControlFlowGraph.cpp @@ -289,7 +289,7 @@ void ControlFlowGraph::gatherKnowledge() // Note that this invalidates some contents of pushedTags for (auto it = m_blocks.begin(); it != m_blocks.end();) if (!it->second.startState) - m_blocks.erase(it++); + it = m_blocks.erase(it); else it++; } From 60d69c78f5d165341ce76f63a7e95418c95a3481 Mon Sep 17 00:00:00 2001 From: chriseth Date: Tue, 12 May 2015 11:25:34 +0200 Subject: [PATCH 16/67] Fixed template problem. 
--- KnownState.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/KnownState.cpp b/KnownState.cpp index d6fbde2d9..5a70a74fb 100644 --- a/KnownState.cpp +++ b/KnownState.cpp @@ -162,7 +162,7 @@ KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool /// Helper function for KnownState::reduceToCommonKnowledge, removes everything from /// _this which is not in or not equal to the value in _other. -template void intersect( +template void intersect( _Mapping& _this, _Mapping const& _other, function<_KeyType(_KeyType)> const& _keyTrans = [](_KeyType _k) { return _k; } @@ -175,6 +175,11 @@ template void intersect it = _this.erase(it); } +template void intersect(_Mapping& _this, _Mapping const& _other) +{ + intersect<_Mapping, ExpressionClasses::Id>(_this, _other, [](ExpressionClasses::Id _k) { return _k; }); +} + void KnownState::reduceToCommonKnowledge(KnownState const& _other) { int stackDiff = m_stackHeight - _other.m_stackHeight; From ced4720faa63eb7a1842eb1dbc17522a1ec458fd Mon Sep 17 00:00:00 2001 From: chriseth Date: Tue, 12 May 2015 16:16:44 +0200 Subject: [PATCH 17/67] Unify blocks with shared code. --- Assembly.cpp | 10 +++++ AssemblyItem.h | 2 + BlockDeduplicator.cpp | 93 +++++++++++++++++++++++++++++++++++++++++++ BlockDeduplicator.h | 69 ++++++++++++++++++++++++++++++++ 4 files changed, 174 insertions(+) create mode 100644 BlockDeduplicator.cpp create mode 100644 BlockDeduplicator.h diff --git a/Assembly.cpp b/Assembly.cpp index abcd44516..1011392b9 100644 --- a/Assembly.cpp +++ b/Assembly.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include using namespace std; using namespace dev; @@ -348,8 +349,17 @@ Assembly& Assembly::optimise(bool _enable) copy(orig, iter, back_inserter(optimisedItems)); } } + if (optimisedItems.size() < m_items.size()) + { m_items = move(optimisedItems); + count++; + } + + // This only modifies PushTags, we have to run again to actually remove code. 
+ BlockDeduplicator dedup(m_items); + if (dedup.deduplicate()) + count++; } } diff --git a/AssemblyItem.h b/AssemblyItem.h index 6f2a65de9..b3012a7ea 100644 --- a/AssemblyItem.h +++ b/AssemblyItem.h @@ -68,6 +68,8 @@ public: /// @returns true iff the type and data of the items are equal. bool operator==(AssemblyItem const& _other) const { return m_type == _other.m_type && m_data == _other.m_data; } bool operator!=(AssemblyItem const& _other) const { return !operator==(_other); } + /// Less-than operator compatible with operator==. + bool operator<(AssemblyItem const& _other) const { return std::tie(m_type, m_data) < std::tie(_other.m_type, _other.m_data); } /// @returns an upper bound for the number of bytes required by this item, assuming that /// the value of a jump tag takes @a _addressLength bytes. diff --git a/BlockDeduplicator.cpp b/BlockDeduplicator.cpp new file mode 100644 index 000000000..ca4f7e21a --- /dev/null +++ b/BlockDeduplicator.cpp @@ -0,0 +1,93 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** + * @file BlockDeduplicator.cpp + * @author Christian + * @date 2015 + * Unifies basic blocks that share content. 
+ */ + +#include +#include +#include +#include + +#include + +using namespace std; +using namespace dev; +using namespace dev::eth; + + +bool BlockDeduplicator::deduplicate() +{ + // Compares indices based on the suffix that starts there, ignoring tags and stopping at + // opcodes that stop the control flow. + function comparator = [&](size_t _i, size_t _j) + { + if (_i == _j) + return false; + + BlockIterator first(m_items.begin() + _i, m_items.end()); + BlockIterator second(m_items.begin() + _j, m_items.end()); + BlockIterator end(m_items.end(), m_items.end()); + + if (first != end && (*first).type() == Tag) + ++first; + if (second != end && (*second).type() == Tag) + ++second; + + return std::lexicographical_compare(first, end, second, end); + }; + + set> blocksSeen(comparator); + map tagReplacement; + for (size_t i = 0; i < m_items.size(); ++i) + { + if (m_items.at(i).type() != Tag) + continue; + auto it = blocksSeen.find(i); + if (it == blocksSeen.end()) + blocksSeen.insert(i); + else + tagReplacement[m_items.at(i).data()] = m_items.at(*it).data(); + } + + bool ret = false; + for (AssemblyItem& item: m_items) + if (item.type() == PushTag && tagReplacement.count(item.data())) + { + ret = true; + item.setData(tagReplacement.at(item.data())); + } + return ret; +} + +BlockDeduplicator::BlockIterator& BlockDeduplicator::BlockIterator::operator++() +{ + if (it == end) + return *this; + if (SemanticInformation::altersControlFlow(*it) && *it != AssemblyItem(eth::Instruction::JUMPI)) + it = end; + else + { + ++it; + while (it != end && it->type() == Tag) + ++it; + } + return *this; +} diff --git a/BlockDeduplicator.h b/BlockDeduplicator.h new file mode 100644 index 000000000..8a82a1ed7 --- /dev/null +++ b/BlockDeduplicator.h @@ -0,0 +1,69 @@ +/* + This file is part of cpp-ethereum. 
+ + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** + * @file BlockDeduplicator.h + * @author Christian + * @date 2015 + * Unifies basic blocks that share content. + */ + +#pragma once + +#include +#include +#include + +namespace dev +{ +namespace eth +{ + +class AssemblyItem; +using AssemblyItems = std::vector; + +/** + * Optimizer class to be used to unify blocks that share content. + * Modifies the passed vector in place. + */ +class BlockDeduplicator +{ +public: + BlockDeduplicator(AssemblyItems& _items): m_items(_items) {} + /// @returns true if something was changed + bool deduplicate(); + +private: + /// Iterator that skips tags skips to the end if (all branches of) the control + /// flow does not continue to the next instruction. 
+ struct BlockIterator: std::iterator + { + public: + BlockIterator(AssemblyItems::const_iterator _it, AssemblyItems::const_iterator _end): + it(_it), end(_end) { } + BlockIterator& operator++(); + bool operator==(BlockIterator const& _other) const { return it == _other.it; } + bool operator!=(BlockIterator const& _other) const { return it != _other.it; } + AssemblyItem const& operator*() const { return *it; } + AssemblyItems::const_iterator it; + AssemblyItems::const_iterator end; + }; + + AssemblyItems& m_items; +}; + +} +} From 79f8a224ef848d30af3743f5f06c74b45ce1da5e Mon Sep 17 00:00:00 2001 From: chriseth Date: Tue, 12 May 2015 17:00:23 +0200 Subject: [PATCH 18/67] Removed unnecessary include. --- BlockDeduplicator.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/BlockDeduplicator.cpp b/BlockDeduplicator.cpp index ca4f7e21a..eadbe1b40 100644 --- a/BlockDeduplicator.cpp +++ b/BlockDeduplicator.cpp @@ -26,8 +26,6 @@ #include #include -#include - using namespace std; using namespace dev; using namespace dev::eth; From 2654daab2628181597bb4c35ae69ca378248f8ba Mon Sep 17 00:00:00 2001 From: Gav Wood Date: Wed, 13 May 2015 11:45:18 +0300 Subject: [PATCH 19/67] Revert "CMake: set default RUNTIME_OUTPUT_DIRECTORY property to "bin"" --- CMakeLists.txt | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index eb8fea95c..f8150806f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,10 +19,15 @@ set(EXECUTABLE evmasm) file(GLOB HEADERS "*.h") -add_library(${EXECUTABLE} ${SRC_LIST} ${HEADERS}) +if (ETH_STATIC) + add_library(${EXECUTABLE} STATIC ${SRC_LIST} ${HEADERS}) +else() + add_library(${EXECUTABLE} SHARED ${SRC_LIST} ${HEADERS}) +endif() target_link_libraries(${EXECUTABLE} evmcore) target_link_libraries(${EXECUTABLE} devcrypto) install( TARGETS ${EXECUTABLE} RUNTIME DESTINATION bin ARCHIVE DESTINATION lib LIBRARY DESTINATION lib ) install( FILES ${HEADERS} DESTINATION include/${EXECUTABLE} ) + From 
cebc959ff3d7dab6a41833013ffe22728def3221 Mon Sep 17 00:00:00 2001 From: chriseth Date: Tue, 12 May 2015 21:27:04 +0200 Subject: [PATCH 20/67] Known state: store tags on stack as unions. --- CommonSubexpressionEliminator.cpp | 9 ++- ControlFlowGraph.cpp | 51 ++++++++--------- ControlFlowGraph.h | 4 -- ExpressionClasses.cpp | 10 ++++ ExpressionClasses.h | 6 +- KnownState.cpp | 92 ++++++++++++++++++++++--------- KnownState.h | 19 +++++-- 7 files changed, 125 insertions(+), 66 deletions(-) diff --git a/CommonSubexpressionEliminator.cpp b/CommonSubexpressionEliminator.cpp index e369c9dbc..7564fcd99 100644 --- a/CommonSubexpressionEliminator.cpp +++ b/CommonSubexpressionEliminator.cpp @@ -153,7 +153,9 @@ AssemblyItems CSECodeGenerator::generateCode( assertThrow(!m_classPositions[targetItem.second].empty(), OptimizerException, ""); if (m_classPositions[targetItem.second].count(targetItem.first)) continue; - SourceLocation const& location = m_expressionClasses.representative(targetItem.second).item->getLocation(); + SourceLocation location; + if (m_expressionClasses.representative(targetItem.second).item) + location = m_expressionClasses.representative(targetItem.second).item->getLocation(); int position = classElementPosition(targetItem.second); if (position < targetItem.first) // it is already at its target, we need another copy @@ -197,7 +199,9 @@ void CSECodeGenerator::addDependencies(Id _c) addDependencies(argument); m_neededBy.insert(make_pair(argument, _c)); } - if (expr.item->type() == Operation && ( + if ( + expr.item && + expr.item->type() == Operation && ( expr.item->instruction() == Instruction::SLOAD || expr.item->instruction() == Instruction::MLOAD || expr.item->instruction() == Instruction::SHA3 @@ -288,6 +292,7 @@ void CSECodeGenerator::generateClassElement(Id _c, bool _allowSequenced) OptimizerException, "Sequence constrained operation requested out of sequence." 
); + assertThrow(expr.item, OptimizerException, "Non-generated expression without item."); vector const& arguments = expr.arguments; for (Id arg: boost::adaptors::reverse(arguments)) generateClassElement(arg); diff --git a/ControlFlowGraph.cpp b/ControlFlowGraph.cpp index cc68b2af8..3566bdb17 100644 --- a/ControlFlowGraph.cpp +++ b/ControlFlowGraph.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -217,7 +218,6 @@ void ControlFlowGraph::gatherKnowledge() // @todo actually we know that memory is filled with zeros at the beginning, // we could make use of that. KnownStatePointer emptyState = make_shared(); - ExpressionClasses& expr = emptyState->expressionClasses(); bool unknownJumpEncountered = false; vector> workQueue({make_pair(BlockId::initial(), emptyState->copy())}); @@ -238,8 +238,6 @@ void ControlFlowGraph::gatherKnowledge() } block.startState = state->copy(); - //@todo we might know the return address for the first pass, but not anymore for the second, - // -> store knowledge about tags as a union. // Feed all items except for the final jump yet because it will erase the target tag. unsigned pc = block.begin; @@ -254,22 +252,29 @@ void ControlFlowGraph::gatherKnowledge() assertThrow(block.begin <= pc && pc == block.end - 1, OptimizerException, ""); //@todo in the case of JUMPI, add knowledge about the condition to the state // (for both values of the condition) - BlockId nextBlock = expressionClassToBlockId( - state->stackElement(state->stackHeight(), SourceLocation()), - expr + set tags = state->tagsInExpression( + state->stackElement(state->stackHeight(), SourceLocation()) ); state->feedItem(m_items.at(pc++)); - if (nextBlock) - workQueue.push_back(make_pair(nextBlock, state->copy())); - else if (!unknownJumpEncountered) + + if (tags.empty() || std::any_of(tags.begin(), tags.end(), [&](u256 const& _tag) { - // We do not know where this jump goes, so we have to reset the states of all - // JUMPDESTs. 
- unknownJumpEncountered = true; - for (auto const& it: m_blocks) - if (it.second.begin < it.second.end && m_items[it.second.begin].type() == Tag) - workQueue.push_back(make_pair(it.first, emptyState->copy())); + return !m_blocks.count(BlockId(_tag)); + })) + { + if (!unknownJumpEncountered) + { + // We do not know the target of this jump, so we have to reset the states of all + // JUMPDESTs. + unknownJumpEncountered = true; + for (auto const& it: m_blocks) + if (it.second.begin < it.second.end && m_items[it.second.begin].type() == Tag) + workQueue.push_back(make_pair(it.first, emptyState->copy())); + } } + else + for (auto tag: tags) + workQueue.push_back(make_pair(BlockId(tag), state->copy())); } else if (block.begin <= pc && pc < block.end) state->feedItem(m_items.at(pc++)); @@ -329,7 +334,11 @@ BasicBlocks ControlFlowGraph::rebuildCode() if (previousHandedOver && !pushes[blockId] && m_items[block.begin].type() == Tag) ++block.begin; if (block.begin < block.end) + { blocks.push_back(block); + blocks.back().startState->clearTagUnions(); + blocks.back().endState->clearTagUnions(); + } previousHandedOver = (block.endType == BasicBlock::EndType::HANDOVER); } } @@ -337,18 +346,6 @@ BasicBlocks ControlFlowGraph::rebuildCode() return blocks; } -BlockId ControlFlowGraph::expressionClassToBlockId( - ExpressionClasses::Id _id, - ExpressionClasses& _exprClasses -) -{ - ExpressionClasses::Expression expr = _exprClasses.representative(_id); - if (expr.item && expr.item->type() == PushTag) - return BlockId(expr.item->data()); - else - return BlockId::invalid(); -} - BlockId ControlFlowGraph::generateNewId() { BlockId id = BlockId(++m_lastUsedId); diff --git a/ControlFlowGraph.h b/ControlFlowGraph.h index 3366dc45f..4480ba491 100644 --- a/ControlFlowGraph.h +++ b/ControlFlowGraph.h @@ -108,10 +108,6 @@ private: void setPrevLinks(); BasicBlocks rebuildCode(); - /// @returns the corresponding BlockId if _id is a pushed jump tag, - /// and an invalid BlockId otherwise. 
- BlockId expressionClassToBlockId(ExpressionClasses::Id _id, ExpressionClasses& _exprClasses); - BlockId generateNewId(); unsigned m_lastUsedId = 0; diff --git a/ExpressionClasses.cpp b/ExpressionClasses.cpp index cfbeba7fa..81adc0dbb 100644 --- a/ExpressionClasses.cpp +++ b/ExpressionClasses.cpp @@ -82,6 +82,16 @@ ExpressionClasses::Id ExpressionClasses::find( return exp.id; } +ExpressionClasses::Id ExpressionClasses::newClass(SourceLocation const& _location) +{ + Expression exp; + exp.id = m_representatives.size(); + exp.item = storeItem(AssemblyItem(UndefinedItem, (u256(1) << 255) + exp.id, _location)); + m_representatives.push_back(exp); + m_expressions.insert(exp); + return exp.id; +} + bool ExpressionClasses::knownToBeDifferent(ExpressionClasses::Id _a, ExpressionClasses::Id _b) { // Try to simplify "_a - _b" and return true iff the value is a non-zero constant. diff --git a/ExpressionClasses.h b/ExpressionClasses.h index c83520300..dd94092e8 100644 --- a/ExpressionClasses.h +++ b/ExpressionClasses.h @@ -52,7 +52,8 @@ public: Id id; AssemblyItem const* item = nullptr; Ids arguments; - unsigned sequenceNumber; ///< Storage modification sequence, only used for SLOAD/SSTORE instructions. + /// Storage modification sequence, only used for storage and memory operations. + unsigned sequenceNumber = 0; /// Behaves as if this was a tuple of (item->type(), item->data(), arguments, sequenceNumber). bool operator<(Expression const& _other) const; }; @@ -73,6 +74,9 @@ public: /// @returns the number of classes. Id size() const { return m_representatives.size(); } + /// @returns the id of a new class which is different to all other classes. + Id newClass(SourceLocation const& _location); + /// @returns true if the values of the given classes are known to be different (on every input). /// @note that this function might still return false for some different inputs. 
bool knownToBeDifferent(Id _a, Id _b); diff --git a/KnownState.cpp b/KnownState.cpp index 5a70a74fb..b84e656aa 100644 --- a/KnownState.cpp +++ b/KnownState.cpp @@ -162,29 +162,41 @@ KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool /// Helper function for KnownState::reduceToCommonKnowledge, removes everything from /// _this which is not in or not equal to the value in _other. -template void intersect( - _Mapping& _this, - _Mapping const& _other, - function<_KeyType(_KeyType)> const& _keyTrans = [](_KeyType _k) { return _k; } -) +template void intersect(_Mapping& _this, _Mapping const& _other) { for (auto it = _this.begin(); it != _this.end();) - if (_other.count(_keyTrans(it->first)) && _other.at(_keyTrans(it->first)) == it->second) + if (_other.count(it->first) && _other.at(it->first) == it->second) ++it; else it = _this.erase(it); } -template void intersect(_Mapping& _this, _Mapping const& _other) -{ - intersect<_Mapping, ExpressionClasses::Id>(_this, _other, [](ExpressionClasses::Id _k) { return _k; }); -} - void KnownState::reduceToCommonKnowledge(KnownState const& _other) { int stackDiff = m_stackHeight - _other.m_stackHeight; - function stackKeyTransform = [=](int _key) -> int { return _key - stackDiff; }; - intersect(m_stackElements, _other.m_stackElements, stackKeyTransform); + for (auto it = m_stackElements.begin(); it != m_stackElements.end();) + if (_other.m_stackElements.count(it->first - stackDiff)) + { + Id other = _other.m_stackElements.at(it->first - stackDiff); + if (it->second == other) + ++it; + else + { + set theseTags = tagsInExpression(it->second); + set otherTags = tagsInExpression(other); + if (!theseTags.empty() && !otherTags.empty()) + { + theseTags.insert(otherTags.begin(), otherTags.end()); + it->second = tagUnion(theseTags); + ++it; + } + else + it = m_stackElements.erase(it); + } + } + else + it = m_stackElements.erase(it); + // Use the smaller stack height. Essential to terminate in case of loops. 
if (m_stackHeight > _other.m_stackHeight) { @@ -201,10 +213,15 @@ void KnownState::reduceToCommonKnowledge(KnownState const& _other) bool KnownState::operator==(const KnownState& _other) const { - return m_storageContent == _other.m_storageContent && - m_memoryContent == _other.m_memoryContent && - m_stackHeight == _other.m_stackHeight && - m_stackElements == _other.m_stackElements; + if (m_storageContent != _other.m_storageContent || m_memoryContent != _other.m_memoryContent) + return false; + int stackDiff = m_stackHeight - _other.m_stackHeight; + auto thisIt = m_stackElements.cbegin(); + auto otherIt = _other.m_stackElements.cbegin(); + for (; thisIt != m_stackElements.cend() && otherIt != _other.m_stackElements.cend(); ++thisIt, ++otherIt) + if (thisIt->first - stackDiff != otherIt->first || thisIt->second != otherIt->second) + return false; + return (thisIt == m_stackElements.cend() && otherIt == _other.m_stackElements.cend()); } ExpressionClasses::Id KnownState::stackElement(int _stackHeight, SourceLocation const& _location) @@ -212,18 +229,17 @@ ExpressionClasses::Id KnownState::stackElement(int _stackHeight, SourceLocation if (m_stackElements.count(_stackHeight)) return m_stackElements.at(_stackHeight); // Stack element not found (not assigned yet), create new unknown equivalence class. - //@todo check that we do not infer incorrect equivalences when the stack is cleared partially - //in between. - return m_stackElements[_stackHeight] = initialStackElement(_stackHeight, _location); + return m_stackElements[_stackHeight] = + m_expressionClasses->find(AssemblyItem(UndefinedItem, _stackHeight, _location)); } -ExpressionClasses::Id KnownState::initialStackElement( - int _stackHeight, - SourceLocation const& _location -) +void KnownState::clearTagUnions() { - // This is a special assembly item that refers to elements pre-existing on the initial stack. 
- return m_expressionClasses->find(AssemblyItem(UndefinedItem, u256(_stackHeight), _location)); + for (auto it = m_stackElements.begin(); it != m_stackElements.end();) + if (m_tagUnions.left.count(it->second)) + it = m_stackElements.erase(it); + else + ++it; } void KnownState::setStackElement(int _stackHeight, Id _class) @@ -352,3 +368,27 @@ KnownState::Id KnownState::applySha3( return m_knownSha3Hashes[arguments] = v; } +set KnownState::tagsInExpression(KnownState::Id _expressionId) +{ + if (m_tagUnions.left.count(_expressionId)) + return m_tagUnions.left.at(_expressionId); + // Might be a tag, then return the set of itself. + ExpressionClasses::Expression expr = m_expressionClasses->representative(_expressionId); + if (expr.item && expr.item->type() == PushTag) + return set({expr.item->data()}); + else + return set(); +} + +KnownState::Id KnownState::tagUnion(set _tags) +{ + if (m_tagUnions.right.count(_tags)) + return m_tagUnions.right.at(_tags); + else + { + Id id = m_expressionClasses->newClass(SourceLocation()); + m_tagUnions.right.insert(make_pair(_tags, id)); + return id; + } +} + diff --git a/KnownState.h b/KnownState.h index f7a3dd675..3505df74f 100644 --- a/KnownState.h +++ b/KnownState.h @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -107,15 +108,16 @@ public: /// @returns true if the knowledge about the state of both objects is (known to be) equal. bool operator==(KnownState const& _other) const; - ///@todo the sequence numbers in two copies of this class should never be the same. - /// might be doable using two-dimensional sequence numbers, where the first value is incremented - /// for each copy - /// Retrieves the current equivalence class fo the given stack element (or generates a new /// one if it does not exist yet). Id stackElement(int _stackHeight, SourceLocation const& _location); - /// @returns the equivalence class id of the special initial stack element at the given height. 
- Id initialStackElement(int _stackHeight, SourceLocation const& _location); + + /// @returns its set of tags if the given expression class is a known tag union; returns a set + /// containing the tag if it is a PushTag expression and the empty set otherwise. + std::set tagsInExpression(Id _expressionId); + /// During analysis, different tags on the stack are partially treated as the same class. + /// This removes such classes not to confuse later analyzers. + void clearTagUnions(); int stackHeight() const { return m_stackHeight; } std::map const& stackElements() const { return m_stackElements; } @@ -142,6 +144,9 @@ private: /// Finds or creates a new expression that applies the sha3 hash function to the contents in memory. Id applySha3(Id _start, Id _length, SourceLocation const& _location); + /// @returns a new or already used Id representing the given set of tags. + Id tagUnion(std::set _tags); + /// Current stack height, can be negative. int m_stackHeight = 0; /// Current stack layout, mapping stack height -> equivalence class @@ -157,6 +162,8 @@ private: std::map, Id> m_knownSha3Hashes; /// Structure containing the classes of equivalent expressions. std::shared_ptr m_expressionClasses; + /// Container for unions of tags stored on the stack. + boost::bimap> m_tagUnions; }; } From b124878c5341dc2576be10631b30e74ae16ab8b2 Mon Sep 17 00:00:00 2001 From: chriseth Date: Wed, 13 May 2015 19:13:03 +0200 Subject: [PATCH 21/67] Fixed indentation. 
--- CommonSubexpressionEliminator.cpp | 4 +--- KnownState.cpp | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/CommonSubexpressionEliminator.cpp b/CommonSubexpressionEliminator.cpp index 7564fcd99..9f6f9dd63 100644 --- a/CommonSubexpressionEliminator.cpp +++ b/CommonSubexpressionEliminator.cpp @@ -199,9 +199,7 @@ void CSECodeGenerator::addDependencies(Id _c) addDependencies(argument); m_neededBy.insert(make_pair(argument, _c)); } - if ( - expr.item && - expr.item->type() == Operation && ( + if (expr.item && expr.item->type() == Operation && ( expr.item->instruction() == Instruction::SLOAD || expr.item->instruction() == Instruction::MLOAD || expr.item->instruction() == Instruction::SHA3 diff --git a/KnownState.cpp b/KnownState.cpp index b84e656aa..0aac9cedb 100644 --- a/KnownState.cpp +++ b/KnownState.cpp @@ -230,7 +230,7 @@ ExpressionClasses::Id KnownState::stackElement(int _stackHeight, SourceLocation return m_stackElements.at(_stackHeight); // Stack element not found (not assigned yet), create new unknown equivalence class. return m_stackElements[_stackHeight] = - m_expressionClasses->find(AssemblyItem(UndefinedItem, _stackHeight, _location)); + m_expressionClasses->find(AssemblyItem(UndefinedItem, _stackHeight, _location)); } void KnownState::clearTagUnions() From e50070035fa25f9a4d89cd6cd9e2ef2a7f04919b Mon Sep 17 00:00:00 2001 From: chriseth Date: Fri, 15 May 2015 15:36:14 +0200 Subject: [PATCH 22/67] Hex/decimal cleanup for assembly output. 
--- Assembly.cpp | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/Assembly.cpp b/Assembly.cpp index 1011392b9..6f38b0f42 100644 --- a/Assembly.cpp +++ b/Assembly.cpp @@ -121,13 +121,13 @@ ostream& Assembly::streamAsm(ostream& _out, string const& _prefix, StringMap con _out << " " << instructionInfo(i.instruction()).name << "\t" << i.getJumpTypeAsString(); break; case Push: - _out << " PUSH " << i.data(); + _out << " PUSH " << hex << i.data(); break; case PushString: _out << " PUSH \"" << m_strings.at((h256)i.data()) << "\""; break; case PushTag: - _out << " PUSH [tag" << i.data() << "]"; + _out << " PUSH [tag" << dec << i.data() << "]"; break; case PushSub: _out << " PUSH [$" << h256(i.data()).abridged() << "]"; @@ -139,7 +139,7 @@ ostream& Assembly::streamAsm(ostream& _out, string const& _prefix, StringMap con _out << " PUSHSIZE"; break; case Tag: - _out << "tag" << i.data() << ": " << endl << _prefix << " JUMPDEST"; + _out << "tag" << dec << i.data() << ": " << endl << _prefix << " JUMPDEST"; break; case PushData: _out << " PUSH [" << hex << (unsigned)i.data() << "]"; @@ -208,7 +208,7 @@ Json::Value Assembly::streamAsmJson(ostream& _out, StringMap const& _sourceCodes break; case PushTag: collection.append( - createJsonValue("PUSH [tag]", i.getLocation().start, i.getLocation().end, toStringInHex(i.data()))); + createJsonValue("PUSH [tag]", i.getLocation().start, i.getLocation().end, string(i.data()))); break; case PushSub: collection.append( @@ -223,19 +223,13 @@ Json::Value Assembly::streamAsmJson(ostream& _out, StringMap const& _sourceCodes createJsonValue("PUSHSIZE", i.getLocation().start, i.getLocation().end)); break; case Tag: - { collection.append( createJsonValue("tag", i.getLocation().start, i.getLocation().end, string(i.data()))); collection.append( createJsonValue("JUMDEST", i.getLocation().start, i.getLocation().end)); - } break; case PushData: - { - Json::Value pushData; - pushData["name"] = "PUSH hex"; - 
collection.append(createJsonValue("PUSH hex", i.getLocation().start, i.getLocation().end, toStringInHex(i.data()))); - } + collection.append(createJsonValue("PUSH data", i.getLocation().start, i.getLocation().end, toStringInHex(i.data()))); break; default: BOOST_THROW_EXCEPTION(InvalidOpcode()); From 3ecd54a83513d8b59b5e27c671a036870cf1bc90 Mon Sep 17 00:00:00 2001 From: Gav Wood Date: Tue, 19 May 2015 19:51:38 +0200 Subject: [PATCH 23/67] Move non-cryptopp dependent stuff into devcore. --- CommonSubexpressionEliminator.cpp | 2 +- KnownState.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CommonSubexpressionEliminator.cpp b/CommonSubexpressionEliminator.cpp index 9f6f9dd63..b2fa73116 100644 --- a/CommonSubexpressionEliminator.cpp +++ b/CommonSubexpressionEliminator.cpp @@ -23,7 +23,7 @@ #include #include -#include +#include #include #include diff --git a/KnownState.cpp b/KnownState.cpp index 0aac9cedb..895778ed1 100644 --- a/KnownState.cpp +++ b/KnownState.cpp @@ -23,7 +23,7 @@ #include "KnownState.h" #include -#include +#include #include using namespace std; From d015945a1db28ba55ce674a73091742b781d2d9d Mon Sep 17 00:00:00 2001 From: chriseth Date: Wed, 20 May 2015 00:27:07 +0200 Subject: [PATCH 24/67] Gas estimation taking known state into account. 
--- Assembly.cpp | 1 + AssemblyItem.h | 6 +++ GasMeter.cpp | 134 ++++++++++++++++++++++++++++++++++++++++++++----- GasMeter.h | 27 +++++++++- KnownState.cpp | 13 ++++- KnownState.h | 4 ++ 6 files changed, 168 insertions(+), 17 deletions(-) diff --git a/Assembly.cpp b/Assembly.cpp index 6f38b0f42..5cf3b787a 100644 --- a/Assembly.cpp +++ b/Assembly.cpp @@ -431,6 +431,7 @@ bytes Assembly::assemble() const case PushSubSize: { auto s = m_data[i.data()].size(); + i.setPushedValue(u256(s)); byte b = max(1, dev::bytesRequired(s)); ret.push_back((byte)Instruction::PUSH1 - 1 + b); ret.resize(ret.size() + b); diff --git a/AssemblyItem.h b/AssemblyItem.h index b3012a7ea..7d8f3d9a4 100644 --- a/AssemblyItem.h +++ b/AssemblyItem.h @@ -84,11 +84,17 @@ public: JumpType getJumpType() const { return m_jumpType; } std::string getJumpTypeAsString() const; + void setPushedValue(u256 const& _value) const { m_pushedValue = std::make_shared(_value); } + u256 const* pushedValue() const { return m_pushedValue.get(); } + private: AssemblyItemType m_type; u256 m_data; SourceLocation m_location; JumpType m_jumpType = JumpType::Ordinary; + /// Pushed value for operations with data to be determined during assembly stage, + /// e.g. PushSubSize, PushTag, PushSub, etc. 
+ mutable std::shared_ptr m_pushedValue; }; using AssemblyItems = std::vector; diff --git a/GasMeter.cpp b/GasMeter.cpp index e5fb0e09a..a8dc4dd58 100644 --- a/GasMeter.cpp +++ b/GasMeter.cpp @@ -20,6 +20,7 @@ */ #include "GasMeter.h" +#include #include using namespace std; @@ -41,55 +42,162 @@ GasMeter::GasConsumption& GasMeter::GasConsumption::operator+=(GasConsumption co GasMeter::GasConsumption GasMeter::estimateMax(AssemblyItem const& _item) { - switch (_item.type()) { + GasConsumption gas; + switch (_item.type()) + { case Push: case PushTag: - return runGas(Instruction::PUSH1); + case PushData: + case PushString: + case PushSub: + case PushSubSize: + case PushProgramSize: + gas = runGas(Instruction::PUSH1); + break; case Tag: - return runGas(Instruction::JUMPDEST); + gas = runGas(Instruction::JUMPDEST); + break; case Operation: { - GasConsumption gas = runGas(_item.instruction()); + ExpressionClasses& classes = m_state->expressionClasses(); + gas = runGas(_item.instruction()); switch (_item.instruction()) { case Instruction::SSTORE: - // @todo logic can be improved - gas += c_sstoreSetGas; + { + ExpressionClasses::Id slot = m_state->relativeStackElement(0); + ExpressionClasses::Id value = m_state->relativeStackElement(-1); + if (classes.knownZero(value) || ( + m_state->storageContent().count(slot) && + classes.knownNonZero(m_state->storageContent().at(slot)) + )) + gas += c_sstoreResetGas; //@todo take refunds into account + else + gas += c_sstoreSetGas; break; + } case Instruction::SLOAD: gas += c_sloadGas; break; - case Instruction::MSTORE: - case Instruction::MSTORE8: - case Instruction::MLOAD: case Instruction::RETURN: + gas += memoryGas(0, -1); + break; + case Instruction::MLOAD: + case Instruction::MSTORE: + gas += memoryGas(classes.find(eth::Instruction::ADD, { + m_state->relativeStackElement(0), + classes.find(AssemblyItem(32)) + })); + break; + case Instruction::MSTORE8: + gas += memoryGas(classes.find(eth::Instruction::ADD, { + 
m_state->relativeStackElement(0), + classes.find(AssemblyItem(1)) + })); + break; case Instruction::SHA3: + gas = c_sha3Gas; + gas += wordGas(c_sha3WordGas, m_state->relativeStackElement(-1)); + gas += memoryGas(0, -1); + break; case Instruction::CALLDATACOPY: case Instruction::CODECOPY: + gas += memoryGas(0, -2); + gas += wordGas(c_copyGas, m_state->relativeStackElement(-2)); + break; case Instruction::EXTCODECOPY: + gas += memoryGas(-1, -3); + gas += wordGas(c_copyGas, m_state->relativeStackElement(-3)); + break; case Instruction::LOG0: case Instruction::LOG1: case Instruction::LOG2: case Instruction::LOG3: case Instruction::LOG4: + { + unsigned n = unsigned(_item.instruction()) - unsigned(Instruction::LOG0); + gas = c_logGas + c_logTopicGas * n; + gas += memoryGas(0, -1); + if (u256 const* value = classes.knownConstant(m_state->relativeStackElement(-1))) + gas += c_logDataGas * (*value); + else + gas = GasConsumption::infinite(); + break; + } case Instruction::CALL: case Instruction::CALLCODE: + gas = c_callGas; + if (u256 const* value = classes.knownConstant(m_state->relativeStackElement(0))) + gas += (*value); + else + gas = GasConsumption::infinite(); + if (_item.instruction() != Instruction::CALLCODE) + gas += c_callNewAccountGas; // We very rarely know whether the address exists. 
+ if (!classes.knownZero(m_state->relativeStackElement(-2))) + gas += c_callValueTransferGas; + gas += memoryGas(-3, -4); + gas += memoryGas(-5, -6); + break; case Instruction::CREATE: + gas = c_createGas; + gas += memoryGas(-1, -2); + break; case Instruction::EXP: - // @todo logic can be improved - gas = GasConsumption::infinite(); + gas = c_expGas; + if (u256 const* value = classes.knownConstant(m_state->relativeStackElement(-1))) + gas += c_expByteGas * (32 - (h256(*value).firstBitSet() / 8)); + else + gas = GasConsumption::infinite(); break; default: break; } - return gas; break; } default: + gas = GasConsumption::infinite(); break; } - return GasConsumption::infinite(); + m_state->feedItem(_item); + return gas; +} + +GasMeter::GasConsumption GasMeter::wordGas(u256 const& _multiplier, ExpressionClasses::Id _position) +{ + u256 const* value = m_state->expressionClasses().knownConstant(_position); + if (!value) + return GasConsumption::infinite(); + return GasConsumption(_multiplier * ((*value + 31) / 32)); +} + +GasMeter::GasConsumption GasMeter::memoryGas(ExpressionClasses::Id _position) +{ + u256 const* value = m_state->expressionClasses().knownConstant(_position); + if (!value) + return GasConsumption::infinite(); + if (*value < m_largestMemoryAccess) + return GasConsumption(u256(0)); + u256 previous = m_largestMemoryAccess; + m_largestMemoryAccess = *value; + auto memGas = [](u256 const& pos) -> u256 + { + u256 size = (pos + 31) / 32; + return c_memoryGas * size + size * size / c_quadCoeffDiv; + }; + return memGas(*value) - memGas(previous); +} + +GasMeter::GasConsumption GasMeter::memoryGas(int _stackPosOffset, int _stackPosSize) +{ + ExpressionClasses& classes = m_state->expressionClasses(); + if (classes.knownZero(m_state->relativeStackElement(_stackPosSize))) + return GasConsumption(0); + else + return memoryGas(classes.find(eth::Instruction::ADD, { + m_state->relativeStackElement(_stackPosOffset), + m_state->relativeStackElement(_stackPosSize) + })); } 
GasMeter::GasConsumption GasMeter::runGas(Instruction _instruction) diff --git a/GasMeter.h b/GasMeter.h index 63dbc1380..ab6d5613b 100644 --- a/GasMeter.h +++ b/GasMeter.h @@ -22,6 +22,7 @@ #pragma once #include +#include #include namespace dev @@ -29,8 +30,13 @@ namespace dev namespace eth { +class KnownState; + /** * Class that helps computing the maximum gas consumption for instructions. + * Has to be initialized with a certain known state that will be automatically updated for + * each call to estimateMax. These calls have to supply strictly subsequent AssemblyItems. + * A new gas meter has to be constructed (with a new state) for control flow changes. */ class GasMeter { @@ -47,11 +53,28 @@ public: bool isInfinite; }; - /// Returns an upper bound on the gas consumed by the given instruction. + /// Constructs a new gas meter given the current state. + GasMeter(std::shared_ptr const& _state): m_state(_state) {} + + /// @returns an upper bound on the gas consumed by the given instruction and updates + /// the state. GasConsumption estimateMax(AssemblyItem const& _item); private: + /// @returns _multiplier * (_value + 31) / 32, if _value is a known constant and infinite otherwise. + GasConsumption wordGas(u256 const& _multiplier, ExpressionClasses::Id _value); + /// @returns the gas needed to access the given memory position. + /// @todo this assumes that memory was never accessed before and thus over-estimates gas usage. + GasConsumption memoryGas(ExpressionClasses::Id _position); + /// @returns the memory gas for accessing the memory at a specific offset for a number of bytes + /// given as values on the stack at the given relative positions. + GasConsumption memoryGas(int _stackPosOffset, int _stackPosSize); + static GasConsumption runGas(Instruction _instruction); + + std::shared_ptr m_state; + /// Largest point where memory was accessed since the creation of this object. 
+ u256 m_largestMemoryAccess; }; inline std::ostream& operator<<(std::ostream& _str, GasMeter::GasConsumption const& _consumption) @@ -59,7 +82,7 @@ inline std::ostream& operator<<(std::ostream& _str, GasMeter::GasConsumption con if (_consumption.isInfinite) return _str << "inf"; else - return _str << _consumption.value; + return _str << std::dec << _consumption.value; } diff --git a/KnownState.cpp b/KnownState.cpp index 895778ed1..d62dbf17e 100644 --- a/KnownState.cpp +++ b/KnownState.cpp @@ -92,7 +92,11 @@ KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool else if (_item.type() != Operation) { assertThrow(_item.deposit() == 1, InvalidDeposit, ""); - setStackElement(++m_stackHeight, m_expressionClasses->find(_item, {}, _copyItem)); + if (_item.pushedValue()) + // only available after assembly stage, should not be used for optimisation + setStackElement(++m_stackHeight, m_expressionClasses->find(*_item.pushedValue())); + else + setStackElement(++m_stackHeight, m_expressionClasses->find(_item, {}, _copyItem)); } else { @@ -230,7 +234,12 @@ ExpressionClasses::Id KnownState::stackElement(int _stackHeight, SourceLocation return m_stackElements.at(_stackHeight); // Stack element not found (not assigned yet), create new unknown equivalence class. return m_stackElements[_stackHeight] = - m_expressionClasses->find(AssemblyItem(UndefinedItem, _stackHeight, _location)); + m_expressionClasses->find(AssemblyItem(UndefinedItem, _stackHeight, _location)); +} + +KnownState::Id KnownState::relativeStackElement(int _stackOffset, SourceLocation const& _location) +{ + return stackElement(m_stackHeight + _stackOffset, _location); } void KnownState::clearTagUnions() diff --git a/KnownState.h b/KnownState.h index 3505df74f..9d28ef21a 100644 --- a/KnownState.h +++ b/KnownState.h @@ -111,6 +111,8 @@ public: /// Retrieves the current equivalence class fo the given stack element (or generates a new /// one if it does not exist yet). 
Id stackElement(int _stackHeight, SourceLocation const& _location); + /// @returns the stackElement relative to the current stack height. + Id relativeStackElement(int _stackOffset, SourceLocation const& _location = SourceLocation()); /// @returns its set of tags if the given expression class is a known tag union; returns a set /// containing the tag if it is a PushTag expression and the empty set otherwise. @@ -123,6 +125,8 @@ public: std::map const& stackElements() const { return m_stackElements; } ExpressionClasses& expressionClasses() const { return *m_expressionClasses; } + std::map const& storageContent() const { return m_storageContent; } + private: /// Assigns a new equivalence class to the next sequence number of the given stack element. void setStackElement(int _stackHeight, Id _class); From cd28fb8faa6009a53e1f127fb934d00f29da832d Mon Sep 17 00:00:00 2001 From: chriseth Date: Fri, 22 May 2015 09:33:57 +0200 Subject: [PATCH 25/67] Path gas meter. --- GasMeter.cpp | 5 +- GasMeter.h | 12 +++- PathGasMeter.cpp | 128 ++++++++++++++++++++++++++++++++++++++++ PathGasMeter.h | 66 +++++++++++++++++++++ SemanticInformation.cpp | 2 +- 5 files changed, 207 insertions(+), 6 deletions(-) create mode 100644 PathGasMeter.cpp create mode 100644 PathGasMeter.h diff --git a/GasMeter.cpp b/GasMeter.cpp index a8dc4dd58..3749e635d 100644 --- a/GasMeter.cpp +++ b/GasMeter.cpp @@ -29,12 +29,13 @@ using namespace dev::eth; GasMeter::GasConsumption& GasMeter::GasConsumption::operator+=(GasConsumption const& _other) { - isInfinite = isInfinite || _other.isInfinite; + if (_other.isInfinite && !isInfinite) + *this = infinite(); if (isInfinite) return *this; bigint v = bigint(value) + _other.value; if (v > std::numeric_limits::max()) - isInfinite = true; + *this = infinite(); else value = u256(v); return *this; diff --git a/GasMeter.h b/GasMeter.h index ab6d5613b..95593b565 100644 --- a/GasMeter.h +++ b/GasMeter.h @@ -22,6 +22,7 @@ #pragma once #include +#include #include #include @@ 
-46,20 +47,25 @@ public: GasConsumption(u256 _value = 0, bool _infinite = false): value(_value), isInfinite(_infinite) {} static GasConsumption infinite() { return GasConsumption(0, true); } - GasConsumption& operator+=(GasConsumption const& _otherS); - std::ostream& operator<<(std::ostream& _str) const; + GasConsumption& operator+=(GasConsumption const& _other); + bool operator<(GasConsumption const& _other) const { return this->tuple() < _other.tuple(); } + + std::tuple tuple() const { return std::tie(isInfinite, value); } u256 value; bool isInfinite; }; /// Constructs a new gas meter given the current state. - GasMeter(std::shared_ptr const& _state): m_state(_state) {} + explicit GasMeter(std::shared_ptr const& _state, u256 const& _largestMemoryAccess = 0): + m_state(_state), m_largestMemoryAccess(_largestMemoryAccess) {} /// @returns an upper bound on the gas consumed by the given instruction and updates /// the state. GasConsumption estimateMax(AssemblyItem const& _item); + u256 const& largestMemoryAccess() const { return m_largestMemoryAccess; } + private: /// @returns _multiplier * (_value + 31) / 32, if _value is a known constant and infinite otherwise. GasConsumption wordGas(u256 const& _multiplier, ExpressionClasses::Id _value); diff --git a/PathGasMeter.cpp b/PathGasMeter.cpp new file mode 100644 index 000000000..8f7314f89 --- /dev/null +++ b/PathGasMeter.cpp @@ -0,0 +1,128 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** @file PathGasMeter.cpp + * @author Christian + * @date 2015 + */ + +#include "PathGasMeter.h" +#include +#include + +using namespace std; +using namespace dev; +using namespace dev::eth; + +PathGasMeter::PathGasMeter(AssemblyItems const& _items): + m_items(_items) +{ + for (size_t i = 0; i < m_items.size(); ++i) + if (m_items[i].type() == Tag) + m_tagPositions[m_items[i].data()] = i; +} + +GasMeter::GasConsumption PathGasMeter::estimateMax( + size_t _startIndex, + shared_ptr const& _state +) +{ + auto path = unique_ptr(new GasPath()); + path->index = _startIndex; + path->state = _state->copy(); + m_queue.push_back(move(path)); + + GasMeter::GasConsumption gas; + while (!m_queue.empty() && !gas.isInfinite) + gas = max(gas, handleQueueItem()); + return gas; +} + +GasMeter::GasConsumption PathGasMeter::handleQueueItem() +{ + assertThrow(!m_queue.empty(), OptimizerException, ""); + + unique_ptr path = move(m_queue.back()); + m_queue.pop_back(); + + shared_ptr state = path->state; + GasMeter meter(state, path->largestMemoryAccess); + ExpressionClasses& classes = state->expressionClasses(); + GasMeter::GasConsumption gas = path->gas; + size_t index = path->index; + + if (index >= m_items.size() || (index > 0 && m_items.at(index).type() != Tag)) + // Invalid jump usually provokes an out-of-gas exception, but we want to give an upper + // bound on the gas that is needed without changing the behaviour, so it is fine to + // return the current gas value. + return gas; + + set jumpTags; + for (; index < m_items.size() && !gas.isInfinite; ++index) + { + bool branchStops = false; + jumpTags.clear(); + AssemblyItem const& item = m_items.at(index); + if (item.type() == Tag || item == AssemblyItem(eth::Instruction::JUMPDEST)) + { + // Do not allow any backwards jump. This is quite restrictive but should work for + // the simplest things. 
+ if (path->visitedJumpdests.count(index)) + return GasMeter::GasConsumption::infinite(); + path->visitedJumpdests.insert(index); + } + else if (item == AssemblyItem(eth::Instruction::JUMP)) + { + branchStops = true; + jumpTags = state->tagsInExpression(state->relativeStackElement(0)); + if (jumpTags.empty()) // unknown jump destination + return GasMeter::GasConsumption::infinite(); + } + else if (item == AssemblyItem(eth::Instruction::JUMPI)) + { + ExpressionClasses::Id condition = state->relativeStackElement(-1); + if (classes.knownNonZero(condition) || !classes.knownZero(condition)) + { + jumpTags = state->tagsInExpression(state->relativeStackElement(0)); + if (jumpTags.empty()) // unknown jump destination + return GasMeter::GasConsumption::infinite(); + } + branchStops = classes.knownNonZero(condition); + } + else if (SemanticInformation::altersControlFlow(item)) + branchStops = true; + + gas += meter.estimateMax(item); + + for (u256 const& tag: jumpTags) + { + auto newPath = unique_ptr(new GasPath()); + newPath->index = m_items.size(); + if (m_tagPositions.count(tag)) + newPath->index = m_tagPositions.at(tag); + newPath->gas = gas; + newPath->largestMemoryAccess = meter.largestMemoryAccess(); + newPath->state = state->copy(); + newPath->visitedJumpdests = path->visitedJumpdests; + m_queue.push_back(move(newPath)); + } + + if (branchStops) + break; + } + + return gas; +} diff --git a/PathGasMeter.h b/PathGasMeter.h new file mode 100644 index 000000000..1ada460aa --- /dev/null +++ b/PathGasMeter.h @@ -0,0 +1,66 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** @file PathGasMeter.cpp + * @author Christian + * @date 2015 + */ + +#pragma once + +#include +#include +#include +#include + +namespace dev +{ +namespace eth +{ + +class KnownState; + +struct GasPath +{ + size_t index = 0; + std::shared_ptr state; + u256 largestMemoryAccess; + GasMeter::GasConsumption gas; + std::set visitedJumpdests; +}; + +/** + * Computes an upper bound on the gas usage of a computation starting at a certain position in + * a list of AssemblyItems in a given state until the computation stops. + * Can be used to estimate the gas usage of functions on any given input. + */ +class PathGasMeter +{ +public: + PathGasMeter(AssemblyItems const& _items); + + GasMeter::GasConsumption estimateMax(size_t _startIndex, std::shared_ptr const& _state); + +private: + GasMeter::GasConsumption handleQueueItem(); + + std::vector> m_queue; + std::map m_tagPositions; + AssemblyItems const& m_items; +}; + +} +} diff --git a/SemanticInformation.cpp b/SemanticInformation.cpp index 056162b5f..91f93e7ef 100644 --- a/SemanticInformation.cpp +++ b/SemanticInformation.cpp @@ -111,7 +111,7 @@ bool SemanticInformation::altersControlFlow(AssemblyItem const& _item) switch (_item.instruction()) { // note that CALL, CALLCODE and CREATE do not really alter the control flow, because we - // continue on the next instruction (unless an exception happens which can always happen) + // continue on the next instruction case Instruction::JUMP: case Instruction::JUMPI: case Instruction::RETURN: From 2414a23168e9abb5c343a7f3a93e4d0e247c12ac Mon Sep 17 00:00:00 2001 From: chriseth Date: Fri, 22 May 2015 10:48:54 +0200 Subject: [PATCH 
26/67] Functional gas estimator. --- ExpressionClasses.cpp | 27 ++++++++++++++++++++++++--- ExpressionClasses.h | 5 +++++ 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/ExpressionClasses.cpp b/ExpressionClasses.cpp index 81adc0dbb..81ba11541 100644 --- a/ExpressionClasses.cpp +++ b/ExpressionClasses.cpp @@ -57,11 +57,11 @@ ExpressionClasses::Id ExpressionClasses::find( exp.arguments = _arguments; exp.sequenceNumber = _sequenceNumber; + if (SemanticInformation::isCommutativeOperation(_item)) + sort(exp.arguments.begin(), exp.arguments.end()); + if (SemanticInformation::isDeterministic(_item)) { - if (SemanticInformation::isCommutativeOperation(_item)) - sort(exp.arguments.begin(), exp.arguments.end()); - auto it = m_expressions.find(exp); if (it != m_expressions.end()) return it->id; @@ -82,6 +82,27 @@ ExpressionClasses::Id ExpressionClasses::find( return exp.id; } +void ExpressionClasses::forceEqual( + ExpressionClasses::Id _id, + AssemblyItem const& _item, + ExpressionClasses::Ids const& _arguments, + bool _copyItem +) +{ + Expression exp; + exp.id = _id; + exp.item = &_item; + exp.arguments = _arguments; + + if (SemanticInformation::isCommutativeOperation(_item)) + sort(exp.arguments.begin(), exp.arguments.end()); + + if (_copyItem) + exp.item = storeItem(_item); + + m_expressions.insert(exp); +} + ExpressionClasses::Id ExpressionClasses::newClass(SourceLocation const& _location) { Expression exp; diff --git a/ExpressionClasses.h b/ExpressionClasses.h index dd94092e8..4bfd7d24a 100644 --- a/ExpressionClasses.h +++ b/ExpressionClasses.h @@ -74,6 +74,11 @@ public: /// @returns the number of classes. Id size() const { return m_representatives.size(); } + /// Forces the given @a _item with @a _arguments to the class @a _id. This can be used to + /// add prior knowledge e.g. about CALLDATA, but has to be used with caution. Will not work as + /// expected if @a _item applied to @a _arguments already exists. 
+ void forceEqual(Id _id, AssemblyItem const& _item, Ids const& _arguments, bool _copyItem = true); + /// @returns the id of a new class which is different to all other classes. Id newClass(SourceLocation const& _location); From dadde12178e218d1481bd38c9260ceb23c748cb6 Mon Sep 17 00:00:00 2001 From: chriseth Date: Fri, 22 May 2015 16:11:57 +0200 Subject: [PATCH 27/67] Tighter estimation for EXP. --- GasMeter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GasMeter.cpp b/GasMeter.cpp index 3749e635d..650bd6e28 100644 --- a/GasMeter.cpp +++ b/GasMeter.cpp @@ -148,7 +148,7 @@ GasMeter::GasConsumption GasMeter::estimateMax(AssemblyItem const& _item) if (u256 const* value = classes.knownConstant(m_state->relativeStackElement(-1))) gas += c_expByteGas * (32 - (h256(*value).firstBitSet() / 8)); else - gas = GasConsumption::infinite(); + gas += c_expByteGas * 32; break; default: break; From 06890e5428749c2bc94e6b58cdc2d0bdff0ee4fd Mon Sep 17 00:00:00 2001 From: chriseth Date: Fri, 22 May 2015 14:19:58 +0200 Subject: [PATCH 28/67] Commandline interface for gas estimation. --- GasMeter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GasMeter.h b/GasMeter.h index 95593b565..6949c193e 100644 --- a/GasMeter.h +++ b/GasMeter.h @@ -86,7 +86,7 @@ private: inline std::ostream& operator<<(std::ostream& _str, GasMeter::GasConsumption const& _consumption) { if (_consumption.isInfinite) - return _str << "inf"; + return _str << "[???]"; else return _str << std::dec << _consumption.value; } From 7f55e26eb8fd9c321c679f7e4c758070b8d670c1 Mon Sep 17 00:00:00 2001 From: chriseth Date: Tue, 26 May 2015 11:29:41 +0200 Subject: [PATCH 29/67] Removed redundant std. 
--- GasMeter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GasMeter.cpp b/GasMeter.cpp index 650bd6e28..4e5289e38 100644 --- a/GasMeter.cpp +++ b/GasMeter.cpp @@ -34,7 +34,7 @@ GasMeter::GasConsumption& GasMeter::GasConsumption::operator+=(GasConsumption co if (isInfinite) return *this; bigint v = bigint(value) + _other.value; - if (v > std::numeric_limits::max()) + if (v > numeric_limits::max()) *this = infinite(); else value = u256(v); From beab869e1443a9ef8c4bbf27affda0265e8d1947 Mon Sep 17 00:00:00 2001 From: chriseth Date: Thu, 28 May 2015 14:43:46 +0200 Subject: [PATCH 30/67] Allow duplicate code removal for loops. --- Assembly.cpp | 10 +++--- BlockDeduplicator.cpp | 75 +++++++++++++++++++++++++++++++------------ BlockDeduplicator.h | 16 ++++++--- 3 files changed, 72 insertions(+), 29 deletions(-) diff --git a/Assembly.cpp b/Assembly.cpp index 5cf3b787a..8c6591885 100644 --- a/Assembly.cpp +++ b/Assembly.cpp @@ -307,6 +307,11 @@ Assembly& Assembly::optimise(bool _enable) count = 0; copt << "Performing optimisation..."; + // This only modifies PushTags, we have to run again to actually remove code. + BlockDeduplicator dedup(m_items); + if (dedup.deduplicate()) + count++; + { ControlFlowGraph cfg(m_items); AssemblyItems optimisedItems; @@ -349,11 +354,6 @@ Assembly& Assembly::optimise(bool _enable) m_items = move(optimisedItems); count++; } - - // This only modifies PushTags, we have to run again to actually remove code. - BlockDeduplicator dedup(m_items); - if (dedup.deduplicate()) - count++; } } diff --git a/BlockDeduplicator.cpp b/BlockDeduplicator.cpp index eadbe1b40..d930ea22b 100644 --- a/BlockDeduplicator.cpp +++ b/BlockDeduplicator.cpp @@ -35,13 +35,33 @@ bool BlockDeduplicator::deduplicate() { // Compares indices based on the suffix that starts there, ignoring tags and stopping at // opcodes that stop the control flow. + + // Virtual tag that signifies "the current block" and which is used to optimise loops. 
+ // We abort if this virtual tag actually exists. + AssemblyItem pushSelf(PushTag, u256(-4)); + if ( + std::count(m_items.cbegin(), m_items.cend(), pushSelf.tag()) || + std::count(m_items.cbegin(), m_items.cend(), pushSelf.pushTag()) + ) + return false; + function comparator = [&](size_t _i, size_t _j) { if (_i == _j) return false; - BlockIterator first(m_items.begin() + _i, m_items.end()); - BlockIterator second(m_items.begin() + _j, m_items.end()); + // To compare recursive loops, we have to already unify PushTag opcodes of the + // block's own tag. + AssemblyItem pushFirstTag(pushSelf); + AssemblyItem pushSecondTag(pushSelf); + + if (_i < m_items.size() && m_items.at(_i).type() == Tag) + pushFirstTag = m_items.at(_i).pushTag(); + if (_j < m_items.size() && m_items.at(_j).type() == Tag) + pushSecondTag = m_items.at(_j).pushTag(); + + BlockIterator first(m_items.begin() + _i, m_items.end(), &pushFirstTag, &pushSelf); + BlockIterator second(m_items.begin() + _j, m_items.end(), &pushSecondTag, &pushSelf); BlockIterator end(m_items.end(), m_items.end()); if (first != end && (*first).type() == Tag) @@ -52,27 +72,34 @@ bool BlockDeduplicator::deduplicate() return std::lexicographical_compare(first, end, second, end); }; - set> blocksSeen(comparator); - map tagReplacement; - for (size_t i = 0; i < m_items.size(); ++i) + size_t iterations = 0; + for (; ; ++iterations) { - if (m_items.at(i).type() != Tag) - continue; - auto it = blocksSeen.find(i); - if (it == blocksSeen.end()) - blocksSeen.insert(i); - else - tagReplacement[m_items.at(i).data()] = m_items.at(*it).data(); - } - - bool ret = false; - for (AssemblyItem& item: m_items) - if (item.type() == PushTag && tagReplacement.count(item.data())) + //@todo this should probably be optimized. 
+ set> blocksSeen(comparator); + map tagReplacement; + for (size_t i = 0; i < m_items.size(); ++i) { - ret = true; - item.setData(tagReplacement.at(item.data())); + if (m_items.at(i).type() != Tag) + continue; + auto it = blocksSeen.find(i); + if (it == blocksSeen.end()) + blocksSeen.insert(i); + else + tagReplacement[m_items.at(i).data()] = m_items.at(*it).data(); } - return ret; + + bool changed = false; + for (AssemblyItem& item: m_items) + if (item.type() == PushTag && tagReplacement.count(item.data())) + { + changed = true; + item.setData(tagReplacement.at(item.data())); + } + if (!changed) + break; + } + return iterations > 0; } BlockDeduplicator::BlockIterator& BlockDeduplicator::BlockIterator::operator++() @@ -89,3 +116,11 @@ BlockDeduplicator::BlockIterator& BlockDeduplicator::BlockIterator::operator++() } return *this; } + +AssemblyItem const& BlockDeduplicator::BlockIterator::operator*() const +{ + if (replaceItem && replaceWith && *it == *replaceItem) + return *replaceWith; + else + return *it; +} diff --git a/BlockDeduplicator.h b/BlockDeduplicator.h index 8a82a1ed7..c48835fd4 100644 --- a/BlockDeduplicator.h +++ b/BlockDeduplicator.h @@ -47,19 +47,27 @@ public: bool deduplicate(); private: - /// Iterator that skips tags skips to the end if (all branches of) the control + /// Iterator that skips tags and skips to the end if (all branches of) the control /// flow does not continue to the next instruction. + /// If the arguments are supplied to the constructor, replaces items on the fly. 
struct BlockIterator: std::iterator { public: - BlockIterator(AssemblyItems::const_iterator _it, AssemblyItems::const_iterator _end): - it(_it), end(_end) { } + BlockIterator( + AssemblyItems::const_iterator _it, + AssemblyItems::const_iterator _end, + AssemblyItem const* _replaceItem = nullptr, + AssemblyItem const* _replaceWith = nullptr + ): + it(_it), end(_end), replaceItem(_replaceItem), replaceWith(_replaceWith) {} BlockIterator& operator++(); bool operator==(BlockIterator const& _other) const { return it == _other.it; } bool operator!=(BlockIterator const& _other) const { return it != _other.it; } - AssemblyItem const& operator*() const { return *it; } + AssemblyItem const& operator*() const; AssemblyItems::const_iterator it; AssemblyItems::const_iterator end; + AssemblyItem const* replaceItem; + AssemblyItem const* replaceWith; }; AssemblyItems& m_items; From 25205cb05b068e0ec116bdc3f8230f9f2f2159f0 Mon Sep 17 00:00:00 2001 From: Liana Husikyan Date: Fri, 15 May 2015 12:23:13 +0200 Subject: [PATCH 31/67] added error jump instead of STOP instraction in case of exception --- Assembly.cpp | 28 +++++++++++++++++++++++----- Assembly.h | 6 ++++-- AssemblyItem.h | 2 +- ControlFlowGraph.cpp | 8 ++++---- 4 files changed, 32 insertions(+), 12 deletions(-) diff --git a/Assembly.cpp b/Assembly.cpp index 5cf3b787a..f492260af 100644 --- a/Assembly.cpp +++ b/Assembly.cpp @@ -127,7 +127,10 @@ ostream& Assembly::streamAsm(ostream& _out, string const& _prefix, StringMap con _out << " PUSH \"" << m_strings.at((h256)i.data()) << "\""; break; case PushTag: - _out << " PUSH [tag" << dec << i.data() << "]"; + if (i.data() == 0) + _out << " PUSH [ErrorTag]"; + else + _out << " PUSH [tag" << dec << i.data() << "]"; break; case PushSub: _out << " PUSH [$" << h256(i.data()).abridged() << "]"; @@ -207,6 +210,10 @@ Json::Value Assembly::streamAsmJson(ostream& _out, StringMap const& _sourceCodes createJsonValue("PUSH tag", i.getLocation().start, i.getLocation().end, 
m_strings.at((h256)i.data()))); break; case PushTag: + if (i.data() == 0) + collection.append( + createJsonValue("PUSH [ErrorTag]", i.getLocation().start, i.getLocation().end, "")); + collection.append( createJsonValue("PUSH [tag]", i.getLocation().start, i.getLocation().end, string(i.data()))); break; @@ -226,7 +233,7 @@ Json::Value Assembly::streamAsmJson(ostream& _out, StringMap const& _sourceCodes collection.append( createJsonValue("tag", i.getLocation().start, i.getLocation().end, string(i.data()))); collection.append( - createJsonValue("JUMDEST", i.getLocation().start, i.getLocation().end)); + createJsonValue("JUMPDEST", i.getLocation().start, i.getLocation().end)); break; case PushData: collection.append(createJsonValue("PUSH data", i.getLocation().start, i.getLocation().end, toStringInHex(i.data()))); @@ -387,6 +394,11 @@ bytes Assembly::assemble() const // m_data must not change from here on for (AssemblyItem const& i: m_items) + { + // store position of the invalid jump destination + if (i.type() != Tag && tagPos[0] == 0) + tagPos[0] = ret.size(); + switch (i.type()) { case Operation: @@ -448,17 +460,23 @@ bytes Assembly::assemble() const } case Tag: tagPos[(unsigned)i.data()] = ret.size(); + assertThrow(i.data() != 0, AssemblyException, ""); ret.push_back((byte)Instruction::JUMPDEST); break; default: BOOST_THROW_EXCEPTION(InvalidOpcode()); } - + } for (auto const& i: tagRef) { bytesRef r(ret.data() + i.first, bytesPerTag); - //@todo in the failure case, we could use the position of the invalid jumpdest - toBigEndian(i.second < tagPos.size() ? 
tagPos[i.second] : (1 << (8 * bytesPerTag)) - 1, r); + auto tag = i.second; + if (tag >= tagPos.size()) + tag = 0; + if (tag == 0) + assertThrow(tagPos[tag] != 0, AssemblyException, ""); + + toBigEndian(tagPos[tag], r); } if (!m_data.empty()) diff --git a/Assembly.h b/Assembly.h index b4850f7d0..4550eb6e7 100644 --- a/Assembly.h +++ b/Assembly.h @@ -67,6 +67,8 @@ public: AssemblyItem appendJumpI() { auto ret = append(newPushTag()); append(Instruction::JUMPI); return ret; } AssemblyItem appendJump(AssemblyItem const& _tag) { auto ret = append(_tag.pushTag()); append(Instruction::JUMP); return ret; } AssemblyItem appendJumpI(AssemblyItem const& _tag) { auto ret = append(_tag.pushTag()); append(Instruction::JUMPI); return ret; } + AssemblyItem errorTag() { return AssemblyItem(PushTag, 0); } + template Assembly& operator<<(T const& _d) { append(_d); return *this; } AssemblyItems const& getItems() const { return m_items; } AssemblyItem const& back() const { return m_items.back(); } @@ -97,7 +99,6 @@ public: const StringMap &_sourceCodes = StringMap(), bool _inJsonFormat = false ) const; - protected: std::string getLocationFromSources(StringMap const& _sourceCodes, SourceLocation const& _location) const; void donePath() { if (m_totalDeposit != INT_MAX && m_totalDeposit != m_deposit) BOOST_THROW_EXCEPTION(InvalidDeposit()); } @@ -109,7 +110,8 @@ private: Json::Value createJsonValue(std::string _name, int _begin, int _end, std::string _value = std::string(), std::string _jumpType = std::string()) const; protected: - unsigned m_usedTags = 0; + // 0 is reserved for exception + unsigned m_usedTags = 1; AssemblyItems m_items; mutable std::map m_data; std::vector m_subs; diff --git a/AssemblyItem.h b/AssemblyItem.h index 7d8f3d9a4..9eca0a7d1 100644 --- a/AssemblyItem.h +++ b/AssemblyItem.h @@ -65,7 +65,7 @@ public: /// @returns the instruction of this item (only valid if type() == Operation) Instruction instruction() const { return Instruction(byte(m_data)); } - /// @returns 
true iff the type and data of the items are equal. + /// @returns true if the type and data of the items are equal. bool operator==(AssemblyItem const& _other) const { return m_type == _other.m_type && m_data == _other.m_data; } bool operator!=(AssemblyItem const& _other) const { return !operator==(_other); } /// Less-than operator compatible with operator==. diff --git a/ControlFlowGraph.cpp b/ControlFlowGraph.cpp index 3566bdb17..41a53aa82 100644 --- a/ControlFlowGraph.cpp +++ b/ControlFlowGraph.cpp @@ -226,7 +226,10 @@ void ControlFlowGraph::gatherKnowledge() //@todo we might have to do something like incrementing the sequence number for each JUMPDEST assertThrow(!!workQueue.back().first, OptimizerException, ""); if (!m_blocks.count(workQueue.back().first)) + { + workQueue.pop_back(); continue; // too bad, we do not know the tag, probably an invalid jump + } BasicBlock& block = m_blocks.at(workQueue.back().first); KnownStatePointer state = workQueue.back().second; workQueue.pop_back(); @@ -257,10 +260,7 @@ void ControlFlowGraph::gatherKnowledge() ); state->feedItem(m_items.at(pc++)); - if (tags.empty() || std::any_of(tags.begin(), tags.end(), [&](u256 const& _tag) - { - return !m_blocks.count(BlockId(_tag)); - })) + if (tags.empty()) { if (!unknownJumpEncountered) { From 735535d9609ed35b1037ed7128f795df162909ca Mon Sep 17 00:00:00 2001 From: Liana Husikyan Date: Tue, 19 May 2015 15:44:58 +0200 Subject: [PATCH 32/67] style fixes --- Assembly.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Assembly.h b/Assembly.h index 4550eb6e7..3c82125a1 100644 --- a/Assembly.h +++ b/Assembly.h @@ -67,7 +67,7 @@ public: AssemblyItem appendJumpI() { auto ret = append(newPushTag()); append(Instruction::JUMPI); return ret; } AssemblyItem appendJump(AssemblyItem const& _tag) { auto ret = append(_tag.pushTag()); append(Instruction::JUMP); return ret; } AssemblyItem appendJumpI(AssemblyItem const& _tag) { auto ret = append(_tag.pushTag()); 
append(Instruction::JUMPI); return ret; } - AssemblyItem errorTag() { return AssemblyItem(PushTag, 0); } + AssemblyItem errorTag() { return AssemblyItem(PushTag, 0); } template Assembly& operator<<(T const& _d) { append(_d); return *this; } AssemblyItems const& getItems() const { return m_items; } From da4cd45a85c1b3fe7b72ca53ba4263a309d40ed1 Mon Sep 17 00:00:00 2001 From: Liana Husikyan Date: Wed, 20 May 2015 13:15:01 +0200 Subject: [PATCH 33/67] corrected asm-json output --- Assembly.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Assembly.cpp b/Assembly.cpp index f492260af..dabf646c1 100644 --- a/Assembly.cpp +++ b/Assembly.cpp @@ -213,9 +213,9 @@ Json::Value Assembly::streamAsmJson(ostream& _out, StringMap const& _sourceCodes if (i.data() == 0) collection.append( createJsonValue("PUSH [ErrorTag]", i.getLocation().start, i.getLocation().end, "")); - - collection.append( - createJsonValue("PUSH [tag]", i.getLocation().start, i.getLocation().end, string(i.data()))); + else + collection.append( + createJsonValue("PUSH [tag]", i.getLocation().start, i.getLocation().end, string(i.data()))); break; case PushSub: collection.append( From 88096c2c694983da327fd0fc46c31dc6f7404f73 Mon Sep 17 00:00:00 2001 From: chriseth Date: Mon, 1 Jun 2015 12:32:59 +0200 Subject: [PATCH 34/67] Compute constants --- Assembly.cpp | 14 ++- Assembly.h | 9 +- ConstantOptimiser.cpp | 225 ++++++++++++++++++++++++++++++++++++++++++ ConstantOptimiser.h | 147 +++++++++++++++++++++++++++ GasMeter.cpp | 7 +- GasMeter.h | 4 +- 6 files changed, 398 insertions(+), 8 deletions(-) create mode 100644 ConstantOptimiser.cpp create mode 100644 ConstantOptimiser.h diff --git a/Assembly.cpp b/Assembly.cpp index a9ee96199..8642824f6 100644 --- a/Assembly.cpp +++ b/Assembly.cpp @@ -22,9 +22,12 @@ #include "Assembly.h" #include #include +#include #include #include #include +#include +#include #include using namespace std; using namespace dev; @@ -302,7 +305,7 @@ inline bool 
matches(AssemblyItemsConstRef _a, AssemblyItemsConstRef _b) struct OptimiserChannel: public LogChannel { static const char* name() { return "OPT"; } static const int verbosity = 12; }; #define copt dev::LogOutputStream() -Assembly& Assembly::optimise(bool _enable) +Assembly& Assembly::optimise(bool _enable, bool _isCreation, size_t _runs) { if (!_enable) return *this; @@ -364,10 +367,17 @@ Assembly& Assembly::optimise(bool _enable) } } + total += ConstantOptimisationMethod::optimiseConstants( + _isCreation, + _isCreation ? 1 : _runs, + *this, + m_items + ); + copt << total << " optimisations done."; for (auto& sub: m_subs) - sub.optimise(true); + sub.optimise(true, false, _runs); return *this; } diff --git a/Assembly.h b/Assembly.h index 3c82125a1..1457173bc 100644 --- a/Assembly.h +++ b/Assembly.h @@ -49,6 +49,7 @@ public: AssemblyItem newPushTag() { return AssemblyItem(PushTag, m_usedTags++); } AssemblyItem newData(bytes const& _data) { h256 h = (u256)std::hash()(asString(_data)); m_data[h] = _data; return AssemblyItem(PushData, h); } AssemblyItem newSub(Assembly const& _sub) { m_subs.push_back(_sub); return AssemblyItem(PushSub, m_subs.size() - 1); } + Assembly const& getSub(size_t _sub) const { return m_subs.at(_sub); } AssemblyItem newPushString(std::string const& _data) { h256 h = (u256)std::hash()(_data); m_strings[h] = _data; return AssemblyItem(PushString, h); } AssemblyItem newPushSubSize(u256 const& _subId) { return AssemblyItem(PushSubSize, _subId); } @@ -92,7 +93,13 @@ public: void setSourceLocation(SourceLocation const& _location) { m_currentSourceLocation = _location; } bytes assemble() const; - Assembly& optimise(bool _enable); + bytes const& data(h256 const& _i) const { return m_data[_i]; } + + /// Modify (if @a _enable is set) and return the current assembly such that creation and + /// execution gas usage is optimised. @a _isCreation should be true for the top-level assembly. 
+ /// @a _runs specifes an estimate on how often each opcode in this assembly will be executed, + /// i.e. use a small value to optimise for size and a large value to optimise for runtime. + Assembly& optimise(bool _enable, bool _isCreation = true, size_t _runs = 200); Json::Value stream( std::ostream& _out, std::string const& _prefix = "", diff --git a/ConstantOptimiser.cpp b/ConstantOptimiser.cpp new file mode 100644 index 000000000..77d1bdfaa --- /dev/null +++ b/ConstantOptimiser.cpp @@ -0,0 +1,225 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . 
+*/ +/** @file ConstantOptimiser.cpp + * @author Christian + * @date 2015 + */ + +#include "libevmasm/ConstantOptimiser.h" +#include +#include +#include +using namespace std; +using namespace dev; +using namespace dev::eth; + +unsigned ConstantOptimisationMethod::optimiseConstants( + bool _isCreation, + size_t _runs, + Assembly& _assembly, + AssemblyItems& _items +) +{ + unsigned optimisations = 0; + map pushes; + for (AssemblyItem const& item: _items) + if (item.type() == Push) + pushes[item]++; + for (auto it: pushes) + { + AssemblyItem const& item = it.first; + if (item.data() < 0x100) + continue; + Params params; + params.multiplicity = it.second; + params.isCreation = _isCreation; + params.runs = _runs; + LiteralMethod lit(params, item.data()); + bigint literalGas = lit.gasNeeded(); + CodeCopyMethod copy(params, item.data()); + bigint copyGas = copy.gasNeeded(); + ComputeMethod compute(params, item.data()); + bigint computeGas = compute.gasNeeded(); + if (copyGas < literalGas && copyGas < computeGas) + { + copy.execute(_assembly, _items); + optimisations++; + } + else if (computeGas < literalGas && computeGas < copyGas) + { + compute.execute(_assembly, _items); + optimisations++; + } + } + return optimisations; +} + +bigint ConstantOptimisationMethod::simpleRunGas(AssemblyItems const& _items) +{ + bigint gas = 0; + for (AssemblyItem const& item: _items) + if (item.type() == Push) + gas += GasMeter::runGas(eth::Instruction::PUSH1); + else if (item.type() == Operation) + gas += GasMeter::runGas(item.instruction()); + return gas; +} + +bigint ConstantOptimisationMethod::dataGas(bytes const& _data) const +{ + if (m_params.isCreation) + { + bigint gas; + for (auto b: _data) + gas += b ? 
c_txDataNonZeroGas : c_txDataZeroGas; + return gas; + } + else + return c_createDataGas * dataSize(); +} + +size_t ConstantOptimisationMethod::bytesRequired(AssemblyItems const& _items) +{ + size_t size = 0; + for (AssemblyItem const& item: _items) + size += item.bytesRequired(3); // assume 3 byte addresses + return size; +} + +void ConstantOptimisationMethod::replaceConstants( + AssemblyItems& _items, + AssemblyItems const& _replacement +) const +{ + assertThrow(_items.size() > 0, OptimizerException, ""); + for (size_t i = 0; i < _items.size(); ++i) + { + if (_items.at(i) != AssemblyItem(m_value)) + continue; + _items[i] = _replacement[0]; + _items.insert(_items.begin() + i + 1, _replacement.begin() + 1, _replacement.end()); + i += _replacement.size() - 1; + } +} + +bigint LiteralMethod::gasNeeded() +{ + return combineGas( + simpleRunGas({eth::Instruction::PUSH1}), + // PUSHX plus data + (m_params.isCreation ? c_txDataNonZeroGas : c_createDataGas) + dataGas(), + 0 + ); +} + +CodeCopyMethod::CodeCopyMethod(Params const& _params, u256 const& _value): + ConstantOptimisationMethod(_params, _value), + m_copyRoutine{ + u256(0), + eth::Instruction::DUP1, + eth::Instruction::MLOAD, // back up memory + u256(32), + AssemblyItem(PushData, u256(1) << 16), // has to be replaced + eth::Instruction::DUP4, + eth::Instruction::CODECOPY, + eth::Instruction::DUP2, + eth::Instruction::MLOAD, + eth::Instruction::SWAP2, + eth::Instruction::MSTORE + } +{ +} + +bigint CodeCopyMethod::gasNeeded() +{ + return combineGas( + // Run gas: we ignore memory increase costs + simpleRunGas(m_copyRoutine) + c_copyGas, + // Data gas for copy routines: Some bytes are zero, but we ignore them. + bytesRequired(m_copyRoutine) * (m_params.isCreation ? 
c_txDataNonZeroGas : c_createDataGas), + // Data gas for data itself + dataGas(toBigEndian(m_value)) + ); +} + +void CodeCopyMethod::execute(Assembly& _assembly, AssemblyItems& _items) +{ + bytes data = toBigEndian(m_value); + m_copyRoutine[4] = _assembly.newData(data); + replaceConstants(_items, m_copyRoutine); +} + +AssemblyItems ComputeMethod::findRepresentation(u256 const& _value) +{ + if (_value < 0x10000) + // Very small value, not worth computing + return AssemblyItems{_value}; + else if (dev::bytesRequired(~_value) < dev::bytesRequired(_value)) + // Negated is shorter to represent + return findRepresentation(~_value) + AssemblyItems{Instruction::NOT}; + else + { + // Decompose value into a * 2**k + b where abs(b) << 2**k + // Is not always better, try literal and decomposition method. + AssemblyItems routine{u256(_value)}; + bigint bestGas = gasNeeded(routine); + for (unsigned bits = 255; bits > 8; --bits) + { + unsigned gapDetector = unsigned(_value >> (bits - 8)) & 0x1ff; + if (gapDetector != 0xff && gapDetector != 0x100) + continue; + + u256 powerOfTwo = u256(1) << bits; + u256 upperPart = _value >> bits; + bigint lowerPart = _value & (powerOfTwo - 1); + if (abs(powerOfTwo - lowerPart) < lowerPart) + lowerPart = lowerPart - powerOfTwo; // make it negative + if (abs(lowerPart) >= (powerOfTwo >> 8)) + continue; + + AssemblyItems newRoutine; + if (lowerPart != 0) + newRoutine += findRepresentation(u256(abs(lowerPart))); + newRoutine += AssemblyItems{u256(bits), u256(2), Instruction::EXP}; + if (upperPart != 1 && upperPart != 0) + newRoutine += findRepresentation(upperPart) + AssemblyItems{Instruction::MUL}; + if (lowerPart > 0) + newRoutine += AssemblyItems{Instruction::ADD}; + else if (lowerPart < 0) + newRoutine.push_back(eth::Instruction::SUB); + + bigint newGas = gasNeeded(newRoutine); + if (newGas < bestGas) + { + bestGas = move(newGas); + routine = move(newRoutine); + } + } + return routine; + } +} + +bigint ComputeMethod::gasNeeded(AssemblyItems 
const& _routine) +{ + size_t numExps = count(_routine.begin(), _routine.end(), eth::Instruction::EXP); + return combineGas( + simpleRunGas(_routine) + numExps * (c_expGas + c_expByteGas), + // Data gas for routine: Some bytes are zero, but we ignore them. + bytesRequired(_routine) * (m_params.isCreation ? c_txDataNonZeroGas : c_createDataGas), + 0 + ); +} diff --git a/ConstantOptimiser.h b/ConstantOptimiser.h new file mode 100644 index 000000000..e75eff380 --- /dev/null +++ b/ConstantOptimiser.h @@ -0,0 +1,147 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** @file ConstantOptimiser.cpp + * @author Christian + * @date 2015 + */ + +#pragma once + +#include +#include +#include + +namespace dev +{ +namespace eth +{ + +class AssemblyItem; +using AssemblyItems = std::vector; +class Assembly; + +/** + * Abstract base class for one way to change how constants are represented in the code. + */ +class ConstantOptimisationMethod +{ +public: + /// Tries to optimised how constants are represented in the source code and modifies + /// @a _assembly and its @a _items. + /// @returns zero if no optimisations could be performed. + static unsigned optimiseConstants( + bool _isCreation, + size_t _runs, + Assembly& _assembly, + AssemblyItems& _items + ); + + struct Params + { + bool isCreation; ///< Whether this is called during contract creation or runtime. 
+ size_t runs; ///< Estimated number of calls per opcode oven the lifetime of the contract. + size_t multiplicity; ///< Number of times the constant appears in the code. + }; + + explicit ConstantOptimisationMethod(Params const& _params, u256 const& _value): + m_params(_params), m_value(_value) {} + virtual bigint gasNeeded() = 0; + virtual void execute(Assembly& _assembly, AssemblyItems& _items) = 0; + +protected: + size_t dataSize() const { return std::max(1, dev::bytesRequired(m_value)); } + + /// @returns the run gas for the given items ignoring special gas costs + static bigint simpleRunGas(AssemblyItems const& _items); + /// @returns the gas needed to store the given data literally + bigint dataGas(bytes const& _data) const; + /// @returns the gas needed to store the value literally + bigint dataGas() const { return dataGas(toCompactBigEndian(m_value, 1)); } + static size_t bytesRequired(AssemblyItems const& _items); + /// @returns the combined estimated gas usage taking @a m_params into account. + bigint combineGas( + bigint const& _runGas, + bigint const& _repeatedDataGas, + bigint const& _uniqueDataGas + ) + { + // _runGas is not multiplied by _multiplicity because the runs are "per opcode" + return m_params.runs * _runGas + m_params.multiplicity * _repeatedDataGas + _uniqueDataGas; + } + + /// Replaces the constant by the code given in @a _replacement. + void replaceConstants(AssemblyItems& _items, AssemblyItems const& _replacement) const; + + Params m_params; + u256 const& m_value; +}; + +/** + * Optimisation method that pushes the constant to the stack literally. This is the default method, + * i.e. executing it does not alter the Assembly. 
+ */ +class LiteralMethod: public ConstantOptimisationMethod +{ +public: + explicit LiteralMethod(Params const& _params, u256 const& _value): + ConstantOptimisationMethod(_params, _value) {} + virtual bigint gasNeeded() override; + virtual void execute(Assembly&, AssemblyItems&) override {} +}; + +/** + * Method that stores the data in the .data section of the code and copies it to the stack. + */ +class CodeCopyMethod: public ConstantOptimisationMethod +{ +public: + explicit CodeCopyMethod(Params const& _params, u256 const& _value); + virtual bigint gasNeeded() override; + virtual void execute(Assembly& _assembly, AssemblyItems& _items) override; + +protected: + AssemblyItems m_copyRoutine; +}; + +/** + * Method that tries to compute the constant. + */ +class ComputeMethod: public ConstantOptimisationMethod +{ +public: + explicit ComputeMethod(Params const& _params, u256 const& _value): + ConstantOptimisationMethod(_params, _value) + { + m_routine = findRepresentation(m_value); + } + + virtual bigint gasNeeded() override { return gasNeeded(m_routine); } + virtual void execute(Assembly&, AssemblyItems& _items) override + { + replaceConstants(_items, m_routine); + } + +protected: + /// Tries to recursively find a way to compute @a _value. + AssemblyItems findRepresentation(u256 const& _value); + bigint gasNeeded(AssemblyItems const& _routine); + + AssemblyItems m_routine; +}; + +} +} diff --git a/GasMeter.cpp b/GasMeter.cpp index 4e5289e38..42a5bed2e 100644 --- a/GasMeter.cpp +++ b/GasMeter.cpp @@ -201,13 +201,14 @@ GasMeter::GasConsumption GasMeter::memoryGas(int _stackPosOffset, int _stackPosS })); } -GasMeter::GasConsumption GasMeter::runGas(Instruction _instruction) +u256 GasMeter::runGas(Instruction _instruction) { if (_instruction == Instruction::JUMPDEST) - return GasConsumption(1); + return 1; int tier = instructionInfo(_instruction).gasPriceTier; - return tier == InvalidTier ? 
GasConsumption::infinite() : c_tierStepGas[tier]; + assertThrow(tier != InvalidTier, OptimizerException, "Invalid gas tier."); + return c_tierStepGas[tier]; } diff --git a/GasMeter.h b/GasMeter.h index 6949c193e..90f151fc4 100644 --- a/GasMeter.h +++ b/GasMeter.h @@ -66,6 +66,8 @@ public: u256 const& largestMemoryAccess() const { return m_largestMemoryAccess; } + static u256 runGas(Instruction _instruction); + private: /// @returns _multiplier * (_value + 31) / 32, if _value is a known constant and infinite otherwise. GasConsumption wordGas(u256 const& _multiplier, ExpressionClasses::Id _value); @@ -76,8 +78,6 @@ private: /// given as values on the stack at the given relative positions. GasConsumption memoryGas(int _stackPosOffset, int _stackPosSize); - static GasConsumption runGas(Instruction _instruction); - std::shared_ptr m_state; /// Largest point where memory was accessed since the creation of this object. u256 m_largestMemoryAccess; From 81ed5612bc33caa4cd1dd0bf1741637afa070e4c Mon Sep 17 00:00:00 2001 From: chriseth Date: Thu, 4 Jun 2015 11:02:34 +0200 Subject: [PATCH 35/67] MSVC fix. 
--- ConstantOptimiser.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ConstantOptimiser.cpp b/ConstantOptimiser.cpp index 77d1bdfaa..80a2dc180 100644 --- a/ConstantOptimiser.cpp +++ b/ConstantOptimiser.cpp @@ -127,8 +127,9 @@ bigint LiteralMethod::gasNeeded() } CodeCopyMethod::CodeCopyMethod(Params const& _params, u256 const& _value): - ConstantOptimisationMethod(_params, _value), - m_copyRoutine{ + ConstantOptimisationMethod(_params, _value) +{ + m_copyRoutine = AssemblyItems{ u256(0), eth::Instruction::DUP1, eth::Instruction::MLOAD, // back up memory @@ -140,8 +141,7 @@ CodeCopyMethod::CodeCopyMethod(Params const& _params, u256 const& _value): eth::Instruction::MLOAD, eth::Instruction::SWAP2, eth::Instruction::MSTORE - } -{ + }; } bigint CodeCopyMethod::gasNeeded() From cad767de61dc53b9b297f08ee09cb5a3c0821782 Mon Sep 17 00:00:00 2001 From: chriseth Date: Fri, 5 Jun 2015 17:34:20 +0200 Subject: [PATCH 36/67] Remove namespace prefixes. --- ConstantOptimiser.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/ConstantOptimiser.cpp b/ConstantOptimiser.cpp index 80a2dc180..88874d81c 100644 --- a/ConstantOptimiser.cpp +++ b/ConstantOptimiser.cpp @@ -73,7 +73,7 @@ bigint ConstantOptimisationMethod::simpleRunGas(AssemblyItems const& _items) bigint gas = 0; for (AssemblyItem const& item: _items) if (item.type() == Push) - gas += GasMeter::runGas(eth::Instruction::PUSH1); + gas += GasMeter::runGas(Instruction::PUSH1); else if (item.type() == Operation) gas += GasMeter::runGas(item.instruction()); return gas; @@ -119,7 +119,7 @@ void ConstantOptimisationMethod::replaceConstants( bigint LiteralMethod::gasNeeded() { return combineGas( - simpleRunGas({eth::Instruction::PUSH1}), + simpleRunGas({Instruction::PUSH1}), // PUSHX plus data (m_params.isCreation ? 
c_txDataNonZeroGas : c_createDataGas) + dataGas(), 0 @@ -131,16 +131,16 @@ CodeCopyMethod::CodeCopyMethod(Params const& _params, u256 const& _value): { m_copyRoutine = AssemblyItems{ u256(0), - eth::Instruction::DUP1, - eth::Instruction::MLOAD, // back up memory + Instruction::DUP1, + Instruction::MLOAD, // back up memory u256(32), AssemblyItem(PushData, u256(1) << 16), // has to be replaced - eth::Instruction::DUP4, - eth::Instruction::CODECOPY, - eth::Instruction::DUP2, - eth::Instruction::MLOAD, - eth::Instruction::SWAP2, - eth::Instruction::MSTORE + Instruction::DUP4, + Instruction::CODECOPY, + Instruction::DUP2, + Instruction::MLOAD, + Instruction::SWAP2, + Instruction::MSTORE }; } @@ -200,7 +200,7 @@ AssemblyItems ComputeMethod::findRepresentation(u256 const& _value) if (lowerPart > 0) newRoutine += AssemblyItems{Instruction::ADD}; else if (lowerPart < 0) - newRoutine.push_back(eth::Instruction::SUB); + newRoutine.push_back(Instruction::SUB); bigint newGas = gasNeeded(newRoutine); if (newGas < bestGas) @@ -215,7 +215,7 @@ AssemblyItems ComputeMethod::findRepresentation(u256 const& _value) bigint ComputeMethod::gasNeeded(AssemblyItems const& _routine) { - size_t numExps = count(_routine.begin(), _routine.end(), eth::Instruction::EXP); + size_t numExps = count(_routine.begin(), _routine.end(), Instruction::EXP); return combineGas( simpleRunGas(_routine) + numExps * (c_expGas + c_expByteGas), // Data gas for routine: Some bytes are zero, but we ignore them. From 7bdd1b1d4ae46920ae54aaa61c40b411a75f15b9 Mon Sep 17 00:00:00 2001 From: chriseth Date: Sat, 6 Jun 2015 12:42:36 +0200 Subject: [PATCH 37/67] Optimize double ISZERO. 
--- ExpressionClasses.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/ExpressionClasses.cpp b/ExpressionClasses.cpp index 81ba11541..5ad8e724c 100644 --- a/ExpressionClasses.cpp +++ b/ExpressionClasses.cpp @@ -260,6 +260,22 @@ Rules::Rules() {{Instruction::NOT, {{Instruction::NOT, {X}}}}, [=]{ return X; }}, }; + // Double negation of opcodes with binary result + for (auto const& op: vector{ + Instruction::EQ, + Instruction::LT, + Instruction::SLT, + Instruction::GT, + Instruction::SGT + }) + m_rules.push_back({ + {Instruction::ISZERO, {{Instruction::ISZERO, {{op, {X, Y}}}}}}, + [=]() -> Pattern { return {op, {X, Y}}; } + }); + m_rules.push_back({ + {Instruction::ISZERO, {{Instruction::ISZERO, {{Instruction::ISZERO, {X}}}}}}, + [=]() -> Pattern { return {Instruction::ISZERO, {X}}; } + }); // Associative operations for (auto const& opFun: vector>>{ {Instruction::ADD, plus()}, From 55e1729852716ccffeada013453e3c40f0edaf28 Mon Sep 17 00:00:00 2001 From: chriseth Date: Sat, 6 Jun 2015 15:31:22 +0200 Subject: [PATCH 38/67] Quick fix to not access inaccessible sequences. 
--- CommonSubexpressionEliminator.cpp | 9 +++++++++ CommonSubexpressionEliminator.h | 3 +++ KnownState.h | 1 + 3 files changed, 13 insertions(+) diff --git a/CommonSubexpressionEliminator.cpp b/CommonSubexpressionEliminator.cpp index b2fa73116..fe86908fb 100644 --- a/CommonSubexpressionEliminator.cpp +++ b/CommonSubexpressionEliminator.cpp @@ -46,6 +46,7 @@ vector CommonSubexpressionEliminator::getOptimizedItems() targetStackContents[height] = m_state.stackElement(height, SourceLocation()); AssemblyItems items = CSECodeGenerator(m_state.expressionClasses(), m_storeOperations).generateCode( + m_initialState.sequenceNumber(), m_initialState.stackHeight(), initialStackContents, targetStackContents @@ -112,6 +113,7 @@ CSECodeGenerator::CSECodeGenerator( } AssemblyItems CSECodeGenerator::generateCode( + unsigned _initialSequenceNumber, int _initialStackHeight, map const& _initialStack, map const& _targetStackContents @@ -137,7 +139,14 @@ AssemblyItems CSECodeGenerator::generateCode( for (auto const& p: m_neededBy) for (auto id: {p.first, p.second}) if (unsigned seqNr = m_expressionClasses.representative(id).sequenceNumber) + { + if (seqNr < _initialSequenceNumber) + // Invalid sequenced operation. + // @todo quick fix for now. Proper fix needs to choose representative with higher + // sequence number during dependency analyis. + BOOST_THROW_EXCEPTION(StackTooDeepException()); sequencedExpressions.insert(make_pair(seqNr, id)); + } // Perform all operations on storage and memory in order, if they are needed. 
for (auto const& seqAndId: sequencedExpressions) diff --git a/CommonSubexpressionEliminator.h b/CommonSubexpressionEliminator.h index a35e31d90..f6c43c57a 100644 --- a/CommonSubexpressionEliminator.h +++ b/CommonSubexpressionEliminator.h @@ -105,10 +105,13 @@ public: CSECodeGenerator(ExpressionClasses& _expressionClasses, StoreOperations const& _storeOperations); /// @returns the assembly items generated from the given requirements + /// @param _initialSequenceNumber starting sequence number, do not generate sequenced operations + /// before this number. /// @param _initialStack current contents of the stack (up to stack height of zero) /// @param _targetStackContents final contents of the stack, by stack height relative to initial /// @note should only be called once on each object. AssemblyItems generateCode( + unsigned _initialSequenceNumber, int _initialStackHeight, std::map const& _initialStack, std::map const& _targetStackContents diff --git a/KnownState.h b/KnownState.h index 9d28ef21a..dd6185c6f 100644 --- a/KnownState.h +++ b/KnownState.h @@ -94,6 +94,7 @@ public: /// Resets any knowledge. void reset() { resetStorage(); resetMemory(); resetStack(); } + unsigned sequenceNumber() const { return m_sequenceNumber; } /// Manually increments the storage and memory sequence number. void incrementSequenceNumber() { m_sequenceNumber += 2; } From 13a20d837219588c9459d35d3f8ae714afc7668b Mon Sep 17 00:00:00 2001 From: chriseth Date: Sat, 6 Jun 2015 01:04:55 +0200 Subject: [PATCH 39/67] Improved "Stack too deep" error message. Closes #2080. 
--- CommonSubexpressionEliminator.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CommonSubexpressionEliminator.cpp b/CommonSubexpressionEliminator.cpp index b2fa73116..a441bd8bb 100644 --- a/CommonSubexpressionEliminator.cpp +++ b/CommonSubexpressionEliminator.cpp @@ -428,7 +428,7 @@ void CSECodeGenerator::appendDup(int _fromPosition, SourceLocation const& _locat { assertThrow(_fromPosition != c_invalidPosition, OptimizerException, ""); int instructionNum = 1 + m_stackHeight - _fromPosition; - assertThrow(instructionNum <= 16, StackTooDeepException, "Stack too deep."); + assertThrow(instructionNum <= 16, StackTooDeepException, "Stack too deep, try removing local variables."); assertThrow(1 <= instructionNum, OptimizerException, "Invalid stack access."); appendItem(AssemblyItem(dupInstruction(instructionNum), _location)); m_stack[m_stackHeight] = m_stack[_fromPosition]; @@ -441,7 +441,7 @@ void CSECodeGenerator::appendOrRemoveSwap(int _fromPosition, SourceLocation cons if (_fromPosition == m_stackHeight) return; int instructionNum = m_stackHeight - _fromPosition; - assertThrow(instructionNum <= 16, StackTooDeepException, "Stack too deep."); + assertThrow(instructionNum <= 16, StackTooDeepException, "Stack too deep, try removing local variables."); assertThrow(1 <= instructionNum, OptimizerException, "Invalid stack access."); appendItem(AssemblyItem(swapInstruction(instructionNum), _location)); From 87a56b2bfe28655532964c69e43e5b2dc67cd38b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Mon, 8 Jun 2015 12:09:24 +0200 Subject: [PATCH 40/67] Remove pessimising moves. 
--- Assembly.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Assembly.cpp b/Assembly.cpp index 8642824f6..3557fc0ee 100644 --- a/Assembly.cpp +++ b/Assembly.cpp @@ -109,7 +109,7 @@ string Assembly::getLocationFromSources(StringMap const& _sourceCodes, SourceLoc if (newLinePos != string::npos) cut = cut.substr(0, newLinePos) + "..."; - return move(cut); + return cut; } ostream& Assembly::streamAsm(ostream& _out, string const& _prefix, StringMap const& _sourceCodes) const From a72e357c4eb5b8d4052fd3df6288f0b2f13b7a0e Mon Sep 17 00:00:00 2001 From: chriseth Date: Wed, 10 Jun 2015 09:58:59 +0200 Subject: [PATCH 41/67] Improved exception safety in CSE. Fixes #2135 --- CommonSubexpressionEliminator.cpp | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/CommonSubexpressionEliminator.cpp b/CommonSubexpressionEliminator.cpp index fadf2776a..2c4742d61 100644 --- a/CommonSubexpressionEliminator.cpp +++ b/CommonSubexpressionEliminator.cpp @@ -35,6 +35,19 @@ vector CommonSubexpressionEliminator::getOptimizedItems() { optimizeBreakingItem(); + KnownState nextInitialState = m_state; + if (m_breakingItem) + nextInitialState.feedItem(*m_breakingItem); + KnownState nextState = nextInitialState; + + ScopeGuard reset([&]() + { + m_breakingItem = nullptr; + m_storeOperations.clear(); + m_initialState = move(nextInitialState); + m_state = move(nextState); + }); + map initialStackContents; map targetStackContents; int minHeight = m_state.stackHeight() + 1; @@ -52,15 +65,7 @@ vector CommonSubexpressionEliminator::getOptimizedItems() targetStackContents ); if (m_breakingItem) - { items.push_back(*m_breakingItem); - m_state.feedItem(*m_breakingItem); - } - - // cleanup - m_initialState = m_state; - m_breakingItem = nullptr; - m_storeOperations.clear(); return items; } From 64a1e82b6cf6ef6483a159743827c68e7baa3a64 Mon Sep 17 00:00:00 2001 From: chriseth Date: Fri, 12 Jun 2015 11:06:05 +0200 Subject: [PATCH 42/67] Optimize RETURN x 0 
to STOP. --- CommonSubexpressionEliminator.cpp | 48 +++++++++++++++++++------------ 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/CommonSubexpressionEliminator.cpp b/CommonSubexpressionEliminator.cpp index 2c4742d61..8fb4625a8 100644 --- a/CommonSubexpressionEliminator.cpp +++ b/CommonSubexpressionEliminator.cpp @@ -79,31 +79,43 @@ void CommonSubexpressionEliminator::feedItem(AssemblyItem const& _item, bool _co void CommonSubexpressionEliminator::optimizeBreakingItem() { - if (!m_breakingItem || *m_breakingItem != AssemblyItem(Instruction::JUMPI)) + if (!m_breakingItem) return; + ExpressionClasses& classes = m_state.expressionClasses(); SourceLocation const& location = m_breakingItem->getLocation(); - AssemblyItem::JumpType jumpType = m_breakingItem->getJumpType(); - - Id condition = m_state.stackElement(m_state.stackHeight() - 1, location); - Id zero = m_state.expressionClasses().find(u256(0)); - if (m_state.expressionClasses().knownToBeDifferent(condition, zero)) + if (*m_breakingItem == AssemblyItem(Instruction::JUMPI)) { - feedItem(AssemblyItem(Instruction::SWAP1, location), true); - feedItem(AssemblyItem(Instruction::POP, location), true); + AssemblyItem::JumpType jumpType = m_breakingItem->getJumpType(); - AssemblyItem item(Instruction::JUMP, location); - item.setJumpType(jumpType); - m_breakingItem = m_state.expressionClasses().storeItem(item); - return; + Id condition = m_state.stackElement(m_state.stackHeight() - 1, location); + if (classes.knownNonZero(condition)) + { + feedItem(AssemblyItem(Instruction::SWAP1, location), true); + feedItem(AssemblyItem(Instruction::POP, location), true); + + AssemblyItem item(Instruction::JUMP, location); + item.setJumpType(jumpType); + m_breakingItem = classes.storeItem(item); + } + else if (classes.knownZero(condition)) + { + AssemblyItem it(Instruction::POP, location); + feedItem(it, true); + feedItem(it, true); + m_breakingItem = nullptr; + } } - Id negatedCondition = 
m_state.expressionClasses().find(Instruction::ISZERO, {condition}); - if (m_state.expressionClasses().knownToBeDifferent(negatedCondition, zero)) + else if (*m_breakingItem == AssemblyItem(Instruction::RETURN)) { - AssemblyItem it(Instruction::POP, location); - feedItem(it, true); - feedItem(it, true); - m_breakingItem = nullptr; + Id size = m_state.stackElement(m_state.stackHeight() - 1, location); + if (classes.knownZero(size)) + { + feedItem(AssemblyItem(Instruction::POP, location), true); + feedItem(AssemblyItem(Instruction::POP, location), true); + AssemblyItem item(Instruction::STOP, location); + m_breakingItem = classes.storeItem(item); + } } } From 66a85f0229c8160ab9d1dd3d96ba248afe1cab6e Mon Sep 17 00:00:00 2001 From: chriseth Date: Tue, 16 Jun 2015 16:08:40 +0200 Subject: [PATCH 43/67] Some documentation and checks for vector_ref. --- Assembly.cpp | 10 ---------- AssemblyItem.cpp | 7 ------- AssemblyItem.h | 9 ++++++--- 3 files changed, 6 insertions(+), 20 deletions(-) diff --git a/Assembly.cpp b/Assembly.cpp index 3557fc0ee..34ee05966 100644 --- a/Assembly.cpp +++ b/Assembly.cpp @@ -292,16 +292,6 @@ void Assembly::injectStart(AssemblyItem const& _i) m_items.insert(m_items.begin(), _i); } -inline bool matches(AssemblyItemsConstRef _a, AssemblyItemsConstRef _b) -{ - if (_a.size() != _b.size()) - return false; - for (unsigned i = 0; i < _a.size(); ++i) - if (!_a[i].match(_b[i])) - return false; - return true; -} - struct OptimiserChannel: public LogChannel { static const char* name() { return "OPT"; } static const int verbosity = 12; }; #define copt dev::LogOutputStream() diff --git a/AssemblyItem.cpp b/AssemblyItem.cpp index a4485a144..a0c5e19a6 100644 --- a/AssemblyItem.cpp +++ b/AssemblyItem.cpp @@ -126,10 +126,3 @@ ostream& dev::eth::operator<<(ostream& _out, AssemblyItem const& _item) } return _out; } - -ostream& dev::eth::operator<<(ostream& _out, AssemblyItemsConstRef _i) -{ - for (AssemblyItem const& i: _i) - _out << i; - return _out; -} diff --git 
a/AssemblyItem.h b/AssemblyItem.h index 9eca0a7d1..3fa9bb203 100644 --- a/AssemblyItem.h +++ b/AssemblyItem.h @@ -98,11 +98,14 @@ private: }; using AssemblyItems = std::vector; -using AssemblyItemsConstRef = vector_ref; std::ostream& operator<<(std::ostream& _out, AssemblyItem const& _item); -std::ostream& operator<<(std::ostream& _out, AssemblyItemsConstRef _i); -inline std::ostream& operator<<(std::ostream& _out, AssemblyItems const& _i) { return operator<<(_out, AssemblyItemsConstRef(&_i)); } +inline std::ostream& operator<<(std::ostream& _out, AssemblyItems const& _items) +{ + for (AssemblyItem const& item: _items) + _out << item; + return _out; +} } } From 0f06fd8c2fbb62744e435cb7ae9967e95ade6b2a Mon Sep 17 00:00:00 2001 From: chriseth Date: Thu, 25 Jun 2015 18:41:26 +0200 Subject: [PATCH 44/67] Fixed counter modification when appending assemblies. --- Assembly.cpp | 6 +++--- AssemblyItem.cpp | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Assembly.cpp b/Assembly.cpp index 34ee05966..c96b6f40d 100644 --- a/Assembly.cpp +++ b/Assembly.cpp @@ -41,7 +41,7 @@ void Assembly::append(Assembly const& _a) if (i.type() == Tag || i.type() == PushTag) i.setData(i.data() + m_usedTags); else if (i.type() == PushSub || i.type() == PushSubSize) - i.setData(i.data() + m_usedTags); + i.setData(i.data() + m_subs.size()); append(i); } m_deposit = newDeposit; @@ -136,10 +136,10 @@ ostream& Assembly::streamAsm(ostream& _out, string const& _prefix, StringMap con _out << " PUSH [tag" << dec << i.data() << "]"; break; case PushSub: - _out << " PUSH [$" << h256(i.data()).abridged() << "]"; + _out << " PUSH [$" << h256(i.data()).abridgedMiddle() << "]"; break; case PushSubSize: - _out << " PUSH #[$" << h256(i.data()).abridged() << "]"; + _out << " PUSH #[$" << h256(i.data()).abridgedMiddle() << "]"; break; case PushProgramSize: _out << " PUSHSIZE"; diff --git a/AssemblyItem.cpp b/AssemblyItem.cpp index a0c5e19a6..e005ece18 100644 --- a/AssemblyItem.cpp +++ 
b/AssemblyItem.cpp @@ -110,10 +110,10 @@ ostream& dev::eth::operator<<(ostream& _out, AssemblyItem const& _item) _out << " PushData " << hex << (unsigned)_item.data(); break; case PushSub: - _out << " PushSub " << hex << h256(_item.data()).abridged(); + _out << " PushSub " << hex << h256(_item.data()).abridgedMiddle(); break; case PushSubSize: - _out << " PushSubSize " << hex << h256(_item.data()).abridged(); + _out << " PushSubSize " << hex << h256(_item.data()).abridgedMiddle(); break; case PushProgramSize: _out << " PushProgramSize"; From c8954e1e7c1049fc8b4161f112c6c834edce9d58 Mon Sep 17 00:00:00 2001 From: debris Date: Wed, 5 Aug 2015 11:35:05 +0200 Subject: [PATCH 45/67] fixed cmake policy CMP0042, MACOSX RPATH --- CMakeLists.txt | 6 ------ 1 file changed, 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f8150806f..6843aaf2c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,10 +1,4 @@ cmake_policy(SET CMP0015 NEW) -# this policy was introduced in cmake 3.0 -# remove if, once 3.0 will be used on unix -if (${CMAKE_MAJOR_VERSION} GREATER 2) - # old policy do not use MACOSX_RPATH - cmake_policy(SET CMP0042 OLD) -endif() set(CMAKE_AUTOMOC OFF) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSTATICLIB") From 1ea338a852ea9e4c09ec45809c06c57b9a183372 Mon Sep 17 00:00:00 2001 From: Gav Wood Date: Wed, 5 Aug 2015 17:57:22 +0200 Subject: [PATCH 46/67] Revert "fixed cmake policy CMP0042, MACOSX RPATH" --- CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6843aaf2c..f8150806f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,10 @@ cmake_policy(SET CMP0015 NEW) +# this policy was introduced in cmake 3.0 +# remove if, once 3.0 will be used on unix +if (${CMAKE_MAJOR_VERSION} GREATER 2) + # old policy do not use MACOSX_RPATH + cmake_policy(SET CMP0042 OLD) +endif() set(CMAKE_AUTOMOC OFF) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSTATICLIB") From ba00ce457ab710b06eb1fadfd84c12b28b199b9b Mon 
Sep 17 00:00:00 2001 From: debris Date: Wed, 5 Aug 2015 23:48:19 +0200 Subject: [PATCH 47/67] now policy CMP0042 is set to OLD This reverts commit 61e99b1040b80685c70c57cfb23f92e898cd41fb. --- CMakeLists.txt | 6 ------ 1 file changed, 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f8150806f..6843aaf2c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,10 +1,4 @@ cmake_policy(SET CMP0015 NEW) -# this policy was introduced in cmake 3.0 -# remove if, once 3.0 will be used on unix -if (${CMAKE_MAJOR_VERSION} GREATER 2) - # old policy do not use MACOSX_RPATH - cmake_policy(SET CMP0042 OLD) -endif() set(CMAKE_AUTOMOC OFF) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSTATICLIB") From 7d54d746eef514caedaf522ed5e8529111720604 Mon Sep 17 00:00:00 2001 From: Gav Wood Date: Thu, 6 Aug 2015 15:27:50 +0200 Subject: [PATCH 48/67] Warnings fixes. --- KnownState.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/KnownState.h b/KnownState.h index dd6185c6f..d49b35a61 100644 --- a/KnownState.h +++ b/KnownState.h @@ -29,7 +29,12 @@ #include #include #include +#pragma warning(push) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wredeclared-class-member" #include +#pragma warning(pop) +#pragma GCC diagnostic pop #include #include #include From 090e581fe85eb609324284b07dbe316f22d14612 Mon Sep 17 00:00:00 2001 From: Gav Wood Date: Thu, 6 Aug 2015 15:56:00 +0200 Subject: [PATCH 49/67] GCC compile fix. 
--- KnownState.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/KnownState.h b/KnownState.h index d49b35a61..6dff74a5a 100644 --- a/KnownState.h +++ b/KnownState.h @@ -31,7 +31,7 @@ #include #pragma warning(push) #pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wredeclared-class-member" +#pragma clang diagnostic ignored "-Wredeclared-class-member" #include #pragma warning(pop) #pragma GCC diagnostic pop From 8b433edc4e0b163b66f6c743bfcfea56a2e81be6 Mon Sep 17 00:00:00 2001 From: Liana Husikyan Date: Mon, 10 Aug 2015 17:55:31 +0200 Subject: [PATCH 50/67] added checks to prevent the self assignment --- SourceLocation.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/SourceLocation.h b/SourceLocation.h index 35e3c0318..b8b57b609 100644 --- a/SourceLocation.h +++ b/SourceLocation.h @@ -41,8 +41,21 @@ struct SourceLocation SourceLocation(): start(-1), end(-1) { } SourceLocation(SourceLocation const& _other): - start(_other.start), end(_other.end), sourceName(_other.sourceName) {} - SourceLocation& operator=(SourceLocation const& _other) { start = _other.start; end = _other.end; sourceName = _other.sourceName; return *this;} + start(_other.start), + end(_other.end), + sourceName(_other.sourceName) + {} + + SourceLocation& operator=(SourceLocation const& _other) + { + if (&_other == this) + return *this; + + start = _other.start; + end = _other.end; + sourceName = _other.sourceName; + return *this; + } bool operator==(SourceLocation const& _other) const { return start == _other.start && end == _other.end;} bool operator!=(SourceLocation const& _other) const { return !operator==(_other); } From 09fe2c098c798d3de7127583a0ba694e6001e6e0 Mon Sep 17 00:00:00 2001 From: Gav Wood Date: Mon, 10 Aug 2015 20:31:22 +0200 Subject: [PATCH 51/67] Style fixes. 
--- Assembly.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Assembly.h b/Assembly.h index 1457173bc..5814c16e0 100644 --- a/Assembly.h +++ b/Assembly.h @@ -112,7 +112,7 @@ protected: unsigned bytesRequired() const; private: - Json::Value streamAsmJson(std::ostream& _out, const StringMap &_sourceCodes) const; + Json::Value streamAsmJson(std::ostream& _out, StringMap const& _sourceCodes) const; std::ostream& streamAsm(std::ostream& _out, std::string const& _prefix, StringMap const& _sourceCodes) const; Json::Value createJsonValue(std::string _name, int _begin, int _end, std::string _value = std::string(), std::string _jumpType = std::string()) const; From 8fbecb9c27a457f4704f5b354cf780808b8948ab Mon Sep 17 00:00:00 2001 From: Gav Wood Date: Mon, 17 Aug 2015 15:09:42 +0200 Subject: [PATCH 52/67] libevmasm doesn't use libdevcrypto. --- CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6843aaf2c..986cf8143 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,7 +20,6 @@ else() endif() target_link_libraries(${EXECUTABLE} evmcore) -target_link_libraries(${EXECUTABLE} devcrypto) install( TARGETS ${EXECUTABLE} RUNTIME DESTINATION bin ARCHIVE DESTINATION lib LIBRARY DESTINATION lib ) install( FILES ${HEADERS} DESTINATION include/${EXECUTABLE} ) From 34986ee4fcbea4d4f3609d9227864b640f021ffa Mon Sep 17 00:00:00 2001 From: chriseth Date: Tue, 18 Aug 2015 19:41:47 +0200 Subject: [PATCH 53/67] Detect unavailable items and do not optimise the chunk in that case. --- Assembly.cpp | 5 +++++ CommonSubexpressionEliminator.cpp | 11 +++++++++++ Exceptions.h | 1 + 3 files changed, 17 insertions(+) diff --git a/Assembly.cpp b/Assembly.cpp index c96b6f40d..a9b70eb4e 100644 --- a/Assembly.cpp +++ b/Assembly.cpp @@ -337,6 +337,11 @@ Assembly& Assembly::optimise(bool _enable, bool _isCreation, size_t _runs) // This might happen if the opcode reconstruction is not as efficient // as the hand-crafted code. 
} + catch (ItemNotAvailableException const&) + { + // This might happen if e.g. associativity and commutativity rules + // reorganise the expression tree, but not all leaves are available. + } if (shouldReplace) { diff --git a/CommonSubexpressionEliminator.cpp b/CommonSubexpressionEliminator.cpp index 8fb4625a8..6c095595f 100644 --- a/CommonSubexpressionEliminator.cpp +++ b/CommonSubexpressionEliminator.cpp @@ -220,6 +220,12 @@ void CSECodeGenerator::addDependencies(Id _c) if (m_neededBy.count(_c)) return; // we already computed the dependencies for _c ExpressionClasses::Expression expr = m_expressionClasses.representative(_c); + if (expr.item->type() == UndefinedItem) + BOOST_THROW_EXCEPTION( + // If this exception happens, we need to find a different way to generate the + // compound expression. + ItemNotAvailableException() << errinfo_comment("Undefined item requested but not available.") + ); for (Id argument: expr.arguments) { addDependencies(argument); @@ -317,6 +323,11 @@ void CSECodeGenerator::generateClassElement(Id _c, bool _allowSequenced) "Sequence constrained operation requested out of sequence." ); assertThrow(expr.item, OptimizerException, "Non-generated expression without item."); + assertThrow( + expr.item->type() != UndefinedItem, + OptimizerException, + "Undefined item requested but not available." 
+ ); vector const& arguments = expr.arguments; for (Id arg: boost::adaptors::reverse(arguments)) generateClassElement(arg); diff --git a/Exceptions.h b/Exceptions.h index 7cc190e41..03b8afdee 100644 --- a/Exceptions.h +++ b/Exceptions.h @@ -31,6 +31,7 @@ namespace eth struct AssemblyException: virtual Exception {}; struct OptimizerException: virtual AssemblyException {}; struct StackTooDeepException: virtual OptimizerException {}; +struct ItemNotAvailableException: virtual OptimizerException {}; } } From fd23ee97d8ec823d8873c8af7f55ce59f0a62316 Mon Sep 17 00:00:00 2001 From: debris Date: Thu, 27 Aug 2015 12:19:33 +0200 Subject: [PATCH 54/67] cmake refactor in progress --- CMakeLists.txt | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 986cf8143..27e426857 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,21 +5,14 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSTATICLIB") aux_source_directory(. SRC_LIST) -include_directories(BEFORE ${JSONCPP_INCLUDE_DIRS}) include_directories(BEFORE ..) 
-include_directories(${Boost_INCLUDE_DIRS}) set(EXECUTABLE evmasm) file(GLOB HEADERS "*.h") -if (ETH_STATIC) - add_library(${EXECUTABLE} STATIC ${SRC_LIST} ${HEADERS}) -else() - add_library(${EXECUTABLE} SHARED ${SRC_LIST} ${HEADERS}) -endif() - -target_link_libraries(${EXECUTABLE} evmcore) +add_library(${EXECUTABLE} ${SRC_LIST} ${HEADERS}) +eth_use(${EXECUTABLE} REQUIRED Eth::evmcore) install( TARGETS ${EXECUTABLE} RUNTIME DESTINATION bin ARCHIVE DESTINATION lib LIBRARY DESTINATION lib ) install( FILES ${HEADERS} DESTINATION include/${EXECUTABLE} ) From aea7e04bf4fab1c3f1fc9592c03f6b8a34c41be2 Mon Sep 17 00:00:00 2001 From: debris Date: Mon, 7 Sep 2015 14:32:06 +0200 Subject: [PATCH 55/67] split pr changes --- CMakeLists.txt | 5 ----- 1 file changed, 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 27e426857..1accb8eae 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,12 +1,7 @@ -cmake_policy(SET CMP0015 NEW) -set(CMAKE_AUTOMOC OFF) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSTATICLIB") aux_source_directory(. SRC_LIST) -include_directories(BEFORE ..) 
- set(EXECUTABLE evmasm) file(GLOB HEADERS "*.h") From 3ca3fb492d2f710f45d690db85089036bfc77b68 Mon Sep 17 00:00:00 2001 From: debris Date: Tue, 8 Sep 2015 17:11:02 +0200 Subject: [PATCH 56/67] applied changes from https://github.com/ethereum/cpp-ethereum/pull/2953 --- Assembly.cpp | 28 +++++++------- Assembly.h | 6 +-- AssemblyItem.h | 2 +- CommonSubexpressionEliminator.cpp | 64 +++++++++++++++---------------- ExpressionClasses.cpp | 2 +- KnownState.cpp | 16 ++++---- 6 files changed, 59 insertions(+), 59 deletions(-) diff --git a/Assembly.cpp b/Assembly.cpp index a9b70eb4e..64bfd7768 100644 --- a/Assembly.cpp +++ b/Assembly.cpp @@ -91,7 +91,7 @@ unsigned Assembly::bytesRequired() const } } -string Assembly::getLocationFromSources(StringMap const& _sourceCodes, SourceLocation const& _location) const +string Assembly::locationFromSources(StringMap const& _sourceCodes, SourceLocation const& _location) const { if (_location.isEmpty() || _sourceCodes.empty() || _location.start >= _location.end || _location.start < 0) return ""; @@ -153,7 +153,7 @@ ostream& Assembly::streamAsm(ostream& _out, string const& _prefix, StringMap con default: BOOST_THROW_EXCEPTION(InvalidOpcode()); } - _out << "\t\t" << getLocationFromSources(_sourceCodes, i.getLocation()) << endl; + _out << "\t\t" << locationFromSources(_sourceCodes, i.location()) << endl; } if (!m_data.empty() || !m_subs.empty()) @@ -202,44 +202,44 @@ Json::Value Assembly::streamAsmJson(ostream& _out, StringMap const& _sourceCodes { case Operation: collection.append( - createJsonValue(instructionInfo(i.instruction()).name, i.getLocation().start, i.getLocation().end, i.getJumpTypeAsString())); + createJsonValue(instructionInfo(i.instruction()).name, i.location().start, i.location().end, i.getJumpTypeAsString())); break; case Push: collection.append( - createJsonValue("PUSH", i.getLocation().start, i.getLocation().end, toStringInHex(i.data()), i.getJumpTypeAsString())); + createJsonValue("PUSH", i.location().start, 
i.location().end, toStringInHex(i.data()), i.getJumpTypeAsString())); break; case PushString: collection.append( - createJsonValue("PUSH tag", i.getLocation().start, i.getLocation().end, m_strings.at((h256)i.data()))); + createJsonValue("PUSH tag", i.location().start, i.location().end, m_strings.at((h256)i.data()))); break; case PushTag: if (i.data() == 0) collection.append( - createJsonValue("PUSH [ErrorTag]", i.getLocation().start, i.getLocation().end, "")); + createJsonValue("PUSH [ErrorTag]", i.location().start, i.location().end, "")); else collection.append( - createJsonValue("PUSH [tag]", i.getLocation().start, i.getLocation().end, string(i.data()))); + createJsonValue("PUSH [tag]", i.location().start, i.location().end, string(i.data()))); break; case PushSub: collection.append( - createJsonValue("PUSH [$]", i.getLocation().start, i.getLocation().end, dev::toString(h256(i.data())))); + createJsonValue("PUSH [$]", i.location().start, i.location().end, dev::toString(h256(i.data())))); break; case PushSubSize: collection.append( - createJsonValue("PUSH #[$]", i.getLocation().start, i.getLocation().end, dev::toString(h256(i.data())))); + createJsonValue("PUSH #[$]", i.location().start, i.location().end, dev::toString(h256(i.data())))); break; case PushProgramSize: collection.append( - createJsonValue("PUSHSIZE", i.getLocation().start, i.getLocation().end)); + createJsonValue("PUSHSIZE", i.location().start, i.location().end)); break; case Tag: collection.append( - createJsonValue("tag", i.getLocation().start, i.getLocation().end, string(i.data()))); + createJsonValue("tag", i.location().start, i.location().end, string(i.data()))); collection.append( - createJsonValue("JUMPDEST", i.getLocation().start, i.getLocation().end)); + createJsonValue("JUMPDEST", i.location().start, i.location().end)); break; case PushData: - collection.append(createJsonValue("PUSH data", i.getLocation().start, i.getLocation().end, toStringInHex(i.data()))); + 
collection.append(createJsonValue("PUSH data", i.location().start, i.location().end, toStringInHex(i.data()))); break; default: BOOST_THROW_EXCEPTION(InvalidOpcode()); @@ -282,7 +282,7 @@ AssemblyItem const& Assembly::append(AssemblyItem const& _i) { m_deposit += _i.deposit(); m_items.push_back(_i); - if (m_items.back().getLocation().isEmpty() && !m_currentSourceLocation.isEmpty()) + if (m_items.back().location().isEmpty() && !m_currentSourceLocation.isEmpty()) m_items.back().setLocation(m_currentSourceLocation); return back(); } diff --git a/Assembly.h b/Assembly.h index 5814c16e0..7a6e969ac 100644 --- a/Assembly.h +++ b/Assembly.h @@ -49,7 +49,7 @@ public: AssemblyItem newPushTag() { return AssemblyItem(PushTag, m_usedTags++); } AssemblyItem newData(bytes const& _data) { h256 h = (u256)std::hash()(asString(_data)); m_data[h] = _data; return AssemblyItem(PushData, h); } AssemblyItem newSub(Assembly const& _sub) { m_subs.push_back(_sub); return AssemblyItem(PushSub, m_subs.size() - 1); } - Assembly const& getSub(size_t _sub) const { return m_subs.at(_sub); } + Assembly const& sub(size_t _sub) const { return m_subs.at(_sub); } AssemblyItem newPushString(std::string const& _data) { h256 h = (u256)std::hash()(_data); m_strings[h] = _data; return AssemblyItem(PushString, h); } AssemblyItem newPushSubSize(u256 const& _subId) { return AssemblyItem(PushSubSize, _subId); } @@ -71,7 +71,7 @@ public: AssemblyItem errorTag() { return AssemblyItem(PushTag, 0); } template Assembly& operator<<(T const& _d) { append(_d); return *this; } - AssemblyItems const& getItems() const { return m_items; } + AssemblyItems const& items() const { return m_items; } AssemblyItem const& back() const { return m_items.back(); } std::string backString() const { return m_items.size() && m_items.back().type() == PushString ? 
m_strings.at((h256)m_items.back().data()) : std::string(); } @@ -107,7 +107,7 @@ public: bool _inJsonFormat = false ) const; protected: - std::string getLocationFromSources(StringMap const& _sourceCodes, SourceLocation const& _location) const; + std::string locationFromSources(StringMap const& _sourceCodes, SourceLocation const& _location) const; void donePath() { if (m_totalDeposit != INT_MAX && m_totalDeposit != m_deposit) BOOST_THROW_EXCEPTION(InvalidDeposit()); } unsigned bytesRequired() const; diff --git a/AssemblyItem.h b/AssemblyItem.h index 3fa9bb203..f677728d3 100644 --- a/AssemblyItem.h +++ b/AssemblyItem.h @@ -78,7 +78,7 @@ public: bool match(AssemblyItem const& _i) const { return _i.m_type == UndefinedItem || (m_type == _i.m_type && (m_type != Operation || m_data == _i.m_data)); } void setLocation(SourceLocation const& _location) { m_location = _location; } - SourceLocation const& getLocation() const { return m_location; } + SourceLocation const& location() const { return m_location; } void setJumpType(JumpType _jumpType) { m_jumpType = _jumpType; } JumpType getJumpType() const { return m_jumpType; } diff --git a/CommonSubexpressionEliminator.cpp b/CommonSubexpressionEliminator.cpp index 6c095595f..0797dd294 100644 --- a/CommonSubexpressionEliminator.cpp +++ b/CommonSubexpressionEliminator.cpp @@ -83,24 +83,24 @@ void CommonSubexpressionEliminator::optimizeBreakingItem() return; ExpressionClasses& classes = m_state.expressionClasses(); - SourceLocation const& location = m_breakingItem->getLocation(); + SourceLocation const& itemLocation = m_breakingItem->location(); if (*m_breakingItem == AssemblyItem(Instruction::JUMPI)) { AssemblyItem::JumpType jumpType = m_breakingItem->getJumpType(); - Id condition = m_state.stackElement(m_state.stackHeight() - 1, location); + Id condition = m_state.stackElement(m_state.stackHeight() - 1, itemLocation); if (classes.knownNonZero(condition)) { - feedItem(AssemblyItem(Instruction::SWAP1, location), true); - 
feedItem(AssemblyItem(Instruction::POP, location), true); + feedItem(AssemblyItem(Instruction::SWAP1, itemLocation), true); + feedItem(AssemblyItem(Instruction::POP, itemLocation), true); - AssemblyItem item(Instruction::JUMP, location); + AssemblyItem item(Instruction::JUMP, itemLocation); item.setJumpType(jumpType); m_breakingItem = classes.storeItem(item); } else if (classes.knownZero(condition)) { - AssemblyItem it(Instruction::POP, location); + AssemblyItem it(Instruction::POP, itemLocation); feedItem(it, true); feedItem(it, true); m_breakingItem = nullptr; @@ -108,12 +108,12 @@ void CommonSubexpressionEliminator::optimizeBreakingItem() } else if (*m_breakingItem == AssemblyItem(Instruction::RETURN)) { - Id size = m_state.stackElement(m_state.stackHeight() - 1, location); + Id size = m_state.stackElement(m_state.stackHeight() - 1, itemLocation); if (classes.knownZero(size)) { - feedItem(AssemblyItem(Instruction::POP, location), true); - feedItem(AssemblyItem(Instruction::POP, location), true); - AssemblyItem item(Instruction::STOP, location); + feedItem(AssemblyItem(Instruction::POP, itemLocation), true); + feedItem(AssemblyItem(Instruction::POP, itemLocation), true); + AssemblyItem item(Instruction::STOP, itemLocation); m_breakingItem = classes.storeItem(item); } } @@ -179,16 +179,16 @@ AssemblyItems CSECodeGenerator::generateCode( assertThrow(!m_classPositions[targetItem.second].empty(), OptimizerException, ""); if (m_classPositions[targetItem.second].count(targetItem.first)) continue; - SourceLocation location; + SourceLocation sourceLocation; if (m_expressionClasses.representative(targetItem.second).item) - location = m_expressionClasses.representative(targetItem.second).item->getLocation(); + sourceLocation = m_expressionClasses.representative(targetItem.second).item->location(); int position = classElementPosition(targetItem.second); if (position < targetItem.first) // it is already at its target, we need another copy - appendDup(position, location); + 
appendDup(position, sourceLocation); else - appendOrRemoveSwap(position, location); - appendOrRemoveSwap(targetItem.first, location); + appendOrRemoveSwap(position, sourceLocation); + appendOrRemoveSwap(targetItem.first, sourceLocation); } // remove surplus elements @@ -263,7 +263,7 @@ void CSECodeGenerator::addDependencies(Id _c) case Instruction::SHA3: { Id length = expr.arguments.at(1); - AssemblyItem offsetInstr(Instruction::SUB, expr.item->getLocation()); + AssemblyItem offsetInstr(Instruction::SUB, expr.item->location()); Id offsetToStart = m_expressionClasses.find(offsetInstr, {slot, slotToLoadFrom}); u256 const* o = m_expressionClasses.knownConstant(offsetToStart); u256 const* l = m_expressionClasses.knownConstant(length); @@ -332,7 +332,7 @@ void CSECodeGenerator::generateClassElement(Id _c, bool _allowSequenced) for (Id arg: boost::adaptors::reverse(arguments)) generateClassElement(arg); - SourceLocation const& location = expr.item->getLocation(); + SourceLocation const& itemLocation = expr.item->location(); // The arguments are somewhere on the stack now, so it remains to move them at the correct place. // This is quite difficult as sometimes, the values also have to removed in this process // (if canBeRemoved() returns true) and the two arguments can be equal. 
For now, this is @@ -340,42 +340,42 @@ void CSECodeGenerator::generateClassElement(Id _c, bool _allowSequenced) if (arguments.size() == 1) { if (canBeRemoved(arguments[0], _c)) - appendOrRemoveSwap(classElementPosition(arguments[0]), location); + appendOrRemoveSwap(classElementPosition(arguments[0]), itemLocation); else - appendDup(classElementPosition(arguments[0]), location); + appendDup(classElementPosition(arguments[0]), itemLocation); } else if (arguments.size() == 2) { if (canBeRemoved(arguments[1], _c)) { - appendOrRemoveSwap(classElementPosition(arguments[1]), location); + appendOrRemoveSwap(classElementPosition(arguments[1]), itemLocation); if (arguments[0] == arguments[1]) - appendDup(m_stackHeight, location); + appendDup(m_stackHeight, itemLocation); else if (canBeRemoved(arguments[0], _c)) { - appendOrRemoveSwap(m_stackHeight - 1, location); - appendOrRemoveSwap(classElementPosition(arguments[0]), location); + appendOrRemoveSwap(m_stackHeight - 1, itemLocation); + appendOrRemoveSwap(classElementPosition(arguments[0]), itemLocation); } else - appendDup(classElementPosition(arguments[0]), location); + appendDup(classElementPosition(arguments[0]), itemLocation); } else { if (arguments[0] == arguments[1]) { - appendDup(classElementPosition(arguments[0]), location); - appendDup(m_stackHeight, location); + appendDup(classElementPosition(arguments[0]), itemLocation); + appendDup(m_stackHeight, itemLocation); } else if (canBeRemoved(arguments[0], _c)) { - appendOrRemoveSwap(classElementPosition(arguments[0]), location); - appendDup(classElementPosition(arguments[1]), location); - appendOrRemoveSwap(m_stackHeight - 1, location); + appendOrRemoveSwap(classElementPosition(arguments[0]), itemLocation); + appendDup(classElementPosition(arguments[1]), itemLocation); + appendOrRemoveSwap(m_stackHeight - 1, itemLocation); } else { - appendDup(classElementPosition(arguments[1]), location); - appendDup(classElementPosition(arguments[0]), location); + 
appendDup(classElementPosition(arguments[1]), itemLocation); + appendDup(classElementPosition(arguments[0]), itemLocation); } } } @@ -392,7 +392,7 @@ void CSECodeGenerator::generateClassElement(Id _c, bool _allowSequenced) !m_generatedItems.empty() && m_generatedItems.back() == AssemblyItem(Instruction::SWAP1)) // this will not append a swap but remove the one that is already there - appendOrRemoveSwap(m_stackHeight - 1, location); + appendOrRemoveSwap(m_stackHeight - 1, itemLocation); for (size_t i = 0; i < arguments.size(); ++i) { m_classPositions[m_stack[m_stackHeight - i]].erase(m_stackHeight - i); diff --git a/ExpressionClasses.cpp b/ExpressionClasses.cpp index 5ad8e724c..1fe6643f2 100644 --- a/ExpressionClasses.cpp +++ b/ExpressionClasses.cpp @@ -356,7 +356,7 @@ ExpressionClasses::Id ExpressionClasses::tryToSimplify(Expression const& _expr, //cout << "with rule " << rule.first.toString() << endl; //ExpressionTemplate t(rule.second()); //cout << "to " << rule.second().toString() << endl; - return rebuildExpression(ExpressionTemplate(rule.second(), _expr.item->getLocation())); + return rebuildExpression(ExpressionTemplate(rule.second(), _expr.item->location())); } } diff --git a/KnownState.cpp b/KnownState.cpp index d62dbf17e..55e860e2d 100644 --- a/KnownState.cpp +++ b/KnownState.cpp @@ -107,39 +107,39 @@ KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool m_stackHeight + 1, stackElement( m_stackHeight - int(instruction) + int(Instruction::DUP1), - _item.getLocation() + _item.location() ) ); else if (SemanticInformation::isSwapInstruction(_item)) swapStackElements( m_stackHeight, m_stackHeight - 1 - int(instruction) + int(Instruction::SWAP1), - _item.getLocation() + _item.location() ); else if (instruction != Instruction::POP) { vector arguments(info.args); for (int i = 0; i < info.args; ++i) - arguments[i] = stackElement(m_stackHeight - i, _item.getLocation()); + arguments[i] = stackElement(m_stackHeight - i, _item.location()); if 
(_item.instruction() == Instruction::SSTORE) - op = storeInStorage(arguments[0], arguments[1], _item.getLocation()); + op = storeInStorage(arguments[0], arguments[1], _item.location()); else if (_item.instruction() == Instruction::SLOAD) setStackElement( m_stackHeight + _item.deposit(), - loadFromStorage(arguments[0], _item.getLocation()) + loadFromStorage(arguments[0], _item.location()) ); else if (_item.instruction() == Instruction::MSTORE) - op = storeInMemory(arguments[0], arguments[1], _item.getLocation()); + op = storeInMemory(arguments[0], arguments[1], _item.location()); else if (_item.instruction() == Instruction::MLOAD) setStackElement( m_stackHeight + _item.deposit(), - loadFromMemory(arguments[0], _item.getLocation()) + loadFromMemory(arguments[0], _item.location()) ); else if (_item.instruction() == Instruction::SHA3) setStackElement( m_stackHeight + _item.deposit(), - applySha3(arguments.at(0), arguments.at(1), _item.getLocation()) + applySha3(arguments.at(0), arguments.at(1), _item.location()) ); else { From 129b4142d81d91138efe4a47392a87702e066441 Mon Sep 17 00:00:00 2001 From: chriseth Date: Thu, 10 Sep 2015 12:02:18 +0200 Subject: [PATCH 57/67] Transition from bytecode to more general linker objects. 
--- Assembly.cpp | 141 +++++++++++++++++++++++++--------------- Assembly.h | 19 ++++-- AssemblyItem.cpp | 6 ++ AssemblyItem.h | 14 +++- GasMeter.cpp | 1 + LinkerObject.cpp | 62 ++++++++++++++++++ LinkerObject.h | 55 ++++++++++++++++ SemanticInformation.cpp | 1 + 8 files changed, 240 insertions(+), 59 deletions(-) create mode 100644 LinkerObject.cpp create mode 100644 LinkerObject.h diff --git a/Assembly.cpp b/Assembly.cpp index 64bfd7768..5d4efac11 100644 --- a/Assembly.cpp +++ b/Assembly.cpp @@ -50,8 +50,9 @@ void Assembly::append(Assembly const& _a) m_data.insert(i); for (auto const& i: _a.m_strings) m_strings.insert(i); - for (auto const& i: _a.m_subs) - m_subs.push_back(i); + m_subs += _a.m_subs; + for (auto const& lib: _a.m_libraries) + m_libraries.insert(lib); assert(!_a.m_baseDeposit); assert(!_a.m_totalDeposit); @@ -144,6 +145,9 @@ ostream& Assembly::streamAsm(ostream& _out, string const& _prefix, StringMap con case PushProgramSize: _out << " PUSHSIZE"; break; + case PushLibraryAddress: + _out << " PUSHLIB \"" << m_libraries.at(h256(i.data())) << "\""; + break; case Tag: _out << "tag" << dec << i.data() << ": " << endl << _prefix << " JUMPDEST"; break; @@ -161,7 +165,7 @@ ostream& Assembly::streamAsm(ostream& _out, string const& _prefix, StringMap con _out << _prefix << ".data:" << endl; for (auto const& i: m_data) if (u256(i.first) >= m_subs.size()) - _out << _prefix << " " << hex << (unsigned)(u256)i.first << ": " << toHex(i.second) << endl; + _out << _prefix << " " << hex << (unsigned)(u256)i.first << ": " << dev::toHex(i.second) << endl; for (size_t i = 0; i < m_subs.size(); ++i) { _out << _prefix << " " << hex << i << ": " << endl; @@ -232,6 +236,11 @@ Json::Value Assembly::streamAsmJson(ostream& _out, StringMap const& _sourceCodes collection.append( createJsonValue("PUSHSIZE", i.location().start, i.location().end)); break; + case PushLibraryAddress: + collection.append( + createJsonValue("PUSHLIB", i.location().start, i.location().end, 
m_libraries.at(h256(i.data()))) + ); + break; case Tag: collection.append( createJsonValue("tag", i.location().start, i.location().end, string(i.data()))); @@ -287,6 +296,13 @@ AssemblyItem const& Assembly::append(AssemblyItem const& _i) return back(); } +AssemblyItem Assembly::newPushLibraryAddress(string const& _identifier) +{ + h256 h(dev::sha3(_identifier)); + m_libraries[h] = _identifier; + return AssemblyItem(PushLibraryAddress, h); +} + void Assembly::injectStart(AssemblyItem const& _i) { m_items.insert(m_items.begin(), _i); @@ -377,96 +393,107 @@ Assembly& Assembly::optimise(bool _enable, bool _isCreation, size_t _runs) return *this; } -bytes Assembly::assemble() const +LinkerObject const& Assembly::assemble() const { - bytes ret; + if (!m_assembledObject.bytecode.empty()) + return m_assembledObject; + + LinkerObject& ret = m_assembledObject; unsigned totalBytes = bytesRequired(); vector tagPos(m_usedTags); map tagRef; multimap dataRef; + multimap subRef; vector sizeRef; ///< Pointers to code locations where the size of the program is inserted unsigned bytesPerTag = dev::bytesRequired(totalBytes); byte tagPush = (byte)Instruction::PUSH1 - 1 + bytesPerTag; - for (size_t i = 0; i < m_subs.size(); ++i) - m_data[u256(i)] = m_subs[i].assemble(); - unsigned bytesRequiredIncludingData = bytesRequired(); + for (auto const& sub: m_subs) + bytesRequiredIncludingData += sub.assemble().bytecode.size(); + unsigned bytesPerDataRef = dev::bytesRequired(bytesRequiredIncludingData); byte dataRefPush = (byte)Instruction::PUSH1 - 1 + bytesPerDataRef; - ret.reserve(bytesRequiredIncludingData); - // m_data must not change from here on + ret.bytecode.reserve(bytesRequiredIncludingData); for (AssemblyItem const& i: m_items) { // store position of the invalid jump destination if (i.type() != Tag && tagPos[0] == 0) - tagPos[0] = ret.size(); + tagPos[0] = ret.bytecode.size(); switch (i.type()) { case Operation: - ret.push_back((byte)i.data()); + 
ret.bytecode.push_back((byte)i.data()); break; case PushString: { - ret.push_back((byte)Instruction::PUSH32); + ret.bytecode.push_back((byte)Instruction::PUSH32); unsigned ii = 0; for (auto j: m_strings.at((h256)i.data())) if (++ii > 32) break; else - ret.push_back((byte)j); + ret.bytecode.push_back((byte)j); while (ii++ < 32) - ret.push_back(0); + ret.bytecode.push_back(0); break; } case Push: { byte b = max(1, dev::bytesRequired(i.data())); - ret.push_back((byte)Instruction::PUSH1 - 1 + b); - ret.resize(ret.size() + b); - bytesRef byr(&ret.back() + 1 - b, b); + ret.bytecode.push_back((byte)Instruction::PUSH1 - 1 + b); + ret.bytecode.resize(ret.bytecode.size() + b); + bytesRef byr(&ret.bytecode.back() + 1 - b, b); toBigEndian(i.data(), byr); break; } case PushTag: { - ret.push_back(tagPush); - tagRef[ret.size()] = (unsigned)i.data(); - ret.resize(ret.size() + bytesPerTag); + ret.bytecode.push_back(tagPush); + tagRef[ret.bytecode.size()] = (unsigned)i.data(); + ret.bytecode.resize(ret.bytecode.size() + bytesPerTag); break; } - case PushData: case PushSub: - { - ret.push_back(dataRefPush); - dataRef.insert(make_pair((h256)i.data(), ret.size())); - ret.resize(ret.size() + bytesPerDataRef); + case PushData: + ret.bytecode.push_back(dataRefPush); + dataRef.insert(make_pair((h256)i.data(), ret.bytecode.size())); + ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); + break; + case PushSub: + ret.bytecode.push_back(dataRefPush); + subRef.insert(make_pair(size_t(i.data()), ret.bytecode.size())); + ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); break; - } case PushSubSize: { - auto s = m_data[i.data()].size(); + auto s = m_subs.at(size_t(i.data())).assemble().bytecode.size(); i.setPushedValue(u256(s)); byte b = max(1, dev::bytesRequired(s)); - ret.push_back((byte)Instruction::PUSH1 - 1 + b); - ret.resize(ret.size() + b); - bytesRef byr(&ret.back() + 1 - b, b); + ret.bytecode.push_back((byte)Instruction::PUSH1 - 1 + b); + 
ret.bytecode.resize(ret.bytecode.size() + b); + bytesRef byr(&ret.bytecode.back() + 1 - b, b); toBigEndian(s, byr); break; } case PushProgramSize: { - ret.push_back(dataRefPush); - sizeRef.push_back(ret.size()); - ret.resize(ret.size() + bytesPerDataRef); + ret.bytecode.push_back(dataRefPush); + sizeRef.push_back(ret.bytecode.size()); + ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); break; } + case PushLibraryAddress: + ret.bytecode.push_back(byte(Instruction::PUSH20)); + ret.linkReferences[ret.bytecode.size()] = m_libraries.at(i.data()); + ret.bytecode.resize(ret.bytecode.size() + 20); + break; case Tag: - tagPos[(unsigned)i.data()] = ret.size(); + tagPos[(unsigned)i.data()] = ret.bytecode.size(); assertThrow(i.data() != 0, AssemblyException, ""); - ret.push_back((byte)Instruction::JUMPDEST); + ret.bytecode.push_back((byte)Instruction::JUMPDEST); break; default: BOOST_THROW_EXCEPTION(InvalidOpcode()); @@ -474,7 +501,7 @@ bytes Assembly::assemble() const } for (auto const& i: tagRef) { - bytesRef r(ret.data() + i.first, bytesPerTag); + bytesRef r(ret.bytecode.data() + i.first, bytesPerTag); auto tag = i.second; if (tag >= tagPos.size()) tag = 0; @@ -484,28 +511,36 @@ bytes Assembly::assemble() const toBigEndian(tagPos[tag], r); } - if (!m_data.empty()) + if (!dataRef.empty() && !subRef.empty()) + ret.bytecode.push_back(0); + for (size_t i = 0; i < m_subs.size(); ++i) { - ret.push_back(0); - for (auto const& i: m_data) + auto references = subRef.equal_range(i); + if (references.first == references.second) + continue; + for (auto ref = references.first; ref != references.second; ++ref) { - auto its = dataRef.equal_range(i.first); - if (its.first != its.second) - { - for (auto it = its.first; it != its.second; ++it) - { - bytesRef r(ret.data() + it->second, bytesPerDataRef); - toBigEndian(ret.size(), r); - } - for (auto b: i.second) - ret.push_back(b); - } + bytesRef r(ret.bytecode.data() + ref->second, bytesPerDataRef); + toBigEndian(ret.bytecode.size(), 
r); } + ret.append(m_subs[i].assemble()); + } + for (auto const& dataItem: m_data) + { + auto references = dataRef.equal_range(dataItem.first); + if (references.first == references.second) + continue; + for (auto ref = references.first; ref != references.second; ++ref) + { + bytesRef r(ret.bytecode.data() + ref->second, bytesPerDataRef); + toBigEndian(ret.bytecode.size(), r); + } + ret.bytecode += dataItem.second; } for (unsigned pos: sizeRef) { - bytesRef r(ret.data() + pos, bytesPerDataRef); - toBigEndian(ret.size(), r); + bytesRef r(ret.bytecode.data() + pos, bytesPerDataRef); + toBigEndian(ret.bytecode.size(), r); } return ret; } diff --git a/Assembly.h b/Assembly.h index 7a6e969ac..d18f94896 100644 --- a/Assembly.h +++ b/Assembly.h @@ -25,9 +25,11 @@ #include #include #include +#include #include #include #include +#include #include "Exceptions.h" #include @@ -47,11 +49,12 @@ public: AssemblyItem newTag() { return AssemblyItem(Tag, m_usedTags++); } AssemblyItem newPushTag() { return AssemblyItem(PushTag, m_usedTags++); } - AssemblyItem newData(bytes const& _data) { h256 h = (u256)std::hash()(asString(_data)); m_data[h] = _data; return AssemblyItem(PushData, h); } + AssemblyItem newData(bytes const& _data) { h256 h(sha3(asString(_data))); m_data[h] = _data; return AssemblyItem(PushData, h); } AssemblyItem newSub(Assembly const& _sub) { m_subs.push_back(_sub); return AssemblyItem(PushSub, m_subs.size() - 1); } Assembly const& sub(size_t _sub) const { return m_subs.at(_sub); } - AssemblyItem newPushString(std::string const& _data) { h256 h = (u256)std::hash()(_data); m_strings[h] = _data; return AssemblyItem(PushString, h); } + AssemblyItem newPushString(std::string const& _data) { h256 h(sha3(_data)); m_strings[h] = _data; return AssemblyItem(PushString, h); } AssemblyItem newPushSubSize(u256 const& _subId) { return AssemblyItem(PushSubSize, _subId); } + AssemblyItem newPushLibraryAddress(std::string const& _identifier); AssemblyItem append() { return 
append(newTag()); } void append(Assembly const& _a); @@ -63,6 +66,7 @@ public: /// Pushes the final size of the current assembly itself. Use this when the code is modified /// after compilation and CODESIZE is not an option. void appendProgramSize() { append(AssemblyItem(PushProgramSize)); } + void appendLibraryAddress(std::string const& _identifier) { append(newPushLibraryAddress(_identifier)); } AssemblyItem appendJump() { auto ret = append(newPushTag()); append(Instruction::JUMP); return ret; } AssemblyItem appendJumpI() { auto ret = append(newPushTag()); append(Instruction::JUMPI); return ret; } @@ -92,8 +96,9 @@ public: /// Changes the source location used for each appended item. void setSourceLocation(SourceLocation const& _location) { m_currentSourceLocation = _location; } - bytes assemble() const; - bytes const& data(h256 const& _i) const { return m_data[_i]; } + /// Assembles the assembly into bytecode. The assembly should not be modified after this call. + LinkerObject const& assemble() const; + bytes const& data(h256 const& _i) const { return m_data.at(_i); } /// Modify (if @a _enable is set) and return the current assembly such that creation and /// execution gas usage is optimised. @a _isCreation should be true for the top-level assembly. @@ -106,6 +111,7 @@ public: const StringMap &_sourceCodes = StringMap(), bool _inJsonFormat = false ) const; + protected: std::string locationFromSources(StringMap const& _sourceCodes, SourceLocation const& _location) const; void donePath() { if (m_totalDeposit != INT_MAX && m_totalDeposit != m_deposit) BOOST_THROW_EXCEPTION(InvalidDeposit()); } @@ -120,9 +126,12 @@ protected: // 0 is reserved for exception unsigned m_usedTags = 1; AssemblyItems m_items; - mutable std::map m_data; + std::map m_data; std::vector m_subs; std::map m_strings; + std::map m_libraries; ///< Identifiers of libraries to be linked. 
+ + mutable LinkerObject m_assembledObject; int m_deposit = 0; int m_baseDeposit = 0; diff --git a/AssemblyItem.cpp b/AssemblyItem.cpp index e005ece18..d70510646 100644 --- a/AssemblyItem.cpp +++ b/AssemblyItem.cpp @@ -44,6 +44,8 @@ unsigned AssemblyItem::bytesRequired(unsigned _addressLength) const case PushData: case PushSub: return 1 + _addressLength; + case PushLibraryAddress: + return 21; default: break; } @@ -63,6 +65,7 @@ int AssemblyItem::deposit() const case PushSub: case PushSubSize: case PushProgramSize: + case PushLibraryAddress: return 1; case Tag: return 0; @@ -118,6 +121,9 @@ ostream& dev::eth::operator<<(ostream& _out, AssemblyItem const& _item) case PushProgramSize: _out << " PushProgramSize"; break; + case PushLibraryAddress: + _out << " PushLibraryAddress " << hex << h256(_item.data()).abridgedMiddle(); + break; case UndefinedItem: _out << " ???"; break; diff --git a/AssemblyItem.h b/AssemblyItem.h index f677728d3..795b5a8a2 100644 --- a/AssemblyItem.h +++ b/AssemblyItem.h @@ -34,7 +34,19 @@ namespace dev namespace eth { -enum AssemblyItemType { UndefinedItem, Operation, Push, PushString, PushTag, PushSub, PushSubSize, PushProgramSize, Tag, PushData }; +enum AssemblyItemType { + UndefinedItem, + Operation, + Push, + PushString, + PushTag, + PushSub, + PushSubSize, + PushProgramSize, + Tag, + PushData, + PushLibraryAddress ///< Push a currently unknown address of another (library) contract. +}; class Assembly; diff --git a/GasMeter.cpp b/GasMeter.cpp index 42a5bed2e..00b93214d 100644 --- a/GasMeter.cpp +++ b/GasMeter.cpp @@ -53,6 +53,7 @@ GasMeter::GasConsumption GasMeter::estimateMax(AssemblyItem const& _item) case PushSub: case PushSubSize: case PushProgramSize: + case PushLibraryAddress: gas = runGas(Instruction::PUSH1); break; case Tag: diff --git a/LinkerObject.cpp b/LinkerObject.cpp new file mode 100644 index 000000000..ceb864a17 --- /dev/null +++ b/LinkerObject.cpp @@ -0,0 +1,62 @@ +/* + This file is part of cpp-ethereum. 
+ + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** @file LinkerObject.cpp + * @author Christian R + * @date 2015 + */ + +#include +#include + +using namespace dev; +using namespace dev::eth; +using namespace std; + +void LinkerObject::append(LinkerObject const& _other) +{ + for (auto const& ref: _other.linkReferences) + linkReferences[ref.first + bytecode.size()] = ref.second; + bytecode += _other.bytecode; +} + +void LinkerObject::link(map const& _libraryAddresses) +{ + std::map remainingRefs; + for (auto const& linkRef: linkReferences) + { + auto it = _libraryAddresses.find(linkRef.second); + if (it == _libraryAddresses.end()) + remainingRefs.insert(linkRef); + else + it->second.ref().copyTo(ref(bytecode).cropped(linkRef.first, 20)); + } + linkReferences.swap(remainingRefs); +} + +string LinkerObject::toHex() const +{ + string hex = dev::toHex(bytecode); + for (auto const& ref: linkReferences) + { + size_t pos = ref.first * 2; + string const& name = ref.second; + hex[pos] = hex[pos + 1] = hex[pos + 38] = hex[pos + 39] = '_'; + for (size_t i = 0; i < 36; ++i) + hex[pos + 2 + i] = i < name.size() ? name[i] : '_'; + } + return hex; +} diff --git a/LinkerObject.h b/LinkerObject.h new file mode 100644 index 000000000..83d2bd7e0 --- /dev/null +++ b/LinkerObject.h @@ -0,0 +1,55 @@ +/* + This file is part of cpp-ethereum. 
+ + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** @file LinkerObject.h + * @author Gav Wood + * @date 2014 + */ + +#pragma once + +#include +#include + +namespace dev +{ +namespace eth +{ + +/** + * Binary object that potentially still needs to be linked (i.e. addresses of other contracts + * need to be filled in). + */ +struct LinkerObject +{ + bytes bytecode; + /// Map from offsets in bytecode to library identifiers. The addresses starting at those offsets + /// need to be replaced by the actual addresses by the linker. + std::map linkReferences; + + /// Appends the bytecode of @a _other and incorporates its link references. + void append(LinkerObject const& _other); + + /// Links the given libraries by replacing their uses in the code and removes them from the references. + void link(std::map const& _libraryAddresses); + + /// @returns a hex representation of the bytecode of the given object, replacing unlinked + /// addresses by placeholders. 
+ std::string toHex() const; +}; + +} +} diff --git a/SemanticInformation.cpp b/SemanticInformation.cpp index 91f93e7ef..309bbe2b1 100644 --- a/SemanticInformation.cpp +++ b/SemanticInformation.cpp @@ -43,6 +43,7 @@ bool SemanticInformation::breaksCSEAnalysisBlock(AssemblyItem const& _item) case PushSubSize: case PushProgramSize: case PushData: + case PushLibraryAddress: return false; case Operation: { From 724ef6757dc6745554f03183acecd6e0124f7e76 Mon Sep 17 00:00:00 2001 From: Gav Wood Date: Sun, 13 Sep 2015 17:49:26 +0200 Subject: [PATCH 58/67] Workaround bug in boost. --- ExpressionClasses.cpp | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/ExpressionClasses.cpp b/ExpressionClasses.cpp index 1fe6643f2..9d13a57a1 100644 --- a/ExpressionClasses.cpp +++ b/ExpressionClasses.cpp @@ -183,6 +183,16 @@ private: vector>> m_rules; }; +template S divWorkaround(S const& _a, S const& _b) +{ + return (S)(bigint(_a) / bigint(_b)); +} + +template S modWorkaround(S const& _a, S const& _b) +{ + return (S)(bigint(_a) % bigint(_b)); +} + Rules::Rules() { // Multiple occurences of one of these inside one rule must match the same equivalence class. @@ -206,10 +216,10 @@ Rules::Rules() {{Instruction::ADD, {A, B}}, [=]{ return A.d() + B.d(); }}, {{Instruction::MUL, {A, B}}, [=]{ return A.d() * B.d(); }}, {{Instruction::SUB, {A, B}}, [=]{ return A.d() - B.d(); }}, - {{Instruction::DIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : A.d() / B.d(); }}, - {{Instruction::SDIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(u2s(A.d()) / u2s(B.d())); }}, - {{Instruction::MOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : A.d() % B.d(); }}, - {{Instruction::SMOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(u2s(A.d()) % u2s(B.d())); }}, + {{Instruction::DIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : divWorkaround(A.d(), B.d()); }}, + {{Instruction::SDIV, {A, B}}, [=]{ return B.d() == 0 ? 
0 : s2u(divWorkaround(u2s(A.d()), u2s(B.d()))); }}, + {{Instruction::MOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : modWorkaround(A.d(), B.d()); }}, + {{Instruction::SMOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(modWorkaround(u2s(A.d()), u2s(B.d()))); }}, {{Instruction::EXP, {A, B}}, [=]{ return u256(boost::multiprecision::powm(bigint(A.d()), bigint(B.d()), bigint(1) << 256)); }}, {{Instruction::NOT, {A}}, [=]{ return ~A.d(); }}, {{Instruction::LT, {A, B}}, [=]() { return A.d() < B.d() ? u256(1) : 0; }}, From 4b77b09148c22b182575612240d70040c699c353 Mon Sep 17 00:00:00 2001 From: chriseth Date: Tue, 22 Sep 2015 12:54:41 +0200 Subject: [PATCH 59/67] Create version string. --- Version.cpp | 38 ++++++++++++++++++++++++++++++++++++++ Version.h | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 Version.cpp create mode 100644 Version.h diff --git a/Version.cpp b/Version.cpp new file mode 100644 index 000000000..16b510b6f --- /dev/null +++ b/Version.cpp @@ -0,0 +1,38 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** + * @author Christian + * @date 2015 + * Versioning. 
+ */ + +#include +#include +#include +#include + +using namespace dev; +using namespace std; + +char const* dev::eth::VersionNumberLibEvmAsm = ETH_PROJECT_VERSION; +extern string const dev::eth::VersionStringLibEvmAsm = + string(dev::eth::VersionNumberLibEvmAsm) + + "-" + + string(DEV_QUOTED(ETH_COMMIT_HASH)).substr(0, 8) + + (ETH_CLEAN_REPO ? "" : "*") + + "/" DEV_QUOTED(ETH_BUILD_TYPE) "-" DEV_QUOTED(ETH_BUILD_PLATFORM); + diff --git a/Version.h b/Version.h new file mode 100644 index 000000000..8cba6e833 --- /dev/null +++ b/Version.h @@ -0,0 +1,36 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** + * @author Christian + * @date 2015 + * Versioning. + */ + +#pragma once + +#include + +namespace dev +{ +namespace eth +{ + +extern char const* VersionNumberLibEvmAsm; +extern std::string const VersionStringLibEvmAsm; + +} +} From b4c46201ef8c46594f442e9b11bf22a8a04b642e Mon Sep 17 00:00:00 2001 From: chriseth Date: Tue, 6 Oct 2015 17:44:39 +0200 Subject: [PATCH 60/67] Add non-const sub function. 
--- Assembly.h | 1 + 1 file changed, 1 insertion(+) diff --git a/Assembly.h b/Assembly.h index d18f94896..28328277d 100644 --- a/Assembly.h +++ b/Assembly.h @@ -52,6 +52,7 @@ public: AssemblyItem newData(bytes const& _data) { h256 h(sha3(asString(_data))); m_data[h] = _data; return AssemblyItem(PushData, h); } AssemblyItem newSub(Assembly const& _sub) { m_subs.push_back(_sub); return AssemblyItem(PushSub, m_subs.size() - 1); } Assembly const& sub(size_t _sub) const { return m_subs.at(_sub); } + Assembly& sub(size_t _sub) { return m_subs.at(_sub); } AssemblyItem newPushString(std::string const& _data) { h256 h(sha3(_data)); m_strings[h] = _data; return AssemblyItem(PushString, h); } AssemblyItem newPushSubSize(u256 const& _subId) { return AssemblyItem(PushSubSize, _subId); } AssemblyItem newPushLibraryAddress(std::string const& _identifier); From 5caad351f88e54afa6c38074938c18705fdd319c Mon Sep 17 00:00:00 2001 From: chriseth Date: Mon, 16 Nov 2015 11:48:38 +0100 Subject: [PATCH 61/67] Work around bug in u256->unsigned conversion for MSVC/Boost. --- ControlFlowGraph.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ControlFlowGraph.cpp b/ControlFlowGraph.cpp index 41a53aa82..5adb951a4 100644 --- a/ControlFlowGraph.cpp +++ b/ControlFlowGraph.cpp @@ -34,7 +34,8 @@ using namespace std; using namespace dev; using namespace dev::eth; -BlockId::BlockId(u256 const& _id): m_id(_id) +BlockId::BlockId(u256 const& _id): + m_id(unsigned(_id)) { assertThrow( _id < initial().m_id, OptimizerException, "Tag number too large."); } From 6e98243ead4843a4dd28fcae312f3b8af6cc7b03 Mon Sep 17 00:00:00 2001 From: Gav Wood Date: Sat, 21 Nov 2015 14:33:55 +0100 Subject: [PATCH 62/67] EIP-2.1 Fixes #96. Address a few concerns from prior code review. 
--- Assembly.cpp | 1 - ConstantOptimiser.cpp | 20 ++++++++++---------- ConstantOptimiser.h | 8 ++++++++ GasMeter.cpp | 42 ++++++++++++++++++++---------------------- GasMeter.h | 8 +++++++- 5 files changed, 45 insertions(+), 34 deletions(-) diff --git a/Assembly.cpp b/Assembly.cpp index 5d4efac11..7277865e0 100644 --- a/Assembly.cpp +++ b/Assembly.cpp @@ -22,7 +22,6 @@ #include "Assembly.h" #include #include -#include #include #include #include diff --git a/ConstantOptimiser.cpp b/ConstantOptimiser.cpp index 88874d81c..0ebe2eabd 100644 --- a/ConstantOptimiser.cpp +++ b/ConstantOptimiser.cpp @@ -22,7 +22,6 @@ #include "libevmasm/ConstantOptimiser.h" #include #include -#include using namespace std; using namespace dev; using namespace dev::eth; @@ -70,12 +69,13 @@ unsigned ConstantOptimisationMethod::optimiseConstants( bigint ConstantOptimisationMethod::simpleRunGas(AssemblyItems const& _items) { + EVMSchedule schedule; // TODO: make relevant to context. bigint gas = 0; for (AssemblyItem const& item: _items) if (item.type() == Push) - gas += GasMeter::runGas(Instruction::PUSH1); + gas += GasMeter::runGas(Instruction::PUSH1, schedule); else if (item.type() == Operation) - gas += GasMeter::runGas(item.instruction()); + gas += GasMeter::runGas(item.instruction(), schedule); return gas; } @@ -85,11 +85,11 @@ bigint ConstantOptimisationMethod::dataGas(bytes const& _data) const { bigint gas; for (auto b: _data) - gas += b ? c_txDataNonZeroGas : c_txDataZeroGas; + gas += b ? m_schedule.txDataNonZeroGas : m_schedule.txDataZeroGas; return gas; } else - return c_createDataGas * dataSize(); + return m_schedule.createDataGas * dataSize(); } size_t ConstantOptimisationMethod::bytesRequired(AssemblyItems const& _items) @@ -121,7 +121,7 @@ bigint LiteralMethod::gasNeeded() return combineGas( simpleRunGas({Instruction::PUSH1}), // PUSHX plus data - (m_params.isCreation ? c_txDataNonZeroGas : c_createDataGas) + dataGas(), + (m_params.isCreation ? 
m_schedule.txDataNonZeroGas : m_schedule.createDataGas) + dataGas(), 0 ); } @@ -148,9 +148,9 @@ bigint CodeCopyMethod::gasNeeded() { return combineGas( // Run gas: we ignore memory increase costs - simpleRunGas(m_copyRoutine) + c_copyGas, + simpleRunGas(m_copyRoutine) + m_schedule.copyGas, // Data gas for copy routines: Some bytes are zero, but we ignore them. - bytesRequired(m_copyRoutine) * (m_params.isCreation ? c_txDataNonZeroGas : c_createDataGas), + bytesRequired(m_copyRoutine) * (m_params.isCreation ? m_schedule.txDataNonZeroGas : m_schedule.createDataGas), // Data gas for data itself dataGas(toBigEndian(m_value)) ); @@ -217,9 +217,9 @@ bigint ComputeMethod::gasNeeded(AssemblyItems const& _routine) { size_t numExps = count(_routine.begin(), _routine.end(), Instruction::EXP); return combineGas( - simpleRunGas(_routine) + numExps * (c_expGas + c_expByteGas), + simpleRunGas(_routine) + numExps * (m_schedule.expGas + m_schedule.expByteGas), // Data gas for routine: Some bytes are zero, but we ignore them. - bytesRequired(_routine) * (m_params.isCreation ? c_txDataNonZeroGas : c_createDataGas), + bytesRequired(_routine) * (m_params.isCreation ? m_schedule.txDataNonZeroGas : m_schedule.createDataGas), 0 ); } diff --git a/ConstantOptimiser.h b/ConstantOptimiser.h index e75eff380..64cb66bb9 100644 --- a/ConstantOptimiser.h +++ b/ConstantOptimiser.h @@ -24,6 +24,7 @@ #include #include #include +#include namespace dev { @@ -34,6 +35,8 @@ class AssemblyItem; using AssemblyItems = std::vector; class Assembly; +// TODO: FIXME: HOMESTEAD: XXX: @chfast populate m_schedule from an ExtVMFace instance via ExtVMFace::evmSchedule. + /** * Abstract base class for one way to change how constants are represented in the code. 
*/ @@ -88,6 +91,7 @@ protected: Params m_params; u256 const& m_value; + EVMSchedule m_schedule; }; /** @@ -101,6 +105,8 @@ public: ConstantOptimisationMethod(_params, _value) {} virtual bigint gasNeeded() override; virtual void execute(Assembly&, AssemblyItems&) override {} + + EVMSchedule m_schedule; }; /** @@ -115,6 +121,7 @@ public: protected: AssemblyItems m_copyRoutine; + EVMSchedule m_schedule; }; /** @@ -141,6 +148,7 @@ protected: bigint gasNeeded(AssemblyItems const& _routine); AssemblyItems m_routine; + EVMSchedule m_schedule; }; } diff --git a/GasMeter.cpp b/GasMeter.cpp index 00b93214d..b792f04db 100644 --- a/GasMeter.cpp +++ b/GasMeter.cpp @@ -21,8 +21,6 @@ #include "GasMeter.h" #include -#include - using namespace std; using namespace dev; using namespace dev::eth; @@ -73,13 +71,13 @@ GasMeter::GasConsumption GasMeter::estimateMax(AssemblyItem const& _item) m_state->storageContent().count(slot) && classes.knownNonZero(m_state->storageContent().at(slot)) )) - gas += c_sstoreResetGas; //@todo take refunds into account + gas += m_schedule.sstoreResetGas; //@todo take refunds into account else - gas += c_sstoreSetGas; + gas += m_schedule.sstoreSetGas; break; } case Instruction::SLOAD: - gas += c_sloadGas; + gas += m_schedule.sloadGas; break; case Instruction::RETURN: gas += memoryGas(0, -1); @@ -98,18 +96,18 @@ GasMeter::GasConsumption GasMeter::estimateMax(AssemblyItem const& _item) })); break; case Instruction::SHA3: - gas = c_sha3Gas; - gas += wordGas(c_sha3WordGas, m_state->relativeStackElement(-1)); + gas = m_schedule.sha3Gas; + gas += wordGas(m_schedule.sha3WordGas, m_state->relativeStackElement(-1)); gas += memoryGas(0, -1); break; case Instruction::CALLDATACOPY: case Instruction::CODECOPY: gas += memoryGas(0, -2); - gas += wordGas(c_copyGas, m_state->relativeStackElement(-2)); + gas += wordGas(m_schedule.copyGas, m_state->relativeStackElement(-2)); break; case Instruction::EXTCODECOPY: gas += memoryGas(-1, -3); - gas += wordGas(c_copyGas, 
m_state->relativeStackElement(-3)); + gas += wordGas(m_schedule.copyGas, m_state->relativeStackElement(-3)); break; case Instruction::LOG0: case Instruction::LOG1: @@ -118,38 +116,38 @@ GasMeter::GasConsumption GasMeter::estimateMax(AssemblyItem const& _item) case Instruction::LOG4: { unsigned n = unsigned(_item.instruction()) - unsigned(Instruction::LOG0); - gas = c_logGas + c_logTopicGas * n; + gas = m_schedule.logGas + m_schedule.logTopicGas * n; gas += memoryGas(0, -1); if (u256 const* value = classes.knownConstant(m_state->relativeStackElement(-1))) - gas += c_logDataGas * (*value); + gas += m_schedule.logDataGas * (*value); else gas = GasConsumption::infinite(); break; } case Instruction::CALL: case Instruction::CALLCODE: - gas = c_callGas; + gas = m_schedule.callGas; if (u256 const* value = classes.knownConstant(m_state->relativeStackElement(0))) gas += (*value); else gas = GasConsumption::infinite(); if (_item.instruction() != Instruction::CALLCODE) - gas += c_callNewAccountGas; // We very rarely know whether the address exists. + gas += m_schedule.callNewAccountGas; // We very rarely know whether the address exists. 
if (!classes.knownZero(m_state->relativeStackElement(-2))) - gas += c_callValueTransferGas; + gas += m_schedule.callValueTransferGas; gas += memoryGas(-3, -4); gas += memoryGas(-5, -6); break; case Instruction::CREATE: - gas = c_createGas; + gas = m_schedule.createGas; gas += memoryGas(-1, -2); break; case Instruction::EXP: - gas = c_expGas; + gas = m_schedule.expGas; if (u256 const* value = classes.knownConstant(m_state->relativeStackElement(-1))) - gas += c_expByteGas * (32 - (h256(*value).firstBitSet() / 8)); + gas += m_schedule.expByteGas * (32 - (h256(*value).firstBitSet() / 8)); else - gas += c_expByteGas * 32; + gas += m_schedule.expByteGas * 32; break; default: break; @@ -182,10 +180,10 @@ GasMeter::GasConsumption GasMeter::memoryGas(ExpressionClasses::Id _position) return GasConsumption(u256(0)); u256 previous = m_largestMemoryAccess; m_largestMemoryAccess = *value; - auto memGas = [](u256 const& pos) -> u256 + auto memGas = [=](u256 const& pos) -> u256 { u256 size = (pos + 31) / 32; - return c_memoryGas * size + size * size / c_quadCoeffDiv; + return m_schedule.memoryGas * size + size * size / m_schedule.quadCoeffDiv; }; return memGas(*value) - memGas(previous); } @@ -202,14 +200,14 @@ GasMeter::GasConsumption GasMeter::memoryGas(int _stackPosOffset, int _stackPosS })); } -u256 GasMeter::runGas(Instruction _instruction) +u256 GasMeter::runGas(Instruction _instruction, EVMSchedule const& _es) { if (_instruction == Instruction::JUMPDEST) return 1; int tier = instructionInfo(_instruction).gasPriceTier; assertThrow(tier != InvalidTier, OptimizerException, "Invalid gas tier."); - return c_tierStepGas[tier]; + return _es.tierStepGas[tier]; } diff --git a/GasMeter.h b/GasMeter.h index 90f151fc4..b11a63a55 100644 --- a/GasMeter.h +++ b/GasMeter.h @@ -25,6 +25,7 @@ #include #include #include +#include namespace dev { @@ -33,6 +34,8 @@ namespace eth class KnownState; +// TODO: FIXME: HOMESTEAD: XXX: @chfast populate m_schedule from an ExtVMFace instance via 
ExtVMFace::evmSchedule. + /** * Class that helps computing the maximum gas consumption for instructions. * Has to be initialized with a certain known state that will be automatically updated for @@ -66,7 +69,8 @@ public: u256 const& largestMemoryAccess() const { return m_largestMemoryAccess; } - static u256 runGas(Instruction _instruction); + u256 runGas(Instruction _instruction) const { return runGas(_instruction, m_schedule); } + static u256 runGas(Instruction _instruction, EVMSchedule const& _es); private: /// @returns _multiplier * (_value + 31) / 32, if _value is a known constant and infinite otherwise. @@ -81,6 +85,8 @@ private: std::shared_ptr m_state; /// Largest point where memory was accessed since the creation of this object. u256 m_largestMemoryAccess; + + EVMSchedule m_schedule; }; inline std::ostream& operator<<(std::ostream& _str, GasMeter::GasConsumption const& _consumption) From 1b1b6651cd60fcaded8043dc00e61df075fe2e4a Mon Sep 17 00:00:00 2001 From: chriseth Date: Fri, 15 Jan 2016 16:26:12 +0100 Subject: [PATCH 63/67] Fix sequence number bug. This bug resulted in incorrect storage access in some situations. The reason was that when intersecting states, the sequence numbers were not handled and thus some operations with too low sequence numbers were used during code generation. 
--- ControlFlowGraph.cpp | 42 ++++++++++++++++++++++++++++-------------- KnownState.cpp | 6 ++++-- KnownState.h | 5 ++--- 3 files changed, 34 insertions(+), 19 deletions(-) diff --git a/ControlFlowGraph.cpp b/ControlFlowGraph.cpp index 5adb951a4..1ad54d8c7 100644 --- a/ControlFlowGraph.cpp +++ b/ControlFlowGraph.cpp @@ -221,22 +221,36 @@ void ControlFlowGraph::gatherKnowledge() KnownStatePointer emptyState = make_shared(); bool unknownJumpEncountered = false; - vector> workQueue({make_pair(BlockId::initial(), emptyState->copy())}); + struct WorkQueueItem { + BlockId blockId; + KnownStatePointer state; + set blocksSeen; + }; + + vector workQueue{WorkQueueItem{BlockId::initial(), emptyState->copy(), set()}}; + auto addWorkQueueItem = [&](WorkQueueItem const& _currentItem, BlockId _to, KnownStatePointer const& _state) + { + WorkQueueItem item; + item.blockId = _to; + item.state = _state->copy(); + item.blocksSeen = _currentItem.blocksSeen; + item.blocksSeen.insert(_currentItem.blockId); + workQueue.push_back(move(item)); + }; + while (!workQueue.empty()) { - //@todo we might have to do something like incrementing the sequence number for each JUMPDEST - assertThrow(!!workQueue.back().first, OptimizerException, ""); - if (!m_blocks.count(workQueue.back().first)) - { - workQueue.pop_back(); - continue; // too bad, we do not know the tag, probably an invalid jump - } - BasicBlock& block = m_blocks.at(workQueue.back().first); - KnownStatePointer state = workQueue.back().second; + WorkQueueItem item = move(workQueue.back()); workQueue.pop_back(); + //@todo we might have to do something like incrementing the sequence number for each JUMPDEST + assertThrow(!!item.blockId, OptimizerException, ""); + if (!m_blocks.count(item.blockId)) + continue; // too bad, we do not know the tag, probably an invalid jump + BasicBlock& block = m_blocks.at(item.blockId); + KnownStatePointer state = item.state; if (block.startState) { - state->reduceToCommonKnowledge(*block.startState); + 
state->reduceToCommonKnowledge(*block.startState, !item.blocksSeen.count(item.blockId)); if (*state == *block.startState) continue; } @@ -270,12 +284,12 @@ void ControlFlowGraph::gatherKnowledge() unknownJumpEncountered = true; for (auto const& it: m_blocks) if (it.second.begin < it.second.end && m_items[it.second.begin].type() == Tag) - workQueue.push_back(make_pair(it.first, emptyState->copy())); + workQueue.push_back(WorkQueueItem{it.first, emptyState, set()}); } } else for (auto tag: tags) - workQueue.push_back(make_pair(BlockId(tag), state->copy())); + addWorkQueueItem(item, BlockId(tag), state); } else if (block.begin <= pc && pc < block.end) state->feedItem(m_items.at(pc++)); @@ -287,7 +301,7 @@ void ControlFlowGraph::gatherKnowledge() block.endType == BasicBlock::EndType::HANDOVER || block.endType == BasicBlock::EndType::JUMPI ) - workQueue.push_back(make_pair(block.next, state->copy())); + addWorkQueueItem(item, block.next, state); } // Remove all blocks we never visited here. This might happen because a tag is pushed but diff --git a/KnownState.cpp b/KnownState.cpp index 55e860e2d..39cce3e83 100644 --- a/KnownState.cpp +++ b/KnownState.cpp @@ -175,7 +175,7 @@ template void intersect(_Mapping& _this, _Mapping const& _other it = _this.erase(it); } -void KnownState::reduceToCommonKnowledge(KnownState const& _other) +void KnownState::reduceToCommonKnowledge(KnownState const& _other, bool _combineSequenceNumbers) { int stackDiff = m_stackHeight - _other.m_stackHeight; for (auto it = m_stackElements.begin(); it != m_stackElements.end();) @@ -213,9 +213,11 @@ void KnownState::reduceToCommonKnowledge(KnownState const& _other) intersect(m_storageContent, _other.m_storageContent); intersect(m_memoryContent, _other.m_memoryContent); + if (_combineSequenceNumbers) + m_sequenceNumber = max(m_sequenceNumber, _other.m_sequenceNumber); } -bool KnownState::operator==(const KnownState& _other) const +bool KnownState::operator==(KnownState const& _other) const { if 
(m_storageContent != _other.m_storageContent || m_memoryContent != _other.m_memoryContent) return false; diff --git a/KnownState.h b/KnownState.h index 6dff74a5a..c1c602dcb 100644 --- a/KnownState.h +++ b/KnownState.h @@ -100,13 +100,12 @@ public: void reset() { resetStorage(); resetMemory(); resetStack(); } unsigned sequenceNumber() const { return m_sequenceNumber; } - /// Manually increments the storage and memory sequence number. - void incrementSequenceNumber() { m_sequenceNumber += 2; } /// Replaces the state by the intersection with _other, i.e. only equal knowledge is retained. /// If the stack heighht is different, the smaller one is used and the stack is compared /// relatively. - void reduceToCommonKnowledge(KnownState const& _other); + /// @param _combineSequenceNumbers if true, sets the sequence number to the maximum of both + void reduceToCommonKnowledge(KnownState const& _other, bool _combineSequenceNumbers); /// @returns a shared pointer to a copy of this state. std::shared_ptr copy() const { return std::make_shared(*this); } From 96ea3c63f3edc6058d74cfa77828e900c9e9cb6d Mon Sep 17 00:00:00 2001 From: chriseth Date: Mon, 18 Jan 2016 10:57:03 +0100 Subject: [PATCH 64/67] Fix: Copy empty state. --- ControlFlowGraph.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ControlFlowGraph.cpp b/ControlFlowGraph.cpp index 1ad54d8c7..fc2144c74 100644 --- a/ControlFlowGraph.cpp +++ b/ControlFlowGraph.cpp @@ -284,7 +284,7 @@ void ControlFlowGraph::gatherKnowledge() unknownJumpEncountered = true; for (auto const& it: m_blocks) if (it.second.begin < it.second.end && m_items[it.second.begin].type() == Tag) - workQueue.push_back(WorkQueueItem{it.first, emptyState, set()}); + workQueue.push_back(WorkQueueItem{it.first, emptyState->copy(), set()}); } } else From 05d30fc7cf58ef57238295559aac709338e3c4ea Mon Sep 17 00:00:00 2001 From: chriseth Date: Thu, 3 Mar 2016 16:56:22 +0100 Subject: [PATCH 65/67] Add delegatecall to the optimizer. 
--- GasMeter.cpp | 12 ++++++++---- SemanticInformation.cpp | 3 +++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/GasMeter.cpp b/GasMeter.cpp index b792f04db..935831691 100644 --- a/GasMeter.cpp +++ b/GasMeter.cpp @@ -126,18 +126,22 @@ GasMeter::GasConsumption GasMeter::estimateMax(AssemblyItem const& _item) } case Instruction::CALL: case Instruction::CALLCODE: + case Instruction::DELEGATECALL: + { gas = m_schedule.callGas; if (u256 const* value = classes.knownConstant(m_state->relativeStackElement(0))) gas += (*value); else gas = GasConsumption::infinite(); - if (_item.instruction() != Instruction::CALLCODE) + if (_item.instruction() == Instruction::CALL) gas += m_schedule.callNewAccountGas; // We very rarely know whether the address exists. - if (!classes.knownZero(m_state->relativeStackElement(-2))) + int valueSize = _item.instruction() == Instruction::DELEGATECALL ? 0 : 1; + if (!classes.knownZero(m_state->relativeStackElement(-1 - valueSize))) gas += m_schedule.callValueTransferGas; - gas += memoryGas(-3, -4); - gas += memoryGas(-5, -6); + gas += memoryGas(-2 - valueSize, -3 - valueSize); + gas += memoryGas(-4 - valueSize, -5 - valueSize); break; + } case Instruction::CREATE: gas = m_schedule.createGas; gas += memoryGas(-1, -2); diff --git a/SemanticInformation.cpp b/SemanticInformation.cpp index 309bbe2b1..ea579b837 100644 --- a/SemanticInformation.cpp +++ b/SemanticInformation.cpp @@ -134,6 +134,7 @@ bool SemanticInformation::isDeterministic(AssemblyItem const& _item) { case Instruction::CALL: case Instruction::CALLCODE: + case Instruction::DELEGATECALL: case Instruction::CREATE: case Instruction::GAS: case Instruction::PC: @@ -157,6 +158,7 @@ bool SemanticInformation::invalidatesMemory(Instruction _instruction) case Instruction::MSTORE8: case Instruction::CALL: case Instruction::CALLCODE: + case Instruction::DELEGATECALL: return true; default: return false; @@ -169,6 +171,7 @@ bool SemanticInformation::invalidatesStorage(Instruction 
_instruction) { case Instruction::CALL: case Instruction::CALLCODE: + case Instruction::DELEGATECALL: case Instruction::CREATE: case Instruction::SSTORE: return true; From 2fe6037b9bc42946dafbf0f870ca59546712d14c Mon Sep 17 00:00:00 2001 From: chriseth Date: Fri, 11 Mar 2016 16:12:34 +0100 Subject: [PATCH 66/67] Increment sequence number for opcodes that can write to memory or storage. --- KnownState.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/KnownState.cpp b/KnownState.cpp index 39cce3e83..dd269ff4e 100644 --- a/KnownState.cpp +++ b/KnownState.cpp @@ -143,10 +143,16 @@ KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool ); else { - if (SemanticInformation::invalidatesMemory(_item.instruction())) + bool invMem = SemanticInformation::invalidatesMemory(_item.instruction()); + bool invStor = SemanticInformation::invalidatesStorage(_item.instruction()); + // We could be a bit more fine-grained here (CALL only invalidates part of + // memory, etc), but we do not for now. 
+ if (invMem) resetMemory(); - if (SemanticInformation::invalidatesStorage(_item.instruction())) + if (invStor) resetStorage(); + if (invMem || invStor) + m_sequenceNumber += 2; // Increment by two because it can read and write assertThrow(info.ret <= 1, InvalidDeposit, ""); if (info.ret == 1) setStackElement( From b50e65437ebad535af29e84156b8b1af71f61c3d Mon Sep 17 00:00:00 2001 From: Dimitry Date: Mon, 21 Mar 2016 11:55:45 +0300 Subject: [PATCH 67/67] move libevmasm --- Assembly.cpp => libevmasm/Assembly.cpp | 0 Assembly.h => libevmasm/Assembly.h | 0 AssemblyItem.cpp => libevmasm/AssemblyItem.cpp | 0 AssemblyItem.h => libevmasm/AssemblyItem.h | 0 BlockDeduplicator.cpp => libevmasm/BlockDeduplicator.cpp | 0 BlockDeduplicator.h => libevmasm/BlockDeduplicator.h | 0 CMakeLists.txt => libevmasm/CMakeLists.txt | 0 .../CommonSubexpressionEliminator.cpp | 0 .../CommonSubexpressionEliminator.h | 0 ConstantOptimiser.cpp => libevmasm/ConstantOptimiser.cpp | 0 ConstantOptimiser.h => libevmasm/ConstantOptimiser.h | 0 ControlFlowGraph.cpp => libevmasm/ControlFlowGraph.cpp | 0 ControlFlowGraph.h => libevmasm/ControlFlowGraph.h | 0 Exceptions.h => libevmasm/Exceptions.h | 0 ExpressionClasses.cpp => libevmasm/ExpressionClasses.cpp | 0 ExpressionClasses.h => libevmasm/ExpressionClasses.h | 0 GasMeter.cpp => libevmasm/GasMeter.cpp | 0 GasMeter.h => libevmasm/GasMeter.h | 0 KnownState.cpp => libevmasm/KnownState.cpp | 0 KnownState.h => libevmasm/KnownState.h | 0 LinkerObject.cpp => libevmasm/LinkerObject.cpp | 0 LinkerObject.h => libevmasm/LinkerObject.h | 0 PathGasMeter.cpp => libevmasm/PathGasMeter.cpp | 0 PathGasMeter.h => libevmasm/PathGasMeter.h | 0 SemanticInformation.cpp => libevmasm/SemanticInformation.cpp | 0 SemanticInformation.h => libevmasm/SemanticInformation.h | 0 SourceLocation.h => libevmasm/SourceLocation.h | 0 Version.cpp => libevmasm/Version.cpp | 0 Version.h => libevmasm/Version.h | 0 29 files changed, 0 insertions(+), 0 deletions(-) rename Assembly.cpp => 
libevmasm/Assembly.cpp (100%) rename Assembly.h => libevmasm/Assembly.h (100%) rename AssemblyItem.cpp => libevmasm/AssemblyItem.cpp (100%) rename AssemblyItem.h => libevmasm/AssemblyItem.h (100%) rename BlockDeduplicator.cpp => libevmasm/BlockDeduplicator.cpp (100%) rename BlockDeduplicator.h => libevmasm/BlockDeduplicator.h (100%) rename CMakeLists.txt => libevmasm/CMakeLists.txt (100%) rename CommonSubexpressionEliminator.cpp => libevmasm/CommonSubexpressionEliminator.cpp (100%) rename CommonSubexpressionEliminator.h => libevmasm/CommonSubexpressionEliminator.h (100%) rename ConstantOptimiser.cpp => libevmasm/ConstantOptimiser.cpp (100%) rename ConstantOptimiser.h => libevmasm/ConstantOptimiser.h (100%) rename ControlFlowGraph.cpp => libevmasm/ControlFlowGraph.cpp (100%) rename ControlFlowGraph.h => libevmasm/ControlFlowGraph.h (100%) rename Exceptions.h => libevmasm/Exceptions.h (100%) rename ExpressionClasses.cpp => libevmasm/ExpressionClasses.cpp (100%) rename ExpressionClasses.h => libevmasm/ExpressionClasses.h (100%) rename GasMeter.cpp => libevmasm/GasMeter.cpp (100%) rename GasMeter.h => libevmasm/GasMeter.h (100%) rename KnownState.cpp => libevmasm/KnownState.cpp (100%) rename KnownState.h => libevmasm/KnownState.h (100%) rename LinkerObject.cpp => libevmasm/LinkerObject.cpp (100%) rename LinkerObject.h => libevmasm/LinkerObject.h (100%) rename PathGasMeter.cpp => libevmasm/PathGasMeter.cpp (100%) rename PathGasMeter.h => libevmasm/PathGasMeter.h (100%) rename SemanticInformation.cpp => libevmasm/SemanticInformation.cpp (100%) rename SemanticInformation.h => libevmasm/SemanticInformation.h (100%) rename SourceLocation.h => libevmasm/SourceLocation.h (100%) rename Version.cpp => libevmasm/Version.cpp (100%) rename Version.h => libevmasm/Version.h (100%) diff --git a/Assembly.cpp b/libevmasm/Assembly.cpp similarity index 100% rename from Assembly.cpp rename to libevmasm/Assembly.cpp diff --git a/Assembly.h b/libevmasm/Assembly.h similarity index 100% 
rename from Assembly.h rename to libevmasm/Assembly.h diff --git a/AssemblyItem.cpp b/libevmasm/AssemblyItem.cpp similarity index 100% rename from AssemblyItem.cpp rename to libevmasm/AssemblyItem.cpp diff --git a/AssemblyItem.h b/libevmasm/AssemblyItem.h similarity index 100% rename from AssemblyItem.h rename to libevmasm/AssemblyItem.h diff --git a/BlockDeduplicator.cpp b/libevmasm/BlockDeduplicator.cpp similarity index 100% rename from BlockDeduplicator.cpp rename to libevmasm/BlockDeduplicator.cpp diff --git a/BlockDeduplicator.h b/libevmasm/BlockDeduplicator.h similarity index 100% rename from BlockDeduplicator.h rename to libevmasm/BlockDeduplicator.h diff --git a/CMakeLists.txt b/libevmasm/CMakeLists.txt similarity index 100% rename from CMakeLists.txt rename to libevmasm/CMakeLists.txt diff --git a/CommonSubexpressionEliminator.cpp b/libevmasm/CommonSubexpressionEliminator.cpp similarity index 100% rename from CommonSubexpressionEliminator.cpp rename to libevmasm/CommonSubexpressionEliminator.cpp diff --git a/CommonSubexpressionEliminator.h b/libevmasm/CommonSubexpressionEliminator.h similarity index 100% rename from CommonSubexpressionEliminator.h rename to libevmasm/CommonSubexpressionEliminator.h diff --git a/ConstantOptimiser.cpp b/libevmasm/ConstantOptimiser.cpp similarity index 100% rename from ConstantOptimiser.cpp rename to libevmasm/ConstantOptimiser.cpp diff --git a/ConstantOptimiser.h b/libevmasm/ConstantOptimiser.h similarity index 100% rename from ConstantOptimiser.h rename to libevmasm/ConstantOptimiser.h diff --git a/ControlFlowGraph.cpp b/libevmasm/ControlFlowGraph.cpp similarity index 100% rename from ControlFlowGraph.cpp rename to libevmasm/ControlFlowGraph.cpp diff --git a/ControlFlowGraph.h b/libevmasm/ControlFlowGraph.h similarity index 100% rename from ControlFlowGraph.h rename to libevmasm/ControlFlowGraph.h diff --git a/Exceptions.h b/libevmasm/Exceptions.h similarity index 100% rename from Exceptions.h rename to 
libevmasm/Exceptions.h diff --git a/ExpressionClasses.cpp b/libevmasm/ExpressionClasses.cpp similarity index 100% rename from ExpressionClasses.cpp rename to libevmasm/ExpressionClasses.cpp diff --git a/ExpressionClasses.h b/libevmasm/ExpressionClasses.h similarity index 100% rename from ExpressionClasses.h rename to libevmasm/ExpressionClasses.h diff --git a/GasMeter.cpp b/libevmasm/GasMeter.cpp similarity index 100% rename from GasMeter.cpp rename to libevmasm/GasMeter.cpp diff --git a/GasMeter.h b/libevmasm/GasMeter.h similarity index 100% rename from GasMeter.h rename to libevmasm/GasMeter.h diff --git a/KnownState.cpp b/libevmasm/KnownState.cpp similarity index 100% rename from KnownState.cpp rename to libevmasm/KnownState.cpp diff --git a/KnownState.h b/libevmasm/KnownState.h similarity index 100% rename from KnownState.h rename to libevmasm/KnownState.h diff --git a/LinkerObject.cpp b/libevmasm/LinkerObject.cpp similarity index 100% rename from LinkerObject.cpp rename to libevmasm/LinkerObject.cpp diff --git a/LinkerObject.h b/libevmasm/LinkerObject.h similarity index 100% rename from LinkerObject.h rename to libevmasm/LinkerObject.h diff --git a/PathGasMeter.cpp b/libevmasm/PathGasMeter.cpp similarity index 100% rename from PathGasMeter.cpp rename to libevmasm/PathGasMeter.cpp diff --git a/PathGasMeter.h b/libevmasm/PathGasMeter.h similarity index 100% rename from PathGasMeter.h rename to libevmasm/PathGasMeter.h diff --git a/SemanticInformation.cpp b/libevmasm/SemanticInformation.cpp similarity index 100% rename from SemanticInformation.cpp rename to libevmasm/SemanticInformation.cpp diff --git a/SemanticInformation.h b/libevmasm/SemanticInformation.h similarity index 100% rename from SemanticInformation.h rename to libevmasm/SemanticInformation.h diff --git a/SourceLocation.h b/libevmasm/SourceLocation.h similarity index 100% rename from SourceLocation.h rename to libevmasm/SourceLocation.h diff --git a/Version.cpp b/libevmasm/Version.cpp similarity index 
100% rename from Version.cpp rename to libevmasm/Version.cpp diff --git a/Version.h b/libevmasm/Version.h similarity index 100% rename from Version.h rename to libevmasm/Version.h