Transition from bytecode to more general linker objects.

This commit is contained in:
chriseth 2015-09-10 12:02:18 +02:00
parent 3ca3fb492d
commit 129b4142d8
8 changed files with 240 additions and 59 deletions

View File

@ -50,8 +50,9 @@ void Assembly::append(Assembly const& _a)
m_data.insert(i);
for (auto const& i: _a.m_strings)
m_strings.insert(i);
for (auto const& i: _a.m_subs)
m_subs.push_back(i);
m_subs += _a.m_subs;
for (auto const& lib: _a.m_libraries)
m_libraries.insert(lib);
assert(!_a.m_baseDeposit);
assert(!_a.m_totalDeposit);
@ -144,6 +145,9 @@ ostream& Assembly::streamAsm(ostream& _out, string const& _prefix, StringMap con
case PushProgramSize:
_out << " PUSHSIZE";
break;
case PushLibraryAddress:
_out << " PUSHLIB \"" << m_libraries.at(h256(i.data())) << "\"";
break;
case Tag:
_out << "tag" << dec << i.data() << ": " << endl << _prefix << " JUMPDEST";
break;
@ -161,7 +165,7 @@ ostream& Assembly::streamAsm(ostream& _out, string const& _prefix, StringMap con
_out << _prefix << ".data:" << endl;
for (auto const& i: m_data)
if (u256(i.first) >= m_subs.size())
_out << _prefix << " " << hex << (unsigned)(u256)i.first << ": " << toHex(i.second) << endl;
_out << _prefix << " " << hex << (unsigned)(u256)i.first << ": " << dev::toHex(i.second) << endl;
for (size_t i = 0; i < m_subs.size(); ++i)
{
_out << _prefix << " " << hex << i << ": " << endl;
@ -232,6 +236,11 @@ Json::Value Assembly::streamAsmJson(ostream& _out, StringMap const& _sourceCodes
collection.append(
createJsonValue("PUSHSIZE", i.location().start, i.location().end));
break;
case PushLibraryAddress:
collection.append(
createJsonValue("PUSHLIB", i.location().start, i.location().end, m_libraries.at(h256(i.data())))
);
break;
case Tag:
collection.append(
createJsonValue("tag", i.location().start, i.location().end, string(i.data())));
@ -287,6 +296,13 @@ AssemblyItem const& Assembly::append(AssemblyItem const& _i)
return back();
}
/// Registers @a _identifier in the set of libraries to be linked, keyed by the SHA3 hash of
/// the identifier, and returns a PushLibraryAddress item carrying that hash as its data.
AssemblyItem Assembly::newPushLibraryAddress(string const& _identifier)
{
	h256 const libraryHash(dev::sha3(_identifier));
	// Remember the readable name so that the hash stored in the item can be resolved later.
	m_libraries[libraryHash] = _identifier;
	return AssemblyItem(PushLibraryAddress, libraryHash);
}
void Assembly::injectStart(AssemblyItem const& _i)
{
m_items.insert(m_items.begin(), _i);
@ -377,96 +393,107 @@ Assembly& Assembly::optimise(bool _enable, bool _isCreation, size_t _runs)
return *this;
}
bytes Assembly::assemble() const
LinkerObject const& Assembly::assemble() const
{
bytes ret;
if (!m_assembledObject.bytecode.empty())
return m_assembledObject;
LinkerObject& ret = m_assembledObject;
unsigned totalBytes = bytesRequired();
vector<unsigned> tagPos(m_usedTags);
map<unsigned, unsigned> tagRef;
multimap<h256, unsigned> dataRef;
multimap<size_t, size_t> subRef;
vector<unsigned> sizeRef; ///< Pointers to code locations where the size of the program is inserted
unsigned bytesPerTag = dev::bytesRequired(totalBytes);
byte tagPush = (byte)Instruction::PUSH1 - 1 + bytesPerTag;
for (size_t i = 0; i < m_subs.size(); ++i)
m_data[u256(i)] = m_subs[i].assemble();
unsigned bytesRequiredIncludingData = bytesRequired();
for (auto const& sub: m_subs)
bytesRequiredIncludingData += sub.assemble().bytecode.size();
unsigned bytesPerDataRef = dev::bytesRequired(bytesRequiredIncludingData);
byte dataRefPush = (byte)Instruction::PUSH1 - 1 + bytesPerDataRef;
ret.reserve(bytesRequiredIncludingData);
// m_data must not change from here on
ret.bytecode.reserve(bytesRequiredIncludingData);
for (AssemblyItem const& i: m_items)
{
// store position of the invalid jump destination
if (i.type() != Tag && tagPos[0] == 0)
tagPos[0] = ret.size();
tagPos[0] = ret.bytecode.size();
switch (i.type())
{
case Operation:
ret.push_back((byte)i.data());
ret.bytecode.push_back((byte)i.data());
break;
case PushString:
{
ret.push_back((byte)Instruction::PUSH32);
ret.bytecode.push_back((byte)Instruction::PUSH32);
unsigned ii = 0;
for (auto j: m_strings.at((h256)i.data()))
if (++ii > 32)
break;
else
ret.push_back((byte)j);
ret.bytecode.push_back((byte)j);
while (ii++ < 32)
ret.push_back(0);
ret.bytecode.push_back(0);
break;
}
case Push:
{
byte b = max<unsigned>(1, dev::bytesRequired(i.data()));
ret.push_back((byte)Instruction::PUSH1 - 1 + b);
ret.resize(ret.size() + b);
bytesRef byr(&ret.back() + 1 - b, b);
ret.bytecode.push_back((byte)Instruction::PUSH1 - 1 + b);
ret.bytecode.resize(ret.bytecode.size() + b);
bytesRef byr(&ret.bytecode.back() + 1 - b, b);
toBigEndian(i.data(), byr);
break;
}
case PushTag:
{
ret.push_back(tagPush);
tagRef[ret.size()] = (unsigned)i.data();
ret.resize(ret.size() + bytesPerTag);
ret.bytecode.push_back(tagPush);
tagRef[ret.bytecode.size()] = (unsigned)i.data();
ret.bytecode.resize(ret.bytecode.size() + bytesPerTag);
break;
}
case PushData: case PushSub:
{
ret.push_back(dataRefPush);
dataRef.insert(make_pair((h256)i.data(), ret.size()));
ret.resize(ret.size() + bytesPerDataRef);
case PushData:
ret.bytecode.push_back(dataRefPush);
dataRef.insert(make_pair((h256)i.data(), ret.bytecode.size()));
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
break;
case PushSub:
ret.bytecode.push_back(dataRefPush);
subRef.insert(make_pair(size_t(i.data()), ret.bytecode.size()));
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
break;
}
case PushSubSize:
{
auto s = m_data[i.data()].size();
auto s = m_subs.at(size_t(i.data())).assemble().bytecode.size();
i.setPushedValue(u256(s));
byte b = max<unsigned>(1, dev::bytesRequired(s));
ret.push_back((byte)Instruction::PUSH1 - 1 + b);
ret.resize(ret.size() + b);
bytesRef byr(&ret.back() + 1 - b, b);
ret.bytecode.push_back((byte)Instruction::PUSH1 - 1 + b);
ret.bytecode.resize(ret.bytecode.size() + b);
bytesRef byr(&ret.bytecode.back() + 1 - b, b);
toBigEndian(s, byr);
break;
}
case PushProgramSize:
{
ret.push_back(dataRefPush);
sizeRef.push_back(ret.size());
ret.resize(ret.size() + bytesPerDataRef);
ret.bytecode.push_back(dataRefPush);
sizeRef.push_back(ret.bytecode.size());
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
break;
}
case PushLibraryAddress:
ret.bytecode.push_back(byte(Instruction::PUSH20));
ret.linkReferences[ret.bytecode.size()] = m_libraries.at(i.data());
ret.bytecode.resize(ret.bytecode.size() + 20);
break;
case Tag:
tagPos[(unsigned)i.data()] = ret.size();
tagPos[(unsigned)i.data()] = ret.bytecode.size();
assertThrow(i.data() != 0, AssemblyException, "");
ret.push_back((byte)Instruction::JUMPDEST);
ret.bytecode.push_back((byte)Instruction::JUMPDEST);
break;
default:
BOOST_THROW_EXCEPTION(InvalidOpcode());
@ -474,7 +501,7 @@ bytes Assembly::assemble() const
}
for (auto const& i: tagRef)
{
bytesRef r(ret.data() + i.first, bytesPerTag);
bytesRef r(ret.bytecode.data() + i.first, bytesPerTag);
auto tag = i.second;
if (tag >= tagPos.size())
tag = 0;
@ -484,28 +511,36 @@ bytes Assembly::assemble() const
toBigEndian(tagPos[tag], r);
}
if (!m_data.empty())
if (!dataRef.empty() && !subRef.empty())
ret.bytecode.push_back(0);
for (size_t i = 0; i < m_subs.size(); ++i)
{
ret.push_back(0);
for (auto const& i: m_data)
auto references = subRef.equal_range(i);
if (references.first == references.second)
continue;
for (auto ref = references.first; ref != references.second; ++ref)
{
auto its = dataRef.equal_range(i.first);
if (its.first != its.second)
{
for (auto it = its.first; it != its.second; ++it)
{
bytesRef r(ret.data() + it->second, bytesPerDataRef);
toBigEndian(ret.size(), r);
}
for (auto b: i.second)
ret.push_back(b);
}
bytesRef r(ret.bytecode.data() + ref->second, bytesPerDataRef);
toBigEndian(ret.bytecode.size(), r);
}
ret.append(m_subs[i].assemble());
}
for (auto const& dataItem: m_data)
{
auto references = dataRef.equal_range(dataItem.first);
if (references.first == references.second)
continue;
for (auto ref = references.first; ref != references.second; ++ref)
{
bytesRef r(ret.bytecode.data() + ref->second, bytesPerDataRef);
toBigEndian(ret.bytecode.size(), r);
}
ret.bytecode += dataItem.second;
}
for (unsigned pos: sizeRef)
{
bytesRef r(ret.data() + pos, bytesPerDataRef);
toBigEndian(ret.size(), r);
bytesRef r(ret.bytecode.data() + pos, bytesPerDataRef);
toBigEndian(ret.bytecode.size(), r);
}
return ret;
}

View File

@ -25,9 +25,11 @@
#include <sstream>
#include <libdevcore/Common.h>
#include <libdevcore/Assertions.h>
#include <libdevcore/SHA3.h>
#include <libevmcore/Instruction.h>
#include <libevmasm/SourceLocation.h>
#include <libevmasm/AssemblyItem.h>
#include <libevmasm/LinkerObject.h>
#include "Exceptions.h"
#include <json/json.h>
@ -47,11 +49,12 @@ public:
AssemblyItem newTag() { return AssemblyItem(Tag, m_usedTags++); }
AssemblyItem newPushTag() { return AssemblyItem(PushTag, m_usedTags++); }
AssemblyItem newData(bytes const& _data) { h256 h = (u256)std::hash<std::string>()(asString(_data)); m_data[h] = _data; return AssemblyItem(PushData, h); }
AssemblyItem newData(bytes const& _data) { h256 h(sha3(asString(_data))); m_data[h] = _data; return AssemblyItem(PushData, h); }
AssemblyItem newSub(Assembly const& _sub) { m_subs.push_back(_sub); return AssemblyItem(PushSub, m_subs.size() - 1); }
Assembly const& sub(size_t _sub) const { return m_subs.at(_sub); }
AssemblyItem newPushString(std::string const& _data) { h256 h = (u256)std::hash<std::string>()(_data); m_strings[h] = _data; return AssemblyItem(PushString, h); }
AssemblyItem newPushString(std::string const& _data) { h256 h(sha3(_data)); m_strings[h] = _data; return AssemblyItem(PushString, h); }
AssemblyItem newPushSubSize(u256 const& _subId) { return AssemblyItem(PushSubSize, _subId); }
AssemblyItem newPushLibraryAddress(std::string const& _identifier);
AssemblyItem append() { return append(newTag()); }
void append(Assembly const& _a);
@ -63,6 +66,7 @@ public:
/// Pushes the final size of the current assembly itself. Use this when the code is modified
/// after compilation and CODESIZE is not an option.
void appendProgramSize() { append(AssemblyItem(PushProgramSize)); }
void appendLibraryAddress(std::string const& _identifier) { append(newPushLibraryAddress(_identifier)); }
AssemblyItem appendJump() { auto ret = append(newPushTag()); append(Instruction::JUMP); return ret; }
AssemblyItem appendJumpI() { auto ret = append(newPushTag()); append(Instruction::JUMPI); return ret; }
@ -92,8 +96,9 @@ public:
/// Changes the source location used for each appended item.
void setSourceLocation(SourceLocation const& _location) { m_currentSourceLocation = _location; }
bytes assemble() const;
bytes const& data(h256 const& _i) const { return m_data[_i]; }
/// Assembles the assembly into bytecode. The assembly should not be modified after this call.
LinkerObject const& assemble() const;
bytes const& data(h256 const& _i) const { return m_data.at(_i); }
/// Modify (if @a _enable is set) and return the current assembly such that creation and
/// execution gas usage is optimised. @a _isCreation should be true for the top-level assembly.
@ -106,6 +111,7 @@ public:
const StringMap &_sourceCodes = StringMap(),
bool _inJsonFormat = false
) const;
protected:
std::string locationFromSources(StringMap const& _sourceCodes, SourceLocation const& _location) const;
void donePath() { if (m_totalDeposit != INT_MAX && m_totalDeposit != m_deposit) BOOST_THROW_EXCEPTION(InvalidDeposit()); }
@ -120,9 +126,12 @@ protected:
// 0 is reserved for exception
unsigned m_usedTags = 1;
AssemblyItems m_items;
mutable std::map<h256, bytes> m_data;
std::map<h256, bytes> m_data;
std::vector<Assembly> m_subs;
std::map<h256, std::string> m_strings;
std::map<h256, std::string> m_libraries; ///< Identifiers of libraries to be linked.
mutable LinkerObject m_assembledObject;
int m_deposit = 0;
int m_baseDeposit = 0;

View File

@ -44,6 +44,8 @@ unsigned AssemblyItem::bytesRequired(unsigned _addressLength) const
case PushData:
case PushSub:
return 1 + _addressLength;
case PushLibraryAddress:
return 21;
default:
break;
}
@ -63,6 +65,7 @@ int AssemblyItem::deposit() const
case PushSub:
case PushSubSize:
case PushProgramSize:
case PushLibraryAddress:
return 1;
case Tag:
return 0;
@ -118,6 +121,9 @@ ostream& dev::eth::operator<<(ostream& _out, AssemblyItem const& _item)
case PushProgramSize:
_out << " PushProgramSize";
break;
case PushLibraryAddress:
_out << " PushLibraryAddress " << hex << h256(_item.data()).abridgedMiddle();
break;
case UndefinedItem:
_out << " ???";
break;

View File

@ -34,7 +34,19 @@ namespace dev
namespace eth
{
enum AssemblyItemType { UndefinedItem, Operation, Push, PushString, PushTag, PushSub, PushSubSize, PushProgramSize, Tag, PushData };
/// Kinds of items an assembly consists of. The type determines how the item's data is
/// interpreted and which bytes are emitted for it when the assembly is assembled.
enum AssemblyItemType {
UndefinedItem, ///< No valid item type; streamed as "???".
Operation, ///< A single instruction, assembled as one opcode byte.
Push, ///< Push of a constant value, using the smallest PUSHn that can hold it.
PushString, ///< PUSH32 of a registered string, truncated or zero-padded to 32 bytes.
PushTag, ///< Push the code position of a tag.
PushSub, ///< Push the code offset at which a sub-assembly is placed.
PushSubSize, ///< Push the size of the assembled bytecode of a sub-assembly.
PushProgramSize, ///< Push the final size of the assembled program.
Tag, ///< Jump target, assembled as JUMPDEST.
PushData, ///< Push the code offset at which a data item is placed.
PushLibraryAddress ///< Push a currently unknown address of another (library) contract.
};
class Assembly;

View File

@ -53,6 +53,7 @@ GasMeter::GasConsumption GasMeter::estimateMax(AssemblyItem const& _item)
case PushSub:
case PushSubSize:
case PushProgramSize:
case PushLibraryAddress:
gas = runGas(Instruction::PUSH1);
break;
case Tag:

62
LinkerObject.cpp Normal file
View File

@ -0,0 +1,62 @@
/*
This file is part of cpp-ethereum.
cpp-ethereum is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
cpp-ethereum is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
*/
/** @file LinkerObject.cpp
* @author Christian R <c@ethdev.com>
* @date 2015
*/
#include <libevmasm/LinkerObject.h>
#include <libdevcore/CommonData.h>
using namespace dev;
using namespace dev::eth;
using namespace std;
/// Appends the bytecode of @a _other to this object and takes over its link references.
void LinkerObject::append(LinkerObject const& _other)
{
	// Offsets in _other are relative to its own start, so rebase them onto the current end.
	size_t const baseOffset = bytecode.size();
	for (auto const& reference: _other.linkReferences)
		linkReferences[baseOffset + reference.first] = reference.second;
	bytecode += _other.bytecode;
}
/// Writes the address of every library found in @a _libraryAddresses into the 20 bytes at
/// the offsets referring to it. References to libraries that are not in the map are kept.
void LinkerObject::link(map<string, h160> const& _libraryAddresses)
{
	std::map<size_t, std::string> unresolved;
	for (auto const& reference: linkReferences)
	{
		auto const address = _libraryAddresses.find(reference.second);
		if (address != _libraryAddresses.end())
			// Known library: overwrite the placeholder bytes with the actual address.
			address->second.ref().copyTo(ref(bytecode).cropped(reference.first, 20));
		else
			unresolved.insert(reference);
	}
	// Only the references that could not be resolved remain attached to the object.
	linkReferences.swap(unresolved);
}
/// @returns the bytecode as a hex string in which the 40 characters of every unlinked address
/// are replaced by a placeholder: two underscores, up to 36 characters of the library
/// identifier padded with underscores, and two closing underscores.
string LinkerObject::toHex() const
{
	string result = dev::toHex(bytecode);
	for (auto const& reference: linkReferences)
	{
		// Every byte is rendered as two hex characters.
		size_t const start = reference.first * 2;
		string const& identifier = reference.second;
		// Blank out the whole address first ...
		for (size_t k = 0; k < 40; ++k)
			result[start + k] = '_';
		// ... then write the (possibly truncated) identifier after the two leading underscores.
		size_t const visibleLength = identifier.size() < 36 ? identifier.size() : 36;
		for (size_t k = 0; k < visibleLength; ++k)
			result[start + 2 + k] = identifier[k];
	}
	return result;
}

55
LinkerObject.h Normal file
View File

@ -0,0 +1,55 @@
/*
This file is part of cpp-ethereum.
cpp-ethereum is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
cpp-ethereum is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
*/
/** @file LinkerObject.h
* @author Gav Wood <i@gavwood.com>
* @date 2014
*/
#pragma once
#include <libdevcore/Common.h>
#include <libdevcore/FixedHash.h>
namespace dev
{
namespace eth
{
/**
* Binary object that potentially still needs to be linked (i.e. addresses of other contracts
* need to be filled in).
*/
struct LinkerObject
{
/// The code of the object. Address slots listed in linkReferences are not filled in yet.
bytes bytecode;
/// Map from offsets in bytecode to library identifiers. The addresses starting at those offsets
/// need to be replaced by the actual addresses by the linker.
/// Each referenced address occupies 20 bytes of the bytecode.
std::map<size_t, std::string> linkReferences;
/// Appends the bytecode of @a _other and incorporates its link references.
/// The offsets of the incorporated references are shifted by the size of this object's
/// bytecode before the append.
void append(LinkerObject const& _other);
/// Links the given libraries by replacing their uses in the code and removes them from the references.
/// References to libraries that are not contained in @a _libraryAddresses are kept.
void link(std::map<std::string, h160> const& _libraryAddresses);
/// @returns a hex representation of the bytecode of the given object, replacing unlinked
/// addresses by placeholders.
/// A placeholder fills the 40 hex characters of the address: two underscores, up to 36
/// characters of the library identifier padded with underscores, and two more underscores.
std::string toHex() const;
};
}
}

View File

@ -43,6 +43,7 @@ bool SemanticInformation::breaksCSEAnalysisBlock(AssemblyItem const& _item)
case PushSubSize:
case PushProgramSize:
case PushData:
case PushLibraryAddress:
return false;
case Operation:
{