diff --git a/libevmasm/Assembly.cpp b/libevmasm/Assembly.cpp index 460881581..59f3717aa 100644 --- a/libevmasm/Assembly.cpp +++ b/libevmasm/Assembly.cpp @@ -67,8 +67,9 @@ unsigned Assembly::codeSize(unsigned subTagSize) const for (auto const& i: m_data) ret += i.second.size(); - for (AssemblyItem const& i: items()) - ret += i.bytesRequired(tagSize, Precision::Approximate); + for (auto const& codeSection: m_codeSections) + for (AssemblyItem const& i: codeSection.items) + ret += i.bytesRequired(tagSize, Precision::Approximate); if (numberEncodingSize(ret) <= tagSize) return static_cast(ret); } @@ -521,14 +522,15 @@ LinkerObject const& Assembly::assemble() const bool setsImmutables = false; bool pushesImmutables = false; - for (auto const& i: items()) - if (i.type() == AssignImmutable) - { - i.setImmutableOccurrences(immutableReferencesBySub[i.data()].second.size()); - setsImmutables = true; - } - else if (i.type() == PushImmutable) - pushesImmutables = true; + for (auto const& codeSection: m_codeSections) + for (auto const& i: codeSection.items) + if (i.type() == AssignImmutable) + { + i.setImmutableOccurrences(immutableReferencesBySub[i.data()].second.size()); + setsImmutables = true; + } + else if (i.type() == PushImmutable) + pushesImmutables = true; if (setsImmutables || pushesImmutables) assertThrow( setsImmutables != pushesImmutables, @@ -536,6 +538,59 @@ LinkerObject const& Assembly::assemble() const "Cannot push and assign immutables in the same assembly subroutine." ); + // TODO: assert zero inputs/outputs on code section zero + // TODO: assert one code section being present and *only* one being present unless EOF + + // Insert EOF1 header. + vector codeSectionSizeOffsets; + auto setCodeSectionSize = [&](size_t _section, size_t _size) { + bytesRef length(ret.bytecode.data() + codeSectionSizeOffsets.at(_section), 2); + toBigEndian(_size, length); + }; + size_t dataSectionSizeOffset = 0; + auto setDataSectionSize = [&](size_t _size) { + bytesRef length(ret.bytecode.data() + dataSectionSizeOffset, 2); + toBigEndian(_size, length); + }; + if (needsEOFContainer) + { + bool needsTypeSection = m_codeSections.size() > 1; + // TODO: empty data is disallowed + ret.bytecode.push_back(0xef); + ret.bytecode.push_back(0x00); + ret.bytecode.push_back(0x01); // version 1 + if (needsTypeSection) + { + ret.bytecode.push_back(0x03); // kind=type + ret.bytecode.push_back(0x00); // length of type section + ret.bytecode.push_back(0x00); + bytesRef length(&ret.bytecode.back() + 1 - 2, 2); + toBigEndian(m_codeSections.size() * 2, length); + } + for (auto const& codeSection: m_codeSections) + { + (void) codeSection; + ret.bytecode.push_back(0x01); // kind=code + codeSectionSizeOffsets.emplace_back(ret.bytecode.size()); + ret.bytecode.push_back(0x00); // placeholder for length of code + ret.bytecode.push_back(0x00); + } + ret.bytecode.push_back(0x02); // kind=data + dataSectionSizeOffset = ret.bytecode.size(); + ret.bytecode.push_back(0x00); // length of data + ret.bytecode.push_back(0x00); + ret.bytecode.push_back(0x00); // terminator + + if (needsTypeSection) + for (auto const& codeSection: m_codeSections) + { + ret.bytecode.push_back(codeSection.inputs); + ret.bytecode.push_back(codeSection.outputs); + } + } + + + unsigned headerSize = static_cast(ret.bytecode.size()); unsigned bytesRequiredForCode = codeSize(static_cast(subTagSize)); m_tagPositionsInBytecode = vector(m_usedTags, numeric_limits::max()); map> tagRef; @@ -545,199 +600,163 @@ LinkerObject const& Assembly::assemble() const unsigned bytesPerTag = numberEncodingSize(bytesRequiredForCode); uint8_t tagPush = static_cast(pushInstruction(bytesPerTag)); - // TODO: all of this is a bit off - unsigned bytesRequiredIncludingData = (m_eofVersion.has_value() ? 10 : 0) + bytesRequiredForCode + 1 + static_cast(m_auxiliaryData.size()); - for (auto const& sub: m_subs) - bytesRequiredIncludingData += static_cast(sub->assemble().bytecode.size()); + if (!needsEOFContainer) + ++bytesRequiredForCode; ///< Additional INVALID marker. + // TODO: all of this is a bit off + unsigned bytesRequiredForData = static_cast(m_auxiliaryData.size()); + for (auto const& sub: m_subs) + bytesRequiredForData += static_cast(sub->assemble().bytecode.size()); + + unsigned bytesRequiredIncludingData = headerSize + bytesRequiredForCode + bytesRequiredForData; unsigned bytesPerDataRef = numberEncodingSize(bytesRequiredIncludingData); uint8_t dataRefPush = static_cast(pushInstruction(bytesPerDataRef)); ret.bytecode.reserve(bytesRequiredIncludingData); - // Insert EOF1 header. - bytesRef eofCodeLength(&ret.bytecode.back(), 0); - vector eofFunctionLengths; - bytesRef eofDataLength(&ret.bytecode.back(), 0); - if (needsEOFContainer) - { - // TODO: empty data is disallowed - ret.bytecode.push_back(0xef); - ret.bytecode.push_back(0x00); - ret.bytecode.push_back(0x01); // version 1 - if (!m_functions.empty()) - { - ret.bytecode.push_back(0x03); // kind=type - ret.bytecode.push_back(0x00); // length of type section - ret.bytecode.push_back(0x00); - bytesRef length(&ret.bytecode.back() + 1 - 2, 2); - toBigEndian((m_functions.size() + 1) * 2, length); - } - ret.bytecode.push_back(0x01); // kind=code - ret.bytecode.push_back(0x00); // length of code - ret.bytecode.push_back(0x00); - eofCodeLength = bytesRef(&ret.bytecode.back() + 1 - 2, 2); - for (size_t i = 0; i < m_functions.size(); ++i) - { - ret.bytecode.push_back(0x01); // kind=code - ret.bytecode.push_back(0x00); // length of code - ret.bytecode.push_back(0x00); - eofFunctionLengths.emplace_back(&ret.bytecode.back() + 1 - 2, 2); - } - ret.bytecode.push_back(0x02); // kind=data - ret.bytecode.push_back(0x00); // length of data - ret.bytecode.push_back(0x00); - eofDataLength = bytesRef(&ret.bytecode.back() + 1 - 2, 2); - ret.bytecode.push_back(0x00); // terminator - if (!m_functions.empty()) - { - ret.bytecode.push_back(0); - ret.bytecode.push_back(0); - for (auto const& [args, rets, functionAssembly]: m_functions) - { - (void)functionAssembly; - ret.bytecode.push_back(args); - ret.bytecode.push_back(rets); - } - } - } - auto const codeStart = ret.bytecode.size(); - auto assembleItems = [&](AssemblyItems const& _items) { - for (AssemblyItem const& i: _items) + for (auto&& [codeSectionIndex, codeSection]: m_codeSections | ranges::views::enumerate) { - // store position of the invalid jump destination - if (i.type() != Tag && m_tagPositionsInBytecode[0] == numeric_limits::max()) - m_tagPositionsInBytecode[0] = ret.bytecode.size(); + auto const sectionStart = ret.bytecode.size(); - switch (i.type()) + for (AssemblyItem const& i: codeSection.items) { - case Operation: - ret.bytecode.push_back(static_cast(i.instruction())); - break; - case Push: - { - unsigned b = max(1, numberEncodingSize(i.data())); - ret.bytecode.push_back(static_cast(pushInstruction(b))); - ret.bytecode.resize(ret.bytecode.size() + b); - bytesRef byr(&ret.bytecode.back() + 1 - b, b); - toBigEndian(i.data(), byr); - break; - } - case PushTag: - { - ret.bytecode.push_back(tagPush); - tagRef[ret.bytecode.size()] = i.splitForeignPushTag(); - ret.bytecode.resize(ret.bytecode.size() + bytesPerTag); - break; - } - case PushData: - ret.bytecode.push_back(dataRefPush); - dataRef.insert(make_pair(h256(i.data()), ret.bytecode.size())); - ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); - break; - case PushSub: - assertThrow(i.data() <= numeric_limits::max(), AssemblyException, ""); - ret.bytecode.push_back(dataRefPush); - subRef.insert(make_pair(static_cast(i.data()), ret.bytecode.size())); - ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); - break; - case PushSubSize: - { - assertThrow(i.data() <= numeric_limits::max(), AssemblyException, ""); - auto s = subAssemblyById(static_cast(i.data()))->assemble().bytecode.size(); - i.setPushedValue(u256(s)); - unsigned b = max(1, numberEncodingSize(s)); - ret.bytecode.push_back(static_cast(pushInstruction(b))); - ret.bytecode.resize(ret.bytecode.size() + b); - bytesRef byr(&ret.bytecode.back() + 1 - b, b); - toBigEndian(s, byr); - break; - } - case PushProgramSize: - { - ret.bytecode.push_back(dataRefPush); - sizeRef.push_back(static_cast(ret.bytecode.size())); - ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); - break; - } - case PushLibraryAddress: - ret.bytecode.push_back(static_cast(Instruction::PUSH20)); - ret.linkReferences[ret.bytecode.size()] = m_libraries.at(i.data()); - ret.bytecode.resize(ret.bytecode.size() + 20); - break; - case PushImmutable: - ret.bytecode.push_back(static_cast(Instruction::PUSH32)); - // Maps keccak back to the "identifier" string of that immutable. - ret.immutableReferences[i.data()].first = m_immutables.at(i.data()); - // Record the bytecode offset of the PUSH32 argument. - ret.immutableReferences[i.data()].second.emplace_back(ret.bytecode.size()); - // Advance bytecode by 32 bytes (default initialized). - ret.bytecode.resize(ret.bytecode.size() + 32); - break; - case VerbatimBytecode: - ret.bytecode += i.verbatimData(); - break; - case AssignImmutable: - { - // Expect 2 elements on stack (source, dest_base) - auto const& offsets = immutableReferencesBySub[i.data()].second; - for (size_t i = 0; i < offsets.size(); ++i) + // store position of the invalid jump destination + if (i.type() != Tag && m_tagPositionsInBytecode[0] == numeric_limits::max()) + m_tagPositionsInBytecode[0] = ret.bytecode.size(); + + switch (i.type()) { - if (i != offsets.size() - 1) + case Operation: + ret.bytecode.push_back(static_cast(i.instruction())); + break; + case Push: + { + unsigned b = max(1, numberEncodingSize(i.data())); + ret.bytecode.push_back(static_cast(pushInstruction(b))); + ret.bytecode.resize(ret.bytecode.size() + b); + bytesRef byr(&ret.bytecode.back() + 1 - b, b); + toBigEndian(i.data(), byr); + break; + } + case PushTag: + { + ret.bytecode.push_back(tagPush); + tagRef[ret.bytecode.size()] = i.splitForeignPushTag(); + ret.bytecode.resize(ret.bytecode.size() + bytesPerTag); + break; + } + case PushData: + ret.bytecode.push_back(dataRefPush); + dataRef.insert(make_pair(h256(i.data()), ret.bytecode.size())); + ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); + break; + case PushSub: + assertThrow(i.data() <= numeric_limits::max(), AssemblyException, ""); + ret.bytecode.push_back(dataRefPush); + subRef.insert(make_pair(static_cast(i.data()), ret.bytecode.size())); + ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); + break; + case PushSubSize: + { + assertThrow(i.data() <= numeric_limits::max(), AssemblyException, ""); + auto s = subAssemblyById(static_cast(i.data()))->assemble().bytecode.size(); + i.setPushedValue(u256(s)); + unsigned b = max(1, numberEncodingSize(s)); + ret.bytecode.push_back(static_cast(pushInstruction(b))); + ret.bytecode.resize(ret.bytecode.size() + b); + bytesRef byr(&ret.bytecode.back() + 1 - b, b); + toBigEndian(s, byr); + break; + } + case PushProgramSize: + { + ret.bytecode.push_back(dataRefPush); + sizeRef.push_back(static_cast(ret.bytecode.size())); + ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); + break; + } + case PushLibraryAddress: + ret.bytecode.push_back(static_cast(Instruction::PUSH20)); + ret.linkReferences[ret.bytecode.size()] = m_libraries.at(i.data()); + ret.bytecode.resize(ret.bytecode.size() + 20); + break; + case PushImmutable: + ret.bytecode.push_back(static_cast(Instruction::PUSH32)); + // Maps keccak back to the "identifier" string of that immutable. + ret.immutableReferences[i.data()].first = m_immutables.at(i.data()); + // Record the bytecode offset of the PUSH32 argument. + ret.immutableReferences[i.data()].second.emplace_back(ret.bytecode.size()); + // Advance bytecode by 32 bytes (default initialized). + ret.bytecode.resize(ret.bytecode.size() + 32); + break; + case VerbatimBytecode: + ret.bytecode += i.verbatimData(); + break; + case AssignImmutable: + { + // Expect 2 elements on stack (source, dest_base) + auto const& offsets = immutableReferencesBySub[i.data()].second; + for (size_t i = 0; i < offsets.size(); ++i) { - ret.bytecode.push_back(uint8_t(Instruction::DUP2)); - ret.bytecode.push_back(uint8_t(Instruction::DUP2)); + if (i != offsets.size() - 1) + { + ret.bytecode.push_back(uint8_t(Instruction::DUP2)); + ret.bytecode.push_back(uint8_t(Instruction::DUP2)); + } + // TODO: should we make use of the constant optimizer methods for pushing the offsets? + bytes offsetBytes = toCompactBigEndian(u256(offsets[i])); + ret.bytecode.push_back(static_cast(pushInstruction(static_cast(offsetBytes.size())))); + ret.bytecode += offsetBytes; + ret.bytecode.push_back(uint8_t(Instruction::ADD)); + ret.bytecode.push_back(uint8_t(Instruction::MSTORE)); } - // TODO: should we make use of the constant optimizer methods for pushing the offsets? - bytes offsetBytes = toCompactBigEndian(u256(offsets[i])); - ret.bytecode.push_back(static_cast(pushInstruction(static_cast(offsetBytes.size())))); - ret.bytecode += offsetBytes; - ret.bytecode.push_back(uint8_t(Instruction::ADD)); - ret.bytecode.push_back(uint8_t(Instruction::MSTORE)); + if (offsets.empty()) + { + ret.bytecode.push_back(uint8_t(Instruction::POP)); + ret.bytecode.push_back(uint8_t(Instruction::POP)); + } + immutableReferencesBySub.erase(i.data()); + break; } - if (offsets.empty()) + case PushDeployTimeAddress: + ret.bytecode.push_back(static_cast(Instruction::PUSH20)); + ret.bytecode.resize(ret.bytecode.size() + 20); + break; + case Tag: { - ret.bytecode.push_back(uint8_t(Instruction::POP)); - ret.bytecode.push_back(uint8_t(Instruction::POP)); + assertThrow(i.data() != 0, AssemblyException, "Invalid tag position."); + assertThrow(i.splitForeignPushTag().first == numeric_limits::max(), AssemblyException, "Foreign tag."); + size_t tagId = static_cast(i.data()); + assertThrow(ret.bytecode.size() < 0xffffffffL, AssemblyException, "Tag too large."); + assertThrow(m_tagPositionsInBytecode[tagId] == numeric_limits::max(), AssemblyException, "Duplicate tag position."); + m_tagPositionsInBytecode[tagId] = ret.bytecode.size() - codeStart; + ret.bytecode.push_back(static_cast(Instruction::JUMPDEST)); + break; + } + case CallF: + { + ret.bytecode.push_back(static_cast(Instruction::CALLF)); + ret.bytecode.resize(ret.bytecode.size() + 2); + bytesRef byr(&ret.bytecode.back() + 1 - 2, 2); + toBigEndian(i.data(), byr); + break; + } + case RetF: + { + ret.bytecode.push_back(static_cast(Instruction::RETF)); + break; + } + default: + assertThrow(false, InvalidOpcode, "Unexpected opcode while assembling."); } - immutableReferencesBySub.erase(i.data()); - break; - } - case PushDeployTimeAddress: - ret.bytecode.push_back(static_cast(Instruction::PUSH20)); - ret.bytecode.resize(ret.bytecode.size() + 20); - break; - case Tag: - { - assertThrow(i.data() != 0, AssemblyException, "Invalid tag position."); - assertThrow(i.splitForeignPushTag().first == numeric_limits::max(), AssemblyException, "Foreign tag."); - size_t tagId = static_cast(i.data()); - assertThrow(ret.bytecode.size() < 0xffffffffL, AssemblyException, "Tag too large."); - assertThrow(m_tagPositionsInBytecode[tagId] == numeric_limits::max(), AssemblyException, "Duplicate tag position."); - m_tagPositionsInBytecode[tagId] = ret.bytecode.size() - codeStart; - ret.bytecode.push_back(static_cast(Instruction::JUMPDEST)); - break; - } - case CallF: - { - ret.bytecode.push_back(static_cast(Instruction::CALLF)); - ret.bytecode.resize(ret.bytecode.size() + 2); - bytesRef byr(&ret.bytecode.back() + 1 - 2, 2); - toBigEndian(i.data(), byr); - break; - } - case RetF: - { - ret.bytecode.push_back(static_cast(Instruction::RETF)); - break; - } - default: - assertThrow(false, InvalidOpcode, "Unexpected opcode while assembling."); } + + auto sectionEnd = ret.bytecode.size(); + + if (needsEOFContainer) + setCodeSectionSize(codeSectionIndex, sectionEnd - sectionStart); } - }; - assembleItems(items()); if (!immutableReferencesBySub.empty()) throw @@ -751,22 +770,6 @@ LinkerObject const& Assembly::assemble() const // Append an INVALID here to help tests find miscompilation. ret.bytecode.push_back(static_cast(Instruction::INVALID)); - auto const codeLength = ret.bytecode.size() - codeStart; - if (needsEOFContainer) - { - assertThrow(codeLength > 0 && codeLength <= 0xffff, AssemblyException, "Invalid code section size."); - toBigEndian(codeLength, eofCodeLength); - } - - - for (size_t i = 0; i < m_functions.size(); ++i) - { - size_t start = ret.bytecode.size(); - assembleItems(std::get<2>(m_functions[i])); - size_t size = ret.bytecode.size() - start; - toBigEndian(size, eofFunctionLengths.at(i)); - } - auto const dataStart = ret.bytecode.size(); for (auto const& [subIdPath, bytecodeOffset]: subRef) @@ -843,7 +846,7 @@ LinkerObject const& Assembly::assemble() const dataLength++; } assertThrow(dataLength > 0u && dataLength <= 0xffff, AssemblyException, "Invalid data section size."); - toBigEndian(dataLength, eofDataLength); + setDataSectionSize(dataLength); } return ret; diff --git a/libevmasm/Assembly.h b/libevmasm/Assembly.h index c6b205db3..6e2397ace 100644 --- a/libevmasm/Assembly.h +++ b/libevmasm/Assembly.h @@ -49,7 +49,13 @@ using AssemblyPointer = std::shared_ptr; class Assembly { public: - Assembly(bool _creation, std::optional _eofVersion, std::string _name): m_creation(_creation), m_eofVersion(_eofVersion), m_name(std::move(_name)) { } + Assembly(bool _creation, std::optional _eofVersion, std::string _name): + m_creation(_creation), + m_eofVersion(_eofVersion), + m_name(std::move(_name)) + { + m_codeSections.emplace_back(); + } std::optional eofVersion() const { return m_eofVersion; } bool supportsFunctions() const { return m_eofVersion.has_value(); } @@ -57,14 +63,13 @@ public: AssemblyItem newPushTag() { assertThrow(m_usedTags < 0xffffffff, AssemblyException, ""); return AssemblyItem(PushTag, m_usedTags++); } AssemblyItem newFunctionCall(uint16_t _functionID) { - auto&& [args, rets, functionItems] = m_functions.at(_functionID); - (void)functionItems; - return AssemblyItem::functionCall(_functionID, args, rets); + assertThrow(_functionID < m_codeSections.size(), AssemblyException, "Call to undeclared function."); + auto const& section = m_codeSections.at(_functionID); + return AssemblyItem::functionCall(_functionID, section.inputs, section.outputs); } AssemblyItem newFunctionReturn() { - assertThrow(m_currentFunctionID.has_value(), AssemblyException, ""); - return AssemblyItem::functionReturn(std::get<1>(m_functions.at(*m_currentFunctionID))); + return AssemblyItem::functionReturn(m_codeSections.at(m_currentCodeSection).outputs); } /// Returns a tag identified by the given name. Creates it if it does not yet exist. AssemblyItem namedTag(std::string const& _name, size_t _params, size_t _returns, std::optional _sourceID); @@ -73,23 +78,24 @@ public: AssemblyItem newSub(AssemblyPointer const& _sub) { m_subs.push_back(_sub); return AssemblyItem(PushSub, m_subs.size() - 1); } uint16_t createFunction(uint8_t _args, uint8_t _rets) { - size_t functionID = m_functions.size(); - assertThrow(functionID <= 0xFFFF, AssemblyException, "Too many functions"); - assertThrow(!m_currentFunctionID.has_value(), AssemblyException, "Nested createFunction"); - m_functions.emplace_back(_args, _rets, AssemblyItems{}); + size_t functionID = m_codeSections.size(); + assertThrow(functionID <= 0xFFFF, AssemblyException, "Too many functions."); + assertThrow(m_currentCodeSection == 0, AssemblyException, "Functions need to be declared from the main block."); + m_codeSections.emplace_back(CodeSection{_args, _rets, {}}); return static_cast(functionID); } void beginFunction(uint16_t _functionID) { - auto& function = m_functions.at(_functionID); - assertThrow(!m_currentFunctionID.has_value(), AssemblyException, "Nested beginFunction"); - assertThrow(std::get<2>(function).empty(), AssemblyException, "Function already defined."); - m_currentFunctionID = _functionID; + assertThrow(m_currentCodeSection == 0, AssemblyException, "Atempted to begin a function before ending the last one."); + assertThrow(_functionID < m_codeSections.size(), AssemblyException, "Attempt to begin an undeclared function."); + auto& section = m_codeSections.at(_functionID); + assertThrow(section.items.empty(), AssemblyException, "Function already defined."); + m_currentCodeSection = _functionID; } void endFunction() { - assertThrow(m_currentFunctionID.has_value(), AssemblyException, ""); - m_currentFunctionID.reset(); + assertThrow(m_currentCodeSection != 0, AssemblyException, "End function without begin function."); + m_currentCodeSection = 0; } Assembly const& sub(size_t _sub) const { return *m_subs.at(_sub); } Assembly& sub(size_t _sub) { return *m_subs.at(_sub); } @@ -143,19 +149,13 @@ public: /// Returns the assembly items. AssemblyItems const& items() const { - if (m_currentFunctionID.has_value()) - return std::get<2>(m_functions.at(*m_currentFunctionID)); - else - return m_mainItems; + return m_codeSections.at(m_currentCodeSection).items; } /// Returns the mutable assembly items. Use with care! AssemblyItems& items() { - if (m_currentFunctionID.has_value()) - return std::get<2>(m_functions.at(*m_currentFunctionID)); - else - return m_mainItems; + return m_codeSections.at(m_currentCodeSection).items; } int deposit() const { return m_deposit; } @@ -243,12 +243,18 @@ protected: }; std::map m_namedTags; - AssemblyItems m_mainItems; std::map m_data; /// Data that is appended to the very end of the contract. bytes m_auxiliaryData; std::vector> m_subs; - std::vector> m_functions; + struct CodeSection + { + uint8_t inputs = 0; + uint8_t outputs = 0; + AssemblyItems items{}; + }; + std::vector m_codeSections; + uint16_t m_currentCodeSection = 0; std::map m_strings; std::map m_libraries; ///< Identifiers of libraries to be linked. std::map m_immutables; ///< Identifiers of immutables. @@ -268,7 +274,6 @@ protected: /// True, if the assembly contains contract creation code. bool const m_creation = false; std::optional m_eofVersion; - std::optional m_currentFunctionID; /// Internal name of the assembly object, only used with the Yul backend /// currently std::string m_name;