diff --git a/libevmasm/Assembly.cpp b/libevmasm/Assembly.cpp index 1b24879c4..1cfc04ce6 100644 --- a/libevmasm/Assembly.cpp +++ b/libevmasm/Assembly.cpp @@ -52,11 +52,12 @@ AssemblyItem const& Assembly::append(AssemblyItem _i) { assertThrow(m_deposit >= 0, AssemblyException, "Stack underflow."); m_deposit += static_cast(_i.deposit()); - items().emplace_back(std::move(_i)); - if (!items().back().location().isValid() && m_currentSourceLocation.isValid()) - items().back().setLocation(m_currentSourceLocation); - items().back().m_modifierDepth = m_currentModifierDepth; - return items().back(); + auto& currentItems = m_codeSections.at(m_currentCodeSection).items; + currentItems.emplace_back(std::move(_i)); + if (!currentItems.back().location().isValid() && m_currentSourceLocation.isValid()) + currentItems.back().setLocation(m_currentSourceLocation); + currentItems.back().m_modifierDepth = m_currentModifierDepth; + return currentItems.back(); } unsigned Assembly::codeSize(unsigned subTagSize) const @@ -64,8 +65,6 @@ unsigned Assembly::codeSize(unsigned subTagSize) const for (unsigned tagSize = subTagSize; true; ++tagSize) { size_t ret = 1; - for (auto const& i: m_data) - ret += i.second.size(); for (auto const& codeSection: m_codeSections) for (AssemblyItem const& i: codeSection.items) @@ -190,7 +189,8 @@ void Assembly::assemblyStream( { Functionalizer f(_out, _prefix, _sourceCodes, *this); - for (auto const& i: items()) + // TODO: support EOF + for (auto const& i: m_codeSections.front().items) f.feed(i, _debugInfoSelection); f.flush(); @@ -228,7 +228,8 @@ Json::Value Assembly::assemblyJSON(map const& _sourceIndices, Json::Value root; root[".code"] = Json::arrayValue; Json::Value& code = root[".code"]; - for (AssemblyItem const& item: items()) + // TODO: support EOF + for (AssemblyItem const& item: m_codeSections.front().items) { int sourceIndex = -1; if (item.location().sourceName) @@ -345,24 +346,25 @@ map const& Assembly::optimiseInternal( std::set _tagsReferencedFromOutside ) { - if (m_eofVersion.has_value()) - // TODO - return *m_tagReplacements; - if (m_tagReplacements) return *m_tagReplacements; // Run optimisation for sub-assemblies. + // TODO: verify and double-check this for EOF. for (size_t subId = 0; subId < m_subs.size(); ++subId) { OptimiserSettings settings = _settings; Assembly& sub = *m_subs[subId]; + std::set referencedTags; + for (auto& codeSection: m_codeSections) + referencedTags += JumpdestRemover::referencedTags(codeSection.items, subId); map const& subTagReplacements = sub.optimiseInternal( settings, - JumpdestRemover::referencedTags(items(), subId) + referencedTags ); // Apply the replacements (can be empty). - BlockDeduplicator::applyTagReplacement(items(), subTagReplacements, subId); + for (auto& codeSection: m_codeSections) + BlockDeduplicator::applyTagReplacement(codeSection.items, subTagReplacements, subId); } map tagReplacements; @@ -371,9 +373,9 @@ map const& Assembly::optimiseInternal( { count = 0; - if (_settings.runInliner) + if (_settings.runInliner && !m_eofVersion.has_value()) Inliner{ - items(), + m_codeSections.front().items, _tagsReferencedFromOutside, _settings.expectedExecutionsPerDeployment, isCreation(), @@ -382,25 +384,34 @@ map const& Assembly::optimiseInternal( if (_settings.runJumpdestRemover) { - JumpdestRemover jumpdestOpt{items()}; - if (jumpdestOpt.optimise(_tagsReferencedFromOutside)) - count++; + // TODO: verify this for EOF. + for (auto& codeSection: m_codeSections) + { + JumpdestRemover jumpdestOpt{codeSection.items}; + if (jumpdestOpt.optimise(_tagsReferencedFromOutside)) + count++; + } } if (_settings.runPeephole) { - PeepholeOptimiser peepOpt{items()}; - while (peepOpt.optimise()) + // TODO: verify this for EOF. + for (auto& codeSection: m_codeSections) { - count++; - assertThrow(count < 64000, OptimizerException, "Peephole optimizer seems to be stuck."); + PeepholeOptimiser peepOpt{codeSection.items}; + while (peepOpt.optimise()) + { + count++; + assertThrow(count < 64000, OptimizerException, "Peephole optimizer seems to be stuck."); + } } } // This only modifies PushTags, we have to run again to actually remove code. - if (_settings.runDeduplicate) + // TODO: investigate options for EOF. + if (_settings.runDeduplicate && !m_eofVersion.has_value()) { - BlockDeduplicator deduplicator{items()}; + BlockDeduplicator deduplicator{m_codeSections.front().items}; if (deduplicator.deduplicate()) { for (auto const& replacement: deduplicator.replacedTags()) @@ -423,24 +434,26 @@ map const& Assembly::optimiseInternal( } } - if (_settings.runCSE) + // TODO: investigate for EOF + if (_settings.runCSE && !m_eofVersion.has_value()) { // Control flow graph optimization has been here before but is disabled because it // assumes we only jump to tags that are pushed. This is not the case anymore with // function types that can be stored in storage. AssemblyItems optimisedItems; - bool usesMSize = ranges::any_of(items(), [](AssemblyItem const& _i) { + auto& items = m_codeSections.front().items; + bool usesMSize = ranges::any_of(items, [](AssemblyItem const& _i) { return _i == AssemblyItem{Instruction::MSIZE} || _i.type() == VerbatimBytecode; }); - auto iter = items().begin(); - while (iter != items().end()) + auto iter = items.begin(); + while (iter != items.end()) { KnownState emptyState; CommonSubexpressionEliminator eliminator{emptyState}; auto orig = iter; - iter = eliminator.feedItems(iter, items().end(), usesMSize); + iter = eliminator.feedItems(iter, items.end(), usesMSize); bool shouldReplace = false; AssemblyItems optimisedChunk; try @@ -467,9 +480,9 @@ map const& Assembly::optimiseInternal( else copy(orig, iter, back_inserter(optimisedItems)); } - if (optimisedItems.size() < items().size()) + if (optimisedItems.size() < items.size()) { - items() = std::move(optimisedItems); + items = std::move(optimisedItems); count++; } } @@ -541,12 +554,17 @@ LinkerObject const& Assembly::assemble() const // TODO: assert zero inputs/outputs on code section zero // TODO: assert one code section being present and *only* one being present unless EOF + unsigned bytesRequiredForSubs = 0; + // TODO: consider fully producing all sub and data refs in this pass already. + for (auto&& codeSection: m_codeSections) + for (AssemblyItem const& i: codeSection.items) + if (i.type() == PushSub) + bytesRequiredForSubs += static_cast(subAssemblyById(static_cast(i.data()))->assemble().bytecode.size()); unsigned bytesRequiredForDataUpperBound = static_cast(m_auxiliaryData.size()); - for (auto const& sub: m_subs) - bytesRequiredForDataUpperBound += static_cast(sub->assemble().bytecode.size()); // Some of these may be unreferenced and not actually end up in data. for (auto const& dataItem: m_data) bytesRequiredForDataUpperBound += static_cast(dataItem.second.size()); + unsigned bytesRequiredForDataAndSubsUpperBound = bytesRequiredForDataUpperBound + bytesRequiredForSubs; // Insert EOF1 header. vector codeSectionSizeOffsets; @@ -583,7 +601,7 @@ LinkerObject const& Assembly::assemble() const ret.bytecode.push_back(0x00); // placeholder for length of code ret.bytecode.push_back(0x00); } - if (bytesRequiredForDataUpperBound > 0) + if (bytesRequiredForDataAndSubsUpperBound > 0) { ret.bytecode.push_back(0x02); // kind=data dataSectionSizeOffset = ret.bytecode.size(); @@ -600,7 +618,6 @@ LinkerObject const& Assembly::assemble() const } } - unsigned headerSize = static_cast(ret.bytecode.size()); unsigned bytesRequiredForCode = codeSize(static_cast(subTagSize)); m_tagPositionsInBytecode = vector(m_usedTags, numeric_limits::max()); @@ -608,19 +625,21 @@ LinkerObject const& Assembly::assemble() const multimap dataRef; multimap subRef; vector sizeRef; ///< Pointers to code locations where the size of the program is inserted - unsigned bytesPerTag = numberEncodingSize(headerSize + bytesRequiredForCode); + unsigned bytesPerTag = numberEncodingSize(headerSize + bytesRequiredForCode + bytesRequiredForDataUpperBound); uint8_t tagPush = static_cast(pushInstruction(bytesPerTag)); if (!needsEOFContainer) ++bytesRequiredForCode; ///< Additional INVALID marker. - unsigned bytesRequiredIncludingDataUpperBound = headerSize + bytesRequiredForCode + bytesRequiredForDataUpperBound; - unsigned bytesPerDataRef = numberEncodingSize(bytesRequiredIncludingDataUpperBound); + unsigned bytesRequiredIncludingDataAndSubsUpperBound = headerSize + bytesRequiredForCode + bytesRequiredForDataAndSubsUpperBound; + unsigned bytesPerDataRef = numberEncodingSize(bytesRequiredIncludingDataAndSubsUpperBound); uint8_t dataRefPush = static_cast(pushInstruction(bytesPerDataRef)); - ret.bytecode.reserve(bytesRequiredIncludingDataUpperBound); + ret.bytecode.reserve(bytesRequiredIncludingDataAndSubsUpperBound); auto const codeStart = ret.bytecode.size(); + + for (auto&& [codeSectionIndex, codeSection]: m_codeSections | ranges::views::enumerate) { auto const sectionStart = ret.bytecode.size(); @@ -778,11 +797,26 @@ LinkerObject const& Assembly::assemble() const auto const dataStart = ret.bytecode.size(); - for (auto const& [subIdPath, bytecodeOffset]: subRef) { - bytesRef r(ret.bytecode.data() + bytecodeOffset, bytesPerDataRef); - toBigEndian(ret.bytecode.size(), r); - ret.append(subAssemblyById(subIdPath)->assemble()); + std::map subAssemblyOffsets; + for (auto const& [subIdPath, bytecodeOffset]: subRef) + { + LinkerObject subObject = subAssemblyById(subIdPath)->assemble(); + util::h256 h(util::keccak256(util::asString(subObject.bytecode))); + bytesRef r(ret.bytecode.data() + bytecodeOffset, bytesPerDataRef); + if (size_t* subAssemblyOffset = util::valueOrNullptr(subAssemblyOffsets, h)) + toBigEndian(*subAssemblyOffset, r); + else + { + toBigEndian(ret.bytecode.size(), r); + subAssemblyOffsets[h] = ret.bytecode.size(); + ret.bytecode += subObject.bytecode; + } + // TODO: double-check this. + for (auto const& ref: subObject.linkReferences) + ret.linkReferences[ref.first + subAssemblyOffsets[h]] = ref.second; + + } } for (auto const& i: tagRef) @@ -806,12 +840,13 @@ LinkerObject const& Assembly::assemble() const { size_t position = m_tagPositionsInBytecode.at(tagInfo.id); optional tagIndex; - for (auto&& [index, item]: items() | ranges::views::enumerate) - if (item.type() == Tag && static_cast(item.data()) == tagInfo.id) - { - tagIndex = index; - break; - } + for (auto& codeSection: m_codeSections) + for (auto&& [index, item]: codeSection.items | ranges::views::enumerate) + if (item.type() == Tag && static_cast(item.data()) == tagInfo.id) + { + tagIndex = index; + break; + } ret.functionDebugData[name] = { position == numeric_limits::max() ? nullopt : optional{position}, tagIndex, @@ -836,6 +871,12 @@ LinkerObject const& Assembly::assemble() const ret.bytecode += m_auxiliaryData; + if (needsEOFContainer && bytesRequiredForDataAndSubsUpperBound > 0 && ret.bytecode.size() == dataStart) + { + // We have commited to a data section, but not actually needed it, so create a fake one. + ret.bytecode.push_back(0); + } + for (unsigned pos: sizeRef) { bytesRef r(ret.bytecode.data() + pos, bytesPerDataRef); @@ -845,17 +886,27 @@ LinkerObject const& Assembly::assemble() const auto dataLength = ret.bytecode.size() - dataStart; if (needsEOFContainer) { - assertThrow(bytesRequiredForDataUpperBound >= dataLength, AssemblyException, "Unexpected data size."); - if (bytesRequiredForDataUpperBound > 0) + if (bytesRequiredForDataAndSubsUpperBound < dataLength) { - if (dataLength == 0) + std::cout << "Auxdata: " << m_auxiliaryData.size() << std::endl; + std::cout << "m_data: " << m_data.size() << std::endl; + std::cout << "subRef: " << subRef.size() << std::endl; + for (auto&& [subIdPath, offset]: subRef) { - // We have commited to a data section, but not actually needed it, so create a fake one. - ++dataLength; - ret.bytecode.push_back(0); + (void)offset; + std::cout << "R: " << subAssemblyById(subIdPath) << std::endl; } + std::cout << "m_subs: " << m_subs.size() << std::endl; + for (auto const& sub: m_subs) + { + std::cout << "sub: " << sub.get() << std::endl; + } + } + assertThrow(bytesRequiredForDataAndSubsUpperBound >= dataLength, AssemblyException, "More data than expected. " + to_string(dataLength) + " > " + to_string(bytesRequiredForDataUpperBound)); + if (bytesRequiredForDataAndSubsUpperBound > 0) + { + assertThrow(0 < dataLength && dataLength <= 0xffff, AssemblyException, "Invalid data section size."); setDataSectionSize(dataLength); - assertThrow(dataLength <= 0xffff, AssemblyException, "Invalid data section size."); } } diff --git a/libevmasm/Assembly.h b/libevmasm/Assembly.h index 3d822fdce..ba54a1d3d 100644 --- a/libevmasm/Assembly.h +++ b/libevmasm/Assembly.h @@ -146,18 +146,6 @@ public: /// Appends @a _data literally to the very end of the bytecode. void appendToAuxiliaryData(bytes const& _data) { m_auxiliaryData += _data; } - /// Returns the assembly items. - AssemblyItems const& items() const - { - return m_codeSections.at(m_currentCodeSection).items; - } - - /// Returns the mutable assembly items. Use with care! - AssemblyItems& items() - { - return m_codeSections.at(m_currentCodeSection).items; - } - int deposit() const { return m_deposit; } void adjustDeposit(int _adjustment) { m_deposit += _adjustment; assertThrow(m_deposit >= 0, InvalidDeposit, ""); } void setDeposit(int _deposit) { m_deposit = _deposit; assertThrow(m_deposit >= 0, InvalidDeposit, ""); } @@ -217,6 +205,23 @@ public: bool isCreation() const { return m_creation; } + struct CodeSection + { + uint8_t inputs = 0; + uint8_t outputs = 0; + AssemblyItems items{}; + }; + + std::vector& codeSections() + { + return m_codeSections; + } + + std::vector const& codeSections() const + { + return m_codeSections; + } + protected: /// Does the same operations as @a optimise, but should only be applied to a sub and /// returns the replaced tags. Also takes an argument containing the tags of this assembly @@ -247,12 +252,6 @@ protected: /// Data that is appended to the very end of the contract. bytes m_auxiliaryData; std::vector> m_subs; - struct CodeSection - { - uint8_t inputs = 0; - uint8_t outputs = 0; - AssemblyItems items{}; - }; std::vector m_codeSections; uint16_t m_currentCodeSection = 0; std::map m_strings; diff --git a/libevmasm/ConstantOptimiser.cpp b/libevmasm/ConstantOptimiser.cpp index 37ac4c18b..d66ee48bc 100644 --- a/libevmasm/ConstantOptimiser.cpp +++ b/libevmasm/ConstantOptimiser.cpp @@ -36,46 +36,49 @@ unsigned ConstantOptimisationMethod::optimiseConstants( ) { // TODO: design the optimiser in a way this is not needed - AssemblyItems& _items = _assembly.items(); - unsigned optimisations = 0; - map pushes; - for (AssemblyItem const& item: _items) - if (item.type() == Push) - pushes[item]++; - map pendingReplacements; - for (auto it: pushes) + for (auto& codeSection: _assembly.codeSections()) { - AssemblyItem const& item = it.first; - if (item.data() < 0x100) - continue; - Params params; - params.multiplicity = it.second; - params.isCreation = _isCreation; - params.runs = _runs; - params.evmVersion = _evmVersion; - LiteralMethod lit(params, item.data()); - bigint literalGas = lit.gasNeeded(); - CodeCopyMethod copy(params, item.data()); - bigint copyGas = copy.gasNeeded(); - ComputeMethod compute(params, item.data()); - bigint computeGas = compute.gasNeeded(); - AssemblyItems replacement; - if (copyGas < literalGas && copyGas < computeGas) + AssemblyItems& _items = codeSection.items; + + map pushes; + for (AssemblyItem const& item: _items) + if (item.type() == Push) + pushes[item]++; + map pendingReplacements; + for (auto it: pushes) { - replacement = copy.execute(_assembly); - optimisations++; + AssemblyItem const& item = it.first; + if (item.data() < 0x100) + continue; + Params params; + params.multiplicity = it.second; + params.isCreation = _isCreation; + params.runs = _runs; + params.evmVersion = _evmVersion; + LiteralMethod lit(params, item.data()); + bigint literalGas = lit.gasNeeded(); + CodeCopyMethod copy(params, item.data()); + bigint copyGas = copy.gasNeeded(); + ComputeMethod compute(params, item.data()); + bigint computeGas = compute.gasNeeded(); + AssemblyItems replacement; + if (copyGas < literalGas && copyGas < computeGas) + { + replacement = copy.execute(_assembly); + optimisations++; + } + else if (computeGas < literalGas && computeGas <= copyGas) + { + replacement = compute.execute(_assembly); + optimisations++; + } + if (!replacement.empty()) + pendingReplacements[item.data()] = replacement; } - else if (computeGas < literalGas && computeGas <= copyGas) - { - replacement = compute.execute(_assembly); - optimisations++; - } - if (!replacement.empty()) - pendingReplacements[item.data()] = replacement; + if (!pendingReplacements.empty()) + replaceConstants(_items, pendingReplacements); } - if (!pendingReplacements.empty()) - replaceConstants(_items, pendingReplacements); return optimisations; } diff --git a/libsolidity/interface/CompilerStack.cpp b/libsolidity/interface/CompilerStack.cpp index 7d1eeeff0..9e3703628 100644 --- a/libsolidity/interface/CompilerStack.cpp +++ b/libsolidity/interface/CompilerStack.cpp @@ -765,7 +765,8 @@ evmasm::AssemblyItems const* CompilerStack::assemblyItems(string const& _contrac solThrow(CompilerError, "Compilation was not successful."); Contract const& currentContract = contract(_contractName); - return currentContract.evmAssembly ? ¤tContract.evmAssembly->items() : nullptr; + solAssert(currentContract.evmAssembly->codeSections().size() == 1, "Expected a single code section in legacy codegen."); + return currentContract.evmAssembly ? ¤tContract.evmAssembly->codeSections().front().items : nullptr; } evmasm::AssemblyItems const* CompilerStack::runtimeAssemblyItems(string const& _contractName) const @@ -774,7 +775,8 @@ evmasm::AssemblyItems const* CompilerStack::runtimeAssemblyItems(string const& _ solThrow(CompilerError, "Compilation was not successful."); Contract const& currentContract = contract(_contractName); - return currentContract.evmRuntimeAssembly ? ¤tContract.evmRuntimeAssembly->items() : nullptr; + solAssert(currentContract.evmRuntimeAssembly->codeSections().size() == 1, "Expected a single code section in legacy codegen."); + return currentContract.evmRuntimeAssembly ? ¤tContract.evmRuntimeAssembly->codeSections().front().items : nullptr; } Json::Value CompilerStack::generatedSources(string const& _contractName, bool _runtime) const diff --git a/libyul/YulStack.cpp b/libyul/YulStack.cpp index 9b80cbf57..859720a5d 100644 --- a/libyul/YulStack.cpp +++ b/libyul/YulStack.cpp @@ -234,9 +234,10 @@ YulStack::assembleWithDeployed(optional _deployName) const creationObject.bytecode = make_shared(creationAssembly->assemble()); yulAssert(creationObject.bytecode->immutableReferences.empty(), "Leftover immutables."); creationObject.assembly = creationAssembly->assemblyString(m_debugInfoSelection); + // TODO: fix for EOF creationObject.sourceMappings = make_unique( evmasm::AssemblyItem::computeSourceMapping( - creationAssembly->items(), + creationAssembly->codeSections().front().items, {{m_charStream->name(), 0}} ) ); @@ -246,9 +247,10 @@ YulStack::assembleWithDeployed(optional _deployName) const { deployedObject.bytecode = make_shared(deployedAssembly->assemble()); deployedObject.assembly = deployedAssembly->assemblyString(m_debugInfoSelection); + // TODO: fix for EOF deployedObject.sourceMappings = make_unique( evmasm::AssemblyItem::computeSourceMapping( - deployedAssembly->items(), + deployedAssembly->codeSections().front().items, {{m_charStream->name(), 0}} ) );