diff --git a/Changelog.md b/Changelog.md index 88b9bde88..6fd65df79 100644 --- a/Changelog.md +++ b/Changelog.md @@ -13,6 +13,7 @@ Compiler Features: Bugfixes: + * Code Generator: Fix constructor source mappings for immutables. * Commandline Interface: Fix extra newline character being appended to sources passed through standard input, affecting their hashes. * Commandline Interface: Report output selection options unsupported by the selected input mode instead of ignoring them. * SMTChecker: Fix internal error in magic type access (``block``, ``msg``, ``tx``). diff --git a/docs/internals/source_mappings.rst b/docs/internals/source_mappings.rst index fbb84e2d1..11027b8ac 100644 --- a/docs/internals/source_mappings.rst +++ b/docs/internals/source_mappings.rst @@ -64,3 +64,7 @@ This means the following source mappings represent the same information: ``1:2:1;1:9:1;2:1:2;2:1:2;2:1:2`` ``1:2:1;:9;2:1:2;;`` + +Important to note is that when the :ref:`verbatim ` builtin is used, +the source mappings will be invalid: The builtin is considered a single +instruction instead of potentially multiple. diff --git a/docs/yul.rst b/docs/yul.rst index e4819ff22..0a90a2825 100644 --- a/docs/yul.rst +++ b/docs/yul.rst @@ -1002,6 +1002,8 @@ within one Yul subobject. If at least one ``memoryguard`` call is found in a sub the additional optimiser steps will be run on it. +.. _yul-verbatim: + verbatim ^^^^^^^^ diff --git a/libevmasm/Assembly.cpp b/libevmasm/Assembly.cpp index 82bf2be71..12c7be230 100644 --- a/libevmasm/Assembly.cpp +++ b/libevmasm/Assembly.cpp @@ -48,11 +48,11 @@ using namespace solidity::evmasm; using namespace solidity::langutil; using namespace solidity::util; -AssemblyItem const& Assembly::append(AssemblyItem const& _i) +AssemblyItem const& Assembly::append(AssemblyItem _i) { assertThrow(m_deposit >= 0, AssemblyException, "Stack underflow."); m_deposit += static_cast(_i.deposit()); - m_items.emplace_back(_i); + m_items.emplace_back(move(_i)); if (!m_items.back().location().isValid() && m_currentSourceLocation.isValid()) m_items.back().setLocation(m_currentSourceLocation); m_items.back().m_modifierDepth = m_currentModifierDepth; @@ -68,7 +68,7 @@ unsigned Assembly::codeSize(unsigned subTagSize) const ret += i.second.size(); for (AssemblyItem const& i: m_items) - ret += i.bytesRequired(tagSize); + ret += i.bytesRequired(tagSize, Precision::Approximate); if (numberEncodingSize(ret) <= tagSize) return static_cast(ret); } @@ -696,8 +696,11 @@ LinkerObject const& Assembly::assemble() const break; case PushImmutable: ret.bytecode.push_back(static_cast(Instruction::PUSH32)); + // Maps keccak back to the "identifier" string of that immutable. ret.immutableReferences[i.data()].first = m_immutables.at(i.data()); + // Record the bytecode offset of the PUSH32 argument. ret.immutableReferences[i.data()].second.emplace_back(ret.bytecode.size()); + // Advance bytecode by 32 bytes (default initialized). ret.bytecode.resize(ret.bytecode.size() + 32); break; case VerbatimBytecode: @@ -705,6 +708,7 @@ LinkerObject const& Assembly::assemble() const break; case AssignImmutable: { + // Expect 2 elements on stack (source, dest_base) auto const& offsets = immutableReferencesBySub[i.data()].second; for (size_t i = 0; i < offsets.size(); ++i) { diff --git a/libevmasm/Assembly.h b/libevmasm/Assembly.h index b532834f5..f768ffe31 100644 --- a/libevmasm/Assembly.h +++ b/libevmasm/Assembly.h @@ -65,7 +65,7 @@ public: AssemblyItem newPushImmutable(std::string const& _identifier); AssemblyItem newImmutableAssignment(std::string const& _identifier); - AssemblyItem const& append(AssemblyItem const& _i); + AssemblyItem const& append(AssemblyItem _i); AssemblyItem const& append(bytes const& _data) { return append(newData(_data)); } template Assembly& operator<<(T const& _d) { append(_d); return *this; } diff --git a/libevmasm/AssemblyItem.cpp b/libevmasm/AssemblyItem.cpp index a34870eda..eeeadb3ef 100644 --- a/libevmasm/AssemblyItem.cpp +++ b/libevmasm/AssemblyItem.cpp @@ -65,7 +65,7 @@ void AssemblyItem::setPushTagSubIdAndTag(size_t _subId, size_t _tag) setData(data); } -size_t AssemblyItem::bytesRequired(size_t _addressLength) const +size_t AssemblyItem::bytesRequired(size_t _addressLength, Precision _precision) const { switch (m_type) { @@ -87,10 +87,25 @@ size_t AssemblyItem::bytesRequired(size_t _addressLength) const case PushImmutable: return 1 + 32; case AssignImmutable: - if (m_immutableOccurrences) - return 1 + (3 + 32) * *m_immutableOccurrences; + { + unsigned long immutableOccurrences = 0; + + // Skip exact immutables count if no precise count was requested + if (_precision == Precision::Approximate) + immutableOccurrences = 1; // Assume one immut. ref. else - return 1 + (3 + 32) * 1024; // 1024 occurrences are beyond the maximum code size anyways. + { + solAssert(m_immutableOccurrences, "No immutable references. `bytesRequired()` called before assembly()?"); + immutableOccurrences = m_immutableOccurrences.value(); + } + + if (immutableOccurrences != 0) + // (DUP DUP PUSH ADD MSTORE)* (PUSH ADD MSTORE) + return (immutableOccurrences - 1) * (5 + 32) + (3 + 32); + else + // POP POP + return 2; + } case VerbatimBytecode: return std::get<2>(*m_verbatimBytecode).size(); default: @@ -322,6 +337,26 @@ ostream& solidity::evmasm::operator<<(ostream& _out, AssemblyItem const& _item) return _out; } +size_t AssemblyItem::opcodeCount() const noexcept +{ + switch (m_type) + { + case AssemblyItemType::AssignImmutable: + // Append empty items if this AssignImmutable was referenced more than once. + // For n immutable occurrences the first (n - 1) occurrences will + // generate 5 opcodes and the last will generate 3 opcodes, + // because it is reusing the 2 top-most elements on the stack. + solAssert(m_immutableOccurrences, ""); + + if (m_immutableOccurrences.value() != 0) + return (*m_immutableOccurrences - 1) * 5 + 3; + else + return 2; // two POP's + default: + return 1; + } +} + std::string AssemblyItem::computeSourceMapping( AssemblyItems const& _items, map const& _sourceIndicesMap @@ -334,6 +369,7 @@ std::string AssemblyItem::computeSourceMapping( int prevSourceIndex = -1; int prevModifierDepth = -1; char prevJump = 0; + for (auto const& item: _items) { if (!ret.empty()) @@ -402,6 +438,9 @@ std::string AssemblyItem::computeSourceMapping( } } + if (item.opcodeCount() > 1) + ret += string(item.opcodeCount() - 1, ';'); + prevStart = location.start; prevLength = length; prevSourceIndex = sourceIndex; diff --git a/libevmasm/AssemblyItem.h b/libevmasm/AssemblyItem.h index f1b2b5043..1cbfe768d 100644 --- a/libevmasm/AssemblyItem.h +++ b/libevmasm/AssemblyItem.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -51,6 +52,8 @@ enum AssemblyItemType VerbatimBytecode ///< Contains data that is inserted into the bytecode code section without modification. }; +enum class Precision { Precise , Approximate }; + class Assembly; class AssemblyItem; using AssemblyItems = std::vector; @@ -147,7 +150,10 @@ public: /// @returns an upper bound for the number of bytes required by this item, assuming that /// the value of a jump tag takes @a _addressLength bytes. - size_t bytesRequired(size_t _addressLength) const; + /// @param _precision Whether to return a precise count (which involves + /// counting immutable references which are only set after + /// a call to `assemble()`) or an approx. count. + size_t bytesRequired(size_t _addressLength, Precision _precision = Precision::Precise) const; size_t arguments() const; size_t returnValues() const; size_t deposit() const { return returnValues() - arguments(); } @@ -169,9 +175,11 @@ public: size_t m_modifierDepth = 0; - void setImmutableOccurrences(size_t _n) const { m_immutableOccurrences = std::make_shared(_n); } + void setImmutableOccurrences(size_t _n) const { m_immutableOccurrences = _n; } private: + size_t opcodeCount() const noexcept; + AssemblyItemType m_type; Instruction m_instruction; ///< Only valid if m_type == Operation std::shared_ptr m_data; ///< Only valid if m_type != Operation @@ -184,14 +192,14 @@ private: /// e.g. PushSubSize, PushTag, PushSub, etc. mutable std::shared_ptr m_pushedValue; /// Number of PushImmutable's with the same hash. Only used for AssignImmutable. - mutable std::shared_ptr m_immutableOccurrences; + mutable std::optional m_immutableOccurrences; }; -inline size_t bytesRequired(AssemblyItems const& _items, size_t _addressLength) +inline size_t bytesRequired(AssemblyItems const& _items, size_t _addressLength, Precision _precision = Precision::Precise) { size_t size = 0; for (AssemblyItem const& item: _items) - size += item.bytesRequired(_addressLength); + size += item.bytesRequired(_addressLength, _precision); return size; } diff --git a/libevmasm/ConstantOptimiser.cpp b/libevmasm/ConstantOptimiser.cpp index 6800baef3..a639bc6fc 100644 --- a/libevmasm/ConstantOptimiser.cpp +++ b/libevmasm/ConstantOptimiser.cpp @@ -103,7 +103,7 @@ bigint ConstantOptimisationMethod::dataGas(bytes const& _data) const size_t ConstantOptimisationMethod::bytesRequired(AssemblyItems const& _items) { - return evmasm::bytesRequired(_items, 3); // assume 3 byte addresses + return evmasm::bytesRequired(_items, 3, Precision::Approximate); // assume 3 byte addresses } void ConstantOptimisationMethod::replaceConstants( diff --git a/libevmasm/Inliner.cpp b/libevmasm/Inliner.cpp index 8d95bdc3b..527e6ae38 100644 --- a/libevmasm/Inliner.cpp +++ b/libevmasm/Inliner.cpp @@ -63,7 +63,7 @@ template uint64_t codeSize(RangeType const& _itemRange) { return ranges::accumulate(_itemRange | ranges::views::transform( - [](auto const& _item) { return _item.bytesRequired(2); } + [](auto const& _item) { return _item.bytesRequired(2, Precision::Approximate); } ), 0u); } /// @returns the tag id, if @a _item is a PushTag or Tag into the current subassembly, nullopt otherwise. diff --git a/libevmasm/PeepholeOptimiser.cpp b/libevmasm/PeepholeOptimiser.cpp index 0ee0b5efd..8e4a09fcc 100644 --- a/libevmasm/PeepholeOptimiser.cpp +++ b/libevmasm/PeepholeOptimiser.cpp @@ -388,6 +388,8 @@ size_t numberOfPops(AssemblyItems const& _items) bool PeepholeOptimiser::optimise() { + // Avoid referencing immutables too early by using approx. counting in bytesRequired() + auto const approx = evmasm::Precision::Approximate; OptimiserState state {m_items, 0, std::back_inserter(m_optimisedItems)}; while (state.i < m_items.size()) applyMethods( @@ -398,7 +400,7 @@ bool PeepholeOptimiser::optimise() ); if (m_optimisedItems.size() < m_items.size() || ( m_optimisedItems.size() == m_items.size() && ( - evmasm::bytesRequired(m_optimisedItems, 3) < evmasm::bytesRequired(m_items, 3) || + evmasm::bytesRequired(m_optimisedItems, 3, approx) < evmasm::bytesRequired(m_items, 3, approx) || numberOfPops(m_optimisedItems) > numberOfPops(m_items) ) )) diff --git a/test/libevmasm/Assembler.cpp b/test/libevmasm/Assembler.cpp index 0c1e4dbad..c7f28f654 100644 --- a/test/libevmasm/Assembler.cpp +++ b/test/libevmasm/Assembler.cpp @@ -21,15 +21,16 @@ * Tests for the assembler. */ -#include #include +#include +#include #include +#include +#include #include #include -#include -#include using namespace std; using namespace solidity::langutil; @@ -165,6 +166,89 @@ BOOST_AUTO_TEST_CASE(all_assembly_items) ); } +BOOST_AUTO_TEST_CASE(immutables_and_its_source_maps) +{ + // Tests for 1, 2, 3 number of immutables. + for (int numImmutables = 1; numImmutables <= 3; ++numImmutables) + { + BOOST_TEST_MESSAGE("NumImmutables: "s + to_string(numImmutables)); + // Tests for the cases 1, 2, 3 occurrences of an immutable reference. + for (int numActualRefs = 1; numActualRefs <= 3; ++numActualRefs) + { + BOOST_TEST_MESSAGE("NumActualRefs: "s + to_string(numActualRefs)); + auto const NumExpectedMappings = + ( + 2 + // PUSH PUSH + (numActualRefs - 1) * 5 + // DUP DUP PUSH ADD MTOSRE + 3 // PUSH ADD MSTORkhbE + ) * numImmutables; + + auto constexpr NumSubs = 1; + auto constexpr NumOpcodesWithoutMappings = + NumSubs + // PUSH for every sub assembly + 1; // INVALID + + auto assemblyName = make_shared("root.asm"); + auto subName = make_shared("sub.asm"); + + map indices = { + { *assemblyName, 0 }, + { *subName, 1 } + }; + + auto subAsm = make_shared(); + for (char i = 0; i < numImmutables; ++i) + { + for (int r = 0; r < numActualRefs; ++r) + { + subAsm->setSourceLocation(SourceLocation{10*i, 10*i + 6 + r, subName}); + subAsm->appendImmutable(string(1, char('a' + i))); // "a", "b", ... + } + } + + Assembly assembly; + for (char i = 1; i <= numImmutables; ++i) + { + assembly.setSourceLocation({10*i, 10*i + 3+i, assemblyName}); + assembly.append(u256(0x71)); // immutble value + assembly.append(u256(0)); // target... modules? + assembly.appendImmutableAssignment(string(1, char('a' + i - 1))); + } + + assembly.appendSubroutine(subAsm); + + checkCompilation(assembly); + + string const sourceMappings = AssemblyItem::computeSourceMapping(assembly.items(), indices); + auto const numberOfMappings = std::count(sourceMappings.begin(), sourceMappings.end(), ';'); + + LinkerObject const& obj = assembly.assemble(); + string const disassembly = disassemble(obj.bytecode, "\n"); + auto const numberOfOpcodes = std::count(disassembly.begin(), disassembly.end(), '\n'); + + #if 0 // {{{ debug prints + { + LinkerObject const& subObj = assembly.sub(0).assemble(); + string const subDisassembly = disassemble(subObj.bytecode, "\n"); + cout << '\n'; + cout << "### immutables: " << numImmutables << ", refs: " << numActualRefs << '\n'; + cout << " - srcmap: \"" << sourceMappings << "\"\n"; + cout << " - src mappings: " << numberOfMappings << '\n'; + cout << " - opcodes: " << numberOfOpcodes << '\n'; + cout << " - subs: " << assembly.numSubs() << '\n'; + cout << " - sub opcodes " << std::count(subDisassembly.begin(), subDisassembly.end(), '\n') << '\n'; + cout << " - sub srcmaps " << AssemblyItem::computeSourceMapping(subAsm->items(), indices) << '\n'; + cout << " - main bytecode:\n\t" << disassemble(obj.bytecode, "\n\t"); + cout << "\r - sub bytecode:\n\t" << disassemble(subObj.bytecode, "\n\t"); + } + #endif // }}} + + BOOST_REQUIRE_EQUAL(NumExpectedMappings, numberOfMappings); + BOOST_REQUIRE_EQUAL(NumExpectedMappings, numberOfOpcodes - NumOpcodesWithoutMappings); + } + } +} + BOOST_AUTO_TEST_CASE(immutable) { map indices = { diff --git a/test/libyul/objectCompiler/immutable_long_name_does_not_end_up_in_bytecode.yul b/test/libyul/objectCompiler/immutable_long_name_does_not_end_up_in_bytecode.yul index a208d15a5..dcc41e7a0 100644 --- a/test/libyul/objectCompiler/immutable_long_name_does_not_end_up_in_bytecode.yul +++ b/test/libyul/objectCompiler/immutable_long_name_does_not_end_up_in_bytecode.yul @@ -17,4 +17,4 @@ object "a" { // assignImmutable("0x85a5b1db611c82c46f5fa18e39ae218397536256c451e5de155a86de843a9ad6") // Bytecode: 73123456789012345678901234567890123456789060005050 // Opcodes: PUSH20 0x1234567890123456789012345678901234567890 PUSH1 0x0 POP POP -// SourceMappings: 167:42:0:-:0;58:1;32:187 +// SourceMappings: 167:42:0:-:0;58:1;32:187;