/* This file is part of solidity. solidity is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. solidity is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with solidity. If not, see . */ // SPDX-License-Identifier: GPL-3.0 /** @file Assembly.cpp * @author Gav Wood * @date 2014 */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace solidity; using namespace solidity::evmasm; using namespace solidity::langutil; using namespace solidity::util; AssemblyItem const& Assembly::append(AssemblyItem _i) { assertThrow(m_deposit >= 0, AssemblyException, "Stack underflow."); m_deposit += static_cast(_i.deposit()); m_items.emplace_back(std::move(_i)); if (!m_items.back().location().isValid() && m_currentSourceLocation.isValid()) m_items.back().setLocation(m_currentSourceLocation); m_items.back().m_modifierDepth = m_currentModifierDepth; return m_items.back(); } unsigned Assembly::codeSize(unsigned subTagSize) const { for (unsigned tagSize = subTagSize; true; ++tagSize) { size_t ret = 1; for (auto const& i: m_data) ret += i.second.size(); for (AssemblyItem const& i: m_items) ret += i.bytesRequired(tagSize, Precision::Approximate); if (numberEncodingSize(ret) <= tagSize) return static_cast(ret); } } void Assembly::importAssemblyItemsFromJSON(Json::Value const& _code, vector const& _sourceList) { solAssert(m_items.empty()); solRequire(_code.isArray(), AssemblyImportException, "Supplied JSON is not an array."); for (auto current = begin(_code); current != end(_code); ++current) { auto const& item = m_items.emplace_back(createAssemblyItemFromJSON(*current, _sourceList)); if (item == Instruction::JUMPDEST) solThrow(AssemblyImportException, "JUMPDEST instruction without a tag"); else if (item.type() == AssemblyItemType::Tag) { ++current; if (current != end(_code) && createAssemblyItemFromJSON(*current, _sourceList) != Instruction::JUMPDEST) solThrow(AssemblyImportException, "JUMPDEST expected after tag."); } } } AssemblyItem Assembly::createAssemblyItemFromJSON(Json::Value const& _json, std::vector const& _sourceList) { solRequire(_json.isObject(), AssemblyImportException, "Supplied JSON is not an object."); static set const validMembers{"name", "begin", "end", "source", "value", "modifierDepth", "jumpType"}; for (auto const& member: _json.getMemberNames()) solRequire( validMembers.count(member), AssemblyImportException, "Unknown member '" + member + "'. Valid members are " + solidity::util::joinHumanReadable(validMembers, ", ") + "." ); solRequire(isOfType(_json["name"]), AssemblyImportException, "Member 'name' missing or not of type string."); solRequire(isOfTypeIfExists(_json, "begin"), AssemblyImportException, "Optional member 'begin' not of type int."); solRequire(isOfTypeIfExists(_json, "end"), AssemblyImportException, "Optional member 'end' not of type int."); solRequire(isOfTypeIfExists(_json, "source"), AssemblyImportException, "Optional member 'source' not of type int."); solRequire(isOfTypeIfExists(_json, "value"), AssemblyImportException, "Optional member 'value' not of type string."); solRequire( isOfTypeIfExists(_json, "modifierDepth"), AssemblyImportException, "Optional member 'modifierDepth' not of type int." ); solRequire( isOfTypeIfExists(_json, "jumpType"), AssemblyImportException, "Optional member 'jumpType' not of type string." ); string name = get(_json["name"]); solRequire(!name.empty(), AssemblyImportException, "Member 'name' was empty."); SourceLocation location; location.start = get(_json["begin"]); location.end = get(_json["end"]); int srcIndex = getOrDefault(_json["source"], -1); size_t modifierDepth = static_cast(getOrDefault(_json["modifierDepth"], 0)); string value = getOrDefault(_json["value"], ""); string jumpType = getOrDefault(_json["jumpType"], ""); auto updateUsedTags = [&](u256 const& data) { m_usedTags = max(m_usedTags, static_cast(data) + 1); return data; }; auto storeImmutableHash = [&](string const& _immutableName) -> h256 { h256 hash(util::keccak256(_immutableName)); solAssert(m_immutables.count(hash) == 0 || m_immutables[hash] == _immutableName); m_immutables[hash] = _immutableName; return hash; }; auto storeLibraryHash = [&](string const& _libraryName) -> h256 { h256 hash(util::keccak256(_libraryName)); solAssert(m_libraries.count(hash) == 0 || m_libraries[hash] == _libraryName); m_libraries[hash] = _libraryName; return hash; }; auto requireValueDefinedForInstruction = [&](string const& _name, string const& _value) { solRequire( !_value.empty(), AssemblyImportException, "Member 'value' was not defined for instruction '" + _name + "', but the instruction needs a value." ); }; auto requireValueUndefinedForInstruction = [&](string const& _name, string const& _value) { solRequire( _value.empty(), AssemblyImportException, "Member 'value' defined for instruction '" + _name + "', but the instruction does not need a value." ); }; solRequire(srcIndex >= -1 && srcIndex < static_cast(_sourceList.size()), AssemblyImportException, "srcIndex out of bound."); if (srcIndex != -1) { static map> sharedSourceNames; if (sharedSourceNames.find(_sourceList[static_cast(srcIndex)]) == sharedSourceNames.end()) sharedSourceNames[_sourceList[static_cast(srcIndex)]] = std::make_shared(_sourceList[static_cast(srcIndex)]); location.sourceName = sharedSourceNames[_sourceList[static_cast(srcIndex)]]; } AssemblyItem result(0); if (c_instructions.count(name)) { AssemblyItem item{c_instructions.at(name), location}; if (!jumpType.empty()) { if (item.instruction() == Instruction::JUMP || item.instruction() == Instruction::JUMPI) item.setJumpType(jumpType); else solThrow( AssemblyImportException, "Member 'jumpType' set on instruction different from JUMP or JUMPI (was set on instruction '" + name + "')" ); } requireValueUndefinedForInstruction(name, value); result = item; } else { solRequire( jumpType.empty(), AssemblyImportException, "Member 'jumpType' set on instruction different from JUMP or JUMPI (was set on instruction '" + name + "')" ); if (name == "PUSH") { requireValueDefinedForInstruction(name, value); result = {AssemblyItemType::Push, u256("0x" + value)}; } else if (name == "PUSH [ErrorTag]") { requireValueUndefinedForInstruction(name, value); result = {AssemblyItemType::PushTag, 0}; } else if (name == "PUSH [tag]") { requireValueDefinedForInstruction(name, value); result = {AssemblyItemType::PushTag, updateUsedTags(u256(value))}; } else if (name == "PUSH [$]") { requireValueDefinedForInstruction(name, value); result = {AssemblyItemType::PushSub, u256("0x" + value)}; } else if (name == "PUSH #[$]") { requireValueDefinedForInstruction(name, value); result = {AssemblyItemType::PushSubSize, u256("0x" + value)}; } else if (name == "PUSHSIZE") { requireValueUndefinedForInstruction(name, value); result = {AssemblyItemType::PushProgramSize, 0}; } else if (name == "PUSHLIB") { requireValueDefinedForInstruction(name, value); result = {AssemblyItemType::PushLibraryAddress, storeLibraryHash(value)}; } else if (name == "PUSHDEPLOYADDRESS") { requireValueUndefinedForInstruction(name, value); result = {AssemblyItemType::PushDeployTimeAddress, 0}; } else if (name == "PUSHIMMUTABLE") { requireValueDefinedForInstruction(name, value); result = {AssemblyItemType::PushImmutable, storeImmutableHash(value)}; } else if (name == "ASSIGNIMMUTABLE") { requireValueDefinedForInstruction(name, value); result = {AssemblyItemType::AssignImmutable, storeImmutableHash(value)}; } else if (name == "tag") { requireValueDefinedForInstruction(name, value); result = {AssemblyItemType::Tag, updateUsedTags(u256(value))}; } else if (name == "PUSH data") { requireValueDefinedForInstruction(name, value); result = {AssemblyItemType::PushData, u256("0x" + value)}; } else if (name == "VERBATIM") { requireValueDefinedForInstruction(name, value); AssemblyItem item(fromHex(value), 0, 0); result = item; } else solThrow(InvalidOpcode, "Invalid opcode: " + name); } result.setLocation(location); result.m_modifierDepth = modifierDepth; return result; } namespace { std::string locationFromSources(StringMap const& _sourceCodes, SourceLocation const& _location) { if (!_location.hasText() || _sourceCodes.empty()) return {}; auto it = _sourceCodes.find(*_location.sourceName); if (it == _sourceCodes.end()) return {}; return CharStream::singleLineSnippet(it->second, _location); } class Functionalizer { public: Functionalizer (std::ostream& _out, std::string const& _prefix, StringMap const& _sourceCodes, Assembly const& _assembly): m_out(_out), m_prefix(_prefix), m_sourceCodes(_sourceCodes), m_assembly(_assembly) {} void feed(AssemblyItem const& _item, DebugInfoSelection const& _debugInfoSelection) { if (_item.location().isValid() && _item.location() != m_location) { flush(); m_location = _item.location(); printLocation(_debugInfoSelection); } std::string expression = _item.toAssemblyText(m_assembly); if (!( _item.canBeFunctional() && _item.returnValues() <= 1 && _item.arguments() <= m_pending.size() )) { flush(); m_out << m_prefix << (_item.type() == Tag ? "" : " ") << expression << std::endl; return; } if (_item.arguments() > 0) { expression += "("; for (size_t i = 0; i < _item.arguments(); ++i) { expression += m_pending.back(); m_pending.pop_back(); if (i + 1 < _item.arguments()) expression += ", "; } expression += ")"; } m_pending.push_back(expression); if (_item.returnValues() != 1) flush(); } void flush() { for (std::string const& expression: m_pending) m_out << m_prefix << " " << expression << std::endl; m_pending.clear(); } void printLocation(DebugInfoSelection const& _debugInfoSelection) { if (!m_location.isValid() || (!_debugInfoSelection.location && !_debugInfoSelection.snippet)) return; m_out << m_prefix << " /*"; if (_debugInfoSelection.location) { if (m_location.sourceName) m_out << " " + escapeAndQuoteString(*m_location.sourceName); if (m_location.hasText()) m_out << ":" << std::to_string(m_location.start) + ":" + std::to_string(m_location.end); } if (_debugInfoSelection.snippet) { if (_debugInfoSelection.location) m_out << " "; m_out << locationFromSources(m_sourceCodes, m_location); } m_out << " */" << std::endl; } private: strings m_pending; SourceLocation m_location; std::ostream& m_out; std::string const& m_prefix; StringMap const& m_sourceCodes; Assembly const& m_assembly; }; } void Assembly::assemblyStream( std::ostream& _out, DebugInfoSelection const& _debugInfoSelection, std::string const& _prefix, StringMap const& _sourceCodes ) const { Functionalizer f(_out, _prefix, _sourceCodes, *this); for (auto const& i: m_items) f.feed(i, _debugInfoSelection); f.flush(); if (!m_data.empty() || !m_subs.empty()) { _out << _prefix << "stop" << std::endl; for (auto const& i: m_data) if (u256(i.first) >= m_subs.size()) _out << _prefix << "data_" << toHex(u256(i.first)) << " " << util::toHex(i.second) << std::endl; for (size_t i = 0; i < m_subs.size(); ++i) { _out << std::endl << _prefix << "sub_" << i << ": assembly {\n"; m_subs[i]->assemblyStream(_out, _debugInfoSelection, _prefix + " ", _sourceCodes); _out << _prefix << "}" << std::endl; } } if (m_auxiliaryData.size() > 0) _out << std::endl << _prefix << "auxdata: 0x" << util::toHex(m_auxiliaryData) << std::endl; } std::string Assembly::assemblyString( DebugInfoSelection const& _debugInfoSelection, StringMap const& _sourceCodes ) const { std::ostringstream tmp; assemblyStream(tmp, _debugInfoSelection, "", _sourceCodes); return tmp.str(); } Json::Value Assembly::assemblyJSON(std::vector const& _sources, bool _includeSourceList) const { Json::Value root; root[".code"] = Json::arrayValue; Json::Value& code = root[".code"]; for (AssemblyItem const& item: m_items) { int sourceIndex = -1; if (item.location().sourceName) for (size_t index = 0; index < _sources.size(); ++index) if (_sources[index] == *item.location().sourceName) { sourceIndex = static_cast(index); break; } auto [name, data] = item.nameAndData(m_evmVersion); Json::Value jsonItem; jsonItem["name"] = name; jsonItem["begin"] = item.location().start; jsonItem["end"] = item.location().end; if (item.m_modifierDepth != 0) jsonItem["modifierDepth"] = static_cast(item.m_modifierDepth); std::string jumpType = item.getJumpTypeAsString(); if (!jumpType.empty()) jsonItem["jumpType"] = jumpType; if (name == "PUSHLIB") data = m_libraries.at(h256(data)); else if (name == "PUSHIMMUTABLE" || name == "ASSIGNIMMUTABLE") data = m_immutables.at(h256(data)); if (!data.empty()) jsonItem["value"] = data; jsonItem["source"] = sourceIndex; code.append(std::move(jsonItem)); if (item.type() == AssemblyItemType::Tag) { Json::Value jumpdest; jumpdest["name"] = "JUMPDEST"; jumpdest["begin"] = item.location().start; jumpdest["end"] = item.location().end; jumpdest["source"] = sourceIndex; if (item.m_modifierDepth != 0) jumpdest["modifierDepth"] = static_cast(item.m_modifierDepth); code.append(std::move(jumpdest)); } } if (!_sources.empty() && _includeSourceList) { root["sourceList"] = Json::arrayValue; Json::Value& jsonSourceList = root["sourceList"]; for (int index = 0; index < static_cast(_sources.size()); ++index) jsonSourceList[index] = _sources[static_cast(index)]; } if (!m_data.empty() || !m_subs.empty()) { root[".data"] = Json::objectValue; Json::Value& data = root[".data"]; for (auto const& i: m_data) if (u256(i.first) >= m_subs.size()) data[util::toHex(toBigEndian((u256)i.first), util::HexPrefix::DontAdd, util::HexCase::Upper)] = util::toHex(i.second); for (size_t i = 0; i < m_subs.size(); ++i) { stringstream hexStr; hexStr << hex << i; data[hexStr.str()] = m_subs[i]->assemblyJSON(_sources, false); } } if (!m_auxiliaryData.empty()) root[".auxdata"] = util::toHex(m_auxiliaryData); return root; } std::pair, std::vector> Assembly::fromJSON(Json::Value const& _json, vector const& _sourceList, int _level) { solRequire(_json.isObject(), AssemblyImportException, "Supplied JSON is not an object."); static set const validMembers{".code", ".data", ".auxdata", "sourceList"}; for (auto const& attribute: _json.getMemberNames()) solRequire(validMembers.count(attribute), AssemblyImportException, "Unknown attribute '" + attribute + "'."); solRequire(_json.isMember(".code"), AssemblyImportException, "Member '.code' does not exist."); solRequire(_json[".code"].isArray(), AssemblyImportException, "Member '.code' is not an array."); for (auto const& codeItem: _json[".code"]) solRequire(codeItem.isObject(), AssemblyImportException, "Item of '.code' array is not an object."); if (_level == 0) { if (_json.isMember("sourceList")) { solRequire(_json["sourceList"].isArray(), AssemblyImportException, "Optional member 'sourceList' is not an array."); for (auto const& sourceListItem: _json["sourceList"]) solRequire(sourceListItem.isString(), AssemblyImportException, "Item of 'sourceList' array is not of type string."); } } else solRequire( !_json.isMember("sourceList"), AssemblyImportException, "Member 'sourceList' is only allowed in root JSON object." ); shared_ptr result = make_shared(langutil::EVMVersion(), _level == 0, ""); vector sourceList; if (_json.isMember("sourceList")) { solAssert(_level == 0); for (auto const& it: _json["sourceList"]) { solRequire( std::find(sourceList.begin(), sourceList.end(), it.asString()) == sourceList.end(), AssemblyImportException, "Items in 'sourceList' array are not unique." ); sourceList.emplace_back(it.asString()); } } else sourceList = _sourceList; result->importAssemblyItemsFromJSON(_json[".code"], sourceList); if (_json[".auxdata"]) { solRequire(_json[".auxdata"].isString(), AssemblyImportException, "Optional member '.auxdata' is not of type string."); bytes auxdata{fromHex(_json[".auxdata"].asString())}; solRequire(!auxdata.empty(), AssemblyImportException, "Optional member '.auxdata' is not a valid hexadecimal string."); result->m_auxiliaryData = auxdata; } if (_json.isMember(".data")) { solRequire(_json[".data"].isObject(), AssemblyImportException, "Optional member '.data' is not an object."); Json::Value const& data = _json[".data"]; for (Json::ValueConstIterator dataIter = data.begin(); dataIter != data.end(); dataIter++) { solRequire(dataIter.key().isString(), AssemblyImportException, "Key inside '.data' is not of type string."); string dataItemID = dataIter.key().asString(); Json::Value const& code = data[dataItemID]; if (code.isString()) { bytes data_value{fromHex(code.asString())}; solRequire( !data_value.empty(), AssemblyImportException, "Member '.data' contains a value for '" + dataItemID + "' that is not a valid hexadecimal string."); result->m_data[h256(fromHex(dataItemID))] = fromHex(code.asString()); } else if (code.isObject()) { shared_ptr subassembly(Assembly::fromJSON(code, sourceList, _level + 1).first); solAssert(subassembly); result->m_subs.emplace_back(make_shared(*subassembly)); // TODO: this shouldn't be enough for the general case. result->encodeSubPath({0, 0}); } else solThrow(AssemblyImportException, "Key inside '.data' '" + dataItemID + "' can only be a valid hex-string or an object."); } } return std::make_pair(result, sourceList); } AssemblyItem Assembly::namedTag(string const& _name, size_t _params, size_t _returns, optional _sourceID) { assertThrow(!_name.empty(), AssemblyException, "Empty named tag."); if (m_namedTags.count(_name)) { assertThrow(m_namedTags.at(_name).params == _params, AssemblyException, ""); assertThrow(m_namedTags.at(_name).returns == _returns, AssemblyException, ""); assertThrow(m_namedTags.at(_name).sourceID == _sourceID, AssemblyException, ""); } else m_namedTags[_name] = {static_cast(newTag().data()), _sourceID, _params, _returns}; return AssemblyItem{Tag, m_namedTags.at(_name).id}; } AssemblyItem Assembly::newPushLibraryAddress(std::string const& _identifier) { h256 h(util::keccak256(_identifier)); m_libraries[h] = _identifier; return AssemblyItem{PushLibraryAddress, h}; } AssemblyItem Assembly::newPushImmutable(std::string const& _identifier) { h256 h(util::keccak256(_identifier)); m_immutables[h] = _identifier; return AssemblyItem{PushImmutable, h}; } AssemblyItem Assembly::newImmutableAssignment(std::string const& _identifier) { h256 h(util::keccak256(_identifier)); m_immutables[h] = _identifier; return AssemblyItem{AssignImmutable, h}; } Assembly& Assembly::optimise(OptimiserSettings const& _settings) { optimiseInternal(_settings, {}); return *this; } std::map const& Assembly::optimiseInternal( OptimiserSettings const& _settings, std::set _tagsReferencedFromOutside ) { if (m_tagReplacements) return *m_tagReplacements; // Run optimisation for sub-assemblies. for (size_t subId = 0; subId < m_subs.size(); ++subId) { OptimiserSettings settings = _settings; Assembly& sub = *m_subs[subId]; std::map const& subTagReplacements = sub.optimiseInternal( settings, JumpdestRemover::referencedTags(m_items, subId) ); // Apply the replacements (can be empty). BlockDeduplicator::applyTagReplacement(m_items, subTagReplacements, subId); } std::map tagReplacements; // Iterate until no new optimisation possibilities are found. for (unsigned count = 1; count > 0;) { count = 0; if (_settings.runInliner) Inliner{ m_items, _tagsReferencedFromOutside, _settings.expectedExecutionsPerDeployment, isCreation(), _settings.evmVersion }.optimise(); if (_settings.runJumpdestRemover) { JumpdestRemover jumpdestOpt{m_items}; if (jumpdestOpt.optimise(_tagsReferencedFromOutside)) count++; } if (_settings.runPeephole) { PeepholeOptimiser peepOpt{m_items}; while (peepOpt.optimise()) { count++; assertThrow(count < 64000, OptimizerException, "Peephole optimizer seems to be stuck."); } } // This only modifies PushTags, we have to run again to actually remove code. if (_settings.runDeduplicate) { BlockDeduplicator deduplicator{m_items}; if (deduplicator.deduplicate()) { for (auto const& replacement: deduplicator.replacedTags()) { assertThrow( replacement.first <= std::numeric_limits::max() && replacement.second <= std::numeric_limits::max(), OptimizerException, "Invalid tag replacement." ); assertThrow( !tagReplacements.count(replacement.first), OptimizerException, "Replacement already known." ); tagReplacements[replacement.first] = replacement.second; if (_tagsReferencedFromOutside.erase(static_cast(replacement.first))) _tagsReferencedFromOutside.insert(static_cast(replacement.second)); } count++; } } if (_settings.runCSE) { // Control flow graph optimization has been here before but is disabled because it // assumes we only jump to tags that are pushed. This is not the case anymore with // function types that can be stored in storage. AssemblyItems optimisedItems; bool usesMSize = ranges::any_of(m_items, [](AssemblyItem const& _i) { return _i == AssemblyItem{Instruction::MSIZE} || _i.type() == VerbatimBytecode; }); auto iter = m_items.begin(); while (iter != m_items.end()) { KnownState emptyState; CommonSubexpressionEliminator eliminator{emptyState}; auto orig = iter; iter = eliminator.feedItems(iter, m_items.end(), usesMSize); bool shouldReplace = false; AssemblyItems optimisedChunk; try { optimisedChunk = eliminator.getOptimizedItems(); shouldReplace = (optimisedChunk.size() < static_cast(iter - orig)); } catch (StackTooDeepException const&) { // This might happen if the opcode reconstruction is not as efficient // as the hand-crafted code. } catch (ItemNotAvailableException const&) { // This might happen if e.g. associativity and commutativity rules // reorganise the expression tree, but not all leaves are available. } if (shouldReplace) { count++; optimisedItems += optimisedChunk; } else copy(orig, iter, back_inserter(optimisedItems)); } if (optimisedItems.size() < m_items.size()) { m_items = std::move(optimisedItems); count++; } } } if (_settings.runConstantOptimiser) ConstantOptimisationMethod::optimiseConstants( isCreation(), isCreation() ? 1 : _settings.expectedExecutionsPerDeployment, _settings.evmVersion, *this ); m_tagReplacements = std::move(tagReplacements); return *m_tagReplacements; } LinkerObject const& Assembly::assemble() const { assertThrow(!m_invalid, AssemblyException, "Attempted to assemble invalid Assembly object."); // Return the already assembled object, if present. if (!m_assembledObject.bytecode.empty()) return m_assembledObject; // Otherwise ensure the object is actually clear. assertThrow(m_assembledObject.linkReferences.empty(), AssemblyException, "Unexpected link references."); LinkerObject& ret = m_assembledObject; size_t subTagSize = 1; std::map>> immutableReferencesBySub; for (auto const& sub: m_subs) { auto const& linkerObject = sub->assemble(); if (!linkerObject.immutableReferences.empty()) { assertThrow( immutableReferencesBySub.empty(), AssemblyException, "More than one sub-assembly references immutables." ); immutableReferencesBySub = linkerObject.immutableReferences; } for (size_t tagPos: sub->m_tagPositionsInBytecode) if (tagPos != std::numeric_limits::max() && tagPos > subTagSize) subTagSize = tagPos; } bool setsImmutables = false; bool pushesImmutables = false; for (auto const& i: m_items) if (i.type() == AssignImmutable) { i.setImmutableOccurrences(immutableReferencesBySub[i.data()].second.size()); setsImmutables = true; } else if (i.type() == PushImmutable) pushesImmutables = true; if (setsImmutables || pushesImmutables) assertThrow( setsImmutables != pushesImmutables, AssemblyException, "Cannot push and assign immutables in the same assembly subroutine." ); unsigned bytesRequiredForCode = codeSize(static_cast(subTagSize)); m_tagPositionsInBytecode = std::vector(m_usedTags, std::numeric_limits::max()); std::map> tagRef; std::multimap dataRef; std::multimap subRef; std::vector sizeRef; ///< Pointers to code locations where the size of the program is inserted unsigned bytesPerTag = numberEncodingSize(bytesRequiredForCode); uint8_t tagPush = static_cast(pushInstruction(bytesPerTag)); unsigned bytesRequiredIncludingData = bytesRequiredForCode + 1 + static_cast(m_auxiliaryData.size()); for (auto const& sub: m_subs) bytesRequiredIncludingData += static_cast(sub->assemble().bytecode.size()); unsigned bytesPerDataRef = numberEncodingSize(bytesRequiredIncludingData); uint8_t dataRefPush = static_cast(pushInstruction(bytesPerDataRef)); ret.bytecode.reserve(bytesRequiredIncludingData); for (AssemblyItem const& i: m_items) { // store position of the invalid jump destination if (i.type() != Tag && m_tagPositionsInBytecode[0] == std::numeric_limits::max()) m_tagPositionsInBytecode[0] = ret.bytecode.size(); switch (i.type()) { case Operation: ret.bytecode.push_back(static_cast(i.instruction())); break; case Push: { unsigned b = numberEncodingSize(i.data()); if (b == 0 && !m_evmVersion.hasPush0()) { b = 1; } ret.bytecode.push_back(static_cast(pushInstruction(b))); if (b > 0) { ret.bytecode.resize(ret.bytecode.size() + b); bytesRef byr(&ret.bytecode.back() + 1 - b, b); toBigEndian(i.data(), byr); } break; } case PushTag: { ret.bytecode.push_back(tagPush); tagRef[ret.bytecode.size()] = i.splitForeignPushTag(); ret.bytecode.resize(ret.bytecode.size() + bytesPerTag); break; } case PushData: ret.bytecode.push_back(dataRefPush); dataRef.insert(std::make_pair(h256(i.data()), ret.bytecode.size())); ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); break; case PushSub: assertThrow(i.data() <= std::numeric_limits::max(), AssemblyException, ""); ret.bytecode.push_back(dataRefPush); subRef.insert(std::make_pair(static_cast(i.data()), ret.bytecode.size())); ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); break; case PushSubSize: { assertThrow(i.data() <= std::numeric_limits::max(), AssemblyException, ""); auto s = subAssemblyById(static_cast(i.data()))->assemble().bytecode.size(); i.setPushedValue(u256(s)); unsigned b = std::max(1, numberEncodingSize(s)); ret.bytecode.push_back(static_cast(pushInstruction(b))); ret.bytecode.resize(ret.bytecode.size() + b); bytesRef byr(&ret.bytecode.back() + 1 - b, b); toBigEndian(s, byr); break; } case PushProgramSize: { ret.bytecode.push_back(dataRefPush); sizeRef.push_back(static_cast(ret.bytecode.size())); ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); break; } case PushLibraryAddress: ret.bytecode.push_back(static_cast(Instruction::PUSH20)); ret.linkReferences[ret.bytecode.size()] = m_libraries.at(i.data()); ret.bytecode.resize(ret.bytecode.size() + 20); break; case PushImmutable: ret.bytecode.push_back(static_cast(Instruction::PUSH32)); // Maps keccak back to the "identifier" std::string of that immutable. ret.immutableReferences[i.data()].first = m_immutables.at(i.data()); // Record the bytecode offset of the PUSH32 argument. ret.immutableReferences[i.data()].second.emplace_back(ret.bytecode.size()); // Advance bytecode by 32 bytes (default initialized). ret.bytecode.resize(ret.bytecode.size() + 32); break; case VerbatimBytecode: ret.bytecode += i.verbatimData(); break; case AssignImmutable: { // Expect 2 elements on stack (source, dest_base) auto const& offsets = immutableReferencesBySub[i.data()].second; for (size_t i = 0; i < offsets.size(); ++i) { if (i != offsets.size() - 1) { ret.bytecode.push_back(uint8_t(Instruction::DUP2)); ret.bytecode.push_back(uint8_t(Instruction::DUP2)); } // TODO: should we make use of the constant optimizer methods for pushing the offsets? bytes offsetBytes = toCompactBigEndian(u256(offsets[i])); ret.bytecode.push_back(static_cast(pushInstruction(static_cast(offsetBytes.size())))); ret.bytecode += offsetBytes; ret.bytecode.push_back(uint8_t(Instruction::ADD)); ret.bytecode.push_back(uint8_t(Instruction::MSTORE)); } if (offsets.empty()) { ret.bytecode.push_back(uint8_t(Instruction::POP)); ret.bytecode.push_back(uint8_t(Instruction::POP)); } immutableReferencesBySub.erase(i.data()); break; } case PushDeployTimeAddress: ret.bytecode.push_back(static_cast(Instruction::PUSH20)); ret.bytecode.resize(ret.bytecode.size() + 20); break; case Tag: { assertThrow(i.data() != 0, AssemblyException, "Invalid tag position."); assertThrow(i.splitForeignPushTag().first == std::numeric_limits::max(), AssemblyException, "Foreign tag."); size_t tagId = static_cast(i.data()); assertThrow(ret.bytecode.size() < 0xffffffffL, AssemblyException, "Tag too large."); assertThrow(m_tagPositionsInBytecode[tagId] == std::numeric_limits::max(), AssemblyException, "Duplicate tag position."); m_tagPositionsInBytecode[tagId] = ret.bytecode.size(); ret.bytecode.push_back(static_cast(Instruction::JUMPDEST)); break; } default: assertThrow(false, InvalidOpcode, "Unexpected opcode while assembling."); } } if (!immutableReferencesBySub.empty()) throw langutil::Error( 1284_error, langutil::Error::Type::CodeGenerationError, "Some immutables were read from but never assigned, possibly because of optimization." ); if (!m_subs.empty() || !m_data.empty() || !m_auxiliaryData.empty()) // Append an INVALID here to help tests find miscompilation. ret.bytecode.push_back(static_cast(Instruction::INVALID)); std::map subAssemblyOffsets; for (auto const& [subIdPath, bytecodeOffset]: subRef) { LinkerObject subObject = subAssemblyById(subIdPath)->assemble(); bytesRef r(ret.bytecode.data() + bytecodeOffset, bytesPerDataRef); // In order for de-duplication to kick in, not only must the bytecode be identical, but // link and immutables references as well. if (size_t* subAssemblyOffset = util::valueOrNullptr(subAssemblyOffsets, subObject)) toBigEndian(*subAssemblyOffset, r); else { toBigEndian(ret.bytecode.size(), r); subAssemblyOffsets[subObject] = ret.bytecode.size(); ret.bytecode += subObject.bytecode; } for (auto const& ref: subObject.linkReferences) ret.linkReferences[ref.first + subAssemblyOffsets[subObject]] = ref.second; } for (auto const& i: tagRef) { size_t subId; size_t tagId; std::tie(subId, tagId) = i.second; assertThrow(subId == std::numeric_limits::max() || subId < m_subs.size(), AssemblyException, "Invalid sub id"); std::vector const& tagPositions = subId == std::numeric_limits::max() ? m_tagPositionsInBytecode : m_subs[subId]->m_tagPositionsInBytecode; assertThrow(tagId < tagPositions.size(), AssemblyException, "Reference to non-existing tag."); size_t pos = tagPositions[tagId]; assertThrow(pos != std::numeric_limits::max(), AssemblyException, "Reference to tag without position."); assertThrow(numberEncodingSize(pos) <= bytesPerTag, AssemblyException, "Tag too large for reserved space."); bytesRef r(ret.bytecode.data() + i.first, bytesPerTag); toBigEndian(pos, r); } for (auto const& [name, tagInfo]: m_namedTags) { size_t position = m_tagPositionsInBytecode.at(tagInfo.id); std::optional tagIndex; for (auto&& [index, item]: m_items | ranges::views::enumerate) if (item.type() == Tag && static_cast(item.data()) == tagInfo.id) { tagIndex = index; break; } ret.functionDebugData[name] = { position == std::numeric_limits::max() ? std::nullopt : std::optional{position}, tagIndex, tagInfo.sourceID, tagInfo.params, tagInfo.returns }; } for (auto const& dataItem: m_data) { auto references = dataRef.equal_range(dataItem.first); if (references.first == references.second) continue; for (auto ref = references.first; ref != references.second; ++ref) { bytesRef r(ret.bytecode.data() + ref->second, bytesPerDataRef); toBigEndian(ret.bytecode.size(), r); } ret.bytecode += dataItem.second; } ret.bytecode += m_auxiliaryData; for (unsigned pos: sizeRef) { bytesRef r(ret.bytecode.data() + pos, bytesPerDataRef); toBigEndian(ret.bytecode.size(), r); } return ret; } std::vector Assembly::decodeSubPath(size_t _subObjectId) const { if (_subObjectId < m_subs.size()) return {_subObjectId}; auto subIdPathIt = find_if( m_subPaths.begin(), m_subPaths.end(), [_subObjectId](auto const& subId) { return subId.second == _subObjectId; } ); assertThrow(subIdPathIt != m_subPaths.end(), AssemblyException, ""); return subIdPathIt->first; } size_t Assembly::encodeSubPath(std::vector const& _subPath) { assertThrow(!_subPath.empty(), AssemblyException, ""); if (_subPath.size() == 1) { assertThrow(_subPath[0] < m_subs.size(), AssemblyException, ""); return _subPath[0]; } if (m_subPaths.find(_subPath) == m_subPaths.end()) { size_t objectId = std::numeric_limits::max() - m_subPaths.size(); assertThrow(objectId >= m_subs.size(), AssemblyException, ""); m_subPaths[_subPath] = objectId; } return m_subPaths[_subPath]; } Assembly const* Assembly::subAssemblyById(size_t _subId) const { std::vector subIds = decodeSubPath(_subId); Assembly const* currentAssembly = this; for (size_t currentSubId: subIds) { currentAssembly = currentAssembly->m_subs.at(currentSubId).get(); assertThrow(currentAssembly, AssemblyException, ""); } assertThrow(currentAssembly != this, AssemblyException, ""); return currentAssembly; } Assembly::OptimiserSettings Assembly::OptimiserSettings::translateSettings(frontend::OptimiserSettings const& _settings, langutil::EVMVersion const& _evmVersion) { // Constructing it this way so that we notice changes in the fields. evmasm::Assembly::OptimiserSettings asmSettings{false, false, false, false, false, false, _evmVersion, 0}; asmSettings.runInliner = _settings.runInliner; asmSettings.runJumpdestRemover = _settings.runJumpdestRemover; asmSettings.runPeephole = _settings.runPeephole; asmSettings.runDeduplicate = _settings.runDeduplicate; asmSettings.runCSE = _settings.runCSE; asmSettings.runConstantOptimiser = _settings.runConstantOptimiser; asmSettings.expectedExecutionsPerDeployment = _settings.expectedExecutionsPerDeployment; asmSettings.evmVersion = _evmVersion; return asmSettings; }