diff --git a/test/tools/yulInterpreter/CMakeLists.txt b/test/tools/yulInterpreter/CMakeLists.txt index 52fe0e3c6..02d53d697 100644 --- a/test/tools/yulInterpreter/CMakeLists.txt +++ b/test/tools/yulInterpreter/CMakeLists.txt @@ -1,6 +1,8 @@ set(sources EVMInstructionInterpreter.h EVMInstructionInterpreter.cpp + EWasmBuiltinInterpreter.h + EWasmBuiltinInterpreter.cpp Interpreter.h Interpreter.cpp ) diff --git a/test/tools/yulInterpreter/EWasmBuiltinInterpreter.cpp b/test/tools/yulInterpreter/EWasmBuiltinInterpreter.cpp new file mode 100644 index 000000000..c52ee7e10 --- /dev/null +++ b/test/tools/yulInterpreter/EWasmBuiltinInterpreter.cpp @@ -0,0 +1,376 @@ +/* + This file is part of solidity. + + solidity is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + solidity is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with solidity. If not, see . +*/ +/** + * Yul interpreter module that evaluates EWasm builtins. + */ + +#include + +#include + +#include +#include + +#include + +#include + +using namespace std; +using namespace dev; +using namespace yul; +using namespace yul::test; + +namespace +{ + +/// Copy @a _size bytes of @a _source at offset @a _sourceOffset to +/// @a _target at offset @a _targetOffset. Behaves as if @a _source would +/// continue with an infinite sequence of zero bytes beyond its end. +void copyZeroExtended( + map& _target, bytes const& _source, + size_t _targetOffset, size_t _sourceOffset, size_t _size +) +{ + for (size_t i = 0; i < _size; ++i) + _target[_targetOffset + i] = _sourceOffset + i < _source.size() ? _source[_sourceOffset + i] : 0; +} + +} + +using u512 = boost::multiprecision::number>; + +u256 EWasmBuiltinInterpreter::evalBuiltin(YulString _fun, vector const& _arguments) +{ + vector arg; + for (u256 const& a: _arguments) + arg.emplace_back(uint64_t(a & uint64_t(-1))); + + if (_fun == "datasize"_yulstring) + return u256(keccak256(h256(_arguments.at(0)))) & 0xfff; + else if (_fun == "dataoffset"_yulstring) + return u256(keccak256(h256(_arguments.at(0) + 2))) & 0xfff; + else if (_fun == "datacopy"_yulstring) + { + // This is identical to codecopy. + if (accessMemory(_arguments.at(0), _arguments.at(2))) + copyZeroExtended( + m_state.memory, + m_state.code, + size_t(_arguments.at(0)), + size_t(_arguments.at(1) & size_t(-1)), + size_t(_arguments.at(2)) + ); + return 0; + } + else if (_fun == "drop"_yulstring) + return {}; + else if (_fun == "unreachable"_yulstring) + throw ExplicitlyTerminated(); + else if (_fun == "i64.add"_yulstring) + return arg[0] + arg[1]; + else if (_fun == "i64.sub"_yulstring) + return arg[0] - arg[1]; + else if (_fun == "i64.mul"_yulstring) + return arg[0] * arg[1]; + else if (_fun == "i64.div_u"_yulstring) + { + if (arg[1] == 0) + throw ExplicitlyTerminated(); + else + return arg[0] / arg[1]; + } + else if (_fun == "i64.rem_u"_yulstring) + { + if (arg[1] == 0) + throw ExplicitlyTerminated(); + else + return arg[0] % arg[1]; + } + else if (_fun == "i64.and"_yulstring) + return arg[0] & arg[1]; + else if (_fun == "i64.or"_yulstring) + return arg[0] | arg[1]; + else if (_fun == "i64.xor"_yulstring) + return arg[0] ^ arg[1]; + else if (_fun == "i64.shl"_yulstring) + return arg[0] << arg[1]; + else if (_fun == "i64.shr_u"_yulstring) + return arg[0] >> arg[1]; + else if (_fun == "i64.eq"_yulstring) + return arg[0] == arg[1] ? 1 : 0; + else if (_fun == "i64.ne"_yulstring) + return arg[0] != arg[1] ? 1 : 0; + else if (_fun == "i64.eqz"_yulstring) + return arg[0] == 0 ? 1 : 0; + else if (_fun == "i64.lt_u"_yulstring) + return arg[0] < arg[1] ? 1 : 0; + else if (_fun == "i64.gt_u"_yulstring) + return arg[0] > arg[1] ? 1 : 0; + else if (_fun == "i64.le_u"_yulstring) + return arg[0] <= arg[1] ? 1 : 0; + else if (_fun == "i64.ge_u"_yulstring) + return arg[0] >= arg[1] ? 1 : 0; + else if (_fun == "i64.store"_yulstring) + { + accessMemory(arg[0], 8); + writeMemoryWord(arg[0], arg[1]); + return 0; + } + else if (_fun == "i64.load"_yulstring) + { + accessMemory(arg[0], 8); + return readMemoryWord(arg[0]); + } + else if (_fun == "eth.getAddress"_yulstring) + return writeAddress(arg[0], m_state.address); + else if (_fun == "eth.getExternalBalance"_yulstring) + // TODO this does not read the address, but is consistent with + // EVM interpreter implementation. + // If we take the address into account, this needs to use readAddress. + return writeU128(arg[0], m_state.balance); + else if (_fun == "eth.getBlockHash"_yulstring) + { + if (arg[0] >= m_state.blockNumber || arg[0] + 256 < m_state.blockNumber) + return 1; + else + return writeU256(arg[1], 0xaaaaaaaa + u256(arg[0] - m_state.blockNumber - 256)); + } + else if (_fun == "eth.call"_yulstring) + { + // TODO read args from memory + // TODO use readAddress to read address. + logTrace(eth::Instruction::CALL, {}); + return arg[0] & 1; + } + else if (_fun == "eth.callDataCopy"_yulstring) + { + if (arg[1] + arg[2] < arg[1] || arg[1] + arg[2] > m_state.calldata.size()) + throw ExplicitlyTerminated(); + if (accessMemory(arg[0], arg[2])) + copyZeroExtended( + m_state.memory, m_state.calldata, + size_t(arg[0]), size_t(arg[1]), size_t(arg[2]) + ); + return {}; + } + else if (_fun == "eth.getCallDataSize"_yulstring) + return m_state.calldata.size(); + else if (_fun == "eth.callCode"_yulstring) + { + // TODO read args from memory + // TODO use readAddress to read address. + logTrace(eth::Instruction::CALLCODE, {}); + return arg[0] & 1; + } + else if (_fun == "eth.callDelegate"_yulstring) + { + // TODO read args from memory + // TODO use readAddress to read address. + logTrace(eth::Instruction::DELEGATECALL, {}); + return arg[0] & 1; + } + else if (_fun == "eth.callStatic"_yulstring) + { + // TODO read args from memory + // TODO use readAddress to read address. + logTrace(eth::Instruction::STATICCALL, {}); + return arg[0] & 1; + } + else if (_fun == "eth.storageStore"_yulstring) + { + m_state.storage[h256(readU256(arg[0]))] = readU256((arg[1])); + return 0; + } + else if (_fun == "eth.storageLoad"_yulstring) + return writeU256(arg[1], m_state.storage[h256(readU256(arg[0]))]); + else if (_fun == "eth.getCaller"_yulstring) + // TODO should this only write 20 bytes? + return writeAddress(arg[0], m_state.caller); + else if (_fun == "eth.getCallValue"_yulstring) + return writeU128(arg[0], m_state.callvalue); + else if (_fun == "eth.codeCopy"_yulstring) + { + if (accessMemory(arg[0], arg[2])) + copyZeroExtended( + m_state.memory, m_state.code, + size_t(arg[0]), size_t(arg[1]), size_t(arg[2]) + ); + return 0; + } + else if (_fun == "eth.getCodeSize"_yulstring) + return writeU256(arg[0], m_state.code.size()); + else if (_fun == "eth.getBlockCoinbase"_yulstring) + return writeAddress(arg[0], m_state.coinbase); + else if (_fun == "eth.create"_yulstring) + { + // TODO access memory + // TODO use writeAddress to store resulting address + logTrace(eth::Instruction::CREATE, {}); + return 0xcccccc + arg[1]; + } + else if (_fun == "eth.getBlockDifficulty"_yulstring) + return writeU256(arg[0], m_state.difficulty); + else if (_fun == "eth.externalCodeCopy"_yulstring) + { + // TODO use readAddress to read address. + if (accessMemory(arg[1], arg[3])) + // TODO this way extcodecopy and codecopy do the same thing. + copyZeroExtended( + m_state.memory, m_state.code, + size_t(arg[1]), size_t(arg[2]), size_t(arg[3]) + ); + return 0; + } + else if (_fun == "eth.getExternalCodeSize"_yulstring) + return u256(keccak256(h256(readAddress(arg[0])))) & 0xffffff; + else if (_fun == "eth.getGasLeft"_yulstring) + return 0x99; + else if (_fun == "eth.getBlockGasLimit"_yulstring) + return uint64_t(m_state.gaslimit); + else if (_fun == "eth.getTxGasPrice"_yulstring) + return writeU128(arg[0], m_state.gasprice); + else if (_fun == "eth.log"_yulstring) + { + logTrace(eth::Instruction::LOG0, {}); + return 0; + } + else if (_fun == "eth.getBlockNumber"_yulstring) + return m_state.blockNumber; + else if (_fun == "eth.getTxOrigin"_yulstring) + return writeAddress(arg[0], m_state.origin); + else if (_fun == "eth.finish"_yulstring) + { + bytes data; + if (accessMemory(arg[0], arg[1])) + data = readMemory(arg[0], arg[1]); + logTrace(eth::Instruction::RETURN, {}, data); + throw ExplicitlyTerminated(); + } + else if (_fun == "eth.revert"_yulstring) + { + bytes data; + if (accessMemory(arg[0], arg[1])) + data = readMemory(arg[0], arg[1]); + logTrace(eth::Instruction::REVERT, {}, data); + throw ExplicitlyTerminated(); + } + else if (_fun == "eth.getReturnDataSize"_yulstring) + return m_state.returndata.size(); + else if (_fun == "eth.returnDataCopy"_yulstring) + { + if (arg[1] + arg[2] < arg[1] || arg[1] + arg[2] > m_state.returndata.size()) + throw ExplicitlyTerminated(); + if (accessMemory(arg[0], arg[2])) + copyZeroExtended( + m_state.memory, m_state.calldata, + size_t(arg[0]), size_t(arg[1]), size_t(arg[2]) + ); + return {}; + } + else if (_fun == "eth.selfDestruct"_yulstring) + { + // TODO use readAddress to read address. + logTrace(eth::Instruction::SELFDESTRUCT, {}); + throw ExplicitlyTerminated(); + } + else if (_fun == "eth.getBlockTimestamp"_yulstring) + return m_state.timestamp; + + yulAssert(false, "Unknown builtin: " + _fun.str() + " (or implementation did not return)"); + + return 0; +} + +bool EWasmBuiltinInterpreter::accessMemory(u256 const& _offset, u256 const& _size) +{ + if (((_offset + _size) >= _offset) && ((_offset + _size + 0x1f) >= (_offset + _size))) + { + u256 newSize = (_offset + _size + 0x1f) & ~u256(0x1f); + m_state.msize = max(m_state.msize, newSize); + return _size <= 0xffff; + } + else + m_state.msize = u256(-1); + + return false; +} + +bytes EWasmBuiltinInterpreter::readMemory(uint64_t _offset, uint64_t _size) +{ + yulAssert(_size <= 0xffff, "Too large read."); + bytes data(size_t(_size), uint8_t(0)); + for (size_t i = 0; i < data.size(); ++i) + data[i] = m_state.memory[_offset + i]; + return data; +} + +uint64_t EWasmBuiltinInterpreter::readMemoryWord(uint64_t _offset) +{ + uint64_t r = 0; + for (size_t i = 0; i < 8; i++) + r |= uint64_t(m_state.memory[_offset + i]) << (i * 8); + return r; +} + +void EWasmBuiltinInterpreter::writeMemoryWord(uint64_t _offset, uint64_t _value) +{ + for (size_t i = 0; i < 8; i++) + m_state.memory[_offset + i] = uint8_t((_value >> (i * 8)) & 0xff); +} + +u256 EWasmBuiltinInterpreter::writeU256(uint64_t _offset, u256 _value, size_t _croppedTo) +{ + accessMemory(_offset, _croppedTo); + for (size_t i = 0; i < _croppedTo; i++) + { + m_state.memory[_offset + _croppedTo - 1 - i] = uint8_t(_value & 0xff); + _value >>= 8; + } + + return {}; +} + +u256 EWasmBuiltinInterpreter::readU256(uint64_t _offset, size_t _croppedTo) +{ + accessMemory(_offset, _croppedTo); + u256 value; + for (size_t i = 0; i < _croppedTo; i++) + value = (value << 8) | m_state.memory[_offset + i]; + + return value; +} + +void EWasmBuiltinInterpreter::logTrace(dev::eth::Instruction _instruction, std::vector const& _arguments, bytes const& _data) +{ + logTrace(dev::eth::instructionInfo(_instruction).name, _arguments, _data); +} + +void EWasmBuiltinInterpreter::logTrace(std::string const& _pseudoInstruction, std::vector const& _arguments, bytes const& _data) +{ + string message = _pseudoInstruction + "("; + for (size_t i = 0; i < _arguments.size(); ++i) + message += (i > 0 ? ", " : "") + formatNumber(_arguments[i]); + message += ")"; + if (!_data.empty()) + message += " [" + toHex(_data) + "]"; + m_state.trace.emplace_back(std::move(message)); + if (m_state.maxTraceSize > 0 && m_state.trace.size() >= m_state.maxTraceSize) + { + m_state.trace.emplace_back("Trace size limit reached."); + throw TraceLimitReached(); + } +} diff --git a/test/tools/yulInterpreter/EWasmBuiltinInterpreter.h b/test/tools/yulInterpreter/EWasmBuiltinInterpreter.h new file mode 100644 index 000000000..e96cbbae2 --- /dev/null +++ b/test/tools/yulInterpreter/EWasmBuiltinInterpreter.h @@ -0,0 +1,108 @@ +/* + This file is part of solidity. + + solidity is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + solidity is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with solidity. If not, see . +*/ +/** + * Yul interpreter module that evaluates EWasm builtins. + */ + +#pragma once + +#include + +#include + +#include + +namespace dev +{ +namespace eth +{ +enum class Instruction: uint8_t; +} +} + +namespace yul +{ +class YulString; +struct BuiltinFunctionForEVM; + +namespace test +{ + +struct InterpreterState; + +/** + * Interprets EWasm builtins based on the current state and logs instructions with + * side-effects. + * + * Since this is mainly meant to be used for differential fuzz testing, it is focused + * on a single contract only, does not do any gas counting and differs from the correct + * implementation in many ways: + * + * - If memory access to a "large" memory position is performed, a deterministic + * value is returned. Data that is stored in a "large" memory position is not + * retained. + * - The blockhash instruction returns a fixed value if the argument is in range. + * - Extcodesize returns a deterministic value depending on the address. + * - Extcodecopy copies a deterministic value depending on the address. + * - And many other things + * + * The main focus is that the generated execution trace is the same for equivalent executions + * and likely to be different for non-equivalent executions. + */ +class EWasmBuiltinInterpreter +{ +public: + explicit EWasmBuiltinInterpreter(InterpreterState& _state): + m_state(_state) + {} + /// Evaluate builtin function + dev::u256 evalBuiltin(YulString _fun, std::vector const& _arguments); + +private: + /// Checks if the memory access is not too large for the interpreter and adjusts + /// msize accordingly. + /// @returns false if the amount of bytes read is lager than 0xffff + bool accessMemory(dev::u256 const& _offset, dev::u256 const& _size = 32); + /// @returns the memory contents at the provided address. + /// Does not adjust msize, use @a accessMemory for that + dev::bytes readMemory(uint64_t _offset, uint64_t _size = 32); + /// @returns the memory contents at the provided address (little-endian). + /// Does not adjust msize, use @a accessMemory for that + uint64_t readMemoryWord(uint64_t _offset); + /// Writes a word to memory (little-endian) + /// Does not adjust msize, use @a accessMemory for that + void writeMemoryWord(uint64_t _offset, uint64_t _value); + + /// Helper for eth.* builtins. Writes to memory (big-endian) and always returns zero. + dev::u256 writeU256(uint64_t _offset, dev::u256 _value, size_t _croppedTo = 32); + dev::u256 writeU128(uint64_t _offset, dev::u256 _value) { return writeU256(_offset, std::move(_value), 16); } + dev::u256 writeAddress(uint64_t _offset, dev::u256 _value) { return writeU256(_offset, std::move(_value), 20); } + /// Helper for eth.* builtins. Reads from memory (big-endian) and returns the value; + dev::u256 readU256(uint64_t _offset, size_t _croppedTo = 32); + dev::u256 readU128(uint64_t _offset) { return readU256(_offset, 16); } + dev::u256 readAddress(uint64_t _offset) { return readU256(_offset, 20); } + + void logTrace(dev::eth::Instruction _instruction, std::vector const& _arguments = {}, dev::bytes const& _data = {}); + /// Appends a log to the trace representing an instruction or similar operation by string, + /// with arguments and auxiliary data (if nonempty). + void logTrace(std::string const& _pseudoInstruction, std::vector const& _arguments = {}, dev::bytes const& _data = {}); + + InterpreterState& m_state; +}; + +} +} diff --git a/test/tools/yulInterpreter/Interpreter.cpp b/test/tools/yulInterpreter/Interpreter.cpp index d1b361c4e..93eee5f80 100644 --- a/test/tools/yulInterpreter/Interpreter.cpp +++ b/test/tools/yulInterpreter/Interpreter.cpp @@ -21,11 +21,13 @@ #include #include +#include #include #include #include #include +#include #include @@ -229,12 +231,21 @@ void ExpressionEvaluator::operator()(FunctionCall const& _funCall) evaluateArgs(_funCall.arguments); if (EVMDialect const* dialect = dynamic_cast(&m_dialect)) + { if (BuiltinFunctionForEVM const* fun = dialect->builtin(_funCall.functionName.name)) { EVMInstructionInterpreter interpreter(m_state); setValue(interpreter.evalBuiltin(*fun, values())); return; } + } + else if (WasmDialect const* dialect = dynamic_cast(&m_dialect)) + if (dialect->builtin(_funCall.functionName.name)) + { + EWasmBuiltinInterpreter interpreter(m_state); + setValue(interpreter.evalBuiltin(_funCall.functionName.name, values())); + return; + } auto [functionScopes, fun] = findFunctionAndScope(_funCall.functionName.name);