diff --git a/test/tools/ossfuzz/CMakeLists.txt b/test/tools/ossfuzz/CMakeLists.txt index 74787f5fd..27d8ba81c 100644 --- a/test/tools/ossfuzz/CMakeLists.txt +++ b/test/tools/ossfuzz/CMakeLists.txt @@ -33,7 +33,7 @@ target_link_libraries(strictasm_assembly_ossfuzz PRIVATE yul FuzzingEngine.a) add_executable(yul_proto_ossfuzz yulProtoFuzzer.cpp protoToYul.cpp yulProto.pb.cc) target_include_directories(yul_proto_ossfuzz PRIVATE /src/libprotobuf-mutator /src/LPM/external.protobuf/include) -target_link_libraries(yul_proto_ossfuzz PRIVATE yul evmasm +target_link_libraries(yul_proto_ossfuzz PRIVATE yul evmasm solidity protobuf-mutator-libfuzzer.a protobuf-mutator.a protobuf.a diff --git a/test/tools/ossfuzz/protoToYul.cpp b/test/tools/ossfuzz/protoToYul.cpp index 6531c1ff0..c4a137088 100644 --- a/test/tools/ossfuzz/protoToYul.cpp +++ b/test/tools/ossfuzz/protoToYul.cpp @@ -16,10 +16,13 @@ */ #include +#include #include +#include using namespace std; using namespace yul::test::yul_fuzzer; +using namespace dev::solidity; string ProtoConverter::createHex(string const& _hexBytes) const { @@ -57,25 +60,25 @@ bool ProtoConverter::isCaseLiteralUnique(Literal const& _x) bool isEmptyString = false; switch (_x.literal_oneof_case()) { - case Literal::kIntval: - tmp = std::to_string(_x.intval()); - break; - case Literal::kHexval: - tmp = "0x" + createHex(_x.hexval()); - break; - case Literal::kStrval: - tmp = createAlphaNum(_x.strval()); - if (tmp.empty()) - { - isEmptyString = true; - tmp = std::to_string(0); - } - else - tmp = "\"" + tmp + "\""; - break; - case Literal::LITERAL_ONEOF_NOT_SET: - tmp = std::to_string(1); - break; + case Literal::kIntval: + tmp = std::to_string(_x.intval()); + break; + case Literal::kHexval: + tmp = "0x" + createHex(_x.hexval()); + break; + case Literal::kStrval: + tmp = createAlphaNum(_x.strval()); + if (tmp.empty()) + { + isEmptyString = true; + tmp = std::to_string(0); + } + else + tmp = "\"" + tmp + "\""; + break; + case Literal::LITERAL_ONEOF_NOT_SET: + tmp = std::to_string(1); + break; } if (!_x.has_strval() || isEmptyString) isUnique = m_switchLiteralSetPerScope.top().insert(dev::u256(tmp)).second; @@ -89,24 +92,25 @@ void ProtoConverter::visit(Literal const& _x) { switch (_x.literal_oneof_case()) { - case Literal::kIntval: - m_output << _x.intval(); - break; - case Literal::kHexval: - m_output << "0x" << createHex(_x.hexval()); - break; - case Literal::kStrval: - m_output << "\"" << createAlphaNum(_x.strval()) << "\""; - break; - case Literal::LITERAL_ONEOF_NOT_SET: - m_output << "1"; - break; + case Literal::kIntval: + m_output << _x.intval(); + break; + case Literal::kHexval: + m_output << "0x" << createHex(_x.hexval()); + break; + case Literal::kStrval: + m_output << "\"" << createAlphaNum(_x.strval()) << "\""; + break; + case Literal::LITERAL_ONEOF_NOT_SET: + m_output << "1"; + break; } } -// Reference any index in [0, m_numLiveVars-1] or [0, m_numLiveVars) +// Reference any index in [0, m_numLiveVars-1] void ProtoConverter::visit(VarRef const& _x) { + yulAssert(m_numLiveVars > 0, "Proto fuzzer: No variables to reference."); m_output << "x_" << (static_cast(_x.varnum()) % m_numLiveVars); } @@ -114,27 +118,30 @@ void ProtoConverter::visit(Expression const& _x) { switch (_x.expr_oneof_case()) { - case Expression::kVarref: - visit(_x.varref()); - break; - case Expression::kCons: - visit(_x.cons()); - break; - case Expression::kBinop: - visit(_x.binop()); - break; - case Expression::kUnop: - visit(_x.unop()); - break; - case Expression::kTop: - visit(_x.top()); - break; - case Expression::kNop: - visit(_x.nop()); - break; - case Expression::EXPR_ONEOF_NOT_SET: - m_output << "1"; - break; + case Expression::kVarref: + visit(_x.varref()); + break; + case Expression::kCons: + visit(_x.cons()); + break; + case Expression::kBinop: + visit(_x.binop()); + break; + case Expression::kUnop: + visit(_x.unop()); + break; + case Expression::kTop: + visit(_x.top()); + break; + case Expression::kNop: + visit(_x.nop()); + break; + case Expression::kFuncExpr: + visit(_x.func_expr()); + break; + case Expression::EXPR_ONEOF_NOT_SET: + m_output << "1"; + break; } } @@ -142,72 +149,72 @@ void ProtoConverter::visit(BinaryOp const& _x) { switch (_x.op()) { - case BinaryOp::ADD: - m_output << "add"; - break; - case BinaryOp::SUB: - m_output << "sub"; - break; - case BinaryOp::MUL: - m_output << "mul"; - break; - case BinaryOp::DIV: - m_output << "div"; - break; - case BinaryOp::MOD: - m_output << "mod"; - break; - case BinaryOp::XOR: - m_output << "xor"; - break; - case BinaryOp::AND: - m_output << "and"; - break; - case BinaryOp::OR: - m_output << "or"; - break; - case BinaryOp::EQ: - m_output << "eq"; - break; - case BinaryOp::LT: - m_output << "lt"; - break; - case BinaryOp::GT: - m_output << "gt"; - break; - case BinaryOp::SHR: - m_output << "shr"; - break; - case BinaryOp::SHL: - m_output << "shl"; - break; - case BinaryOp::SAR: - m_output << "sar"; - break; - case BinaryOp::SDIV: - m_output << "sdiv"; - break; - case BinaryOp::SMOD: - m_output << "smod"; - break; - case BinaryOp::EXP: - m_output << "exp"; - break; - case BinaryOp::SLT: - m_output << "slt"; - break; - case BinaryOp::SGT: - m_output << "sgt"; - break; - case BinaryOp::BYTE: - m_output << "byte"; - break; - case BinaryOp::SI: - m_output << "signextend"; - break; - case BinaryOp::KECCAK: - m_output << "keccak256"; - break; + case BinaryOp::ADD: + m_output << "add"; + break; + case BinaryOp::SUB: + m_output << "sub"; + break; + case BinaryOp::MUL: + m_output << "mul"; + break; + case BinaryOp::DIV: + m_output << "div"; + break; + case BinaryOp::MOD: + m_output << "mod"; + break; + case BinaryOp::XOR: + m_output << "xor"; + break; + case BinaryOp::AND: + m_output << "and"; + break; + case BinaryOp::OR: + m_output << "or"; + break; + case BinaryOp::EQ: + m_output << "eq"; + break; + case BinaryOp::LT: + m_output << "lt"; + break; + case BinaryOp::GT: + m_output << "gt"; + break; + case BinaryOp::SHR: + m_output << "shr"; + break; + case BinaryOp::SHL: + m_output << "shl"; + break; + case BinaryOp::SAR: + m_output << "sar"; + break; + case BinaryOp::SDIV: + m_output << "sdiv"; + break; + case BinaryOp::SMOD: + m_output << "smod"; + break; + case BinaryOp::EXP: + m_output << "exp"; + break; + case BinaryOp::SLT: + m_output << "slt"; + break; + case BinaryOp::SGT: + m_output << "sgt"; + break; + case BinaryOp::BYTE: + m_output << "byte"; + break; + case BinaryOp::SI: + m_output << "signextend"; + break; + case BinaryOp::KECCAK: + m_output << "keccak256"; + break; } m_output << "("; visit(_x.left()); @@ -216,7 +223,6 @@ void ProtoConverter::visit(BinaryOp const& _x) m_output << ")"; } -// New var numbering starts from x_10 void ProtoConverter::visit(VarDecl const& _x) { m_output << "let x_" << m_numLiveVars << " := "; @@ -226,66 +232,104 @@ void ProtoConverter::visit(VarDecl const& _x) m_output << "\n"; } +void ProtoConverter::visit(EmptyVarDecl const&) +{ + m_output << "let x_" << m_numLiveVars++ << "\n"; + m_numVarsPerScope.top()++; +} + +void ProtoConverter::visit(MultiVarDecl const& _x) +{ + size_t funcId = (static_cast(_x.func_index()) % m_functionVecMultiReturnValue.size()); + + int numInParams = m_functionVecMultiReturnValue.at(funcId).first; + int numOutParams = m_functionVecMultiReturnValue.at(funcId).second; + + // Ensure that the chosen function returns at least 2 and at most 4 values + yulAssert( + ((numOutParams >= 2) && (numOutParams <= 4)), + "Proto fuzzer: Multi variable declaration calls a function with either too few or too many output params." + ); + + // We must start variable numbering past the number of live variables at this point in time. + // This creates let x_p,..., x_k := + // (k-p)+1 = numOutParams + m_output << + "let " << + YulUtilFunctions::suffixedVariableNameList("x_", m_numLiveVars, m_numLiveVars + numOutParams) << + " := "; + + // Create RHS of multi var decl + m_output << "foo_" << functionTypeToString(NumFunctionReturns::Multiple) << "_" << funcId; + m_output << "("; + visitFunctionInputParams(_x, numInParams); + m_output << ")\n"; + // Update live variables in scope and in total to account for the variables created by this + // multi variable declaration. + m_numVarsPerScope.top() += numOutParams; + m_numLiveVars += numOutParams; +} + void ProtoConverter::visit(TypedVarDecl const& _x) { m_output << "let x_" << m_numLiveVars; switch (_x.type()) { - case TypedVarDecl::BOOL: - m_output << ": bool := "; - visit(_x.expr()); - m_output << " : bool\n"; - break; - case TypedVarDecl::S8: - m_output << ": s8 := "; - visit(_x.expr()); - m_output << " : s8\n"; - break; - case TypedVarDecl::S32: - m_output << ": s32 := "; - visit(_x.expr()); - m_output << " : s32\n"; - break; - case TypedVarDecl::S64: - m_output << ": s64 := "; - visit(_x.expr()); - m_output << " : s64\n"; - break; - case TypedVarDecl::S128: - m_output << ": s128 := "; - visit(_x.expr()); - m_output << " : s128\n"; - break; - case TypedVarDecl::S256: - m_output << ": s256 := "; - visit(_x.expr()); - m_output << " : s256\n"; - break; - case TypedVarDecl::U8: - m_output << ": u8 := "; - visit(_x.expr()); - m_output << " : u8\n"; - break; - case TypedVarDecl::U32: - m_output << ": u32 := "; - visit(_x.expr()); - m_output << " : u32\n"; - break; - case TypedVarDecl::U64: - m_output << ": u64 := "; - visit(_x.expr()); - m_output << " : u64\n"; - break; - case TypedVarDecl::U128: - m_output << ": u128 := "; - visit(_x.expr()); - m_output << " : u128\n"; - break; - case TypedVarDecl::U256: - m_output << ": u256 := "; - visit(_x.expr()); - m_output << " : u256\n"; - break; + case TypedVarDecl::BOOL: + m_output << ": bool := "; + visit(_x.expr()); + m_output << " : bool\n"; + break; + case TypedVarDecl::S8: + m_output << ": s8 := "; + visit(_x.expr()); + m_output << " : s8\n"; + break; + case TypedVarDecl::S32: + m_output << ": s32 := "; + visit(_x.expr()); + m_output << " : s32\n"; + break; + case TypedVarDecl::S64: + m_output << ": s64 := "; + visit(_x.expr()); + m_output << " : s64\n"; + break; + case TypedVarDecl::S128: + m_output << ": s128 := "; + visit(_x.expr()); + m_output << " : s128\n"; + break; + case TypedVarDecl::S256: + m_output << ": s256 := "; + visit(_x.expr()); + m_output << " : s256\n"; + break; + case TypedVarDecl::U8: + m_output << ": u8 := "; + visit(_x.expr()); + m_output << " : u8\n"; + break; + case TypedVarDecl::U32: + m_output << ": u32 := "; + visit(_x.expr()); + m_output << " : u32\n"; + break; + case TypedVarDecl::U64: + m_output << ": u64 := "; + visit(_x.expr()); + m_output << " : u64\n"; + break; + case TypedVarDecl::U128: + m_output << ": u128 := "; + visit(_x.expr()); + m_output << " : u128\n"; + break; + case TypedVarDecl::U256: + m_output << ": u256 := "; + visit(_x.expr()); + m_output << " : u256\n"; + break; } m_numVarsPerScope.top()++; m_numLiveVars++; @@ -295,27 +339,27 @@ void ProtoConverter::visit(UnaryOp const& _x) { switch (_x.op()) { - case UnaryOp::NOT: - m_output << "not"; - break; - case UnaryOp::MLOAD: - m_output << "mload"; - break; - case UnaryOp::SLOAD: - m_output << "sload"; - break; - case UnaryOp::ISZERO: - m_output << "iszero"; - break; - case UnaryOp::CALLDATALOAD: - m_output << "calldataload"; - break; - case UnaryOp::EXTCODESIZE: - m_output << "extcodesize"; - break; - case UnaryOp::EXTCODEHASH: - m_output << "extcodehash"; - break; + case UnaryOp::NOT: + m_output << "not"; + break; + case UnaryOp::MLOAD: + m_output << "mload"; + break; + case UnaryOp::SLOAD: + m_output << "sload"; + break; + case UnaryOp::ISZERO: + m_output << "iszero"; + break; + case UnaryOp::CALLDATALOAD: + m_output << "calldataload"; + break; + case UnaryOp::EXTCODESIZE: + m_output << "extcodesize"; + break; + case UnaryOp::EXTCODEHASH: + m_output << "extcodehash"; + break; } m_output << "("; visit(_x.operand()); @@ -326,12 +370,12 @@ void ProtoConverter::visit(TernaryOp const& _x) { switch (_x.op()) { - case TernaryOp::ADDM: - m_output << "addmod"; - break; - case TernaryOp::MULM: - m_output << "mulmod"; - break; + case TernaryOp::ADDM: + m_output << "addmod"; + break; + case TernaryOp::MULM: + m_output << "mulmod"; + break; } m_output << "("; visit(_x.arg1()); @@ -346,24 +390,24 @@ void ProtoConverter::visit(NullaryOp const& _x) { switch (_x.op()) { - case NullaryOp::PC: - m_output << "pc()"; - break; - case NullaryOp::MSIZE: - m_output << "msize()"; - break; - case NullaryOp::GAS: - m_output << "gas()"; - break; - case NullaryOp::CALLDATASIZE: - m_output << "calldatasize()"; - break; - case NullaryOp::CODESIZE: - m_output << "codesize()"; - break; - case NullaryOp::RETURNDATASIZE: - m_output << "returndatasize()"; - break; + case NullaryOp::PC: + m_output << "pc()"; + break; + case NullaryOp::MSIZE: + m_output << "msize()"; + break; + case NullaryOp::GAS: + m_output << "gas()"; + break; + case NullaryOp::CALLDATASIZE: + m_output << "calldatasize()"; + break; + case NullaryOp::CODESIZE: + m_output << "codesize()"; + break; + case NullaryOp::RETURNDATASIZE: + m_output << "returndatasize()"; + break; } } @@ -371,15 +415,15 @@ void ProtoConverter::visit(CopyFunc const& _x) { switch (_x.ct()) { - case CopyFunc::CALLDATA: - m_output << "calldatacopy"; - break; - case CopyFunc::CODE: - m_output << "codecopy"; - break; - case CopyFunc::RETURNDATA: - m_output << "returndatacopy"; - break; + case CopyFunc::CALLDATA: + m_output << "calldatacopy"; + break; + case CopyFunc::CODE: + m_output << "codecopy"; + break; + case CopyFunc::RETURNDATA: + m_output << "returndatacopy"; + break; } m_output << "("; visit(_x.target()); @@ -408,66 +452,66 @@ void ProtoConverter::visit(LogFunc const& _x) { switch (_x.num_topics()) { - case LogFunc::ZERO: - m_output << "log0"; - m_output << "("; - visit(_x.pos()); - m_output << ", "; - visit(_x.size()); - m_output << ")\n"; - break; - case LogFunc::ONE: - m_output << "log1"; - m_output << "("; - visit(_x.pos()); - m_output << ", "; - visit(_x.size()); - m_output << ", "; - visit(_x.t1()); - m_output << ")\n"; - break; - case LogFunc::TWO: - m_output << "log2"; - m_output << "("; - visit(_x.pos()); - m_output << ", "; - visit(_x.size()); - m_output << ", "; - visit(_x.t1()); - m_output << ", "; - visit(_x.t2()); - m_output << ")\n"; - break; - case LogFunc::THREE: - m_output << "log3"; - m_output << "("; - visit(_x.pos()); - m_output << ", "; - visit(_x.size()); - m_output << ", "; - visit(_x.t1()); - m_output << ", "; - visit(_x.t2()); - m_output << ", "; - visit(_x.t3()); - m_output << ")\n"; - break; - case LogFunc::FOUR: - m_output << "log4"; - m_output << "("; - visit(_x.pos()); - m_output << ", "; - visit(_x.size()); - m_output << ", "; - visit(_x.t1()); - m_output << ", "; - visit(_x.t2()); - m_output << ", "; - visit(_x.t3()); - m_output << ", "; - visit(_x.t4()); - m_output << ")\n"; - break; + case LogFunc::ZERO: + m_output << "log0"; + m_output << "("; + visit(_x.pos()); + m_output << ", "; + visit(_x.size()); + m_output << ")\n"; + break; + case LogFunc::ONE: + m_output << "log1"; + m_output << "("; + visit(_x.pos()); + m_output << ", "; + visit(_x.size()); + m_output << ", "; + visit(_x.t1()); + m_output << ")\n"; + break; + case LogFunc::TWO: + m_output << "log2"; + m_output << "("; + visit(_x.pos()); + m_output << ", "; + visit(_x.size()); + m_output << ", "; + visit(_x.t1()); + m_output << ", "; + visit(_x.t2()); + m_output << ")\n"; + break; + case LogFunc::THREE: + m_output << "log3"; + m_output << "("; + visit(_x.pos()); + m_output << ", "; + visit(_x.size()); + m_output << ", "; + visit(_x.t1()); + m_output << ", "; + visit(_x.t2()); + m_output << ", "; + visit(_x.t3()); + m_output << ")\n"; + break; + case LogFunc::FOUR: + m_output << "log4"; + m_output << "("; + visit(_x.pos()); + m_output << ", "; + visit(_x.size()); + m_output << ", "; + visit(_x.t1()); + m_output << ", "; + visit(_x.t2()); + m_output << ", "; + visit(_x.t3()); + m_output << ", "; + visit(_x.t4()); + m_output << ")\n"; + break; } } @@ -479,6 +523,115 @@ void ProtoConverter::visit(AssignmentStatement const& _x) m_output << "\n"; } +// Called at the time function call is being made +template +void ProtoConverter::visitFunctionInputParams(T const& _x, unsigned _numInputParams) +{ + // We reverse the order of function input visits since it helps keep this switch case concise. + switch (_numInputParams) + { + case 4: + visit(_x.in_param4()); + m_output << ", "; + BOOST_FALLTHROUGH; + case 3: + visit(_x.in_param3()); + m_output << ", "; + BOOST_FALLTHROUGH; + case 2: + visit(_x.in_param2()); + m_output << ", "; + BOOST_FALLTHROUGH; + case 1: + visit(_x.in_param1()); + BOOST_FALLTHROUGH; + case 0: + break; + default: + yulAssert(false, "Proto fuzzer: Function call with too many input parameters."); + break; + } +} + +void ProtoConverter::visit(MultiAssignment const& _x) +{ + size_t funcId = (static_cast(_x.func_index()) % m_functionVecMultiReturnValue.size()); + unsigned numInParams = m_functionVecMultiReturnValue.at(funcId).first; + unsigned numOutParams = m_functionVecMultiReturnValue.at(funcId).second; + yulAssert( + ((numOutParams >= 2) && (numOutParams <= 4)), + "Proto fuzzer: Multi assignment calls a function that has either too many or too few output parameters." + ); + + // Convert LHS of multi assignment + // We reverse the order of out param visits since the order does not matter. This helps reduce the size of this + // switch statement. + switch (numOutParams) + { + case 4: + visit(_x.out_param4()); + m_output << ", "; + BOOST_FALLTHROUGH; + case 3: + visit(_x.out_param3()); + m_output << ", "; + BOOST_FALLTHROUGH; + case 2: + visit(_x.out_param2()); + m_output << ", "; + visit(_x.out_param1()); + break; + default: + yulAssert(false, "Proto fuzzer: Function call with too many input parameters."); + break; + } + m_output << " := "; + + // Convert RHS of multi assignment + m_output << "foo_" << functionTypeToString(NumFunctionReturns::Multiple) << "_" << funcId; + m_output << "("; + visitFunctionInputParams(_x, numInParams); + m_output << ")\n"; +} + +void ProtoConverter::visit(FunctionCallNoReturnVal const& _x) +{ + size_t funcId = (static_cast(_x.func_index()) % m_functionVecNoReturnValue.size()); + unsigned numInParams = m_functionVecNoReturnValue.at(funcId); + m_output << "foo_" << functionTypeToString(NumFunctionReturns::None) << "_" << funcId; + m_output << "("; + visitFunctionInputParams(_x, numInParams); + m_output << ")\n"; +} + +void ProtoConverter::visit(FunctionCallSingleReturnVal const& _x) +{ + size_t funcId = (static_cast(_x.func_index()) % m_functionVecSingleReturnValue.size()); + unsigned numInParams = m_functionVecSingleReturnValue.at(funcId); + m_output << "foo_" << functionTypeToString(NumFunctionReturns::Single) << "_" << funcId; + m_output << "("; + visitFunctionInputParams(_x, numInParams); + m_output << ")"; +} + +void ProtoConverter::visit(FunctionCall const& _x) +{ + switch (_x.functioncall_oneof_case()) + { + case FunctionCall::kCallZero: + visit(_x.call_zero()); + break; + case FunctionCall::kCallMultidecl: + visit(_x.call_multidecl()); + break; + case FunctionCall::kCallMultiassign: + visit(_x.call_multiassign()); + break; + case FunctionCall::FUNCTIONCALL_ONEOF_NOT_SET: + break; + } +} + void ProtoConverter::visit(IfStmt const& _x) { m_output << "if "; @@ -491,15 +644,15 @@ void ProtoConverter::visit(StoreFunc const& _x) { switch (_x.st()) { - case StoreFunc::MSTORE: - m_output << "mstore("; - break; - case StoreFunc::SSTORE: - m_output << "sstore("; - break; - case StoreFunc::MSTORE8: - m_output << "mstore8("; - break; + case StoreFunc::MSTORE: + m_output << "mstore("; + break; + case StoreFunc::SSTORE: + m_output << "sstore("; + break; + case StoreFunc::MSTORE8: + m_output << "mstore8("; + break; } visit(_x.loc()); m_output << ", "; @@ -509,6 +662,8 @@ void ProtoConverter::visit(StoreFunc const& _x) void ProtoConverter::visit(ForStmt const& _x) { + // Boilerplate for loop that limits the number of iterations to a maximum of 4. + // TODO: Generalize for loop init, condition, and post blocks. std::string loopVarName("i_" + std::to_string(m_numNestedForLoops++)); m_output << "for { let " << loopVarName << " := 0 } " << "lt(" << loopVarName << ", 0x60) " @@ -558,12 +713,12 @@ void ProtoConverter::visit(StopInvalidStmt const& _x) { switch (_x.stmt()) { - case StopInvalidStmt::STOP: - m_output << "stop()\n"; - break; - case StopInvalidStmt::INVALID: - m_output << "invalid()\n"; - break; + case StopInvalidStmt::STOP: + m_output << "stop()\n"; + break; + case StopInvalidStmt::INVALID: + m_output << "invalid()\n"; + break; } } @@ -571,12 +726,12 @@ void ProtoConverter::visit(RetRevStmt const& _x) { switch (_x.stmt()) { - case RetRevStmt::RETURN: - m_output << "return"; - break; - case RetRevStmt::REVERT: - m_output << "revert"; - break; + case RetRevStmt::RETURN: + m_output << "return"; + break; + case RetRevStmt::REVERT: + m_output << "revert"; + break; } m_output << "("; visit(_x.pos()); @@ -597,17 +752,17 @@ void ProtoConverter::visit(TerminatingStmt const& _x) { switch (_x.term_oneof_case()) { - case TerminatingStmt::kStopInvalid: - visit(_x.stop_invalid()); - break; - case TerminatingStmt::kRetRev: - visit(_x.ret_rev()); - break; - case TerminatingStmt::kSelfDes: - visit(_x.self_des()); - break; - case TerminatingStmt::TERM_ONEOF_NOT_SET: - break; + case TerminatingStmt::kStopInvalid: + visit(_x.stop_invalid()); + break; + case TerminatingStmt::kRetRev: + visit(_x.ret_rev()); + break; + case TerminatingStmt::kSelfDes: + visit(_x.self_des()); + break; + case TerminatingStmt::TERM_ONEOF_NOT_SET: + break; } } @@ -615,49 +770,52 @@ void ProtoConverter::visit(Statement const& _x) { switch (_x.stmt_oneof_case()) { - case Statement::kDecl: - visit(_x.decl()); - break; - case Statement::kAssignment: - visit(_x.assignment()); - break; - case Statement::kIfstmt: - visit(_x.ifstmt()); - break; - case Statement::kStorageFunc: - visit(_x.storage_func()); - break; - case Statement::kBlockstmt: - visit(_x.blockstmt()); - break; - case Statement::kForstmt: - visit(_x.forstmt()); - break; - case Statement::kSwitchstmt: - visit(_x.switchstmt()); - break; - case Statement::kBreakstmt: - if (m_inForScope.top()) - m_output << "break\n"; - break; - case Statement::kContstmt: - if (m_inForScope.top()) - m_output << "continue\n"; - break; - case Statement::kLogFunc: - visit(_x.log_func()); - break; - case Statement::kCopyFunc: - visit(_x.copy_func()); - break; - case Statement::kExtcodeCopy: - visit(_x.extcode_copy()); - break; - case Statement::kTerminatestmt: - visit(_x.terminatestmt()); - break; - case Statement::STMT_ONEOF_NOT_SET: - break; + case Statement::kDecl: + visit(_x.decl()); + break; + case Statement::kAssignment: + visit(_x.assignment()); + break; + case Statement::kIfstmt: + visit(_x.ifstmt()); + break; + case Statement::kStorageFunc: + visit(_x.storage_func()); + break; + case Statement::kBlockstmt: + visit(_x.blockstmt()); + break; + case Statement::kForstmt: + visit(_x.forstmt()); + break; + case Statement::kSwitchstmt: + visit(_x.switchstmt()); + break; + case Statement::kBreakstmt: + if (m_inForScope.top()) + m_output << "break\n"; + break; + case Statement::kContstmt: + if (m_inForScope.top()) + m_output << "continue\n"; + break; + case Statement::kLogFunc: + visit(_x.log_func()); + break; + case Statement::kCopyFunc: + visit(_x.copy_func()); + break; + case Statement::kExtcodeCopy: + visit(_x.extcode_copy()); + break; + case Statement::kTerminatestmt: + visit(_x.terminatestmt()); + break; + case Statement::kFunctioncall: + visit(_x.functioncall()); + break; + case Statement::STMT_ONEOF_NOT_SET: + break; } } @@ -677,28 +835,162 @@ void ProtoConverter::visit(Block const& _x) m_output << "{}\n"; } -void ProtoConverter::visit(Function const& _x) +void ProtoConverter::visit(SpecialBlock const& _x) { - m_output << "{\n" - << "let a,b := foo(calldataload(0),calldataload(32),calldataload(64),calldataload(96),calldataload(128)," - << "calldataload(160),calldataload(192),calldataload(224))\n" - << "sstore(0, a)\n" - << "sstore(32, b)\n" - << "function foo(x_0, x_1, x_2, x_3, x_4, x_5, x_6, x_7) -> x_8, x_9\n"; - visit(_x.statements()); + m_numVarsPerScope.push(0); + m_output << "{\n"; + visit(_x.var()); + if (_x.statements_size() > 0) + for (auto const& st: _x.statements()) + visit(st); + m_numLiveVars -= m_numVarsPerScope.top(); + m_numVarsPerScope.pop(); m_output << "}\n"; } -string ProtoConverter::functionToString(Function const& _input) +template +void ProtoConverter::createFunctionDefAndCall(T const& _x, unsigned _numInParams, unsigned _numOutParams, NumFunctionReturns _type) +{ + yulAssert( + ((_numInParams <= modInputParams - 1) && (_numOutParams <= modOutputParams - 1)), + "Proto fuzzer: Too many function I/O parameters requested." + ); + + // At the time of function definition creation, the number of live variables must be 0. + // This is because we always create only as many variables as we need within function scope. + yulAssert(m_numLiveVars == 0, "Proto fuzzer: Unused live variable found."); + + // Signature + // This creates function foo__(x_0,...,x_n) + m_output << "function foo_" << functionTypeToString(_type) << "_" << m_numFunctionSets; + m_output << "("; + if (_numInParams > 0) + m_output << YulUtilFunctions::suffixedVariableNameList("x_", 0, _numInParams); + m_output << ")"; + + // Book keeping for variables in function scope and in nested scopes + m_numVarsPerScope.push(_numInParams); + m_numLiveVars += _numInParams; + + // This creates -> x_n+1,...,x_r + if (_numOutParams > 0) + { + m_output << " -> " << YulUtilFunctions::suffixedVariableNameList("x_", _numInParams, _numInParams + _numOutParams); + // More bookkeeping + m_numVarsPerScope.top() += _numOutParams; + m_numLiveVars += _numOutParams; + } + m_output << "\n"; + + // Body + visit(_x.statements()); + + // Ensure that variable stack is balanced + m_numLiveVars -= m_numVarsPerScope.top(); + m_numVarsPerScope.pop(); + yulAssert(m_numLiveVars == 0, "Proto fuzzer: Variable stack after function definition is unbalanced."); + + // Manually create a multi assignment using global variables + // This prints a_0, ..., a_k-1 for this function that returns "k" values + if (_numOutParams > 0) + m_output << YulUtilFunctions::suffixedVariableNameList("a_", 0, _numOutParams) << " := "; + + // Call the function with the correct number of input parameters via calls to calldataload with + // incremental addresses. + m_output << "foo_" << functionTypeToString(_type) << "_" << std::to_string(m_numFunctionSets); + m_output << "("; + for (unsigned i = 0; i < _numInParams; i++) + { + m_output << "calldataload(" << std::to_string(i*32) << ")"; + if (i < _numInParams - 1) + m_output << ","; + } + m_output << ")\n"; + + for (unsigned i = 0; i < _numOutParams; i++) + m_output << "sstore(" << std::to_string(i*32) << ", a_" << std::to_string(i) << ")\n"; +} + +void ProtoConverter::visit(FunctionDefinitionNoReturnVal const& _x) +{ + unsigned numInParams = _x.num_input_params() % modInputParams; + unsigned numOutParams = 0; + createFunctionDefAndCall(_x, numInParams, numOutParams, NumFunctionReturns::None); +} + +void ProtoConverter::visit(FunctionDefinitionSingleReturnVal const& _x) +{ + unsigned numInParams = _x.num_input_params() % modInputParams; + unsigned numOutParams = 1; + createFunctionDefAndCall(_x, numInParams, numOutParams, NumFunctionReturns::Single); +} + +void ProtoConverter::visit(FunctionDefinitionMultiReturnVal const& _x) +{ + unsigned numInParams = _x.num_input_params() % modInputParams; + // Synthesize at least 2 return parameters and at most (modOutputParams - 1) + unsigned numOutParams = std::max(2, _x.num_output_params() % modOutputParams); + createFunctionDefAndCall(_x, numInParams, numOutParams, NumFunctionReturns::Multiple); +} + +void ProtoConverter::visit(FunctionDefinition const& _x) +{ + visit(_x.fd_zero()); + visit(_x.fd_one()); + visit(_x.fd_multi()); + m_numFunctionSets++; +} + +void ProtoConverter::visit(Program const& _x) +{ + /* Program template is as follows + * Four Globals a_0, a_1, a_2, and a_3 to hold up to four function return values + * + * Repeated function definitions followed by function calls of the respective function + * Example: function foo(x_0) -> x_1 {} + * a_0 := foo(calldataload(0)) + * sstore(0, a_0) + */ + m_output << "{\n"; + // Create globals at the beginning + // This creates let a_0, a_1, a_2, a_3 (followed by a new line) + m_output << "let " << YulUtilFunctions::suffixedVariableNameList("a_", 0, modOutputParams - 1) << "\n"; + // Register function interface. Useful while visiting multi var decl/assignment statements. + for (auto const& f: _x.funcs()) + registerFunction(f); + + for (auto const& f: _x.funcs()) + visit(f); + + yulAssert((unsigned)_x.funcs_size() == m_numFunctionSets, "Proto fuzzer: Functions not correctly registered."); + m_output << "}\n"; +} + +string ProtoConverter::programToString(Program const& _input) { visit(_input); return m_output.str(); } -string ProtoConverter::protoToYul(const uint8_t* _data, size_t _size) +void ProtoConverter::registerFunction(FunctionDefinition const& _x) { - Function message; - if (!message.ParsePartialFromArray(_data, _size)) - return "#error invalid proto\n"; - return functionToString(message); + // No return and single return functions explicitly state the number of values returned + registerFunction(_x.fd_zero(), NumFunctionReturns::None); + registerFunction(_x.fd_one(), NumFunctionReturns::Single); + // A multi return function can have between two and (modOutputParams - 1) parameters + unsigned numOutParams = std::max(2, _x.fd_multi().num_output_params() % modOutputParams); + registerFunction(_x.fd_multi(), NumFunctionReturns::Multiple, numOutParams); } + +std::string ProtoConverter::functionTypeToString(NumFunctionReturns _type) +{ + switch (_type) + { + case NumFunctionReturns::None: + return "noreturn"; + case NumFunctionReturns::Single: + return "singlereturn"; + case NumFunctionReturns::Multiple: + return "multireturn"; + } +} \ No newline at end of file diff --git a/test/tools/ossfuzz/protoToYul.h b/test/tools/ossfuzz/protoToYul.h index ab25e0466..dc707a0fa 100644 --- a/test/tools/ossfuzz/protoToYul.h +++ b/test/tools/ossfuzz/protoToYul.h @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include #include @@ -38,31 +40,34 @@ class ProtoConverter public: ProtoConverter() { - // The hard-coded function template foo has 10 parameters that are already "live" - m_numLiveVars = 10; + m_numLiveVars = 0; m_numVarsPerScope.push(m_numLiveVars); m_numNestedForLoops = 0; m_inForScope.push(false); + m_numFunctionSets = 0; } ProtoConverter(ProtoConverter const&) = delete; ProtoConverter(ProtoConverter&&) = delete; - std::string functionToString(Function const& _input); - std::string protoToYul(uint8_t const* _data, size_t _size); + std::string programToString(Program const& _input); private: void visit(BinaryOp const&); void visit(Block const&); + void visit(SpecialBlock const&); void visit(Literal const&); void visit(VarRef const&); void visit(Expression const&); void visit(VarDecl const&); + void visit(EmptyVarDecl const&); + void visit(MultiVarDecl const&); void visit(TypedVarDecl const&); void visit(UnaryOp const&); void visit(AssignmentStatement const&); + void visit(MultiAssignment const&); void visit(IfStmt const&); void visit(StoreFunc const&); void visit(Statement const&); - void visit(Function const&); + void visit(FunctionDefinition const&); void visit(ForStmt const&); void visit(CaseStmt const&); void visit(SwitchStmt const&); @@ -75,19 +80,72 @@ private: void visit(RetRevStmt const&); void visit(SelfDestructStmt const&); void visit(TerminatingStmt const&); + void visit(FunctionCallNoReturnVal const&); + void visit(FunctionCallSingleReturnVal const&); + void visit(FunctionCall const&); + void visit(FunctionDefinitionNoReturnVal const&); + void visit(FunctionDefinitionSingleReturnVal const&); + void visit(FunctionDefinitionMultiReturnVal const&); + void visit(Program const&); template void visit(google::protobuf::RepeatedPtrField const& _repeated_field); + void registerFunction(FunctionDefinition const&); std::string createHex(std::string const& _hexBytes) const; std::string createAlphaNum(std::string const& _strBytes) const; bool isCaseLiteralUnique(Literal const&); + enum class NumFunctionReturns + { + None, + Single, + Multiple + }; + + template + void visitFunctionInputParams(T const&, unsigned); + + template + void createFunctionDefAndCall(T const&, unsigned, unsigned, NumFunctionReturns); + std::string functionTypeToString(NumFunctionReturns _type); + + template + void registerFunction(T const& _x, NumFunctionReturns _type, unsigned _numOutputParams = 0) + { + unsigned numInputParams = _x.num_input_params() % modInputParams; + switch (_type) + { + case NumFunctionReturns::None: + m_functionVecNoReturnValue.push_back(numInputParams); + break; + case NumFunctionReturns::Single: + m_functionVecSingleReturnValue.push_back(numInputParams); + break; + case NumFunctionReturns::Multiple: + m_functionVecMultiReturnValue.push_back(std::make_pair(numInputParams, _numOutputParams)); + break; + } + } std::ostringstream m_output; - std::stack m_numVarsPerScope; - int32_t m_numLiveVars; - int32_t m_numNestedForLoops; + // Number of live variables in inner scope of a function + std::stack m_numVarsPerScope; + // Number of live variables in function scope + unsigned m_numLiveVars; + // Number of nested for loops for loop index referencing + unsigned m_numNestedForLoops; std::stack m_inForScope; + // Set that is used for deduplicating switch case literals std::stack> m_switchLiteralSetPerScope; + // Total number of function sets. A function set contains one function of each type defined by + // NumFunctionReturns + unsigned m_numFunctionSets; + // Look-up table per function type that holds the number of input (output) function parameters + std::vector m_functionVecNoReturnValue; + std::vector m_functionVecSingleReturnValue; + std::vector> m_functionVecMultiReturnValue; + // mod input/output parameters impose an upper bound on the number of input/output parameters a function may have. + static unsigned constexpr modInputParams = 5; + static unsigned constexpr modOutputParams = 5; }; } } diff --git a/test/tools/ossfuzz/yulProto.proto b/test/tools/ossfuzz/yulProto.proto index 4919d2eab..4793a029f 100644 --- a/test/tools/ossfuzz/yulProto.proto +++ b/test/tools/ossfuzz/yulProto.proto @@ -21,6 +21,56 @@ message VarDecl { required Expression expr = 1; } +message FunctionCallNoReturnVal { + // Indexes a function that does not return anything + required uint32 func_index = 1; + required Expression in_param1 = 2; + required Expression in_param2 = 3; + required Expression in_param3 = 4; + required Expression in_param4 = 5; +} + +// Used by Expression +message FunctionCallSingleReturnVal { + // Indexes a function that returns exactly one value + required uint32 func_index = 1; + required Expression in_param1 = 2; + required Expression in_param2 = 3; + required Expression in_param3 = 4; + required Expression in_param4 = 5; +} + +message MultiVarDecl { + // Indexes a function that returns more than one value + required uint32 func_index = 1; + required Expression in_param1 = 2; + required Expression in_param2 = 3; + required Expression in_param3 = 4; + required Expression in_param4 = 5; +} + +message MultiAssignment { + // Indexes a function that returns more than one value + required uint32 func_index = 1; + required Expression in_param1 = 2; + required Expression in_param2 = 3; + required Expression in_param3 = 4; + required Expression in_param4 = 5; + required VarRef out_param1 = 6; + required VarRef out_param2 = 7; + required VarRef out_param3 = 8; + required VarRef out_param4 = 9; +} + +// We exclude function calls with single return value here and use them as expressions +message FunctionCall { + oneof functioncall_oneof { + FunctionCallNoReturnVal call_zero = 1; + MultiVarDecl call_multidecl = 2; + MultiAssignment call_multiassign = 3; + } +} + message TypedVarDecl { enum TypeName { BOOL = 1; @@ -192,6 +242,7 @@ message Expression { UnaryOp unop = 4; TernaryOp top = 5; NullaryOp nop = 6; + FunctionCallSingleReturnVal func_expr = 7; } } @@ -253,21 +304,26 @@ message TerminatingStmt { } } +// Stub for a VarDecl without an Expression on the RHS +message EmptyVarDecl {} + +// TODO: Make Function definition a Statement message Statement { oneof stmt_oneof { - VarDecl decl = 1; - AssignmentStatement assignment = 2; - IfStmt ifstmt = 3; - StoreFunc storage_func = 4; - Block blockstmt = 5; - ForStmt forstmt = 6; - SwitchStmt switchstmt = 7; - BreakStmt breakstmt = 8; - ContinueStmt contstmt = 9; - LogFunc log_func = 10; - CopyFunc copy_func = 11; - ExtCodeCopy extcode_copy = 12; - TerminatingStmt terminatestmt = 13; + VarDecl decl = 1; + AssignmentStatement assignment = 2; + IfStmt ifstmt = 3; + StoreFunc storage_func = 4; + Block blockstmt = 5; + ForStmt forstmt = 6; + SwitchStmt switchstmt = 7; + BreakStmt breakstmt = 8; + ContinueStmt contstmt = 9; + LogFunc log_func = 10; + CopyFunc copy_func = 11; + ExtCodeCopy extcode_copy = 12; + TerminatingStmt terminatestmt = 13; + FunctionCall functioncall = 14; } } @@ -275,8 +331,39 @@ message Block { repeated Statement statements = 1; } -message Function { - required Block statements = 1; +// Identical to Block with the addition of an empty var right at the top +// Used by FunctionDefinitionNoReturnVal only. +message SpecialBlock { + required EmptyVarDecl var = 1; + repeated Statement statements = 2; +} + +// This ensures that proto mutator generates at least one of each type if it creates at least 1 functiondef message. +message FunctionDefinition { + required FunctionDefinitionNoReturnVal fd_zero = 1; + required FunctionDefinitionSingleReturnVal fd_one = 2; + required FunctionDefinitionMultiReturnVal fd_multi = 3; +} + +// Since this function can have 0 parameters, we hoist an empty var decl at the top via SpecialBlock. +message FunctionDefinitionNoReturnVal { + required uint32 num_input_params = 1; + required SpecialBlock statements = 2; +} + +message FunctionDefinitionSingleReturnVal { + required uint32 num_input_params = 1; + required Block statements = 2; +} + +message FunctionDefinitionMultiReturnVal { + required uint32 num_input_params = 1; + required uint32 num_output_params = 2; + required Block statements = 3; +} + +message Program { + repeated FunctionDefinition funcs = 1; } package yul.test.yul_fuzzer; diff --git a/test/tools/ossfuzz/yulProtoFuzzer.cpp b/test/tools/ossfuzz/yulProtoFuzzer.cpp index 9f81ea01b..1cdfc124e 100644 --- a/test/tools/ossfuzz/yulProtoFuzzer.cpp +++ b/test/tools/ossfuzz/yulProtoFuzzer.cpp @@ -30,12 +30,10 @@ using namespace yul; using namespace yul::test::yul_fuzzer; using namespace std; -DEFINE_PROTO_FUZZER(Function const& _input) +DEFINE_PROTO_FUZZER(Program const& _input) { ProtoConverter converter; - string yul_source = converter.functionToString(_input); - if (yul_source.size() > 600) - return; + string yul_source = converter.programToString(_input); if (const char* dump_path = getenv("PROTO_FUZZER_DUMP_PATH")) { @@ -45,6 +43,9 @@ DEFINE_PROTO_FUZZER(Function const& _input) of.write(yul_source.data(), yul_source.size()); } + if (yul_source.size() > 1200) + return; + // AssemblyStack entry point AssemblyStack stack( langutil::EVMVersion(), diff --git a/test/tools/ossfuzz/yulProto_diff_ossfuzz.cpp b/test/tools/ossfuzz/yulProto_diff_ossfuzz.cpp index 44787d5a5..b384c9af5 100644 --- a/test/tools/ossfuzz/yulProto_diff_ossfuzz.cpp +++ b/test/tools/ossfuzz/yulProto_diff_ossfuzz.cpp @@ -37,12 +37,10 @@ using namespace langutil; using namespace dev; using namespace yul::test; -DEFINE_PROTO_FUZZER(Function const& _input) +DEFINE_PROTO_FUZZER(Program const& _input) { ProtoConverter converter; - string yul_source = converter.functionToString(_input); - if (yul_source.size() > 600) - return; + string yul_source = converter.programToString(_input); if (const char* dump_path = getenv("PROTO_FUZZER_DUMP_PATH")) { @@ -52,6 +50,9 @@ DEFINE_PROTO_FUZZER(Function const& _input) of.write(yul_source.data(), yul_source.size()); } + if (yul_source.size() > 1200) + return; + // AssemblyStack entry point AssemblyStack stack( langutil::EVMVersion(),