From 4837ef4adab1e08c91fdb2872a9ffba9afafbcf7 Mon Sep 17 00:00:00 2001 From: Bhargava Shastry Date: Tue, 27 Aug 2019 14:57:45 +0200 Subject: [PATCH] Yul proto fuzzer: Make function definition a statement --- test/tools/ossfuzz/protoToYul.cpp | 527 ++++++++++++++++++------------ test/tools/ossfuzz/protoToYul.h | 170 +++++++--- test/tools/ossfuzz/yulProto.proto | 106 ++---- 3 files changed, 455 insertions(+), 348 deletions(-) diff --git a/test/tools/ossfuzz/protoToYul.cpp b/test/tools/ossfuzz/protoToYul.cpp index 50ef07f04..0bb7559a3 100644 --- a/test/tools/ossfuzz/protoToYul.cpp +++ b/test/tools/ossfuzz/protoToYul.cpp @@ -86,10 +86,34 @@ string ProtoConverter::visit(Literal const& _x) } } +bool ProtoConverter::varDeclAvailable() +{ + if (m_inFunctionDef) + return m_scopeVars.top().size() > 0; + else + return m_variables.size() > 0; +} + +bool ProtoConverter::functionCallNotPossible(FunctionCall_Returns _type) +{ + return _type == FunctionCall::SINGLE || + (_type == FunctionCall::MULTIASSIGN && !varDeclAvailable()); +} + void ProtoConverter::visit(VarRef const& _x) { - yulAssert(m_variables.size() > 0, "Proto fuzzer: No variables to reference."); - m_output << m_variables[_x.varnum() % m_variables.size()]; + if (m_inFunctionDef) + { + // Ensure that there is at least one variable declaration to reference in function scope. + yulAssert(m_scopeVars.top().size() > 0, "Proto fuzzer: No variables to reference."); + m_output << m_scopeVars.top()[_x.varnum() % m_scopeVars.top().size()]; + } + else + { + // Ensure that there is at least one variable declaration to reference in nested scopes. 
+ yulAssert(m_variables.size() > 0, "Proto fuzzer: No variables to reference."); + m_output << m_variables[_x.varnum() % m_variables.size()]; + } } void ProtoConverter::visit(Expression const& _x) @@ -97,7 +121,13 @@ void ProtoConverter::visit(Expression const& _x) switch (_x.expr_oneof_case()) { case Expression::kVarref: - visit(_x.varref()); + // If the expression requires a variable reference that we cannot provide + // (because there are no variables in scope), we silently output a literal + // expression from the optimizer dictionary. + if (!varDeclAvailable()) + m_output << dictionaryToken(); + else + visit(_x.varref()); break; case Expression::kCons: m_output << visit(_x.cons()); @@ -115,7 +145,12 @@ void ProtoConverter::visit(Expression const& _x) visit(_x.nop()); break; case Expression::kFuncExpr: - visit(_x.func_expr()); + // FunctionCall must return a single value, otherwise + // we output a trivial expression "1". + if (_x.func_expr().ret() == FunctionCall::SINGLE) + visit(_x.func_expr()); + else + m_output << dictionaryToken(); break; case Expression::EXPR_ONEOF_NOT_SET: m_output << dictionaryToken(); @@ -207,46 +242,10 @@ void ProtoConverter::visit(VarDecl const& _x) m_output << "let " << varName << " := "; visit(_x.expr()); m_output << "\n"; - m_scopes.top().insert(varName); + m_scopeVars.top().push_back(varName); m_variables.push_back(varName); } -void ProtoConverter::visit(EmptyVarDecl const&) -{ - string varName = newVarName(); - m_output << "let " << varName << "\n"; - m_scopes.top().insert(varName); - m_variables.push_back(varName); -} - -void ProtoConverter::visit(MultiVarDecl const& _x) -{ - size_t funcId = (static_cast(_x.func_index()) % m_functionVecMultiReturnValue.size()); - - unsigned numInParams = m_functionVecMultiReturnValue.at(funcId).first; - unsigned numOutParams = m_functionVecMultiReturnValue.at(funcId).second; - - // Ensure that the chosen function returns at least 2 and at most 4 values - yulAssert( - ((numOutParams >= 2) && 
(numOutParams <= 4)), - "Proto fuzzer: Multi variable declaration calls a function with either too few or too many output params." - ); - - // Obtain variable name suffix - unsigned startIdx = counter(); - m_output << "let "; - vector varsVec = createVars(startIdx, startIdx + numOutParams); - m_output << " := "; - - // Create RHS of multi var decl - m_output << "foo_" << functionTypeToString(NumFunctionReturns::Multiple) << "_" << funcId; - m_output << "("; - visitFunctionInputParams(_x, numInParams); - m_output << ")\n"; - // Add newly minted vars in the multidecl statement to current scope - addToScope(varsVec); -} - void ProtoConverter::visit(TypedVarDecl const& _x) { string varName = newVarName(); @@ -309,7 +308,7 @@ void ProtoConverter::visit(TypedVarDecl const& _x) m_output << " : u256\n"; break; } - m_scopes.top().insert(varName); + m_scopeVars.top().push_back(varName); m_variables.push_back(varName); } @@ -501,9 +500,7 @@ void ProtoConverter::visit(AssignmentStatement const& _x) m_output << "\n"; } -// Called at the time function call is being made -template -void ProtoConverter::visitFunctionInputParams(T const& _x, unsigned _numInputParams) +void ProtoConverter::visitFunctionInputParams(FunctionCall const& _x, unsigned _numInputParams) { // We reverse the order of function input visits since it helps keep this switch case concise. 
switch (_numInputParams) @@ -531,83 +528,151 @@ void ProtoConverter::visitFunctionInputParams(T const& _x, unsigned _numInputPar } } -void ProtoConverter::visit(MultiAssignment const& _x) +bool ProtoConverter::functionValid(FunctionCall_Returns _type, unsigned _numOutParams) { - size_t funcId = (static_cast(_x.func_index()) % m_functionVecMultiReturnValue.size()); - unsigned numInParams = m_functionVecMultiReturnValue.at(funcId).first; - unsigned numOutParams = m_functionVecMultiReturnValue.at(funcId).second; - yulAssert( - ((numOutParams >= 2) && (numOutParams <= 4)), - "Proto fuzzer: Multi assignment calls a function that has either too many or too few output parameters." - ); - - // Convert LHS of multi assignment - // We reverse the order of out param visits since the order does not matter. This helps reduce the size of this - // switch statement. - switch (numOutParams) + switch (_type) { - case 4: - visit(_x.out_param4()); - m_output << ", "; - BOOST_FALLTHROUGH; - case 3: - visit(_x.out_param3()); - m_output << ", "; - BOOST_FALLTHROUGH; - case 2: - visit(_x.out_param2()); - m_output << ", "; - visit(_x.out_param1()); - break; - default: - yulAssert(false, "Proto fuzzer: Function call with too many input parameters."); - break; + case FunctionCall::ZERO: + return _numOutParams == 0; + case FunctionCall::SINGLE: + return _numOutParams == 1; + case FunctionCall::MULTIDECL: + case FunctionCall::MULTIASSIGN: + return _numOutParams > 1; } - m_output << " := "; - - // Convert RHS of multi assignment - m_output << "foo_" << functionTypeToString(NumFunctionReturns::Multiple) << "_" << funcId; - m_output << "("; - visitFunctionInputParams(_x, numInParams); - m_output << ")\n"; } -void ProtoConverter::visit(FunctionCallNoReturnVal const& _x) +void ProtoConverter::convertFunctionCall( + FunctionCall const& _x, + std::string _name, + unsigned _numInParams, + bool _newLine +) { - size_t funcId = (static_cast(_x.func_index()) % m_functionVecNoReturnValue.size()); - 
unsigned numInParams = m_functionVecNoReturnValue.at(funcId); - m_output << "foo_" << functionTypeToString(NumFunctionReturns::None) << "_" << funcId; - m_output << "("; - visitFunctionInputParams(_x, numInParams); - m_output << ")\n"; -} - -void ProtoConverter::visit(FunctionCallSingleReturnVal const& _x) -{ - size_t funcId = (static_cast(_x.func_index()) % m_functionVecSingleReturnValue.size()); - unsigned numInParams = m_functionVecSingleReturnValue.at(funcId); - m_output << "foo_" << functionTypeToString(NumFunctionReturns::Single) << "_" << funcId; - m_output << "("; - visitFunctionInputParams(_x, numInParams); + m_output << _name << "("; + visitFunctionInputParams(_x, _numInParams); m_output << ")"; + if (_newLine) + m_output << "\n"; +} + +vector ProtoConverter::createVarDecls(unsigned _start, unsigned _end, bool _isAssignment) +{ + m_output << "let "; + vector varsVec = createVars(_start, _end); + if (_isAssignment) + m_output << " := "; + else + m_output << "\n"; + return varsVec; } void ProtoConverter::visit(FunctionCall const& _x) { - switch (_x.functioncall_oneof_case()) + bool functionAvailable = m_functionSigMap.size() > 0; + unsigned numInParams, numOutParams; + string funcName; + FunctionCall_Returns funcType = _x.ret(); + if (functionAvailable) { - case FunctionCall::kCallZero: - visit(_x.call_zero()); + yulAssert(m_functions.size() > 0, "Proto fuzzer: No function in scope"); + funcName = m_functions[_x.func_index() % m_functions.size()]; + auto ret = m_functionSigMap.at(funcName); + numInParams = ret.first; + numOutParams = ret.second; + } + else + { + // If there are no functions available, calls to functions that + // return a single value may be replaced by a dictionary token. 
+ if (funcType == FunctionCall::SINGLE) + m_output << dictionaryToken(); + return; + } + + // If function selected for function call does not meet interface + // requirements (num output values) for the function type + // specified, then we return early unless it is a function call + // that returns a single value (which may be replaced by a + // dictionary token. + if (!functionValid(funcType, numOutParams)) + { + if (funcType == FunctionCall::SINGLE) + m_output << dictionaryToken(); + return; + } + + // If we are here, it means that we have at least one valid + // function for making the function call + switch (funcType) + { + case FunctionCall::ZERO: + convertFunctionCall(_x, funcName, numInParams); break; - case FunctionCall::kCallMultidecl: - // Hack: Disallow (multi) variable declarations until scope extension is implemented for "for-init" + case FunctionCall::SINGLE: + // Since functions that return a single value are used as expressions + // we do not print a newline because it is done by the expression + // visitor. + convertFunctionCall(_x, funcName, numInParams, /*newLine=*/false); + break; + case FunctionCall::MULTIDECL: + // Hack: Disallow (multi) variable declarations until scope extension + // is implemented for "for-init" if (!m_inForInitScope) - visit(_x.call_multidecl()); + { + // Ensure that the chosen function returns at most 4 values + yulAssert( + numOutParams <= 4, + "Proto fuzzer: Function call with too many output params encountered." 
+ ); + + // Obtain variable name suffix + unsigned startIdx = counter(); + vector varsVec = createVarDecls( + startIdx, + startIdx + numOutParams, + /*isAssignment=*/true + ); + + // Create RHS of multi var decl + convertFunctionCall(_x, funcName, numInParams); + // Add newly minted vars in the multidecl statement to current scope + addVarsToScope(varsVec); + } break; - case FunctionCall::kCallMultiassign: - visit(_x.call_multiassign()); - break; - case FunctionCall::FUNCTIONCALL_ONEOF_NOT_SET: + case FunctionCall::MULTIASSIGN: + // Ensure that the chosen function returns at most 4 values + yulAssert( + numOutParams <= 4, + "Proto fuzzer: Function call with too many output params encountered." + ); + + // Convert LHS of multi assignment + // We reverse the order of out param visits since the order does not matter. + // This helps reduce the size of this switch statement. + switch (numOutParams) + { + case 4: + visit(_x.out_param4()); + m_output << ", "; + BOOST_FALLTHROUGH; + case 3: + visit(_x.out_param3()); + m_output << ", "; + BOOST_FALLTHROUGH; + case 2: + visit(_x.out_param2()); + m_output << ", "; + visit(_x.out_param1()); + break; + default: + yulAssert(false, "Proto fuzzer: Function call with too many or too few input parameters."); + break; + } + m_output << " := "; + + // Convert RHS of multi assignment + convertFunctionCall(_x, funcName, numInParams); break; } } @@ -743,7 +808,7 @@ void ProtoConverter::visit(SwitchStmt const& _x) { if (_x.case_stmt_size() > 0 || _x.has_default_block()) { - std::set s; + std::set s; m_switchLiteralSetPerScope.push(s); m_output << "switch "; visit(_x.switch_expr()); @@ -829,7 +894,10 @@ void ProtoConverter::visit(Statement const& _x) visit(_x.decl()); break; case Statement::kAssignment: - visit(_x.assignment()); + // Create an assignment statement only if there is at least one variable + // declaration that is in scope. 
+ if (varDeclAvailable()) + visit(_x.assignment()); break; case Statement::kIfstmt: visit(_x.ifstmt()); @@ -870,8 +938,15 @@ void ProtoConverter::visit(Statement const& _x) visit(_x.terminatestmt()); break; case Statement::kFunctioncall: + // Return early if a function call cannot be created + if (functionCallNotPossible(_x.functioncall().ret())) + return; visit(_x.functioncall()); break; + case Statement::kFuncdef: + if (!m_inForInitScope) + visit(_x.funcdef()); + break; case Statement::STMT_ONEOF_NOT_SET: break; } @@ -879,34 +954,74 @@ void ProtoConverter::visit(Statement const& _x) void ProtoConverter::openScope(vector const& _funcParams) { - m_scopes.push({}); - addToScope(_funcParams); + m_scopeVars.push({}); + m_scopeFuncs.push({}); + if (!_funcParams.empty()) + addVarsToScope(_funcParams); +} + +void ProtoConverter::updateFunctionMaps(string const& _var) +{ + unsigned erased = m_functionSigMap.erase(_var); + + for (auto const& i: m_functionDefMap) + if (i.second == _var) + { + erased += m_functionDefMap.erase(i.first); + break; + } + + yulAssert(erased == 2, "Proto fuzzer: Function maps not updated"); } void ProtoConverter::closeScope() { - for (auto const& var: m_scopes.top()) + for (auto const& var: m_scopeVars.top()) { - unsigned numErased = m_variables.size(); + unsigned numVarsRemoved = m_variables.size(); m_variables.erase(remove(m_variables.begin(), m_variables.end(), var), m_variables.end()); - numErased -= m_variables.size(); - yulAssert(numErased == 1, "Proto fuzzer: More than one variable went out of scope"); + numVarsRemoved -= m_variables.size(); + yulAssert( + numVarsRemoved == 1, + "Proto fuzzer: Nothing or too much went out of scope" + ); } - m_scopes.pop(); + m_scopeVars.pop(); + + for (auto const& f: m_scopeFuncs.top()) + { + unsigned numFuncsRemoved = m_functions.size(); + m_functions.erase(remove(m_functions.begin(), m_functions.end(), f), m_functions.end()); + numFuncsRemoved -= m_functions.size(); + yulAssert( + numFuncsRemoved == 1, + 
"Proto fuzzer: Nothing or too much went out of scope" + ); + updateFunctionMaps(f); + } + m_scopeFuncs.pop(); } -void ProtoConverter::addToScope(vector const& _vars) +void ProtoConverter::addVarsToScope(vector const& _vars) { for (string const& i: _vars) { m_variables.push_back(i); - m_scopes.top().insert(i); + m_scopeVars.top().push_back(i); } } void ProtoConverter::visit(Block const& _x, vector _funcParams) { openScope(_funcParams); + + // Register function declarations in this scope unless this + // scope belongs to for-init (in which function declarations + // are forbidden). + for (auto const& statement: _x.statements()) + if (statement.has_funcdef() && !m_inForInitScope) + registerFunction(&statement.funcdef()); + if (_x.statements_size() > 0) { m_output << "{\n"; @@ -919,18 +1034,6 @@ void ProtoConverter::visit(Block const& _x, vector _funcParams) closeScope(); } -void ProtoConverter::visit(SpecialBlock const& _x, vector _funcParams) -{ - openScope(_funcParams); - m_output << "{\n"; - visit(_x.var()); - if (_x.statements_size() > 0) - for (auto const& st: _x.statements()) - visit(st); - m_output << "}\n"; - closeScope(); -} - vector ProtoConverter::createVars(unsigned _startIdx, unsigned _endIdx) { yulAssert(_endIdx > _startIdx, "Proto fuzzer: Variable indices not in range"); @@ -952,24 +1055,49 @@ vector ProtoConverter::createVars(unsigned _startIdx, unsigned _endIdx) return varsVec; } -template -void ProtoConverter::createFunctionDefAndCall(T const& _x, unsigned _numInParams, unsigned _numOutParams, NumFunctionReturns _type) +void ProtoConverter::registerFunction(FunctionDef const* _x) +{ + unsigned numInParams = _x->num_input_params() % s_modInputParams; + unsigned numOutParams = _x->num_output_params() % s_modOutputParams; + NumFunctionReturns numReturns; + if (numOutParams == 0) + numReturns = NumFunctionReturns::None; + else if (numOutParams == 1) + numReturns = NumFunctionReturns::Single; + else + numReturns = NumFunctionReturns::Multiple; + + // 
Generate function name + string funcName = functionName(numReturns); + + // Register function + auto ret = m_functionSigMap.emplace(make_pair(funcName, make_pair(numInParams, numOutParams))); + yulAssert(ret.second, "Proto fuzzer: Function already exists."); + m_functions.push_back(funcName); + m_scopeFuncs.top().push_back(funcName); + m_functionDefMap.emplace(make_pair(_x, funcName)); +} + +void ProtoConverter::createFunctionDefAndCall( + FunctionDef const& _x, + unsigned _numInParams, + unsigned _numOutParams +) { yulAssert( ((_numInParams <= s_modInputParams - 1) && (_numOutParams <= s_modOutputParams - 1)), "Proto fuzzer: Too many function I/O parameters requested." ); - // Signature - // This creates function foo__(x_0,...,x_n) - m_output << "function foo_" << functionTypeToString(_type) << "_" << m_numFunctionSets; - m_output << "("; + // Obtain function name + yulAssert(m_functionDefMap.count(&_x), "Proto fuzzer: Unregistered function"); + string funcName = m_functionDefMap.at(&_x); + vector varsVec = {}; + m_output << "function " << funcName << "("; + unsigned startIdx = counter(); if (_numInParams > 0) - // Functions must use 0 as the first variable's index until function definition - // is made a statement. Once function definition as statement is implemented, - // start index becomes m_counter. 
- varsVec = createVars(0, _numInParams); + varsVec = createVars(startIdx, startIdx + _numInParams); m_output << ")"; vector outVarsVec = {}; @@ -978,68 +1106,58 @@ void ProtoConverter::createFunctionDefAndCall(T const& _x, unsigned _numInParams { m_output << " -> "; if (varsVec.empty()) - varsVec = createVars(_numInParams, _numInParams + _numOutParams); + { + yulAssert(_numInParams == 0, "Proto fuzzer: Input parameters not processed correctly"); + varsVec = createVars(startIdx, startIdx + _numOutParams); + } else { - outVarsVec = createVars(_numInParams, _numInParams + _numOutParams); + outVarsVec = createVars(startIdx + _numInParams, startIdx + _numInParams + _numOutParams); varsVec.insert(varsVec.end(), outVarsVec.begin(), outVarsVec.end()); } } + yulAssert(varsVec.size() == _numInParams + _numOutParams, "Proto fuzzer: Function parameters not processed correctly"); m_output << "\n"; + // If function definition is in for-loop body, update + bool wasInForBody = m_inForBodyScope; + m_inForBodyScope = false; + + bool wasInFunctionDef = m_inFunctionDef; + m_inFunctionDef = true; + // Body - visit(_x.statements(), varsVec); + visit(_x.block(), varsVec); + + m_inForBodyScope = wasInForBody; + m_inFunctionDef = wasInFunctionDef; // Manually create a multi assignment using global variables // This prints a_0, ..., a_k-1 for this function that returns "k" values - if (_numOutParams > 0) - m_output << dev::suffixedVariableNameList("a_", 0, _numOutParams) << " := "; - - // Call the function with the correct number of input parameters via calls to calldataload with - // incremental addresses. 
- m_output << "foo_" << functionTypeToString(_type) << "_" << std::to_string(m_numFunctionSets); - m_output << "("; - for (unsigned i = 0; i < _numInParams; i++) - { - m_output << "calldataload(" << std::to_string(i*32) << ")"; - if (i < _numInParams - 1) - m_output << ","; - } - m_output << ")\n"; - - for (unsigned i = 0; i < _numOutParams; i++) - m_output << "sstore(" << std::to_string(i*32) << ", a_" << std::to_string(i) << ")\n"; +// if (_numOutParams > 0) +// m_output << dev::suffixedVariableNameList("a_", 0, _numOutParams) << " := "; +// +// // Call the function with the correct number of input parameters via calls to calldataload with +// // incremental addresses. +// m_output << funcName << "("; +// for (unsigned i = 0; i < _numInParams; i++) +// { +// m_output << "calldataload(" << std::to_string(i*32) << ")"; +// if (i < _numInParams - 1) +// m_output << ","; +// } +// m_output << ")\n"; +// +// for (unsigned i = 0; i < _numOutParams; i++) +// m_output << "sstore(" << std::to_string(i*32) << ", a_" << std::to_string(i) << ")\n"; } -void ProtoConverter::visit(FunctionDefinitionNoReturnVal const& _x) +void ProtoConverter::visit(FunctionDef const& _x) { unsigned numInParams = _x.num_input_params() % s_modInputParams; - unsigned numOutParams = 0; - createFunctionDefAndCall(_x, numInParams, numOutParams, NumFunctionReturns::None); -} - -void ProtoConverter::visit(FunctionDefinitionSingleReturnVal const& _x) -{ - unsigned numInParams = _x.num_input_params() % s_modInputParams; - unsigned numOutParams = 1; - createFunctionDefAndCall(_x, numInParams, numOutParams, NumFunctionReturns::Single); -} - -void ProtoConverter::visit(FunctionDefinitionMultiReturnVal const& _x) -{ - unsigned numInParams = _x.num_input_params() % s_modInputParams; - // Synthesize at least 2 return parameters and at most (s_modOutputParams - 1) - unsigned numOutParams = std::max(2, _x.num_output_params() % s_modOutputParams); - createFunctionDefAndCall(_x, numInParams, numOutParams, 
NumFunctionReturns::Multiple); -} - -void ProtoConverter::visit(FunctionDefinition const& _x) -{ - visit(_x.fd_zero()); - visit(_x.fd_one()); - visit(_x.fd_multi()); - m_numFunctionSets++; + unsigned numOutParams = _x.num_output_params() % s_modOutputParams; + createFunctionDefAndCall(_x, numInParams, numOutParams); } void ProtoConverter::visit(Program const& _x) @@ -1048,25 +1166,14 @@ void ProtoConverter::visit(Program const& _x) m_inputSize = _x.ByteSizeLong(); /* Program template is as follows - * Four Globals a_0, a_1, a_2, and a_3 to hold up to four function return values - * - * Repeated function definitions followed by function calls of the respective function + * Zero or more statements. If function definition is present, it is + * called post definition. * Example: function foo(x_0) -> x_1 {} - * a_0 := foo(calldataload(0)) - * sstore(0, a_0) + * x_2 := foo(calldataload(0)) + * sstore(0, x_2) */ m_output << "{\n"; - // Create globals at the beginning - // This creates let a_0, a_1, a_2, a_3 (followed by a new line) - m_output << "let " << dev::suffixedVariableNameList("a_", 0, s_modOutputParams - 1) << "\n"; - // Register function interface. Useful while visiting multi var decl/assignment statements. 
- for (auto const& f: _x.funcs()) - registerFunction(f); - - for (auto const& f: _x.funcs()) - visit(f); - - yulAssert((unsigned)_x.funcs_size() == m_numFunctionSets, "Proto fuzzer: Functions not correctly registered."); + visit(_x.block()); m_output << "}\n"; } @@ -1076,25 +1183,15 @@ string ProtoConverter::programToString(Program const& _input) return m_output.str(); } -void ProtoConverter::registerFunction(FunctionDefinition const& _x) -{ - // No return and single return functions explicitly state the number of values returned - registerFunction(_x.fd_zero(), NumFunctionReturns::None); - registerFunction(_x.fd_one(), NumFunctionReturns::Single); - // A multi return function can have between two and (s_modOutputParams - 1) parameters - unsigned numOutParams = std::max(2, _x.fd_multi().num_output_params() % s_modOutputParams); - registerFunction(_x.fd_multi(), NumFunctionReturns::Multiple, numOutParams); -} - std::string ProtoConverter::functionTypeToString(NumFunctionReturns _type) { switch (_type) { case NumFunctionReturns::None: - return "noreturn"; + return "n"; case NumFunctionReturns::Single: - return "singlereturn"; + return "s"; case NumFunctionReturns::Multiple: - return "multireturn"; + return "m"; } } \ No newline at end of file diff --git a/test/tools/ossfuzz/protoToYul.h b/test/tools/ossfuzz/protoToYul.h index a3f617a82..329f4663d 100644 --- a/test/tools/ossfuzz/protoToYul.h +++ b/test/tools/ossfuzz/protoToYul.h @@ -40,12 +40,12 @@ class ProtoConverter public: ProtoConverter() { - m_numFunctionSets = 0; m_inForBodyScope = false; m_inForInitScope = false; m_numNestedForLoops = 0; m_counter = 0; m_inputSize = 0; + m_inFunctionDef = false; } ProtoConverter(ProtoConverter const&) = delete; ProtoConverter(ProtoConverter&&) = delete; @@ -53,32 +53,23 @@ public: private: void visit(BinaryOp const&); + /// Visits a basic block optionally adding @a _funcParams to scope. /// @param _block Reference to a basic block of yul statements. 
/// @param _funcParams List of function parameter names, defaults to /// an empty vector. void visit(Block const& _block, std::vector _funcParams = {}); - /// Visits a basic block that contains a variable declaration at the - /// very beginning, optionally adding @a _funcParams to scope. - /// @param _block Reference to a basic block of yul statements. - /// @param _funcParams List of function parameter names, defaults to - /// an empty vector. - void visit(SpecialBlock const& _block, std::vector _funcParams = {}); std::string visit(Literal const&); void visit(VarRef const&); void visit(Expression const&); void visit(VarDecl const&); - void visit(EmptyVarDecl const&); - void visit(MultiVarDecl const&); void visit(TypedVarDecl const&); void visit(UnaryOp const&); void visit(AssignmentStatement const&); - void visit(MultiAssignment const&); void visit(IfStmt const&); void visit(StoreFunc const&); void visit(Statement const&); - void visit(FunctionDefinition const&); void visit(ForStmt const&); void visit(BoundedForStmt const&); void visit(CaseStmt const&); @@ -92,21 +83,17 @@ private: void visit(RetRevStmt const&); void visit(SelfDestructStmt const&); void visit(TerminatingStmt const&); - void visit(FunctionCallNoReturnVal const&); - void visit(FunctionCallSingleReturnVal const&); void visit(FunctionCall const&); - void visit(FunctionDefinitionNoReturnVal const&); - void visit(FunctionDefinitionSingleReturnVal const&); - void visit(FunctionDefinitionMultiReturnVal const&); + void visit(FunctionDef const&); void visit(Program const&); - void registerFunction(FunctionDefinition const&); - /// Creates a new scope, and optionally adds @a _funcParams to it + /// Creates a new scope, and adds @a _funcParams to it if it + /// is non-empty. 
void openScope(std::vector const& _funcParams); /// Closes current scope void closeScope(); /// Adds @a _vars to current scope - void addToScope(std::vector const& _vars); + void addVarsToScope(std::vector const& _vars); std::string createHex(std::string const& _hexBytes); @@ -126,33 +113,101 @@ private: Multiple }; - template - void visitFunctionInputParams(T const&, unsigned); + void visitFunctionInputParams(FunctionCall const&, unsigned); + void createFunctionDefAndCall(FunctionDef const&, unsigned, unsigned); - template - void createFunctionDefAndCall(T const&, unsigned, unsigned, NumFunctionReturns); + /// Convert function type to a string to be used while naming a + /// function that is created by a function declaration statement. + /// @param _type Type classified according to the number of + /// values returned by function. + /// @return A string as follows. If _type is + /// None -> "n" + /// Single -> "s" + /// Multiple -> "m" std::string functionTypeToString(NumFunctionReturns _type); - /// Creates variable declarations "x_<_startIdx>",...,"x_<_endIdx - 1>" + /// Return true if at least one variable declaration is in scope, + /// false otherwise. + /// @return True in the following cases: + /// - If we are inside a function that has already declared a variable + /// - If there is at least one variable declaration that is + /// in scope + bool varDeclAvailable(); + + /// Return true if a function call cannot be made, false otherwise. + /// @param _type is an enum denoting the type of function call. It + /// can be one of ZERO, SINGLE, MULTIDECL, MULTIASSIGN. 
+ /// ZERO -> Function call does not return a value + /// SINGLE -> Function call returns a single value + /// MULTIDECL -> Function call returns more than one value + /// and it is used to create a multi declaration + /// statement + /// MULTIASSIGN -> Function call returns more than one value + /// and it is used to create a multi assignment + /// statement + /// @return True if the function call cannot be created for one of the + /// following reasons + /// - It is a SINGLE function call (we reserve SINGLE functions for + /// expressions) + /// - It is a MULTIASSIGN function call and we do not have any + /// variables available for assignment. + bool functionCallNotPossible(FunctionCall_Returns _type); + + /// Checks if function call of type @a _type returns the correct number + /// of values. + /// @param _type Function call type of the function being checked + /// @param _numOutParams Number of values returned by the function + /// being checked + /// @return true if the function returns the correct number of values, + /// false otherwise + bool functionValid(FunctionCall_Returns _type, unsigned _numOutParams); + + /// Converts protobuf function call to a yul function call and appends + /// it to output stream. + /// @param _x Protobuf function call + /// @param _name Function name + /// @param _numInParams Number of input arguments accepted by function + /// @param _newLine Flag that prints a new line to the output stream if + /// true. Default value for the flag is true. + void convertFunctionCall( + FunctionCall const& _x, + std::string _name, + unsigned _numInParams, + bool _newLine = true + ); + + /// Prints a yul formatted variable declaration statement to the output + /// stream. 
+ /// Example 1: createVarDecls(0, 1, true) returns {"x_0"} and prints + /// let x_0 := + /// Example 2: createVarDecls(0, 2, false) returns {"x_0", "x_1"} and prints + /// let x_0, x_1 + /// @param _start Start index of variable (inclusive) + /// @param _end End index of variable (exclusive) + /// @param _isAssignment Flag indicating if variable declaration is also + /// an assignment. If true, the string " := " follows the variable + /// declaration. Otherwise, a new line follows the variable + /// declaration. + /// @return A vector of strings containing the variable names used in + /// the declaration statement. + std::vector createVarDecls(unsigned _start, unsigned _end, bool _isAssignment); + + /// Prints comma separated variable names to output stream and + /// returns a vector containing the printed variable names. + /// Example: createVars(0, 2) returns {"x_0", "x_1"} and prints + /// x_0, x_1 + /// @param _startIdx Start index of variable (inclusive) + /// @param _endIdx End index of variable (exclusive) + /// @return A vector of strings containing the printed variable names. 
std::vector createVars(unsigned _startIdx, unsigned _endIdx); - template - void registerFunction(T const& _x, NumFunctionReturns _type, unsigned _numOutputParams = 0) - { - unsigned numInputParams = _x.num_input_params() % s_modInputParams; - switch (_type) - { - case NumFunctionReturns::None: - m_functionVecNoReturnValue.push_back(numInputParams); - break; - case NumFunctionReturns::Single: - m_functionVecSingleReturnValue.push_back(numInputParams); - break; - case NumFunctionReturns::Multiple: - m_functionVecMultiReturnValue.push_back(std::make_pair(numInputParams, _numOutputParams)); - break; - } - } + /// Register a function declaration + /// @param _f Pointer to a FunctionDef object + void registerFunction(FunctionDef const* _f); + + /// Removes the entries for function @a _x from m_functionSigMap and m_functionDefMap + void updateFunctionMaps(std::string const& _x); + /// Returns a pseudo-random dictionary token. /// @param _p Enum that decides if the returned token is hex prefixed ("0x") or not /// @return Dictionary token at the index computed using a @@ -168,33 +223,46 @@ private: return m_counter++; } + /// Generate function name of the form "foo_<typeSuffix>_<counter>". + /// @param _type Type classified according to the number of + /// values returned by function. + std::string functionName(NumFunctionReturns _type) + { + return "foo_" + functionTypeToString(_type) + "_" + std::to_string(counter()); + } + std::ostringstream m_output; - /// Scope - std::stack> m_scopes; + /// Variables in current scope + std::stack> m_scopeVars; + /// Functions in current scope + std::stack> m_scopeFuncs; /// Variables std::vector m_variables; + /// Functions + std::vector m_functions; + /// Maps FunctionDef object to its name + std::map m_functionDefMap; // Set that is used for deduplicating switch case literals std::stack> m_switchLiteralSetPerScope; - // Total number of function sets. 
A function set contains one function of each type defined by - // NumFunctionReturns - unsigned m_numFunctionSets; // Look-up table per function type that holds the number of input (output) function parameters - std::vector m_functionVecNoReturnValue; - std::vector m_functionVecSingleReturnValue; - std::vector> m_functionVecMultiReturnValue; + std::map> m_functionSigMap; // mod input/output parameters impose an upper bound on the number of input/output parameters a function may have. static unsigned constexpr s_modInputParams = 5; static unsigned constexpr s_modOutputParams = 5; - // predicate to keep track of for body scope + /// Predicate to keep track of for body scope. If true, break/continue + /// statements can not be created. bool m_inForBodyScope; // Index used for naming loop variable of bounded for loops unsigned m_numNestedForLoops; - // predicate to keep track of for loop init scope + /// Predicate to keep track of for loop init scope. If true, variable + /// or function declarations can not be created. 
bool m_inForInitScope; /// Monotonically increasing counter unsigned m_counter; /// Size of protobuf input unsigned m_inputSize; + /// Predicate that is true if inside function definition, false otherwise + bool m_inFunctionDef; }; } } diff --git a/test/tools/ossfuzz/yulProto.proto b/test/tools/ossfuzz/yulProto.proto index 4a39ab258..910903afa 100644 --- a/test/tools/ossfuzz/yulProto.proto +++ b/test/tools/ossfuzz/yulProto.proto @@ -21,54 +21,24 @@ message VarDecl { required Expression expr = 1; } -message FunctionCallNoReturnVal { - // Indexes a function that does not return anything - required uint32 func_index = 1; - required Expression in_param1 = 2; - required Expression in_param2 = 3; - required Expression in_param3 = 4; - required Expression in_param4 = 5; -} - -// Used by Expression -message FunctionCallSingleReturnVal { - // Indexes a function that returns exactly one value - required uint32 func_index = 1; - required Expression in_param1 = 2; - required Expression in_param2 = 3; - required Expression in_param3 = 4; - required Expression in_param4 = 5; -} - -message MultiVarDecl { - // Indexes a function that returns more than one value - required uint32 func_index = 1; - required Expression in_param1 = 2; - required Expression in_param2 = 3; - required Expression in_param3 = 4; - required Expression in_param4 = 5; -} - -message MultiAssignment { - // Indexes a function that returns more than one value - required uint32 func_index = 1; - required Expression in_param1 = 2; - required Expression in_param2 = 3; - required Expression in_param3 = 4; - required Expression in_param4 = 5; - required VarRef out_param1 = 6; - required VarRef out_param2 = 7; - required VarRef out_param3 = 8; - required VarRef out_param4 = 9; -} - -// We exclude function calls with single return value here and use them as expressions message FunctionCall { - oneof functioncall_oneof { - FunctionCallNoReturnVal call_zero = 1; - MultiVarDecl call_multidecl = 2; - MultiAssignment 
call_multiassign = 3; + enum Returns { + ZERO = 1; + SINGLE = 2; + MULTIDECL = 3; + MULTIASSIGN = 4; } + required Returns ret = 1; + // Indexes an existing function + required uint32 func_index = 2; + required Expression in_param1 = 3; + required Expression in_param2 = 4; + required Expression in_param3 = 5; + required Expression in_param4 = 6; + required VarRef out_param1 = 7; + required VarRef out_param2 = 8; + required VarRef out_param3 = 9; + required VarRef out_param4 = 10; } message TypedVarDecl { @@ -242,7 +212,7 @@ message Expression { UnaryOp unop = 4; TernaryOp top = 5; NullaryOp nop = 6; - FunctionCallSingleReturnVal func_expr = 7; + FunctionCall func_expr = 7; } } @@ -311,10 +281,12 @@ message TerminatingStmt { } } -// Stub for a VarDecl without an Expression on the RHS -message EmptyVarDecl {} +message FunctionDef { + required uint32 num_input_params = 1; + required uint32 num_output_params = 2; + required Block block = 3; +} -// TODO: Make Function definition a Statement message Statement { oneof stmt_oneof { VarDecl decl = 1; @@ -332,6 +304,7 @@ message Statement { TerminatingStmt terminatestmt = 13; FunctionCall functioncall = 14; BoundedForStmt boundedforstmt = 15; + FunctionDef funcdef = 16; } } @@ -339,39 +312,8 @@ message Block { repeated Statement statements = 1; } -// Identical to Block with the addition of an empty var right at the top -// Used by FunctionDefinitionNoReturnVal only. -message SpecialBlock { - required EmptyVarDecl var = 1; - repeated Statement statements = 2; -} - -// This ensures that proto mutator generates at least one of each type if it creates at least 1 functiondef message. -message FunctionDefinition { - required FunctionDefinitionNoReturnVal fd_zero = 1; - required FunctionDefinitionSingleReturnVal fd_one = 2; - required FunctionDefinitionMultiReturnVal fd_multi = 3; -} - -// Since this function can have 0 parameters, we hoist an empty var decl at the top via SpecialBlock. 
-message FunctionDefinitionNoReturnVal { - required uint32 num_input_params = 1; - required SpecialBlock statements = 2; -} - -message FunctionDefinitionSingleReturnVal { - required uint32 num_input_params = 1; - required Block statements = 2; -} - -message FunctionDefinitionMultiReturnVal { - required uint32 num_input_params = 1; - required uint32 num_output_params = 2; - required Block statements = 3; -} - message Program { - repeated FunctionDefinition funcs = 1; + required Block block = 1; } package yul.test.yul_fuzzer;