[Proto fuzzer] Add function calls, and multi variable declaration/assignment statements

This commit is contained in:
Bhargava Shastry 2019-04-09 08:45:36 +02:00
parent c3a1c168d0
commit 495f7f9013
6 changed files with 849 additions and 410 deletions

View File

@ -33,7 +33,7 @@ target_link_libraries(strictasm_assembly_ossfuzz PRIVATE yul FuzzingEngine.a)
add_executable(yul_proto_ossfuzz yulProtoFuzzer.cpp protoToYul.cpp yulProto.pb.cc)
target_include_directories(yul_proto_ossfuzz PRIVATE /src/libprotobuf-mutator /src/LPM/external.protobuf/include)
target_link_libraries(yul_proto_ossfuzz PRIVATE yul evmasm
target_link_libraries(yul_proto_ossfuzz PRIVATE yul evmasm solidity
protobuf-mutator-libfuzzer.a
protobuf-mutator.a
protobuf.a

View File

@ -16,10 +16,13 @@
*/
#include <test/tools/ossfuzz/protoToYul.h>
#include <libsolidity/codegen/YulUtilFunctions.h>
#include <boost/range/algorithm_ext/erase.hpp>
#include <libyul/Exceptions.h>
using namespace std;
using namespace yul::test::yul_fuzzer;
using namespace dev::solidity;
string ProtoConverter::createHex(string const& _hexBytes) const
{
@ -104,9 +107,10 @@ void ProtoConverter::visit(Literal const& _x)
}
}
// Reference any index in [0, m_numLiveVars-1] or [0, m_numLiveVars)
// Reference any index in [0, m_numLiveVars-1]
void ProtoConverter::visit(VarRef const& _x)
{
	// A reference only makes sense while at least one variable is live; this check also keeps the
	// modulo below from dividing by zero.
	yulAssert(m_numLiveVars > 0, "Proto fuzzer: No variables to reference.");
	// Fold the fuzzer-supplied number onto the index of a live variable.
	auto const liveIndex = static_cast<uint32_t>(_x.varnum()) % m_numLiveVars;
	m_output << "x_" << liveIndex;
}
@ -132,6 +136,9 @@ void ProtoConverter::visit(Expression const& _x)
case Expression::kNop:
visit(_x.nop());
break;
case Expression::kFuncExpr:
visit(_x.func_expr());
break;
case Expression::EXPR_ONEOF_NOT_SET:
m_output << "1";
break;
@ -216,7 +223,6 @@ void ProtoConverter::visit(BinaryOp const& _x)
m_output << ")";
}
// New var numbering starts from x_10
void ProtoConverter::visit(VarDecl const& _x)
{
m_output << "let x_" << m_numLiveVars << " := ";
@ -226,6 +232,44 @@ void ProtoConverter::visit(VarDecl const& _x)
m_output << "\n";
}
void ProtoConverter::visit(EmptyVarDecl const&)
{
	// Declare the next free variable without an initial value.
	m_output << "let x_" << m_numLiveVars << "\n";
	// Account for the new variable in the running total and in the innermost scope.
	++m_numLiveVars;
	++m_numVarsPerScope.top();
}
// Emits a multi variable declaration whose right hand side is a call to a registered multi-return
// function: "let x_p, ..., x_k := foo_multireturn_<id>(<args>)".
// The callee is selected by folding _x.func_index() onto the table of registered multi-return
// functions; the number of declared variables equals the callee's number of return values.
void ProtoConverter::visit(MultiVarDecl const& _x)
{
	// The modulo below must not divide by zero.
	yulAssert(
		!m_functionVecMultiReturnValue.empty(),
		"Proto fuzzer: Multi variable declaration requires at least one registered multi return function."
	);
	size_t funcId = static_cast<size_t>(_x.func_index()) % m_functionVecMultiReturnValue.size();
	// Each table entry is a (number of inputs, number of outputs) pair. The counts are kept unsigned
	// to match the table's element type and the signature of visitFunctionInputParams.
	auto const& signature = m_functionVecMultiReturnValue.at(funcId);
	unsigned numInParams = signature.first;
	unsigned numOutParams = signature.second;

	// Ensure that the chosen function returns at least 2 and at most 4 values
	yulAssert(
		((numOutParams >= 2) && (numOutParams <= 4)),
		"Proto fuzzer: Multi variable declaration calls a function with either too few or too many output params."
	);

	// We must start variable numbering past the number of live variables at this point in time.
	// This creates let x_p,..., x_k :=
	// (k-p)+1 = numOutParams
	m_output <<
		"let " <<
		YulUtilFunctions::suffixedVariableNameList("x_", m_numLiveVars, m_numLiveVars + numOutParams) <<
		" := ";

	// Create RHS of multi var decl
	m_output << "foo_" << functionTypeToString(NumFunctionReturns::Multiple) << "_" << funcId;
	m_output << "(";
	visitFunctionInputParams(_x, numInParams);
	m_output << ")\n";

	// Update live variables in scope and in total to account for the variables created by this
	// multi variable declaration. This happens after the arguments are visited so that they cannot
	// reference the variables being declared.
	m_numVarsPerScope.top() += numOutParams;
	m_numLiveVars += numOutParams;
}
void ProtoConverter::visit(TypedVarDecl const& _x)
{
m_output << "let x_" << m_numLiveVars;
@ -479,6 +523,115 @@ void ProtoConverter::visit(AssignmentStatement const& _x)
m_output << "\n";
}
// Emits the comma separated argument list of a function call.
// _x must provide in_param1() ... in_param4(); only the first _numInputParams of them are emitted,
// highest numbered first (in_param4, in_param3, ...).
template <class T>
void ProtoConverter::visitFunctionInputParams(T const& _x, unsigned _numInputParams)
{
	// No message carries more than four input expressions.
	if (_numInputParams > 4)
	{
		yulAssert(false, "Proto fuzzer: Function call with too many input parameters.");
		return;
	}

	// Arguments are emitted in descending order; each guard adds one argument, and every argument
	// except the last one emitted (in_param1) is followed by a separator.
	if (_numInputParams >= 4)
	{
		visit(_x.in_param4());
		m_output << ", ";
	}
	if (_numInputParams >= 3)
	{
		visit(_x.in_param3());
		m_output << ", ";
	}
	if (_numInputParams >= 2)
	{
		visit(_x.in_param2());
		m_output << ", ";
	}
	if (_numInputParams >= 1)
		visit(_x.in_param1());
}
// Emits a multi assignment whose right hand side is a call to a registered multi-return function:
// "<refs> := foo_multireturn_<id>(<args>)".
// The callee is selected by folding _x.func_index() onto the table of registered multi-return
// functions; the number of variable references on the left equals the callee's number of return values.
void ProtoConverter::visit(MultiAssignment const& _x)
{
	// The modulo below must not divide by zero.
	yulAssert(
		!m_functionVecMultiReturnValue.empty(),
		"Proto fuzzer: Multi assignment requires at least one registered multi return function."
	);
	size_t funcId = (static_cast<size_t>(_x.func_index()) % m_functionVecMultiReturnValue.size());
	// Each table entry is a (number of inputs, number of outputs) pair.
	unsigned numInParams = m_functionVecMultiReturnValue.at(funcId).first;
	unsigned numOutParams = m_functionVecMultiReturnValue.at(funcId).second;
	yulAssert(
		((numOutParams >= 2) && (numOutParams <= 4)),
		"Proto fuzzer: Multi assignment calls a function that has either too many or too few output parameters."
	);

	// Convert LHS of multi assignment
	// We reverse the order of out param visits since the order does not matter. This helps reduce the size of this
	// switch statement.
	switch (numOutParams)
	{
	case 4:
		visit(_x.out_param4());
		m_output << ", ";
		BOOST_FALLTHROUGH;
	case 3:
		visit(_x.out_param3());
		m_output << ", ";
		BOOST_FALLTHROUGH;
	case 2:
		visit(_x.out_param2());
		m_output << ", ";
		visit(_x.out_param1());
		break;
	default:
		// Guarded by the range assertion above. The message names output parameters because this
		// switch handles the left hand side, not the call arguments.
		yulAssert(false, "Proto fuzzer: Multi assignment with an unsupported number of output parameters.");
		break;
	}
	m_output << " := ";

	// Convert RHS of multi assignment
	m_output << "foo_" << functionTypeToString(NumFunctionReturns::Multiple) << "_" << funcId;
	m_output << "(";
	visitFunctionInputParams(_x, numInParams);
	m_output << ")\n";
}
// Emits a call statement to a registered function that returns nothing, terminated by a newline.
void ProtoConverter::visit(FunctionCallNoReturnVal const& _x)
{
	// Fold the fuzzer-supplied index onto the table of registered no-return functions.
	size_t const calleeId = static_cast<size_t>(_x.func_index()) % m_functionVecNoReturnValue.size();
	// The table stores the callee's number of input parameters.
	unsigned const argumentCount = m_functionVecNoReturnValue.at(calleeId);

	m_output << "foo_" << functionTypeToString(NumFunctionReturns::None) << "_" << calleeId << "(";
	visitFunctionInputParams(_x, argumentCount);
	m_output << ")\n";
}
// Emits a call expression to a registered function that returns exactly one value. No trailing
// newline is written because the call is used as an expression.
void ProtoConverter::visit(FunctionCallSingleReturnVal const& _x)
{
	// Fold the fuzzer-supplied index onto the table of registered single-return functions.
	size_t const calleeId = static_cast<size_t>(_x.func_index()) % m_functionVecSingleReturnValue.size();
	// The table stores the callee's number of input parameters.
	unsigned const argumentCount = m_functionVecSingleReturnValue.at(calleeId);

	m_output << "foo_" << functionTypeToString(NumFunctionReturns::Single) << "_" << calleeId << "(";
	visitFunctionInputParams(_x, argumentCount);
	m_output << ")";
}
// Dispatches a statement-level function call to the visitor of whichever alternative is set.
// An unset oneof produces no output.
void ProtoConverter::visit(FunctionCall const& _x)
{
	auto const alternative = _x.functioncall_oneof_case();
	switch (alternative)
	{
	case FunctionCall::FUNCTIONCALL_ONEOF_NOT_SET:
		return;
	case FunctionCall::kCallZero:
		visit(_x.call_zero());
		return;
	case FunctionCall::kCallMultidecl:
		visit(_x.call_multidecl());
		return;
	case FunctionCall::kCallMultiassign:
		visit(_x.call_multiassign());
		return;
	}
}
void ProtoConverter::visit(IfStmt const& _x)
{
m_output << "if ";
@ -509,6 +662,8 @@ void ProtoConverter::visit(StoreFunc const& _x)
void ProtoConverter::visit(ForStmt const& _x)
{
// Boilerplate for loop that limits the number of iterations to a maximum of 4.
// TODO: Generalize for loop init, condition, and post blocks.
std::string loopVarName("i_" + std::to_string(m_numNestedForLoops++));
m_output << "for { let " << loopVarName << " := 0 } "
<< "lt(" << loopVarName << ", 0x60) "
@ -656,6 +811,9 @@ void ProtoConverter::visit(Statement const& _x)
case Statement::kTerminatestmt:
visit(_x.terminatestmt());
break;
case Statement::kFunctioncall:
visit(_x.functioncall());
break;
case Statement::STMT_ONEOF_NOT_SET:
break;
}
@ -677,28 +835,162 @@ void ProtoConverter::visit(Block const& _x)
m_output << "{}\n";
}
void ProtoConverter::visit(Function const& _x)
void ProtoConverter::visit(SpecialBlock const& _x)
{
m_output << "{\n"
<< "let a,b := foo(calldataload(0),calldataload(32),calldataload(64),calldataload(96),calldataload(128),"
<< "calldataload(160),calldataload(192),calldataload(224))\n"
<< "sstore(0, a)\n"
<< "sstore(32, b)\n"
<< "function foo(x_0, x_1, x_2, x_3, x_4, x_5, x_6, x_7) -> x_8, x_9\n";
visit(_x.statements());
m_numVarsPerScope.push(0);
m_output << "{\n";
visit(_x.var());
if (_x.statements_size() > 0)
for (auto const& st: _x.statements())
visit(st);
m_numLiveVars -= m_numVarsPerScope.top();
m_numVarsPerScope.pop();
m_output << "}\n";
}
string ProtoConverter::functionToString(Function const& _input)
// Emits one function definition followed by a call to it and stores of its results.
// _x supplies the function body via statements(); _numInParams and _numOutParams give the number
// of input and output parameters; _type selects the name infix (see functionTypeToString).
// The emitted name is foo_<infix>_<m_numFunctionSets>. After the definition, the function is
// called with calldataload(0), calldataload(32), ... as arguments, its results are assigned to
// a_0, a_1, ..., and each a_i is written via sstore(i*32, a_i).
template <class T>
void ProtoConverter::createFunctionDefAndCall(T const& _x, unsigned _numInParams, unsigned _numOutParams, NumFunctionReturns _type)
{
// Both counts must stay strictly below their respective modulus.
yulAssert(
((_numInParams <= modInputParams - 1) && (_numOutParams <= modOutputParams - 1)),
"Proto fuzzer: Too many function I/O parameters requested."
);
// At the time of function definition creation, the number of live variables must be 0.
// This is because we always create only as many variables as we need within function scope.
yulAssert(m_numLiveVars == 0, "Proto fuzzer: Unused live variable found.");
// Signature
// This creates function foo_<noreturn|singlereturn|multireturn>_<m_numFunctionSets>(<input parameter names>)
m_output << "function foo_" << functionTypeToString(_type) << "_" << m_numFunctionSets;
m_output << "(";
// Input parameter names use the "x_" prefix with suffixes starting at 0.
if (_numInParams > 0)
m_output << YulUtilFunctions::suffixedVariableNameList("x_", 0, _numInParams);
m_output << ")";
// Book keeping for variables in function scope and in nested scopes: input parameters count as
// live variables of the function's scope.
m_numVarsPerScope.push(_numInParams);
m_numLiveVars += _numInParams;
// This creates " -> <output parameter names>". Output names continue the "x_" numbering
// directly after the input parameters (suffixes start at _numInParams).
if (_numOutParams > 0)
{
m_output << " -> " << YulUtilFunctions::suffixedVariableNameList("x_", _numInParams, _numInParams + _numOutParams);
// More bookkeeping: output parameters are live variables too.
m_numVarsPerScope.top() += _numOutParams;
m_numLiveVars += _numOutParams;
}
m_output << "\n";
// Body
visit(_x.statements());
// Ensure that variable stack is balanced: drop the function scope's variables again.
m_numLiveVars -= m_numVarsPerScope.top();
m_numVarsPerScope.pop();
yulAssert(m_numLiveVars == 0, "Proto fuzzer: Variable stack after function definition is unbalanced.");
// Manually create a multi assignment using global variables
// This prints a_0, ..., a_k-1 for this function that returns "k" values
if (_numOutParams > 0)
m_output << YulUtilFunctions::suffixedVariableNameList("a_", 0, _numOutParams) << " := ";
// Call the function with the correct number of input parameters via calls to calldataload with
// incremental addresses.
m_output << "foo_" << functionTypeToString(_type) << "_" << std::to_string(m_numFunctionSets);
m_output << "(";
for (unsigned i = 0; i < _numInParams; i++)
{
m_output << "calldataload(" << std::to_string(i*32) << ")";
// Separator after every argument except the last. The subtraction cannot wrap here because
// the loop body only runs when _numInParams is at least 1.
if (i < _numInParams - 1)
m_output << ",";
}
m_output << ")\n";
// Store every returned value at its own 32-byte-spaced key.
for (unsigned i = 0; i < _numOutParams; i++)
m_output << "sstore(" << std::to_string(i*32) << ", a_" << std::to_string(i) << ")\n";
}
// Emits the definition (and call) of a function that returns nothing.
void ProtoConverter::visit(FunctionDefinitionNoReturnVal const& _x)
{
	// The requested input count is folded into [0, modInputParams).
	unsigned const inputCount = _x.num_input_params() % modInputParams;
	// A function of this kind has no output parameters.
	unsigned const outputCount = 0;
	createFunctionDefAndCall(_x, inputCount, outputCount, NumFunctionReturns::None);
}
// Emits the definition (and call) of a function that returns exactly one value.
void ProtoConverter::visit(FunctionDefinitionSingleReturnVal const& _x)
{
	// The requested input count is folded into [0, modInputParams).
	unsigned const inputCount = _x.num_input_params() % modInputParams;
	// A function of this kind has exactly one output parameter.
	unsigned const outputCount = 1;
	createFunctionDefAndCall(_x, inputCount, outputCount, NumFunctionReturns::Single);
}
// Emits the definition (and call) of a function that returns more than one value.
void ProtoConverter::visit(FunctionDefinitionMultiReturnVal const& _x)
{
	// The requested input count is folded into [0, modInputParams).
	unsigned const inputCount = _x.num_input_params() % modInputParams;
	// Fold the requested output count into [0, modOutputParams) and raise it to the minimum of two,
	// so the function has at least 2 and at most (modOutputParams - 1) return parameters.
	unsigned const foldedOutputs = _x.num_output_params() % modOutputParams;
	unsigned const outputCount = foldedOutputs < 2 ? 2 : foldedOutputs;
	createFunctionDefAndCall(_x, inputCount, outputCount, NumFunctionReturns::Multiple);
}
// Emits one function set: a no-return, a single-return and a multi-return function, in that order.
void ProtoConverter::visit(FunctionDefinition const& _x)
{
	auto const& noReturn = _x.fd_zero();
	auto const& singleReturn = _x.fd_one();
	auto const& multiReturn = _x.fd_multi();

	visit(noReturn);
	visit(singleReturn);
	visit(multiReturn);

	// All three functions of a set share one numeric suffix, so the counter advances only once the
	// whole set has been emitted.
	++m_numFunctionSets;
}
void ProtoConverter::visit(Program const& _x)
{
	/* Layout of the generated program:
	 *   - Globals a_0, a_1, a_2, and a_3 that receive up to four function return values.
	 *   - For every function set: the function definitions, each followed by a call of the
	 *     function just defined and stores of its results.
	 * Example: function foo(x_0) -> x_1 {}
	 *          a_0 := foo(calldataload(0))
	 *          sstore(0, a_0)
	 */
	// Open the outermost block and declare the globals: let a_0, a_1, a_2, a_3 (followed by a new line)
	m_output << "{\n" << "let " << YulUtilFunctions::suffixedVariableNameList("a_", 0, modOutputParams - 1) << "\n";

	auto const& functionSets = _x.funcs();

	// First pass: record every function's interface so that calls emitted while visiting function
	// bodies (multi var decl/assignment statements) can look up parameter counts.
	for (auto const& functionSet: functionSets)
		registerFunction(functionSet);

	// Second pass: emit definitions and calls.
	for (auto const& functionSet: functionSets)
		visit(functionSet);

	yulAssert(
		static_cast<unsigned>(_x.funcs_size()) == m_numFunctionSets,
		"Proto fuzzer: Functions not correctly registered."
	);
	m_output << "}\n";
}
string ProtoConverter::programToString(Program const& _input)
{
visit(_input);
return m_output.str();
}
string ProtoConverter::protoToYul(const uint8_t* _data, size_t _size)
void ProtoConverter::registerFunction(FunctionDefinition const& _x)
{
Function message;
if (!message.ParsePartialFromArray(_data, _size))
return "#error invalid proto\n";
return functionToString(message);
// No return and single return functions explicitly state the number of values returned
registerFunction(_x.fd_zero(), NumFunctionReturns::None);
registerFunction(_x.fd_one(), NumFunctionReturns::Single);
// A multi return function can have between two and (modOutputParams - 1) parameters
unsigned numOutParams = std::max<unsigned>(2, _x.fd_multi().num_output_params() % modOutputParams);
registerFunction(_x.fd_multi(), NumFunctionReturns::Multiple, numOutParams);
}
// Returns the name infix used in generated function names ("foo_<infix>_<n>") for the given
// function kind.
std::string ProtoConverter::functionTypeToString(NumFunctionReturns _type)
{
	switch (_type)
	{
	case NumFunctionReturns::None:
		return "noreturn";
	case NumFunctionReturns::Single:
		return "singlereturn";
	case NumFunctionReturns::Multiple:
		return "multireturn";
	}
	// Only reachable if _type holds a value outside the enumerators above. Without this, control
	// would flow off the end of a non-void function, which is undefined behaviour.
	yulAssert(false, "Proto fuzzer: Unknown function type.");
	return {};
}

View File

@ -22,6 +22,8 @@
#include <sstream>
#include <stack>
#include <set>
#include <vector>
#include <tuple>
#include <test/tools/ossfuzz/yulProto.pb.h>
#include <libdevcore/Common.h>
@ -38,31 +40,34 @@ class ProtoConverter
public:
ProtoConverter()
{
// The hard-coded function template foo has 10 parameters that are already "live"
m_numLiveVars = 10;
m_numLiveVars = 0;
m_numVarsPerScope.push(m_numLiveVars);
m_numNestedForLoops = 0;
m_inForScope.push(false);
m_numFunctionSets = 0;
}
ProtoConverter(ProtoConverter const&) = delete;
ProtoConverter(ProtoConverter&&) = delete;
std::string functionToString(Function const& _input);
std::string protoToYul(uint8_t const* _data, size_t _size);
std::string programToString(Program const& _input);
private:
void visit(BinaryOp const&);
void visit(Block const&);
void visit(SpecialBlock const&);
void visit(Literal const&);
void visit(VarRef const&);
void visit(Expression const&);
void visit(VarDecl const&);
void visit(EmptyVarDecl const&);
void visit(MultiVarDecl const&);
void visit(TypedVarDecl const&);
void visit(UnaryOp const&);
void visit(AssignmentStatement const&);
void visit(MultiAssignment const&);
void visit(IfStmt const&);
void visit(StoreFunc const&);
void visit(Statement const&);
void visit(Function const&);
void visit(FunctionDefinition const&);
void visit(ForStmt const&);
void visit(CaseStmt const&);
void visit(SwitchStmt const&);
@ -75,19 +80,72 @@ private:
void visit(RetRevStmt const&);
void visit(SelfDestructStmt const&);
void visit(TerminatingStmt const&);
void visit(FunctionCallNoReturnVal const&);
void visit(FunctionCallSingleReturnVal const&);
void visit(FunctionCall const&);
void visit(FunctionDefinitionNoReturnVal const&);
void visit(FunctionDefinitionSingleReturnVal const&);
void visit(FunctionDefinitionMultiReturnVal const&);
void visit(Program const&);
template <class T>
void visit(google::protobuf::RepeatedPtrField<T> const& _repeated_field);
void registerFunction(FunctionDefinition const&);
std::string createHex(std::string const& _hexBytes) const;
std::string createAlphaNum(std::string const& _strBytes) const;
bool isCaseLiteralUnique(Literal const&);
enum class NumFunctionReturns
{
None,
Single,
Multiple
};
template<class T>
void visitFunctionInputParams(T const&, unsigned);
template<class T>
void createFunctionDefAndCall(T const&, unsigned, unsigned, NumFunctionReturns);
std::string functionTypeToString(NumFunctionReturns _type);
template <class T>
void registerFunction(T const& _x, NumFunctionReturns _type, unsigned _numOutputParams = 0)
{
unsigned numInputParams = _x.num_input_params() % modInputParams;
switch (_type)
{
case NumFunctionReturns::None:
m_functionVecNoReturnValue.push_back(numInputParams);
break;
case NumFunctionReturns::Single:
m_functionVecSingleReturnValue.push_back(numInputParams);
break;
case NumFunctionReturns::Multiple:
m_functionVecMultiReturnValue.push_back(std::make_pair(numInputParams, _numOutputParams));
break;
}
}
std::ostringstream m_output;
std::stack<uint8_t> m_numVarsPerScope;
int32_t m_numLiveVars;
int32_t m_numNestedForLoops;
// Number of live variables in inner scope of a function
std::stack<unsigned> m_numVarsPerScope;
// Number of live variables in function scope
unsigned m_numLiveVars;
// Number of nested for loops for loop index referencing
unsigned m_numNestedForLoops;
std::stack<bool> m_inForScope;
// Set that is used for deduplicating switch case literals
std::stack<std::set<dev::u256>> m_switchLiteralSetPerScope;
// Total number of function sets. A function set contains one function of each type defined by
// NumFunctionReturns
unsigned m_numFunctionSets;
// Look-up table per function type that holds the number of input (output) function parameters
std::vector<unsigned> m_functionVecNoReturnValue;
std::vector<unsigned> m_functionVecSingleReturnValue;
std::vector<std::pair<unsigned, unsigned>> m_functionVecMultiReturnValue;
// mod input/output parameters impose an upper bound on the number of input/output parameters a function may have.
static unsigned constexpr modInputParams = 5;
static unsigned constexpr modOutputParams = 5;
};
}
}

View File

@ -21,6 +21,56 @@ message VarDecl {
required Expression expr = 1;
}
// Statement-level call to a function that returns nothing.
message FunctionCallNoReturnVal {
// Indexes a function that does not return anything
// (the converter reduces it modulo the number of registered no-return functions).
required uint32 func_index = 1;
// Candidate call arguments. The converter emits only as many as the callee has input
// parameters, highest numbered first.
required Expression in_param1 = 2;
required Expression in_param2 = 3;
required Expression in_param3 = 4;
required Expression in_param4 = 5;
}
// Call to a function that returns exactly one value.
// Used by Expression (as the func_expr alternative).
message FunctionCallSingleReturnVal {
// Indexes a function that returns exactly one value
// (the converter reduces it modulo the number of registered single-return functions).
required uint32 func_index = 1;
// Candidate call arguments. The converter emits only as many as the callee has input
// parameters, highest numbered first.
required Expression in_param1 = 2;
required Expression in_param2 = 3;
required Expression in_param3 = 4;
required Expression in_param4 = 5;
}
// Declaration of several new variables initialized from a call to a multi-return function.
// The converter declares as many variables as the callee has return values.
message MultiVarDecl {
// Indexes a function that returns more than one value
// (the converter reduces it modulo the number of registered multi-return functions).
required uint32 func_index = 1;
// Candidate call arguments. The converter emits only as many as the callee has input
// parameters, highest numbered first.
required Expression in_param1 = 2;
required Expression in_param2 = 3;
required Expression in_param3 = 4;
required Expression in_param4 = 5;
}
// Assignment of the results of a multi-return function call to existing variables.
message MultiAssignment {
// Indexes a function that returns more than one value
// (the converter reduces it modulo the number of registered multi-return functions).
required uint32 func_index = 1;
// Candidate call arguments. The converter emits only as many as the callee has input
// parameters, highest numbered first.
required Expression in_param1 = 2;
required Expression in_param2 = 3;
required Expression in_param3 = 4;
required Expression in_param4 = 5;
// Candidate assignment targets. The converter emits as many as the callee has return values
// (between two and four), highest numbered first.
required VarRef out_param1 = 6;
required VarRef out_param2 = 7;
required VarRef out_param3 = 8;
required VarRef out_param4 = 9;
}
// Statement-level function call.
// We exclude function calls with single return value here and use them as expressions
// If no alternative is set, the converter emits nothing for this statement.
message FunctionCall {
oneof functioncall_oneof {
// Call to a function that returns nothing.
FunctionCallNoReturnVal call_zero = 1;
// Multi variable declaration initialized from a multi-return call.
MultiVarDecl call_multidecl = 2;
// Multi assignment from a multi-return call.
MultiAssignment call_multiassign = 3;
}
}
message TypedVarDecl {
enum TypeName {
BOOL = 1;
@ -192,6 +242,7 @@ message Expression {
UnaryOp unop = 4;
TernaryOp top = 5;
NullaryOp nop = 6;
FunctionCallSingleReturnVal func_expr = 7;
}
}
@ -253,6 +304,10 @@ message TerminatingStmt {
}
}
// Stub for a VarDecl without an Expression on the RHS
// The converter emits "let x_<n>" for it, where <n> is the next free variable number.
message EmptyVarDecl {}
// TODO: Make Function definition a Statement
message Statement {
oneof stmt_oneof {
VarDecl decl = 1;
@ -268,6 +323,7 @@ message Statement {
CopyFunc copy_func = 11;
ExtCodeCopy extcode_copy = 12;
TerminatingStmt terminatestmt = 13;
FunctionCall functioncall = 14;
}
}
@ -275,8 +331,39 @@ message Block {
repeated Statement statements = 1;
}
message Function {
required Block statements = 1;
// Identical to Block with the addition of an empty var right at the top
// Used by FunctionDefinitionNoReturnVal only.
message SpecialBlock {
// Variable declaration emitted first inside the block, before any statement.
required EmptyVarDecl var = 1;
// Statements emitted after the hoisted declaration.
repeated Statement statements = 2;
}
// One "function set": a function of each kind (no, single and multiple return values).
// This ensures that proto mutator generates at least one of each type if it creates at least 1 functiondef message.
// The converter emits the three functions in field order and gives them the same numeric suffix.
message FunctionDefinition {
required FunctionDefinitionNoReturnVal fd_zero = 1;
required FunctionDefinitionSingleReturnVal fd_one = 2;
required FunctionDefinitionMultiReturnVal fd_multi = 3;
}
// Definition of a function that returns nothing.
// Since this function can have 0 parameters, we hoist an empty var decl at the top via SpecialBlock.
message FunctionDefinitionNoReturnVal {
// Requested number of input parameters; the converter reduces it modulo 5, i.e. to 0..4.
required uint32 num_input_params = 1;
// Function body.
required SpecialBlock statements = 2;
}
// Definition of a function that returns exactly one value.
message FunctionDefinitionSingleReturnVal {
// Requested number of input parameters; the converter reduces it modulo 5, i.e. to 0..4.
required uint32 num_input_params = 1;
// Function body.
required Block statements = 2;
}
// Definition of a function that returns more than one value.
message FunctionDefinitionMultiReturnVal {
// Requested number of input parameters; the converter reduces it modulo 5, i.e. to 0..4.
required uint32 num_input_params = 1;
// Requested number of return values; the converter reduces it modulo 5 and raises the result
// to at least 2, i.e. to 2..4.
required uint32 num_output_params = 2;
// Function body.
required Block statements = 3;
}
// Top-level fuzzer input: a sequence of function sets.
// The converter first registers the interfaces of all function sets and then emits them in order.
message Program {
repeated FunctionDefinition funcs = 1;
}
package yul.test.yul_fuzzer;

View File

@ -30,12 +30,10 @@ using namespace yul;
using namespace yul::test::yul_fuzzer;
using namespace std;
DEFINE_PROTO_FUZZER(Function const& _input)
DEFINE_PROTO_FUZZER(Program const& _input)
{
ProtoConverter converter;
string yul_source = converter.functionToString(_input);
if (yul_source.size() > 600)
return;
string yul_source = converter.programToString(_input);
if (const char* dump_path = getenv("PROTO_FUZZER_DUMP_PATH"))
{
@ -45,6 +43,9 @@ DEFINE_PROTO_FUZZER(Function const& _input)
of.write(yul_source.data(), yul_source.size());
}
if (yul_source.size() > 1200)
return;
// AssemblyStack entry point
AssemblyStack stack(
langutil::EVMVersion(),

View File

@ -37,12 +37,10 @@ using namespace langutil;
using namespace dev;
using namespace yul::test;
DEFINE_PROTO_FUZZER(Function const& _input)
DEFINE_PROTO_FUZZER(Program const& _input)
{
ProtoConverter converter;
string yul_source = converter.functionToString(_input);
if (yul_source.size() > 600)
return;
string yul_source = converter.programToString(_input);
if (const char* dump_path = getenv("PROTO_FUZZER_DUMP_PATH"))
{
@ -52,6 +50,9 @@ DEFINE_PROTO_FUZZER(Function const& _input)
of.write(yul_source.data(), yul_source.size());
}
if (yul_source.size() > 1200)
return;
// AssemblyStack entry point
AssemblyStack stack(
langutil::EVMVersion(),