From d677a155072039e9347b6af795cb1a11ba967cfc Mon Sep 17 00:00:00 2001 From: Bhargava Shastry Date: Mon, 27 May 2019 11:18:05 +0200 Subject: [PATCH 1/2] Add object access builtin functions --- test/tools/ossfuzz/protoToYul.cpp | 94 +++++++++++++++++--- test/tools/ossfuzz/protoToYul.h | 31 +++++++ test/tools/ossfuzz/yulProto.proto | 42 ++++++++- test/tools/ossfuzz/yulProto_diff_ossfuzz.cpp | 19 ++++ 4 files changed, 174 insertions(+), 12 deletions(-) diff --git a/test/tools/ossfuzz/protoToYul.cpp b/test/tools/ossfuzz/protoToYul.cpp index 70f0e3ec5..3dc6d78ee 100644 --- a/test/tools/ossfuzz/protoToYul.cpp +++ b/test/tools/ossfuzz/protoToYul.cpp @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -54,6 +55,10 @@ string ProtoConverter::createHex(string const& _hexBytes) // Use a dictionary token. if (tmp.empty()) tmp = dictionaryToken(HexPrefix::DontAdd); + // Hex literals must have even number of digits + if (tmp.size() % 2) + tmp.insert(0, "0"); + yulAssert(tmp.size() <= 64, "Proto Fuzzer: Dictionary token too large"); return tmp; } @@ -158,6 +163,12 @@ void ProtoConverter::visit(Expression const& _x) case Expression::kCreate: visit(_x.create()); break; + case Expression::kUnopdata: + if (m_isObject) + visit(_x.unopdata()); + else + m_output << dictionaryToken(); + break; case Expression::EXPR_ONEOF_NOT_SET: m_output << dictionaryToken(); break; @@ -432,7 +443,14 @@ void ProtoConverter::visit(NullaryOp const& _x) void ProtoConverter::visit(CopyFunc const& _x) { - switch (_x.ct()) + CopyFunc_CopyType type = _x.ct(); + + // datacopy() is valid only if we are inside + // a yul object. + if (type == CopyFunc::DATA && !m_isObject) + return; + + switch (type) { case CopyFunc::CALLDATA: m_output << "calldatacopy"; @@ -443,6 +461,9 @@ void ProtoConverter::visit(CopyFunc const& _x) case CopyFunc::RETURNDATA: m_output << "returndatacopy"; break; + case CopyFunc::DATA: + m_output << "datacopy"; + break; } m_output << "("; visit(_x.target()); @@ -988,6 +1009,23 @@ void ProtoConverter::visit(TerminatingStmt const& _x) } } +void ProtoConverter::visit(UnaryOpData const& _x) +{ + switch (_x.op()) + { + case UnaryOpData::SIZE: + m_output << Whiskers(R"(datasize(""))") + ("id", getObjectIdentifier(_x.identifier())) + .render(); + break; + case UnaryOpData::OFFSET: + m_output << Whiskers(R"(dataoffset(""))") + ("id", getObjectIdentifier(_x.identifier())) + .render(); + break; + } +} + void ProtoConverter::visit(Statement const& _x) { switch (_x.stmt_oneof_case()) @@ -1347,21 +1385,55 @@ void ProtoConverter::visit(PopStmt const& _x) m_output << ")\n"; } +void ProtoConverter::visit(Code const& _x) +{ + m_output << "code {\n"; + visit(_x.block()); + m_output << "}\n"; +} + +void ProtoConverter::visit(Data const& _x) +{ + m_output << "data \"datablock\" hex\"" << createHex(_x.hex()) << "\"\n"; +} + +void ProtoConverter::visit(Object const& _x) +{ + // object "object" { + // ... + // } + m_output << "object " << newObjectId() << " {\n"; + visit(_x.code()); + if (_x.has_data()) + visit(_x.data()); + if (_x.has_obj()) + visit(_x.obj()); + m_output << "}\n"; +} + void ProtoConverter::visit(Program const& _x) { // Initialize input size m_inputSize = _x.ByteSizeLong(); - /* Program template is as follows - * Zero or more statements. If function definition is present, it is - * called post definition. - * Example: function foo(x_0) -> x_1 {} - * x_2 := foo(calldataload(0)) - * sstore(0, x_2) - */ - m_output << "{\n"; - visit(_x.block()); - m_output << "}\n"; + // Program is either a yul object or a block of + // statements. + switch (_x.program_oneof_case()) + { + case Program::kBlock: + m_output << "{\n"; + visit(_x.block()); + m_output << "}\n"; + break; + case Program::kObj: + m_isObject = true; + visit(_x.obj()); + break; + case Program::PROGRAM_ONEOF_NOT_SET: + // {} is a trivial yul program + m_output << "{}"; + break; + } } string ProtoConverter::programToString(Program const& _input) diff --git a/test/tools/ossfuzz/protoToYul.h b/test/tools/ossfuzz/protoToYul.h index f2530bcd5..49a34a8fd 100644 --- a/test/tools/ossfuzz/protoToYul.h +++ b/test/tools/ossfuzz/protoToYul.h @@ -46,6 +46,8 @@ public: m_counter = 0; m_inputSize = 0; m_inFunctionDef = false; + m_objectId = 0; + m_isObject = false; } ProtoConverter(ProtoConverter const&) = delete; ProtoConverter(ProtoConverter&&) = delete; @@ -88,6 +90,10 @@ private: void visit(PopStmt const&); void visit(LowLevelCall const&); void visit(Create const&); + void visit(UnaryOpData const&); + void visit(Object const&); + void visit(Data const&); + void visit(Code const&); void visit(Program const&); /// Creates a new scope, and adds @a _funcParams to it if it @@ -109,6 +115,7 @@ private: /// Accepts an arbitrary string, removes all characters that are neither /// alphabets nor digits from it and returns the said string. std::string createAlphaNum(std::string const& _strBytes); + enum class NumFunctionReturns { None, @@ -256,6 +263,25 @@ private: return "foo_" + functionTypeToString(_type) + "_" + std::to_string(counter()); } + /// Returns current object identifier as string. Input parameter + /// is ignored. + std::string getObjectIdentifier(ObjectId const&) + { + // TODO: Return a pseudo randomly chosen object identifier + // that is in scope as string. + // At the moment, we simply return the identifier that + // corresponds to the currently visited object. + return "object" + std::to_string(m_objectId - 1); + } + + /// Return new object identifier as string. Identifier string + /// is a template of the form "\"object\"" where is + /// a monotonically increasing object ID counter. + std::string newObjectId() + { + return "\"object" + std::to_string(m_objectId++) + "\""; + } + std::ostringstream m_output; /// Variables in current scope std::stack> m_scopeVars; @@ -288,6 +314,11 @@ private: unsigned m_inputSize; /// Predicate that is true if inside function definition, false otherwise bool m_inFunctionDef; + /// Index used for naming objects + unsigned m_objectId; + /// Flag to track whether program is an object (true) or a statement block + /// (false: default value) + bool m_isObject; }; } } diff --git a/test/tools/ossfuzz/yulProto.proto b/test/tools/ossfuzz/yulProto.proto index 52fd632ec..54befeeb9 100644 --- a/test/tools/ossfuzz/yulProto.proto +++ b/test/tools/ossfuzz/yulProto.proto @@ -167,6 +167,15 @@ message UnaryOp { required Expression operand = 2; } +message UnaryOpData { + enum UOpData { + SIZE = 1; + OFFSET = 2; + } + required UOpData op = 1; + required ObjectId identifier = 2; +} + message TernaryOp { enum TOp { ADDM = 0; @@ -183,6 +192,7 @@ message CopyFunc { CALLDATA = 0; CODE = 1; RETURNDATA = 2; + DATA = 3; } required CopyType ct = 1; required Expression target = 2; @@ -197,6 +207,18 @@ message ExtCodeCopy { required Expression size = 4; } +/// TODO: Add a field that may be used for referencing +/// a pseudo random object identifier at run time. +message ObjectId {} + +message DataSize { + required ObjectId identifier = 1; +} + +message DataOffset { + required ObjectId identifier = 1; +} + message NullaryOp { enum NOp { PC = 1; @@ -258,6 +280,7 @@ message Expression { FunctionCall func_expr = 7; LowLevelCall lowcall = 8; Create create = 9; + UnaryOpData unopdata = 10; } } @@ -362,8 +385,25 @@ message Block { repeated Statement statements = 1; } -message Program { +message Object { + required Code code = 1; + optional Data data = 2; + optional Object obj = 3; +} + +message Code { required Block block = 1; } +message Data { + required string hex = 1; +} + +message Program { + oneof program_oneof { + Block block = 1; + Object obj = 2; + } +} + package yul.test.yul_fuzzer; diff --git a/test/tools/ossfuzz/yulProto_diff_ossfuzz.cpp b/test/tools/ossfuzz/yulProto_diff_ossfuzz.cpp index 4102bb4b0..41cd6edab 100644 --- a/test/tools/ossfuzz/yulProto_diff_ossfuzz.cpp +++ b/test/tools/ossfuzz/yulProto_diff_ossfuzz.cpp @@ -25,7 +25,9 @@ #include #include #include + #include +#include #include @@ -37,6 +39,20 @@ using namespace langutil; using namespace dev; using namespace yul::test; +namespace +{ +void printErrors(ostream& _stream, ErrorList const& _errors) +{ + SourceReferenceFormatter formatter(_stream); + + for (auto const& error: _errors) + formatter.printExceptionInformation( + *error, + (error->type() == Error::Type::Warning) ? "Warning" : "Error" + ); +} +} + DEFINE_PROTO_FUZZER(Program const& _input) { ProtoConverter converter; @@ -67,7 +83,10 @@ DEFINE_PROTO_FUZZER(Program const& _input) // Parse protobuf mutated YUL code if (!stack.parseAndAnalyze("source", yul_source) || !stack.parserResult()->code || !stack.parserResult()->analysisInfo) + { + printErrors(std::cout, stack.errors()); return; + } } catch (Exception const&) { From 34022a2c8cb16a9de627cf067ebb999050fe506b Mon Sep 17 00:00:00 2001 From: Bhargava Shastry Date: Wed, 11 Sep 2019 10:57:07 +0200 Subject: [PATCH 2/2] Pseudo-randomly choose object/data identifier that is in scope --- test/tools/ossfuzz/protoToYul.cpp | 41 ++++++++++++++++++++++++++++--- test/tools/ossfuzz/protoToYul.h | 40 +++++++++++++++++++++--------- test/tools/ossfuzz/yulProto.proto | 8 +++--- 3 files changed, 69 insertions(+), 20 deletions(-) diff --git a/test/tools/ossfuzz/protoToYul.cpp b/test/tools/ossfuzz/protoToYul.cpp index 3dc6d78ee..345797afb 100644 --- a/test/tools/ossfuzz/protoToYul.cpp +++ b/test/tools/ossfuzz/protoToYul.cpp @@ -21,7 +21,6 @@ #include #include -#include #include #include @@ -1385,6 +1384,14 @@ void ProtoConverter::visit(PopStmt const& _x) m_output << ")\n"; } +string ProtoConverter::getObjectIdentifier(ObjectId const& _x) +{ + unsigned currentId = currentObjectId(); + yulAssert(m_objectScopeTree.size() > currentId, "Proto fuzzer: Error referencing object"); + std::vector objectIdsInScope = m_objectScopeTree[currentId]; + return objectIdsInScope[_x.id() % objectIdsInScope.size()]; +} + void ProtoConverter::visit(Code const& _x) { m_output << "code {\n"; @@ -1394,7 +1401,8 @@ void ProtoConverter::visit(Code const& _x) void ProtoConverter::visit(Data const& _x) { - m_output << "data \"datablock\" hex\"" << createHex(_x.hex()) << "\"\n"; + // TODO: Generate random data block identifier + m_output << "data \"" << s_dataIdentifier << "\" hex\"" << createHex(_x.hex()) << "\"\n"; } void ProtoConverter::visit(Object const& _x) @@ -1406,11 +1414,33 @@ void ProtoConverter::visit(Object const& _x) visit(_x.code()); if (_x.has_data()) visit(_x.data()); - if (_x.has_obj()) - visit(_x.obj()); + if (_x.has_sub_obj()) + visit(_x.sub_obj()); m_output << "}\n"; } +void ProtoConverter::buildObjectScopeTree(Object const& _x) +{ + // Identifies object being visited + string objectId = newObjectId(false); + vector node{objectId}; + if (_x.has_data()) + node.push_back(s_dataIdentifier); + if (_x.has_sub_obj()) + { + // Identifies sub object whose numeric suffix is + // m_objectId + string subObjectId = "object" + to_string(m_objectId); + node.push_back(subObjectId); + // TODO: Add sub-object to object's ancestors once + // nested access is implemented. + m_objectScopeTree.push_back(node); + buildObjectScopeTree(_x.sub_obj()); + } + else + m_objectScopeTree.push_back(node); +} + void ProtoConverter::visit(Program const& _x) { // Initialize input size @@ -1427,6 +1457,9 @@ void ProtoConverter::visit(Program const& _x) break; case Program::kObj: m_isObject = true; + buildObjectScopeTree(_x.obj()); + // Reset object id counter + m_objectId = 0; visit(_x.obj()); break; case Program::PROGRAM_ONEOF_NOT_SET: diff --git a/test/tools/ossfuzz/protoToYul.h b/test/tools/ossfuzz/protoToYul.h index 49a34a8fd..ef149d198 100644 --- a/test/tools/ossfuzz/protoToYul.h +++ b/test/tools/ossfuzz/protoToYul.h @@ -26,8 +26,10 @@ #include #include + #include #include +#include namespace yul { @@ -240,6 +242,11 @@ private: /// Removes entry from m_functionMap and m_functionName void updateFunctionMaps(std::string const& _x); + /// Build a tree of objects that contains the object/data + /// identifiers that are in scope in a given object. + /// @param _x root object of the yul protobuf specification. + void buildObjectScopeTree(Object const& _x); + /// Returns a pseudo-random dictionary token. /// @param _p Enum that decides if the returned token is hex prefixed ("0x") or not /// @return Dictionary token at the index computed using a @@ -263,23 +270,28 @@ private: return "foo_" + functionTypeToString(_type) + "_" + std::to_string(counter()); } - /// Returns current object identifier as string. Input parameter - /// is ignored. - std::string getObjectIdentifier(ObjectId const&) - { - // TODO: Return a pseudo randomly chosen object identifier - // that is in scope as string. - // At the moment, we simply return the identifier that - // corresponds to the currently visited object. - return "object" + std::to_string(m_objectId - 1); - } + /// Returns a pseudo-randomly chosen object identifier that is in the + /// scope of the Yul object being visited. + std::string getObjectIdentifier(ObjectId const& _x); /// Return new object identifier as string. Identifier string /// is a template of the form "\"object\"" where is /// a monotonically increasing object ID counter. - std::string newObjectId() + /// @param _decorate If true (default value), object ID is + /// enclosed within double quotes. + std::string newObjectId(bool _decorate = true) { - return "\"object" + std::to_string(m_objectId++) + "\""; + return dev::Whiskers(R"("object")") + ("decorate", _decorate) + ("id", std::to_string(m_objectId++)) + .render(); + } + + /// Returns the object counter value corresponding to the object + /// being visited. + unsigned currentObjectId() + { + return m_objectId - 1; } std::ostringstream m_output; @@ -297,9 +309,13 @@ private: std::stack> m_switchLiteralSetPerScope; // Look-up table per function type that holds the number of input (output) function parameters std::map> m_functionSigMap; + /// Tree of objects and their scopes + std::vector> m_objectScopeTree; // mod input/output parameters impose an upper bound on the number of input/output parameters a function may have. static unsigned constexpr s_modInputParams = 5; static unsigned constexpr s_modOutputParams = 5; + /// Hard-coded identifier for a Yul object's data block + static auto constexpr s_dataIdentifier = "datablock"; /// Predicate to keep track of for body scope. If true, break/continue /// statements can not be created. bool m_inForBodyScope; diff --git a/test/tools/ossfuzz/yulProto.proto b/test/tools/ossfuzz/yulProto.proto index 54befeeb9..6871a63ac 100644 --- a/test/tools/ossfuzz/yulProto.proto +++ b/test/tools/ossfuzz/yulProto.proto @@ -207,9 +207,9 @@ message ExtCodeCopy { required Expression size = 4; } -/// TODO: Add a field that may be used for referencing -/// a pseudo random object identifier at run time. -message ObjectId {} +message ObjectId { + required uint64 id = 1; +} message DataSize { required ObjectId identifier = 1; @@ -388,7 +388,7 @@ message Block { message Object { required Code code = 1; optional Data data = 2; - optional Object obj = 3; + optional Object sub_obj = 3; } message Code {