From a4e46e665aac0aa26afa37a22eaf44941d856d9f Mon Sep 17 00:00:00 2001
From: Gav Wood
Date: Mon, 26 May 2014 11:22:19 +0200
Subject: [PATCH] Major reorganisation. New libs (libethsupport, libevm, liblll). New LLLC binary.

---
 Assembly.cpp | 0
 Assembly.h | 0
 CMakeLists.txt | 70 +++++
 CodeFragment.cpp | 701 ++++++++++++++++++++++++++++++++++++++++++++++
 CodeFragment.h | 157 +++++++++++
 Compiler.cpp | 61 ++++
 Compiler.h | 35 +++
 CompilerState.cpp | 37 +++
 CompilerState.h | 49 ++++
 Parser.cpp | 91 ++++++
 Parser.h | 38 +++
 11 files changed, 1239 insertions(+)
 create mode 100644 Assembly.cpp
 create mode 100644 Assembly.h
 create mode 100644 CMakeLists.txt
 create mode 100644 CodeFragment.cpp
 create mode 100644 CodeFragment.h
 create mode 100644 Compiler.cpp
 create mode 100644 Compiler.h
 create mode 100644 CompilerState.cpp
 create mode 100644 CompilerState.h
 create mode 100644 Parser.cpp
 create mode 100644 Parser.h

diff --git a/Assembly.cpp b/Assembly.cpp
new file mode 100644
index 000000000..e69de29bb
diff --git a/Assembly.h b/Assembly.h
new file mode 100644
index 000000000..e69de29bb
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 000000000..dc5fc2221
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,70 @@
+cmake_policy(SET CMP0015 NEW)
+# Builds the "lll" library from every source file in this directory, links it and installs it with its headers.
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSTATICLIB")
+
+aux_source_directory(. SRC_LIST)
+# Despite its name, EXECUTABLE holds the name of a library target (see the add_library() calls below).
+set(EXECUTABLE lll)
+
+if(APPLE)
+	# set(CMAKE_INSTALL_PREFIX ../lib)
+	add_library(${EXECUTABLE} SHARED ${SRC_LIST})
+else()
+	add_library(${EXECUTABLE} ${SRC_LIST})
+endif()
+if (UNIX) # Supplies the Boost_*_LIBRARY variables consumed by the UNIX branch further down.
+	FIND_PACKAGE(Boost 1.53 REQUIRED COMPONENTS thread date_time system filesystem program_options signals serialization chrono unit_test_framework locale)
+endif()
+file(GLOB HEADERS "*.h")
+
+include_directories(..)
+include_directories(${MINIUPNPC_ID})
+include_directories(${LEVELDB_ID})
+
+target_link_libraries(${EXECUTABLE} ethcore)
+target_link_libraries(${EXECUTABLE} ethsupport)
+target_link_libraries(${EXECUTABLE} secp256k1)
+target_link_libraries(${EXECUTABLE} ${MINIUPNPC_LS})
+target_link_libraries(${EXECUTABLE} ${LEVELDB_LS})
+target_link_libraries(${EXECUTABLE} gmp)
+
+# TARGET_PLATFORM is quoted so that an unset or empty value compares as "" instead of leaving a malformed if().
+if("${TARGET_PLATFORM}" STREQUAL "w64")
+	include_directories(/usr/x86_64-w64-mingw32/include/cryptopp)
+	target_link_libraries(${EXECUTABLE} cryptopp)
+	target_link_libraries(${EXECUTABLE} boost_system-mt-s)
+	target_link_libraries(${EXECUTABLE} boost_filesystem-mt-s)
+	target_link_libraries(${EXECUTABLE} boost_thread_win32-mt-s)
+	target_link_libraries(${EXECUTABLE} iphlpapi)
+	target_link_libraries(${EXECUTABLE} ws2_32)
+	target_link_libraries(${EXECUTABLE} mswsock)
+	target_link_libraries(${EXECUTABLE} shlwapi)
+elseif (APPLE)
+	# Latest mavericks boost libraries only come with -mt
+	target_link_libraries(${EXECUTABLE} ${CRYPTOPP_LIBRARIES})
+	target_link_libraries(${EXECUTABLE} boost_system-mt)
+	target_link_libraries(${EXECUTABLE} boost_filesystem-mt)
+	target_link_libraries(${EXECUTABLE} boost_thread-mt)
+	find_package(Threads REQUIRED)
+	target_link_libraries(${EXECUTABLE} ${CMAKE_THREAD_LIBS_INIT})
+elseif (UNIX)
+	target_link_libraries(${EXECUTABLE} ${CRYPTOPP_LIBRARIES})
+	target_link_libraries(${EXECUTABLE} ${Boost_SYSTEM_LIBRARY})
+	target_link_libraries(${EXECUTABLE} ${Boost_FILESYSTEM_LIBRARY})
+	target_link_libraries(${EXECUTABLE} ${Boost_THREAD_LIBRARY})
+	target_link_libraries(${EXECUTABLE} ${Boost_DATE_TIME_LIBRARY})
+	target_link_libraries(${EXECUTABLE} ${CMAKE_THREAD_LIBS_INIT})
+else ()
+	target_link_libraries(${EXECUTABLE} ${CRYPTOPP_LIBRARIES})
+	target_link_libraries(${EXECUTABLE} boost_system)
+	target_link_libraries(${EXECUTABLE} boost_filesystem)
+	target_link_libraries(${EXECUTABLE} boost_thread)
+	find_package(Threads REQUIRED)
+	target_link_libraries(${EXECUTABLE} ${CMAKE_THREAD_LIBS_INIT})
+endif ()
+
+message("Installation path: ${CMAKE_INSTALL_PREFIX}")
+
+install( TARGETS ${EXECUTABLE} ARCHIVE DESTINATION lib LIBRARY DESTINATION lib )
+install( FILES ${HEADERS} DESTINATION include/${EXECUTABLE} )
+
diff --git a/CodeFragment.cpp b/CodeFragment.cpp
new file mode 100644
index 000000000..854a27c40
--- /dev/null
+++ b/CodeFragment.cpp
@@ -0,0 +1,701 @@
+/*
+	This file is part of cpp-ethereum.
+
+	cpp-ethereum is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	cpp-ethereum is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file CodeFragment.cpp
+ * @author Gav Wood
+ * @date 2014
+ */
+
+#include "Parser.h"
+#include "CodeFragment.h"
+// NOTE(review): the five bare "#include" lines below, and several template argument lists in this file (after get, error, max, fromBigEndian, vector, std::map), have lost their angle-bracket text in this copy of the patch — restore before applying.
+#include
+#include
+#include
+#include
+#include
+#include "CompilerState.h"
+using namespace std;
+using namespace eth;
+namespace qi = boost::spirit::qi;
+namespace px = boost::phoenix;
+namespace sp = boost::spirit;
+/// Writes a textual rendering of the tree _this to _out; list tags 0-5 select the bracket style and unhandled node types print "nil".
+void eth::debugOutAST(ostream& _out, sp::utree const& _this)
+{
+	switch (_this.which())
+	{
+	case sp::utree_type::list_type:
+		switch (_this.tag())
+		{
+		case 0: _out << "( "; for (auto const& i: _this) { debugOutAST(_out, i); _out << " "; } _out << ")"; break;
+		case 1: _out << "@ "; debugOutAST(_out, _this.front()); break;
+		case 2: _out << "@@ "; debugOutAST(_out, _this.front()); break;
+		case 3: _out << "[ "; debugOutAST(_out, _this.front()); _out << " ] "; debugOutAST(_out, _this.back()); break;
+		case 4: _out << "[[ "; debugOutAST(_out, _this.front()); _out << " ]] "; debugOutAST(_out, _this.back()); break;
+		case 5: _out << "{ "; for (auto const& i: _this) { debugOutAST(_out, i); _out << " "; } _out << "}"; break;
+		default:;
+		}
+
+		break;
+	case sp::utree_type::int_type: _out << _this.get(); break;
+	case sp::utree_type::string_type: _out << "\"" << _this.get, sp::utree_type::string_type>>() << "\""; break;
+	case sp::utree_type::symbol_type: _out << _this.get, sp::utree_type::symbol_type>>(); break;
+	case sp::utree_type::any_type: _out << *_this.get(); break;
+	default: _out << "nil";
+	}
+}
+/// Creates a location positioned at the current end of _f's code.
+CodeLocation::CodeLocation(CodeFragment* _f)
+{
+	m_f = _f;
+	m_pos = _f->m_code.size();
+}
+/// Reads the 4-byte big-endian value stored at this location; asserts that the preceding byte is PUSH4.
+unsigned CodeLocation::get() const
+{
+	assert(m_f->m_code[m_pos - 1] == (byte)Instruction::PUSH4);
+	bytesConstRef r(&m_f->m_code[m_pos], 4);
+	cdebug << toHex(r);	// NOTE(review): logs on every read, including the reads made by set() and increase() — looks like leftover debugging; confirm.
+	return fromBigEndian(r);
+}
+/// Writes _val as 4 big-endian bytes at this location; asserts that the slot follows a PUSH4 and currently reads as zero.
+void CodeLocation::set(unsigned _val)
+{
+	assert(m_f->m_code[m_pos - 1] == (byte)Instruction::PUSH4);
+	assert(!get());
+	bytesRef r(&m_f->m_code[m_pos], 4);
+	toBigEndian(_val, r);
+}
+/// Sets the value stored at this location to the current end of the fragment's code.
+void CodeLocation::anchor()
+{
+	set(m_f->m_code.size());
+}
+/// Adds _val to the value stored at this location.
+void CodeLocation::increase(unsigned _val)
+{
+	assert(m_f->m_code[m_pos - 1] == (byte)Instruction::PUSH4);
+	bytesRef r(&m_f->m_code[m_pos], 4);
+	toBigEndian(get() + _val, r);
+}
+/// Appends _f's code, rebasing its recorded locations and data offsets by the current code size, and adds its deposit to ours.
+void CodeFragment::appendFragment(CodeFragment const& _f)
+{
+	m_locs.reserve(m_locs.size() + _f.m_locs.size());
+	m_code.reserve(m_code.size() + _f.m_code.size());
+
+	unsigned os = m_code.size();
+
+	for (auto i: _f.m_code)
+		m_code.push_back(i);
+
+	for (auto i: _f.m_locs)
+	{
+		CodeLocation(this, i + os).increase(os);
+		m_locs.push_back(i + os);
+	}
+
+	for (auto i: _f.m_data)
+		m_data.insert(make_pair(i.first, i.second + os));
+
+	m_deposit += _f.m_deposit;
+}
+/// Parses _src and compiles the resulting tree (if non-empty); the tree is recorded in _s.treesToKill so its bigints can be freed later.
+CodeFragment CodeFragment::compile(string const& _src, CompilerState& _s)
+{
+	CodeFragment ret;
+	sp::utree o;
+	parseTreeLLL(_src, o);
+	_s.treesToKill.push_back(o);	// Registered before compiling: if the constructor below throws, the tree is still known to whoever cleans up _s.
+	if (!o.empty())
+		ret = CodeFragment(o, _s);
+	return ret;
+}
+/// Appends a zero byte and then each distinct data blob to the code, patching every data location with its blob's offset; clears m_data.
+void CodeFragment::consolidateData()
+{
+	m_code.push_back(0);
+	bytes ld;
+	for (auto const& i: m_data)
+	{
+		if (ld != i.first)
+		{
+			ld = i.first;
+			for (auto j: ld)
+				m_code.push_back(j);
+		}
+		CodeLocation(this, i.second).set(m_code.size() - ld.size());
+	}
+	m_data.clear();
+}
+/// Appends _f and then pops surplus items so that it leaves exactly _deposit; raises an error if _f leaves fewer than _deposit.
+void CodeFragment::appendFragment(CodeFragment const& _f, unsigned _deposit)
+{
+	if ((int)_deposit > _f.m_deposit)
+		error();
+	else
+	{
+		appendFragment(_f);
+		while (_deposit++ < (unsigned)_f.m_deposit)
+			appendInstruction(Instruction::POP);
+	}
+}
+/// Emits PUSH4 followed by _locationValue (4 bytes, big-endian), records the operand offset in m_locs and returns its location.
+CodeLocation CodeFragment::appendPushLocation(unsigned _locationValue)
+{
+	m_code.push_back((byte)Instruction::PUSH4);
+	CodeLocation ret(this, m_code.size());
+	m_locs.push_back(m_code.size());
+	m_code.resize(m_code.size() + 4);
+	bytesRef r(&m_code[m_code.size() - 4], 4);
+	toBigEndian(_locationValue, r);
+	m_deposit++;
+	return ret;
+}
+/// Emits the shortest PUSH (at least one operand byte) holding _literalValue; returns the number of code bytes written.
+unsigned CodeFragment::appendPush(u256 _literalValue)
+{
+	unsigned br = max(1, bytesRequired(_literalValue));
+	m_code.push_back((byte)Instruction::PUSH1 + br - 1);
+	m_code.resize(m_code.size() + br);
+	for (unsigned i = 0; i < br; ++i)
+	{
+		m_code[m_code.size() - 1 - i] = (byte)(_literalValue & 0xff);
+		_literalValue >>= 8;
+	}
+	m_deposit++;
+	return br + 1;
+}
+/// Emits _i and adjusts the deposit by the instruction's (ret - args) as listed in c_instructionInfo.
+void CodeFragment::appendInstruction(Instruction _i)
+{
+	m_code.push_back((byte)_i);
+	m_deposit += c_instructionInfo.at(_i).ret - c_instructionInfo.at(_i).args;
+}
+/// Compiles one tree node: lists go to constructOperation(), strings and integers become pushes, symbols resolve to definitions, arguments or variables.
+CodeFragment::CodeFragment(sp::utree const& _t, CompilerState& _s, bool _allowASM)
+{
+/*	cdebug << "CodeFragment. Locals:";
+	for (auto const& i: _s.defs)
+		cdebug << i.first << ":" << toHex(i.second.m_code);
+	cdebug << "Args:";
+	for (auto const& i: _s.args)
+		cdebug << i.first << ":" << toHex(i.second.m_code);
+	cdebug << "Outers:";
+	for (auto const& i: _s.outers)
+		cdebug << i.first << ":" << toHex(i.second.m_code);
+	debugOutAST(cout, _t);
+	cout << endl << flush;
+*/
+	switch (_t.which())
+	{
+	case sp::utree_type::list_type:
+		constructOperation(_t, _s);
+		break;
+	case sp::utree_type::string_type:
+	{
+		auto sr = _t.get, sp::utree_type::string_type>>();
+		string s(sr.begin(), sr.end());
+		if (s.size() > 32)
+			error();
+		h256 valHash;
+		memcpy(valHash.data(), s.data(), s.size());
+		memset(valHash.data() + s.size(), 0, 32 - s.size());
+		appendPush(valHash);
+		break;
+	}
+	case sp::utree_type::symbol_type:
+	{
+		auto sr = _t.get, sp::utree_type::symbol_type>>();
+		string s(sr.begin(), sr.end());
+		string us = boost::algorithm::to_upper_copy(s);
+		// In ASM mode a bare instruction mnemonic emits just that opcode. Previously control then
+		// fell through to the lookup chain below, where the upper-cased mnemonic also matched the
+		// variable-name pattern and a spurious variable push was appended after the opcode.
+		if (_allowASM && c_instructions.count(us))
+		{
+			auto it = c_instructions.find(us);
+			m_deposit = c_instructionInfo.at(it->second).ret - c_instructionInfo.at(it->second).args;
+			m_code.push_back((byte)it->second);
+		}
+		else if (_s.defs.count(s))
+			appendFragment(_s.defs.at(s));
+		else if (_s.args.count(s))
+			appendFragment(_s.args.at(s));
+		else if (_s.outers.count(s))
+			appendFragment(_s.outers.at(s));
+		else if (us.find_first_of("1234567890") != 0 && us.find_first_not_of("QWERTYUIOPASDFGHJKLZXCVBNM1234567890_") == string::npos)
+		{
+			auto it = _s.vars.find(s);	// Unknown variable names are assigned the next slot: (number of variables so far) * 32.
+			if (it == _s.vars.end())
+			{
+				bool ok;
+				tie(it, ok) = _s.vars.insert(make_pair(s, _s.vars.size() * 32));
+			}
+			appendPush(it->second);
+		}
+		else
+			error();
+
+		break;
+	}
+	case sp::utree_type::any_type:
+	{
+		bigint i = *_t.get();
+		if (i < 0 || i > bigint(u256(0) - 1))
+			error();
+		appendPush((u256)i);
+		break;
+	}
+	default: break;
+	}
+}
+/// Emits PUSH4 with a zeroed operand and registers _data in m_data against that operand, to be patched by consolidateData().
+void CodeFragment::appendPushDataLocation(bytes const& _data)
+{
+	m_code.push_back((byte)Instruction::PUSH4);
+	m_data.insert(make_pair(_data, m_code.size()));
+	m_code.resize(m_code.size() + 4);
+	memset(&m_code.back() - 3, 0, 4);
+	m_deposit++;
+}
+/// If the code starts with PUSH1..PUSH32, returns that push's operand bytes up to the first zero byte as a string; otherwise raises an error.
+std::string CodeFragment::asPushedString() const
+{
+	string ret;
+	if (m_code.size())
+	{
+		unsigned bc = m_code[0] - (byte)Instruction::PUSH1 + 1;
+		if (m_code[0] >= (byte)Instruction::PUSH1 && m_code[0] <= (byte)Instruction::PUSH32)
+		{
+			for (unsigned s = 0; s < bc && m_code[1 + s]; ++s)	// NOTE(review): assumes the code really holds bc operand bytes after the opcode; no bounds check.
+				ret.push_back(m_code[1 + s]);
+			return ret;
+		}
+	}
+	error();
+	return ret;
+}
+/// Currently a no-op; the body only holds a commented-out sketch of a pattern table.
+void CodeFragment::optimise()
+{
+//	map const&)>> pattern = { { "PUSH,PUSH,ADD", [](vector const& v) { return CodeFragment(appendPush(v[0] + v[1])); } } };
+}
+/// Compiles the list node _t: the special forms ASM, INCLUDE, DEF and LIT first, then macros, instructions, operators and control structures.
+void CodeFragment::constructOperation(sp::utree const& _t, CompilerState& _s)
+{
+	if (_t.empty())
+		error();
+	else if (_t.tag() == 0 && _t.front().which() != sp::utree_type::symbol_type)
+		error();
+	else
+	{
+		string s;
+		string us;
+		switch (_t.tag())	// Tag 0 is a plain list headed by its operator symbol; tags 1-5 are shorthand forms mapped to fixed operators.
+		{
+		case 0:
+		{
+			auto sr = _t.front().get, sp::utree_type::symbol_type>>();
+			s = string(sr.begin(), sr.end());
+			us = boost::algorithm::to_upper_copy(s);
+			break;
+		}
+		case 1:
+			us = "MLOAD";
+			break;
+		case 2:
+			us = "SLOAD";
+			break;
+		case 3:
+			us = "MSTORE";
+			break;
+		case 4:
+			us = "SSTORE";
+			break;
+		case 5:
+			us = "SEQ";
+			break;
+		default:;
+		}
+
+		// Operations who args are not standard stack-pushers.
+		bool nonStandard = true;
+		if (us == "ASM")
+		{
+			int c = 0;
+			for (auto const& i: _t)
+				if (c++)	// Skips the first element (the operator symbol itself).
+					appendFragment(CodeFragment(i, _s, true));
+		}
+		else if (us == "INCLUDE")
+		{
+			if (_t.size() != 2)
+				error();
+			string n;
+			auto i = *++_t.begin();
+			if (i.tag())
+				error();
+			if (i.which() == sp::utree_type::string_type)
+			{
+				auto sr = i.get, sp::utree_type::string_type>>();
+				n = string(sr.begin(), sr.end());
+			}
+			else if (i.which() == sp::utree_type::symbol_type)
+			{
+				auto sr = i.get, sp::utree_type::symbol_type>>();
+				n = _s.getDef(string(sr.begin(), sr.end())).asPushedString();
+			}
+			appendFragment(CodeFragment::compile(asString(contents(n)), _s));
+		}
+		else if (us == "DEF")
+		{
+			string n;
+			unsigned ii = 0;
+			if (_t.size() != 3 && _t.size() != 4)	// Three elements define a constant fragment; four define a macro (name, argument list, body).
+				error();
+			for (auto const& i: _t)
+			{
+				if (ii == 1)
+				{
+					if (i.tag())
+						error();
+					if (i.which() == sp::utree_type::string_type)
+					{
+						auto sr = i.get, sp::utree_type::string_type>>();
+						n = string(sr.begin(), sr.end());
+					}
+					else if (i.which() == sp::utree_type::symbol_type)
+					{
+						auto sr = i.get, sp::utree_type::symbol_type>>();
+						n = _s.getDef(string(sr.begin(), sr.end())).asPushedString();
+					}
+				}
+				else if (ii == 2)
+					if (_t.size() == 3)
+						_s.defs[n] = CodeFragment(i, _s);
+					else
+						for (auto const& j: i)
+						{
+							if (j.tag() || j.which() != sp::utree_type::symbol_type)
+								error();
+							auto sr = j.get, sp::utree_type::symbol_type>>();
+							_s.macros[n].args.push_back(string(sr.begin(), sr.end()));
+						}
+				else if (ii == 3)
+				{
+					_s.macros[n].code = i;
+					_s.macros[n].env = _s.outers;	// Capture the definitions visible now; args override outers and defs override both.
+					for (auto const& i: _s.args)
+						_s.macros[n].env[i.first] = i.second;
+					for (auto const& i: _s.defs)
+						_s.macros[n].env[i.first] = i.second;
+				}
+				++ii;
+			}
+
+		}
+		else if (us == "LIT")
+		{
+			if (_t.size() < 3)
+				error();
+			unsigned ii = 0;
+			CodeFragment pos;
+			bytes data;
+			for (auto const& i: _t)
+			{
+				if (ii == 1)
+				{
+					pos = CodeFragment(i, _s);
+					if (pos.m_deposit != 1)
+						error();
+				}
+				else if (ii == 2 && !i.tag() && i.which() == sp::utree_type::string_type)
+				{
+					auto sr = i.get, sp::utree_type::string_type>>();
+					data = bytes((byte const*)sr.begin(), (byte const*)sr.end());
+				}
+				else if (ii >= 2 && !i.tag() && i.which() == sp::utree_type::any_type)
+				{
+					bigint bi = *i.get();
+					if (bi < 0)
+						error();
+					else if (bi > bigint(u256(0) - 1))
+					{
+						if (ii == 2 && _t.size() == 3)
+						{
+							// One big int - allow it as hex.
+							data.resize(bytesRequired(bi));
+							toBigEndian(bi, data);
+						}
+						else
+							error();
+					}
+					else
+					{
+						data.resize(data.size() + 32);
+						*(h256*)(&data.back() - 31) = (u256)bi;
+					}
+				}
+				else if (ii)
+					error();
+				++ii;
+			}
+			appendPush(data.size());
+			appendInstruction(Instruction::DUP);
+			appendPushDataLocation(data);
+			appendFragment(pos, 1);
+			appendInstruction(Instruction::CODECOPY);
+		}
+		else
+			nonStandard = false;
+
+		if (nonStandard)
+			return;
+
+		std::map const c_arith = { { "+", Instruction::ADD }, { "-", Instruction::SUB }, { "*", Instruction::MUL }, { "/", Instruction::DIV }, { "%", Instruction::MOD }, { "&", Instruction::AND }, { "|", Instruction::OR }, { "^", Instruction::XOR } };
+		std::map> const c_binary = { { "<", { Instruction::LT, false } }, { "<=", { Instruction::GT, true } }, { ">", { Instruction::GT, false } }, { ">=", { Instruction::LT, true } }, { "S<", { Instruction::SLT, false } }, { "S<=", { Instruction::SGT, true } }, { "S>", { Instruction::SGT, false } }, { "S>=", { Instruction::SLT, true } }, { "=", { Instruction::EQ, false } }, { "!=", { Instruction::EQ, true } } };
+		std::map const c_unary = { { "!", Instruction::NOT } };
+
+		vector code;
+		CompilerState ns = _s;
+		ns.vars.clear();
+		int c = _t.tag() ? 1 : 0;	// For tagged (shorthand) lists every element is an argument; for plain lists the first element is skipped.
+		for (auto const& i: _t)
+			if (c++)
+			{
+				if (us == "LLL" && c == 1)	// NOTE(review): c has already been incremented by the c++ test above, so c == 1 looks unreachable and ns unused — confirm intent before changing.
+					code.push_back(CodeFragment(i, ns));
+				else
+					code.push_back(CodeFragment(i, _s));
+			}
+		auto requireSize = [&](unsigned s) { if (code.size() != s) error(); };
+		auto requireMinSize = [&](unsigned s) { if (code.size() < s) error(); };
+		auto requireMaxSize = [&](unsigned s) { if (code.size() > s) error(); };
+		auto requireDeposit = [&](unsigned i, int s) { if (code[i].m_deposit != s) error(); };
+
+		if (_s.macros.count(s) && _s.macros.at(s).args.size() == code.size())
+		{
+			Macro const& m = _s.macros.at(s);
+			CompilerState cs = _s;
+			for (auto const& i: m.env)
+				cs.outers[i.first] = i.second;
+			for (auto const& i: cs.defs)
+				cs.outers[i.first] = i.second;
+			cs.defs.clear();
+			for (unsigned i = 0; i < m.args.size(); ++i)
+			{
+				requireDeposit(i, 1);
+				cs.args[m.args[i]] = code[i];
+			}
+			appendFragment(CodeFragment(m.code, cs));
+			for (auto const& i: cs.defs)	// Definitions and macros made inside the macro body are exported back to the caller's state.
+				_s.defs[i.first] = i.second;
+			for (auto const& i: cs.macros)
+				_s.macros.insert(i);
+		}
+		else if (c_instructions.count(us))
+		{
+			auto it = c_instructions.find(us);
+			int ea = c_instructionInfo.at(it->second).args;
+			if (ea >= 0)
+				requireSize(ea);
+			else
+				requireMinSize(-ea);
+
+			for (unsigned i = code.size(); i; --i)	// Arguments are emitted last-to-first.
+				appendFragment(code[i - 1], 1);
+			appendInstruction(it->second);
+		}
+		else if (c_arith.count(us))
+		{
+			auto it = c_arith.find(us);
+			requireMinSize(1);
+			for (unsigned i = code.size(); i; --i)
+			{
+				requireDeposit(i - 1, 1);
+				appendFragment(code[i - 1], 1);
+			}
+			for (unsigned i = 1; i < code.size(); ++i)
+				appendInstruction(it->second);
+		}
+		else if (c_binary.count(us))
+		{
+			auto it = c_binary.find(us);
+			requireSize(2);
+			requireDeposit(0, 1);
+			requireDeposit(1, 1);
+			appendFragment(code[1], 1);
+			appendFragment(code[0], 1);
+			appendInstruction(it->second.first);
+			if (it->second.second)	// The bool in the table marks comparisons implemented as the negation of another instruction.
+				appendInstruction(Instruction::NOT);
+		}
+		else if (c_unary.count(us))
+		{
+			auto it = c_unary.find(us);
+			requireSize(1);
+			requireDeposit(0, 1);
+			appendFragment(code[0], 1);
+			appendInstruction(it->second);
+		}
+		else if (us == "IF")
+		{
+			requireSize(3);
+			requireDeposit(0, 1);
+			appendFragment(code[0]);
+			auto pos = appendJumpI();
+			onePath();
+			appendFragment(code[2]);
+			auto end = appendJump();
+			otherPath();
+			pos.anchor();
+			appendFragment(code[1]);
+			donePaths();
+			end.anchor();
+		}
+		else if (us == "WHEN" || us == "UNLESS")
+		{
+			requireSize(2);
+			requireDeposit(0, 1);
+			appendFragment(code[0]);
+			if (us == "WHEN")
+				appendInstruction(Instruction::NOT);
+			auto end = appendJumpI();
+			onePath();
+			otherPath();
+			appendFragment(code[1], 0);
+			donePaths();
+			end.anchor();
+		}
+		else if (us == "WHILE")
+		{
+			requireSize(2);
+			requireDeposit(0, 1);
+			auto begin = CodeLocation(this);
+			appendFragment(code[0], 1);
+			appendInstruction(Instruction::NOT);
+			auto end = appendJumpI();
+			appendFragment(code[1], 0);
+			appendJump(begin);
+			end.anchor();
+		}
+		else if (us == "FOR")
+		{
+			requireSize(4);
+			requireDeposit(1, 1);
+			appendFragment(code[0], 0);
+			auto begin = CodeLocation(this);
+			appendFragment(code[1], 1);
+			appendInstruction(Instruction::NOT);
+			auto end = appendJumpI();
+			appendFragment(code[3], 0);
+			appendFragment(code[2], 0);
+			appendJump(begin);
+			end.anchor();
+		}
+		else if (us == "LLL")
+		{
+			requireMinSize(2);
+			requireMaxSize(3);
+			requireDeposit(1, 1);
+
+			// (An unused local CodeLocation was removed here; code() below optimises and consolidates the sub-fragment.)
+			bytes const& subcode = code[0].code();
+			appendPush(subcode.size());
+			appendInstruction(Instruction::DUP);
+			if (code.size() == 3)
+			{
+				requireDeposit(2, 1);
+				appendFragment(code[2], 1);
+				appendInstruction(Instruction::LT);
+				appendInstruction(Instruction::NOT);
+				appendInstruction(Instruction::MUL);
+				appendInstruction(Instruction::DUP);
+			}
+			appendPushDataLocation(subcode);
+			appendFragment(code[1], 1);
+			appendInstruction(Instruction::CODECOPY);
+		}
+		else if (us == "&&" || us == "||")
+		{
+			requireMinSize(1);
+			for (unsigned i = 0; i < code.size(); ++i)
+				requireDeposit(i, 1);
+
+			vector ends;
+			if (code.size() > 1)
+			{
+				appendPush(us == "||" ? 1 : 0);
+				for (unsigned i = 1; i < code.size(); ++i)
+				{
+					// Check if true - predicate
+					appendFragment(code[i - 1], 1);
+					if (us == "&&")
+						appendInstruction(Instruction::NOT);
+					ends.push_back(appendJumpI());
+				}
+				appendInstruction(Instruction::POP);
+			}
+
+			// Check if true - predicate
+			appendFragment(code.back(), 1);
+
+			// At end now.
+			for (auto& i: ends)
+				i.anchor();
+		}
+		else if (us == "~")
+		{
+			requireSize(1);
+			requireDeposit(0, 1);
+			appendFragment(code[0], 1);
+			appendPush(1);
+			appendPush(0);
+			appendInstruction(Instruction::SUB);
+			appendInstruction(Instruction::SUB);
+		}
+		else if (us == "SEQ")
+		{
+			unsigned ii = 0;
+			for (auto const& i: code)
+				if (++ii < code.size())
+					appendFragment(i, 0);	// Every fragment but the last is reduced to leave nothing behind.
+				else
+					appendFragment(i);
+		}
+		else if (us == "RAW")
+		{
+			for (auto const& i: code)
+				appendFragment(i);
+			while (m_deposit > 1)
+				appendInstruction(Instruction::POP);
+		}
+		else if (us.find_first_of("1234567890") != 0 && us.find_first_not_of("QWERTYUIOPASDFGHJKLZXCVBNM1234567890_") == string::npos)
+		{
+			auto it = _s.vars.find(s);
+			if (it == _s.vars.end())
+			{
+				bool ok;
+				tie(it, ok) = _s.vars.insert(make_pair(s, _s.vars.size() * 32));
+			}
+			appendPush(it->second);
+		}
+		else
+			error();
+	}
+}
diff --git a/CodeFragment.h b/CodeFragment.h
new file mode 100644
index 000000000..82630dbe6
--- /dev/null
+++ b/CodeFragment.h
@@ -0,0 +1,157 @@
+/*
+	This file is part of cpp-ethereum.
+
+	cpp-ethereum is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	cpp-ethereum is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file CodeFragment.h
+ * @author Gav Wood
+ * @date 2014
+ */
+
+#pragma once
+// NOTE(review): the two bare "#include" lines below, and the element types of the std::vector/std::map/std::multimap members and the template parameter list of error(), have lost their angle-bracket text in this copy of the patch.
+#include
+#include
+#include "Exceptions.h"
+
+namespace boost { namespace spirit { class utree; } }
+namespace sp = boost::spirit;
+
+namespace eth
+{
+
+class CompilerState;	// NOTE(review): CompilerState.h defines this as a struct; the class/struct mismatch draws warnings on some compilers.
+class CodeFragment;
+/// Writes a textual rendering of the parse tree _this to _out.
+void debugOutAST(std::ostream& _out, sp::utree const& _this);
+/// Refers to a 4-byte operand inside a CodeFragment's code, held as a fragment pointer plus byte offset.
+class CodeLocation
+{
+	friend class CodeFragment;
+
+public:
+	CodeLocation(CodeFragment* _f);
+	CodeLocation(CodeFragment* _f, unsigned _p): m_f(_f), m_pos(_p) {}
+
+	unsigned get() const;
+	void increase(unsigned _val);
+	void set(unsigned _val);
+	void set(CodeLocation _loc) { assert(_loc.m_f == m_f); set(_loc.m_pos); }	// Both locations must belong to the same fragment.
+	void anchor();
+
+	CodeLocation operator+(unsigned _i) const { return CodeLocation(m_f, m_pos + _i); }
+
+private:
+	CodeFragment* m_f;	// Not owned.
+	unsigned m_pos;
+};
+
+class CompilerState;
+/// Kinds of AssemblyItem.
+enum AssemblyItemType { Operation, Push, PushString, PushTag, Tag, PushData };
+/// A single assembly element: a type plus either a numeric payload (m_data) or a string payload (m_pushString).
+class AssemblyItem
+{
+public:
+	AssemblyItem(u256 _push): m_type(Push), m_data(_push) {}
+	AssemblyItem(std::string const& _push): m_type(PushString), m_pushString(_push) {}
+	AssemblyItem(AssemblyItemType _type, AssemblyItem const& _tag): m_type(_type), m_data(_tag.m_data) { assert(_type == PushTag); assert(_tag.m_type == Tag); }
+	AssemblyItem(Instruction _i): m_type(Operation), m_data((byte)_i) {}
+	AssemblyItem(AssemblyItemType _type, u256 _data): m_type(_type), m_data(_data) {}
+
+	AssemblyItemType type() const { return m_type; }
+	u256 data() const { return m_data; }
+	std::string const& pushString() const { return m_pushString; }
+
+private:
+	AssemblyItemType m_type;
+	u256 m_data;
+	std::string m_pushString;
+};
+/// Collection of assembly items and data blobs. NOTE(review): assemble() and append() are only declared here; Assembly.cpp is added empty by this patch.
+class Assembly
+{
+public:
+	AssemblyItem newTag() { return AssemblyItem(Tag, m_usedTags++); }
+	AssemblyItem newData(bytes const& _data) { auto h = sha3(_data); m_data[h] = _data; return AssemblyItem(PushData, h); }	// Data is keyed by its sha3 hash, so identical blobs share one entry.
+	bytes assemble() const;
+	void append(Assembly const& _a);
+
+private:
+	u256 m_usedTags = 0;
+	std::vector m_items;
+	std::map m_data;
+};
+/// A piece of generated code together with its stack deposit, the offsets of its PUSH4 location operands and its pending data references.
+class CodeFragment
+{
+	friend class CodeLocation;
+
+public:
+	CodeFragment(sp::utree const& _t, CompilerState& _s, bool _allowASM = false);
+	CodeFragment(bytes const& _c = bytes()): m_code(_c) {}
+
+	static CodeFragment compile(std::string const& _src, CompilerState& _s);
+
+	/// Consolidates data and returns code.
+	bytes const& code() { optimise(); consolidateData(); return m_code; }	// NOTE(review): consolidateData() appends to m_code on every call, so calling code() twice on one fragment looks unsafe — confirm.
+
+	unsigned appendPush(u256 _l);
+	void appendFragment(CodeFragment const& _f);
+	void appendFragment(CodeFragment const& _f, unsigned _i);
+	void appendInstruction(Instruction _i);
+
+	CodeLocation appendPushLocation(unsigned _l = 0);
+	void appendPushLocation(CodeLocation _l) { assert(_l.m_f == this); appendPushLocation(_l.m_pos); }
+	void appendPushDataLocation(bytes const& _data);
+	// Jump helpers: each pushes a location (zero, to be anchored later, or the given target) followed by JUMP/JUMPI, and returns the pushed location.
+	CodeLocation appendJump() { auto ret = appendPushLocation(0); appendInstruction(Instruction::JUMP); return ret; }
+	CodeLocation appendJumpI() { auto ret = appendPushLocation(0); appendInstruction(Instruction::JUMPI); return ret; }
+	CodeLocation appendJump(CodeLocation _l) { auto ret = appendPushLocation(_l.m_pos); appendInstruction(Instruction::JUMP); return ret; }
+	CodeLocation appendJumpI(CodeLocation _l) { auto ret = appendPushLocation(_l.m_pos); appendInstruction(Instruction::JUMPI); return ret; }
+
+	void appendFile(std::string const& _fn);	// NOTE(review): no definition appears in this patch.
+
+	std::string asPushedString() const;
+	// Branch bookkeeping: onePath() remembers the deposit at the branch point, otherPath() records the first path's deposit and rewinds, donePaths() checks both paths agree.
+	void onePath() { assert(!m_totalDeposit && !m_baseDeposit); m_baseDeposit = m_deposit; m_totalDeposit = INT_MAX; }
+	void otherPath() { donePath(); m_totalDeposit = m_deposit; m_deposit = m_baseDeposit; }
+	void donePaths() { donePath(); m_totalDeposit = m_baseDeposit = 0; }
+	void ignored() { m_baseDeposit = m_deposit; }
+	void endIgnored() { m_deposit = m_baseDeposit; m_baseDeposit = 0; }
+
+	bool operator==(CodeFragment const& _f) const { return _f.m_code == m_code && _f.m_data == m_data; }
+	bool operator!=(CodeFragment const& _f) const { return !operator==(_f); }
+	unsigned size() const { return m_code.size(); }
+
+	void consolidateData();
+	void optimise();
+
+private:
+	template void error() const { throw T(); }
+	void constructOperation(sp::utree const& _t, CompilerState& _s);
+
+	void donePath() { if (m_totalDeposit != INT_MAX && m_totalDeposit != m_deposit) error(); }	// INT_MAX in m_totalDeposit means "no path recorded yet".
+
+	int m_deposit = 0;
+	int m_baseDeposit = 0;
+	int m_totalDeposit = 0;
+	bytes m_code;
+	std::vector m_locs;
+	std::multimap m_data;
+};
+// Returned by CompilerState::getDef() when a name is not found; being static in a header, each translation unit gets its own copy.
+static const CodeFragment NullCodeFragment;
+
+}
diff --git a/Compiler.cpp b/Compiler.cpp
new file mode 100644
index 000000000..afe84eb4b
--- /dev/null
+++ b/Compiler.cpp
@@ -0,0 +1,61 @@
+/*
+	This file is part of cpp-ethereum.
+
+	cpp-ethereum is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	cpp-ethereum is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file Compiler.cpp
+ * @author Gav Wood
+ * @date 2014
+ */
+
+#include "Compiler.h"
+#include "Parser.h"
+#include "CompilerState.h"
+#include "CodeFragment.h"
+
+using namespace std;
+using namespace eth;
+/// Compiles the LLL source _s to bytecode. On failure returns empty bytes and, if _errors is non-null, appends a description to it.
+bytes eth::compileLLL(string const& _s, vector* _errors)
+{
+	CompilerState cs;	// Lives outside the try block so that the trees it has collected can be released on the error path as well.
+	bytes ret;	// Stays empty if compilation throws.
+	try
+	{
+		ret = CodeFragment::compile(_s, cs).code();
+	}
+	catch (Exception const& _e)
+	{
+		if (_errors)
+			_errors->push_back(_e.description());
+	}
+	catch (std::exception const&)	// Caught by reference; catching by value copies and slices the exception object.
+	{
+		if (_errors)
+			_errors->push_back("Parse error.");
+	}
+	for (auto const& i: cs.treesToKill)	// Previously skipped whenever compilation threw, leaking the bigints of every tree registered so far.
+		killBigints(i);
+	return ret;
+}
+/// Parses _src and returns the text that debugOutAST() produces for the tree; the bigints allocated by the parser are freed before returning.
+string eth::parseLLL(string const& _src)
+{
+	sp::utree o;
+	parseTreeLLL(_src, o);
+	ostringstream ret;
+	debugOutAST(ret, o);
+	killBigints(o);
+	return ret.str();
+}
diff --git a/Compiler.h b/Compiler.h
new file mode 100644
index 000000000..e58e12bae
--- /dev/null
+++ b/Compiler.h
@@ -0,0 +1,35 @@
+/*
+	This file is part of cpp-ethereum.
+
+	cpp-ethereum is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	cpp-ethereum is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file Compiler.h
+ * @author Gav Wood
+ * @date 2014
+ */
+
+#pragma once
+
+#include
+#include
+#include
+
+namespace eth
+{
+
+std::string parseLLL(std::string const& _src);
+bytes compileLLL(std::string const& _s, std::vector* _errors = nullptr);
+
+}
+
diff --git a/CompilerState.cpp b/CompilerState.cpp
new file mode 100644
index 000000000..d2894475a
--- /dev/null
+++ b/CompilerState.cpp
@@ -0,0 +1,37 @@
+/*
+	This file is part of cpp-ethereum.
+
+	cpp-ethereum is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	cpp-ethereum is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file CompilerState.cpp
+ * @author Gav Wood
+ * @date 2014
+ */
+
+#include "CompilerState.h"
+
+using namespace std;
+using namespace eth;
+/// Looks _s up in defs, then args, then outers, returning the first match, or NullCodeFragment when none of them contains it.
+CodeFragment const& CompilerState::getDef(std::string const& _s)
+{
+	auto const inDefs = defs.find(_s);
+	if (inDefs != defs.end())
+		return inDefs->second;
+	auto const inArgs = args.find(_s);
+	if (inArgs != args.end())
+		return inArgs->second;
+	auto const inOuters = outers.find(_s);
+	return inOuters != outers.end() ? inOuters->second : NullCodeFragment;
+}
diff --git a/CompilerState.h b/CompilerState.h
new file mode 100644
index 000000000..d53c2bcd7
--- /dev/null
+++ b/CompilerState.h
@@ -0,0 +1,49 @@
+/*
+	This file is part of cpp-ethereum.
+
+	cpp-ethereum is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	cpp-ethereum is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file CompilerState.h
+ * @author Gav Wood
+ * @date 2014
+ */
+
+#pragma once
+// NOTE(review): the bare "#include" below and the element types of every std::vector/std::map member have lost their angle-bracket text in this copy of the patch.
+#include
+#include "CodeFragment.h"
+
+namespace eth
+{
+/// A macro created by a four-element DEF form.
+struct Macro
+{
+	std::vector args;	// Parameter names, in declaration order.
+	boost::spirit::utree code;	// Unexpanded body; compiled afresh at each use.
+	std::map env;	// Definitions captured where the macro was defined (outers, overridden by args, overridden by defs).
+};
+/// Mutable state threaded through compilation: the name tables consulted when resolving symbols, plus the parse trees awaiting clean-up.
+struct CompilerState
+{
+	CodeFragment const& getDef(std::string const& _s);
+
+	std::map vars;	// Variable name -> value pushed for it; new names get (current size * 32).
+	std::map defs;	// Lookup order in getDef() is defs, then args, then outers.
+	std::map args;
+	std::map outers;
+	std::map macros;
+	std::vector treesToKill;	// Parsed trees whose heap-allocated bigints still have to be freed with killBigints().
+};
+
+}
diff --git a/Parser.cpp b/Parser.cpp
new file mode 100644
index 000000000..2a0146a60
--- /dev/null
+++ b/Parser.cpp
@@ -0,0 +1,91 @@
+/*
+	This file is part of cpp-ethereum.
+
+	cpp-ethereum is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	cpp-ethereum is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file Parser.cpp
+ * @author Gav Wood
+ * @date 2014
+ */
+
+#include "Parser.h"
+
+#include	// NOTE(review): header name missing here; text between angle brackets appears to have been stripped from this copy of the patch
+#include
+#include
+#include
+
+using namespace std;
+using namespace eth;
+namespace qi = boost::spirit::qi;
+namespace px = boost::phoenix;
+namespace sp = boost::spirit;
+
+void eth::killBigints(sp::utree const& _this)	// Walks _this recursively and deletes the pointer held by every any_type node.
+{
+	switch (_this.which())
+	{
+	case sp::utree_type::list_type: for (auto const& i: _this) killBigints(i); break;	// list node: recurse into each child
+	case sp::utree_type::any_type: delete _this.get(); break;	// NOTE(review): get()'s template argument appears stripped; presumably the object allocated with px::new_ in parseTreeLLL's atom rule - confirm
+	default:;	// every other node kind is left untouched
+	}
+}
+
+void eth::parseTreeLLL(string const& _s, sp::utree& o_out)	// Parses the LLL source text _s into o_out: ';' comments are stripped first, then the qi grammar below is run over what remains.
+{
+	using qi::ascii::space;	// used as the skipper in the phrase_parse call at the end
+	typedef sp::basic_string symbol_type;	// NOTE(review): template arguments appear stripped from this copy
+	typedef string::const_iterator it;	// NOTE(review): no use is visible below; presumably it appeared in the stripped qi::rule template arguments
+
+	qi::rule element;	// declared first because the rules below refer to it recursively; assigned after them. NOTE(review): every qi::rule here appears to have lost its template arguments
+	qi::rule str = '"' > qi::lexeme[+(~qi::char_(std::string("\"") + '\0'))] > '"';	// "...": one or more characters other than '"' and NUL between double quotes
+	qi::rule strsh = '\'' > qi::lexeme[+(~qi::char_(std::string(" ;())") + '\0'))];	// '-string: apostrophe followed by one or more characters other than space, ';', '(', ')' and NUL
+	qi::rule symbol = qi::lexeme[+(~qi::char_(std::string(" @[]{}:();\"\x01-\x1f\x7f") + '\0'))];	// symbol: one or more characters outside the listed punctuation, space, 0x01-0x1f, 0x7f and NUL
+	qi::rule intstr = qi::lexeme[ qi::no_case["0x"][qi::_val = "0x"] >> *qi::char_("0-9a-fA-F")[qi::_val += qi::_1]] | qi::lexeme[+qi::char_("0-9")[qi::_val += qi::_1]];	// integer text: "0x" (any case) plus zero or more hex digits, or one or more decimal digits
+	qi::rule integer = intstr;	// same text as intstr, exposed through this rule's own attribute (type stripped in this copy)
+	qi::rule multiplier = qi::lit("wei")[qi::_val = 1] | qi::lit("szabo")[qi::_val = szabo] | qi::lit("finney")[qi::_val = finney] | qi::lit("ether")[qi::_val = ether];	// unit keyword -> factor; szabo, finney and ether are names declared outside this chunk
+	qi::rule quantity = integer[qi::_val = qi::_1] >> -multiplier[qi::_val *= qi::_1];	// integer, optionally multiplied by a unit factor
+	qi::rule atom = quantity[qi::_val = px::construct(px::new_(qi::_1))] | (str | strsh)[qi::_val = qi::_1] | symbol[qi::_val = qi::_1];	// a quantity is stored through px::new_, i.e. heap-allocated (see killBigints); otherwise a string or a symbol
+	qi::rule seq = '{' > *element > '}';	// { e... }
+	qi::rule mload = '@' > element;	// @e
+	qi::rule sload = qi::lit("@@") > element;	// @@e
+	qi::rule mstore = '[' > element > ']' > -qi::lit(":") > element;	// [e] e, with an optional ':' after the bracket
+	qi::rule sstore = qi::lit("[[") > element > qi::lit("]]") > -qi::lit(":") > element;	// [[e]] e, with an optional ':' after the brackets
+	qi::rule extra = sload[qi::_val = qi::_1, bind(&sp::utree::tag, qi::_val, 2)] | mload[qi::_val = qi::_1, bind(&sp::utree::tag, qi::_val, 1)] | sstore[qi::_val = qi::_1, bind(&sp::utree::tag, qi::_val, 4)] | mstore[qi::_val = qi::_1, bind(&sp::utree::tag, qi::_val, 3)] | seq[qi::_val = qi::_1, bind(&sp::utree::tag, qi::_val, 5)];	// tags the result: mload 1, sload 2, mstore 3, sstore 4, seq 5; the two-character forms are tried before their one-character prefixes
+	qi::rule list = '(' > *element > ')';	// ( e... )
+	element = atom | list | extra;	// alternatives are tried in this order
+
+	string s;	// receives _s with comments removed
+	s.reserve(_s.size());	// the filtered text is never longer than the input
+	bool incomment = false;	// inside a ';' comment
+	bool instring = false;	// inside a "..." string
+	bool insstring = false;	// inside a '-string
+	for (auto i: _s)
+	{
+		if (i == ';' && !instring && !insstring)	// ';' outside both string forms starts a comment
+			incomment = true;
+		else if (i == '\n')	// end of line ends a comment and resets both string flags
+			incomment = instring = insstring = false;
+		else if (i == '"' && !insstring)	// a double quote toggles the "..." state unless a '-string is open
+			instring = !instring;
+		else if (i == '\'')	// NOTE(review): also taken for an apostrophe inside "...", after which the closing quote no longer toggles instring until a space or newline - confirm this is intended
+			insstring = true;
+		else if (i == ' ')	// a space ends a '-string
+			insstring = false;
+		if (!incomment)	// comment text, including the ';' itself, is dropped; the newline that ends the comment is kept
+			s.push_back(i);
+	}
+	qi::phrase_parse(s.cbegin(), s.cend(), element, space, o_out);	// the return value of phrase_parse is not inspected
+}
+
diff --git a/Parser.h b/Parser.h
new file mode 100644
index 000000000..3b2756576
--- /dev/null
+++ b/Parser.h
@@ -0,0 +1,38 @@
+/*
+	This file is part of cpp-ethereum.
+
+	cpp-ethereum is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	cpp-ethereum is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with cpp-ethereum. If not, see .
+*/
+/** @file Parser.h
+ * @author Gav Wood
+ * @date 2014
+ */
+
+#pragma once
+
+#include	// NOTE(review): header name missing here; text between angle brackets appears to have been stripped from this copy of the patch
+#include
+#include
+
+namespace boost { namespace spirit { class utree; } }	// forward declaration of boost::spirit::utree
+namespace sp = boost::spirit;
+
+namespace eth
+{
+
+void killBigints(sp::utree const& _this);	// Deletes the pointer held by every any_type node of _this, recursing through list nodes (defined in Parser.cpp).
+void parseTreeLLL(std::string const& _s, sp::utree& o_out);	// Strips ';' comments from _s and parses the rest into o_out (defined in Parser.cpp).
+
+}
+