From 37878cf8d250856f2516475e597ad9f3335ca560 Mon Sep 17 00:00:00 2001
From: Djordje Mijovic
Date: Thu, 27 Feb 2020 16:47:20 +0100
Subject: [PATCH] Adding support for ipfs large files.

---
 Changelog.md                            |   3 +-
 libsolidity/interface/CompilerStack.cpp |   6 +-
 libsolutil/IpfsHash.cpp                 | 153 ++++++++++++++++++++----
 test/libsolutil/IpfsHash.cpp            |  53 ++++++--
 4 files changed, 178 insertions(+), 37 deletions(-)

diff --git a/Changelog.md b/Changelog.md
index 15221bb7d..e92b536df 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -4,12 +4,12 @@ Language Features:
 
 
 Compiler Features:
+ * Metadata: Added support for IPFS hashes of large files that need to be split in multiple chunks.
 
 
 Bugfixes:
 
-
 ### 0.6.4 (2020-03-10)
 
 Language Features:
@@ -30,7 +30,6 @@ Bugfixes:
  * SMTChecker: Fix internal errors when analysing tuples.
  * Yul AST Import: correctly import blocks as statements, switch statements and string literals.
 
-
 ### 0.6.3 (2020-02-18)
 
 Language Features:
diff --git a/libsolidity/interface/CompilerStack.cpp b/libsolidity/interface/CompilerStack.cpp
index 49b17cf33..781f68d6b 100644
--- a/libsolidity/interface/CompilerStack.cpp
+++ b/libsolidity/interface/CompilerStack.cpp
@@ -899,8 +899,7 @@ h256 const& CompilerStack::Source::swarmHash() const
 string const& CompilerStack::Source::ipfsUrl() const
 {
 	if (ipfsUrlCached.empty())
-		if (scanner->source().size() < 1024 * 256)
-			ipfsUrlCached = "dweb:/ipfs/" + util::ipfsHashBase58(scanner->source());
+		ipfsUrlCached = "dweb:/ipfs/" + util::ipfsHashBase58(scanner->source());
 	return ipfsUrlCached;
 }
 
@@ -1373,10 +1372,7 @@ bytes CompilerStack::createCBORMetadata(string const& _metadata, bool _experimen
 
 	MetadataCBOREncoder encoder;
 	if (m_metadataHash == MetadataHash::IPFS)
-	{
-		solAssert(_metadata.length() < 1024 * 256, "Metadata too large.");
 		encoder.pushBytes("ipfs", util::ipfsHash(_metadata));
-	}
 	else if (m_metadataHash == MetadataHash::Bzzr1)
 		encoder.pushBytes("bzzr1", util::bzzr1Hash(_metadata).asBytes());
 	else
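[Editorial note, not part of the patch: with the 256 KiB guard removed, Source::ipfsUrl() now builds the dweb URL for a source of any length. A minimal sketch of the resulting behaviour; the include path libsolutil/IpfsHash.h is an assumption about where ipfsHashBase58 is declared.]

#include <libsolutil/IpfsHash.h> // assumed header for solidity::util::ipfsHashBase58

#include <iostream>
#include <string>

int main()
{
	// 300 KiB of zero bytes: above the old 256 KiB limit, so this previously
	// produced no URL at all; it is now hashed via chunking like any source.
	std::string source(300 * 1024, '\0');
	std::cout << "dweb:/ipfs/" + solidity::util::ipfsHashBase58(source) << "\n";
}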
diff --git a/libsolutil/IpfsHash.cpp b/libsolutil/IpfsHash.cpp
index d6a511a24..95605cdc1 100644
--- a/libsolutil/IpfsHash.cpp
+++ b/libsolutil/IpfsHash.cpp
@@ -40,6 +40,21 @@ bytes varintEncoding(size_t _n)
 	return encoded;
 }
 
+bytes encodeByteArray(bytes const& _data)
+{
+	return bytes{0x0a} + varintEncoding(_data.size()) + _data;
+}
+
+bytes encodeHash(bytes const& _data)
+{
+	return bytes{0x12, 0x20} + picosha2::hash256(_data);
+}
+
+bytes encodeLinkData(bytes const& _data)
+{
+	return bytes{0x12} + varintEncoding(_data.size()) + _data;
+}
+
 string base58Encode(bytes const& _data)
 {
 	static string const alphabet{"123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"};
@@ -53,36 +68,132 @@ string base58Encode(bytes const& _data)
 	reverse(output.begin(), output.end());
 	return output;
 }
+
+struct Chunk
+{
+	Chunk() = default;
+	Chunk(bytes _hash, size_t _size, size_t _blockSize):
+		hash(std::move(_hash)),
+		size(_size),
+		blockSize(_blockSize)
+	{}
+
+	bytes hash = {};
+	size_t size = 0;
+	size_t blockSize = 0;
+};
+
+using Chunks = vector<Chunk>;
+
+Chunk combineLinks(Chunks& _links)
+{
+	bytes data = {};
+	bytes lengths = {};
+	Chunk chunk = {};
+	for (Chunk& link: _links)
+	{
+		chunk.size += link.size;
+		chunk.blockSize += link.blockSize;
+
+		data += encodeLinkData(
+			bytes{0x0a} +
+			varintEncoding(link.hash.size()) +
+			std::move(link.hash) +
+			bytes{0x12, 0x00, 0x18} +
+			varintEncoding(link.blockSize)
+		);
+
+		lengths += bytes{0x20} + varintEncoding(link.size);
+	}
+
+	bytes blockData = data + encodeByteArray(bytes{0x08, 0x02, 0x18} + varintEncoding(chunk.size) + lengths);
+
+	chunk.blockSize += blockData.size();
+	chunk.hash = encodeHash(blockData);
+
+	return chunk;
+}
+
+Chunks buildNextLevel(Chunks& _currentLevel)
+{
+	size_t const maxChildNum = 174;
+
+	Chunks nextLevel;
+	Chunks links;
+
+	for (Chunk& chunk: _currentLevel)
+	{
+		links.emplace_back(std::move(chunk.hash), chunk.size, chunk.blockSize);
+		if (links.size() == maxChildNum)
+		{
+			nextLevel.emplace_back(combineLinks(links));
+			links = {};
+		}
+	}
+	if (!links.empty())
+		nextLevel.emplace_back(combineLinks(links));
+
+	return nextLevel;
+}
+
+/// Builds a tree starting from the bottom level, where the nodes are data nodes.
+/// The data nodes should be calculated beforehand and passed as the only level.
+/// Each next level is calculated as follows:
+/// - Pick up to maxChildNum (174) nodes until a whole level is consumed, group them and pass them to a node in the next level.
+/// - Repeat until the current level has only one node, then return the hash in that node.
bytes groupChunksBottomUp(Chunks _currentLevel)
+{
+	// once we reach the root, it is the only node in its level
+	while (_currentLevel.size() != 1)
+		_currentLevel = buildNextLevel(_currentLevel);
+
+	// the top level's only node stores the hash for the whole file
+	return _currentLevel.front().hash;
+}
 }
 
 bytes solidity::util::ipfsHash(string _data)
 {
-	assertThrow(_data.length() < 1024 * 256, DataTooLong, "IPFS hash for large (chunked) files not yet implemented.");
+	size_t const maxChunkSize = 1024 * 256;
+	size_t chunkCount = _data.length() / maxChunkSize + (_data.length() % maxChunkSize > 0 ? 1 : 0);
+	chunkCount = chunkCount == 0 ? 1 : chunkCount;
 
-	bytes lengthAsVarint = varintEncoding(_data.size());
+	Chunks allChunks;
 
-	bytes protobufEncodedData;
-	// Type: File
-	protobufEncodedData += bytes{0x08, 0x02};
-	if (!_data.empty())
+	for (unsigned long chunkIndex = 0; chunkIndex < chunkCount; chunkIndex++)
 	{
-		// Data (length delimited bytes)
-		protobufEncodedData += bytes{0x12};
-		protobufEncodedData += lengthAsVarint;
-		protobufEncodedData += asBytes(std::move(_data));
+		bytes chunkBytes = asBytes(
+			_data.substr(chunkIndex * maxChunkSize, min(maxChunkSize, _data.length() - chunkIndex * maxChunkSize))
+		);
+
+		bytes lengthAsVarint = varintEncoding(chunkBytes.size());
+
+		bytes protobufEncodedData;
+		// Type: File
+		protobufEncodedData += bytes{0x08, 0x02};
+		if (!chunkBytes.empty())
+		{
+			// Data (length delimited bytes)
+			protobufEncodedData += bytes{0x12};
+			protobufEncodedData += lengthAsVarint;
+			protobufEncodedData += chunkBytes;
+		}
+		// filesize: length as varint
+		protobufEncodedData += bytes{0x18} + lengthAsVarint;
+
+		// PBDag:
+		// Data: (length delimited bytes)
+		bytes blockData = encodeByteArray(protobufEncodedData);
+
+		// Multihash: sha2-256, 256 bits
+		allChunks.emplace_back(
+			encodeHash(blockData),
+			chunkBytes.size(),
+			blockData.size()
+		);
 	}
-	// filesize: length as varint
-	protobufEncodedData += bytes{0x18} + lengthAsVarint;
 
-	// PBDag:
-	// Data: (length delimited bytes)
-	size_t protobufLength = protobufEncodedData.size();
-	bytes blockData = bytes{0x0a} + varintEncoding(protobufLength) + std::move(protobufEncodedData);
-	// TODO Handle "large" files with multiple blocks
-
-	// Multihash: sha2-256, 256 bits
-	bytes hash = bytes{0x12, 0x20} + picosha2::hash256(std::move(blockData));
-	return hash;
+	return groupChunksBottomUp(std::move(allChunks));
 }
 
 string solidity::util::ipfsHashBase58(string _data)
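[Editorial aside, not from the patch: the protobuf framing above (field tags 0x0a, 0x12, 0x18, 0x20) leans on the file's existing varintEncoding helper, of which only the tail (`return encoded;`) appears in the hunk context. A self-contained sketch of the same little-endian base-128 scheme, with the 256 KiB chunk size as a worked value; the name `varint` is hypothetical.]

#include <cstdint>
#include <cstdio>
#include <vector>

// Little-endian base-128 varint: low 7 bits per byte, high bit set means "more bytes follow".
std::vector<uint8_t> varint(size_t _n)
{
	std::vector<uint8_t> encoded;
	while (_n > 0x7f)
	{
		encoded.push_back(uint8_t(0x80 | (_n & 0x7f)));
		_n >>= 7;
	}
	encoded.push_back(uint8_t(_n)); // final byte, continuation bit clear
	return encoded;
}

int main()
{
	// The 256 KiB chunk limit, 262144, encodes as: 80 80 10
	for (uint8_t byte: varint(1024 * 256))
		std::printf("%02x ", byte);
	std::printf("\n");
}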
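[A second aside, also not from the patch: the DAG this builds has leaves of at most 256 KiB and parents of at most 174 links, merged bottom-up until a single root remains. A back-of-the-envelope sketch, with the two constants copied from the hunks above, that predicts the leaf and link-level counts the new tests exercise.]

#include <cstddef>
#include <cstdio>

int main()
{
	size_t const maxChunkSize = 1024 * 256; // bytes per leaf chunk
	size_t const maxChildNum = 174;         // links per parent node

	for (size_t length: {10ul, 262145ul, 45613056ul, 45613057ul})
	{
		// same ceiling division as ipfsHash(), with the empty-file special case
		size_t chunks = length / maxChunkSize + (length % maxChunkSize > 0 ? 1 : 0);
		if (chunks == 0)
			chunks = 1;

		// climb the tree exactly as groupChunksBottomUp() does
		size_t levels = 0;
		for (size_t nodes = chunks; nodes != 1; nodes = (nodes + maxChildNum - 1) / maxChildNum)
			++levels;

		// e.g. 45613056 -> 174 leaves, 1 level; 45613057 -> 175 leaves, 2 levels
		std::printf("%zu bytes -> %zu leaf chunk(s), %zu link level(s)\n", length, chunks, levels);
	}
}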
diff --git a/test/libsolutil/IpfsHash.cpp b/test/libsolutil/IpfsHash.cpp
index 4514b2f4c..9c4fb5633 100644
--- a/test/libsolutil/IpfsHash.cpp
+++ b/test/libsolutil/IpfsHash.cpp
@@ -60,15 +60,50 @@ BOOST_AUTO_TEST_CASE(test_largest_unchunked)
 	BOOST_CHECK_EQUAL(ipfsHashBase58(data), "QmbNDspMkzkMFKyS3eCJGedG7GWRQHSCzJCZLjxP7wyVAx");
 }
 
-// TODO This needs chunking implemented
-//BOOST_AUTO_TEST_CASE(test_large)
-//{
-//	size_t length = 1310710;
-//	string data;
-//	data.resize(length, 0);
-//	BOOST_REQUIRE_EQUAL(data.size(), length);
-//	BOOST_CHECK_EQUAL(ipfsHashBase58(data), "QmNg7BJo8gEMDK8yGQbHEwPtycesnE6FUULX5iVd5TAL9f");
-//}
+BOOST_AUTO_TEST_CASE(test_smallest_chunked)
+{
+	size_t length = 1024 * 256 + 1;
+	string data;
+	data.resize(length, 0);
+	BOOST_REQUIRE_EQUAL(data.size(), length);
+	BOOST_CHECK_EQUAL(ipfsHashBase58(data), "QmbVuw4C4vcmVKqxoWtgDVobvcHrSn51qsmQmyxjk4sB2Q");
+}
+
+BOOST_AUTO_TEST_CASE(test_large)
+{
+	size_t length = 1310710;
+	string data;
+	data.resize(length, 0);
+	BOOST_REQUIRE_EQUAL(data.size(), length);
+	BOOST_CHECK_EQUAL(ipfsHashBase58(data), "QmNg7BJo8gEMDK8yGQbHEwPtycesnE6FUULX5iVd5TAL9f");
+}
+
+BOOST_AUTO_TEST_CASE(test_largest_one_level)
+{
+	size_t length = 45613056; // 1024 * 256 * 174
+	string data;
+	data.resize(length, 0);
+	BOOST_REQUIRE_EQUAL(data.size(), length);
+	BOOST_CHECK_EQUAL(ipfsHashBase58(data), "QmY4HSz1oVGdUzb8poVYPLsoqBZjH6LZrtgnme9wWn2Qko");
+}
+
+BOOST_AUTO_TEST_CASE(test_smallest_multi_level)
+{
+	size_t length = 45613057; // 1024 * 256 * 174 + 1
+	string data;
+	data.resize(length, 0);
+	BOOST_REQUIRE_EQUAL(data.size(), length);
+	BOOST_CHECK_EQUAL(ipfsHashBase58(data), "QmehMASWcBsX7VcEQqs6rpR5AHoBfKyBVEgmkJHjpPg8jq");
+}
+
+BOOST_AUTO_TEST_CASE(test_multi_level_tree)
+{
+	size_t length = 46661632;
+	string data;
+	data.resize(length, 0);
+	BOOST_REQUIRE_EQUAL(data.size(), length);
+	BOOST_CHECK_EQUAL(ipfsHashBase58(data), "QmaTb1sT9hrSXJLmf8bxJ9NuwndiHuMLsgNLgkS2eXu3Xj");
+}
 
 BOOST_AUTO_TEST_SUITE_END()
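[Closing editorial note, not part of the patch: the new fixtures pin both chunking boundaries, since 1024 * 256 * 174 = 45613056 bytes is the largest file whose leaf links still fit under a single root, and one byte more forces a second link level. Because the encoder mirrors the go-ipfs defaults (256 KiB size chunker, 174-link balanced layout), the expected digests should also be reproducible with `ipfs add --only-hash` on files of the same lengths, though that cross-check is an assumption, not something the patch asserts. A minimal reproduction of one fixture outside the Boost harness, assuming libsolutil is linked and the header path below is correct:]

#include <libsolutil/IpfsHash.h> // assumed header for solidity::util::ipfsHashBase58

#include <cassert>
#include <string>

int main()
{
	// One byte past the single-chunk limit: the test_smallest_chunked fixture above.
	std::string data(1024 * 256 + 1, '\0');
	assert(
		solidity::util::ipfsHashBase58(data)
		== "QmbVuw4C4vcmVKqxoWtgDVobvcHrSn51qsmQmyxjk4sB2Q"
	);
}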