diff --git a/scripts/ci/mini-lz4.js b/scripts/ci/mini-lz4.js new file mode 100644 index 000000000..06514fec5 --- /dev/null +++ b/scripts/ci/mini-lz4.js @@ -0,0 +1,345 @@ +/* +MiniLZ4: Minimal LZ4 block decoding and encoding. + +based off of node-lz4, https://github.com/pierrec/node-lz4 + +==== +Copyright (c) 2012 Pierre Curto + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +==== + +changes have the same license +*/ + +var MiniLZ4 = (function() { + +var exports = {}; + +/** + * Decode a block. Assumptions: input contains all sequences of a + * chunk, output is large enough to receive the decoded data. + * If the output buffer is too small, an error will be thrown. + * If the returned value is negative, an error occured at the returned offset. + * + * @param {ArrayBufferView} input input data + * @param {ArrayBufferView} output output data + * @param {number=} sIdx + * @param {number=} eIdx + * @return {number} number of decoded bytes + * @private + */ +exports.uncompress = function (input, output, sIdx, eIdx) { + sIdx = sIdx || 0 + eIdx = eIdx || (input.length - sIdx) + // Process each sequence in the incoming data + for (var i = sIdx, n = eIdx, j = 0; i < n;) { + var token = input[i++] + + // Literals + var literals_length = (token >> 4) + if (literals_length > 0) { + // length of literals + var l = literals_length + 240 + while (l === 255) { + l = input[i++] + literals_length += l + } + + // Copy the literals + var end = i + literals_length + while (i < end) output[j++] = input[i++] + + // End of buffer? + if (i === n) return j + } + + // Match copy + // 2 bytes offset (little endian) + var offset = input[i++] | (input[i++] << 8) + + // XXX 0 is an invalid offset value + if (offset === 0) return j + if (offset > j) return -(i-2) + + // length of match copy + var match_length = (token & 0xf) + var l = match_length + 240 + while (l === 255) { + l = input[i++] + match_length += l + } + + // Copy the match + var pos = j - offset // position of the match copy in the current output + var end = j + match_length + 4 // minmatch = 4 + while (j < end) output[j++] = output[pos++] + } + + return j +} + +var + maxInputSize = 0x7E000000 +, minMatch = 4 +// uint32() optimization +, hashLog = 16 +, hashShift = (minMatch * 8) - hashLog +, hashSize = 1 << hashLog + +, copyLength = 8 +, lastLiterals = 5 +, mfLimit = copyLength + minMatch +, skipStrength = 6 + +, mlBits = 4 +, mlMask = (1 << mlBits) - 1 +, runBits = 8 - mlBits +, runMask = (1 << runBits) - 1 + +, hasher = /* XXX uint32( */ 2654435761 /* ) */ + +assert(hashShift === 16); +var hashTable = new Int16Array(1<<16); +var empty = new Int16Array(hashTable.length); + +// CompressBound returns the maximum length of a lz4 block, given it's uncompressed length +exports.compressBound = function (isize) { + return isize > maxInputSize + ? 0 + : (isize + (isize/255) + 16) | 0 +} + +/** @param {number=} sIdx + @param {number=} eIdx */ +exports.compress = function (src, dst, sIdx, eIdx) { + hashTable.set(empty); + return compressBlock(src, dst, 0, sIdx || 0, eIdx || dst.length) +} + +function compressBlock (src, dst, pos, sIdx, eIdx) { + // XXX var Hash = uint32() // Reusable unsigned 32 bits integer + var dpos = sIdx + var dlen = eIdx - sIdx + var anchor = 0 + + if (src.length >= maxInputSize) throw new Error("input too large") + + // Minimum of input bytes for compression (LZ4 specs) + if (src.length > mfLimit) { + var n = exports.compressBound(src.length) + if ( dlen < n ) throw Error("output too small: " + dlen + " < " + n) + + var + step = 1 + , findMatchAttempts = (1 << skipStrength) + 3 + // Keep last few bytes incompressible (LZ4 specs): + // last 5 bytes must be literals + , srcLength = src.length - mfLimit + + while (pos + minMatch < srcLength) { + // Find a match + // min match of 4 bytes aka sequence + var sequenceLowBits = src[pos+1]<<8 | src[pos] + var sequenceHighBits = src[pos+3]<<8 | src[pos+2] + // compute hash for the current sequence + var hash = Math.imul(sequenceLowBits | (sequenceHighBits << 16), hasher) >>> hashShift; + /* XXX Hash.fromBits(sequenceLowBits, sequenceHighBits) + .multiply(hasher) + .shiftr(hashShift) + .toNumber() */ + // get the position of the sequence matching the hash + // NB. since 2 different sequences may have the same hash + // it is double-checked below + // do -1 to distinguish between initialized and uninitialized values + var ref = hashTable[hash] - 1 + // save position of current sequence in hash table + hashTable[hash] = pos + 1 + + // first reference or within 64k limit or current sequence !== hashed one: no match + if ( ref < 0 || + ((pos - ref) >>> 16) > 0 || + ( + ((src[ref+3]<<8 | src[ref+2]) != sequenceHighBits) || + ((src[ref+1]<<8 | src[ref]) != sequenceLowBits ) + ) + ) { + // increase step if nothing found within limit + step = findMatchAttempts++ >> skipStrength + pos += step + continue + } + + findMatchAttempts = (1 << skipStrength) + 3 + + // got a match + var literals_length = pos - anchor + var offset = pos - ref + + // minMatch already verified + pos += minMatch + ref += minMatch + + // move to the end of the match (>=minMatch) + var match_length = pos + while (pos < srcLength && src[pos] == src[ref]) { + pos++ + ref++ + } + + // match length + match_length = pos - match_length + + // token + var token = match_length < mlMask ? match_length : mlMask + + // encode literals length + if (literals_length >= runMask) { + // add match length to the token + dst[dpos++] = (runMask << mlBits) + token + for (var len = literals_length - runMask; len > 254; len -= 255) { + dst[dpos++] = 255 + } + dst[dpos++] = len + } else { + // add match length to the token + dst[dpos++] = (literals_length << mlBits) + token + } + + // write literals + for (var i = 0; i < literals_length; i++) { + dst[dpos++] = src[anchor+i] + } + + // encode offset + dst[dpos++] = offset + dst[dpos++] = (offset >> 8) + + // encode match length + if (match_length >= mlMask) { + match_length -= mlMask + while (match_length >= 255) { + match_length -= 255 + dst[dpos++] = 255 + } + + dst[dpos++] = match_length + } + + anchor = pos + } + } + + // cannot compress input + if (anchor == 0) return 0 + + // Write last literals + // encode literals length + literals_length = src.length - anchor + if (literals_length >= runMask) { + // add match length to the token + dst[dpos++] = (runMask << mlBits) + for (var ln = literals_length - runMask; ln > 254; ln -= 255) { + dst[dpos++] = 255 + } + dst[dpos++] = ln + } else { + // add match length to the token + dst[dpos++] = (literals_length << mlBits) + } + + // write literals + pos = anchor + while (pos < src.length) { + dst[dpos++] = src[pos++] + } + + return dpos +} + +exports.CHUNK_SIZE = 2048; // musl libc does readaheads of 1024 bytes, so a multiple of that is a good idea + +exports.compressPackage = function(data, verify) { + if (verify) { + var temp = new Uint8Array(exports.CHUNK_SIZE); + } + // compress the data in chunks + assert(data instanceof ArrayBuffer); + data = new Uint8Array(data); + console.log('compressing package of size ' + data.length); + var compressedChunks = []; + var successes = []; + var offset = 0; + var total = 0; + while (offset < data.length) { + var chunk = data.subarray(offset, offset + exports.CHUNK_SIZE); + //console.log('compress a chunk ' + [offset, total, data.length]); + offset += exports.CHUNK_SIZE; + var bound = exports.compressBound(chunk.length); + var compressed = new Uint8Array(bound); + var compressedSize = exports.compress(chunk, compressed); + if (compressedSize > 0) { + assert(compressedSize <= bound); + compressed = compressed.subarray(0, compressedSize); + compressedChunks.push(compressed); + total += compressedSize; + successes.push(1); + if (verify) { + var back = exports.uncompress(compressed, temp); + assert(back === chunk.length, [back, chunk.length]); + for (var i = 0; i < chunk.length; i++) { + assert(chunk[i] === temp[i]); + } + } + } else { + assert(compressedSize === 0); + // failure to compress :( + compressedChunks.push(chunk); + total += chunk.length; // last chunk may not be the full exports.CHUNK_SIZE size + successes.push(0); + } + } + data = null; // XXX null out pack['data'] too? + var compressedData = { + 'data': new Uint8Array(total + exports.CHUNK_SIZE*2), // store all the compressed data, plus room for two cached decompressed chunk, in one fast array + 'cachedOffset': total, + 'cachedIndexes': [-1, -1], // cache last two blocks, so that reading 1,2,3 + preloading another block won't trigger decompress thrashing + 'cachedChunks': [null, null], + 'offsets': [], // chunk# => start in compressed data + 'sizes': [], + 'successes': successes, // 1 if chunk is compressed + }; + offset = 0; + for (var i = 0; i < compressedChunks.length; i++) { + compressedData['data'].set(compressedChunks[i], offset); + compressedData['offsets'][i] = offset; + compressedData['sizes'][i] = compressedChunks[i].length + offset += compressedChunks[i].length; + } + console.log('compressed package into ' + [compressedData['data'].length]); + assert(offset === total); + return compressedData; +}; + +assert(exports.CHUNK_SIZE < (1 << 15)); // we use 16-bit ints as the type of the hash table, chunk size must be smaller + +return exports; + +})(); +