mirror of https://github.com/ethereum/solidity
synced 2023-10-03 13:03:40 +00:00
Add emscripten's mini-lz4.js.
This commit is contained in:
parent d29709e741
commit a2a5ca6fef
345 scripts/ci/mini-lz4.js Normal file
@@ -0,0 +1,345 @@
/*
MiniLZ4: Minimal LZ4 block decoding and encoding.

based off of node-lz4, https://github.com/pierrec/node-lz4

====
Copyright (c) 2012 Pierre Curto

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
====

changes have the same license
*/

var MiniLZ4 = (function() {

var exports = {};

/**
 * Decode a block. Assumptions: input contains all sequences of a
 * chunk, output is large enough to receive the decoded data.
 * If the output buffer is too small, an error will be thrown.
 * If the returned value is negative, an error occurred at the returned offset.
 *
 * @param {ArrayBufferView} input input data
 * @param {ArrayBufferView} output output data
 * @param {number=} sIdx
 * @param {number=} eIdx
 * @return {number} number of decoded bytes
 * @private
 */
exports.uncompress = function (input, output, sIdx, eIdx) {
  sIdx = sIdx || 0
  eIdx = eIdx || (input.length - sIdx)
  // Process each sequence in the incoming data
  for (var i = sIdx, n = eIdx, j = 0; i < n;) {
    var token = input[i++]

    // Literals
    var literals_length = (token >> 4)
    if (literals_length > 0) {
      // length of literals
      var l = literals_length + 240
      while (l === 255) {
        l = input[i++]
        literals_length += l
      }

      // Copy the literals
      var end = i + literals_length
      while (i < end) output[j++] = input[i++]

      // End of buffer?
      if (i === n) return j
    }

    // Match copy
    // 2 bytes offset (little endian)
    var offset = input[i++] | (input[i++] << 8)

    // XXX 0 is an invalid offset value
    if (offset === 0) return j
    if (offset > j) return -(i-2)

    // length of match copy
    var match_length = (token & 0xf)
    var l = match_length + 240
    while (l === 255) {
      l = input[i++]
      match_length += l
    }

    // Copy the match
    var pos = j - offset // position of the match copy in the current output
    var end = j + match_length + 4 // minmatch = 4
    while (j < end) output[j++] = output[pos++]
  }

  return j
}
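
// Usage sketch, not part of the upstream file: decoding one raw LZ4 block.
// The caller must know the decompressed size up front and pre-allocate the
// output buffer. demoUncompress is a hypothetical helper for illustration.
function demoUncompress(block, decompressedSize) {
  var out = new Uint8Array(decompressedSize);
  var n = exports.uncompress(block, out);
  // A negative return value reports the input offset of a bad sequence.
  if (n < 0) throw new Error('corrupt sequence near input offset ' + (-n));
  return out.subarray(0, n);
}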

var
  maxInputSize = 0x7E000000
, minMatch = 4
// uint32() optimization
, hashLog = 16
, hashShift = (minMatch * 8) - hashLog
, hashSize = 1 << hashLog

, copyLength = 8
, lastLiterals = 5
, mfLimit = copyLength + minMatch
, skipStrength = 6

, mlBits = 4
, mlMask = (1 << mlBits) - 1
, runBits = 8 - mlBits
, runMask = (1 << runBits) - 1

, hasher = /* XXX uint32( */ 2654435761 /* ) */

assert(hashShift === 16);
var hashTable = new Int16Array(1<<16);
var empty = new Int16Array(hashTable.length);

// compressBound returns the maximum length of an LZ4 block, given its uncompressed length
exports.compressBound = function (isize) {
  return isize > maxInputSize
    ? 0
    : (isize + (isize/255) + 16) | 0
}
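
// Worked example (illustrative, not part of the upstream file): for one
// 2048-byte chunk the bound is (2048 + 2048/255 + 16) | 0 === 2072, so a
// 2072-byte destination buffer always suffices; an input larger than
// maxInputSize yields 0, signalling "do not compress".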

/** @param {number=} sIdx
    @param {number=} eIdx */
exports.compress = function (src, dst, sIdx, eIdx) {
  hashTable.set(empty);
  return compressBlock(src, dst, 0, sIdx || 0, eIdx || dst.length)
}
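
// Round-trip sketch (illustrative, not part of the upstream file): compress
// one buffer and decode it back. exports.compress returning 0 means the input
// was incompressible and the caller should store the raw bytes instead, which
// is exactly the convention compressPackage relies on below. demoRoundTrip is
// a hypothetical helper.
function demoRoundTrip(bytes) {
  var dst = new Uint8Array(exports.compressBound(bytes.length));
  var written = exports.compress(bytes, dst);
  if (written === 0) return bytes; // incompressible: keep the raw bytes
  var out = new Uint8Array(bytes.length);
  assert(exports.uncompress(dst.subarray(0, written), out) === bytes.length);
  return out;
}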

function compressBlock (src, dst, pos, sIdx, eIdx) {
  // XXX var Hash = uint32() // Reusable unsigned 32 bits integer
  var dpos = sIdx
  var dlen = eIdx - sIdx
  var anchor = 0

  if (src.length >= maxInputSize) throw new Error("input too large")

  // Minimum of input bytes for compression (LZ4 specs)
  if (src.length > mfLimit) {
    var n = exports.compressBound(src.length)
    if ( dlen < n ) throw Error("output too small: " + dlen + " < " + n)

    var
      step = 1
    , findMatchAttempts = (1 << skipStrength) + 3
    // Keep last few bytes incompressible (LZ4 specs):
    // last 5 bytes must be literals
    , srcLength = src.length - mfLimit

    while (pos + minMatch < srcLength) {
      // Find a match
      // min match of 4 bytes aka sequence
      var sequenceLowBits = src[pos+1]<<8 | src[pos]
      var sequenceHighBits = src[pos+3]<<8 | src[pos+2]
      // compute hash for the current sequence
      var hash = Math.imul(sequenceLowBits | (sequenceHighBits << 16), hasher) >>> hashShift;
      /* XXX Hash.fromBits(sequenceLowBits, sequenceHighBits)
             .multiply(hasher)
             .shiftr(hashShift)
             .toNumber() */
      // get the position of the sequence matching the hash
      // NB. since 2 different sequences may have the same hash
      // it is double-checked below
      // do -1 to distinguish between initialized and uninitialized values
      var ref = hashTable[hash] - 1
      // save position of current sequence in hash table
      hashTable[hash] = pos + 1

      // no match if: first occurrence, offset beyond the 64K limit,
      // or hash collision (the stored sequence differs from the current one)
      if ( ref < 0 ||
        ((pos - ref) >>> 16) > 0 ||
        (
          ((src[ref+3]<<8 | src[ref+2]) != sequenceHighBits) ||
          ((src[ref+1]<<8 | src[ref]) != sequenceLowBits )
        )
      ) {
        // increase step if nothing found within limit
        step = findMatchAttempts++ >> skipStrength
        pos += step
        continue
      }

      findMatchAttempts = (1 << skipStrength) + 3

      // got a match
      var literals_length = pos - anchor
      var offset = pos - ref

      // minMatch already verified
      pos += minMatch
      ref += minMatch

      // move to the end of the match (>=minMatch)
      var match_length = pos
      while (pos < srcLength && src[pos] == src[ref]) {
        pos++
        ref++
      }

      // match length
      match_length = pos - match_length

      // token
      var token = match_length < mlMask ? match_length : mlMask

      // encode literals length
      if (literals_length >= runMask) {
        // add literals length to the token
        dst[dpos++] = (runMask << mlBits) + token
        for (var len = literals_length - runMask; len > 254; len -= 255) {
          dst[dpos++] = 255
        }
        dst[dpos++] = len
      } else {
        // add literals length to the token
        dst[dpos++] = (literals_length << mlBits) + token
      }

      // write literals
      for (var i = 0; i < literals_length; i++) {
        dst[dpos++] = src[anchor+i]
      }

      // encode offset
      dst[dpos++] = offset
      dst[dpos++] = (offset >> 8)

      // encode match length
      if (match_length >= mlMask) {
        match_length -= mlMask
        while (match_length >= 255) {
          match_length -= 255
          dst[dpos++] = 255
        }

        dst[dpos++] = match_length
      }

      anchor = pos
    }
  }

  // cannot compress input
  if (anchor == 0) return 0

  // Write last literals
  // encode literals length
  literals_length = src.length - anchor
  if (literals_length >= runMask) {
    // add literals length to the token
    dst[dpos++] = (runMask << mlBits)
    for (var ln = literals_length - runMask; ln > 254; ln -= 255) {
      dst[dpos++] = 255
    }
    dst[dpos++] = ln
  } else {
    // add literals length to the token
    dst[dpos++] = (literals_length << mlBits)
  }

  // write literals
  pos = anchor
  while (pos < src.length) {
    dst[dpos++] = src[pos++]
  }

  return dpos
}
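
// Sequence layout produced above (per the LZ4 block format): one token byte
// (high nibble = literal count, low nibble = match length - minMatch), then
// optional 255-valued length continuation bytes, the literal bytes, a 2-byte
// little-endian match offset, and optional match-length continuation bytes.
// uncompress() above parses exactly this layout.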

exports.CHUNK_SIZE = 2048; // musl libc does readaheads of 1024 bytes, so a multiple of that is a good idea

exports.compressPackage = function(data, verify) {
  if (verify) {
    var temp = new Uint8Array(exports.CHUNK_SIZE);
  }
  // compress the data in chunks
  assert(data instanceof ArrayBuffer);
  data = new Uint8Array(data);
  console.log('compressing package of size ' + data.length);
  var compressedChunks = [];
  var successes = [];
  var offset = 0;
  var total = 0;
  while (offset < data.length) {
    var chunk = data.subarray(offset, offset + exports.CHUNK_SIZE);
    //console.log('compress a chunk ' + [offset, total, data.length]);
    offset += exports.CHUNK_SIZE;
    var bound = exports.compressBound(chunk.length);
    var compressed = new Uint8Array(bound);
    var compressedSize = exports.compress(chunk, compressed);
    if (compressedSize > 0) {
      assert(compressedSize <= bound);
      compressed = compressed.subarray(0, compressedSize);
      compressedChunks.push(compressed);
      total += compressedSize;
      successes.push(1);
      if (verify) {
        var back = exports.uncompress(compressed, temp);
        assert(back === chunk.length, [back, chunk.length]);
        for (var i = 0; i < chunk.length; i++) {
          assert(chunk[i] === temp[i]);
        }
      }
    } else {
      assert(compressedSize === 0);
      // failure to compress :(
      compressedChunks.push(chunk);
      total += chunk.length; // last chunk may not be the full exports.CHUNK_SIZE size
      successes.push(0);
    }
  }
  data = null; // XXX null out pack['data'] too?
  var compressedData = {
    'data': new Uint8Array(total + exports.CHUNK_SIZE*2), // store all the compressed data, plus room for two cached decompressed chunks, in one fast array
    'cachedOffset': total,
    'cachedIndexes': [-1, -1], // cache last two blocks, so that reading 1,2,3 + preloading another block won't trigger decompress thrashing
    'cachedChunks': [null, null],
    'offsets': [], // chunk# => start in compressed data
    'sizes': [],
    'successes': successes, // 1 if chunk is compressed
  };
  offset = 0;
  for (var i = 0; i < compressedChunks.length; i++) {
    compressedData['data'].set(compressedChunks[i], offset);
    compressedData['offsets'][i] = offset;
    compressedData['sizes'][i] = compressedChunks[i].length;
    offset += compressedChunks[i].length;
  }
  console.log('compressed package into ' + [compressedData['data'].length]);
  assert(offset === total);
  return compressedData;
};
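
// Read-back sketch (illustrative, not part of the upstream file): fetching
// chunk i from a package built by compressPackage. demoReadChunk is a
// hypothetical helper; a real consumer would also use the cachedIndexes /
// cachedChunks fields to avoid re-decompressing hot chunks.
function demoReadChunk(pkg, i) {
  var start = pkg['offsets'][i];
  var stored = pkg['data'].subarray(start, start + pkg['sizes'][i]);
  if (!pkg['successes'][i]) return stored; // chunk was stored uncompressed
  var out = new Uint8Array(exports.CHUNK_SIZE);
  var n = exports.uncompress(stored, out);
  return out.subarray(0, n);
}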

assert(exports.CHUNK_SIZE < (1 << 15)); // we use 16-bit ints as the type of the hash table, so the chunk size must stay smaller

return exports;

})();
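
// Usage note (illustrative): MiniLZ4 is a plain module object, so packaging an
// ArrayBuffer `buf` for chunked loading is just
//   var pkg = MiniLZ4.compressPackage(buf, /*verify=*/ true);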