Adjust mini-lz4.js and base64DecToArr.js.

This commit is contained in:
Daniel Kirchner 2022-02-08 13:16:20 +01:00
parent a68183db3c
commit 6895972d04
2 changed files with 28 additions and 259 deletions

View File

@ -1,3 +1,4 @@
function base64DecToArr (sBase64) {
/*\
|*|
|*| Base64 / binary data / UTF-8 strings utilities
@ -25,15 +26,13 @@ function b64ToUint6 (nChr) {
}
function base64DecToArr (sBase64, nBlocksSize) {
var
sB64Enc = sBase64.replace(/[^A-Za-z0-9\+\/]/g, ""), nInLen = sB64Enc.length,
nOutLen = nBlocksSize ? Math.ceil((nInLen * 3 + 1 >> 2) / nBlocksSize) * nBlocksSize : nInLen * 3 + 1 >> 2, taBytes = new Uint8Array(nOutLen);
nInLen = sBase64.length,
nOutLen = nInLen * 3 + 1 >> 2, taBytes = new Uint8Array(nOutLen);
for (var nMod3, nMod4, nUint24 = 0, nOutIdx = 0, nInIdx = 0; nInIdx < nInLen; nInIdx++) {
nMod4 = nInIdx & 3;
nUint24 |= b64ToUint6(sB64Enc.charCodeAt(nInIdx)) << 6 * (3 - nMod4);
nUint24 |= b64ToUint6(sBase64.charCodeAt(nInIdx)) << 6 * (3 - nMod4);
if (nMod4 === 3 || nInLen - nInIdx === 1) {
for (nMod3 = 0; nMod3 < 3 && nOutIdx < nOutLen; nMod3++, nOutIdx++) {
taBytes[nOutIdx] = nUint24 >>> (16 >>> nMod3 & 24) & 255;

View File

@ -1,4 +1,7 @@
function uncompress(source, uncompressedSize) {
/*
based off https://github.com/emscripten-core/emscripten/blob/main/third_party/mini-lz4.js
====
MiniLZ4: Minimal LZ4 block decoding and encoding.
based off of node-lz4, https://github.com/pierrec/node-lz4
@ -27,16 +30,11 @@ THE SOFTWARE.
changes have the same license
*/
var MiniLZ4 = (function() {
var exports = {};
/**
* Decode a block. Assumptions: input contains all sequences of a
* chunk, output is large enough to receive the decoded data.
* If the output buffer is too small, an error will be thrown.
* If the returned value is negative, an error occured at the returned offset.
* If the returned value is negative, an error occurred at the returned offset.
*
* @param {ArrayBufferView} input input data
* @param {ArrayBufferView} output output data
@ -45,7 +43,7 @@ var exports = {};
* @return {number} number of decoded bytes
* @private
*/
exports.uncompress = function (input, output, sIdx, eIdx) {
function uncompressBlock (input, output, sIdx, eIdx) {
sIdx = sIdx || 0
eIdx = eIdx || (input.length - sIdx)
// Process each sequence in the incoming data
@ -85,7 +83,6 @@ exports.uncompress = function (input, output, sIdx, eIdx) {
l = input[i++]
match_length += l
}
// Copy the match
var pos = j - offset // position of the match copy in the current output
var end = j + match_length + 4 // minmatch = 4
@ -94,252 +91,25 @@ exports.uncompress = function (input, output, sIdx, eIdx) {
return j
}
var
maxInputSize = 0x7E000000
, minMatch = 4
// uint32() optimization
, hashLog = 16
, hashShift = (minMatch * 8) - hashLog
, hashSize = 1 << hashLog
, copyLength = 8
, lastLiterals = 5
, mfLimit = copyLength + minMatch
, skipStrength = 6
, mlBits = 4
, mlMask = (1 << mlBits) - 1
, runBits = 8 - mlBits
, runMask = (1 << runBits) - 1
, hasher = /* XXX uint32( */ 2654435761 /* ) */
assert(hashShift === 16);
var hashTable = new Int16Array(1<<16);
var empty = new Int16Array(hashTable.length);
// CompressBound returns the maximum length of a lz4 block, given its uncompressed length
exports.compressBound = function (isize) {
return isize > maxInputSize
? 0
: (isize + (isize/255) + 16) | 0
}
/** @param {number=} sIdx
@param {number=} eIdx */
exports.compress = function (src, dst, sIdx, eIdx) {
hashTable.set(empty);
return compressBlock(src, dst, 0, sIdx || 0, eIdx || dst.length)
}
function compressBlock (src, dst, pos, sIdx, eIdx) {
// XXX var Hash = uint32() // Reusable unsigned 32 bits integer
var dpos = sIdx
var dlen = eIdx - sIdx
var anchor = 0
if (src.length >= maxInputSize) throw new Error("input too large")
// Minimum of input bytes for compression (LZ4 specs)
if (src.length > mfLimit) {
var n = exports.compressBound(src.length)
if ( dlen < n ) throw Error("output too small: " + dlen + " < " + n)
var
step = 1
, findMatchAttempts = (1 << skipStrength) + 3
// Keep last few bytes incompressible (LZ4 specs):
// last 5 bytes must be literals
, srcLength = src.length - mfLimit
while (pos + minMatch < srcLength) {
// Find a match
// min match of 4 bytes aka sequence
var sequenceLowBits = src[pos+1]<<8 | src[pos]
var sequenceHighBits = src[pos+3]<<8 | src[pos+2]
// compute hash for the current sequence
var hash = Math.imul(sequenceLowBits | (sequenceHighBits << 16), hasher) >>> hashShift;
/* XXX Hash.fromBits(sequenceLowBits, sequenceHighBits)
.multiply(hasher)
.shiftr(hashShift)
.toNumber() */
// get the position of the sequence matching the hash
// NB. since 2 different sequences may have the same hash
// it is double-checked below
// do -1 to distinguish between initialized and uninitialized values
var ref = hashTable[hash] - 1
// save position of current sequence in hash table
hashTable[hash] = pos + 1
// first reference or within 64k limit or current sequence !== hashed one: no match
if ( ref < 0 ||
((pos - ref) >>> 16) > 0 ||
(
((src[ref+3]<<8 | src[ref+2]) != sequenceHighBits) ||
((src[ref+1]<<8 | src[ref]) != sequenceLowBits )
)
) {
// increase step if nothing found within limit
step = findMatchAttempts++ >> skipStrength
pos += step
continue
}
findMatchAttempts = (1 << skipStrength) + 3
// got a match
var literals_length = pos - anchor
var offset = pos - ref
// minMatch already verified
pos += minMatch
ref += minMatch
// move to the end of the match (>=minMatch)
var match_length = pos
while (pos < srcLength && src[pos] == src[ref]) {
pos++
ref++
}
// match length
match_length = pos - match_length
// token
var token = match_length < mlMask ? match_length : mlMask
// encode literals length
if (literals_length >= runMask) {
// add match length to the token
dst[dpos++] = (runMask << mlBits) + token
for (var len = literals_length - runMask; len > 254; len -= 255) {
dst[dpos++] = 255
}
dst[dpos++] = len
} else {
// add match length to the token
dst[dpos++] = (literals_length << mlBits) + token
}
// write literals
for (var i = 0; i < literals_length; i++) {
dst[dpos++] = src[anchor+i]
}
// encode offset
dst[dpos++] = offset
dst[dpos++] = (offset >> 8)
// encode match length
if (match_length >= mlMask) {
match_length -= mlMask
while (match_length >= 255) {
match_length -= 255
dst[dpos++] = 255
}
dst[dpos++] = match_length
}
anchor = pos
var result = new ArrayBuffer(uncompressedSize);
var sourceIndex = 0;
var destIndex = 0;
var blockSize;
while((blockSize = (source[sourceIndex] | (source[sourceIndex + 1] << 8) | (source[sourceIndex + 2] << 16) | (source[sourceIndex + 3] << 24))) > 0)
{
sourceIndex += 4;
if (blockSize & 0x80000000)
{
blockSize &= 0x7FFFFFFFF;
for (var i = 0; i < blockSize; i++) {
result[destIndex++] = source[sourceIndex++];
}
}
// cannot compress input
if (anchor == 0) return 0
// Write last literals
// encode literals length
literals_length = src.length - anchor
if (literals_length >= runMask) {
// add match length to the token
dst[dpos++] = (runMask << mlBits)
for (var ln = literals_length - runMask; ln > 254; ln -= 255) {
dst[dpos++] = 255
}
dst[dpos++] = ln
} else {
// add match length to the token
dst[dpos++] = (literals_length << mlBits)
else
{
destIndex += uncompressBlock(source, new Uint8Array(result, destIndex, uncompressedSize - destIndex), sourceIndex, sourceIndex + blockSize);
sourceIndex += blockSize;
}
// write literals
pos = anchor
while (pos < src.length) {
dst[dpos++] = src[pos++]
}
return dpos
}
exports.CHUNK_SIZE = 2048; // musl libc does readaheads of 1024 bytes, so a multiple of that is a good idea
exports.compressPackage = function(data, verify) {
if (verify) {
var temp = new Uint8Array(exports.CHUNK_SIZE);
}
// compress the data in chunks
assert(data instanceof ArrayBuffer);
data = new Uint8Array(data);
console.log('compressing package of size ' + data.length);
var compressedChunks = [];
var successes = [];
var offset = 0;
var total = 0;
while (offset < data.length) {
var chunk = data.subarray(offset, offset + exports.CHUNK_SIZE);
//console.log('compress a chunk ' + [offset, total, data.length]);
offset += exports.CHUNK_SIZE;
var bound = exports.compressBound(chunk.length);
var compressed = new Uint8Array(bound);
var compressedSize = exports.compress(chunk, compressed);
if (compressedSize > 0) {
assert(compressedSize <= bound);
compressed = compressed.subarray(0, compressedSize);
compressedChunks.push(compressed);
total += compressedSize;
successes.push(1);
if (verify) {
var back = exports.uncompress(compressed, temp);
assert(back === chunk.length, [back, chunk.length]);
for (var i = 0; i < chunk.length; i++) {
assert(chunk[i] === temp[i]);
}
}
} else {
assert(compressedSize === 0);
// failure to compress :(
compressedChunks.push(chunk);
total += chunk.length; // last chunk may not be the full exports.CHUNK_SIZE size
successes.push(0);
}
}
data = null; // XXX null out pack['data'] too?
var compressedData = {
'data': new Uint8Array(total + exports.CHUNK_SIZE*2), // store all the compressed data, plus room for two cached decompressed chunk, in one fast array
'cachedOffset': total,
'cachedIndexes': [-1, -1], // cache last two blocks, so that reading 1,2,3 + preloading another block won't trigger decompress thrashing
'cachedChunks': [null, null],
'offsets': [], // chunk# => start in compressed data
'sizes': [],
'successes': successes, // 1 if chunk is compressed
};
offset = 0;
for (var i = 0; i < compressedChunks.length; i++) {
compressedData['data'].set(compressedChunks[i], offset);
compressedData['offsets'][i] = offset;
compressedData['sizes'][i] = compressedChunks[i].length
offset += compressedChunks[i].length;
}
console.log('compressed package into ' + [compressedData['data'].length]);
assert(offset === total);
return compressedData;
};
assert(exports.CHUNK_SIZE < (1 << 15)); // we use 16-bit ints as the type of the hash table, chunk size must be smaller
return exports;
})();
return new Uint8Array(result, 0, uncompressedSize);
}