diff --git a/fe25519_25.js b/fe25519_25.js
index 0349979..8f5bd8a 100644
--- a/fe25519_25.js
+++ b/fe25519_25.js
@@ -35,6 +35,7 @@ const importWithMemory = {
 }
 
 const wasm_mul = require('./fe25519_25/fe25519_mul')(importWithMemory)
+const wasm_mul32 = require('./fe25519_25/fe25519_mul32')()
 const wasm_sq = require('./fe25519_25/fe25519_sq')(importWithMemory)
 const wasm_invert = require('./fe25519_25/fe25519_invert')(importWithMemory)
 const wasm_pow = require('./fe25519_25/fe25519_pow22523')()
@@ -799,6 +800,27 @@ function fe25519_mul (h, f, g) {
   parse_fe(h, mem, 80)
 }
 
+/*
+ h = f * n
+ n is passed to the wasm export as a 32-bit integer.
+ Can overlap h with f.
+ */
+function fe25519_mul32 (h, f, n) {
+  check_fe(h)
+  check_fe(f)
+
+  // View exactly f's bytes: f may be a window onto a larger ArrayBuffer, and
+  // new Uint8Array(f.buffer) would then copy the whole backing buffer.
+  var fbuf = new Uint8Array(f.buffer, f.byteOffset, f.byteLength)
+
+  // Input goes at wasm offset 0; the result is written at offset 40.
+  wasm_mul32.memory.set(fbuf)
+  wasm_mul32.exports.fe25519_mul32(40, 0, n)
+
+  const output = Buffer.from(wasm_mul32.memory.slice(40, 80))
+  parse_fe(h, output, 0)
+}
+
 /*
  h = f * f
  Can overlap h with f.
diff --git a/fe25519_25/fe25519_mul32.js b/fe25519_25/fe25519_mul32.js
new file mode 100644
index 0000000..c9625d5
--- /dev/null
+++ b/fe25519_25/fe25519_mul32.js
@@ -0,0 +1,65 @@
+
+module.exports = loadWebAssembly
+
+loadWebAssembly.supported = typeof WebAssembly !== 'undefined'
+
+// Instantiates the embedded wasm module and returns a handle exposing its
+// exports and a Uint8Array view of its memory, or null when WebAssembly is
+// not available. Instantiation is synchronous unless opts.async is set or
+// the synchronous attempt throws, in which case it completes via a promise.
+function loadWebAssembly (opts) {
+  if (!loadWebAssembly.supported) return null
+
+  var imp = opts && opts.imports
+  var wasm = toUint8Array('AGFzbQEAAAABFgJgDH5+fn5+fn5+fn5/fwBgA39/fwADAwIAAQUDAQABBxoCBm1lbW9yeQIADWZlMjU1MTlfbXVsMzIAAQrcBAKeBAEVfiAKrCEMIACnrCEAIAGnrCEBIAKnrCECIAOnrCEDIASnrCEEIAWnrCEFIAanrCEGIAenrCEHIAinrCEIIAmnrCEJIAAgDH4hDSABIAx+IQ4gAiAMfiEPIAMgDH4hECAEIAx+IREgBSAMfiESIAYgDH4hEyAHIAx+IRQgCCAMfiEVIAkgDH4hFiAWQgFCGIZ8QhmHISAgDSAgQhN+fCENIBYgIEIBQhmGfn0hFiAOQgFCGIZ8QhmHIRggDyAYfCEPIA4gGEIBQhmGfn0hDiAQQgFCGIZ8QhmHIRogESAafCERIBAgGkIBQhmGfn0hECASQgFCGIZ8QhmHIRwgEyAcfCETIBIgHEIBQhmGfn0hEiAUQgFCGIZ8QhmHIR4gFSAefCEVIBQgHkIBQhmGfn0hFCANQgFCGYZ8QhqHIRcgDiAXfCEOIA0gF0IBQhqGfn0hDSAPQgFCGYZ8QhqHIRkgECAZfCEQIA8gGUIBQhqGfn0hDyARQgFCGYZ8QhqHIRsgEiAbfCESIBEgG0IBQhqGfn0hESATQgFCGYZ8QhqHIR0gFCAdfCEUIBMgHUIBQhqGfn0hEyAVQgFCGYZ8QhqHIR8gFiAffCEWIBUgH0IBQhqGfn0hFSALIA0+AgAgCyAOPgIEIAsgDz4CCCALIBA+AgwgCyARPgIQIAsgEj4CFCALIBM+AhggCyAUPgIcIAsgFT4CICALIBY+AiQLOgAgATUCACABNQIEIAE1AgggATUCDCABNQIQIAE1AhQgATUCGCABNQIcIAE1AiAgATUCJCACIAAQAAs=')
+  var ready = null
+
+  var mod = {
+    buffer: wasm,
+    memory: null,
+    exports: null,
+    realloc: realloc,
+    onload: onload
+  }
+
+  onload(function () {})
+
+  return mod
+
+  function realloc (size) {
+    mod.exports.memory.grow(Math.max(0, Math.ceil(Math.abs(size - mod.memory.length) / 65536)))
+    mod.memory = new Uint8Array(mod.exports.memory.buffer)
+  }
+
+  function onload (cb) {
+    if (mod.exports) return cb()
+
+    if (ready) {
+      ready.then(cb.bind(null, null)).catch(cb)
+      return
+    }
+
+    try {
+      if (opts && opts.async) throw new Error('async')
+      setup({instance: new WebAssembly.Instance(new WebAssembly.Module(wasm), imp)})
+    } catch (err) {
+      ready = WebAssembly.instantiate(wasm, imp).then(setup)
+    }
+
+    onload(cb)
+  }
+
+  function setup (w) {
+    mod.exports = w.instance.exports
+    mod.memory = mod.exports.memory && mod.exports.memory.buffer && new Uint8Array(mod.exports.memory.buffer)
+  }
+}
+
+function toUint8Array (s) {
+  if (typeof atob === 'function') return new Uint8Array(atob(s).split('').map(charCodeAt))
+  return (require('buf' + 'fer').Buffer).from(s, 'base64')
+}
+
+function charCodeAt (c) {
+  return c.charCodeAt(0)
+}
diff --git a/fe25519_25/fe25519_mul32.wat b/fe25519_25/fe25519_mul32.wat
new file mode 100644
index 0000000..0e28720
--- /dev/null
+++ b/fe25519_25/fe25519_mul32.wat
@@ -0,0 +1,169 @@
+(module
+  (memory $0 1)
+  (export "memory" (memory $0))
+
+  ;; (func $i32.log (import "debug" "log") (param i32))
+  ;; (func $i32.log_tee (import "debug" "log_tee") (param i32) (result i32))
+  ;; ;; No i64 interop with JS yet - but maybe coming with WebAssembly BigInt
+  ;; ;; So we can instead fake this by splitting the i64 into two i32 limbs,
+  ;; ;; however these are WASM functions using i32x2.log:
+  ;; (func $i32x2.log (import "debug" "log") (param i32) (param i32))
+  ;; (func $f32.log (import "debug" "log") (param f32))
+  ;; (func $f32.log_tee (import "debug" "log_tee") (param f32) (result f32))
+  ;; (func $f64.log (import "debug" "log") (param f64))
+  ;; (func $f64.log_tee (import "debug" "log_tee") (param f64) (result f64))
+
+  ;; ;; i64 logging by splitting into two i32 limbs
+  ;; (func $i64.log
+  ;;   (param $0 i64)
+  ;;   (call $i32x2.log
+  ;;     ;; Upper limb
+  ;;     (i32.wrap/i64
+  ;;       (i64.shr_s (get_local $0)
+  ;;         (i64.const 32)))
+  ;;     ;; Lower limb
+  ;;     (i32.wrap/i64 (get_local $0))))
+
+  ;; (func $i64.log_tee
+  ;;   (param $0 i64)
+  ;;   (result i64)
+  ;;   (call $i64.log (get_local $0))
+  ;;   (return (get_local $0)))
+
+  ;; h = f * n for a field element held as ten limbs f0..f9.
+  ;; Each limb is truncated to 32 bits and sign-extended, multiplied by the
+  ;; sign-extended i32 scalar n, and then carries are propagated: odd limbs
+  ;; carry at 2^25, even limbs carry at 2^26, and the carry out of limb 9 is
+  ;; added to limb 0 multiplied by 19. The ten results are stored as 32-bit
+  ;; words starting at memory address $h.
+  (func $fe_mul32
+    (param $f0 i64)
+    (param $f1 i64)
+    (param $f2 i64)
+    (param $f3 i64)
+    (param $f4 i64)
+    (param $f5 i64)
+    (param $f6 i64)
+    (param $f7 i64)
+    (param $f8 i64)
+    (param $f9 i64)
+
+    (param $n i32)
+
+    (param $h i32)
+
+    (local $sn i64)
+
+    (local $h0 i64)
+    (local $h1 i64)
+    (local $h2 i64)
+    (local $h3 i64)
+    (local $h4 i64)
+    (local $h5 i64)
+    (local $h6 i64)
+    (local $h7 i64)
+    (local $h8 i64)
+    (local $h9 i64)
+
+    (local $carry0 i64)
+    (local $carry1 i64)
+    (local $carry2 i64)
+    (local $carry3 i64)
+    (local $carry4 i64)
+    (local $carry5 i64)
+    (local $carry6 i64)
+    (local $carry7 i64)
+    (local $carry8 i64)
+    (local $carry9 i64)
+
+    (set_local $sn (i64.extend_s/i32 (get_local $n)))
+
+    (set_local $f0 (i64.extend_s/i32 (i32.wrap/i64 (get_local $f0))))
+    (set_local $f1 (i64.extend_s/i32 (i32.wrap/i64 (get_local $f1))))
+    (set_local $f2 (i64.extend_s/i32 (i32.wrap/i64 (get_local $f2))))
+    (set_local $f3 (i64.extend_s/i32 (i32.wrap/i64 (get_local $f3))))
+    (set_local $f4 (i64.extend_s/i32 (i32.wrap/i64 (get_local $f4))))
+    (set_local $f5 (i64.extend_s/i32 (i32.wrap/i64 (get_local $f5))))
+    (set_local $f6 (i64.extend_s/i32 (i32.wrap/i64 (get_local $f6))))
+    (set_local $f7 (i64.extend_s/i32 (i32.wrap/i64 (get_local $f7))))
+    (set_local $f8 (i64.extend_s/i32 (i32.wrap/i64 (get_local $f8))))
+    (set_local $f9 (i64.extend_s/i32 (i32.wrap/i64 (get_local $f9))))
+
+    (set_local $h0 (i64.mul (get_local $f0) (get_local $sn)))
+    (set_local $h1 (i64.mul (get_local $f1) (get_local $sn)))
+    (set_local $h2 (i64.mul (get_local $f2) (get_local $sn)))
+    (set_local $h3 (i64.mul (get_local $f3) (get_local $sn)))
+    (set_local $h4 (i64.mul (get_local $f4) (get_local $sn)))
+    (set_local $h5 (i64.mul (get_local $f5) (get_local $sn)))
+    (set_local $h6 (i64.mul (get_local $f6) (get_local $sn)))
+    (set_local $h7 (i64.mul (get_local $f7) (get_local $sn)))
+    (set_local $h8 (i64.mul (get_local $f8) (get_local $sn)))
+    (set_local $h9 (i64.mul (get_local $f9) (get_local $sn)))
+
+    (set_local $carry9 (i64.shr_s (i64.add (get_local $h9) (i64.shl (i64.const 1) (i64.const 24))) (i64.const 25)))
+    (set_local $h0 (i64.add (get_local $h0) (i64.mul (get_local $carry9) (i64.const 19))))
+    (set_local $h9 (i64.sub (get_local $h9) (i64.mul (get_local $carry9) (i64.shl (i64.const 1) (i64.const 25)))))
+
+    (set_local $carry1 (i64.shr_s (i64.add (get_local $h1) (i64.shl (i64.const 1) (i64.const 24))) (i64.const 25)))
+    (set_local $h2 (i64.add (get_local $h2) (get_local $carry1)))
+    (set_local $h1 (i64.sub (get_local $h1) (i64.mul (get_local $carry1) (i64.shl (i64.const 1) (i64.const 25)))))
+
+    (set_local $carry3 (i64.shr_s (i64.add (get_local $h3) (i64.shl (i64.const 1) (i64.const 24))) (i64.const 25)))
+    (set_local $h4 (i64.add (get_local $h4) (get_local $carry3)))
+    (set_local $h3 (i64.sub (get_local $h3) (i64.mul (get_local $carry3) (i64.shl (i64.const 1) (i64.const 25)))))
+
+    (set_local $carry5 (i64.shr_s (i64.add (get_local $h5) (i64.shl (i64.const 1) (i64.const 24))) (i64.const 25)))
+    (set_local $h6 (i64.add (get_local $h6) (get_local $carry5)))
+    (set_local $h5 (i64.sub (get_local $h5) (i64.mul (get_local $carry5) (i64.shl (i64.const 1) (i64.const 25)))))
+
+    (set_local $carry7 (i64.shr_s (i64.add (get_local $h7) (i64.shl (i64.const 1) (i64.const 24))) (i64.const 25)))
+    (set_local $h8 (i64.add (get_local $h8) (get_local $carry7)))
+    (set_local $h7 (i64.sub (get_local $h7) (i64.mul (get_local $carry7) (i64.shl (i64.const 1) (i64.const 25)))))
+
+    (set_local $carry0 (i64.shr_s (i64.add (get_local $h0) (i64.shl (i64.const 1) (i64.const 25))) (i64.const 26)))
+    (set_local $h1 (i64.add (get_local $h1) (get_local $carry0)))
+    (set_local $h0 (i64.sub (get_local $h0) (i64.mul (get_local $carry0) (i64.shl (i64.const 1) (i64.const 26)))))
+
+    (set_local $carry2 (i64.shr_s (i64.add (get_local $h2) (i64.shl (i64.const 1) (i64.const 25))) (i64.const 26)))
+    (set_local $h3 (i64.add (get_local $h3) (get_local $carry2)))
+    (set_local $h2 (i64.sub (get_local $h2) (i64.mul (get_local $carry2) (i64.shl (i64.const 1) (i64.const 26)))))
+
+    (set_local $carry4 (i64.shr_s (i64.add (get_local $h4) (i64.shl (i64.const 1) (i64.const 25))) (i64.const 26)))
+    (set_local $h5 (i64.add (get_local $h5) (get_local $carry4)))
+    (set_local $h4 (i64.sub (get_local $h4) (i64.mul (get_local $carry4) (i64.shl (i64.const 1) (i64.const 26)))))
+
+    (set_local $carry6 (i64.shr_s (i64.add (get_local $h6) (i64.shl (i64.const 1) (i64.const 25))) (i64.const 26)))
+    (set_local $h7 (i64.add (get_local $h7) (get_local $carry6)))
+    (set_local $h6 (i64.sub (get_local $h6) (i64.mul (get_local $carry6) (i64.shl (i64.const 1) (i64.const 26)))))
+
+    (set_local $carry8 (i64.shr_s (i64.add (get_local $h8) (i64.shl (i64.const 1) (i64.const 25))) (i64.const 26)))
+    (set_local $h9 (i64.add (get_local $h9) (get_local $carry8)))
+    (set_local $h8 (i64.sub (get_local $h8) (i64.mul (get_local $carry8) (i64.shl (i64.const 1) (i64.const 26)))))
+
+    (i64.store32 offset=0 (get_local $h) (get_local $h0))
+    (i64.store32 offset=4 (get_local $h) (get_local $h1))
+    (i64.store32 offset=8 (get_local $h) (get_local $h2))
+    (i64.store32 offset=12 (get_local $h) (get_local $h3))
+    (i64.store32 offset=16 (get_local $h) (get_local $h4))
+    (i64.store32 offset=20 (get_local $h) (get_local $h5))
+    (i64.store32 offset=24 (get_local $h) (get_local $h6))
+    (i64.store32 offset=28 (get_local $h) (get_local $h7))
+    (i64.store32 offset=32 (get_local $h) (get_local $h8))
+    (i64.store32 offset=36 (get_local $h) (get_local $h9)))
+
+  ;; Exported entry point: loads ten 32-bit words from memory address $f,
+  ;; then calls $fe_mul32 with them, the scalar $n and the output address $h.
+  (func $fe25519_mul32 (export "fe25519_mul32") (param $h i32) (param $f i32) (param $n i32)
+    (i64.load32_u offset=0 (get_local $f))
+    (i64.load32_u offset=4 (get_local $f))
+    (i64.load32_u offset=8 (get_local $f))
+    (i64.load32_u offset=12 (get_local $f))
+    (i64.load32_u offset=16 (get_local $f))
+    (i64.load32_u offset=20 (get_local $f))
+    (i64.load32_u offset=24 (get_local $f))
+    (i64.load32_u offset=28 (get_local $f))
+    (i64.load32_u offset=32 (get_local $f))
+    (i64.load32_u offset=36 (get_local $f))
+    (get_local $n)
+    (get_local $h)
+    (call $fe_mul32)))