forked from cerc-io/ipld-eth-server
899 lines
13 KiB
Go
899 lines
13 KiB
Go
// +build ignore
|
|
|
|
package main
|
|
|
|
import (
|
|
"flag"
|
|
|
|
. "github.com/mmcloughlin/avo/build"
|
|
. "github.com/mmcloughlin/avo/operand"
|
|
. "github.com/mmcloughlin/avo/reg"
|
|
)
|
|
|
|
const k0 uint64 = 0xc3a5c85c97cb3127
|
|
const k1 uint64 = 0xb492b66fbe98f273
|
|
const k2 uint64 = 0x9ae16a3b2f90404f
|
|
|
|
const c1 uint32 = 0xcc9e2d51
|
|
const c2 uint32 = 0x1b873593
|
|
|
|
func shiftMix(val GPVirtual) GPVirtual {
|
|
r := GP64()
|
|
MOVQ(val, r)
|
|
SHRQ(Imm(47), r)
|
|
XORQ(val, r)
|
|
return r
|
|
}
|
|
|
|
func shiftMix64(val uint64) uint64 {
|
|
return val ^ (val >> 47)
|
|
}
|
|
|
|
func hashLen16MulLine(a, b, c, d, k, mul GPVirtual) GPVirtual {
|
|
tmpa := GP64()
|
|
MOVQ(a, tmpa)
|
|
|
|
ADDQ(b, tmpa)
|
|
RORQ(Imm(43), tmpa)
|
|
ADDQ(d, tmpa)
|
|
tmpc := GP64()
|
|
MOVQ(c, tmpc)
|
|
RORQ(Imm(30), tmpc)
|
|
ADDQ(tmpc, tmpa)
|
|
|
|
ADDQ(c, a)
|
|
ADDQ(k, b)
|
|
RORQ(Imm(18), b)
|
|
ADDQ(b, a)
|
|
|
|
r := hashLen16Mul(tmpa, a, mul)
|
|
return r
|
|
}
|
|
|
|
func hashLen16Mul(u, v, mul GPVirtual) GPVirtual {
|
|
XORQ(v, u)
|
|
IMULQ(mul, u)
|
|
a := shiftMix(u)
|
|
|
|
XORQ(a, v)
|
|
IMULQ(mul, v)
|
|
b := shiftMix(v)
|
|
|
|
IMULQ(mul, b)
|
|
|
|
return b
|
|
}
|
|
|
|
func hashLen0to16(sbase, slen GPVirtual) {
|
|
CMPQ(slen, Imm(8))
|
|
JL(LabelRef("check4"))
|
|
{
|
|
a := GP64()
|
|
MOVQ(Mem{Base: sbase}, a)
|
|
|
|
b := GP64()
|
|
t := GP64()
|
|
MOVQ(slen, t)
|
|
SUBQ(Imm(8), t)
|
|
ADDQ(sbase, t)
|
|
MOVQ(Mem{Base: t}, b)
|
|
|
|
rk2 := GP64()
|
|
MOVQ(Imm(k2), rk2)
|
|
|
|
ADDQ(rk2, a)
|
|
|
|
mul := slen
|
|
SHLQ(Imm(1), mul)
|
|
ADDQ(rk2, mul)
|
|
|
|
c := GP64()
|
|
MOVQ(b, c)
|
|
RORQ(Imm(37), c)
|
|
IMULQ(mul, c)
|
|
ADDQ(a, c)
|
|
|
|
d := GP64()
|
|
MOVQ(a, d)
|
|
RORQ(Imm(25), d)
|
|
ADDQ(b, d)
|
|
IMULQ(mul, d)
|
|
|
|
r := hashLen16Mul(c, d, mul)
|
|
Store(r, ReturnIndex(0))
|
|
RET()
|
|
}
|
|
|
|
Label("check4")
|
|
|
|
CMPQ(slen, Imm(4))
|
|
JL(LabelRef("check0"))
|
|
{
|
|
rk2 := GP64()
|
|
MOVQ(Imm(k2), rk2)
|
|
|
|
mul := GP64()
|
|
MOVQ(slen, mul)
|
|
SHLQ(Imm(1), mul)
|
|
ADDQ(rk2, mul)
|
|
|
|
a := GP64()
|
|
MOVL(Mem{Base: sbase}, a.As32())
|
|
|
|
SHLQ(Imm(3), a)
|
|
ADDQ(slen, a)
|
|
|
|
b := GP64()
|
|
SUBQ(Imm(4), slen)
|
|
ADDQ(slen, sbase)
|
|
MOVL(Mem{Base: sbase}, b.As32())
|
|
r := hashLen16Mul(a, b, mul)
|
|
|
|
Store(r, ReturnIndex(0))
|
|
RET()
|
|
}
|
|
|
|
Label("check0")
|
|
TESTQ(slen, slen)
|
|
JZ(LabelRef("empty"))
|
|
{
|
|
|
|
a := GP64()
|
|
MOVBQZX(Mem{Base: sbase}, a)
|
|
|
|
base := GP64()
|
|
MOVQ(slen, base)
|
|
SHRQ(Imm(1), base)
|
|
|
|
b := GP64()
|
|
ADDQ(sbase, base)
|
|
MOVBQZX(Mem{Base: base}, b)
|
|
|
|
MOVQ(slen, base)
|
|
SUBQ(Imm(1), base)
|
|
c := GP64()
|
|
ADDQ(sbase, base)
|
|
MOVBQZX(Mem{Base: base}, c)
|
|
|
|
SHLQ(Imm(8), b)
|
|
ADDQ(b, a)
|
|
y := a
|
|
|
|
SHLQ(Imm(2), c)
|
|
ADDQ(c, slen)
|
|
z := slen
|
|
|
|
rk0 := GP64()
|
|
MOVQ(Imm(k0), rk0)
|
|
IMULQ(rk0, z)
|
|
|
|
rk2 := GP64()
|
|
MOVQ(Imm(k2), rk2)
|
|
|
|
IMULQ(rk2, y)
|
|
XORQ(y, z)
|
|
|
|
r := shiftMix(z)
|
|
IMULQ(rk2, r)
|
|
|
|
Store(r, ReturnIndex(0))
|
|
RET()
|
|
}
|
|
|
|
Label("empty")
|
|
|
|
ret := GP64()
|
|
MOVQ(Imm(k2), ret)
|
|
Store(ret, ReturnIndex(0))
|
|
RET()
|
|
}
|
|
|
|
func hashLen17to32(sbase, slen GPVirtual) {
|
|
mul := GP64()
|
|
MOVQ(slen, mul)
|
|
SHLQ(Imm(1), mul)
|
|
|
|
rk2 := GP64()
|
|
MOVQ(Imm(k2), rk2)
|
|
ADDQ(rk2, mul)
|
|
|
|
a := GP64()
|
|
MOVQ(Mem{Base: sbase}, a)
|
|
|
|
rk1 := GP64()
|
|
MOVQ(Imm(k1), rk1)
|
|
IMULQ(rk1, a)
|
|
|
|
b := GP64()
|
|
MOVQ(Mem{Base: sbase, Disp: 8}, b)
|
|
|
|
base := GP64()
|
|
MOVQ(slen, base)
|
|
SUBQ(Imm(16), base)
|
|
ADDQ(sbase, base)
|
|
|
|
c := GP64()
|
|
MOVQ(Mem{Base: base, Disp: 8}, c)
|
|
IMULQ(mul, c)
|
|
|
|
d := GP64()
|
|
MOVQ(Mem{Base: base}, d)
|
|
IMULQ(rk2, d)
|
|
|
|
r := hashLen16MulLine(a, b, c, d, rk2, mul)
|
|
Store(r, ReturnIndex(0))
|
|
RET()
|
|
}
|
|
|
|
// Return an 8-byte hash for 33 to 64 bytes.
|
|
func hashLen33to64(sbase, slen GPVirtual) {
|
|
mul := GP64()
|
|
MOVQ(slen, mul)
|
|
SHLQ(Imm(1), mul)
|
|
|
|
rk2 := GP64()
|
|
MOVQ(Imm(k2), rk2)
|
|
ADDQ(rk2, mul)
|
|
|
|
a := GP64()
|
|
MOVQ(Mem{Base: sbase}, a)
|
|
IMULQ(rk2, a)
|
|
|
|
b := GP64()
|
|
MOVQ(Mem{Base: sbase, Disp: 8}, b)
|
|
|
|
base := GP64()
|
|
MOVQ(slen, base)
|
|
SUBQ(Imm(16), base)
|
|
ADDQ(sbase, base)
|
|
|
|
c := GP64()
|
|
MOVQ(Mem{Base: base, Disp: 8}, c)
|
|
IMULQ(mul, c)
|
|
|
|
d := GP64()
|
|
MOVQ(Mem{Base: base}, d)
|
|
IMULQ(rk2, d)
|
|
|
|
y := GP64()
|
|
MOVQ(a, y)
|
|
|
|
ADDQ(b, y)
|
|
RORQ(Imm(43), y)
|
|
ADDQ(d, y)
|
|
tmpc := GP64()
|
|
MOVQ(c, tmpc)
|
|
RORQ(Imm(30), tmpc)
|
|
ADDQ(tmpc, y)
|
|
|
|
ADDQ(a, c)
|
|
ADDQ(rk2, b)
|
|
RORQ(Imm(18), b)
|
|
ADDQ(b, c)
|
|
|
|
tmpy := GP64()
|
|
MOVQ(y, tmpy)
|
|
z := hashLen16Mul(tmpy, c, mul)
|
|
|
|
e := GP64()
|
|
MOVQ(Mem{Base: sbase, Disp: 16}, e)
|
|
IMULQ(mul, e)
|
|
|
|
f := GP64()
|
|
MOVQ(Mem{Base: sbase, Disp: 24}, f)
|
|
|
|
base = GP64()
|
|
MOVQ(slen, base)
|
|
SUBQ(Imm(32), base)
|
|
ADDQ(sbase, base)
|
|
g := GP64()
|
|
MOVQ(Mem{Base: base}, g)
|
|
ADDQ(y, g)
|
|
IMULQ(mul, g)
|
|
|
|
h := GP64()
|
|
MOVQ(Mem{Base: base, Disp: 8}, h)
|
|
ADDQ(z, h)
|
|
IMULQ(mul, h)
|
|
|
|
r := hashLen16MulLine(e, f, g, h, a, mul)
|
|
Store(r, ReturnIndex(0))
|
|
RET()
|
|
}
|
|
|
|
// Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty.
|
|
func weakHashLen32WithSeeds(sbase GPVirtual, disp int, a, b GPVirtual) {
|
|
|
|
w := Mem{Base: sbase, Disp: disp + 0}
|
|
x := Mem{Base: sbase, Disp: disp + 8}
|
|
y := Mem{Base: sbase, Disp: disp + 16}
|
|
z := Mem{Base: sbase, Disp: disp + 24}
|
|
|
|
// a += w
|
|
ADDQ(w, a)
|
|
|
|
// b = bits.RotateLeft64(b+a+z, -21)
|
|
ADDQ(a, b)
|
|
ADDQ(z, b)
|
|
RORQ(Imm(21), b)
|
|
|
|
// c := a
|
|
c := GP64()
|
|
MOVQ(a, c)
|
|
|
|
// a += x
|
|
// a += y
|
|
ADDQ(x, a)
|
|
ADDQ(y, a)
|
|
|
|
// b += bits.RotateLeft64(a, -44)
|
|
atmp := GP64()
|
|
MOVQ(a, atmp)
|
|
RORQ(Imm(44), atmp)
|
|
ADDQ(atmp, b)
|
|
|
|
// a += z
|
|
// b += c
|
|
ADDQ(z, a)
|
|
ADDQ(c, b)
|
|
|
|
XCHGQ(a, b)
|
|
}
|
|
|
|
func hashLoopBody(x, y, z, vlo, vhi, wlo, whi, sbase GPVirtual, mul1 GPVirtual, mul2 uint64) {
|
|
ADDQ(y, x)
|
|
ADDQ(vlo, x)
|
|
ADDQ(Mem{Base: sbase, Disp: 8}, x)
|
|
RORQ(Imm(37), x)
|
|
|
|
IMULQ(mul1, x)
|
|
|
|
ADDQ(vhi, y)
|
|
ADDQ(Mem{Base: sbase, Disp: 48}, y)
|
|
RORQ(Imm(42), y)
|
|
IMULQ(mul1, y)
|
|
|
|
if mul2 != 1 {
|
|
t := GP64()
|
|
MOVQ(U32(mul2), t)
|
|
IMULQ(whi, t)
|
|
XORQ(t, x)
|
|
} else {
|
|
XORQ(whi, x)
|
|
}
|
|
|
|
if mul2 != 1 {
|
|
t := GP64()
|
|
MOVQ(U32(mul2), t)
|
|
IMULQ(vlo, t)
|
|
ADDQ(t, y)
|
|
} else {
|
|
ADDQ(vlo, y)
|
|
}
|
|
|
|
ADDQ(Mem{Base: sbase, Disp: 40}, y)
|
|
|
|
ADDQ(wlo, z)
|
|
RORQ(Imm(33), z)
|
|
IMULQ(mul1, z)
|
|
|
|
{
|
|
IMULQ(mul1, vhi)
|
|
MOVQ(x, vlo)
|
|
ADDQ(wlo, vlo)
|
|
weakHashLen32WithSeeds(sbase, 0, vhi, vlo)
|
|
}
|
|
|
|
{
|
|
ADDQ(z, whi)
|
|
MOVQ(y, wlo)
|
|
ADDQ(Mem{Base: sbase, Disp: 16}, wlo)
|
|
weakHashLen32WithSeeds(sbase, 32, whi, wlo)
|
|
}
|
|
|
|
XCHGQ(z, x)
|
|
}
|
|
|
|
func fp64() {
|
|
|
|
TEXT("Fingerprint64", NOSPLIT, "func(s []byte) uint64")
|
|
|
|
slen := GP64()
|
|
sbase := GP64()
|
|
|
|
Load(Param("s").Base(), sbase)
|
|
Load(Param("s").Len(), slen)
|
|
|
|
CMPQ(slen, Imm(16))
|
|
JG(LabelRef("check32"))
|
|
hashLen0to16(sbase, slen)
|
|
|
|
Label("check32")
|
|
CMPQ(slen, Imm(32))
|
|
JG(LabelRef("check64"))
|
|
hashLen17to32(sbase, slen)
|
|
|
|
Label("check64")
|
|
CMPQ(slen, Imm(64))
|
|
JG(LabelRef("long"))
|
|
hashLen33to64(sbase, slen)
|
|
|
|
Label("long")
|
|
|
|
seed := uint64(81)
|
|
|
|
vlo, vhi, wlo, whi := GP64(), GP64(), GP64(), GP64()
|
|
XORQ(vlo, vlo)
|
|
XORQ(vhi, vhi)
|
|
XORQ(wlo, wlo)
|
|
XORQ(whi, whi)
|
|
|
|
x := GP64()
|
|
|
|
eightOne := uint64(81)
|
|
|
|
MOVQ(Imm(eightOne*k2), x)
|
|
ADDQ(Mem{Base: sbase}, x)
|
|
|
|
y := GP64()
|
|
y64 := uint64(seed*k1) + 113
|
|
MOVQ(Imm(y64), y)
|
|
|
|
z := GP64()
|
|
MOVQ(Imm(shiftMix64(y64*k2+113)*k2), z)
|
|
|
|
endIdx := GP64()
|
|
MOVQ(slen, endIdx)
|
|
tmp := GP64()
|
|
SUBQ(Imm(1), endIdx)
|
|
MOVQ(U64(^uint64(63)), tmp)
|
|
ANDQ(tmp, endIdx)
|
|
last64Idx := GP64()
|
|
MOVQ(slen, last64Idx)
|
|
SUBQ(Imm(1), last64Idx)
|
|
ANDQ(Imm(63), last64Idx)
|
|
SUBQ(Imm(63), last64Idx)
|
|
ADDQ(endIdx, last64Idx)
|
|
|
|
last64 := GP64()
|
|
MOVQ(last64Idx, last64)
|
|
ADDQ(sbase, last64)
|
|
|
|
end := GP64()
|
|
MOVQ(slen, end)
|
|
|
|
Label("loop")
|
|
|
|
rk1 := GP64()
|
|
MOVQ(Imm(k1), rk1)
|
|
|
|
hashLoopBody(x, y, z, vlo, vhi, wlo, whi, sbase, rk1, 1)
|
|
|
|
ADDQ(Imm(64), sbase)
|
|
SUBQ(Imm(64), end)
|
|
CMPQ(end, Imm(64))
|
|
JG(LabelRef("loop"))
|
|
|
|
MOVQ(last64, sbase)
|
|
|
|
mul := GP64()
|
|
MOVQ(z, mul)
|
|
ANDQ(Imm(0xff), mul)
|
|
SHLQ(Imm(1), mul)
|
|
ADDQ(rk1, mul)
|
|
|
|
MOVQ(last64, sbase)
|
|
|
|
SUBQ(Imm(1), slen)
|
|
ANDQ(Imm(63), slen)
|
|
ADDQ(slen, wlo)
|
|
|
|
ADDQ(wlo, vlo)
|
|
ADDQ(vlo, wlo)
|
|
|
|
hashLoopBody(x, y, z, vlo, vhi, wlo, whi, sbase, mul, 9)
|
|
|
|
{
|
|
a := hashLen16Mul(vlo, wlo, mul)
|
|
ADDQ(z, a)
|
|
b := shiftMix(y)
|
|
rk0 := GP64()
|
|
MOVQ(Imm(k0), rk0)
|
|
IMULQ(rk0, b)
|
|
ADDQ(b, a)
|
|
|
|
c := hashLen16Mul(vhi, whi, mul)
|
|
ADDQ(x, c)
|
|
|
|
r := hashLen16Mul(a, c, mul)
|
|
Store(r, ReturnIndex(0))
|
|
}
|
|
|
|
RET()
|
|
}
|
|
|
|
func fmix(h GPVirtual) GPVirtual {
|
|
h2 := GP32()
|
|
MOVL(h, h2)
|
|
SHRL(Imm(16), h2)
|
|
XORL(h2, h)
|
|
|
|
MOVL(Imm(0x85ebca6b), h2)
|
|
IMULL(h2, h)
|
|
|
|
MOVL(h, h2)
|
|
SHRL(Imm(13), h2)
|
|
XORL(h2, h)
|
|
|
|
MOVL(Imm(0xc2b2ae35), h2)
|
|
IMULL(h2, h)
|
|
|
|
MOVL(h, h2)
|
|
SHRL(Imm(16), h2)
|
|
XORL(h2, h)
|
|
return h
|
|
}
|
|
|
|
func mur(a, h GPVirtual) GPVirtual {
|
|
imul3l(c1, a, a)
|
|
RORL(Imm(17), a)
|
|
imul3l(c2, a, a)
|
|
XORL(a, h)
|
|
RORL(Imm(19), h)
|
|
|
|
LEAL(Mem{Base: h, Index: h, Scale: 4}, a)
|
|
LEAL(Mem{Base: a, Disp: 0xe6546b64}, h)
|
|
|
|
return h
|
|
}
|
|
|
|
func hash32Len5to12(sbase, slen GPVirtual) {
|
|
|
|
a := GP32()
|
|
MOVL(slen.As32(), a)
|
|
b := GP32()
|
|
MOVL(a, b)
|
|
SHLL(Imm(2), b)
|
|
ADDL(a, b)
|
|
|
|
c := GP32()
|
|
MOVL(U32(9), c)
|
|
|
|
d := GP32()
|
|
MOVL(b, d)
|
|
|
|
ADDL(Mem{Base: sbase, Disp: 0}, a)
|
|
|
|
t := GP64()
|
|
MOVQ(slen, t)
|
|
SUBQ(Imm(4), t)
|
|
ADDQ(sbase, t)
|
|
ADDL(Mem{Base: t}, b)
|
|
|
|
MOVQ(slen, t)
|
|
SHRQ(Imm(1), t)
|
|
ANDQ(Imm(4), t)
|
|
ADDQ(sbase, t)
|
|
ADDL(Mem{Base: t}, c)
|
|
|
|
t = mur(a, d)
|
|
t = mur(b, t)
|
|
t = mur(c, t)
|
|
t = fmix(t)
|
|
|
|
Store(t, ReturnIndex(0))
|
|
RET()
|
|
}
|
|
|
|
func hash32Len13to24Seed(sbase, slen GPVirtual) {
|
|
slen2 := GP64()
|
|
MOVQ(slen, slen2)
|
|
SHRQ(Imm(1), slen2)
|
|
ADDQ(sbase, slen2)
|
|
|
|
a := GP32()
|
|
MOVL(Mem{Base: slen2, Disp: -4}, a)
|
|
|
|
b := GP32()
|
|
MOVL(Mem{Base: sbase, Disp: 4}, b)
|
|
|
|
send := GP64()
|
|
MOVQ(slen, send)
|
|
ADDQ(sbase, send)
|
|
|
|
c := GP32()
|
|
MOVL(Mem{Base: send, Disp: -8}, c)
|
|
|
|
d := GP32()
|
|
MOVL(Mem{Base: slen2}, d)
|
|
|
|
e := GP32()
|
|
MOVL(Mem{Base: sbase}, e)
|
|
|
|
f := GP32()
|
|
MOVL(Mem{Base: send, Disp: -4}, f)
|
|
|
|
h := GP32()
|
|
MOVL(U32(c1), h)
|
|
IMULL(d, h)
|
|
ADDL(slen.As32(), h)
|
|
|
|
RORL(Imm(12), a)
|
|
ADDL(f, a)
|
|
|
|
ctmp := GP32()
|
|
MOVL(c, ctmp)
|
|
h = mur(ctmp, h)
|
|
ADDL(a, h)
|
|
|
|
RORL(Imm(3), a)
|
|
ADDL(c, a)
|
|
|
|
h = mur(e, h)
|
|
ADDL(a, h)
|
|
|
|
ADDL(f, a)
|
|
RORL(Imm(12), a)
|
|
ADDL(d, a)
|
|
|
|
h = mur(b, h)
|
|
ADDL(a, h)
|
|
|
|
h = fmix(h)
|
|
|
|
Store(h, ReturnIndex(0))
|
|
RET()
|
|
}
|
|
|
|
func hash32Len0to4(sbase, slen GPVirtual) {
|
|
b := GP32()
|
|
c := GP32()
|
|
|
|
XORL(b, b)
|
|
MOVL(U32(9), c)
|
|
|
|
TESTQ(slen, slen)
|
|
JZ(LabelRef("done"))
|
|
|
|
l := GP64()
|
|
v := GP32()
|
|
MOVQ(slen, l)
|
|
|
|
c1reg := GP32()
|
|
MOVL(U32(c1), c1reg)
|
|
|
|
for i := 0; i < 4; i++ {
|
|
IMULL(c1reg, b)
|
|
MOVBLSX(Mem{Base: sbase, Disp: i}, v)
|
|
ADDL(v, b)
|
|
XORL(b, c)
|
|
SUBQ(Imm(1), l)
|
|
TESTQ(l, l)
|
|
JZ(LabelRef("done"))
|
|
}
|
|
|
|
Label("done")
|
|
|
|
s32 := GP32()
|
|
MOVL(slen.As32(), s32)
|
|
r := mur(s32, c)
|
|
r = mur(b, r)
|
|
r = fmix(r)
|
|
|
|
Store(r, ReturnIndex(0))
|
|
RET()
|
|
}
|
|
|
|
func fp32() {
|
|
|
|
TEXT("Fingerprint32", NOSPLIT, "func(s []byte) uint32")
|
|
|
|
sbase := GP64()
|
|
slen := GP64()
|
|
|
|
Load(Param("s").Base(), sbase)
|
|
Load(Param("s").Len(), slen)
|
|
|
|
CMPQ(slen, Imm(24))
|
|
JG(LabelRef("long"))
|
|
|
|
CMPQ(slen, Imm(12))
|
|
JG(LabelRef("hash_13_24"))
|
|
|
|
CMPQ(slen, Imm(4))
|
|
JG(LabelRef("hash_5_12"))
|
|
hash32Len0to4(sbase, slen)
|
|
|
|
Label("hash_5_12")
|
|
hash32Len5to12(sbase, slen)
|
|
|
|
Label("hash_13_24")
|
|
hash32Len13to24Seed(sbase, slen)
|
|
|
|
Label("long")
|
|
|
|
h := GP32()
|
|
MOVL(slen.As32(), h)
|
|
|
|
g := GP32()
|
|
MOVL(U32(c1), g)
|
|
IMULL(h, g)
|
|
|
|
f := GP32()
|
|
MOVL(g, f)
|
|
|
|
// len > 24
|
|
|
|
send := GP64()
|
|
MOVQ(slen, send)
|
|
ADDQ(sbase, send)
|
|
c1reg := GP32()
|
|
MOVL(U32(c1), c1reg)
|
|
c2reg := GP32()
|
|
MOVL(U32(c2), c2reg)
|
|
|
|
shuf := func(r GPVirtual, disp int) {
|
|
a := GP32()
|
|
MOVL(Mem{Base: send, Disp: disp}, a)
|
|
IMULL(c1reg, a)
|
|
RORL(Imm(17), a)
|
|
IMULL(c2reg, a)
|
|
XORL(a, r)
|
|
RORL(Imm(19), r)
|
|
MOVL(r, a)
|
|
SHLL(Imm(2), a)
|
|
ADDL(a, r)
|
|
ADDL(Imm(0xe6546b64), r)
|
|
}
|
|
|
|
shuf(h, -4)
|
|
shuf(g, -8)
|
|
shuf(h, -16)
|
|
shuf(g, -12)
|
|
|
|
PREFETCHT0(Mem{Base: sbase})
|
|
{
|
|
a := GP32()
|
|
MOVL(Mem{Base: send, Disp: -20}, a)
|
|
IMULL(c1reg, a)
|
|
RORL(Imm(17), a)
|
|
IMULL(c2reg, a)
|
|
|
|
ADDL(a, f)
|
|
RORL(Imm(19), f)
|
|
ADDL(Imm(113), f)
|
|
|
|
}
|
|
|
|
loop32Body := func(f, g, h, sbase, slen GPVirtual, disp int) {
|
|
a, b, c, d, e := GP32(), GP32(), GP32(), GP32(), GP32()
|
|
|
|
MOVL(Mem{Base: sbase, Disp: disp + 0}, a)
|
|
ADDL(a, h)
|
|
|
|
MOVL(Mem{Base: sbase, Disp: disp + 4}, b)
|
|
ADDL(b, g)
|
|
|
|
MOVL(Mem{Base: sbase, Disp: disp + 8}, c)
|
|
ADDL(c, f)
|
|
|
|
MOVL(Mem{Base: sbase, Disp: disp + 12}, d)
|
|
t := GP32()
|
|
MOVL(d, t)
|
|
h = mur(t, h)
|
|
|
|
MOVL(Mem{Base: sbase, Disp: disp + 16}, e)
|
|
ADDL(e, h)
|
|
|
|
MOVL(c, t)
|
|
g = mur(t, g)
|
|
ADDL(a, g)
|
|
|
|
imul3l(c1, e, t)
|
|
ADDL(b, t)
|
|
f = mur(t, f)
|
|
ADDL(d, f)
|
|
|
|
ADDL(g, f)
|
|
ADDL(f, g)
|
|
}
|
|
|
|
Label("loop80")
|
|
CMPQ(slen, Imm(80+20))
|
|
JL(LabelRef("loop20"))
|
|
{
|
|
PREFETCHT0(Mem{Base: sbase, Disp: 20})
|
|
loop32Body(f, g, h, sbase, slen, 0)
|
|
PREFETCHT0(Mem{Base: sbase, Disp: 40})
|
|
loop32Body(f, g, h, sbase, slen, 20)
|
|
PREFETCHT0(Mem{Base: sbase, Disp: 60})
|
|
loop32Body(f, g, h, sbase, slen, 40)
|
|
PREFETCHT0(Mem{Base: sbase, Disp: 80})
|
|
loop32Body(f, g, h, sbase, slen, 60)
|
|
|
|
ADDQ(Imm(80), sbase)
|
|
SUBQ(Imm(80), slen)
|
|
JMP(LabelRef("loop80"))
|
|
}
|
|
|
|
Label("loop20")
|
|
CMPQ(slen, Imm(20))
|
|
JLE(LabelRef("after"))
|
|
{
|
|
loop32Body(f, g, h, sbase, slen, 0)
|
|
|
|
ADDQ(Imm(20), sbase)
|
|
SUBQ(Imm(20), slen)
|
|
JMP(LabelRef("loop20"))
|
|
}
|
|
|
|
Label("after")
|
|
|
|
c1reg = GP32()
|
|
MOVL(U32(c1), c1reg)
|
|
|
|
RORL(Imm(11), g)
|
|
IMULL(c1reg, g)
|
|
|
|
RORL(Imm(17), g)
|
|
IMULL(c1reg, g)
|
|
|
|
RORL(Imm(11), f)
|
|
IMULL(c1reg, f)
|
|
|
|
RORL(Imm(17), f)
|
|
IMULL(c1reg, f)
|
|
|
|
ADDL(g, h)
|
|
RORL(Imm(19), h)
|
|
|
|
t := GP32()
|
|
MOVL(h, t)
|
|
SHLL(Imm(2), t)
|
|
ADDL(t, h)
|
|
ADDL(Imm(0xe6546b64), h)
|
|
|
|
RORL(Imm(17), h)
|
|
IMULL(c1reg, h)
|
|
|
|
ADDL(f, h)
|
|
RORL(Imm(19), h)
|
|
|
|
t = GP32()
|
|
MOVL(h, t)
|
|
SHLL(Imm(2), t)
|
|
ADDL(t, h)
|
|
ADDL(Imm(0xe6546b64), h)
|
|
|
|
RORL(Imm(17), h)
|
|
IMULL(c1reg, h)
|
|
|
|
Store(h, ReturnIndex(0))
|
|
RET()
|
|
}
|
|
|
|
var go111 = flag.Bool("go111", true, "use assembly instructions present in go1.11 and later")
|
|
|
|
func imul3l(m uint32, x, y Register) {
|
|
if *go111 {
|
|
IMUL3L(U32(m), x, y)
|
|
} else {
|
|
t := GP32()
|
|
MOVL(U32(m), t)
|
|
IMULL(t, x)
|
|
MOVL(x, y)
|
|
}
|
|
}
|
|
|
|
func main() {
|
|
|
|
flag.Parse()
|
|
|
|
ConstraintExpr("amd64,!purego")
|
|
|
|
fp64()
|
|
fp32()
|
|
|
|
Generate()
|
|
}
|