ipld-eth-server/vendor/github.com/dgryski/go-farm/asm.go

899 lines
13 KiB
Go
Raw Normal View History

// +build ignore
package main
import (
"flag"
. "github.com/mmcloughlin/avo/build"
. "github.com/mmcloughlin/avo/operand"
. "github.com/mmcloughlin/avo/reg"
)
// Multiplicative constants used by the 64-bit hash emitters.
const (
	k0 uint64 = 0xc3a5c85c97cb3127
	k1 uint64 = 0xb492b66fbe98f273
	k2 uint64 = 0x9ae16a3b2f90404f
)

// Multiplicative constants used by the 32-bit hash emitters.
const (
	c1 uint32 = 0xcc9e2d51
	c2 uint32 = 0x1b873593
)
// shiftMix emits instructions that compute val ^ (val >> 47) into a newly
// allocated 64-bit register and returns that register. val is only read.
func shiftMix(val GPVirtual) GPVirtual {
	mixed := GP64()
	MOVQ(val, mixed)     // mixed = val
	SHRQ(Imm(47), mixed) // mixed = val >> 47
	XORQ(val, mixed)     // mixed = val ^ (val >> 47)
	return mixed
}
// shiftMix64 computes val ^ (val >> 47) on a plain Go value. It is evaluated
// while generating code, to precompute immediates, and emits no instructions.
func shiftMix64(val uint64) uint64 {
	high := val >> 47
	return val ^ high
}
// hashLen16MulLine emits the closing mix used by hashLen17to32 and
// hashLen33to64. In terms of register values it produces
//
//	hashLen16Mul(ror(a+b, 43) + d + ror(c, 30), a + c + ror(b+k, 18), mul)
//
// and returns the register holding the result. a and b are overwritten;
// c, d, k and mul are only read.
func hashLen16MulLine(a, b, c, d, k, mul GPVirtual) GPVirtual {
	// first = ror(a+b, 43) + d
	first := GP64()
	MOVQ(a, first)
	ADDQ(b, first)
	RORQ(Imm(43), first)
	ADDQ(d, first)

	// first += ror(c, 30), computed on a copy so c stays intact.
	rotC := GP64()
	MOVQ(c, rotC)
	RORQ(Imm(30), rotC)
	ADDQ(rotC, first)

	// a = a + c + ror(b+k, 18)
	ADDQ(c, a)
	ADDQ(k, b)
	RORQ(Imm(18), b)
	ADDQ(b, a)

	return hashLen16Mul(first, a, mul)
}
// hashLen16Mul emits the two-input mixing function and returns the register
// holding its result:
//
//	u = (u ^ v) * mul;  u' = u ^ (u >> 47)
//	v = (v ^ u') * mul; v' = v ^ (v >> 47)
//	result = v' * mul
//
// u and v are overwritten in the process; mul is only read.
func hashLen16Mul(u, v, mul GPVirtual) GPVirtual {
	XORQ(v, u)
	IMULQ(mul, u)
	mixedU := shiftMix(u)

	XORQ(mixedU, v)
	IMULQ(mul, v)
	result := shiftMix(v)

	IMULQ(mul, result)
	return result
}
// hashLen0to16 emits the hash for inputs of 0 to 16 bytes. sbase holds the
// address of the first input byte and slen the input length.
//
// The emitted code has four branches (8..16, 4..7, 1..3 and 0 bytes). Each one
// stores the result in the first return slot and ends in RET. Both sbase and
// slen may be overwritten.
func hashLen0to16(sbase, slen GPVirtual) {
	// 8..16 bytes: mix the first and the last 8 bytes of the input.
	CMPQ(slen, Imm(8))
	JL(LabelRef("check4"))
	{
		head := GP64()
		MOVQ(Mem{Base: sbase}, head)

		tail := GP64()
		tailAddr := GP64()
		MOVQ(slen, tailAddr)
		SUBQ(Imm(8), tailAddr)
		ADDQ(sbase, tailAddr)
		MOVQ(Mem{Base: tailAddr}, tail)

		k2reg := GP64()
		MOVQ(Imm(k2), k2reg)
		ADDQ(k2reg, head) // head = word(0) + k2

		// The length register is turned into the multiplier in place:
		// mul = k2 + 2*len.
		mul := slen
		SHLQ(Imm(1), mul)
		ADDQ(k2reg, mul)

		// first = ror(tail, 37)*mul + head
		first := GP64()
		MOVQ(tail, first)
		RORQ(Imm(37), first)
		IMULQ(mul, first)
		ADDQ(head, first)

		// second = (ror(head, 25) + tail) * mul
		second := GP64()
		MOVQ(head, second)
		RORQ(Imm(25), second)
		ADDQ(tail, second)
		IMULQ(mul, second)

		Store(hashLen16Mul(first, second, mul), ReturnIndex(0))
		RET()
	}

	// 4..7 bytes: mix the first and the last 4 bytes of the input.
	Label("check4")
	CMPQ(slen, Imm(4))
	JL(LabelRef("check0"))
	{
		k2reg := GP64()
		MOVQ(Imm(k2), k2reg)

		// mul = k2 + 2*len
		mul := GP64()
		MOVQ(slen, mul)
		SHLQ(Imm(1), mul)
		ADDQ(k2reg, mul)

		// first = (32-bit word at 0 << 3) + len
		first := GP64()
		MOVL(Mem{Base: sbase}, first.As32())
		SHLQ(Imm(3), first)
		ADDQ(slen, first)

		// second = 32-bit word at len-4; slen and sbase are consumed here.
		second := GP64()
		SUBQ(Imm(4), slen)
		ADDQ(slen, sbase)
		MOVL(Mem{Base: sbase}, second.As32())

		Store(hashLen16Mul(first, second, mul), ReturnIndex(0))
		RET()
	}

	// 1..3 bytes: combine the first, middle and last byte.
	Label("check0")
	TESTQ(slen, slen)
	JZ(LabelRef("empty"))
	{
		firstByte := GP64()
		MOVBQZX(Mem{Base: sbase}, firstByte)

		addr := GP64()
		MOVQ(slen, addr)
		SHRQ(Imm(1), addr)
		midByte := GP64()
		ADDQ(sbase, addr)
		MOVBQZX(Mem{Base: addr}, midByte) // byte at len>>1

		MOVQ(slen, addr)
		SUBQ(Imm(1), addr)
		lastByte := GP64()
		ADDQ(sbase, addr)
		MOVBQZX(Mem{Base: addr}, lastByte) // byte at len-1

		// y = first + (mid << 8), built in firstByte's register.
		SHLQ(Imm(8), midByte)
		ADDQ(midByte, firstByte)
		y := firstByte

		// z = len + (last << 2), built in the length register.
		SHLQ(Imm(2), lastByte)
		ADDQ(lastByte, slen)
		z := slen

		k0reg := GP64()
		MOVQ(Imm(k0), k0reg)
		IMULQ(k0reg, z)

		k2reg := GP64()
		MOVQ(Imm(k2), k2reg)
		IMULQ(k2reg, y)

		// result = shiftMix(y*k2 ^ z*k0) * k2
		XORQ(y, z)
		result := shiftMix(z)
		IMULQ(k2reg, result)
		Store(result, ReturnIndex(0))
		RET()
	}

	// Empty input hashes to k2.
	Label("empty")
	emptyHash := GP64()
	MOVQ(Imm(k2), emptyHash)
	Store(emptyHash, ReturnIndex(0))
	RET()
}
// hashLen17to32 emits the hash for inputs of 17 to 32 bytes. sbase holds the
// address of the first input byte and slen the input length. The emitted code
// stores the result in the first return slot and ends in RET.
func hashLen17to32(sbase, slen GPVirtual) {
	// mul = k2 + 2*len
	mul := GP64()
	MOVQ(slen, mul)
	SHLQ(Imm(1), mul)
	k2reg := GP64()
	MOVQ(Imm(k2), k2reg)
	ADDQ(k2reg, mul)

	// w0 = word(0) * k1
	w0 := GP64()
	MOVQ(Mem{Base: sbase}, w0)
	k1reg := GP64()
	MOVQ(Imm(k1), k1reg)
	IMULQ(k1reg, w0)

	// w8 = word(8)
	w8 := GP64()
	MOVQ(Mem{Base: sbase, Disp: 8}, w8)

	// tail16 = address of the last 16 input bytes.
	tail16 := GP64()
	MOVQ(slen, tail16)
	SUBQ(Imm(16), tail16)
	ADDQ(sbase, tail16)

	// last = word(len-8) * mul
	last := GP64()
	MOVQ(Mem{Base: tail16, Disp: 8}, last)
	IMULQ(mul, last)

	// prev = word(len-16) * k2
	prev := GP64()
	MOVQ(Mem{Base: tail16}, prev)
	IMULQ(k2reg, prev)

	Store(hashLen16MulLine(w0, w8, last, prev, k2reg, mul), ReturnIndex(0))
	RET()
}
// hashLen33to64 emits the hash (an 8-byte value) for inputs of 33 to 64
// bytes. sbase holds the address of the first input byte and slen the input
// length. The emitted code stores the result in the first return slot and
// ends in RET.
func hashLen33to64(sbase, slen GPVirtual) {
	// mul = k2 + 2*len
	mul := GP64()
	MOVQ(slen, mul)
	SHLQ(Imm(1), mul)
	k2reg := GP64()
	MOVQ(Imm(k2), k2reg)
	ADDQ(k2reg, mul)

	// a = word(0) * k2, b = word(8)
	a := GP64()
	MOVQ(Mem{Base: sbase}, a)
	IMULQ(k2reg, a)
	b := GP64()
	MOVQ(Mem{Base: sbase, Disp: 8}, b)

	// tail16 = address of the last 16 input bytes.
	tail16 := GP64()
	MOVQ(slen, tail16)
	SUBQ(Imm(16), tail16)
	ADDQ(sbase, tail16)

	// c = word(len-8) * mul, d = word(len-16) * k2
	c := GP64()
	MOVQ(Mem{Base: tail16, Disp: 8}, c)
	IMULQ(mul, c)
	d := GP64()
	MOVQ(Mem{Base: tail16}, d)
	IMULQ(k2reg, d)

	// y = ror(a+b, 43) + d + ror(c, 30)
	y := GP64()
	MOVQ(a, y)
	ADDQ(b, y)
	RORQ(Imm(43), y)
	ADDQ(d, y)
	rotC := GP64()
	MOVQ(c, rotC)
	RORQ(Imm(30), rotC)
	ADDQ(rotC, y)

	// c = a + c + ror(b+k2, 18)
	ADDQ(a, c)
	ADDQ(k2reg, b)
	RORQ(Imm(18), b)
	ADDQ(b, c)

	// z = hashLen16Mul(y, c, mul). hashLen16Mul overwrites its first two
	// arguments and y is needed again below, so a copy of y is passed.
	yCopy := GP64()
	MOVQ(y, yCopy)
	z := hashLen16Mul(yCopy, c, mul)

	// e = word(16) * mul, f = word(24)
	e := GP64()
	MOVQ(Mem{Base: sbase, Disp: 16}, e)
	IMULQ(mul, e)
	f := GP64()
	MOVQ(Mem{Base: sbase, Disp: 24}, f)

	// tail32 = address of the last 32 input bytes.
	tail32 := GP64()
	MOVQ(slen, tail32)
	SUBQ(Imm(32), tail32)
	ADDQ(sbase, tail32)

	// g = (word(len-32) + y) * mul
	g := GP64()
	MOVQ(Mem{Base: tail32}, g)
	ADDQ(y, g)
	IMULQ(mul, g)

	// h = (word(len-24) + z) * mul
	h := GP64()
	MOVQ(Mem{Base: tail32, Disp: 8}, h)
	ADDQ(z, h)
	IMULQ(mul, h)

	Store(hashLen16MulLine(e, f, g, h, a, mul), ReturnIndex(0))
	RET()
}
// weakHashLen32WithSeeds emits a quick 16-byte hash of the 32 bytes at
// sbase+disp, seeded with a and b. Writing w, x, y, z for the four 64-bit
// words at offsets disp+0, +8, +16 and +24, the emitted code computes
//
//	a += w
//	b = ror(b+a+z, 21)
//	c := a
//	a += x + y
//	b += ror(a, 44)
//	a += z
//	b += c
//
// and then exchanges the two registers, so the register passed as b ends up
// holding the final a and the register passed as a the final b. sbase is only
// read.
func weakHashLen32WithSeeds(sbase GPVirtual, disp int, a, b GPVirtual) {
	word := func(off int) Mem { return Mem{Base: sbase, Disp: disp + off} }
	w, x, y, z := word(0), word(8), word(16), word(24)

	ADDQ(w, a)

	ADDQ(a, b)
	ADDQ(z, b)
	RORQ(Imm(21), b)

	saved := GP64()
	MOVQ(a, saved)

	ADDQ(x, a)
	ADDQ(y, a)

	rotA := GP64()
	MOVQ(a, rotA)
	RORQ(Imm(44), rotA)
	ADDQ(rotA, b)

	ADDQ(z, a)
	ADDQ(saved, b)

	XCHGQ(a, b)
}
// hashLoopBody emits one round of the 64-bit main loop over the 64 bytes at
// sbase. The state registers x, y, z, vlo, vhi, wlo and whi are updated in
// place; sbase and mul1 are only read.
//
// mul1 is the register holding the round multiplier. mul2 is a generate-time
// constant: when it is 1 the terms whi and vlo are used as they are, otherwise
// they are first multiplied by mul2 (loaded as a 32-bit immediate).
func hashLoopBody(x, y, z, vlo, vhi, wlo, whi, sbase GPVirtual, mul1 GPVirtual, mul2 uint64) {
	word := func(off int) Mem { return Mem{Base: sbase, Disp: off} }

	// x = ror(x + y + vlo + word(8), 37) * mul1
	ADDQ(y, x)
	ADDQ(vlo, x)
	ADDQ(word(8), x)
	RORQ(Imm(37), x)
	IMULQ(mul1, x)

	// y = ror(y + vhi + word(48), 42) * mul1
	ADDQ(vhi, y)
	ADDQ(word(48), y)
	RORQ(Imm(42), y)
	IMULQ(mul1, y)

	// x ^= whi*mul2; y += vlo*mul2 + word(40)
	if mul2 == 1 {
		XORQ(whi, x)
		ADDQ(vlo, y)
	} else {
		scaledWhi := GP64()
		MOVQ(U32(mul2), scaledWhi)
		IMULQ(whi, scaledWhi)
		XORQ(scaledWhi, x)

		scaledVlo := GP64()
		MOVQ(U32(mul2), scaledVlo)
		IMULQ(vlo, scaledVlo)
		ADDQ(scaledVlo, y)
	}
	ADDQ(word(40), y)

	// z = ror(z + wlo, 33) * mul1
	ADDQ(wlo, z)
	RORQ(Imm(33), z)
	IMULQ(mul1, z)

	// (vlo, vhi) = weak hash of bytes 0..31, seeded with vhi*mul1 and x+wlo.
	IMULQ(mul1, vhi)
	MOVQ(x, vlo)
	ADDQ(wlo, vlo)
	weakHashLen32WithSeeds(sbase, 0, vhi, vlo)

	// (wlo, whi) = weak hash of bytes 32..63, seeded with z+whi and
	// y+word(16).
	ADDQ(z, whi)
	MOVQ(y, wlo)
	ADDQ(word(16), wlo)
	weakHashLen32WithSeeds(sbase, 32, whi, wlo)

	// Swap x and z for the next round.
	XCHGQ(z, x)
}
// fp64 emits the Fingerprint64 routine, declared as func(s []byte) uint64.
//
// Inputs of at most 64 bytes are dispatched to hashLen0to16, hashLen17to32 or
// hashLen33to64; each of those emits its own result store and RET. Longer
// inputs are consumed 64 bytes per hashLoopBody round, after which one more
// round with a different multiplier is run over the last 64 bytes of the
// input and the state is folded into the 64-bit result.
func fp64() {
	TEXT("Fingerprint64", NOSPLIT, "func(s []byte) uint64")

	slen := GP64()
	sbase := GP64()
	Load(Param("s").Base(), sbase)
	Load(Param("s").Len(), slen)

	// Short inputs: each helper ends in RET, so control only reaches the
	// next label through the conditional jump in front of the helper.
	CMPQ(slen, Imm(16))
	JG(LabelRef("check32"))
	hashLen0to16(sbase, slen)

	Label("check32")
	CMPQ(slen, Imm(32))
	JG(LabelRef("check64"))
	hashLen17to32(sbase, slen)

	Label("check64")
	CMPQ(slen, Imm(64))
	JG(LabelRef("long"))
	hashLen33to64(sbase, slen)

	Label("long")
	// seed is deliberately a variable, not a constant: the products below
	// have to wrap modulo 2^64, and a constant expression that overflows
	// uint64 is rejected by the compiler.
	seed := uint64(81)

	// v and w state pairs start at zero.
	vlo, vhi, wlo, whi := GP64(), GP64(), GP64(), GP64()
	XORQ(vlo, vlo)
	XORQ(vhi, vhi)
	XORQ(wlo, wlo)
	XORQ(whi, whi)

	// x = seed*k2 + word(0)
	x := GP64()
	MOVQ(Imm(seed*k2), x)
	ADDQ(Mem{Base: sbase}, x)

	// y = seed*k1 + 113
	y := GP64()
	y64 := seed*k1 + 113
	MOVQ(Imm(y64), y)

	// z = shiftMix(y*k2 + 113) * k2, folded at generate time.
	z := GP64()
	MOVQ(Imm(shiftMix64(y64*k2+113)*k2), z)

	// endIdx = (slen-1) rounded down to a multiple of 64.
	endIdx := GP64()
	MOVQ(slen, endIdx)
	tmp := GP64()
	SUBQ(Imm(1), endIdx)
	MOVQ(U64(^uint64(63)), tmp)
	ANDQ(tmp, endIdx)

	// last64Idx = endIdx + ((slen-1) & 63) - 63: offset of the last 64
	// bytes of the input.
	last64Idx := GP64()
	MOVQ(slen, last64Idx)
	SUBQ(Imm(1), last64Idx)
	ANDQ(Imm(63), last64Idx)
	SUBQ(Imm(63), last64Idx)
	ADDQ(endIdx, last64Idx)

	last64 := GP64()
	MOVQ(last64Idx, last64)
	ADDQ(sbase, last64)

	// end counts the bytes not yet consumed by the loop.
	end := GP64()
	MOVQ(slen, end)

	// k1 is loop-invariant and only ever read, so it is loaded once in
	// front of the loop instead of on every iteration.
	rk1 := GP64()
	MOVQ(Imm(k1), rk1)

	Label("loop")
	hashLoopBody(x, y, z, vlo, vhi, wlo, whi, sbase, rk1, 1)
	ADDQ(Imm(64), sbase)
	SUBQ(Imm(64), end)
	CMPQ(end, Imm(64))
	JG(LabelRef("loop"))

	// mul = k1 + ((z & 0xff) << 1)
	mul := GP64()
	MOVQ(z, mul)
	ANDQ(Imm(0xff), mul)
	SHLQ(Imm(1), mul)
	ADDQ(rk1, mul)

	// Point sbase at the last 64 bytes of the input for the final round.
	MOVQ(last64, sbase)

	// wlo += (slen-1) & 63; vlo += wlo; wlo += vlo
	SUBQ(Imm(1), slen)
	ANDQ(Imm(63), slen)
	ADDQ(slen, wlo)
	ADDQ(wlo, vlo)
	ADDQ(vlo, wlo)

	hashLoopBody(x, y, z, vlo, vhi, wlo, whi, sbase, mul, 9)

	{
		// a = hashLen16Mul(vlo, wlo, mul) + z + shiftMix(y)*k0
		a := hashLen16Mul(vlo, wlo, mul)
		ADDQ(z, a)
		b := shiftMix(y)
		rk0 := GP64()
		MOVQ(Imm(k0), rk0)
		IMULQ(rk0, b)
		ADDQ(b, a)

		// c = hashLen16Mul(vhi, whi, mul) + x
		c := hashLen16Mul(vhi, whi, mul)
		ADDQ(x, c)

		r := hashLen16Mul(a, c, mul)
		Store(r, ReturnIndex(0))
	}
	RET()
}
// fmix emits a 32-bit finalization mix on h, in place, and returns h:
//
//	h ^= h >> 16; h *= 0x85ebca6b
//	h ^= h >> 13; h *= 0xc2b2ae35
//	h ^= h >> 16
//
// A single 32-bit scratch register is used for every step.
func fmix(h GPVirtual) GPVirtual {
	scratch := GP32()

	// xorShift emits h ^= h >> bits.
	xorShift := func(bits uint64) {
		MOVL(h, scratch)
		SHRL(Imm(bits), scratch)
		XORL(scratch, h)
	}
	// scale emits h *= factor.
	scale := func(factor uint64) {
		MOVL(Imm(factor), scratch)
		IMULL(scratch, h)
	}

	xorShift(16)
	scale(0x85ebca6b)
	xorShift(13)
	scale(0xc2b2ae35)
	xorShift(16)
	return h
}
// mur emits the 32-bit combining step that folds a into h and returns h:
//
//	a = ror(a*c1, 17) * c2
//	h = ror(h^a, 19)
//	h = h*5 + 0xe6546b64
//
// Both registers are overwritten; a is also used as scratch for h*5.
func mur(a, h GPVirtual) GPVirtual {
	imul3l(c1, a, a)
	RORL(Imm(17), a)
	imul3l(c2, a, a)

	XORL(a, h)
	RORL(Imm(19), h)

	// h*5 is formed as h + h*4 with one LEAL, the constant is added with
	// a second LEAL.
	timesFive := Mem{Base: h, Index: h, Scale: 4}
	LEAL(timesFive, a)
	plusConst := Mem{Base: a, Disp: 0xe6546b64}
	LEAL(plusConst, h)
	return h
}
// hash32Len5to12 emits the 32-bit hash for inputs of 5 to 12 bytes. sbase
// holds the address of the first input byte and slen the input length. The
// emitted code stores the result in the first return slot and ends in RET.
func hash32Len5to12(sbase, slen GPVirtual) {
	// a = len
	a := GP32()
	MOVL(slen.As32(), a)

	// b = len*5, formed as (len << 2) + len
	b := GP32()
	MOVL(a, b)
	SHLL(Imm(2), b)
	ADDL(a, b)

	// c = 9
	c := GP32()
	MOVL(U32(9), c)

	// d = b, captured before input words are added to b.
	d := GP32()
	MOVL(b, d)

	// a += 32-bit word at 0
	ADDL(Mem{Base: sbase}, a)

	// b += 32-bit word at len-4
	addr := GP64()
	MOVQ(slen, addr)
	SUBQ(Imm(4), addr)
	ADDQ(sbase, addr)
	ADDL(Mem{Base: addr}, b)

	// c += 32-bit word at (len>>1) & 4
	MOVQ(slen, addr)
	SHRQ(Imm(1), addr)
	ANDQ(Imm(4), addr)
	ADDQ(sbase, addr)
	ADDL(Mem{Base: addr}, c)

	// result = fmix(mur(c, mur(b, mur(a, d))))
	h := mur(a, d)
	h = mur(b, h)
	h = mur(c, h)
	h = fmix(h)
	Store(h, ReturnIndex(0))
	RET()
}
// hash32Len13to24Seed emits the 32-bit hash for inputs of 13 to 24 bytes.
// sbase holds the address of the first input byte and slen the input length.
// The emitted code stores the result in the first return slot and ends in RET.
func hash32Len13to24Seed(sbase, slen GPVirtual) {
	// load32 allocates a 32-bit register and loads the word at base+disp.
	load32 := func(base GPVirtual, disp int) GPVirtual {
		r := GP32()
		MOVL(Mem{Base: base, Disp: disp}, r)
		return r
	}

	// mid = sbase + len>>1
	mid := GP64()
	MOVQ(slen, mid)
	SHRQ(Imm(1), mid)
	ADDQ(sbase, mid)

	a := load32(mid, -4)  // word at (len>>1) - 4
	b := load32(sbase, 4) // word at 4

	// tail = sbase + len
	tail := GP64()
	MOVQ(slen, tail)
	ADDQ(sbase, tail)

	c := load32(tail, -8) // word at len - 8
	d := load32(mid, 0)   // word at len>>1
	e := load32(sbase, 0) // word at 0
	f := load32(tail, -4) // word at len - 4

	// h = d*c1 + len
	h := GP32()
	MOVL(U32(c1), h)
	IMULL(d, h)
	ADDL(slen.As32(), h)

	// a = ror(a, 12) + f
	RORL(Imm(12), a)
	ADDL(f, a)

	// h = mur(c, h) + a. mur overwrites its first argument and c is needed
	// again below, so a copy of c is passed.
	cCopy := GP32()
	MOVL(c, cCopy)
	h = mur(cCopy, h)
	ADDL(a, h)

	// a = ror(a, 3) + c
	RORL(Imm(3), a)
	ADDL(c, a)

	// h = mur(e, h) + a
	h = mur(e, h)
	ADDL(a, h)

	// a = ror(a + f, 12) + d
	ADDL(f, a)
	RORL(Imm(12), a)
	ADDL(d, a)

	// h = fmix(mur(b, h) + a)
	h = mur(b, h)
	ADDL(a, h)
	h = fmix(h)

	Store(h, ReturnIndex(0))
	RET()
}
// hash32Len0to4 emits the 32-bit hash for inputs of 0 to 4 bytes. sbase holds
// the address of the first input byte and slen the input length. The emitted
// code stores the result in the first return slot and ends in RET.
//
// The per-byte loop is unrolled four times at generate time; each copy jumps
// to "done" once the remaining-byte counter reaches zero.
func hash32Len0to4(sbase, slen GPVirtual) {
	acc := GP32()
	mix := GP32()
	XORL(acc, acc)    // acc = 0
	MOVL(U32(9), mix) // mix = 9

	TESTQ(slen, slen)
	JZ(LabelRef("done"))

	remaining := GP64()
	cur := GP32()
	MOVQ(slen, remaining)
	c1reg := GP32()
	MOVL(U32(c1), c1reg)

	for off := 0; off < 4; off++ {
		// acc = acc*c1 + sign-extended byte at off; mix ^= acc
		IMULL(c1reg, acc)
		MOVBLSX(Mem{Base: sbase, Disp: off}, cur)
		ADDL(cur, acc)
		XORL(acc, mix)

		SUBQ(Imm(1), remaining)
		TESTQ(remaining, remaining)
		JZ(LabelRef("done"))
	}

	Label("done")
	len32 := GP32()
	MOVL(slen.As32(), len32)

	// result = fmix(mur(acc, mur(len, mix)))
	result := fmix(mur(acc, mur(len32, mix)))
	Store(result, ReturnIndex(0))
	RET()
}
// fp32 emits the Fingerprint32 routine, declared as func(s []byte) uint32.
//
// Inputs of at most 24 bytes are dispatched to hash32Len0to4, hash32Len5to12
// or hash32Len13to24Seed; each of those emits its own result store and RET.
// Longer inputs continue at the "long" label: three 32-bit accumulators
// (h, g, f) are seeded from the length and the last 20 bytes of the input,
// the input is then consumed 20 bytes per round from the front, and the
// accumulators are folded into the 32-bit result.
func fp32() {
TEXT("Fingerprint32", NOSPLIT, "func(s []byte) uint32")
sbase := GP64()
slen := GP64()
Load(Param("s").Base(), sbase)
Load(Param("s").Len(), slen)
// Length dispatch: > 24 -> long, > 12 -> 13..24, > 4 -> 5..12, else 0..4.
CMPQ(slen, Imm(24))
JG(LabelRef("long"))
CMPQ(slen, Imm(12))
JG(LabelRef("hash_13_24"))
CMPQ(slen, Imm(4))
JG(LabelRef("hash_5_12"))
hash32Len0to4(sbase, slen)
Label("hash_5_12")
hash32Len5to12(sbase, slen)
Label("hash_13_24")
hash32Len13to24Seed(sbase, slen)
Label("long")
// h = len
h := GP32()
MOVL(slen.As32(), h)
// g = c1 * len
g := GP32()
MOVL(U32(c1), g)
IMULL(h, g)
// f = g
f := GP32()
MOVL(g, f)
// len > 24
// send = sbase + len, the address just past the input.
send := GP64()
MOVQ(slen, send)
ADDQ(sbase, send)
c1reg := GP32()
MOVL(U32(c1), c1reg)
c2reg := GP32()
MOVL(U32(c2), c2reg)
// shuf folds the 32-bit word at send+disp into r, in place:
//   a = ror(word*c1, 17) * c2
//   r = ror(r ^ a, 19)
//   r = r*5 + 0xe6546b64   (r*5 formed as r + (r << 2))
shuf := func(r GPVirtual, disp int) {
a := GP32()
MOVL(Mem{Base: send, Disp: disp}, a)
IMULL(c1reg, a)
RORL(Imm(17), a)
IMULL(c2reg, a)
XORL(a, r)
RORL(Imm(19), r)
MOVL(r, a)
SHLL(Imm(2), a)
ADDL(a, r)
ADDL(Imm(0xe6546b64), r)
}
// h takes the words at len-4 and len-16, g those at len-8 and len-12.
shuf(h, -4)
shuf(g, -8)
shuf(h, -16)
shuf(g, -12)
// Prefetch hint for the start of the input, which the rounds below read.
PREFETCHT0(Mem{Base: sbase})
{
// f = ror(f + ror(word(len-20)*c1, 17)*c2, 19) + 113
a := GP32()
MOVL(Mem{Base: send, Disp: -20}, a)
IMULL(c1reg, a)
RORL(Imm(17), a)
IMULL(c2reg, a)
ADDL(a, f)
RORL(Imm(19), f)
ADDL(Imm(113), f)
}
// loop32Body emits one round over the 20 bytes (five 32-bit words a..e) at
// sbase+disp, updating f, g and h in place:
//   h += a; g += b; f += c
//   h = mur(d, h) + e
//   g = mur(c, g) + a
//   f = mur(b + e*c1, f) + d
//   f += g; g += f
// mur overwrites its first argument, so words that are needed again are
// passed through the scratch register t. The slen parameter is not used.
loop32Body := func(f, g, h, sbase, slen GPVirtual, disp int) {
a, b, c, d, e := GP32(), GP32(), GP32(), GP32(), GP32()
MOVL(Mem{Base: sbase, Disp: disp + 0}, a)
ADDL(a, h)
MOVL(Mem{Base: sbase, Disp: disp + 4}, b)
ADDL(b, g)
MOVL(Mem{Base: sbase, Disp: disp + 8}, c)
ADDL(c, f)
MOVL(Mem{Base: sbase, Disp: disp + 12}, d)
t := GP32()
MOVL(d, t)
h = mur(t, h)
MOVL(Mem{Base: sbase, Disp: disp + 16}, e)
ADDL(e, h)
MOVL(c, t)
g = mur(t, g)
ADDL(a, g)
// t = e*c1 + b
imul3l(c1, e, t)
ADDL(b, t)
f = mur(t, f)
ADDL(d, f)
ADDL(g, f)
ADDL(f, g)
}
// Unrolled loop: while at least 100 bytes remain, run four rounds (80 bytes)
// per pass, prefetching ahead of each round.
Label("loop80")
CMPQ(slen, Imm(80+20))
JL(LabelRef("loop20"))
{
PREFETCHT0(Mem{Base: sbase, Disp: 20})
loop32Body(f, g, h, sbase, slen, 0)
PREFETCHT0(Mem{Base: sbase, Disp: 40})
loop32Body(f, g, h, sbase, slen, 20)
PREFETCHT0(Mem{Base: sbase, Disp: 60})
loop32Body(f, g, h, sbase, slen, 40)
PREFETCHT0(Mem{Base: sbase, Disp: 80})
loop32Body(f, g, h, sbase, slen, 60)
ADDQ(Imm(80), sbase)
SUBQ(Imm(80), slen)
JMP(LabelRef("loop80"))
}
// Remainder loop: one round per pass while more than 20 bytes remain.
Label("loop20")
CMPQ(slen, Imm(20))
JLE(LabelRef("after"))
{
loop32Body(f, g, h, sbase, slen, 0)
ADDQ(Imm(20), sbase)
SUBQ(Imm(20), slen)
JMP(LabelRef("loop20"))
}
Label("after")
// c1 is loaded into a fresh register for the final mix.
c1reg = GP32()
MOVL(U32(c1), c1reg)
// g = ror(ror(g, 11)*c1, 17) * c1
RORL(Imm(11), g)
IMULL(c1reg, g)
RORL(Imm(17), g)
IMULL(c1reg, g)
// f = ror(ror(f, 11)*c1, 17) * c1
RORL(Imm(11), f)
IMULL(c1reg, f)
RORL(Imm(17), f)
IMULL(c1reg, f)
// h = ror(h + g, 19); h = h*5 + 0xe6546b64; h = ror(h, 17) * c1
ADDL(g, h)
RORL(Imm(19), h)
t := GP32()
MOVL(h, t)
SHLL(Imm(2), t)
ADDL(t, h)
ADDL(Imm(0xe6546b64), h)
RORL(Imm(17), h)
IMULL(c1reg, h)
// h = ror(h + f, 19); h = h*5 + 0xe6546b64; h = ror(h, 17) * c1
ADDL(f, h)
RORL(Imm(19), h)
t = GP32()
MOVL(h, t)
SHLL(Imm(2), t)
ADDL(t, h)
ADDL(Imm(0xe6546b64), h)
RORL(Imm(17), h)
IMULL(c1reg, h)
Store(h, ReturnIndex(0))
RET()
}
// go111 is the -go111 command-line flag (default true). imul3l consults it to
// decide whether the three-operand IMUL3L instruction may be emitted.
var go111 = flag.Bool("go111", true, "use assembly instructions present in go1.11 and later")
// imul3l emits a 32-bit multiply of register x by the constant m, leaving the
// product in y.
//
// With -go111 set this is a single IMUL3L. Otherwise the constant is loaded
// into a scratch register, multiplied into x and the product copied to y; on
// that path x is overwritten with the product as well, so callers must not
// rely on x keeping its value.
func imul3l(m uint32, x, y Register) {
	if !*go111 {
		scratch := GP32()
		MOVL(U32(m), scratch)
		IMULL(scratch, x)
		MOVL(x, y)
		return
	}
	IMUL3L(U32(m), x, y)
}
// main parses the command-line flags, sets the build constraint of the
// generated file, emits Fingerprint64 followed by Fingerprint32 and writes
// out the result.
func main() {
	flag.Parse()
	ConstraintExpr("amd64,!purego")

	// Emission order determines the order of the routines in the output.
	for _, emit := range []func(){fp64, fp32} {
		emit()
	}

	Generate()
}