bd6879ac51
* core/vm, crypto/bn256: switch over to cloudflare library * crypto/bn256: unmarshal constraint + start pure go impl * crypto/bn256: combo cloudflare and google lib * travis: drop 386 test job
98 lines
1.6 KiB
ArmAsm
98 lines
1.6 KiB
ArmAsm
// +build amd64,!appengine,!gccgo
|
|
|
|
#include "gfp.h"
|
|
#include "mul.h"
|
|
#include "mul_bmi2.h"
|
|
|
|
// gfpNeg computes (p2 - *a), passes it through gfpCarry, and stores the
// four-limb result in *c.
//
// Argument frame (16 bytes, no locals):
//   c+0(FP)  pointer to the destination, written as four 64-bit limbs
//   a+8(FP)  pointer to the source, read as four 64-bit limbs
// Presumably declared on the Go side as func gfpNeg(c, a *gfP) -- confirm
// against the Go stub.
//
// Register use:
//   R8..R11          result limbs, least significant first
//   AX               fifth (top) limb handed to gfpCarry, always zero here
//   R12..R15, BX     second register group handed to gfpCarry
//   DI               source pointer, later reused for the destination
TEXT ·gfpNeg(SB),0,$0-16
	MOVQ a+8(FP), DI
	MOVQ $0, AX

	// R11:R10:R9:R8 = p2
	MOVQ ·p2+0(SB), R8
	MOVQ ·p2+8(SB), R9
	MOVQ ·p2+16(SB), R10
	MOVQ ·p2+24(SB), R11

	// R11:R10:R9:R8 -= *a, with the borrow rippling up through SBBQ.
	SUBQ 0(DI), R8
	SBBQ 8(DI), R9
	SBBQ 16(DI), R10
	SBBQ 24(DI), R11

	// gfpCarry is a macro defined outside this file; presumably it folds
	// the five-limb value back below p2 (this is what maps the a == 0 case,
	// where the difference equals p2, to zero) -- confirm in the header.
	gfpCarry(R8,R9,R10,R11,AX, R12,R13,R14,R15,BX)

	MOVQ c+0(FP), DI
	storeBlock(R8,R9,R10,R11, 0(DI))
	RET
|
|
|
|
// gfpAdd computes (*a + *b) as a five-limb sum, passes it through gfpCarry,
// and stores the low four limbs in *c.
//
// Argument frame (24 bytes, no locals):
//   c+0(FP)   pointer to the destination
//   a+8(FP)   pointer to the first operand
//   b+16(FP)  pointer to the second operand
// Presumably declared on the Go side as func gfpAdd(c, a, b *gfP) -- confirm
// against the Go stub.
//
// Register use:
//   R8..R11            sum limbs, least significant first
//   R12                carry out of the 256-bit addition (0 or 1)
//   R13..R15, AX, BX   second register group handed to gfpCarry
TEXT ·gfpAdd(SB),0,$0-24
	MOVQ b+16(FP), SI
	MOVQ a+8(FP), DI

	// R8..R11 take the operand at 0(DI) via the loadBlock macro; R12 starts
	// at zero so the final ADCQ leaves just the carry bit in it.
	loadBlock(0(DI), R8,R9,R10,R11)
	MOVQ $0, R12

	// R12:R11:R10:R9:R8 += *b
	ADDQ 0(SI), R8
	ADCQ 8(SI), R9
	ADCQ 16(SI), R10
	ADCQ 24(SI), R11
	ADCQ $0, R12

	// gfpCarry is a macro defined outside this file; presumably it performs
	// the conditional subtraction of the modulus -- confirm in the header.
	gfpCarry(R8,R9,R10,R11,R12, R13,R14,R15,AX,BX)

	MOVQ c+0(FP), DI
	storeBlock(R8,R9,R10,R11, 0(DI))
	RET
|
|
|
|
// gfpSub computes (*a - *b) and, when that subtraction borrows, adds p2
// back before storing the four-limb result in *c.
//
// Argument frame (24 bytes, no locals):
//   c+0(FP)   pointer to the destination
//   a+8(FP)   pointer to the minuend
//   b+16(FP)  pointer to the subtrahend
// Presumably declared on the Go side as func gfpSub(c, a, b *gfP) -- confirm
// against the Go stub.
//
// Register use:
//   R8..R11    difference limbs, least significant first
//   R12..R15   correction term: p2 if the subtraction borrowed, else zero
//   AX         constant zero used as the CMOVQCC source
TEXT ·gfpSub(SB),0,$0-24
	MOVQ b+16(FP), SI
	MOVQ a+8(FP), DI

	loadBlock(0(DI), R8,R9,R10,R11)

	// Stage the correction term before the subtraction so that nothing but
	// the SUBQ/SBBQ chain decides the carry flag tested below.
	MOVQ $0, AX
	MOVQ ·p2+0(SB), R12
	MOVQ ·p2+8(SB), R13
	MOVQ ·p2+16(SB), R14
	MOVQ ·p2+24(SB), R15

	// R11:R10:R9:R8 -= *b; CF is set afterwards iff the result wrapped.
	SUBQ 0(SI), R8
	SBBQ 8(SI), R9
	SBBQ 16(SI), R10
	SBBQ 24(SI), R11

	// No borrow (carry clear): replace the correction term with zero.
	// The selection uses conditional moves rather than a branch.
	CMOVQCC AX, R12
	CMOVQCC AX, R13
	CMOVQCC AX, R14
	CMOVQCC AX, R15

	// Apply the correction; the carry out of the top limb is discarded.
	ADDQ R12, R8
	ADCQ R13, R9
	ADCQ R14, R10
	ADCQ R15, R11

	MOVQ c+0(FP), DI
	storeBlock(R8,R9,R10,R11, 0(DI))
	RET
|
|
|
|
// gfpMul multiplies *a by *b, reduces the product, and stores the four-limb
// result in *c. Two implementations are selected at run time depending on
// whether the CPU offers BMI2.
//
// Argument frame (24 bytes):
//   c+0(FP)   pointer to the destination
//   a+8(FP)   pointer to the first operand
//   b+16(FP)  pointer to the second operand
// Presumably declared on the Go side as func gfpMul(c, a, b *gfP) -- confirm
// against the Go stub.
//
// Local frame: 160 bytes addressed from 0(SP). The BMI2 path writes
// R8..R15 into it at 0(SP) and 32(SP); the generic path passes 0(SP) to the
// mul and gfpReduce macros. How the remainder of the frame is used is
// decided by those macros, which are defined outside this file.
//
// Both paths are expected to leave the reduced result in R12..R15, since
// that is what the common tail stores.
//
// NOTE(review): runtime·support_bmi2 is a symbol owned by the Go runtime,
// not by this package. Confirm it exists in every Go version this file must
// build with; a package-local feature flag would remove that dependency.
TEXT ·gfpMul(SB),0,$160-24
	MOVQ b+16(FP), SI
	MOVQ a+8(FP), DI

	// Fall back to the plain implementation when MULX is unavailable.
	CMPB runtime·support_bmi2(SB), $0
	JE genericMul

	// BMI2 path: the macro takes the four limbs of *a and the base of *b;
	// R8..R15 are then saved to the frame before the reduction macro runs.
	mulBMI2(0(DI),8(DI),16(DI),24(DI), 0(SI))
	storeBlock( R8, R9,R10,R11, 0(SP))
	storeBlock(R12,R13,R14,R15, 32(SP))
	gfpReduceBMI2()
	JMP storeResult

genericMul:
	// Generic path: the macro is given 0(SP) as its output location and
	// the reduction macro is pointed at the same place.
	mul(0(DI),8(DI),16(DI),24(DI), 0(SI), 0(SP))
	gfpReduce(0(SP))

storeResult:
	MOVQ c+0(FP), DI
	storeBlock(R12,R13,R14,R15, 0(DI))
	RET
|