Bump github.com/hashicorp/terraform-plugin-sdk/v2 from 2.26.1 to 2.27.0
Bumps [github.com/hashicorp/terraform-plugin-sdk/v2](https://github.com/hashicorp/terraform-plugin-sdk) from 2.26.1 to 2.27.0.

- [Release notes](https://github.com/hashicorp/terraform-plugin-sdk/releases)
- [Changelog](https://github.com/hashicorp/terraform-plugin-sdk/blob/main/CHANGELOG.md)
- [Commits](https://github.com/hashicorp/terraform-plugin-sdk/compare/v2.26.1...v2.27.0)

---
updated-dependencies:
- dependency-name: github.com/hashicorp/terraform-plugin-sdk/v2
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
vendor/github.com/cloudflare/circl/math/fp25519/fp.go (generated, vendored, new file, 205 lines)
@@ -0,0 +1,205 @@
// Package fp25519 provides prime field arithmetic over GF(2^255-19).
package fp25519

import (
    "errors"

    "github.com/cloudflare/circl/internal/conv"
)

// Size in bytes of an element.
const Size = 32

// Elt is a prime field element.
type Elt [Size]byte

func (e Elt) String() string { return conv.BytesLe2Hex(e[:]) }

// p is the prime modulus 2^255-19.
var p = Elt{
    0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f,
}

// P returns the prime modulus 2^255-19.
func P() Elt { return p }

// ToBytes stores in b the little-endian byte representation of x.
func ToBytes(b []byte, x *Elt) error {
    if len(b) != Size {
        return errors.New("wrong size")
    }
    Modp(x)
    copy(b, x[:])
    return nil
}

// IsZero returns true if x is equal to 0.
func IsZero(x *Elt) bool { Modp(x); return *x == Elt{} }

// SetOne assigns x=1.
func SetOne(x *Elt) { *x = Elt{}; x[0] = 1 }

// Neg calculates z = -x.
func Neg(z, x *Elt) { Sub(z, &p, x) }

// InvSqrt calculates z = sqrt(x/y) iff x/y is a quadratic-residue, which is
// indicated by returning isQR = true. Otherwise, when x/y is a quadratic
// non-residue, z will have an undetermined value and isQR = false.
func InvSqrt(z, x, y *Elt) (isQR bool) {
    sqrtMinusOne := &Elt{
        0xb0, 0xa0, 0x0e, 0x4a, 0x27, 0x1b, 0xee, 0xc4,
        0x78, 0xe4, 0x2f, 0xad, 0x06, 0x18, 0x43, 0x2f,
        0xa7, 0xd7, 0xfb, 0x3d, 0x99, 0x00, 0x4d, 0x2b,
        0x0b, 0xdf, 0xc1, 0x4f, 0x80, 0x24, 0x83, 0x2b,
    }
    t0, t1, t2, t3 := &Elt{}, &Elt{}, &Elt{}, &Elt{}

    Mul(t0, x, y)   // t0 = u*v
    Sqr(t1, y)      // t1 = v^2
    Mul(t2, t0, t1) // t2 = u*v^3
    Sqr(t0, t1)     // t0 = v^4
    Mul(t1, t0, t2) // t1 = u*v^7

    var Tab [4]*Elt
    Tab[0] = &Elt{}
    Tab[1] = &Elt{}
    Tab[2] = t3
    Tab[3] = t1

    *Tab[0] = *t1
    Sqr(Tab[0], Tab[0])
    Sqr(Tab[1], Tab[0])
    Sqr(Tab[1], Tab[1])
    Mul(Tab[1], Tab[1], Tab[3])
    Mul(Tab[0], Tab[0], Tab[1])
    Sqr(Tab[0], Tab[0])
    Mul(Tab[0], Tab[0], Tab[1])
    Sqr(Tab[1], Tab[0])
    for i := 0; i < 4; i++ {
        Sqr(Tab[1], Tab[1])
    }
    Mul(Tab[1], Tab[1], Tab[0])
    Sqr(Tab[2], Tab[1])
    for i := 0; i < 4; i++ {
        Sqr(Tab[2], Tab[2])
    }
    Mul(Tab[2], Tab[2], Tab[0])
    Sqr(Tab[1], Tab[2])
    for i := 0; i < 14; i++ {
        Sqr(Tab[1], Tab[1])
    }
    Mul(Tab[1], Tab[1], Tab[2])
    Sqr(Tab[2], Tab[1])
    for i := 0; i < 29; i++ {
        Sqr(Tab[2], Tab[2])
    }
    Mul(Tab[2], Tab[2], Tab[1])
    Sqr(Tab[1], Tab[2])
    for i := 0; i < 59; i++ {
        Sqr(Tab[1], Tab[1])
    }
    Mul(Tab[1], Tab[1], Tab[2])
    for i := 0; i < 5; i++ {
        Sqr(Tab[1], Tab[1])
    }
    Mul(Tab[1], Tab[1], Tab[0])
    Sqr(Tab[2], Tab[1])
    for i := 0; i < 124; i++ {
        Sqr(Tab[2], Tab[2])
    }
    Mul(Tab[2], Tab[2], Tab[1])
    Sqr(Tab[2], Tab[2])
    Sqr(Tab[2], Tab[2])
    Mul(Tab[2], Tab[2], Tab[3])

    Mul(z, t3, t2) // z = xy^(p+3)/8 = xy^3*(xy^7)^(p-5)/8
    // Checking whether y z^2 == x
    Sqr(t0, z)     // t0 = z^2
    Mul(t0, t0, y) // t0 = yz^2
    Sub(t1, t0, x) // t1 = t0-u
    Add(t2, t0, x) // t2 = t0+u
    if IsZero(t1) {
        return true
    } else if IsZero(t2) {
        Mul(z, z, sqrtMinusOne) // z = z*sqrt(-1)
        return true
    } else {
        return false
    }
}

// Inv calculates z = 1/x mod p.
func Inv(z, x *Elt) {
    x0, x1, x2 := &Elt{}, &Elt{}, &Elt{}
    Sqr(x1, x)
    Sqr(x0, x1)
    Sqr(x0, x0)
    Mul(x0, x0, x)
    Mul(z, x0, x1)
    Sqr(x1, z)
    Mul(x0, x0, x1)
    Sqr(x1, x0)
    for i := 0; i < 4; i++ {
        Sqr(x1, x1)
    }
    Mul(x0, x0, x1)
    Sqr(x1, x0)
    for i := 0; i < 9; i++ {
        Sqr(x1, x1)
    }
    Mul(x1, x1, x0)
    Sqr(x2, x1)
    for i := 0; i < 19; i++ {
        Sqr(x2, x2)
    }
    Mul(x2, x2, x1)
    for i := 0; i < 10; i++ {
        Sqr(x2, x2)
    }
    Mul(x2, x2, x0)
    Sqr(x0, x2)
    for i := 0; i < 49; i++ {
        Sqr(x0, x0)
    }
    Mul(x0, x0, x2)
    Sqr(x1, x0)
    for i := 0; i < 99; i++ {
        Sqr(x1, x1)
    }
    Mul(x1, x1, x0)
    for i := 0; i < 50; i++ {
        Sqr(x1, x1)
    }
    Mul(x1, x1, x2)
    for i := 0; i < 5; i++ {
        Sqr(x1, x1)
    }
    Mul(z, z, x1)
}

// Cmov assigns y to x if n is 1.
func Cmov(x, y *Elt, n uint) { cmov(x, y, n) }

// Cswap interchanges x and y if n is 1.
func Cswap(x, y *Elt, n uint) { cswap(x, y, n) }

// Add calculates z = x+y mod p.
func Add(z, x, y *Elt) { add(z, x, y) }

// Sub calculates z = x-y mod p.
func Sub(z, x, y *Elt) { sub(z, x, y) }

// AddSub calculates (x,y) = (x+y mod p, x-y mod p).
func AddSub(x, y *Elt) { addsub(x, y) }

// Mul calculates z = x*y mod p.
func Mul(z, x, y *Elt) { mul(z, x, y) }

// Sqr calculates z = x^2 mod p.
func Sqr(z, x *Elt) { sqr(z, x) }

// Modp ensures that z is between [0,p-1].
func Modp(z *Elt) { modp(z) }
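As a quick orientation to the vendored API above, here is a minimal sketch (illustrative only, not part of the diff; it assumes the vendored import path is resolvable) that checks x * Inv(x) reduces to 1 mod p:

package main

import (
    "fmt"

    "github.com/cloudflare/circl/math/fp25519"
)

func main() {
    var x, xInv, z, one fp25519.Elt
    x[0] = 7 // the field element 7, little-endian
    fp25519.SetOne(&one)

    fp25519.Inv(&xInv, &x)     // xInv = x^-1 mod p
    fp25519.Mul(&z, &x, &xInv) // z = x * x^-1 mod p
    fp25519.Modp(&z)           // canonicalize before comparing raw bytes

    fmt.Println("x * x^-1 == 1:", z == one) // expected: true
}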
vendor/github.com/cloudflare/circl/math/fp25519/fp_amd64.go (generated, vendored, new file, 45 lines)
@@ -0,0 +1,45 @@
//go:build amd64 && !purego
// +build amd64,!purego

package fp25519

import (
    "golang.org/x/sys/cpu"
)

var hasBmi2Adx = cpu.X86.HasBMI2 && cpu.X86.HasADX

var _ = hasBmi2Adx

func cmov(x, y *Elt, n uint)  { cmovAmd64(x, y, n) }
func cswap(x, y *Elt, n uint) { cswapAmd64(x, y, n) }
func add(z, x, y *Elt)        { addAmd64(z, x, y) }
func sub(z, x, y *Elt)        { subAmd64(z, x, y) }
func addsub(x, y *Elt)        { addsubAmd64(x, y) }
func mul(z, x, y *Elt)        { mulAmd64(z, x, y) }
func sqr(z, x *Elt)           { sqrAmd64(z, x) }
func modp(z *Elt)             { modpAmd64(z) }

//go:noescape
func cmovAmd64(x, y *Elt, n uint)

//go:noescape
func cswapAmd64(x, y *Elt, n uint)

//go:noescape
func addAmd64(z, x, y *Elt)

//go:noescape
func subAmd64(z, x, y *Elt)

//go:noescape
func addsubAmd64(x, y *Elt)

//go:noescape
func mulAmd64(z, x, y *Elt)

//go:noescape
func sqrAmd64(z, x *Elt)

//go:noescape
func modpAmd64(z *Elt)
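The hasBmi2Adx variable above gates the MULX/ADCX/ADOX fast path on runtime CPU detection. A small sketch (an assumption-free use of golang.org/x/sys/cpu, already a dependency of this file) showing the same probe in isolation:

package main

import (
    "fmt"

    "golang.org/x/sys/cpu"
)

func main() {
    // Mirrors the hasBmi2Adx condition in fp_amd64.go: the BMI2+ADX
    // code path is taken only when both extensions are present.
    fmt.Println("BMI2:", cpu.X86.HasBMI2)
    fmt.Println("ADX: ", cpu.X86.HasADX)
    fmt.Println("fast path:", cpu.X86.HasBMI2 && cpu.X86.HasADX)
}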
vendor/github.com/cloudflare/circl/math/fp25519/fp_amd64.h (generated, vendored, new file, 351 lines)
@@ -0,0 +1,351 @@
// This code was imported from https://github.com/armfazh/rfc7748_precomputed

// CHECK_BMI2ADX triggers bmi2adx if supported,
// otherwise it falls back to legacy code.
#define CHECK_BMI2ADX(label, legacy, bmi2adx) \
    CMPB ·hasBmi2Adx(SB), $0 \
    JE label    \
    bmi2adx     \
    RET         \
    label:      \
    legacy      \
    RET

// cselect is a conditional move
// if b=1: it copies y into x;
// if b=0: x remains with the same value;
// if b<> 0,1: undefined.
// Uses: AX, DX, FLAGS
// Instr: x86_64, cmov
#define cselect(x,y,b) \
    TESTQ b, b \
    MOVQ  0+x, AX; MOVQ  0+y, DX; CMOVQNE DX, AX; MOVQ AX,  0+x; \
    MOVQ  8+x, AX; MOVQ  8+y, DX; CMOVQNE DX, AX; MOVQ AX,  8+x; \
    MOVQ 16+x, AX; MOVQ 16+y, DX; CMOVQNE DX, AX; MOVQ AX, 16+x; \
    MOVQ 24+x, AX; MOVQ 24+y, DX; CMOVQNE DX, AX; MOVQ AX, 24+x;

// cswap is a conditional swap
// if b=1: x,y <- y,x;
// if b=0: x,y remain with the same values;
// if b<> 0,1: undefined.
// Uses: AX, DX, R8, FLAGS
// Instr: x86_64, cmov
#define cswap(x,y,b) \
    TESTQ b, b \
    MOVQ  0+x, AX; MOVQ AX, R8; MOVQ  0+y, DX; CMOVQNE DX, AX; CMOVQNE R8, DX; MOVQ AX,  0+x; MOVQ DX,  0+y; \
    MOVQ  8+x, AX; MOVQ AX, R8; MOVQ  8+y, DX; CMOVQNE DX, AX; CMOVQNE R8, DX; MOVQ AX,  8+x; MOVQ DX,  8+y; \
    MOVQ 16+x, AX; MOVQ AX, R8; MOVQ 16+y, DX; CMOVQNE DX, AX; CMOVQNE R8, DX; MOVQ AX, 16+x; MOVQ DX, 16+y; \
    MOVQ 24+x, AX; MOVQ AX, R8; MOVQ 24+y, DX; CMOVQNE DX, AX; CMOVQNE R8, DX; MOVQ AX, 24+x; MOVQ DX, 24+y;

// additionLeg adds x and y and stores in z
// Uses: AX, DX, R8-R11, FLAGS
// Instr: x86_64, cmov
#define additionLeg(z,x,y) \
    MOVL $38, AX; \
    MOVL $0, DX; \
    MOVQ  0+x,  R8; ADDQ  0+y,  R8; \
    MOVQ  8+x,  R9; ADCQ  8+y,  R9; \
    MOVQ 16+x, R10; ADCQ 16+y, R10; \
    MOVQ 24+x, R11; ADCQ 24+y, R11; \
    CMOVQCS AX, DX; \
    ADDQ DX,  R8; \
    ADCQ $0,  R9; MOVQ  R9,  8+z; \
    ADCQ $0, R10; MOVQ R10, 16+z; \
    ADCQ $0, R11; MOVQ R11, 24+z; \
    MOVL $0, DX; \
    CMOVQCS AX, DX; \
    ADDQ DX, R8; MOVQ R8, 0+z;

// additionAdx adds x and y and stores in z
// Uses: AX, DX, R8-R11, FLAGS
// Instr: x86_64, cmov, adx
#define additionAdx(z,x,y) \
    MOVL $38, AX; \
    XORL DX, DX; \
    MOVQ  0+x,  R8; ADCXQ  0+y,  R8; \
    MOVQ  8+x,  R9; ADCXQ  8+y,  R9; \
    MOVQ 16+x, R10; ADCXQ 16+y, R10; \
    MOVQ 24+x, R11; ADCXQ 24+y, R11; \
    CMOVQCS AX, DX; \
    XORL AX, AX; \
    ADCXQ DX,  R8; \
    ADCXQ AX,  R9; MOVQ  R9,  8+z; \
    ADCXQ AX, R10; MOVQ R10, 16+z; \
    ADCXQ AX, R11; MOVQ R11, 24+z; \
    MOVL $38, DX; \
    CMOVQCS DX, AX; \
    ADDQ AX, R8; MOVQ R8, 0+z;

// subtraction subtracts y from x and stores in z
// Uses: AX, DX, R8-R11, FLAGS
// Instr: x86_64, cmov
#define subtraction(z,x,y) \
    MOVL $38, AX; \
    MOVQ  0+x,  R8; SUBQ  0+y,  R8; \
    MOVQ  8+x,  R9; SBBQ  8+y,  R9; \
    MOVQ 16+x, R10; SBBQ 16+y, R10; \
    MOVQ 24+x, R11; SBBQ 24+y, R11; \
    MOVL $0, DX; \
    CMOVQCS AX, DX; \
    SUBQ DX,  R8; \
    SBBQ $0,  R9; MOVQ  R9,  8+z; \
    SBBQ $0, R10; MOVQ R10, 16+z; \
    SBBQ $0, R11; MOVQ R11, 24+z; \
    MOVL $0, DX; \
    CMOVQCS AX, DX; \
    SUBQ DX, R8; MOVQ R8, 0+z;

// integerMulAdx multiplies x and y and stores in z
// Uses: AX, DX, R8-R15, FLAGS
// Instr: x86_64, bmi2, adx
#define integerMulAdx(z,x,y) \
    MOVL $0, R15; \
    MOVQ 0+y, DX; XORL AX, AX; \
    MULXQ  0+x, AX,  R8; MOVQ AX, 0+z; \
    MULXQ  8+x, AX,  R9; ADCXQ AX,  R8; \
    MULXQ 16+x, AX, R10; ADCXQ AX,  R9; \
    MULXQ 24+x, AX, R11; ADCXQ AX, R10; \
    MOVL $0, AX;;;;;;;;; ADCXQ AX, R11; \
    MOVQ 8+y, DX; XORL AX, AX; \
    MULXQ  0+x, AX, R12; ADCXQ  R8,  AX; MOVQ AX, 8+z; \
    MULXQ  8+x, AX, R13; ADCXQ  R9, R12; ADOXQ AX, R12; \
    MULXQ 16+x, AX, R14; ADCXQ R10, R13; ADOXQ AX, R13; \
    MULXQ 24+x, AX, R15; ADCXQ R11, R14; ADOXQ AX, R14; \
    MOVL $0, AX;;;;;;;;; ADCXQ AX, R15; ADOXQ AX, R15; \
    MOVQ 16+y, DX; XORL AX, AX; \
    MULXQ  0+x, AX,  R8; ADCXQ R12,  AX; MOVQ AX, 16+z; \
    MULXQ  8+x, AX,  R9; ADCXQ R13,  R8; ADOXQ AX,  R8; \
    MULXQ 16+x, AX, R10; ADCXQ R14,  R9; ADOXQ AX,  R9; \
    MULXQ 24+x, AX, R11; ADCXQ R15, R10; ADOXQ AX, R10; \
    MOVL $0, AX;;;;;;;;; ADCXQ AX, R11; ADOXQ AX, R11; \
    MOVQ 24+y, DX; XORL AX, AX; \
    MULXQ  0+x, AX, R12; ADCXQ  R8,  AX; MOVQ AX, 24+z; \
    MULXQ  8+x, AX, R13; ADCXQ  R9, R12; ADOXQ AX, R12; MOVQ R12, 32+z; \
    MULXQ 16+x, AX, R14; ADCXQ R10, R13; ADOXQ AX, R13; MOVQ R13, 40+z; \
    MULXQ 24+x, AX, R15; ADCXQ R11, R14; ADOXQ AX, R14; MOVQ R14, 48+z; \
    MOVL $0, AX;;;;;;;;; ADCXQ AX, R15; ADOXQ AX, R15; MOVQ R15, 56+z;

// integerMulLeg multiplies x and y and stores in z
// Uses: AX, DX, R8-R15, FLAGS
// Instr: x86_64
#define integerMulLeg(z,x,y) \
    MOVQ 0+y, R8; \
    MOVQ  0+x, AX; MULQ R8; MOVQ AX, 0+z; MOVQ DX, R15; \
    MOVQ  8+x, AX; MULQ R8; MOVQ AX, R13; MOVQ DX, R10; \
    MOVQ 16+x, AX; MULQ R8; MOVQ AX, R14; MOVQ DX, R11; \
    MOVQ 24+x, AX; MULQ R8; \
    ADDQ R13, R15; \
    ADCQ R14, R10; MOVQ R10, 16+z; \
    ADCQ  AX, R11; MOVQ R11, 24+z; \
    ADCQ  $0,  DX; MOVQ  DX, 32+z; \
    MOVQ 8+y, R8; \
    MOVQ  0+x, AX; MULQ R8; MOVQ AX, R12; MOVQ DX,  R9; \
    MOVQ  8+x, AX; MULQ R8; MOVQ AX, R13; MOVQ DX, R10; \
    MOVQ 16+x, AX; MULQ R8; MOVQ AX, R14; MOVQ DX, R11; \
    MOVQ 24+x, AX; MULQ R8; \
    ADDQ R12, R15; MOVQ R15, 8+z; \
    ADCQ R13,  R9; \
    ADCQ R14, R10; \
    ADCQ  AX, R11; \
    ADCQ  $0,  DX; \
    ADCQ 16+z,  R9; MOVQ  R9, R15; \
    ADCQ 24+z, R10; MOVQ R10, 24+z; \
    ADCQ 32+z, R11; MOVQ R11, 32+z; \
    ADCQ $0, DX; MOVQ DX, 40+z; \
    MOVQ 16+y, R8; \
    MOVQ  0+x, AX; MULQ R8; MOVQ AX, R12; MOVQ DX,  R9; \
    MOVQ  8+x, AX; MULQ R8; MOVQ AX, R13; MOVQ DX, R10; \
    MOVQ 16+x, AX; MULQ R8; MOVQ AX, R14; MOVQ DX, R11; \
    MOVQ 24+x, AX; MULQ R8; \
    ADDQ R12, R15; MOVQ R15, 16+z; \
    ADCQ R13,  R9; \
    ADCQ R14, R10; \
    ADCQ  AX, R11; \
    ADCQ  $0,  DX; \
    ADCQ 24+z,  R9; MOVQ  R9, R15; \
    ADCQ 32+z, R10; MOVQ R10, 32+z; \
    ADCQ 40+z, R11; MOVQ R11, 40+z; \
    ADCQ $0, DX; MOVQ DX, 48+z; \
    MOVQ 24+y, R8; \
    MOVQ  0+x, AX; MULQ R8; MOVQ AX, R12; MOVQ DX,  R9; \
    MOVQ  8+x, AX; MULQ R8; MOVQ AX, R13; MOVQ DX, R10; \
    MOVQ 16+x, AX; MULQ R8; MOVQ AX, R14; MOVQ DX, R11; \
    MOVQ 24+x, AX; MULQ R8; \
    ADDQ R12, R15; MOVQ R15, 24+z; \
    ADCQ R13,  R9; \
    ADCQ R14, R10; \
    ADCQ  AX, R11; \
    ADCQ  $0,  DX; \
    ADCQ 32+z,  R9; MOVQ  R9, 32+z; \
    ADCQ 40+z, R10; MOVQ R10, 40+z; \
    ADCQ 48+z, R11; MOVQ R11, 48+z; \
    ADCQ $0, DX; MOVQ DX, 56+z;

// integerSqrLeg squares x and stores in z
// Uses: AX, CX, DX, R8-R15, FLAGS
// Instr: x86_64
#define integerSqrLeg(z,x) \
    MOVQ 0+x, R8; \
    MOVQ  8+x, AX; MULQ R8; MOVQ AX,  R9; MOVQ DX, R10; /* A[0]*A[1] */ \
    MOVQ 16+x, AX; MULQ R8; MOVQ AX, R14; MOVQ DX, R11; /* A[0]*A[2] */ \
    MOVQ 24+x, AX; MULQ R8; MOVQ AX, R15; MOVQ DX, R12; /* A[0]*A[3] */ \
    MOVQ 24+x, R8; \
    MOVQ  8+x, AX; MULQ R8; MOVQ AX,  CX; MOVQ DX, R13; /* A[3]*A[1] */ \
    MOVQ 16+x, AX; MULQ R8; /* A[3]*A[2] */ \
    \
    ADDQ R14, R10;\
    ADCQ R15, R11; MOVL $0, R15;\
    ADCQ  CX, R12;\
    ADCQ  AX, R13;\
    ADCQ  $0,  DX; MOVQ DX, R14;\
    MOVQ 8+x, AX; MULQ 16+x;\
    \
    ADDQ AX, R11;\
    ADCQ DX, R12;\
    ADCQ $0, R13;\
    ADCQ $0, R14;\
    ADCQ $0, R15;\
    \
    SHLQ $1, R14, R15; MOVQ R15, 56+z;\
    SHLQ $1, R13, R14; MOVQ R14, 48+z;\
    SHLQ $1, R12, R13; MOVQ R13, 40+z;\
    SHLQ $1, R11, R12; MOVQ R12, 32+z;\
    SHLQ $1, R10, R11; MOVQ R11, 24+z;\
    SHLQ $1,  R9, R10; MOVQ R10, 16+z;\
    SHLQ $1,  R9;      MOVQ  R9,  8+z;\
    \
    MOVQ  0+x, AX; MULQ AX; MOVQ AX, 0+z; MOVQ DX,  R9;\
    MOVQ  8+x, AX; MULQ AX; MOVQ AX, R10; MOVQ DX, R11;\
    MOVQ 16+x, AX; MULQ AX; MOVQ AX, R12; MOVQ DX, R13;\
    MOVQ 24+x, AX; MULQ AX; MOVQ AX, R14; MOVQ DX, R15;\
    \
    ADDQ  8+z,  R9; MOVQ  R9,  8+z;\
    ADCQ 16+z, R10; MOVQ R10, 16+z;\
    ADCQ 24+z, R11; MOVQ R11, 24+z;\
    ADCQ 32+z, R12; MOVQ R12, 32+z;\
    ADCQ 40+z, R13; MOVQ R13, 40+z;\
    ADCQ 48+z, R14; MOVQ R14, 48+z;\
    ADCQ 56+z, R15; MOVQ R15, 56+z;

// integerSqrAdx squares x and stores in z
// Uses: AX, CX, DX, R8-R15, FLAGS
// Instr: x86_64, bmi2, adx
#define integerSqrAdx(z,x) \
    MOVQ 0+x, DX; /* A[0] */ \
    MULXQ  8+x, R8, R14; /* A[1]*A[0] */ XORL R15, R15; \
    MULXQ 16+x, R9, R10; /* A[2]*A[0] */ ADCXQ R14, R9; \
    MULXQ 24+x, AX,  CX; /* A[3]*A[0] */ ADCXQ AX, R10; \
    MOVQ 24+x, DX; /* A[3] */ \
    MULXQ  8+x, R11, R12; /* A[1]*A[3] */ ADCXQ CX, R11; \
    MULXQ 16+x,  AX, R13; /* A[2]*A[3] */ ADCXQ AX, R12; \
    MOVQ 8+x, DX; /* A[1] */ ADCXQ R15, R13; \
    MULXQ 16+x, AX, CX; /* A[2]*A[1] */ MOVL $0, R14; \
    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ADCXQ R15, R14; \
    XORL R15, R15; \
    ADOXQ  AX, R10; ADCXQ  R8,  R8; \
    ADOXQ  CX, R11; ADCXQ  R9,  R9; \
    ADOXQ R15, R12; ADCXQ R10, R10; \
    ADOXQ R15, R13; ADCXQ R11, R11; \
    ADOXQ R15, R14; ADCXQ R12, R12; \
    ;;;;;;;;;;;;;;; ADCXQ R13, R13; \
    ;;;;;;;;;;;;;;; ADCXQ R14, R14; \
    MOVQ 0+x, DX; MULXQ DX, AX, CX; /* A[0]^2 */ \
    ;;;;;;;;;;;;;;; MOVQ AX, 0+z; \
    ADDQ CX,  R8; MOVQ  R8,  8+z; \
    MOVQ 8+x, DX; MULXQ DX, AX, CX; /* A[1]^2 */ \
    ADCQ AX,  R9; MOVQ  R9, 16+z; \
    ADCQ CX, R10; MOVQ R10, 24+z; \
    MOVQ 16+x, DX; MULXQ DX, AX, CX; /* A[2]^2 */ \
    ADCQ AX, R11; MOVQ R11, 32+z; \
    ADCQ CX, R12; MOVQ R12, 40+z; \
    MOVQ 24+x, DX; MULXQ DX, AX, CX; /* A[3]^2 */ \
    ADCQ AX, R13; MOVQ R13, 48+z; \
    ADCQ CX, R14; MOVQ R14, 56+z;

// reduceFromDoubleLeg finds z congruent to x modulo p such that 0<z<2^256
// Uses: AX, DX, R8-R13, FLAGS
// Instr: x86_64
#define reduceFromDoubleLeg(z,x) \
    /* 2*C = 38 = 2^256 */ \
    MOVL $38, AX; MULQ 32+x; MOVQ AX,  R8; MOVQ DX,  R9; /* C*C[4] */ \
    MOVL $38, AX; MULQ 40+x; MOVQ AX, R12; MOVQ DX, R10; /* C*C[5] */ \
    MOVL $38, AX; MULQ 48+x; MOVQ AX, R13; MOVQ DX, R11; /* C*C[6] */ \
    MOVL $38, AX; MULQ 56+x; /* C*C[7] */ \
    ADDQ R12,  R9; \
    ADCQ R13, R10; \
    ADCQ  AX, R11; \
    ADCQ  $0,  DX; \
    ADDQ  0+x,  R8; \
    ADCQ  8+x,  R9; \
    ADCQ 16+x, R10; \
    ADCQ 24+x, R11; \
    ADCQ $0, DX; \
    MOVL $38, AX; \
    IMULQ AX, DX; /* C*C[4], CF=0, OF=0 */ \
    ADDQ DX, R8; \
    ADCQ $0,  R9; MOVQ  R9,  8+z; \
    ADCQ $0, R10; MOVQ R10, 16+z; \
    ADCQ $0, R11; MOVQ R11, 24+z; \
    MOVL $0, DX; \
    CMOVQCS AX, DX; \
    ADDQ DX, R8; MOVQ R8, 0+z;

// reduceFromDoubleAdx finds z congruent to x modulo p such that 0<z<2^256
// Uses: AX, DX, R8-R13, FLAGS
// Instr: x86_64, bmi2, adx
#define reduceFromDoubleAdx(z,x) \
    MOVL $38, DX; /* 2*C = 38 = 2^256 */ \
    MULXQ 32+x,  R8, R10; /* C*C[4] */ XORL AX, AX; ADOXQ 0+x, R8; \
    MULXQ 40+x,  R9, R11; /* C*C[5] */ ADCXQ R10,  R9; ADOXQ  8+x,  R9; \
    MULXQ 48+x, R10, R13; /* C*C[6] */ ADCXQ R11, R10; ADOXQ 16+x, R10; \
    MULXQ 56+x, R11, R12; /* C*C[7] */ ADCXQ R13, R11; ADOXQ 24+x, R11; \
    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ADCXQ AX, R12; ADOXQ AX, R12; \
    IMULQ DX, R12; /* C*C[4], CF=0, OF=0 */ \
    ADCXQ R12, R8; \
    ADCXQ AX,  R9; MOVQ  R9,  8+z; \
    ADCXQ AX, R10; MOVQ R10, 16+z; \
    ADCXQ AX, R11; MOVQ R11, 24+z; \
    MOVL $0, R12; \
    CMOVQCS DX, R12; \
    ADDQ R12, R8; MOVQ R8, 0+z;

// addSub calculates two operations: x,y = x+y,x-y
// Uses: AX, DX, R8-R15, FLAGS
#define addSub(x,y) \
    MOVL $38, AX; \
    XORL DX, DX; \
    MOVQ  0+x,  R8; MOVQ  R8, R12; ADDQ  0+y,  R8; \
    MOVQ  8+x,  R9; MOVQ  R9, R13; ADCQ  8+y,  R9; \
    MOVQ 16+x, R10; MOVQ R10, R14; ADCQ 16+y, R10; \
    MOVQ 24+x, R11; MOVQ R11, R15; ADCQ 24+y, R11; \
    CMOVQCS AX, DX; \
    XORL AX, AX; \
    ADDQ DX, R8; \
    ADCQ $0, R9; \
    ADCQ $0, R10; \
    ADCQ $0, R11; \
    MOVL $38, DX; \
    CMOVQCS DX, AX; \
    ADDQ AX, R8; \
    MOVL $38, AX; \
    SUBQ  0+y, R12; \
    SBBQ  8+y, R13; \
    SBBQ 16+y, R14; \
    SBBQ 24+y, R15; \
    MOVL $0, DX; \
    CMOVQCS AX, DX; \
    SUBQ DX, R12; \
    SBBQ $0, R13; \
    SBBQ $0, R14; \
    SBBQ $0, R15; \
    MOVL $0, DX; \
    CMOVQCS AX, DX; \
    SUBQ DX, R12; \
    MOVQ  R8,  0+x; \
    MOVQ  R9,  8+x; \
    MOVQ R10, 16+x; \
    MOVQ R11, 24+x; \
    MOVQ R12,  0+y; \
    MOVQ R13,  8+y; \
    MOVQ R14, 16+y; \
    MOVQ R15, 24+y;
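The /* 2*C = 38 = 2^256 */ shorthand in the reduction macros above relies on the identity 2^256 ≡ 38 (mod 2^255-19), which is why the high 256 bits of a 512-bit product fold in after a multiply by 38. A small math/big check of that identity (illustrative only):

package main

import (
    "fmt"
    "math/big"
)

func main() {
    one := big.NewInt(1)
    p := new(big.Int).Sub(new(big.Int).Lsh(one, 255), big.NewInt(19)) // p = 2^255 - 19

    r := new(big.Int).Mod(new(big.Int).Lsh(one, 256), p) // 2^256 mod p
    fmt.Println("2^256 mod p =", r)                      // prints 38
}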
vendor/github.com/cloudflare/circl/math/fp25519/fp_amd64.s (generated, vendored, new file, 111 lines)
@@ -0,0 +1,111 @@
// +build amd64

#include "textflag.h"
#include "fp_amd64.h"

// func cmovAmd64(x, y *Elt, n uint)
TEXT ·cmovAmd64(SB),NOSPLIT,$0-24
    MOVQ x+0(FP), DI
    MOVQ y+8(FP), SI
    MOVQ n+16(FP), BX
    cselect(0(DI),0(SI),BX)
    RET

// func cswapAmd64(x, y *Elt, n uint)
TEXT ·cswapAmd64(SB),NOSPLIT,$0-24
    MOVQ x+0(FP), DI
    MOVQ y+8(FP), SI
    MOVQ n+16(FP), BX
    cswap(0(DI),0(SI),BX)
    RET

// func subAmd64(z, x, y *Elt)
TEXT ·subAmd64(SB),NOSPLIT,$0-24
    MOVQ z+0(FP), DI
    MOVQ x+8(FP), SI
    MOVQ y+16(FP), BX
    subtraction(0(DI),0(SI),0(BX))
    RET

// func addsubAmd64(x, y *Elt)
TEXT ·addsubAmd64(SB),NOSPLIT,$0-16
    MOVQ x+0(FP), DI
    MOVQ y+8(FP), SI
    addSub(0(DI),0(SI))
    RET

#define addLegacy \
    additionLeg(0(DI),0(SI),0(BX))
#define addBmi2Adx \
    additionAdx(0(DI),0(SI),0(BX))

#define mulLegacy \
    integerMulLeg(0(SP),0(SI),0(BX)) \
    reduceFromDoubleLeg(0(DI),0(SP))
#define mulBmi2Adx \
    integerMulAdx(0(SP),0(SI),0(BX)) \
    reduceFromDoubleAdx(0(DI),0(SP))

#define sqrLegacy \
    integerSqrLeg(0(SP),0(SI)) \
    reduceFromDoubleLeg(0(DI),0(SP))
#define sqrBmi2Adx \
    integerSqrAdx(0(SP),0(SI)) \
    reduceFromDoubleAdx(0(DI),0(SP))

// func addAmd64(z, x, y *Elt)
TEXT ·addAmd64(SB),NOSPLIT,$0-24
    MOVQ z+0(FP), DI
    MOVQ x+8(FP), SI
    MOVQ y+16(FP), BX
    CHECK_BMI2ADX(LADD, addLegacy, addBmi2Adx)

// func mulAmd64(z, x, y *Elt)
TEXT ·mulAmd64(SB),NOSPLIT,$64-24
    MOVQ z+0(FP), DI
    MOVQ x+8(FP), SI
    MOVQ y+16(FP), BX
    CHECK_BMI2ADX(LMUL, mulLegacy, mulBmi2Adx)

// func sqrAmd64(z, x *Elt)
TEXT ·sqrAmd64(SB),NOSPLIT,$64-16
    MOVQ z+0(FP), DI
    MOVQ x+8(FP), SI
    CHECK_BMI2ADX(LSQR, sqrLegacy, sqrBmi2Adx)

// func modpAmd64(z *Elt)
TEXT ·modpAmd64(SB),NOSPLIT,$0-8
    MOVQ z+0(FP), DI

    MOVQ   (DI),  R8
    MOVQ  8(DI),  R9
    MOVQ 16(DI), R10
    MOVQ 24(DI), R11

    MOVL $19, AX
    MOVL $38, CX

    BTRQ $63, R11    // PUT BIT 255 IN CARRY FLAG AND CLEAR
    CMOVLCC AX, CX   // C[255] ? 38 : 19

    // ADD EITHER 19 OR 38 TO C
    ADDQ CX, R8
    ADCQ $0, R9
    ADCQ $0, R10
    ADCQ $0, R11

    // TEST FOR BIT 255 AGAIN; ONLY TRIGGERED ON OVERFLOW MODULO 2^255-19
    MOVL $0, CX
    CMOVLPL AX, CX   // C[255] ? 0 : 19
    BTRQ $63, R11    // CLEAR BIT 255

    // SUBTRACT 19 IF NECESSARY
    SUBQ CX, R8
    MOVQ R8, (DI)
    SBBQ $0, R9
    MOVQ R9, 8(DI)
    SBBQ $0, R10
    MOVQ R10, 16(DI)
    SBBQ $0, R11
    MOVQ R11, 24(DI)
    RET
vendor/github.com/cloudflare/circl/math/fp25519/fp_generic.go (generated, vendored, new file, 317 lines)
@@ -0,0 +1,317 @@
package fp25519

import (
    "encoding/binary"
    "math/bits"
)

func cmovGeneric(x, y *Elt, n uint) {
    m := -uint64(n & 0x1)
    x0 := binary.LittleEndian.Uint64(x[0*8 : 1*8])
    x1 := binary.LittleEndian.Uint64(x[1*8 : 2*8])
    x2 := binary.LittleEndian.Uint64(x[2*8 : 3*8])
    x3 := binary.LittleEndian.Uint64(x[3*8 : 4*8])

    y0 := binary.LittleEndian.Uint64(y[0*8 : 1*8])
    y1 := binary.LittleEndian.Uint64(y[1*8 : 2*8])
    y2 := binary.LittleEndian.Uint64(y[2*8 : 3*8])
    y3 := binary.LittleEndian.Uint64(y[3*8 : 4*8])

    x0 = (x0 &^ m) | (y0 & m)
    x1 = (x1 &^ m) | (y1 & m)
    x2 = (x2 &^ m) | (y2 & m)
    x3 = (x3 &^ m) | (y3 & m)

    binary.LittleEndian.PutUint64(x[0*8:1*8], x0)
    binary.LittleEndian.PutUint64(x[1*8:2*8], x1)
    binary.LittleEndian.PutUint64(x[2*8:3*8], x2)
    binary.LittleEndian.PutUint64(x[3*8:4*8], x3)
}

func cswapGeneric(x, y *Elt, n uint) {
    m := -uint64(n & 0x1)
    x0 := binary.LittleEndian.Uint64(x[0*8 : 1*8])
    x1 := binary.LittleEndian.Uint64(x[1*8 : 2*8])
    x2 := binary.LittleEndian.Uint64(x[2*8 : 3*8])
    x3 := binary.LittleEndian.Uint64(x[3*8 : 4*8])

    y0 := binary.LittleEndian.Uint64(y[0*8 : 1*8])
    y1 := binary.LittleEndian.Uint64(y[1*8 : 2*8])
    y2 := binary.LittleEndian.Uint64(y[2*8 : 3*8])
    y3 := binary.LittleEndian.Uint64(y[3*8 : 4*8])

    t0 := m & (x0 ^ y0)
    t1 := m & (x1 ^ y1)
    t2 := m & (x2 ^ y2)
    t3 := m & (x3 ^ y3)
    x0 ^= t0
    x1 ^= t1
    x2 ^= t2
    x3 ^= t3
    y0 ^= t0
    y1 ^= t1
    y2 ^= t2
    y3 ^= t3

    binary.LittleEndian.PutUint64(x[0*8:1*8], x0)
    binary.LittleEndian.PutUint64(x[1*8:2*8], x1)
    binary.LittleEndian.PutUint64(x[2*8:3*8], x2)
    binary.LittleEndian.PutUint64(x[3*8:4*8], x3)

    binary.LittleEndian.PutUint64(y[0*8:1*8], y0)
    binary.LittleEndian.PutUint64(y[1*8:2*8], y1)
    binary.LittleEndian.PutUint64(y[2*8:3*8], y2)
    binary.LittleEndian.PutUint64(y[3*8:4*8], y3)
}

func addGeneric(z, x, y *Elt) {
    x0 := binary.LittleEndian.Uint64(x[0*8 : 1*8])
    x1 := binary.LittleEndian.Uint64(x[1*8 : 2*8])
    x2 := binary.LittleEndian.Uint64(x[2*8 : 3*8])
    x3 := binary.LittleEndian.Uint64(x[3*8 : 4*8])

    y0 := binary.LittleEndian.Uint64(y[0*8 : 1*8])
    y1 := binary.LittleEndian.Uint64(y[1*8 : 2*8])
    y2 := binary.LittleEndian.Uint64(y[2*8 : 3*8])
    y3 := binary.LittleEndian.Uint64(y[3*8 : 4*8])

    z0, c0 := bits.Add64(x0, y0, 0)
    z1, c1 := bits.Add64(x1, y1, c0)
    z2, c2 := bits.Add64(x2, y2, c1)
    z3, c3 := bits.Add64(x3, y3, c2)

    z0, c0 = bits.Add64(z0, (-c3)&38, 0)
    z1, c1 = bits.Add64(z1, 0, c0)
    z2, c2 = bits.Add64(z2, 0, c1)
    z3, c3 = bits.Add64(z3, 0, c2)
    z0, _ = bits.Add64(z0, (-c3)&38, 0)

    binary.LittleEndian.PutUint64(z[0*8:1*8], z0)
    binary.LittleEndian.PutUint64(z[1*8:2*8], z1)
    binary.LittleEndian.PutUint64(z[2*8:3*8], z2)
    binary.LittleEndian.PutUint64(z[3*8:4*8], z3)
}

func subGeneric(z, x, y *Elt) {
    x0 := binary.LittleEndian.Uint64(x[0*8 : 1*8])
    x1 := binary.LittleEndian.Uint64(x[1*8 : 2*8])
    x2 := binary.LittleEndian.Uint64(x[2*8 : 3*8])
    x3 := binary.LittleEndian.Uint64(x[3*8 : 4*8])

    y0 := binary.LittleEndian.Uint64(y[0*8 : 1*8])
    y1 := binary.LittleEndian.Uint64(y[1*8 : 2*8])
    y2 := binary.LittleEndian.Uint64(y[2*8 : 3*8])
    y3 := binary.LittleEndian.Uint64(y[3*8 : 4*8])

    z0, c0 := bits.Sub64(x0, y0, 0)
    z1, c1 := bits.Sub64(x1, y1, c0)
    z2, c2 := bits.Sub64(x2, y2, c1)
    z3, c3 := bits.Sub64(x3, y3, c2)

    z0, c0 = bits.Sub64(z0, (-c3)&38, 0)
    z1, c1 = bits.Sub64(z1, 0, c0)
    z2, c2 = bits.Sub64(z2, 0, c1)
    z3, c3 = bits.Sub64(z3, 0, c2)
    z0, _ = bits.Sub64(z0, (-c3)&38, 0)

    binary.LittleEndian.PutUint64(z[0*8:1*8], z0)
    binary.LittleEndian.PutUint64(z[1*8:2*8], z1)
    binary.LittleEndian.PutUint64(z[2*8:3*8], z2)
    binary.LittleEndian.PutUint64(z[3*8:4*8], z3)
}

func addsubGeneric(x, y *Elt) {
    z := &Elt{}
    addGeneric(z, x, y)
    subGeneric(y, x, y)
    *x = *z
}

func mulGeneric(z, x, y *Elt) {
    x0 := binary.LittleEndian.Uint64(x[0*8 : 1*8])
    x1 := binary.LittleEndian.Uint64(x[1*8 : 2*8])
    x2 := binary.LittleEndian.Uint64(x[2*8 : 3*8])
    x3 := binary.LittleEndian.Uint64(x[3*8 : 4*8])

    y0 := binary.LittleEndian.Uint64(y[0*8 : 1*8])
    y1 := binary.LittleEndian.Uint64(y[1*8 : 2*8])
    y2 := binary.LittleEndian.Uint64(y[2*8 : 3*8])
    y3 := binary.LittleEndian.Uint64(y[3*8 : 4*8])

    yi := y0
    h0, l0 := bits.Mul64(x0, yi)
    h1, l1 := bits.Mul64(x1, yi)
    h2, l2 := bits.Mul64(x2, yi)
    h3, l3 := bits.Mul64(x3, yi)

    z0 := l0
    a0, c0 := bits.Add64(h0, l1, 0)
    a1, c1 := bits.Add64(h1, l2, c0)
    a2, c2 := bits.Add64(h2, l3, c1)
    a3, _ := bits.Add64(h3, 0, c2)

    yi = y1
    h0, l0 = bits.Mul64(x0, yi)
    h1, l1 = bits.Mul64(x1, yi)
    h2, l2 = bits.Mul64(x2, yi)
    h3, l3 = bits.Mul64(x3, yi)

    z1, c0 := bits.Add64(a0, l0, 0)
    h0, c1 = bits.Add64(h0, l1, c0)
    h1, c2 = bits.Add64(h1, l2, c1)
    h2, c3 := bits.Add64(h2, l3, c2)
    h3, _ = bits.Add64(h3, 0, c3)

    a0, c0 = bits.Add64(a1, h0, 0)
    a1, c1 = bits.Add64(a2, h1, c0)
    a2, c2 = bits.Add64(a3, h2, c1)
    a3, _ = bits.Add64(0, h3, c2)

    yi = y2
    h0, l0 = bits.Mul64(x0, yi)
    h1, l1 = bits.Mul64(x1, yi)
    h2, l2 = bits.Mul64(x2, yi)
    h3, l3 = bits.Mul64(x3, yi)

    z2, c0 := bits.Add64(a0, l0, 0)
    h0, c1 = bits.Add64(h0, l1, c0)
    h1, c2 = bits.Add64(h1, l2, c1)
    h2, c3 = bits.Add64(h2, l3, c2)
    h3, _ = bits.Add64(h3, 0, c3)

    a0, c0 = bits.Add64(a1, h0, 0)
    a1, c1 = bits.Add64(a2, h1, c0)
    a2, c2 = bits.Add64(a3, h2, c1)
    a3, _ = bits.Add64(0, h3, c2)

    yi = y3
    h0, l0 = bits.Mul64(x0, yi)
    h1, l1 = bits.Mul64(x1, yi)
    h2, l2 = bits.Mul64(x2, yi)
    h3, l3 = bits.Mul64(x3, yi)

    z3, c0 := bits.Add64(a0, l0, 0)
    h0, c1 = bits.Add64(h0, l1, c0)
    h1, c2 = bits.Add64(h1, l2, c1)
    h2, c3 = bits.Add64(h2, l3, c2)
    h3, _ = bits.Add64(h3, 0, c3)

    z4, c0 := bits.Add64(a1, h0, 0)
    z5, c1 := bits.Add64(a2, h1, c0)
    z6, c2 := bits.Add64(a3, h2, c1)
    z7, _ := bits.Add64(0, h3, c2)

    red64(z, z0, z1, z2, z3, z4, z5, z6, z7)
}

func sqrGeneric(z, x *Elt) {
    x0 := binary.LittleEndian.Uint64(x[0*8 : 1*8])
    x1 := binary.LittleEndian.Uint64(x[1*8 : 2*8])
    x2 := binary.LittleEndian.Uint64(x[2*8 : 3*8])
    x3 := binary.LittleEndian.Uint64(x[3*8 : 4*8])

    h0, a0 := bits.Mul64(x0, x1)
    h1, l1 := bits.Mul64(x0, x2)
    h2, l2 := bits.Mul64(x0, x3)
    h3, l3 := bits.Mul64(x3, x1)
    h4, l4 := bits.Mul64(x3, x2)
    h, l := bits.Mul64(x1, x2)

    a1, c0 := bits.Add64(l1, h0, 0)
    a2, c1 := bits.Add64(l2, h1, c0)
    a3, c2 := bits.Add64(l3, h2, c1)
    a4, c3 := bits.Add64(l4, h3, c2)
    a5, _ := bits.Add64(h4, 0, c3)

    a2, c0 = bits.Add64(a2, l, 0)
    a3, c1 = bits.Add64(a3, h, c0)
    a4, c2 = bits.Add64(a4, 0, c1)
    a5, c3 = bits.Add64(a5, 0, c2)
    a6, _ := bits.Add64(0, 0, c3)

    a0, c0 = bits.Add64(a0, a0, 0)
    a1, c1 = bits.Add64(a1, a1, c0)
    a2, c2 = bits.Add64(a2, a2, c1)
    a3, c3 = bits.Add64(a3, a3, c2)
    a4, c4 := bits.Add64(a4, a4, c3)
    a5, c5 := bits.Add64(a5, a5, c4)
    a6, _ = bits.Add64(a6, a6, c5)

    b1, b0 := bits.Mul64(x0, x0)
    b3, b2 := bits.Mul64(x1, x1)
    b5, b4 := bits.Mul64(x2, x2)
    b7, b6 := bits.Mul64(x3, x3)

    b1, c0 = bits.Add64(b1, a0, 0)
    b2, c1 = bits.Add64(b2, a1, c0)
    b3, c2 = bits.Add64(b3, a2, c1)
    b4, c3 = bits.Add64(b4, a3, c2)
    b5, c4 = bits.Add64(b5, a4, c3)
    b6, c5 = bits.Add64(b6, a5, c4)
    b7, _ = bits.Add64(b7, a6, c5)

    red64(z, b0, b1, b2, b3, b4, b5, b6, b7)
}

func modpGeneric(x *Elt) {
    x0 := binary.LittleEndian.Uint64(x[0*8 : 1*8])
    x1 := binary.LittleEndian.Uint64(x[1*8 : 2*8])
    x2 := binary.LittleEndian.Uint64(x[2*8 : 3*8])
    x3 := binary.LittleEndian.Uint64(x[3*8 : 4*8])

    // CX = C[255] ? 38 : 19
    cx := uint64(19) << (x3 >> 63)
    // PUT BIT 255 IN CARRY FLAG AND CLEAR
    x3 &^= 1 << 63

    x0, c0 := bits.Add64(x0, cx, 0)
    x1, c1 := bits.Add64(x1, 0, c0)
    x2, c2 := bits.Add64(x2, 0, c1)
    x3, _ = bits.Add64(x3, 0, c2)

    // TEST FOR BIT 255 AGAIN; ONLY TRIGGERED ON OVERFLOW MODULO 2^255-19
    // cx = C[255] ? 0 : 19
    cx = uint64(19) &^ (-(x3 >> 63))
    // CLEAR BIT 255
    x3 &^= 1 << 63

    x0, c0 = bits.Sub64(x0, cx, 0)
    x1, c1 = bits.Sub64(x1, 0, c0)
    x2, c2 = bits.Sub64(x2, 0, c1)
    x3, _ = bits.Sub64(x3, 0, c2)

    binary.LittleEndian.PutUint64(x[0*8:1*8], x0)
    binary.LittleEndian.PutUint64(x[1*8:2*8], x1)
    binary.LittleEndian.PutUint64(x[2*8:3*8], x2)
    binary.LittleEndian.PutUint64(x[3*8:4*8], x3)
}

func red64(z *Elt, x0, x1, x2, x3, x4, x5, x6, x7 uint64) {
    h0, l0 := bits.Mul64(x4, 38)
    h1, l1 := bits.Mul64(x5, 38)
    h2, l2 := bits.Mul64(x6, 38)
    h3, l3 := bits.Mul64(x7, 38)

    l1, c0 := bits.Add64(h0, l1, 0)
    l2, c1 := bits.Add64(h1, l2, c0)
    l3, c2 := bits.Add64(h2, l3, c1)
    l4, _ := bits.Add64(h3, 0, c2)

    l0, c0 = bits.Add64(l0, x0, 0)
    l1, c1 = bits.Add64(l1, x1, c0)
    l2, c2 = bits.Add64(l2, x2, c1)
    l3, c3 := bits.Add64(l3, x3, c2)
    l4, _ = bits.Add64(l4, 0, c3)

    _, l4 = bits.Mul64(l4, 38)
    l0, c0 = bits.Add64(l0, l4, 0)
    z1, c1 := bits.Add64(l1, 0, c0)
    z2, c2 := bits.Add64(l2, 0, c1)
    z3, c3 := bits.Add64(l3, 0, c2)
    z0, _ := bits.Add64(l0, (-c3)&38, 0)

    binary.LittleEndian.PutUint64(z[0*8:1*8], z0)
    binary.LittleEndian.PutUint64(z[1*8:2*8], z1)
    binary.LittleEndian.PutUint64(z[2*8:3*8], z2)
    binary.LittleEndian.PutUint64(z[3*8:4*8], z3)
}
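A sketch cross-checking the package's Add (which dispatches to either the assembly or the generic path above) against a math/big reference; illustrative only, and the fromLE/toLE helpers are ad hoc names, not part of the package:

package main

import (
    "bytes"
    "crypto/rand"
    "fmt"
    "math/big"

    "github.com/cloudflare/circl/math/fp25519"
)

func fromLE(b []byte) *big.Int { // little-endian bytes -> big.Int
    r := make([]byte, len(b))
    for i := range b {
        r[i] = b[len(b)-1-i]
    }
    return new(big.Int).SetBytes(r)
}

func toLE(v *big.Int) []byte { // big.Int -> Size little-endian bytes
    b := v.FillBytes(make([]byte, fp25519.Size))
    for i, j := 0, len(b)-1; i < j; i, j = i+1, j-1 {
        b[i], b[j] = b[j], b[i]
    }
    return b
}

func main() {
    p := new(big.Int).Sub(new(big.Int).Lsh(big.NewInt(1), 255), big.NewInt(19))

    var x, y, z fp25519.Elt
    _, _ = rand.Read(x[:])
    _, _ = rand.Read(y[:])
    x[31] &= 0x7f // keep inputs under 2^255
    y[31] &= 0x7f

    fp25519.Add(&z, &x, &y)
    got := make([]byte, fp25519.Size)
    _ = fp25519.ToBytes(got, &z) // canonical little-endian result

    want := new(big.Int).Add(fromLE(x[:]), fromLE(y[:]))
    want.Mod(want, p)
    fmt.Println("Add matches math/big:", bytes.Equal(got, toLE(want)))
}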
vendor/github.com/cloudflare/circl/math/fp25519/fp_noasm.go (generated, vendored, new file, 13 lines)
@@ -0,0 +1,13 @@
//go:build !amd64 || purego
// +build !amd64 purego

package fp25519

func cmov(x, y *Elt, n uint)  { cmovGeneric(x, y, n) }
func cswap(x, y *Elt, n uint) { cswapGeneric(x, y, n) }
func add(z, x, y *Elt)        { addGeneric(z, x, y) }
func sub(z, x, y *Elt)        { subGeneric(z, x, y) }
func addsub(x, y *Elt)        { addsubGeneric(x, y) }
func mul(z, x, y *Elt)        { mulGeneric(z, x, y) }
func sqr(z, x *Elt)           { sqrGeneric(z, x) }
func modp(z *Elt)             { modpGeneric(z) }
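Both backends implement the same semantics behind the package-level wrappers, so the conditional primitives behave identically whichever build tag is in effect. A tiny illustration (assuming the vendored import path):

package main

import (
    "fmt"

    "github.com/cloudflare/circl/math/fp25519"
)

func main() {
    var a, b fp25519.Elt
    a[0], b[0] = 1, 2

    fp25519.Cswap(&a, &b, 1) // n=1: swap, so a=2, b=1
    fp25519.Cmov(&a, &b, 0)  // n=0: no-op, a keeps its value

    fmt.Println(a[0], b[0]) // expected: 2 1
}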
vendor/github.com/cloudflare/circl/math/fp448/fp.go (generated, vendored, new file, 164 lines)
@@ -0,0 +1,164 @@
// Package fp448 provides prime field arithmetic over GF(2^448-2^224-1).
package fp448

import (
    "errors"

    "github.com/cloudflare/circl/internal/conv"
)

// Size in bytes of an element.
const Size = 56

// Elt is a prime field element.
type Elt [Size]byte

func (e Elt) String() string { return conv.BytesLe2Hex(e[:]) }

// p is the prime modulus 2^448-2^224-1.
var p = Elt{
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
}

// P returns the prime modulus 2^448-2^224-1.
func P() Elt { return p }

// ToBytes stores in b the little-endian byte representation of x.
func ToBytes(b []byte, x *Elt) error {
    if len(b) != Size {
        return errors.New("wrong size")
    }
    Modp(x)
    copy(b, x[:])
    return nil
}

// IsZero returns true if x is equal to 0.
func IsZero(x *Elt) bool { Modp(x); return *x == Elt{} }

// IsOne returns true if x is equal to 1.
func IsOne(x *Elt) bool { Modp(x); return *x == Elt{1} }

// SetOne assigns x=1.
func SetOne(x *Elt) { *x = Elt{1} }

// One returns the 1 element.
func One() (x Elt) { x = Elt{1}; return }

// Neg calculates z = -x.
func Neg(z, x *Elt) { Sub(z, &p, x) }

// Modp ensures that z is between [0,p-1].
func Modp(z *Elt) { Sub(z, z, &p) }

// InvSqrt calculates z = sqrt(x/y) iff x/y is a quadratic-residue. If so,
// isQR = true; otherwise, isQR = false, since x/y is a quadratic non-residue,
// and z = sqrt(-x/y).
func InvSqrt(z, x, y *Elt) (isQR bool) {
    // First note that x^(2(k+1)) = x^(p-1)/2 * x = legendre(x) * x
    // so that's x if x is a quadratic residue and -x otherwise.
    // Next, y^(6k+3) = y^(4k+2) * y^(2k+1) = y^(p-1) * y^((p-1)/2) = legendre(y).
    // So the z we compute satisfies z^2 y = x^(2(k+1)) y^(6k+3) = legendre(x)*legendre(y).
    // Thus if x and y are quadratic residues, then z is indeed sqrt(x/y).
    t0, t1 := &Elt{}, &Elt{}
    Mul(t0, x, y)         // x*y
    Sqr(t1, y)            // y^2
    Mul(t1, t0, t1)       // x*y^3
    powPminus3div4(z, t1) // (x*y^3)^k
    Mul(z, z, t0)         // z = x*y*(x*y^3)^k = x^(k+1) * y^(3k+1)

    // Check if x/y is a quadratic residue
    Sqr(t0, z)     // z^2
    Mul(t0, t0, y) // y*z^2
    Sub(t0, t0, x) // y*z^2-x
    return IsZero(t0)
}

// Inv calculates z = 1/x mod p.
func Inv(z, x *Elt) {
    // Calculates z = x^(4k+1) = x^(p-3+1) = x^(p-2) = x^-1, where k = (p-3)/4.
    t := &Elt{}
    powPminus3div4(t, x) // t = x^k
    Sqr(t, t)            // t = x^2k
    Sqr(t, t)            // t = x^4k
    Mul(z, t, x)         // z = x^(4k+1)
}

// powPminus3div4 calculates z = x^k mod p, where k = (p-3)/4.
func powPminus3div4(z, x *Elt) {
    x0, x1 := &Elt{}, &Elt{}
    Sqr(z, x)
    Mul(z, z, x)
    Sqr(x0, z)
    Mul(x0, x0, x)
    Sqr(z, x0)
    Sqr(z, z)
    Sqr(z, z)
    Mul(z, z, x0)
    Sqr(x1, z)
    for i := 0; i < 5; i++ {
        Sqr(x1, x1)
    }
    Mul(x1, x1, z)
    Sqr(z, x1)
    for i := 0; i < 11; i++ {
        Sqr(z, z)
    }
    Mul(z, z, x1)
    Sqr(z, z)
    Sqr(z, z)
    Sqr(z, z)
    Mul(z, z, x0)
    Sqr(x1, z)
    for i := 0; i < 26; i++ {
        Sqr(x1, x1)
    }
    Mul(x1, x1, z)
    Sqr(z, x1)
    for i := 0; i < 53; i++ {
        Sqr(z, z)
    }
    Mul(z, z, x1)
    Sqr(z, z)
    Sqr(z, z)
    Sqr(z, z)
    Mul(z, z, x0)
    Sqr(x1, z)
    for i := 0; i < 110; i++ {
        Sqr(x1, x1)
    }
    Mul(x1, x1, z)
    Sqr(z, x1)
    Mul(z, z, x)
    for i := 0; i < 223; i++ {
        Sqr(z, z)
    }
    Mul(z, z, x1)
}

// Cmov assigns y to x if n is 1.
func Cmov(x, y *Elt, n uint) { cmov(x, y, n) }

// Cswap interchanges x and y if n is 1.
func Cswap(x, y *Elt, n uint) { cswap(x, y, n) }

// Add calculates z = x+y mod p.
func Add(z, x, y *Elt) { add(z, x, y) }

// Sub calculates z = x-y mod p.
func Sub(z, x, y *Elt) { sub(z, x, y) }

// AddSub calculates (x,y) = (x+y mod p, x-y mod p).
func AddSub(x, y *Elt) { addsub(x, y) }

// Mul calculates z = x*y mod p.
func Mul(z, x, y *Elt) { mul(z, x, y) }

// Sqr calculates z = x^2 mod p.
func Sqr(z, x *Elt) { sqr(z, x) }
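Inv's comment above asserts x^(4k+1) = x^(p-2) with k = (p-3)/4. Since p ≡ 3 (mod 4), k is an exact integer, and 4k+1 = p-2 follows directly; a math/big sanity check of that exponent arithmetic (illustrative only):

package main

import (
    "fmt"
    "math/big"
)

func main() {
    one := big.NewInt(1)
    p := new(big.Int).Lsh(one, 448) // p = 2^448 - 2^224 - 1
    p.Sub(p, new(big.Int).Lsh(one, 224))
    p.Sub(p, one)

    k := new(big.Int).Sub(p, big.NewInt(3)) // k = (p-3)/4, exact since p ≡ 3 (mod 4)
    k.Rsh(k, 2)

    lhs := new(big.Int).Lsh(k, 2) // 4k + 1
    lhs.Add(lhs, one)
    rhs := new(big.Int).Sub(p, big.NewInt(2)) // p - 2

    fmt.Println("4k+1 == p-2:", lhs.Cmp(rhs) == 0) // expected: true
}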
vendor/github.com/cloudflare/circl/math/fp448/fp_amd64.go (generated, vendored, new file, 43 lines)
@@ -0,0 +1,43 @@
//go:build amd64 && !purego
// +build amd64,!purego

package fp448

import (
    "golang.org/x/sys/cpu"
)

var hasBmi2Adx = cpu.X86.HasBMI2 && cpu.X86.HasADX

var _ = hasBmi2Adx

func cmov(x, y *Elt, n uint)  { cmovAmd64(x, y, n) }
func cswap(x, y *Elt, n uint) { cswapAmd64(x, y, n) }
func add(z, x, y *Elt)        { addAmd64(z, x, y) }
func sub(z, x, y *Elt)        { subAmd64(z, x, y) }
func addsub(x, y *Elt)        { addsubAmd64(x, y) }
func mul(z, x, y *Elt)        { mulAmd64(z, x, y) }
func sqr(z, x *Elt)           { sqrAmd64(z, x) }

/* Functions defined in fp_amd64.s */

//go:noescape
func cmovAmd64(x, y *Elt, n uint)

//go:noescape
func cswapAmd64(x, y *Elt, n uint)

//go:noescape
func addAmd64(z, x, y *Elt)

//go:noescape
func subAmd64(z, x, y *Elt)

//go:noescape
func addsubAmd64(x, y *Elt)

//go:noescape
func mulAmd64(z, x, y *Elt)

//go:noescape
func sqrAmd64(z, x *Elt)
vendor/github.com/cloudflare/circl/math/fp448/fp_amd64.h (generated, vendored, new file, 591 lines)
@ -0,0 +1,591 @@
|
||||
// This code was imported from https://github.com/armfazh/rfc7748_precomputed
|
||||
|
||||
// CHECK_BMI2ADX triggers bmi2adx if supported,
|
||||
// otherwise it fallbacks to legacy code.
|
||||
#define CHECK_BMI2ADX(label, legacy, bmi2adx) \
|
||||
CMPB ·hasBmi2Adx(SB), $0 \
|
||||
JE label \
|
||||
bmi2adx \
|
||||
RET \
|
||||
label: \
|
||||
legacy \
|
||||
RET
|
||||
|
||||
// cselect is a conditional move
|
||||
// if b=1: it copies y into x;
|
||||
// if b=0: x remains with the same value;
|
||||
// if b<> 0,1: undefined.
|
||||
// Uses: AX, DX, FLAGS
|
||||
// Instr: x86_64, cmov
|
||||
#define cselect(x,y,b) \
|
||||
TESTQ b, b \
|
||||
MOVQ 0+x, AX; MOVQ 0+y, DX; CMOVQNE DX, AX; MOVQ AX, 0+x; \
|
||||
MOVQ 8+x, AX; MOVQ 8+y, DX; CMOVQNE DX, AX; MOVQ AX, 8+x; \
|
||||
MOVQ 16+x, AX; MOVQ 16+y, DX; CMOVQNE DX, AX; MOVQ AX, 16+x; \
|
||||
MOVQ 24+x, AX; MOVQ 24+y, DX; CMOVQNE DX, AX; MOVQ AX, 24+x; \
|
||||
MOVQ 32+x, AX; MOVQ 32+y, DX; CMOVQNE DX, AX; MOVQ AX, 32+x; \
|
||||
MOVQ 40+x, AX; MOVQ 40+y, DX; CMOVQNE DX, AX; MOVQ AX, 40+x; \
|
||||
MOVQ 48+x, AX; MOVQ 48+y, DX; CMOVQNE DX, AX; MOVQ AX, 48+x;
|
||||
|
||||
// cswap is a conditional swap
|
||||
// if b=1: x,y <- y,x;
|
||||
// if b=0: x,y remain with the same values;
|
||||
// if b<> 0,1: undefined.
|
||||
// Uses: AX, DX, R8, FLAGS
|
||||
// Instr: x86_64, cmov
|
||||
#define cswap(x,y,b) \
|
||||
TESTQ b, b \
|
||||
MOVQ 0+x, AX; MOVQ AX, R8; MOVQ 0+y, DX; CMOVQNE DX, AX; CMOVQNE R8, DX; MOVQ AX, 0+x; MOVQ DX, 0+y; \
|
||||
MOVQ 8+x, AX; MOVQ AX, R8; MOVQ 8+y, DX; CMOVQNE DX, AX; CMOVQNE R8, DX; MOVQ AX, 8+x; MOVQ DX, 8+y; \
|
||||
MOVQ 16+x, AX; MOVQ AX, R8; MOVQ 16+y, DX; CMOVQNE DX, AX; CMOVQNE R8, DX; MOVQ AX, 16+x; MOVQ DX, 16+y; \
|
||||
MOVQ 24+x, AX; MOVQ AX, R8; MOVQ 24+y, DX; CMOVQNE DX, AX; CMOVQNE R8, DX; MOVQ AX, 24+x; MOVQ DX, 24+y; \
|
||||
MOVQ 32+x, AX; MOVQ AX, R8; MOVQ 32+y, DX; CMOVQNE DX, AX; CMOVQNE R8, DX; MOVQ AX, 32+x; MOVQ DX, 32+y; \
|
||||
MOVQ 40+x, AX; MOVQ AX, R8; MOVQ 40+y, DX; CMOVQNE DX, AX; CMOVQNE R8, DX; MOVQ AX, 40+x; MOVQ DX, 40+y; \
|
||||
MOVQ 48+x, AX; MOVQ AX, R8; MOVQ 48+y, DX; CMOVQNE DX, AX; CMOVQNE R8, DX; MOVQ AX, 48+x; MOVQ DX, 48+y;
|
||||
|
||||
// additionLeg adds x and y and stores in z
|
||||
// Uses: AX, DX, R8-R14, FLAGS
|
||||
// Instr: x86_64
|
||||
#define additionLeg(z,x,y) \
|
||||
MOVQ 0+x, R8; ADDQ 0+y, R8; \
|
||||
MOVQ 8+x, R9; ADCQ 8+y, R9; \
|
||||
MOVQ 16+x, R10; ADCQ 16+y, R10; \
|
||||
MOVQ 24+x, R11; ADCQ 24+y, R11; \
|
||||
MOVQ 32+x, R12; ADCQ 32+y, R12; \
|
||||
MOVQ 40+x, R13; ADCQ 40+y, R13; \
|
||||
MOVQ 48+x, R14; ADCQ 48+y, R14; \
|
||||
MOVQ $0, AX; ADCQ $0, AX; \
|
||||
MOVQ AX, DX; \
|
||||
SHLQ $32, DX; \
|
||||
ADDQ AX, R8; MOVQ $0, AX; \
|
||||
ADCQ $0, R9; \
|
||||
ADCQ $0, R10; \
|
||||
ADCQ DX, R11; \
|
||||
ADCQ $0, R12; \
|
||||
ADCQ $0, R13; \
|
||||
ADCQ $0, R14; \
|
||||
ADCQ $0, AX; \
|
||||
MOVQ AX, DX; \
|
||||
SHLQ $32, DX; \
|
||||
ADDQ AX, R8; MOVQ R8, 0+z; \
|
||||
ADCQ $0, R9; MOVQ R9, 8+z; \
|
||||
ADCQ $0, R10; MOVQ R10, 16+z; \
|
||||
ADCQ DX, R11; MOVQ R11, 24+z; \
|
||||
ADCQ $0, R12; MOVQ R12, 32+z; \
|
||||
ADCQ $0, R13; MOVQ R13, 40+z; \
|
||||
ADCQ $0, R14; MOVQ R14, 48+z;
|
||||
|
||||
|
||||
// additionAdx adds x and y and stores in z
|
||||
// Uses: AX, DX, R8-R15, FLAGS
|
||||
// Instr: x86_64, adx
|
||||
#define additionAdx(z,x,y) \
|
||||
MOVL $32, R15; \
|
||||
XORL DX, DX; \
|
||||
MOVQ 0+x, R8; ADCXQ 0+y, R8; \
|
||||
MOVQ 8+x, R9; ADCXQ 8+y, R9; \
|
||||
MOVQ 16+x, R10; ADCXQ 16+y, R10; \
|
||||
MOVQ 24+x, R11; ADCXQ 24+y, R11; \
|
||||
MOVQ 32+x, R12; ADCXQ 32+y, R12; \
|
||||
MOVQ 40+x, R13; ADCXQ 40+y, R13; \
|
||||
MOVQ 48+x, R14; ADCXQ 48+y, R14; \
|
||||
;;;;;;;;;;;;;;; ADCXQ DX, DX; \
|
||||
XORL AX, AX; \
|
||||
ADCXQ DX, R8; SHLXQ R15, DX, DX; \
|
||||
ADCXQ AX, R9; \
|
||||
ADCXQ AX, R10; \
|
||||
ADCXQ DX, R11; \
|
||||
ADCXQ AX, R12; \
|
||||
ADCXQ AX, R13; \
|
||||
ADCXQ AX, R14; \
|
||||
ADCXQ AX, AX; \
|
||||
XORL DX, DX; \
|
||||
ADCXQ AX, R8; MOVQ R8, 0+z; SHLXQ R15, AX, AX; \
|
||||
ADCXQ DX, R9; MOVQ R9, 8+z; \
|
||||
ADCXQ DX, R10; MOVQ R10, 16+z; \
|
||||
ADCXQ AX, R11; MOVQ R11, 24+z; \
|
||||
ADCXQ DX, R12; MOVQ R12, 32+z; \
|
||||
ADCXQ DX, R13; MOVQ R13, 40+z; \
|
||||
ADCXQ DX, R14; MOVQ R14, 48+z;
|
||||
|
||||
// subtraction subtracts y from x and stores in z
|
||||
// Uses: AX, DX, R8-R14, FLAGS
|
||||
// Instr: x86_64
|
||||
#define subtraction(z,x,y) \
|
||||
MOVQ 0+x, R8; SUBQ 0+y, R8; \
|
||||
MOVQ 8+x, R9; SBBQ 8+y, R9; \
|
||||
MOVQ 16+x, R10; SBBQ 16+y, R10; \
|
||||
MOVQ 24+x, R11; SBBQ 24+y, R11; \
|
||||
MOVQ 32+x, R12; SBBQ 32+y, R12; \
|
||||
MOVQ 40+x, R13; SBBQ 40+y, R13; \
|
||||
MOVQ 48+x, R14; SBBQ 48+y, R14; \
|
||||
MOVQ $0, AX; SETCS AX; \
|
||||
MOVQ AX, DX; \
|
||||
SHLQ $32, DX; \
|
||||
SUBQ AX, R8; MOVQ $0, AX; \
|
||||
SBBQ $0, R9; \
|
||||
SBBQ $0, R10; \
|
||||
SBBQ DX, R11; \
|
||||
SBBQ $0, R12; \
|
||||
SBBQ $0, R13; \
|
||||
SBBQ $0, R14; \
|
||||
SETCS AX; \
|
||||
MOVQ AX, DX; \
|
||||
SHLQ $32, DX; \
|
||||
SUBQ AX, R8; MOVQ R8, 0+z; \
|
||||
SBBQ $0, R9; MOVQ R9, 8+z; \
|
||||
SBBQ $0, R10; MOVQ R10, 16+z; \
|
||||
SBBQ DX, R11; MOVQ R11, 24+z; \
|
||||
SBBQ $0, R12; MOVQ R12, 32+z; \
|
||||
SBBQ $0, R13; MOVQ R13, 40+z; \
|
||||
SBBQ $0, R14; MOVQ R14, 48+z;
|
||||
|
||||
// maddBmi2Adx multiplies x and y and accumulates in z
|
||||
// Uses: AX, DX, R15, FLAGS
|
||||
// Instr: x86_64, bmi2, adx
|
||||
#define maddBmi2Adx(z,x,y,i,r0,r1,r2,r3,r4,r5,r6) \
|
||||
MOVQ i+y, DX; XORL AX, AX; \
|
||||
MULXQ 0+x, AX, R8; ADOXQ AX, r0; ADCXQ R8, r1; MOVQ r0,i+z; \
|
||||
MULXQ 8+x, AX, r0; ADOXQ AX, r1; ADCXQ r0, r2; MOVQ $0, R8; \
|
||||
MULXQ 16+x, AX, r0; ADOXQ AX, r2; ADCXQ r0, r3; \
|
||||
MULXQ 24+x, AX, r0; ADOXQ AX, r3; ADCXQ r0, r4; \
|
||||
MULXQ 32+x, AX, r0; ADOXQ AX, r4; ADCXQ r0, r5; \
|
||||
MULXQ 40+x, AX, r0; ADOXQ AX, r5; ADCXQ r0, r6; \
|
||||
MULXQ 48+x, AX, r0; ADOXQ AX, r6; ADCXQ R8, r0; \
|
||||
;;;;;;;;;;;;;;;;;;; ADOXQ R8, r0;
|
||||
|
||||
// integerMulAdx multiplies x and y and stores in z
|
||||
// Uses: AX, DX, R8-R15, FLAGS
|
||||
// Instr: x86_64, bmi2, adx
|
||||
#define integerMulAdx(z,x,y) \
|
||||
MOVL $0,R15; \
|
||||
MOVQ 0+y, DX; XORL AX, AX; MOVQ $0, R8; \
|
||||
MULXQ 0+x, AX, R9; MOVQ AX, 0+z; \
|
||||
MULXQ 8+x, AX, R10; ADCXQ AX, R9; \
|
||||
MULXQ 16+x, AX, R11; ADCXQ AX, R10; \
|
||||
MULXQ 24+x, AX, R12; ADCXQ AX, R11; \
|
||||
MULXQ 32+x, AX, R13; ADCXQ AX, R12; \
|
||||
MULXQ 40+x, AX, R14; ADCXQ AX, R13; \
|
||||
MULXQ 48+x, AX, R15; ADCXQ AX, R14; \
|
||||
;;;;;;;;;;;;;;;;;;;; ADCXQ R8, R15; \
|
||||
maddBmi2Adx(z,x,y, 8, R9,R10,R11,R12,R13,R14,R15) \
|
||||
maddBmi2Adx(z,x,y,16,R10,R11,R12,R13,R14,R15, R9) \
|
||||
maddBmi2Adx(z,x,y,24,R11,R12,R13,R14,R15, R9,R10) \
|
||||
maddBmi2Adx(z,x,y,32,R12,R13,R14,R15, R9,R10,R11) \
|
||||
maddBmi2Adx(z,x,y,40,R13,R14,R15, R9,R10,R11,R12) \
|
||||
maddBmi2Adx(z,x,y,48,R14,R15, R9,R10,R11,R12,R13) \
|
||||
MOVQ R15, 56+z; \
|
||||
MOVQ R9, 64+z; \
|
||||
MOVQ R10, 72+z; \
|
||||
MOVQ R11, 80+z; \
|
||||
MOVQ R12, 88+z; \
|
||||
MOVQ R13, 96+z; \
|
||||
MOVQ R14, 104+z;
|
||||
|
||||
// maddLegacy multiplies x and y and accumulates in z
|
||||
// Uses: AX, DX, R15, FLAGS
|
||||
// Instr: x86_64
|
||||
#define maddLegacy(z,x,y,i) \
|
||||
MOVQ i+y, R15; \
|
||||
MOVQ 0+x, AX; MULQ R15; MOVQ AX, R8; ;;;;;;;;;;;; MOVQ DX, R9; \
|
||||
MOVQ 8+x, AX; MULQ R15; ADDQ AX, R9; ADCQ $0, DX; MOVQ DX, R10; \
|
||||
MOVQ 16+x, AX; MULQ R15; ADDQ AX, R10; ADCQ $0, DX; MOVQ DX, R11; \
|
||||
MOVQ 24+x, AX; MULQ R15; ADDQ AX, R11; ADCQ $0, DX; MOVQ DX, R12; \
|
||||
MOVQ 32+x, AX; MULQ R15; ADDQ AX, R12; ADCQ $0, DX; MOVQ DX, R13; \
|
||||
MOVQ 40+x, AX; MULQ R15; ADDQ AX, R13; ADCQ $0, DX; MOVQ DX, R14; \
|
||||
MOVQ 48+x, AX; MULQ R15; ADDQ AX, R14; ADCQ $0, DX; \
|
||||
ADDQ 0+i+z, R8; MOVQ R8, 0+i+z; \
|
||||
ADCQ 8+i+z, R9; MOVQ R9, 8+i+z; \
|
||||
ADCQ 16+i+z, R10; MOVQ R10, 16+i+z; \
|
||||
ADCQ 24+i+z, R11; MOVQ R11, 24+i+z; \
|
||||
ADCQ 32+i+z, R12; MOVQ R12, 32+i+z; \
|
||||
ADCQ 40+i+z, R13; MOVQ R13, 40+i+z; \
|
||||
ADCQ 48+i+z, R14; MOVQ R14, 48+i+z; \
|
||||
ADCQ $0, DX; MOVQ DX, 56+i+z;
|
||||
|
||||
// integerMulLeg multiplies x and y and stores in z
|
||||
// Uses: AX, DX, R8-R15, FLAGS
|
||||
// Instr: x86_64
|
||||
#define integerMulLeg(z,x,y) \
|
||||
MOVQ 0+y, R15; \
|
||||
MOVQ 0+x, AX; MULQ R15; MOVQ AX, 0+z; ;;;;;;;;;;;; MOVQ DX, R8; \
|
||||
MOVQ 8+x, AX; MULQ R15; ADDQ AX, R8; ADCQ $0, DX; MOVQ DX, R9; MOVQ R8, 8+z; \
|
||||
MOVQ 16+x, AX; MULQ R15; ADDQ AX, R9; ADCQ $0, DX; MOVQ DX, R10; MOVQ R9, 16+z; \
|
||||
MOVQ 24+x, AX; MULQ R15; ADDQ AX, R10; ADCQ $0, DX; MOVQ DX, R11; MOVQ R10, 24+z; \
|
||||
MOVQ 32+x, AX; MULQ R15; ADDQ AX, R11; ADCQ $0, DX; MOVQ DX, R12; MOVQ R11, 32+z; \
|
||||
MOVQ 40+x, AX; MULQ R15; ADDQ AX, R12; ADCQ $0, DX; MOVQ DX, R13; MOVQ R12, 40+z; \
|
||||
MOVQ 48+x, AX; MULQ R15; ADDQ AX, R13; ADCQ $0, DX; MOVQ DX,56+z; MOVQ R13, 48+z; \
|
||||
maddLegacy(z,x,y, 8) \
|
||||
maddLegacy(z,x,y,16) \
|
||||
maddLegacy(z,x,y,24) \
|
||||
maddLegacy(z,x,y,32) \
|
||||
maddLegacy(z,x,y,40) \
|
||||
maddLegacy(z,x,y,48)
|
||||
|
||||
// integerSqrLeg squares x and stores in z
|
||||
// Uses: AX, CX, DX, R8-R15, FLAGS
|
||||
// Instr: x86_64
|
||||
#define integerSqrLeg(z,x) \
|
||||
XORL R15, R15; \
|
||||
MOVQ 0+x, CX; \
|
||||
MOVQ CX, AX; MULQ CX; MOVQ AX, 0+z; MOVQ DX, R8; \
|
||||
ADDQ CX, CX; ADCQ $0, R15; \
|
||||
MOVQ 8+x, AX; MULQ CX; ADDQ AX, R8; ADCQ $0, DX; MOVQ DX, R9; MOVQ R8, 8+z; \
|
||||
MOVQ 16+x, AX; MULQ CX; ADDQ AX, R9; ADCQ $0, DX; MOVQ DX, R10; \
|
||||
MOVQ 24+x, AX; MULQ CX; ADDQ AX, R10; ADCQ $0, DX; MOVQ DX, R11; \
|
||||
MOVQ 32+x, AX; MULQ CX; ADDQ AX, R11; ADCQ $0, DX; MOVQ DX, R12; \
|
||||
MOVQ 40+x, AX; MULQ CX; ADDQ AX, R12; ADCQ $0, DX; MOVQ DX, R13; \
|
||||
MOVQ 48+x, AX; MULQ CX; ADDQ AX, R13; ADCQ $0, DX; MOVQ DX, R14; \
|
||||
\
|
||||
MOVQ 8+x, CX; \
|
||||
MOVQ CX, AX; ADDQ R15, CX; MOVQ $0, R15; ADCQ $0, R15; \
|
||||
;;;;;;;;;;;;;; MULQ CX; ADDQ AX, R9; ADCQ $0, DX; MOVQ R9,16+z; \
|
||||
MOVQ R15, AX; NEGQ AX; ANDQ 8+x, AX; ADDQ AX, DX; ADCQ $0, R11; MOVQ DX, R8; \
|
||||
ADDQ 8+x, CX; ADCQ $0, R15; \
|
||||
MOVQ 16+x, AX; MULQ CX; ADDQ AX, R10; ADCQ $0, DX; ADDQ R8, R10; ADCQ $0, DX; MOVQ DX, R8; MOVQ R10, 24+z; \
|
||||
MOVQ 24+x, AX; MULQ CX; ADDQ AX, R11; ADCQ $0, DX; ADDQ R8, R11; ADCQ $0, DX; MOVQ DX, R8; \
|
||||
MOVQ 32+x, AX; MULQ CX; ADDQ AX, R12; ADCQ $0, DX; ADDQ R8, R12; ADCQ $0, DX; MOVQ DX, R8; \
|
||||
MOVQ 40+x, AX; MULQ CX; ADDQ AX, R13; ADCQ $0, DX; ADDQ R8, R13; ADCQ $0, DX; MOVQ DX, R8; \
|
||||
MOVQ 48+x, AX; MULQ CX; ADDQ AX, R14; ADCQ $0, DX; ADDQ R8, R14; ADCQ $0, DX; MOVQ DX, R9; \
|
||||
\
|
||||
MOVQ 16+x, CX; \
|
||||
MOVQ CX, AX; ADDQ R15, CX; MOVQ $0, R15; ADCQ $0, R15; \
|
||||
;;;;;;;;;;;;;; MULQ CX; ADDQ AX, R11; ADCQ $0, DX; MOVQ R11, 32+z; \
|
||||
MOVQ R15, AX; NEGQ AX; ANDQ 16+x,AX; ADDQ AX, DX; ADCQ $0, R13; MOVQ DX, R8; \
|
||||
ADDQ 16+x, CX; ADCQ $0, R15; \
|
||||
MOVQ 24+x, AX; MULQ CX; ADDQ AX, R12; ADCQ $0, DX; ADDQ R8, R12; ADCQ $0, DX; MOVQ DX, R8; MOVQ R12, 40+z; \
|
||||
MOVQ 32+x, AX; MULQ CX; ADDQ AX, R13; ADCQ $0, DX; ADDQ R8, R13; ADCQ $0, DX; MOVQ DX, R8; \
|
||||
MOVQ 40+x, AX; MULQ CX; ADDQ AX, R14; ADCQ $0, DX; ADDQ R8, R14; ADCQ $0, DX; MOVQ DX, R8; \
|
||||
MOVQ 48+x, AX; MULQ CX; ADDQ AX, R9; ADCQ $0, DX; ADDQ R8, R9; ADCQ $0, DX; MOVQ DX,R10; \
|
||||
\
|
||||
MOVQ 24+x, CX; \
|
||||
MOVQ CX, AX; ADDQ R15, CX; MOVQ $0, R15; ADCQ $0, R15; \
|
||||
;;;;;;;;;;;;;; MULQ CX; ADDQ AX, R13; ADCQ $0, DX; MOVQ R13, 48+z; \
|
||||
MOVQ R15, AX; NEGQ AX; ANDQ 24+x,AX; ADDQ AX, DX; ADCQ $0, R9; MOVQ DX, R8; \
|
||||
ADDQ 24+x, CX; ADCQ $0, R15; \
|
||||
MOVQ 32+x, AX; MULQ CX; ADDQ AX, R14; ADCQ $0, DX; ADDQ R8, R14; ADCQ $0, DX; MOVQ DX, R8; MOVQ R14, 56+z; \
|
||||
MOVQ 40+x, AX; MULQ CX; ADDQ AX, R9; ADCQ $0, DX; ADDQ R8, R9; ADCQ $0, DX; MOVQ DX, R8; \
|
||||
MOVQ 48+x, AX; MULQ CX; ADDQ AX, R10; ADCQ $0, DX; ADDQ R8, R10; ADCQ $0, DX; MOVQ DX,R11; \
|
||||
\
|
||||
MOVQ 32+x, CX; \
|
||||
MOVQ CX, AX; ADDQ R15, CX; MOVQ $0, R15; ADCQ $0, R15; \
|
||||
;;;;;;;;;;;;;; MULQ CX; ADDQ AX, R9; ADCQ $0, DX; MOVQ R9, 64+z; \
|
||||
MOVQ R15, AX; NEGQ AX; ANDQ 32+x,AX; ADDQ AX, DX; ADCQ $0, R11; MOVQ DX, R8; \
|
||||
ADDQ 32+x, CX; ADCQ $0, R15; \
|
||||
MOVQ 40+x, AX; MULQ CX; ADDQ AX, R10; ADCQ $0, DX; ADDQ R8, R10; ADCQ $0, DX; MOVQ DX, R8; MOVQ R10, 72+z; \
|
||||
MOVQ 48+x, AX; MULQ CX; ADDQ AX, R11; ADCQ $0, DX; ADDQ R8, R11; ADCQ $0, DX; MOVQ DX,R12; \
|
||||
\
|
||||
XORL R13, R13; \
|
||||
XORL R14, R14; \
|
||||
MOVQ 40+x, CX; \
|
||||
MOVQ CX, AX; ADDQ R15, CX; MOVQ $0, R15; ADCQ $0, R15; \
|
||||
;;;;;;;;;;;;;; MULQ CX; ADDQ AX, R11; ADCQ $0, DX; MOVQ R11, 80+z; \
|
||||
MOVQ R15, AX; NEGQ AX; ANDQ 40+x,AX; ADDQ AX, DX; ADCQ $0, R13; MOVQ DX, R8; \
|
||||
ADDQ 40+x, CX; ADCQ $0, R15; \
|
||||
MOVQ 48+x, AX; MULQ CX; ADDQ AX, R12; ADCQ $0, DX; ADDQ R8, R12; ADCQ $0, DX; MOVQ DX, R8; MOVQ R12, 88+z; \
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ADDQ R8, R13; ADCQ $0,R14; \
|
||||
\
|
||||
XORL R9, R9; \
|
||||
MOVQ 48+x, CX; \
|
||||
MOVQ CX, AX; ADDQ R15, CX; MOVQ $0, R15; ADCQ $0, R15; \
|
||||
;;;;;;;;;;;;;; MULQ CX; ADDQ AX, R13; ADCQ $0, DX; MOVQ R13, 96+z; \
|
||||
MOVQ R15, AX; NEGQ AX; ANDQ 48+x,AX; ADDQ AX, DX; ADCQ $0, R9; MOVQ DX, R8; \
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ADDQ R8,R14; ADCQ $0, R9; MOVQ R14, 104+z;
|
||||
|
||||
|
||||
// integerSqrAdx squares x and stores in z
// Uses: AX, CX, DX, R8-R15, FLAGS
// Instr: x86_64, bmi2, adx
#define integerSqrAdx(z,x) \
    XORL R15, R15; \
    MOVQ 0+x, DX; \
    ;;;;;;;;;;;;;; MULXQ DX, AX, R8; MOVQ AX, 0+z; \
    ADDQ DX, DX; ADCQ $0, R15; CLC; \
    MULXQ 8+x, AX, R9; ADCXQ AX, R8; MOVQ R8, 8+z; \
    MULXQ 16+x, AX, R10; ADCXQ AX, R9; MOVQ $0, R8;\
    MULXQ 24+x, AX, R11; ADCXQ AX, R10; \
    MULXQ 32+x, AX, R12; ADCXQ AX, R11; \
    MULXQ 40+x, AX, R13; ADCXQ AX, R12; \
    MULXQ 48+x, AX, R14; ADCXQ AX, R13; \
    ;;;;;;;;;;;;;;;;;;;; ADCXQ R8, R14; \
    \
    MOVQ 8+x, DX; \
    MOVQ DX, AX; ADDQ R15, DX; MOVQ $0, R15; ADCQ $0, R15; \
    MULXQ AX, AX, CX; \
    MOVQ R15, R8; NEGQ R8; ANDQ 8+x, R8; \
    ADDQ AX, R9; MOVQ R9, 16+z; \
    ADCQ CX, R8; \
    ADCQ $0, R11; \
    ADDQ 8+x, DX; \
    ADCQ $0, R15; \
    XORL R9, R9; ;;;;;;;;;;;;;;;;;;;;; ADOXQ R8, R10; \
    MULXQ 16+x, AX, CX; ADCXQ AX, R10; ADOXQ CX, R11; MOVQ R10, 24+z; \
    MULXQ 24+x, AX, CX; ADCXQ AX, R11; ADOXQ CX, R12; MOVQ $0, R10; \
    MULXQ 32+x, AX, CX; ADCXQ AX, R12; ADOXQ CX, R13; \
    MULXQ 40+x, AX, CX; ADCXQ AX, R13; ADOXQ CX, R14; \
    MULXQ 48+x, AX, CX; ADCXQ AX, R14; ADOXQ CX, R9; \
    ;;;;;;;;;;;;;;;;;;; ADCXQ R10, R9; \
    \
    MOVQ 16+x, DX; \
    MOVQ DX, AX; ADDQ R15, DX; MOVQ $0, R15; ADCQ $0, R15; \
    MULXQ AX, AX, CX; \
    MOVQ R15, R8; NEGQ R8; ANDQ 16+x, R8; \
    ADDQ AX, R11; MOVQ R11, 32+z; \
    ADCQ CX, R8; \
    ADCQ $0, R13; \
    ADDQ 16+x, DX; \
    ADCQ $0, R15; \
    XORL R11, R11; ;;;;;;;;;;;;;;;;;;; ADOXQ R8, R12; \
    MULXQ 24+x, AX, CX; ADCXQ AX, R12; ADOXQ CX, R13; MOVQ R12, 40+z; \
    MULXQ 32+x, AX, CX; ADCXQ AX, R13; ADOXQ CX, R14; MOVQ $0, R12; \
    MULXQ 40+x, AX, CX; ADCXQ AX, R14; ADOXQ CX, R9; \
    MULXQ 48+x, AX, CX; ADCXQ AX, R9; ADOXQ CX, R10; \
    ;;;;;;;;;;;;;;;;;;; ADCXQ R11,R10; \
    \
    MOVQ 24+x, DX; \
    MOVQ DX, AX; ADDQ R15, DX; MOVQ $0, R15; ADCQ $0, R15; \
    MULXQ AX, AX, CX; \
    MOVQ R15, R8; NEGQ R8; ANDQ 24+x, R8; \
    ADDQ AX, R13; MOVQ R13, 48+z; \
    ADCQ CX, R8; \
    ADCQ $0, R9; \
    ADDQ 24+x, DX; \
    ADCQ $0, R15; \
    XORL R13, R13; ;;;;;;;;;;;;;;;;;;; ADOXQ R8, R14; \
    MULXQ 32+x, AX, CX; ADCXQ AX, R14; ADOXQ CX, R9; MOVQ R14, 56+z; \
    MULXQ 40+x, AX, CX; ADCXQ AX, R9; ADOXQ CX, R10; MOVQ $0, R14; \
    MULXQ 48+x, AX, CX; ADCXQ AX, R10; ADOXQ CX, R11; \
    ;;;;;;;;;;;;;;;;;;; ADCXQ R12,R11; \
    \
    MOVQ 32+x, DX; \
    MOVQ DX, AX; ADDQ R15, DX; MOVQ $0, R15; ADCQ $0, R15; \
    MULXQ AX, AX, CX; \
    MOVQ R15, R8; NEGQ R8; ANDQ 32+x, R8; \
    ADDQ AX, R9; MOVQ R9, 64+z; \
    ADCQ CX, R8; \
    ADCQ $0, R11; \
    ADDQ 32+x, DX; \
    ADCQ $0, R15; \
    XORL R9, R9; ;;;;;;;;;;;;;;;;;;;;; ADOXQ R8, R10; \
    MULXQ 40+x, AX, CX; ADCXQ AX, R10; ADOXQ CX, R11; MOVQ R10, 72+z; \
    MULXQ 48+x, AX, CX; ADCXQ AX, R11; ADOXQ CX, R12; \
    ;;;;;;;;;;;;;;;;;;; ADCXQ R13,R12; \
    \
    MOVQ 40+x, DX; \
    MOVQ DX, AX; ADDQ R15, DX; MOVQ $0, R15; ADCQ $0, R15; \
    MULXQ AX, AX, CX; \
    MOVQ R15, R8; NEGQ R8; ANDQ 40+x, R8; \
    ADDQ AX, R11; MOVQ R11, 80+z; \
    ADCQ CX, R8; \
    ADCQ $0, R13; \
    ADDQ 40+x, DX; \
    ADCQ $0, R15; \
    XORL R11, R11; ;;;;;;;;;;;;;;;;;;; ADOXQ R8, R12; \
    MULXQ 48+x, AX, CX; ADCXQ AX, R12; ADOXQ CX, R13; MOVQ R12, 88+z; \
    ;;;;;;;;;;;;;;;;;;; ADCXQ R14,R13; \
    \
    MOVQ 48+x, DX; \
    MOVQ DX, AX; ADDQ R15, DX; MOVQ $0, R15; ADCQ $0, R15; \
    MULXQ AX, AX, CX; \
    MOVQ R15, R8; NEGQ R8; ANDQ 48+x, R8; \
    XORL R10, R10; ;;;;;;;;;;;;;; ADOXQ CX, R14; \
    ;;;;;;;;;;;;;; ADCXQ AX, R13; ;;;;;;;;;;;;;; MOVQ R13, 96+z; \
    ;;;;;;;;;;;;;; ADCXQ R8, R14; MOVQ R14, 104+z;

// reduceFromDoubleLeg finds a z=x modulo p such that z<2^448 and stores in z
// Uses: AX, R8-R15, FLAGS
// Instr: x86_64
#define reduceFromDoubleLeg(z,x) \
    /* ( ,2C13,2C12,2C11,2C10|C10,C9,C8, C7) + (C6,...,C0) */ \
    /* (r14, r13, r12, r11, r10,r9,r8,r15) */ \
    MOVQ 80+x,AX; MOVQ AX,R10; \
    MOVQ $0xFFFFFFFF00000000, R8; \
    ANDQ R8,R10; \
    \
    MOVQ $0,R14; \
    MOVQ 104+x,R13; SHLQ $1,R13,R14; \
    MOVQ 96+x,R12; SHLQ $1,R12,R13; \
    MOVQ 88+x,R11; SHLQ $1,R11,R12; \
    MOVQ 72+x, R9; SHLQ $1,R10,R11; \
    MOVQ 64+x, R8; SHLQ $1,R10; \
    MOVQ $0xFFFFFFFF,R15; ANDQ R15,AX; ORQ AX,R10; \
    MOVQ 56+x,R15; \
    \
    ADDQ 0+x,R15; MOVQ R15, 0+z; MOVQ 56+x,R15; \
    ADCQ 8+x, R8; MOVQ R8, 8+z; MOVQ 64+x, R8; \
    ADCQ 16+x, R9; MOVQ R9,16+z; MOVQ 72+x, R9; \
    ADCQ 24+x,R10; MOVQ R10,24+z; MOVQ 80+x,R10; \
    ADCQ 32+x,R11; MOVQ R11,32+z; MOVQ 88+x,R11; \
    ADCQ 40+x,R12; MOVQ R12,40+z; MOVQ 96+x,R12; \
    ADCQ 48+x,R13; MOVQ R13,48+z; MOVQ 104+x,R13; \
    ADCQ $0,R14; \
    /* (c10c9,c9c8,c8c7,c7c13,c13c12,c12c11,c11c10) + (c6,...,c0) */ \
    /* ( r9, r8, r15, r13, r12, r11, r10) */ \
    MOVQ R10, AX; \
    SHRQ $32,R11,R10; \
    SHRQ $32,R12,R11; \
    SHRQ $32,R13,R12; \
    SHRQ $32,R15,R13; \
    SHRQ $32, R8,R15; \
    SHRQ $32, R9, R8; \
    SHRQ $32, AX, R9; \
    \
    ADDQ 0+z,R10; \
    ADCQ 8+z,R11; \
    ADCQ 16+z,R12; \
    ADCQ 24+z,R13; \
    ADCQ 32+z,R15; \
    ADCQ 40+z, R8; \
    ADCQ 48+z, R9; \
    ADCQ $0,R14; \
    /* ( c7) + (c6,...,c0) */ \
    /* (r14) */ \
    MOVQ R14, AX; SHLQ $32, AX; \
    ADDQ R14,R10; MOVQ $0,R14; \
    ADCQ $0,R11; \
    ADCQ $0,R12; \
    ADCQ AX,R13; \
    ADCQ $0,R15; \
    ADCQ $0, R8; \
    ADCQ $0, R9; \
    ADCQ $0,R14; \
    /* ( c7) + (c6,...,c0) */ \
    /* (r14) */ \
    MOVQ R14, AX; SHLQ $32,AX; \
    ADDQ R14,R10; MOVQ R10, 0+z; \
    ADCQ $0,R11; MOVQ R11, 8+z; \
    ADCQ $0,R12; MOVQ R12,16+z; \
    ADCQ AX,R13; MOVQ R13,24+z; \
    ADCQ $0,R15; MOVQ R15,32+z; \
    ADCQ $0, R8; MOVQ R8,40+z; \
    ADCQ $0, R9; MOVQ R9,48+z;

// reduceFromDoubleAdx finds a z=x modulo p such that z<2^448 and stores in z
// Uses: AX, R8-R15, FLAGS
// Instr: x86_64, adx
#define reduceFromDoubleAdx(z,x) \
    /* ( ,2C13,2C12,2C11,2C10|C10,C9,C8, C7) + (C6,...,C0) */ \
    /* (r14, r13, r12, r11, r10,r9,r8,r15) */ \
    MOVQ 80+x,AX; MOVQ AX,R10; \
    MOVQ $0xFFFFFFFF00000000, R8; \
    ANDQ R8,R10; \
    \
    MOVQ $0,R14; \
    MOVQ 104+x,R13; SHLQ $1,R13,R14; \
    MOVQ 96+x,R12; SHLQ $1,R12,R13; \
    MOVQ 88+x,R11; SHLQ $1,R11,R12; \
    MOVQ 72+x, R9; SHLQ $1,R10,R11; \
    MOVQ 64+x, R8; SHLQ $1,R10; \
    MOVQ $0xFFFFFFFF,R15; ANDQ R15,AX; ORQ AX,R10; \
    MOVQ 56+x,R15; \
    \
    XORL AX,AX; \
    ADCXQ 0+x,R15; MOVQ R15, 0+z; MOVQ 56+x,R15; \
    ADCXQ 8+x, R8; MOVQ R8, 8+z; MOVQ 64+x, R8; \
    ADCXQ 16+x, R9; MOVQ R9,16+z; MOVQ 72+x, R9; \
    ADCXQ 24+x,R10; MOVQ R10,24+z; MOVQ 80+x,R10; \
    ADCXQ 32+x,R11; MOVQ R11,32+z; MOVQ 88+x,R11; \
    ADCXQ 40+x,R12; MOVQ R12,40+z; MOVQ 96+x,R12; \
    ADCXQ 48+x,R13; MOVQ R13,48+z; MOVQ 104+x,R13; \
    ADCXQ AX,R14; \
    /* (c10c9,c9c8,c8c7,c7c13,c13c12,c12c11,c11c10) + (c6,...,c0) */ \
    /* ( r9, r8, r15, r13, r12, r11, r10) */ \
    MOVQ R10, AX; \
    SHRQ $32,R11,R10; \
    SHRQ $32,R12,R11; \
    SHRQ $32,R13,R12; \
    SHRQ $32,R15,R13; \
    SHRQ $32, R8,R15; \
    SHRQ $32, R9, R8; \
    SHRQ $32, AX, R9; \
    \
    XORL AX,AX; \
    ADCXQ 0+z,R10; \
    ADCXQ 8+z,R11; \
    ADCXQ 16+z,R12; \
    ADCXQ 24+z,R13; \
    ADCXQ 32+z,R15; \
    ADCXQ 40+z, R8; \
    ADCXQ 48+z, R9; \
    ADCXQ AX,R14; \
    /* ( c7) + (c6,...,c0) */ \
    /* (r14) */ \
    MOVQ R14, AX; SHLQ $32, AX; \
    CLC; \
    ADCXQ R14,R10; MOVQ $0,R14; \
    ADCXQ R14,R11; \
    ADCXQ R14,R12; \
    ADCXQ AX,R13; \
    ADCXQ R14,R15; \
    ADCXQ R14, R8; \
    ADCXQ R14, R9; \
    ADCXQ R14,R14; \
    /* ( c7) + (c6,...,c0) */ \
    /* (r14) */ \
    MOVQ R14, AX; SHLQ $32, AX; \
    CLC; \
    ADCXQ R14,R10; MOVQ R10, 0+z; MOVQ $0,R14; \
    ADCXQ R14,R11; MOVQ R11, 8+z; \
    ADCXQ R14,R12; MOVQ R12,16+z; \
    ADCXQ AX,R13; MOVQ R13,24+z; \
    ADCXQ R14,R15; MOVQ R15,32+z; \
    ADCXQ R14, R8; MOVQ R8,40+z; \
    ADCXQ R14, R9; MOVQ R9,48+z;

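// Why the folding in reduceFromDoubleLeg/reduceFromDoubleAdx works: the field
// prime is the Goldilocks prime p = 2^448 - 2^224 - 1, which gives the
// identity
//
//     2^448 = 2^224 + 1 (mod p)
//
// so the upper limbs C13..C7 of the 14-limb double-width input are multiplied
// by 2^224 + 1 and added back onto C6..C0, exactly as the limb diagrams in the
// macro comments describe. The small (c7) carry produced by one fold can
// itself need folding, which is why the final carry chain appears twice.
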
// addSub calculates two operations: x,y = x+y,x-y
// Uses: AX, DX, R8-R15, FLAGS
#define addSub(x,y) \
    MOVQ 0+x, R8; ADDQ 0+y, R8; \
    MOVQ 8+x, R9; ADCQ 8+y, R9; \
    MOVQ 16+x, R10; ADCQ 16+y, R10; \
    MOVQ 24+x, R11; ADCQ 24+y, R11; \
    MOVQ 32+x, R12; ADCQ 32+y, R12; \
    MOVQ 40+x, R13; ADCQ 40+y, R13; \
    MOVQ 48+x, R14; ADCQ 48+y, R14; \
    MOVQ $0, AX; ADCQ $0, AX; \
    MOVQ AX, DX; \
    SHLQ $32, DX; \
    ADDQ AX, R8; MOVQ $0, AX; \
    ADCQ $0, R9; \
    ADCQ $0, R10; \
    ADCQ DX, R11; \
    ADCQ $0, R12; \
    ADCQ $0, R13; \
    ADCQ $0, R14; \
    ADCQ $0, AX; \
    MOVQ AX, DX; \
    SHLQ $32, DX; \
    ADDQ AX, R8; MOVQ 0+x,AX; MOVQ R8, 0+x; MOVQ AX, R8; \
    ADCQ $0, R9; MOVQ 8+x,AX; MOVQ R9, 8+x; MOVQ AX, R9; \
    ADCQ $0, R10; MOVQ 16+x,AX; MOVQ R10, 16+x; MOVQ AX, R10; \
    ADCQ DX, R11; MOVQ 24+x,AX; MOVQ R11, 24+x; MOVQ AX, R11; \
    ADCQ $0, R12; MOVQ 32+x,AX; MOVQ R12, 32+x; MOVQ AX, R12; \
    ADCQ $0, R13; MOVQ 40+x,AX; MOVQ R13, 40+x; MOVQ AX, R13; \
    ADCQ $0, R14; MOVQ 48+x,AX; MOVQ R14, 48+x; MOVQ AX, R14; \
    SUBQ 0+y, R8; \
    SBBQ 8+y, R9; \
    SBBQ 16+y, R10; \
    SBBQ 24+y, R11; \
    SBBQ 32+y, R12; \
    SBBQ 40+y, R13; \
    SBBQ 48+y, R14; \
    MOVQ $0, AX; SETCS AX; \
    MOVQ AX, DX; \
    SHLQ $32, DX; \
    SUBQ AX, R8; MOVQ $0, AX; \
    SBBQ $0, R9; \
    SBBQ $0, R10; \
    SBBQ DX, R11; \
    SBBQ $0, R12; \
    SBBQ $0, R13; \
    SBBQ $0, R14; \
    SETCS AX; \
    MOVQ AX, DX; \
    SHLQ $32, DX; \
    SUBQ AX, R8; MOVQ R8, 0+y; \
    SBBQ $0, R9; MOVQ R9, 8+y; \
    SBBQ $0, R10; MOVQ R10, 16+y; \
    SBBQ DX, R11; MOVQ R11, 24+y; \
    SBBQ $0, R12; MOVQ R12, 32+y; \
    SBBQ $0, R13; MOVQ R13, 40+y; \
    SBBQ $0, R14; MOVQ R14, 48+y;
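
The addSub macro above overwrites its operands with x+y and x-y in a single pass, the butterfly used by ladder-style scalar multiplication. A minimal math/big sketch of the same operation (illustrative only; the macro is the fast path, and only the prime is taken from the code):

package main

import (
    "fmt"
    "math/big"
)

func main() {
    // p = 2^448 - 2^224 - 1, the Goldilocks prime used by fp448.
    one := big.NewInt(1)
    p := new(big.Int).Lsh(one, 448)
    p.Sub(p, new(big.Int).Lsh(one, 224))
    p.Sub(p, one)

    x, y := big.NewInt(5), big.NewInt(3)
    sum := new(big.Int).Add(x, y)
    dif := new(big.Int).Sub(x, y)
    x.Mod(sum, p) // x = (x+y) mod p
    y.Mod(dif, p) // y = (x-y) mod p
    fmt.Println(x, y) // 8 2
}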
74
vendor/github.com/cloudflare/circl/math/fp448/fp_amd64.s
generated
vendored
Normal file
@ -0,0 +1,74 @@
// +build amd64

#include "textflag.h"
#include "fp_amd64.h"

// func cmovAmd64(x, y *Elt, n uint)
TEXT ·cmovAmd64(SB),NOSPLIT,$0-24
    MOVQ x+0(FP), DI
    MOVQ y+8(FP), SI
    MOVQ n+16(FP), BX
    cselect(0(DI),0(SI),BX)
    RET

// func cswapAmd64(x, y *Elt, n uint)
TEXT ·cswapAmd64(SB),NOSPLIT,$0-24
    MOVQ x+0(FP), DI
    MOVQ y+8(FP), SI
    MOVQ n+16(FP), BX
    cswap(0(DI),0(SI),BX)
    RET

// func subAmd64(z, x, y *Elt)
TEXT ·subAmd64(SB),NOSPLIT,$0-24
    MOVQ z+0(FP), DI
    MOVQ x+8(FP), SI
    MOVQ y+16(FP), BX
    subtraction(0(DI),0(SI),0(BX))
    RET

// func addsubAmd64(x, y *Elt)
TEXT ·addsubAmd64(SB),NOSPLIT,$0-16
    MOVQ x+0(FP), DI
    MOVQ y+8(FP), SI
    addSub(0(DI),0(SI))
    RET

#define addLegacy \
    additionLeg(0(DI),0(SI),0(BX))
#define addBmi2Adx \
    additionAdx(0(DI),0(SI),0(BX))

#define mulLegacy \
    integerMulLeg(0(SP),0(SI),0(BX)) \
    reduceFromDoubleLeg(0(DI),0(SP))
#define mulBmi2Adx \
    integerMulAdx(0(SP),0(SI),0(BX)) \
    reduceFromDoubleAdx(0(DI),0(SP))

#define sqrLegacy \
    integerSqrLeg(0(SP),0(SI)) \
    reduceFromDoubleLeg(0(DI),0(SP))
#define sqrBmi2Adx \
    integerSqrAdx(0(SP),0(SI)) \
    reduceFromDoubleAdx(0(DI),0(SP))

// func addAmd64(z, x, y *Elt)
TEXT ·addAmd64(SB),NOSPLIT,$0-24
    MOVQ z+0(FP), DI
    MOVQ x+8(FP), SI
    MOVQ y+16(FP), BX
    CHECK_BMI2ADX(LADD, addLegacy, addBmi2Adx)

// func mulAmd64(z, x, y *Elt)
TEXT ·mulAmd64(SB),NOSPLIT,$112-24
    MOVQ z+0(FP), DI
    MOVQ x+8(FP), SI
    MOVQ y+16(FP), BX
    CHECK_BMI2ADX(LMUL, mulLegacy, mulBmi2Adx)

// func sqrAmd64(z, x *Elt)
TEXT ·sqrAmd64(SB),NOSPLIT,$112-16
    MOVQ z+0(FP), DI
    MOVQ x+8(FP), SI
    CHECK_BMI2ADX(LSQR, sqrLegacy, sqrBmi2Adx)
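
CHECK_BMI2ADX is defined elsewhere in fp_amd64.h (not shown in this hunk); presumably it branches on the CPU's BMI2 and ADX feature bits to pick the MULX/ADCX/ADOX path or the legacy MULQ path. The same decision can be made from Go with golang.org/x/sys/cpu (a sketch under that assumption):

package main

import (
    "fmt"

    "golang.org/x/sys/cpu"
)

func main() {
    if cpu.X86.HasBMI2 && cpu.X86.HasADX {
        fmt.Println("using integerMulAdx/integerSqrAdx (MULX, ADCX, ADOX)")
    } else {
        fmt.Println("using integerMulLeg/integerSqrLeg (MULQ, ADDQ, ADCQ)")
    }
}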
339
vendor/github.com/cloudflare/circl/math/fp448/fp_generic.go
generated
vendored
Normal file
@ -0,0 +1,339 @@
package fp448

import (
    "encoding/binary"
    "math/bits"
)

func cmovGeneric(x, y *Elt, n uint) {
    m := -uint64(n & 0x1)
    x0 := binary.LittleEndian.Uint64(x[0*8 : 1*8])
    x1 := binary.LittleEndian.Uint64(x[1*8 : 2*8])
    x2 := binary.LittleEndian.Uint64(x[2*8 : 3*8])
    x3 := binary.LittleEndian.Uint64(x[3*8 : 4*8])
    x4 := binary.LittleEndian.Uint64(x[4*8 : 5*8])
    x5 := binary.LittleEndian.Uint64(x[5*8 : 6*8])
    x6 := binary.LittleEndian.Uint64(x[6*8 : 7*8])

    y0 := binary.LittleEndian.Uint64(y[0*8 : 1*8])
    y1 := binary.LittleEndian.Uint64(y[1*8 : 2*8])
    y2 := binary.LittleEndian.Uint64(y[2*8 : 3*8])
    y3 := binary.LittleEndian.Uint64(y[3*8 : 4*8])
    y4 := binary.LittleEndian.Uint64(y[4*8 : 5*8])
    y5 := binary.LittleEndian.Uint64(y[5*8 : 6*8])
    y6 := binary.LittleEndian.Uint64(y[6*8 : 7*8])

    x0 = (x0 &^ m) | (y0 & m)
    x1 = (x1 &^ m) | (y1 & m)
    x2 = (x2 &^ m) | (y2 & m)
    x3 = (x3 &^ m) | (y3 & m)
    x4 = (x4 &^ m) | (y4 & m)
    x5 = (x5 &^ m) | (y5 & m)
    x6 = (x6 &^ m) | (y6 & m)

    binary.LittleEndian.PutUint64(x[0*8:1*8], x0)
    binary.LittleEndian.PutUint64(x[1*8:2*8], x1)
    binary.LittleEndian.PutUint64(x[2*8:3*8], x2)
    binary.LittleEndian.PutUint64(x[3*8:4*8], x3)
    binary.LittleEndian.PutUint64(x[4*8:5*8], x4)
    binary.LittleEndian.PutUint64(x[5*8:6*8], x5)
    binary.LittleEndian.PutUint64(x[6*8:7*8], x6)
}

func cswapGeneric(x, y *Elt, n uint) {
    m := -uint64(n & 0x1)
    x0 := binary.LittleEndian.Uint64(x[0*8 : 1*8])
    x1 := binary.LittleEndian.Uint64(x[1*8 : 2*8])
    x2 := binary.LittleEndian.Uint64(x[2*8 : 3*8])
    x3 := binary.LittleEndian.Uint64(x[3*8 : 4*8])
    x4 := binary.LittleEndian.Uint64(x[4*8 : 5*8])
    x5 := binary.LittleEndian.Uint64(x[5*8 : 6*8])
    x6 := binary.LittleEndian.Uint64(x[6*8 : 7*8])

    y0 := binary.LittleEndian.Uint64(y[0*8 : 1*8])
    y1 := binary.LittleEndian.Uint64(y[1*8 : 2*8])
    y2 := binary.LittleEndian.Uint64(y[2*8 : 3*8])
    y3 := binary.LittleEndian.Uint64(y[3*8 : 4*8])
    y4 := binary.LittleEndian.Uint64(y[4*8 : 5*8])
    y5 := binary.LittleEndian.Uint64(y[5*8 : 6*8])
    y6 := binary.LittleEndian.Uint64(y[6*8 : 7*8])

    t0 := m & (x0 ^ y0)
    t1 := m & (x1 ^ y1)
    t2 := m & (x2 ^ y2)
    t3 := m & (x3 ^ y3)
    t4 := m & (x4 ^ y4)
    t5 := m & (x5 ^ y5)
    t6 := m & (x6 ^ y6)
    x0 ^= t0
    x1 ^= t1
    x2 ^= t2
    x3 ^= t3
    x4 ^= t4
    x5 ^= t5
    x6 ^= t6
    y0 ^= t0
    y1 ^= t1
    y2 ^= t2
    y3 ^= t3
    y4 ^= t4
    y5 ^= t5
    y6 ^= t6

    binary.LittleEndian.PutUint64(x[0*8:1*8], x0)
    binary.LittleEndian.PutUint64(x[1*8:2*8], x1)
    binary.LittleEndian.PutUint64(x[2*8:3*8], x2)
    binary.LittleEndian.PutUint64(x[3*8:4*8], x3)
    binary.LittleEndian.PutUint64(x[4*8:5*8], x4)
    binary.LittleEndian.PutUint64(x[5*8:6*8], x5)
    binary.LittleEndian.PutUint64(x[6*8:7*8], x6)

    binary.LittleEndian.PutUint64(y[0*8:1*8], y0)
    binary.LittleEndian.PutUint64(y[1*8:2*8], y1)
    binary.LittleEndian.PutUint64(y[2*8:3*8], y2)
    binary.LittleEndian.PutUint64(y[3*8:4*8], y3)
    binary.LittleEndian.PutUint64(y[4*8:5*8], y4)
    binary.LittleEndian.PutUint64(y[5*8:6*8], y5)
    binary.LittleEndian.PutUint64(y[6*8:7*8], y6)
}

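// The branchless selection in cmovGeneric and cswapGeneric hinges on
// m := -uint64(n & 0x1): negating 1 in two's complement yields the all-ones
// mask, and negating 0 yields zero, so (x &^ m) | (y & m) picks y when n is
// odd and x otherwise, with no data-dependent branch. The idiom in isolation
// (a standalone sketch; the helper name is illustrative):
func ctSelectSketch(x, y uint64, n uint) uint64 {
    m := -uint64(n & 0x1) // all ones if n is odd, all zeros otherwise
    return (x &^ m) | (y & m)
}
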
func addGeneric(z, x, y *Elt) {
    x0 := binary.LittleEndian.Uint64(x[0*8 : 1*8])
    x1 := binary.LittleEndian.Uint64(x[1*8 : 2*8])
    x2 := binary.LittleEndian.Uint64(x[2*8 : 3*8])
    x3 := binary.LittleEndian.Uint64(x[3*8 : 4*8])
    x4 := binary.LittleEndian.Uint64(x[4*8 : 5*8])
    x5 := binary.LittleEndian.Uint64(x[5*8 : 6*8])
    x6 := binary.LittleEndian.Uint64(x[6*8 : 7*8])

    y0 := binary.LittleEndian.Uint64(y[0*8 : 1*8])
    y1 := binary.LittleEndian.Uint64(y[1*8 : 2*8])
    y2 := binary.LittleEndian.Uint64(y[2*8 : 3*8])
    y3 := binary.LittleEndian.Uint64(y[3*8 : 4*8])
    y4 := binary.LittleEndian.Uint64(y[4*8 : 5*8])
    y5 := binary.LittleEndian.Uint64(y[5*8 : 6*8])
    y6 := binary.LittleEndian.Uint64(y[6*8 : 7*8])

    z0, c0 := bits.Add64(x0, y0, 0)
    z1, c1 := bits.Add64(x1, y1, c0)
    z2, c2 := bits.Add64(x2, y2, c1)
    z3, c3 := bits.Add64(x3, y3, c2)
    z4, c4 := bits.Add64(x4, y4, c3)
    z5, c5 := bits.Add64(x5, y5, c4)
    z6, z7 := bits.Add64(x6, y6, c5)

    z0, c0 = bits.Add64(z0, z7, 0)
    z1, c1 = bits.Add64(z1, 0, c0)
    z2, c2 = bits.Add64(z2, 0, c1)
    z3, c3 = bits.Add64(z3, z7<<32, c2)
    z4, c4 = bits.Add64(z4, 0, c3)
    z5, c5 = bits.Add64(z5, 0, c4)
    z6, z7 = bits.Add64(z6, 0, c5)

    z0, c0 = bits.Add64(z0, z7, 0)
    z1, c1 = bits.Add64(z1, 0, c0)
    z2, c2 = bits.Add64(z2, 0, c1)
    z3, c3 = bits.Add64(z3, z7<<32, c2)
    z4, c4 = bits.Add64(z4, 0, c3)
    z5, c5 = bits.Add64(z5, 0, c4)
    z6, _ = bits.Add64(z6, 0, c5)

    binary.LittleEndian.PutUint64(z[0*8:1*8], z0)
    binary.LittleEndian.PutUint64(z[1*8:2*8], z1)
    binary.LittleEndian.PutUint64(z[2*8:3*8], z2)
    binary.LittleEndian.PutUint64(z[3*8:4*8], z3)
    binary.LittleEndian.PutUint64(z[4*8:5*8], z4)
    binary.LittleEndian.PutUint64(z[5*8:6*8], z5)
    binary.LittleEndian.PutUint64(z[6*8:7*8], z6)
}

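// Note on the two folding passes in addGeneric above: p = 2^448 - 2^224 - 1
// gives 2^448 = 2^224 + 1 (mod p), so the carry bit z7 escaping the first
// Add64 chain is folded back in at limb 0 and at bit 32 of limb 3 (z7 and
// z7<<32). That fold can itself carry out of limb 6, hence the second pass;
// afterwards the outgoing carry is necessarily zero and is discarded. The
// result is weakly reduced: it fits in 448 bits but may still exceed p.
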
func subGeneric(z, x, y *Elt) {
    x0 := binary.LittleEndian.Uint64(x[0*8 : 1*8])
    x1 := binary.LittleEndian.Uint64(x[1*8 : 2*8])
    x2 := binary.LittleEndian.Uint64(x[2*8 : 3*8])
    x3 := binary.LittleEndian.Uint64(x[3*8 : 4*8])
    x4 := binary.LittleEndian.Uint64(x[4*8 : 5*8])
    x5 := binary.LittleEndian.Uint64(x[5*8 : 6*8])
    x6 := binary.LittleEndian.Uint64(x[6*8 : 7*8])

    y0 := binary.LittleEndian.Uint64(y[0*8 : 1*8])
    y1 := binary.LittleEndian.Uint64(y[1*8 : 2*8])
    y2 := binary.LittleEndian.Uint64(y[2*8 : 3*8])
    y3 := binary.LittleEndian.Uint64(y[3*8 : 4*8])
    y4 := binary.LittleEndian.Uint64(y[4*8 : 5*8])
    y5 := binary.LittleEndian.Uint64(y[5*8 : 6*8])
    y6 := binary.LittleEndian.Uint64(y[6*8 : 7*8])

    z0, c0 := bits.Sub64(x0, y0, 0)
    z1, c1 := bits.Sub64(x1, y1, c0)
    z2, c2 := bits.Sub64(x2, y2, c1)
    z3, c3 := bits.Sub64(x3, y3, c2)
    z4, c4 := bits.Sub64(x4, y4, c3)
    z5, c5 := bits.Sub64(x5, y5, c4)
    z6, z7 := bits.Sub64(x6, y6, c5)

    z0, c0 = bits.Sub64(z0, z7, 0)
    z1, c1 = bits.Sub64(z1, 0, c0)
    z2, c2 = bits.Sub64(z2, 0, c1)
    z3, c3 = bits.Sub64(z3, z7<<32, c2)
    z4, c4 = bits.Sub64(z4, 0, c3)
    z5, c5 = bits.Sub64(z5, 0, c4)
    z6, z7 = bits.Sub64(z6, 0, c5)

    z0, c0 = bits.Sub64(z0, z7, 0)
    z1, c1 = bits.Sub64(z1, 0, c0)
    z2, c2 = bits.Sub64(z2, 0, c1)
    z3, c3 = bits.Sub64(z3, z7<<32, c2)
    z4, c4 = bits.Sub64(z4, 0, c3)
    z5, c5 = bits.Sub64(z5, 0, c4)
    z6, _ = bits.Sub64(z6, 0, c5)

    binary.LittleEndian.PutUint64(z[0*8:1*8], z0)
    binary.LittleEndian.PutUint64(z[1*8:2*8], z1)
    binary.LittleEndian.PutUint64(z[2*8:3*8], z2)
    binary.LittleEndian.PutUint64(z[3*8:4*8], z3)
    binary.LittleEndian.PutUint64(z[4*8:5*8], z4)
    binary.LittleEndian.PutUint64(z[5*8:6*8], z5)
    binary.LittleEndian.PutUint64(z[6*8:7*8], z6)
}

func addsubGeneric(x, y *Elt) {
    z := &Elt{}
    addGeneric(z, x, y)
    subGeneric(y, x, y)
    *x = *z
}

func mulGeneric(z, x, y *Elt) {
    x0 := binary.LittleEndian.Uint64(x[0*8 : 1*8])
    x1 := binary.LittleEndian.Uint64(x[1*8 : 2*8])
    x2 := binary.LittleEndian.Uint64(x[2*8 : 3*8])
    x3 := binary.LittleEndian.Uint64(x[3*8 : 4*8])
    x4 := binary.LittleEndian.Uint64(x[4*8 : 5*8])
    x5 := binary.LittleEndian.Uint64(x[5*8 : 6*8])
    x6 := binary.LittleEndian.Uint64(x[6*8 : 7*8])

    y0 := binary.LittleEndian.Uint64(y[0*8 : 1*8])
    y1 := binary.LittleEndian.Uint64(y[1*8 : 2*8])
    y2 := binary.LittleEndian.Uint64(y[2*8 : 3*8])
    y3 := binary.LittleEndian.Uint64(y[3*8 : 4*8])
    y4 := binary.LittleEndian.Uint64(y[4*8 : 5*8])
    y5 := binary.LittleEndian.Uint64(y[5*8 : 6*8])
    y6 := binary.LittleEndian.Uint64(y[6*8 : 7*8])

    yy := [7]uint64{y0, y1, y2, y3, y4, y5, y6}
    zz := [7]uint64{}

    yi := yy[0]
    h0, l0 := bits.Mul64(x0, yi)
    h1, l1 := bits.Mul64(x1, yi)
    h2, l2 := bits.Mul64(x2, yi)
    h3, l3 := bits.Mul64(x3, yi)
    h4, l4 := bits.Mul64(x4, yi)
    h5, l5 := bits.Mul64(x5, yi)
    h6, l6 := bits.Mul64(x6, yi)

    zz[0] = l0
    a0, c0 := bits.Add64(h0, l1, 0)
    a1, c1 := bits.Add64(h1, l2, c0)
    a2, c2 := bits.Add64(h2, l3, c1)
    a3, c3 := bits.Add64(h3, l4, c2)
    a4, c4 := bits.Add64(h4, l5, c3)
    a5, c5 := bits.Add64(h5, l6, c4)
    a6, _ := bits.Add64(h6, 0, c5)

    for i := 1; i < 7; i++ {
        yi = yy[i]
        h0, l0 = bits.Mul64(x0, yi)
        h1, l1 = bits.Mul64(x1, yi)
        h2, l2 = bits.Mul64(x2, yi)
        h3, l3 = bits.Mul64(x3, yi)
        h4, l4 = bits.Mul64(x4, yi)
        h5, l5 = bits.Mul64(x5, yi)
        h6, l6 = bits.Mul64(x6, yi)

        zz[i], c0 = bits.Add64(a0, l0, 0)
        a0, c1 = bits.Add64(a1, l1, c0)
        a1, c2 = bits.Add64(a2, l2, c1)
        a2, c3 = bits.Add64(a3, l3, c2)
        a3, c4 = bits.Add64(a4, l4, c3)
        a4, c5 = bits.Add64(a5, l5, c4)
        a5, a6 = bits.Add64(a6, l6, c5)

        a0, c0 = bits.Add64(a0, h0, 0)
        a1, c1 = bits.Add64(a1, h1, c0)
        a2, c2 = bits.Add64(a2, h2, c1)
        a3, c3 = bits.Add64(a3, h3, c2)
        a4, c4 = bits.Add64(a4, h4, c3)
        a5, c5 = bits.Add64(a5, h5, c4)
        a6, _ = bits.Add64(a6, h6, c5)
    }
    red64(z, &zz, &[7]uint64{a0, a1, a2, a3, a4, a5, a6})
}

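// mulGeneric above is operand scanning: each limb of y multiplies all of x
// via bits.Mul64, and each row is folded into the running accumulator with
// two carry chains, one for the low halves and one for the high halves. A
// two-limb toy version of one such row step (a sketch; the helper name is
// illustrative and not part of the original file):
func mulRowSketch(x0, x1, y0 uint64) (z0, z1, z2 uint64) {
    h0, l0 := bits.Mul64(x0, y0) // x0*y0 = h0*2^64 + l0
    h1, l1 := bits.Mul64(x1, y0) // x1*y0 = h1*2^64 + l1
    z0 = l0
    var c uint64
    z1, c = bits.Add64(h0, l1, 0) // low-half chain
    z2, _ = bits.Add64(h1, 0, c)  // high half absorbs the carry
    return z0, z1, z2
}
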
func sqrGeneric(z, x *Elt) { mulGeneric(z, x, x) }

func red64(z *Elt, l, h *[7]uint64) {
    /* (2C13, 2C12, 2C11, 2C10|C10, C9, C8, C7) + (C6,...,C0) */
    h0 := h[0]
    h1 := h[1]
    h2 := h[2]
    h3 := ((h[3] & (0xFFFFFFFF << 32)) << 1) | (h[3] & 0xFFFFFFFF)
    h4 := (h[3] >> 63) | (h[4] << 1)
    h5 := (h[4] >> 63) | (h[5] << 1)
    h6 := (h[5] >> 63) | (h[6] << 1)
    h7 := (h[6] >> 63)

    l0, c0 := bits.Add64(h0, l[0], 0)
    l1, c1 := bits.Add64(h1, l[1], c0)
    l2, c2 := bits.Add64(h2, l[2], c1)
    l3, c3 := bits.Add64(h3, l[3], c2)
    l4, c4 := bits.Add64(h4, l[4], c3)
    l5, c5 := bits.Add64(h5, l[5], c4)
    l6, c6 := bits.Add64(h6, l[6], c5)
    l7, _ := bits.Add64(h7, 0, c6)

    /* (C10C9, C9C8,C8C7,C7C13,C13C12,C12C11,C11C10) + (C6,...,C0) */
    h0 = (h[3] >> 32) | (h[4] << 32)
    h1 = (h[4] >> 32) | (h[5] << 32)
    h2 = (h[5] >> 32) | (h[6] << 32)
    h3 = (h[6] >> 32) | (h[0] << 32)
    h4 = (h[0] >> 32) | (h[1] << 32)
    h5 = (h[1] >> 32) | (h[2] << 32)
    h6 = (h[2] >> 32) | (h[3] << 32)

    l0, c0 = bits.Add64(l0, h0, 0)
    l1, c1 = bits.Add64(l1, h1, c0)
    l2, c2 = bits.Add64(l2, h2, c1)
    l3, c3 = bits.Add64(l3, h3, c2)
    l4, c4 = bits.Add64(l4, h4, c3)
    l5, c5 = bits.Add64(l5, h5, c4)
    l6, c6 = bits.Add64(l6, h6, c5)
    l7, _ = bits.Add64(l7, 0, c6)

    /* (C7) + (C6,...,C0) */
    l0, c0 = bits.Add64(l0, l7, 0)
    l1, c1 = bits.Add64(l1, 0, c0)
    l2, c2 = bits.Add64(l2, 0, c1)
    l3, c3 = bits.Add64(l3, l7<<32, c2)
    l4, c4 = bits.Add64(l4, 0, c3)
    l5, c5 = bits.Add64(l5, 0, c4)
    l6, l7 = bits.Add64(l6, 0, c5)

    /* (C7) + (C6,...,C0) */
    l0, c0 = bits.Add64(l0, l7, 0)
    l1, c1 = bits.Add64(l1, 0, c0)
    l2, c2 = bits.Add64(l2, 0, c1)
    l3, c3 = bits.Add64(l3, l7<<32, c2)
    l4, c4 = bits.Add64(l4, 0, c3)
    l5, c5 = bits.Add64(l5, 0, c4)
    l6, _ = bits.Add64(l6, 0, c5)

    binary.LittleEndian.PutUint64(z[0*8:1*8], l0)
    binary.LittleEndian.PutUint64(z[1*8:2*8], l1)
    binary.LittleEndian.PutUint64(z[2*8:3*8], l2)
    binary.LittleEndian.PutUint64(z[3*8:4*8], l3)
    binary.LittleEndian.PutUint64(z[4*8:5*8], l4)
    binary.LittleEndian.PutUint64(z[5*8:6*8], l5)
    binary.LittleEndian.PutUint64(z[6*8:7*8], l6)
}
12
vendor/github.com/cloudflare/circl/math/fp448/fp_noasm.go
generated
vendored
Normal file
@ -0,0 +1,12 @@
//go:build !amd64 || purego
// +build !amd64 purego

package fp448

func cmov(x, y *Elt, n uint)  { cmovGeneric(x, y, n) }
func cswap(x, y *Elt, n uint) { cswapGeneric(x, y, n) }
func add(z, x, y *Elt)        { addGeneric(z, x, y) }
func sub(z, x, y *Elt)        { subGeneric(z, x, y) }
func addsub(x, y *Elt)        { addsubGeneric(x, y) }
func mul(z, x, y *Elt)        { mulGeneric(z, x, y) }
func sqr(z, x *Elt)           { sqrGeneric(z, x) }
75
vendor/github.com/cloudflare/circl/math/fp448/fuzzer.go
generated
vendored
Normal file
@ -0,0 +1,75 @@
//go:build gofuzz
// +build gofuzz

// How to run the fuzzer:
//
// $ go get -u github.com/dvyukov/go-fuzz/go-fuzz
// $ go get -u github.com/dvyukov/go-fuzz/go-fuzz-build
// $ go-fuzz-build -libfuzzer -func FuzzReduction -o lib.a
// $ clang -fsanitize=fuzzer lib.a -o fu.exe
// $ ./fu.exe
package fp448

import (
    "encoding/binary"
    "fmt"
    "math/big"

    "github.com/cloudflare/circl/internal/conv"
)

// FuzzReduction is a fuzzer target for the red64 function, which reduces t
// (112 bytes) to a number t' (56 bytes) congruent to t modulo p448.
func FuzzReduction(data []byte) int {
    if len(data) != 2*Size {
        return -1
    }
    var got, want Elt
    var lo, hi [7]uint64
    a := data[:Size]
    b := data[Size:]
    lo[0] = binary.LittleEndian.Uint64(a[0*8 : 1*8])
    lo[1] = binary.LittleEndian.Uint64(a[1*8 : 2*8])
    lo[2] = binary.LittleEndian.Uint64(a[2*8 : 3*8])
    lo[3] = binary.LittleEndian.Uint64(a[3*8 : 4*8])
    lo[4] = binary.LittleEndian.Uint64(a[4*8 : 5*8])
    lo[5] = binary.LittleEndian.Uint64(a[5*8 : 6*8])
    lo[6] = binary.LittleEndian.Uint64(a[6*8 : 7*8])

    hi[0] = binary.LittleEndian.Uint64(b[0*8 : 1*8])
    hi[1] = binary.LittleEndian.Uint64(b[1*8 : 2*8])
    hi[2] = binary.LittleEndian.Uint64(b[2*8 : 3*8])
    hi[3] = binary.LittleEndian.Uint64(b[3*8 : 4*8])
    hi[4] = binary.LittleEndian.Uint64(b[4*8 : 5*8])
    hi[5] = binary.LittleEndian.Uint64(b[5*8 : 6*8])
    hi[6] = binary.LittleEndian.Uint64(b[6*8 : 7*8])

    red64(&got, &lo, &hi)

    t := conv.BytesLe2BigInt(data[:2*Size])

    two448 := big.NewInt(1)
    two448.Lsh(two448, 448) // 2^448
    mask448 := big.NewInt(1)
    mask448.Sub(two448, mask448) // 2^448-1
    two224plus1 := big.NewInt(1)
    two224plus1.Lsh(two224plus1, 224)
    two224plus1.Add(two224plus1, big.NewInt(1)) // 2^224+1

    var loBig, hiBig big.Int
    for t.Cmp(two448) >= 0 {
        loBig.And(t, mask448)
        hiBig.Rsh(t, 448)
        t.Mul(&hiBig, two224plus1)
        t.Add(t, &loBig)
    }
    conv.BigInt2BytesLe(want[:], t)

    if got != want {
        fmt.Printf("in: %v\n", conv.BytesLe2BigInt(data[:2*Size]))
        fmt.Printf("got: %v\n", got)
        fmt.Printf("want: %v\n", want)
        panic("error found")
    }
    return 1
}
122
vendor/github.com/cloudflare/circl/math/mlsbset/mlsbset.go
generated
vendored
Normal file
@ -0,0 +1,122 @@
// Package mlsbset provides a constant-time exponentiation method with precomputation.
//
// References: "Efficient and secure algorithms for GLV-based scalar
// multiplication and their implementation on GLV–GLS curves" by Faz-Hernandez et al.
//   - https://doi.org/10.1007/s13389-014-0085-7
//   - https://eprint.iacr.org/2013/158
package mlsbset

import (
    "errors"
    "fmt"
    "math/big"

    "github.com/cloudflare/circl/internal/conv"
)

// EltG is a group element.
type EltG interface{}

// EltP is a precomputed group element.
type EltP interface{}

// Group defines the operations required by the MLSBSet exponentiation method.
type Group interface {
    Identity() EltG                    // Returns the identity of the group.
    Sqr(x EltG)                        // Calculates x = x^2.
    Mul(x EltG, y EltP)                // Calculates x = x*y.
    NewEltP() EltP                     // Returns an arbitrary precomputed element.
    ExtendedEltP() EltP                // Returns the precomputed element x^(2^(w*d)).
    Lookup(a EltP, v uint, s, u int32) // Sets a = s*T[v][u].
}

// Params contains the parameters of the encoding.
type Params struct {
    T uint // T is the maximum size (in bits) of exponents.
    V uint // V is the number of tables.
    W uint // W is the window size.
    E uint // E is the number of digits per table.
    D uint // D is the number of digits in total.
    L uint // L is the length of the code.
}

// Encoder converts integers into valid powers.
type Encoder struct{ p Params }

// New produces an encoder of the MLSBSet algorithm.
func New(t, v, w uint) (Encoder, error) {
    if !(t > 1 && v >= 1 && w >= 2) {
        return Encoder{}, errors.New("t>1, v>=1, w>=2")
    }
    e := (t + w*v - 1) / (w * v)
    d := e * v
    l := d * w
    return Encoder{Params{t, v, w, e, d, l}}, nil
}

// Encode converts an odd integer k into a valid power for exponentiation.
func (m Encoder) Encode(k []byte) (*Power, error) {
    if len(k) == 0 {
        return nil, errors.New("empty slice")
    }
    if !(len(k) <= int(m.p.L+7)>>3) {
        return nil, errors.New("k too big")
    }
    if k[0]%2 == 0 {
        return nil, errors.New("k must be odd")
    }
    ap := int((m.p.L+7)/8) - len(k)
    k = append(k, make([]byte, ap)...)
    s := m.signs(k)
    b := make([]int32, m.p.L-m.p.D)
    c := conv.BytesLe2BigInt(k)
    c.Rsh(c, m.p.D)
    var bi big.Int
    for i := m.p.D; i < m.p.L; i++ {
        c0 := int32(c.Bit(0))
        b[i-m.p.D] = s[i%m.p.D] * c0
        bi.SetInt64(int64(b[i-m.p.D] >> 1))
        c.Rsh(c, 1)
        c.Sub(c, &bi)
    }
    carry := int(c.Int64())
    return &Power{m, s, b, carry}, nil
}

// signs calculates the set of signs.
func (m Encoder) signs(k []byte) []int32 {
    s := make([]int32, m.p.D)
    s[m.p.D-1] = 1
    for i := uint(1); i < m.p.D; i++ {
        ki := int32((k[i>>3] >> (i & 0x7)) & 0x1)
        s[i-1] = 2*ki - 1
    }
    return s
}

// GetParams returns the complementary parameters of the encoding.
func (m Encoder) GetParams() Params { return m.p }

// tableSize returns the size of each table.
func (m Encoder) tableSize() uint { return 1 << (m.p.W - 1) }

// Elts returns the total number of elements that must be precomputed.
func (m Encoder) Elts() uint { return m.p.V * m.tableSize() }

// IsExtended returns true if the element x^(2^(wd)) must be calculated.
func (m Encoder) IsExtended() bool { q := m.p.T / (m.p.V * m.p.W); return m.p.T == q*m.p.V*m.p.W }

// Ops returns the number of squares and multiplications executed during an exponentiation.
func (m Encoder) Ops() (S uint, M uint) {
    S = m.p.E
    M = m.p.E * m.p.V
    if m.IsExtended() {
        M++
    }
    return
}

func (m Encoder) String() string {
    return fmt.Sprintf("T: %v W: %v V: %v e: %v d: %v l: %v wv|t: %v",
        m.p.T, m.p.W, m.p.V, m.p.E, m.p.D, m.p.L, m.IsExtended())
}
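
A sketch of driving the encoder end to end (the parameter choices are illustrative; a concrete Group implementation, supplied by the caller, is still needed to actually run Power.Exp):

package main

import (
    "fmt"

    "github.com/cloudflare/circl/math/mlsbset"
)

func main() {
    enc, err := mlsbset.New(256, 2, 3) // 256-bit exponents, 2 tables, window 3
    if err != nil {
        panic(err)
    }
    fmt.Println("precomputed elements:", enc.Elts())
    fmt.Println(enc.Ops()) // squarings and multiplications per exponentiation

    k := make([]byte, 32) // little-endian exponent, must be odd
    k[0] = 0x55
    pow, err := enc.Encode(k)
    if err != nil {
        panic(err)
    }
    _ = pow // pow.Exp(g) runs the exponentiation once a Group g is provided
}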
64
vendor/github.com/cloudflare/circl/math/mlsbset/power.go
generated
vendored
Normal file
@ -0,0 +1,64 @@
package mlsbset

import "fmt"

// Power is a valid exponent produced by the MLSBSet encoding algorithm.
type Power struct {
    set Encoder // parameters of code.
    s   []int32 // set of signs.
    b   []int32 // set of digits.
    c   int     // carry is {0,1}.
}

// Exp calculates x^k, where x is a predetermined element of a group G.
func (p *Power) Exp(G Group) EltG {
    a, b := G.Identity(), G.NewEltP()
    for e := int(p.set.p.E - 1); e >= 0; e-- {
        G.Sqr(a)
        for v := uint(0); v < p.set.p.V; v++ {
            sgnElt, idElt := p.Digit(v, uint(e))
            G.Lookup(b, v, sgnElt, idElt)
            G.Mul(a, b)
        }
    }
    if p.set.IsExtended() && p.c == 1 {
        G.Mul(a, G.ExtendedEltP())
    }
    return a
}

// Digit returns the (v,e)-th digit and its sign.
func (p *Power) Digit(v, e uint) (sgn, dig int32) {
    sgn = p.bit(0, v, e)
    dig = 0
    for i := p.set.p.W - 1; i > 0; i-- {
        dig = 2*dig + p.bit(i, v, e)
    }
    mask := dig >> 31
    dig = (dig + mask) ^ mask
    return sgn, dig
}

// bit returns the (w,v,e)-th bit of the code.
func (p *Power) bit(w, v, e uint) int32 {
    if !(w < p.set.p.W &&
        v < p.set.p.V &&
        e < p.set.p.E) {
        panic(fmt.Errorf("indexes outside (%v,%v,%v)", w, v, e))
    }
    if w == 0 {
        return p.s[p.set.p.E*v+e]
    }
    return p.b[p.set.p.D*(w-1)+p.set.p.E*v+e]
}

func (p *Power) String() string {
    dig := ""
    for j := uint(0); j < p.set.p.V; j++ {
        for i := uint(0); i < p.set.p.E; i++ {
            s, d := p.Digit(j, i)
            dig += fmt.Sprintf("(%2v,%2v) = %+2v %+2v\n", j, i, s, d)
        }
    }
    return fmt.Sprintf("len: %v\ncarry: %v\ndigits:\n%v", len(p.b)+len(p.s), p.c, dig)
}
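
Digit recovers a digit's magnitude without branching: mask = dig >> 31 is an arithmetic shift, so it is 0 for non-negative values and -1 for negative ones, and (dig + mask) ^ mask is then the absolute value. The trick in isolation (an illustrative sketch):

package main

import "fmt"

// abs32 returns |d| branchlessly (valid for all int32 except math.MinInt32).
func abs32(d int32) int32 {
    mask := d >> 31 // 0 if d >= 0, -1 if d < 0
    return (d + mask) ^ mask
}

func main() {
    fmt.Println(abs32(5), abs32(-5)) // 5 5
}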
84
vendor/github.com/cloudflare/circl/math/wnaf.go
generated
vendored
Normal file
@ -0,0 +1,84 @@
// Package math provides some utility functions for big integers.
package math

import "math/big"

// SignedDigit obtains the signed-digit recoding of n and returns a list L of
// digits such that n = sum( L[i]*2^(i*(w-1)) ), and each L[i] is an odd number
// in the set {±1, ±3, ..., ±2^(w-1)-1}. The third parameter ensures that the
// output has ceil(l/(w-1)) digits.
//
// Restrictions:
//   - n is odd and n > 0.
//   - 1 < w < 32.
//   - l >= bit length of n.
//
// References:
//   - Alg.6 in "Exponent Recoding and Regular Exponentiation Algorithms"
//     by Joye-Tunstall. http://doi.org/10.1007/978-3-642-02384-2_21
//   - Alg.6 in "Selecting Elliptic Curves for Cryptography: An Efficiency and
//     Security Analysis" by Bos et al. http://doi.org/10.1007/s13389-015-0097-y
func SignedDigit(n *big.Int, w, l uint) []int32 {
    if n.Sign() <= 0 || n.Bit(0) == 0 {
        panic("n must be non-zero, odd, and positive")
    }
    if w <= 1 || w >= 32 {
        panic("Verify that 1 < w < 32")
    }
    if uint(n.BitLen()) > l {
        panic("n is too big to fit in l digits")
    }
    lenN := (l + (w - 1) - 1) / (w - 1) // ceil(l/(w-1))
    L := make([]int32, lenN+1)
    var k, v big.Int
    k.Set(n)

    var i uint
    for i = 0; i < lenN; i++ {
        words := k.Bits()
        value := int32(words[0] & ((1 << w) - 1))
        value -= int32(1) << (w - 1)
        L[i] = value
        v.SetInt64(int64(value))
        k.Sub(&k, &v)
        k.Rsh(&k, w-1)
    }
    L[i] = int32(k.Int64())
    return L
}

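// Worked example (editor's illustration): SignedDigit(7, w=2, l=3) recodes
// n = 7 with digits in {+1, -1} and radix 2^(w-1) = 2:
//
//	i=0: 7 mod 4 = 3, digit 3-2 = +1, k = (7-1)/2 = 3
//	i=1: 3 mod 4 = 3, digit 3-2 = +1, k = (3-1)/2 = 1
//	i=2: 1 mod 4 = 1, digit 1-2 = -1, k = (1+1)/2 = 1
//	final digit: k = 1
//
// giving L = [1, 1, -1, 1], and indeed 1 + 1*2 - 1*4 + 1*8 = 7.
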
// OmegaNAF obtains the window-w Non-Adjacent Form of a positive number n and
// 1 < w < 32. The returned slice L holds n = sum( L[i]*2^i ).
//
// Reference:
//   - Alg.9 "Efficient arithmetic on Koblitz curves" by Solinas.
//     http://doi.org/10.1023/A:1008306223194
func OmegaNAF(n *big.Int, w uint) (L []int32) {
    if n.Sign() < 0 {
        panic("n must be positive")
    }
    if w <= 1 || w >= 32 {
        panic("Verify that 1 < w < 32")
    }

    L = make([]int32, n.BitLen()+1)
    var k, v big.Int
    k.Set(n)

    i := 0
    for ; k.Sign() > 0; i++ {
        value := int32(0)
        if k.Bit(0) == 1 {
            words := k.Bits()
            value = int32(words[0] & ((1 << w) - 1))
            if value >= (int32(1) << (w - 1)) {
                value -= int32(1) << w
            }
            v.SetInt64(int64(value))
            k.Sub(&k, &v)
        }
        L[i] = value
        k.Rsh(&k, 1)
    }
    return L[:i]
}
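
// Worked example (editor's illustration): OmegaNAF(7, w=3) recodes n = 7
// with digits in {0, ±1, ±3}:
//
//	k=7: odd, 7 mod 8 = 7 >= 4, digit 7-8 = -1, k = (7+1)/2 = 4
//	k=4: even, digit 0, k = 2
//	k=2: even, digit 0, k = 1
//	k=1: odd, 1 mod 8 = 1 < 4, digit +1, k = 0
//
// giving L = [-1, 0, 0, 1], and indeed -1 + 1*8 = 7; any two nonzero digits
// are separated by at least w-1 = 2 zeros.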
Block a user