bindle/vendor/github.com/cloudflare/circl/dh/x25519/curve

  1//go:build amd64 && !purego
  2// +build amd64,!purego
  3
  4#include "textflag.h"
  5
  6// Depends on circl/math/fp25519 package
  7#include "../../math/fp25519/fp_amd64.h"
  8#include "curve_amd64.h"
  9
 10// CTE_A24 is (A+2)/4 from Curve25519
   // It is the scalar that multiplyA24Leg and multiplyA24Adx multiply a field
   // element by.
 11#define CTE_A24 121666
 12
   // Size is the byte length of one field element (four 64-bit limbs). It is the
   // stride used below to index elements in the work area and in the stack frame.
 13#define Size 32
 14
 15// multiplyA24Leg multiplies x times CTE_A24 and stores in z
 16// Uses: AX, DX, R8-R13, FLAGS
 17// Instr: x86_64, cmov
   //
   // z and x are memory operands of the form off(REG). The macro reads the four
   // 64-bit limbs 0+x, 8+x, 16+x, 24+x and writes the four limbs 0+z .. 24+z.
   // Every limb of x is read before any limb of z is written.
   //
   // Steps:
   //  1. Each limb of x is multiplied by CTE_A24 with MULQ. AX is reloaded before
   //     every MULQ because MULQ leaves the 128-bit product in DX:AX. MOVL is
   //     enough for the reload: a 32-bit write zero-extends into the full register.
   //  2. The partial products are summed with one ADDQ/ADCQ chain, giving the
   //     five-word product R8, R9, R10, R11, DX (least significant word first).
   //  3. The top word DX is multiplied by 38 and added to the low word R8; the
   //     carry is propagated through R9..R11, which are stored as they are
   //     finalized (MOVQ does not change the flags, so the chain is not broken).
   //  4. If the chain still carries out of R11, 38 is added to R8 once more:
   //     DX is cleared with MOVQ (which keeps CF) and CMOVQCS loads 38 from AX
   //     only when CF is set.
   //
   // A carry out of the final ADDQ is not propagated. It cannot occur: a carry
   // out of R11 in step 3 means R9..R11 wrapped to zero and R8 is smaller than
   // the 38*DX addend, so adding 38 to it does not overflow.
   //
   // The stored value is congruent to x*CTE_A24 modulo 2^255-19 and fits in 256
   // bits; this macro performs no further reduction to a canonical range.
 18#define multiplyA24Leg(z,x) \
 19    MOVL $CTE_A24, AX; MULQ  0+x; MOVQ AX,  R8; MOVQ DX,  R9; \
 20    MOVL $CTE_A24, AX; MULQ  8+x; MOVQ AX, R12; MOVQ DX, R10; \
 21    MOVL $CTE_A24, AX; MULQ 16+x; MOVQ AX, R13; MOVQ DX, R11; \
 22    MOVL $CTE_A24, AX; MULQ 24+x; \
 23    ADDQ R12,  R9; \
 24    ADCQ R13, R10; \
 25    ADCQ  AX, R11; \
 26    ADCQ  $0,  DX; \
 27    MOVL $38,  AX; /* 2*C = 38 = 2^256 MOD 2^255-19*/ \
 28    IMULQ AX, DX; \
 29    ADDQ DX, R8; \
 30    ADCQ $0,  R9;  MOVQ  R9,  8+z; \
 31    ADCQ $0, R10;  MOVQ R10, 16+z; \
 32    ADCQ $0, R11;  MOVQ R11, 24+z; \
 33    MOVQ $0, DX; \
 34    CMOVQCS AX, DX; \
 35    ADDQ DX, R8;  MOVQ  R8,   0+z;
 36
 37// multiplyA24Adx multiplies x times CTE_A24 and stores in z
 38// Uses: AX, DX, R8-R12, FLAGS
 39// Instr: x86_64, cmov, bmi2
   //
   // Same contract as multiplyA24Leg, implemented with MULX. Despite the name,
   // the only extension instruction used in this macro is MULXQ (BMI2).
   //
   // z and x are memory operands of the form off(REG). The macro reads the four
   // 64-bit limbs 0+x .. 24+x and writes the four limbs 0+z .. 24+z. Every limb
   // of x is read before any limb of z is written.
   //
   // Steps:
   //  1. DX holds CTE_A24 as the implicit MULX multiplicand. Each MULXQ writes
   //     the low half of the product to its second operand and the high half to
   //     its third. MULX leaves the flags alone, which is what allows the
   //     ADDQ/ADCQ carry chain to be interleaved with the multiplications.
   //  2. After the chain the five-word product is R8, R9, R10, R11, R12 (least
   //     significant word first). The run of ';' on the fifth line is a series of
   //     empty statements used only to align the ADCQ column.
   //  3. The top word R12 is multiplied by 38 and added to the low word R8; the
   //     carry is propagated through R9..R11, which are stored as they are
   //     finalized (MOVQ does not change the flags).
   //  4. If the chain still carries out of R11, 38 is added to R8 once more:
   //     R12 is cleared with MOVQ (which keeps CF) and CMOVQCS loads 38 from DX
   //     only when CF is set.
   //
   // A carry out of the final ADDQ is not propagated. It cannot occur: a carry
   // out of R11 in step 3 means R9..R11 wrapped to zero and R8 is smaller than
   // the 38*R12 addend, so adding 38 to it does not overflow.
 40#define multiplyA24Adx(z,x) \
 41    MOVQ  $CTE_A24, DX; \
 42    MULXQ  0+x,  R8, R10; \
 43    MULXQ  8+x,  R9, R11;  ADDQ R10,  R9; \
 44    MULXQ 16+x, R10,  AX;  ADCQ R11, R10; \
 45    MULXQ 24+x, R11, R12;  ADCQ  AX, R11; \
 46    ;;;;;;;;;;;;;;;;;;;;;  ADCQ  $0, R12; \
 47    MOVL $38,  DX; /* 2*C = 38 = 2^256 MOD 2^255-19*/ \
 48    IMULQ DX, R12; \
 49    ADDQ R12, R8; \
 50    ADCQ $0,  R9;  MOVQ  R9,  8+z; \
 51    ADCQ $0, R10;  MOVQ R10, 16+z; \
 52    ADCQ $0, R11;  MOVQ R11, 24+z; \
 53    MOVQ $0, R12; \
 54    CMOVQCS DX, R12; \
 55    ADDQ R12, R8;  MOVQ  R8,  0+z;
 56
   // mulA24Legacy and mulA24Bmi2Adx bind the two multiply-by-CTE_A24 macros to
   // fixed registers: the destination element is addressed through DI and the
   // source element through SI. They take no arguments so that they can be
   // passed by name to CHECK_BMI2ADX in mulA24Amd64.
 57#define mulA24Legacy \
 58    multiplyA24Leg(0(DI),0(SI))
 59#define mulA24Bmi2Adx \
 60    multiplyA24Adx(0(DI),0(SI))
 61
 62// func mulA24Amd64(z, x *fp255.Elt)
   // mulA24Amd64 stores in z the product of x and CTE_A24, using one of the two
   // macro bodies mulA24Legacy / mulA24Bmi2Adx.
   // Frame: $0-16 declares no local stack space and 16 bytes of arguments (the
   // two pointers z and x).
   // NOTE(review): CHECK_BMI2ADX is defined in an included header that is not
   // shown here. It presumably branches on CPU support for BMI2/ADX (using LMA24
   // as its label), expands the matching body and returns; no RET appears in
   // this file, so confirm in fp_amd64.h.
 63TEXT ·mulA24Amd64(SB),NOSPLIT,$0-16
       // DI = z (destination) and SI = x (source), the registers hard-wired into
       // mulA24Legacy and mulA24Bmi2Adx.
 64    MOVQ z+0(FP), DI
 65    MOVQ x+8(FP), SI
 66    CHECK_BMI2ADX(LMA24, mulA24Legacy, mulA24Bmi2Adx)
 67
 68
 69// func ladderStepAmd64(w *[5]fp255.Elt, b uint)
 70// ladderStepAmd64 calculates a point addition and doubling as follows:
 71// (x2,z2) = 2*(x2,z2) and (x3,z3) = (x2,z2)+(x3,z3) using as a difference (x1,-).
 72//  work  = (x1,x2,z2,x3,z3) are five fp255.Elt of 32 bytes.
 73//  stack = (t0,t1) are two fp.Elt of fp.Size bytes, and
 74//          (b0,b1) are two double-precision fp.Elt of 2*fp.Size bytes each.
   // Frame: $192-16 reserves 192 bytes of locals (t0 at 0, t1 at 32, b0 at 64 and
   // b1 at 128, the last two being 64 bytes each) and 16 bytes of arguments.
   // NOTE(review): the names defined below are not used by any instruction
   // visible in this file. They are presumably referenced by name inside
   // ladderStepLeg / ladderStepBmi2Adx, which is why they are defined before the
   // expansion and #undef'd after it; confirm in curve_amd64.h.
 75TEXT ·ladderStepAmd64(SB),NOSPLIT,$192-16
 76    // Parameters
       // regWork is the base pointer of w; regMove carries the argument b.
       // x1..z3 are the five consecutive elements of w, Size bytes apart.
 77    #define regWork DI
 78    #define regMove SI
 79    #define x1 0*Size(regWork)
 80    #define x2 1*Size(regWork)
 81    #define z2 2*Size(regWork)
 82    #define x3 3*Size(regWork)
 83    #define z3 4*Size(regWork)
 84    // Local variables
       // Offsets into the local frame: t0, t1 are Size bytes each; b0, b1 are
       // 2*Size bytes each.
 85    #define t0 0*Size(SP)
 86    #define t1 1*Size(SP)
 87    #define b0 2*Size(SP)
 88    #define b1 4*Size(SP)
 89    MOVQ w+0(FP), regWork
 90    MOVQ b+8(FP), regMove
       // NOTE(review): CHECK_BMI2ADX is defined in a header not shown here. It
       // presumably selects ladderStepLeg or ladderStepBmi2Adx according to CPU
       // support (LLADSTEP being its label) and returns; confirm in fp_amd64.h.
 91    CHECK_BMI2ADX(LLADSTEP, ladderStepLeg, ladderStepBmi2Adx)
       // Release the local names so the next function can redefine them.
 92    #undef regWork
 93    #undef regMove
 94    #undef x1
 95    #undef x2
 96    #undef z2
 97    #undef x3
 98    #undef z3
 99    #undef t0
100    #undef t1
101    #undef b0
102    #undef b1
103
104// func diffAddAmd64(w *[5]fp255.Elt, b uint)
105// diffAddAmd64 calculates a differential point addition using a precomputed point.
106// (x1,z1) = (x1,z1)+(mu) using a difference point (x2,z2)
107//    w    = (mu,x1,z1,x2,z2) are five fp.Elt, and
108//   stack = (b0,b1) are two double-precision fp.Elt of 2*fp.Size bytes each.
   // The first element of w, called mu above, is named ui in the definitions below.
   // Frame: $128-16 reserves 128 bytes of locals (b0 at 0 and b1 at 64, 64 bytes
   // each) and 16 bytes of arguments.
   // NOTE(review): apart from the two cswap calls, the names defined below are
   // not used by any instruction visible in this file. They are presumably
   // referenced by name inside difAddLeg / difAddBmi2Adx; confirm in
   // curve_amd64.h.
109TEXT ·diffAddAmd64(SB),NOSPLIT,$128-16
110    // Parameters
       // regWork is the base pointer of w; regSwap carries the argument b.
       // ui..z2 are the five consecutive elements of w, Size bytes apart.
111    #define regWork DI
112    #define regSwap SI
113    #define ui 0*Size(regWork)
114    #define x1 1*Size(regWork)
115    #define z1 2*Size(regWork)
116    #define x2 3*Size(regWork)
117    #define z2 4*Size(regWork)
118    // Local variables
       // Offsets into the local frame; each buffer is 2*Size bytes.
119    #define b0 0*Size(SP)
120    #define b1 2*Size(SP)
121    MOVQ w+0(FP), regWork
122    MOVQ b+8(FP), regSwap
       // NOTE(review): cswap is defined outside this file. From its name and
       // arguments it presumably exchanges (x1,z1) with (x2,z2) when b is set;
       // confirm the exact condition and its register usage in curve_amd64.h.
123    cswap(x1,x2,regSwap)
124    cswap(z1,z2,regSwap)
       // NOTE(review): CHECK_BMI2ADX is defined in a header not shown here. It
       // presumably selects difAddLeg or difAddBmi2Adx according to CPU support
       // (LDIFADD being its label) and returns; confirm in fp_amd64.h.
125    CHECK_BMI2ADX(LDIFADD, difAddLeg, difAddBmi2Adx)
       // Release the local names so the next function can redefine them.
126    #undef regWork
127    #undef regSwap
128    #undef ui
129    #undef x1
130    #undef z1
131    #undef x2
132    #undef z2
133    #undef b0
134    #undef b1
135
136// func doubleAmd64(x, z *fp255.Elt)
137// doubleAmd64 calculates a point doubling (x1,z1) = 2*(x1,z1).
138//  stack = (t0,t1) are two fp.Elt of fp.Size bytes, and
139//          (b0,b1) are two double-precision fp.Elt of 2*fp.Size bytes each.
   // Frame: $192-16 reserves 192 bytes of locals (t0 at 0, t1 at 32, b0 at 64 and
   // b1 at 128, the last two being 64 bytes each) and 16 bytes of arguments.
   // NOTE(review): the names defined below are not used by any instruction
   // visible in this file. They are presumably referenced by name inside
   // doubleLeg / doubleBmi2Adx; confirm in curve_amd64.h.
140TEXT ·doubleAmd64(SB),NOSPLIT,$192-16
141    // Parameters
       // x1 is the element addressed by the argument x (held in DI) and z1 the
       // element addressed by the argument z (held in SI).
142    #define x1 0(DI)
143    #define z1 0(SI)
144    // Local variables
       // Offsets into the local frame: t0, t1 are Size bytes each; b0, b1 are
       // 2*Size bytes each.
145    #define t0 0*Size(SP)
146    #define t1 1*Size(SP)
147    #define b0 2*Size(SP)
148    #define b1 4*Size(SP)
149    MOVQ x+0(FP), DI
150    MOVQ z+8(FP), SI
       // NOTE(review): CHECK_BMI2ADX is defined in a header not shown here. It
       // presumably selects doubleLeg or doubleBmi2Adx according to CPU support
       // (LDOUB being its label) and returns; confirm in fp_amd64.h.
151    CHECK_BMI2ADX(LDOUB,doubleLeg,doubleBmi2Adx)
       // Release the local names defined for this function.
152    #undef x1
153    #undef z1
154    #undef t0
155    #undef t1
156    #undef b0
157    #undef b1