main
Raw Download raw file
  1//go:build amd64 && !purego
  2// +build amd64,!purego
  3
  4#include "textflag.h"
  5#include "fp_amd64.h"
  6
  7// func cmovAmd64(x, y *Elt, n uint)
  8TEXT ·cmovAmd64(SB),NOSPLIT,$0-24
  9    MOVQ x+0(FP), DI
 10    MOVQ y+8(FP), SI
 11    MOVQ n+16(FP), BX
 12    cselect(0(DI),0(SI),BX)
 13    RET
 14
 15// func cswapAmd64(x, y *Elt, n uint)
 16TEXT ·cswapAmd64(SB),NOSPLIT,$0-24
 17    MOVQ x+0(FP), DI
 18    MOVQ y+8(FP), SI
 19    MOVQ n+16(FP), BX
 20    cswap(0(DI),0(SI),BX)
 21    RET
 22
 23// func subAmd64(z, x, y *Elt)
 24TEXT ·subAmd64(SB),NOSPLIT,$0-24
 25    MOVQ z+0(FP), DI
 26    MOVQ x+8(FP), SI
 27    MOVQ y+16(FP), BX
 28    subtraction(0(DI),0(SI),0(BX))
 29    RET
 30
 31// func addsubAmd64(x, y *Elt)
 32TEXT ·addsubAmd64(SB),NOSPLIT,$0-16
 33    MOVQ x+0(FP), DI
 34    MOVQ y+8(FP), SI
 35    addSub(0(DI),0(SI))
 36    RET
 37
 38#define addLegacy \
 39    additionLeg(0(DI),0(SI),0(BX))
 40#define addBmi2Adx \
 41    additionAdx(0(DI),0(SI),0(BX))
 42
 43#define mulLegacy \
 44    integerMulLeg(0(SP),0(SI),0(BX)) \
 45    reduceFromDoubleLeg(0(DI),0(SP))
 46#define mulBmi2Adx \
 47    integerMulAdx(0(SP),0(SI),0(BX)) \
 48    reduceFromDoubleAdx(0(DI),0(SP))
 49
 50#define sqrLegacy \
 51    integerSqrLeg(0(SP),0(SI)) \
 52    reduceFromDoubleLeg(0(DI),0(SP))
 53#define sqrBmi2Adx \
 54    integerSqrAdx(0(SP),0(SI)) \
 55    reduceFromDoubleAdx(0(DI),0(SP))
 56
 57// func addAmd64(z, x, y *Elt)
 58TEXT ·addAmd64(SB),NOSPLIT,$0-24
 59    MOVQ z+0(FP), DI
 60    MOVQ x+8(FP), SI
 61    MOVQ y+16(FP), BX
 62    CHECK_BMI2ADX(LADD, addLegacy, addBmi2Adx)
 63
 64// func mulAmd64(z, x, y *Elt)
 65TEXT ·mulAmd64(SB),NOSPLIT,$64-24
 66    MOVQ z+0(FP), DI
 67    MOVQ x+8(FP), SI
 68    MOVQ y+16(FP), BX
 69    CHECK_BMI2ADX(LMUL, mulLegacy, mulBmi2Adx)
 70
 71// func sqrAmd64(z, x *Elt)
 72TEXT ·sqrAmd64(SB),NOSPLIT,$64-16
 73    MOVQ z+0(FP), DI
 74    MOVQ x+8(FP), SI
 75    CHECK_BMI2ADX(LSQR, sqrLegacy, sqrBmi2Adx)
 76
 77// func modpAmd64(z *Elt)
 78TEXT ·modpAmd64(SB),NOSPLIT,$0-8
 79    MOVQ z+0(FP), DI
 80
 81    MOVQ   (DI),  R8
 82    MOVQ  8(DI),  R9
 83    MOVQ 16(DI), R10
 84    MOVQ 24(DI), R11
 85
 86    MOVL $19, AX
 87    MOVL $38, CX
 88
 89    BTRQ $63, R11 // PUT BIT 255 IN CARRY FLAG AND CLEAR
 90    CMOVLCC AX, CX // C[255] ? 38 : 19
 91
 92    // ADD EITHER 19 OR 38 TO C
 93    ADDQ CX,  R8
 94    ADCQ $0,  R9
 95    ADCQ $0, R10
 96    ADCQ $0, R11
 97
 98    // TEST FOR BIT 255 AGAIN; ONLY TRIGGERED ON OVERFLOW MODULO 2^255-19
 99    MOVL     $0,  CX
100    CMOVLPL  AX,  CX // C[255] ? 0 : 19
101    BTRQ    $63, R11 // CLEAR BIT 255
102
103    // SUBTRACT 19 IF NECESSARY
104    SUBQ CX,  R8
105    MOVQ  R8,   (DI)
106    SBBQ $0,  R9
107    MOVQ  R9,  8(DI)
108    SBBQ $0, R10
109    MOVQ R10, 16(DI)
110    SBBQ $0, R11
111    MOVQ R11, 24(DI)
112    RET