1. update clientset and deepcopy using code-generator
2. add a dummy file tools.go to force "go mod vendor" to see code-generator as a dependency
3. add a script to update the CRD
4. add a README to document the CRD updating steps
Also: run go mod tidy; update README
This commit is contained in:
134
vendor/gonum.org/v1/gonum/internal/asm/c128/axpyinc_amd64.s
generated
vendored
Normal file
134
vendor/gonum.org/v1/gonum/internal/asm/c128/axpyinc_amd64.s
generated
vendored
Normal file
@@ -0,0 +1,134 @@
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//+build !noasm,!appengine,!safe
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// MOVDDUP X2, X3
|
||||
#define MOVDDUP_X2_X3 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xDA
|
||||
// MOVDDUP X4, X5
|
||||
#define MOVDDUP_X4_X5 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xEC
|
||||
// MOVDDUP X6, X7
|
||||
#define MOVDDUP_X6_X7 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xFE
|
||||
// MOVDDUP X8, X9
|
||||
#define MOVDDUP_X8_X9 BYTE $0xF2; BYTE $0x45; BYTE $0x0F; BYTE $0x12; BYTE $0xC8
|
||||
|
||||
// ADDSUBPD X2, X3
|
||||
#define ADDSUBPD_X2_X3 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xDA
|
||||
// ADDSUBPD X4, X5
|
||||
#define ADDSUBPD_X4_X5 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xEC
|
||||
// ADDSUBPD X6, X7
|
||||
#define ADDSUBPD_X6_X7 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xFE
|
||||
// ADDSUBPD X8, X9
|
||||
#define ADDSUBPD_X8_X9 BYTE $0x66; BYTE $0x45; BYTE $0x0F; BYTE $0xD0; BYTE $0xC8
|
||||
|
||||
// func AxpyInc(alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr)
|
||||
TEXT ·AxpyInc(SB), NOSPLIT, $0 // y[iy+i*incY] += alpha * x[ix+i*incX] for i in [0, n)
|
||||
MOVQ x_base+16(FP), SI // SI = &x
|
||||
MOVQ y_base+40(FP), DI // DI = &y
|
||||
MOVQ n+64(FP), CX // CX = n
|
||||
CMPQ CX, $0 // if n==0 { return }
|
||||
JE axpyi_end
|
||||
MOVQ ix+88(FP), R8 // R8 = ix // Load the first index
|
||||
SHLQ $4, R8 // R8 *= sizeof(complex128)
|
||||
MOVQ iy+96(FP), R9 // R9 = iy
|
||||
SHLQ $4, R9 // R9 *= sizeof(complex128)
|
||||
LEAQ (SI)(R8*1), SI // SI = &(x[ix])
|
||||
LEAQ (DI)(R9*1), DI // DI = &(y[iy])
|
||||
MOVQ DI, DX // DX = DI // Separate Read/Write pointers
|
||||
MOVQ incX+72(FP), R8 // R8 = incX
|
||||
SHLQ $4, R8 // R8 *= sizeof(complex128)
|
||||
MOVQ incY+80(FP), R9 // R9 = incY
|
||||
SHLQ $4, R9 // R9 *= sizeof(complex128)
|
||||
MOVUPS alpha+0(FP), X0 // X0 = { imag(a), real(a) }
|
||||
MOVAPS X0, X1 // X1 = X0
|
||||
SHUFPD $0x1, X1, X1 // X1 = { real(a), imag(a) }
|
||||
MOVAPS X0, X10 // Copy X0 and X1 for pipelining
|
||||
MOVAPS X1, X11
|
||||
MOVQ CX, BX // BX = n
|
||||
ANDQ $3, CX // CX = n % 4
|
||||
SHRQ $2, BX // BX = floor( n / 4 )
|
||||
JZ axpyi_tail // if BX == 0 { goto axpyi_tail }
|
||||
|
||||
axpyi_loop: // do { // four elements per iteration
|
||||
MOVUPS (SI), X2 // X_i = { imag(x[i]), real(x[i]) }
|
||||
MOVUPS (SI)(R8*1), X4
|
||||
LEAQ (SI)(R8*2), SI // SI = &(SI[incX*2])
|
||||
MOVUPS (SI), X6
|
||||
MOVUPS (SI)(R8*1), X8
|
||||
|
||||
// X_(i+1) = { real(x[i]), real(x[i]) }
|
||||
MOVDDUP_X2_X3
|
||||
MOVDDUP_X4_X5
|
||||
MOVDDUP_X6_X7
|
||||
MOVDDUP_X8_X9
|
||||
|
||||
// X_i = { imag(x[i]), imag(x[i]) }
|
||||
SHUFPD $0x3, X2, X2
|
||||
SHUFPD $0x3, X4, X4
|
||||
SHUFPD $0x3, X6, X6
|
||||
SHUFPD $0x3, X8, X8
|
||||
|
||||
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||
MULPD X1, X2
|
||||
MULPD X0, X3
|
||||
MULPD X11, X4
|
||||
MULPD X10, X5
|
||||
MULPD X1, X6
|
||||
MULPD X0, X7
|
||||
MULPD X11, X8
|
||||
MULPD X10, X9
|
||||
|
||||
// X_(i+1) = {
|
||||
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||
// }
|
||||
ADDSUBPD_X2_X3
|
||||
ADDSUBPD_X4_X5
|
||||
ADDSUBPD_X6_X7
|
||||
ADDSUBPD_X8_X9
|
||||
|
||||
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
|
||||
ADDPD (DX), X3
|
||||
ADDPD (DX)(R9*1), X5
|
||||
LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2])
|
||||
ADDPD (DX), X7
|
||||
ADDPD (DX)(R9*1), X9
|
||||
MOVUPS X3, (DI) // y[i] = X_(i+1)
|
||||
MOVUPS X5, (DI)(R9*1)
|
||||
LEAQ (DI)(R9*2), DI // DI = &(DI[incY*2])
|
||||
MOVUPS X7, (DI)
|
||||
MOVUPS X9, (DI)(R9*1)
|
||||
LEAQ (SI)(R8*2), SI // SI = &(SI[incX*2])
|
||||
LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2])
|
||||
LEAQ (DI)(R9*2), DI // DI = &(DI[incY*2])
|
||||
DECQ BX
|
||||
JNZ axpyi_loop // } while --BX > 0
|
||||
CMPQ CX, $0 // if CX == 0 { return }
|
||||
JE axpyi_end
|
||||
|
||||
axpyi_tail: // do { // one element per iteration
|
||||
MOVUPS (SI), X2 // X_i = { imag(x[i]), real(x[i]) }
|
||||
MOVDDUP_X2_X3 // X_(i+1) = { real(x[i]), real(x[i]) }
|
||||
SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) }
|
||||
MULPD X1, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||
MULPD X0, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||
|
||||
// X_(i+1) = {
|
||||
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||
// }
|
||||
ADDSUBPD_X2_X3
|
||||
|
||||
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
|
||||
ADDPD (DI), X3
|
||||
MOVUPS X3, (DI) // y[i] = X_(i+1)
|
||||
ADDQ R8, SI // SI = &(SI[incX])
|
||||
ADDQ R9, DI // DI = &(DI[incY])
|
||||
LOOP axpyi_tail // } while --CX > 0
|
||||
|
||||
axpyi_end:
|
||||
RET
|
141
vendor/gonum.org/v1/gonum/internal/asm/c128/axpyincto_amd64.s
generated
vendored
Normal file
141
vendor/gonum.org/v1/gonum/internal/asm/c128/axpyincto_amd64.s
generated
vendored
Normal file
@@ -0,0 +1,141 @@
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//+build !noasm,!appengine,!safe
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// MOVDDUP X2, X3
|
||||
#define MOVDDUP_X2_X3 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xDA
|
||||
// MOVDDUP X4, X5
|
||||
#define MOVDDUP_X4_X5 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xEC
|
||||
// MOVDDUP X6, X7
|
||||
#define MOVDDUP_X6_X7 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xFE
|
||||
// MOVDDUP X8, X9
|
||||
#define MOVDDUP_X8_X9 BYTE $0xF2; BYTE $0x45; BYTE $0x0F; BYTE $0x12; BYTE $0xC8
|
||||
|
||||
// ADDSUBPD X2, X3
|
||||
#define ADDSUBPD_X2_X3 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xDA
|
||||
// ADDSUBPD X4, X5
|
||||
#define ADDSUBPD_X4_X5 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xEC
|
||||
// ADDSUBPD X6, X7
|
||||
#define ADDSUBPD_X6_X7 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xFE
|
||||
// ADDSUBPD X8, X9
|
||||
#define ADDSUBPD_X8_X9 BYTE $0x66; BYTE $0x45; BYTE $0x0F; BYTE $0xD0; BYTE $0xC8
|
||||
|
||||
// func AxpyIncTo(dst []complex128, incDst, idst uintptr, alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr)
|
||||
TEXT ·AxpyIncTo(SB), NOSPLIT, $0 // dst[idst+i*incDst] = alpha * x[ix+i*incX] + y[iy+i*incY] for i in [0, n)
|
||||
MOVQ dst_base+0(FP), DI // DI = &dst
|
||||
MOVQ x_base+56(FP), SI // SI = &x
|
||||
MOVQ y_base+80(FP), DX // DX = &y
|
||||
MOVQ n+104(FP), CX // CX = n
|
||||
CMPQ CX, $0 // if n==0 { return }
|
||||
JE axpyi_end
|
||||
MOVQ ix+128(FP), R8 // R8 = ix // Load the first index
|
||||
SHLQ $4, R8 // R8 *= sizeof(complex128)
|
||||
MOVQ iy+136(FP), R9 // R9 = iy
|
||||
SHLQ $4, R9 // R9 *= sizeof(complex128)
|
||||
MOVQ idst+32(FP), R10 // R10 = idst
|
||||
SHLQ $4, R10 // R10 *= sizeof(complex128)
|
||||
LEAQ (SI)(R8*1), SI // SI = &(x[ix])
|
||||
LEAQ (DX)(R9*1), DX // DX = &(y[iy])
|
||||
LEAQ (DI)(R10*1), DI // DI = &(dst[idst])
|
||||
MOVQ incX+112(FP), R8 // R8 = incX
|
||||
SHLQ $4, R8 // R8 *= sizeof(complex128)
|
||||
MOVQ incY+120(FP), R9 // R9 = incY
|
||||
SHLQ $4, R9 // R9 *= sizeof(complex128)
|
||||
MOVQ incDst+24(FP), R10 // R10 = incDst
|
||||
SHLQ $4, R10 // R10 *= sizeof(complex128)
|
||||
MOVUPS alpha+40(FP), X0 // X0 = { imag(a), real(a) }
|
||||
MOVAPS X0, X1 // X1 = X0
|
||||
SHUFPD $0x1, X1, X1 // X1 = { real(a), imag(a) }
|
||||
MOVAPS X0, X10 // Copy X0 and X1 for pipelining
|
||||
MOVAPS X1, X11
|
||||
MOVQ CX, BX // BX = n
|
||||
ANDQ $3, CX // CX = n % 4
|
||||
SHRQ $2, BX // BX = floor( n / 4 )
|
||||
JZ axpyi_tail // if BX == 0 { goto axpyi_tail }
|
||||
|
||||
axpyi_loop: // do { // four elements per iteration
|
||||
MOVUPS (SI), X2 // X_i = { imag(x[i]), real(x[i]) }
|
||||
MOVUPS (SI)(R8*1), X4
|
||||
LEAQ (SI)(R8*2), SI // SI = &(SI[incX*2])
|
||||
|
||||
MOVUPS (SI), X6
|
||||
MOVUPS (SI)(R8*1), X8
|
||||
|
||||
// X_(i+1) = { real(x[i]), real(x[i]) }
|
||||
MOVDDUP_X2_X3
|
||||
MOVDDUP_X4_X5
|
||||
MOVDDUP_X6_X7
|
||||
MOVDDUP_X8_X9
|
||||
|
||||
// X_i = { imag(x[i]), imag(x[i]) }
|
||||
SHUFPD $0x3, X2, X2
|
||||
SHUFPD $0x3, X4, X4
|
||||
SHUFPD $0x3, X6, X6
|
||||
SHUFPD $0x3, X8, X8
|
||||
|
||||
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||
MULPD X1, X2
|
||||
MULPD X0, X3
|
||||
MULPD X11, X4
|
||||
MULPD X10, X5
|
||||
MULPD X1, X6
|
||||
MULPD X0, X7
|
||||
MULPD X11, X8
|
||||
MULPD X10, X9
|
||||
|
||||
// X_(i+1) = {
|
||||
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||
// }
|
||||
ADDSUBPD_X2_X3
|
||||
ADDSUBPD_X4_X5
|
||||
ADDSUBPD_X6_X7
|
||||
ADDSUBPD_X8_X9
|
||||
|
||||
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
|
||||
ADDPD (DX), X3
|
||||
ADDPD (DX)(R9*1), X5
|
||||
LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2])
|
||||
ADDPD (DX), X7
|
||||
ADDPD (DX)(R9*1), X9
|
||||
MOVUPS X3, (DI) // dst[i] = X_(i+1)
|
||||
MOVUPS X5, (DI)(R10*1)
|
||||
LEAQ (DI)(R10*2), DI // DI = &(DI[incDst*2])
|
||||
MOVUPS X7, (DI)
|
||||
MOVUPS X9, (DI)(R10*1)
|
||||
LEAQ (SI)(R8*2), SI // SI = &(SI[incX*2])
|
||||
LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2])
|
||||
LEAQ (DI)(R10*2), DI // DI = &(DI[incDst*2])
|
||||
DECQ BX
|
||||
JNZ axpyi_loop // } while --BX > 0
|
||||
CMPQ CX, $0 // if CX == 0 { return }
|
||||
JE axpyi_end
|
||||
|
||||
axpyi_tail: // do { // one element per iteration
|
||||
MOVUPS (SI), X2 // X_i = { imag(x[i]), real(x[i]) }
|
||||
MOVDDUP_X2_X3 // X_(i+1) = { real(x[i]), real(x[i]) }
|
||||
SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) }
|
||||
MULPD X1, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||
MULPD X0, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||
|
||||
// X_(i+1) = {
|
||||
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||
// }
|
||||
ADDSUBPD_X2_X3
|
||||
|
||||
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
|
||||
ADDPD (DX), X3
|
||||
MOVUPS X3, (DI) // dst[i] = X_(i+1)
|
||||
ADDQ R8, SI // SI += incX
|
||||
ADDQ R9, DX // DX += incY
|
||||
ADDQ R10, DI // DI += incDst
|
||||
LOOP axpyi_tail // } while --CX > 0
|
||||
|
||||
axpyi_end:
|
||||
RET
|
122
vendor/gonum.org/v1/gonum/internal/asm/c128/axpyunitary_amd64.s
generated
vendored
Normal file
122
vendor/gonum.org/v1/gonum/internal/asm/c128/axpyunitary_amd64.s
generated
vendored
Normal file
@@ -0,0 +1,122 @@
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//+build !noasm,!appengine,!safe
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// MOVDDUP X2, X3
|
||||
#define MOVDDUP_X2_X3 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xDA
|
||||
// MOVDDUP X4, X5
|
||||
#define MOVDDUP_X4_X5 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xEC
|
||||
// MOVDDUP X6, X7
|
||||
#define MOVDDUP_X6_X7 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xFE
|
||||
// MOVDDUP X8, X9
|
||||
#define MOVDDUP_X8_X9 BYTE $0xF2; BYTE $0x45; BYTE $0x0F; BYTE $0x12; BYTE $0xC8
|
||||
|
||||
// ADDSUBPD X2, X3
|
||||
#define ADDSUBPD_X2_X3 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xDA
|
||||
// ADDSUBPD X4, X5
|
||||
#define ADDSUBPD_X4_X5 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xEC
|
||||
// ADDSUBPD X6, X7
|
||||
#define ADDSUBPD_X6_X7 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xFE
|
||||
// ADDSUBPD X8, X9
|
||||
#define ADDSUBPD_X8_X9 BYTE $0x66; BYTE $0x45; BYTE $0x0F; BYTE $0xD0; BYTE $0xC8
|
||||
|
||||
// func AxpyUnitary(alpha complex128, x, y []complex128)
|
||||
TEXT ·AxpyUnitary(SB), NOSPLIT, $0 // y[i] += alpha * x[i] for i in [0, min(len(x), len(y)))
|
||||
MOVQ x_base+16(FP), SI // SI = &x
|
||||
MOVQ y_base+40(FP), DI // DI = &y
|
||||
MOVQ x_len+24(FP), CX // CX = min( len(x), len(y) )
|
||||
CMPQ y_len+48(FP), CX
|
||||
CMOVQLE y_len+48(FP), CX
|
||||
CMPQ CX, $0 // if CX == 0 { return }
|
||||
JE caxy_end
|
||||
PXOR X0, X0 // Clear work registers and cache-align loop
|
||||
PXOR X1, X1
|
||||
MOVUPS alpha+0(FP), X0 // X0 = { imag(a), real(a) }
|
||||
MOVAPS X0, X1 // X1 = X0
|
||||
SHUFPD $0x1, X1, X1 // X1 = { real(a), imag(a) }
|
||||
XORQ AX, AX // i = 0 // AX indexes 8-byte (float64) slots
|
||||
MOVAPS X0, X10 // Copy X0 and X1 for pipelining
|
||||
MOVAPS X1, X11
|
||||
MOVQ CX, BX // BX = n
|
||||
ANDQ $3, CX // CX = n % 4
|
||||
SHRQ $2, BX // BX = floor( n / 4 )
|
||||
JZ caxy_tail // if BX == 0 { goto caxy_tail }
|
||||
|
||||
caxy_loop: // do { // four elements per iteration
|
||||
MOVUPS (SI)(AX*8), X2 // X_i = { imag(x[i]), real(x[i]) }
|
||||
MOVUPS 16(SI)(AX*8), X4
|
||||
MOVUPS 32(SI)(AX*8), X6
|
||||
MOVUPS 48(SI)(AX*8), X8
|
||||
|
||||
// X_(i+1) = { real(x[i]), real(x[i]) }
|
||||
MOVDDUP_X2_X3
|
||||
MOVDDUP_X4_X5
|
||||
MOVDDUP_X6_X7
|
||||
MOVDDUP_X8_X9
|
||||
|
||||
// X_i = { imag(x[i]), imag(x[i]) }
|
||||
SHUFPD $0x3, X2, X2
|
||||
SHUFPD $0x3, X4, X4
|
||||
SHUFPD $0x3, X6, X6
|
||||
SHUFPD $0x3, X8, X8
|
||||
|
||||
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||
MULPD X1, X2
|
||||
MULPD X0, X3
|
||||
MULPD X11, X4
|
||||
MULPD X10, X5
|
||||
MULPD X1, X6
|
||||
MULPD X0, X7
|
||||
MULPD X11, X8
|
||||
MULPD X10, X9
|
||||
|
||||
// X_(i+1) = {
|
||||
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||
// }
|
||||
ADDSUBPD_X2_X3
|
||||
ADDSUBPD_X4_X5
|
||||
ADDSUBPD_X6_X7
|
||||
ADDSUBPD_X8_X9
|
||||
|
||||
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
|
||||
ADDPD (DI)(AX*8), X3
|
||||
ADDPD 16(DI)(AX*8), X5
|
||||
ADDPD 32(DI)(AX*8), X7
|
||||
ADDPD 48(DI)(AX*8), X9
|
||||
MOVUPS X3, (DI)(AX*8) // y[i] = X_(i+1)
|
||||
MOVUPS X5, 16(DI)(AX*8)
|
||||
MOVUPS X7, 32(DI)(AX*8)
|
||||
MOVUPS X9, 48(DI)(AX*8)
|
||||
ADDQ $8, AX // i += 8 // 4 complex128 = 8 float64 slots
|
||||
DECQ BX
|
||||
JNZ caxy_loop // } while --BX > 0
|
||||
CMPQ CX, $0 // if CX == 0 { return }
|
||||
JE caxy_end
|
||||
|
||||
caxy_tail: // do { // one element per iteration
|
||||
MOVUPS (SI)(AX*8), X2 // X_i = { imag(x[i]), real(x[i]) }
|
||||
MOVDDUP_X2_X3 // X_(i+1) = { real(x[i]), real(x[i]) }
|
||||
SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) }
|
||||
MULPD X1, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||
MULPD X0, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||
|
||||
// X_(i+1) = {
|
||||
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||
// }
|
||||
ADDSUBPD_X2_X3
|
||||
|
||||
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
|
||||
ADDPD (DI)(AX*8), X3
|
||||
MOVUPS X3, (DI)(AX*8) // y[i] = X_(i+1)
|
||||
ADDQ $2, AX // i += 2 // 1 complex128 = 2 float64 slots
|
||||
LOOP caxy_tail // } while --CX > 0
|
||||
|
||||
caxy_end:
|
||||
RET
|
123
vendor/gonum.org/v1/gonum/internal/asm/c128/axpyunitaryto_amd64.s
generated
vendored
Normal file
123
vendor/gonum.org/v1/gonum/internal/asm/c128/axpyunitaryto_amd64.s
generated
vendored
Normal file
@@ -0,0 +1,123 @@
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//+build !noasm,!appengine,!safe
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// MOVDDUP X2, X3
|
||||
#define MOVDDUP_X2_X3 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xDA
|
||||
// MOVDDUP X4, X5
|
||||
#define MOVDDUP_X4_X5 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xEC
|
||||
// MOVDDUP X6, X7
|
||||
#define MOVDDUP_X6_X7 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xFE
|
||||
// MOVDDUP X8, X9
|
||||
#define MOVDDUP_X8_X9 BYTE $0xF2; BYTE $0x45; BYTE $0x0F; BYTE $0x12; BYTE $0xC8
|
||||
|
||||
// ADDSUBPD X2, X3
|
||||
#define ADDSUBPD_X2_X3 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xDA
|
||||
// ADDSUBPD X4, X5
|
||||
#define ADDSUBPD_X4_X5 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xEC
|
||||
// ADDSUBPD X6, X7
|
||||
#define ADDSUBPD_X6_X7 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xFE
|
||||
// ADDSUBPD X8, X9
|
||||
#define ADDSUBPD_X8_X9 BYTE $0x66; BYTE $0x45; BYTE $0x0F; BYTE $0xD0; BYTE $0xC8
|
||||
|
||||
// func AxpyUnitaryTo(dst []complex128, alpha complex128, x, y []complex128)
|
||||
TEXT ·AxpyUnitaryTo(SB), NOSPLIT, $0 // dst[i] = alpha * x[i] + y[i] for i in [0, min(len(x), len(y), len(dst)))
|
||||
MOVQ dst_base+0(FP), DI // DI = &dst
|
||||
MOVQ x_base+40(FP), SI // SI = &x
|
||||
MOVQ y_base+64(FP), DX // DX = &y
|
||||
MOVQ x_len+48(FP), CX // CX = min( len(x), len(y), len(dst) )
|
||||
CMPQ y_len+72(FP), CX
|
||||
CMOVQLE y_len+72(FP), CX
|
||||
CMPQ dst_len+8(FP), CX
|
||||
CMOVQLE dst_len+8(FP), CX
|
||||
CMPQ CX, $0 // if CX == 0 { return }
|
||||
JE caxy_end
|
||||
MOVUPS alpha+24(FP), X0 // X0 = { imag(a), real(a) }
|
||||
MOVAPS X0, X1 // X1 = X0
|
||||
SHUFPD $0x1, X1, X1 // X1 = { real(a), imag(a) }
|
||||
XORQ AX, AX // i = 0 // AX indexes 8-byte (float64) slots
|
||||
MOVAPS X0, X10 // Copy X0 and X1 for pipelining
|
||||
MOVAPS X1, X11
|
||||
MOVQ CX, BX // BX = n
|
||||
ANDQ $3, CX // CX = n % 4
|
||||
SHRQ $2, BX // BX = floor( n / 4 )
|
||||
JZ caxy_tail // if BX == 0 { goto caxy_tail }
|
||||
|
||||
caxy_loop: // do { // four elements per iteration
|
||||
MOVUPS (SI)(AX*8), X2 // X_i = { imag(x[i]), real(x[i]) }
|
||||
MOVUPS 16(SI)(AX*8), X4
|
||||
MOVUPS 32(SI)(AX*8), X6
|
||||
MOVUPS 48(SI)(AX*8), X8
|
||||
|
||||
// X_(i+1) = { real(x[i]), real(x[i]) }
|
||||
MOVDDUP_X2_X3 // duplicate real elements (xr, xr)
|
||||
MOVDDUP_X4_X5
|
||||
MOVDDUP_X6_X7
|
||||
MOVDDUP_X8_X9
|
||||
|
||||
// X_i = { imag(x[i]), imag(x[i]) }
|
||||
SHUFPD $0x3, X2, X2 // duplicate imag elements (xi, xi)
|
||||
SHUFPD $0x3, X4, X4
|
||||
SHUFPD $0x3, X6, X6
|
||||
SHUFPD $0x3, X8, X8
|
||||
|
||||
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||
MULPD X1, X2
|
||||
MULPD X0, X3
|
||||
MULPD X11, X4
|
||||
MULPD X10, X5
|
||||
MULPD X1, X6
|
||||
MULPD X0, X7
|
||||
MULPD X11, X8
|
||||
MULPD X10, X9
|
||||
|
||||
// X_(i+1) = {
|
||||
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||
// }
|
||||
ADDSUBPD_X2_X3
|
||||
ADDSUBPD_X4_X5
|
||||
ADDSUBPD_X6_X7
|
||||
ADDSUBPD_X8_X9
|
||||
|
||||
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
|
||||
ADDPD (DX)(AX*8), X3
|
||||
ADDPD 16(DX)(AX*8), X5
|
||||
ADDPD 32(DX)(AX*8), X7
|
||||
ADDPD 48(DX)(AX*8), X9
|
||||
MOVUPS X3, (DI)(AX*8) // dst[i] = X_(i+1)
|
||||
MOVUPS X5, 16(DI)(AX*8)
|
||||
MOVUPS X7, 32(DI)(AX*8)
|
||||
MOVUPS X9, 48(DI)(AX*8)
|
||||
ADDQ $8, AX // i += 8 // 4 complex128 = 8 float64 slots
|
||||
DECQ BX
|
||||
JNZ caxy_loop // } while --BX > 0
|
||||
CMPQ CX, $0 // if CX == 0 { return }
|
||||
JE caxy_end
|
||||
|
||||
caxy_tail: // Same calculation, but read in values to avoid trampling memory
|
||||
MOVUPS (SI)(AX*8), X2 // X_i = { imag(x[i]), real(x[i]) }
|
||||
MOVDDUP_X2_X3 // X_(i+1) = { real(x[i]), real(x[i]) }
|
||||
SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) }
|
||||
MULPD X1, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||
MULPD X0, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||
|
||||
// X_(i+1) = {
|
||||
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||
// }
|
||||
ADDSUBPD_X2_X3
|
||||
|
||||
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
|
||||
ADDPD (DX)(AX*8), X3
|
||||
MOVUPS X3, (DI)(AX*8) // dst[i] = X_(i+1)
|
||||
ADDQ $2, AX // i += 2 // 1 complex128 = 2 float64 slots
|
||||
LOOP caxy_tail // } while --CX > 0
|
||||
|
||||
caxy_end:
|
||||
RET
|
6
vendor/gonum.org/v1/gonum/internal/asm/c128/doc.go
generated
vendored
Normal file
6
vendor/gonum.org/v1/gonum/internal/asm/c128/doc.go
generated
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package c128 provides complex128 vector primitives.
|
||||
package c128 // import "gonum.org/v1/gonum/internal/asm/c128"
|
153
vendor/gonum.org/v1/gonum/internal/asm/c128/dotcinc_amd64.s
generated
vendored
Normal file
153
vendor/gonum.org/v1/gonum/internal/asm/c128/dotcinc_amd64.s
generated
vendored
Normal file
@@ -0,0 +1,153 @@
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//+build !noasm,!appengine,!safe
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
#define MOVDDUP_XPTR__X3 LONG $0x1E120FF2 // MOVDDUP (SI), X3
|
||||
#define MOVDDUP_XPTR_INCX__X5 LONG $0x120F42F2; WORD $0x062C // MOVDDUP (SI)(R8*1), X5
|
||||
#define MOVDDUP_XPTR_INCX_2__X7 LONG $0x120F42F2; WORD $0x463C // MOVDDUP (SI)(R8*2), X7
|
||||
#define MOVDDUP_XPTR_INCx3X__X9 LONG $0x120F46F2; WORD $0x0E0C // MOVDDUP (SI)(R9*1), X9
|
||||
|
||||
#define MOVDDUP_8_XPTR__X2 LONG $0x56120FF2; BYTE $0x08 // MOVDDUP 8(SI), X2
|
||||
#define MOVDDUP_8_XPTR_INCX__X4 LONG $0x120F42F2; WORD $0x0664; BYTE $0x08 // MOVDDUP 8(SI)(R8*1), X4
|
||||
#define MOVDDUP_8_XPTR_INCX_2__X6 LONG $0x120F42F2; WORD $0x4674; BYTE $0x08 // MOVDDUP 8(SI)(R8*2), X6
|
||||
#define MOVDDUP_8_XPTR_INCx3X__X8 LONG $0x120F46F2; WORD $0x0E44; BYTE $0x08 // MOVDDUP 8(SI)(R9*1), X8
|
||||
|
||||
#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3
|
||||
#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5
|
||||
#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7
|
||||
#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
|
||||
|
||||
#define X_PTR SI
|
||||
#define Y_PTR DI
|
||||
#define LEN CX
|
||||
#define TAIL BX
|
||||
#define SUM X0
|
||||
#define P_SUM X1
|
||||
#define INC_X R8
|
||||
#define INCx3_X R9
|
||||
#define INC_Y R10
|
||||
#define INCx3_Y R11
|
||||
#define NEG1 X15
|
||||
#define P_NEG1 X14
|
||||
|
||||
// func DotcInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128)
|
||||
TEXT ·DotcInc(SB), NOSPLIT, $0 // sum = Σ conj(x[ix+i*incX]) * y[iy+i*incY] for i in [0, n)
|
||||
MOVQ x_base+0(FP), X_PTR // X_PTR = &x
|
||||
MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
|
||||
MOVQ n+48(FP), LEN // LEN = n
|
||||
PXOR SUM, SUM // SUM = 0
|
||||
CMPQ LEN, $0 // if LEN == 0 { return }
|
||||
JE dot_end
|
||||
PXOR P_SUM, P_SUM // P_SUM = 0
|
||||
MOVQ ix+72(FP), INC_X // INC_X = ix * sizeof(complex128)
|
||||
SHLQ $4, INC_X
|
||||
MOVQ iy+80(FP), INC_Y // INC_Y = iy * sizeof(complex128)
|
||||
SHLQ $4, INC_Y
|
||||
LEAQ (X_PTR)(INC_X*1), X_PTR // X_PTR = &(X_PTR[ix])
|
||||
LEAQ (Y_PTR)(INC_Y*1), Y_PTR // Y_PTR = &(Y_PTR[iy])
|
||||
MOVQ incX+56(FP), INC_X // INC_X = incX
|
||||
SHLQ $4, INC_X // INC_X *= sizeof(complex128)
|
||||
MOVQ incY+64(FP), INC_Y // INC_Y = incY
|
||||
SHLQ $4, INC_Y // INC_Y *= sizeof(complex128)
|
||||
MOVSD $(-1.0), NEG1
|
||||
SHUFPD $0, NEG1, NEG1 // { -1, -1 }
|
||||
MOVQ LEN, TAIL
|
||||
ANDQ $3, TAIL // TAIL = n % 4
|
||||
SHRQ $2, LEN // LEN = floor( n / 4 )
|
||||
JZ dot_tail // if n < 4 { goto dot_tail }
|
||||
MOVAPS NEG1, P_NEG1 // Copy NEG1 to P_NEG1 for pipelining
|
||||
LEAQ (INC_X)(INC_X*2), INCx3_X // INCx3_X = 3 * incX * sizeof(complex128)
|
||||
LEAQ (INC_Y)(INC_Y*2), INCx3_Y // INCx3_Y = 3 * incY * sizeof(complex128)
|
||||
|
||||
dot_loop: // do { // four elements per iteration
|
||||
MOVDDUP_XPTR__X3 // X_(i+1) = { real(x[i]), real(x[i]) }
|
||||
MOVDDUP_XPTR_INCX__X5
|
||||
MOVDDUP_XPTR_INCX_2__X7
|
||||
MOVDDUP_XPTR_INCx3X__X9
|
||||
|
||||
MOVDDUP_8_XPTR__X2 // X_i = { imag(x[i]), imag(x[i]) }
|
||||
MOVDDUP_8_XPTR_INCX__X4
|
||||
MOVDDUP_8_XPTR_INCX_2__X6
|
||||
MOVDDUP_8_XPTR_INCx3X__X8
|
||||
|
||||
// X_i = { -imag(x[i]), -imag(x[i]) }
|
||||
MULPD NEG1, X2
|
||||
MULPD P_NEG1, X4
|
||||
MULPD NEG1, X6
|
||||
MULPD P_NEG1, X8
|
||||
|
||||
// X_j = { imag(y[i]), real(y[i]) }
|
||||
MOVUPS (Y_PTR), X10
|
||||
MOVUPS (Y_PTR)(INC_Y*1), X11
|
||||
MOVUPS (Y_PTR)(INC_Y*2), X12
|
||||
MOVUPS (Y_PTR)(INCx3_Y*1), X13
|
||||
|
||||
// X_(i+1) = { imag(y[i]) * real(x[i]), real(y[i]) * real(x[i]) }
|
||||
MULPD X10, X3
|
||||
MULPD X11, X5
|
||||
MULPD X12, X7
|
||||
MULPD X13, X9
|
||||
|
||||
// X_j = { real(y[i]), imag(y[i]) }
|
||||
SHUFPD $0x1, X10, X10
|
||||
SHUFPD $0x1, X11, X11
|
||||
SHUFPD $0x1, X12, X12
|
||||
SHUFPD $0x1, X13, X13
|
||||
|
||||
// X_i = { real(y[i]) * -imag(x[i]), imag(y[i]) * -imag(x[i]) }
|
||||
MULPD X10, X2
|
||||
MULPD X11, X4
|
||||
MULPD X12, X6
|
||||
MULPD X13, X8
|
||||
|
||||
// X_(i+1) = {
|
||||
// imag(result[i]): imag(y[i])*real(x[i]) - real(y[i])*imag(x[i]),
|
||||
// real(result[i]): real(y[i])*real(x[i]) + imag(y[i])*imag(x[i])
|
||||
// }
|
||||
ADDSUBPD_X2_X3
|
||||
ADDSUBPD_X4_X5
|
||||
ADDSUBPD_X6_X7
|
||||
ADDSUBPD_X8_X9
|
||||
|
||||
// sum / psum += result[i] (two accumulators, alternated)
|
||||
ADDPD X3, SUM
|
||||
ADDPD X5, P_SUM
|
||||
ADDPD X7, SUM
|
||||
ADDPD X9, P_SUM
|
||||
|
||||
LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(X_PTR[incX*4])
|
||||
LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(Y_PTR[incY*4])
|
||||
|
||||
DECQ LEN
|
||||
JNZ dot_loop // } while --LEN > 0
|
||||
ADDPD P_SUM, SUM // sum += psum
|
||||
CMPQ TAIL, $0 // if TAIL == 0 { return }
|
||||
JE dot_end
|
||||
|
||||
dot_tail: // do { // one element per iteration
|
||||
MOVDDUP_XPTR__X3 // X_(i+1) = { real(x[i]), real(x[i]) }
|
||||
MOVDDUP_8_XPTR__X2 // X_i = { imag(x[i]), imag(x[i]) }
|
||||
MULPD NEG1, X2 // X_i = { -imag(x[i]) , -imag(x[i]) }
|
||||
MOVUPS (Y_PTR), X10 // X_j = { imag(y[i]) , real(y[i]) }
|
||||
MULPD X10, X3 // X_(i+1) = { imag(y[i]) * real(x[i]), real(y[i]) * real(x[i]) }
|
||||
SHUFPD $0x1, X10, X10 // X_j = { real(y[i]) , imag(y[i]) }
|
||||
MULPD X10, X2 // X_i = { real(y[i]) * -imag(x[i]), imag(y[i]) * -imag(x[i]) }
|
||||
|
||||
// X_(i+1) = {
|
||||
// imag(result[i]): imag(y[i])*real(x[i]) - real(y[i])*imag(x[i]),
|
||||
// real(result[i]): real(y[i])*real(x[i]) + imag(y[i])*imag(x[i])
|
||||
// }
|
||||
ADDSUBPD_X2_X3
|
||||
ADDPD X3, SUM // sum += result[i]
|
||||
ADDQ INC_X, X_PTR // X_PTR += incX
|
||||
ADDQ INC_Y, Y_PTR // Y_PTR += incY
|
||||
DECQ TAIL
|
||||
JNZ dot_tail // } while --TAIL > 0
|
||||
|
||||
dot_end:
|
||||
MOVUPS SUM, sum+88(FP) // return sum
|
||||
RET
|
143
vendor/gonum.org/v1/gonum/internal/asm/c128/dotcunitary_amd64.s
generated
vendored
Normal file
143
vendor/gonum.org/v1/gonum/internal/asm/c128/dotcunitary_amd64.s
generated
vendored
Normal file
@@ -0,0 +1,143 @@
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//+build !noasm,!appengine,!safe
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
#define MOVDDUP_XPTR_IDX_8__X3 LONG $0x1C120FF2; BYTE $0xC6 // MOVDDUP (SI)(AX*8), X3
|
||||
#define MOVDDUP_16_XPTR_IDX_8__X5 LONG $0x6C120FF2; WORD $0x10C6 // MOVDDUP 16(SI)(AX*8), X5
|
||||
#define MOVDDUP_32_XPTR_IDX_8__X7 LONG $0x7C120FF2; WORD $0x20C6 // MOVDDUP 32(SI)(AX*8), X7
|
||||
#define MOVDDUP_48_XPTR_IDX_8__X9 LONG $0x120F44F2; WORD $0xC64C; BYTE $0x30 // MOVDDUP 48(SI)(AX*8), X9
|
||||
|
||||
#define MOVDDUP_XPTR_IIDX_8__X2 LONG $0x14120FF2; BYTE $0xD6 // MOVDDUP (SI)(DX*8), X2
|
||||
#define MOVDDUP_16_XPTR_IIDX_8__X4 LONG $0x64120FF2; WORD $0x10D6 // MOVDDUP 16(SI)(DX*8), X4
|
||||
#define MOVDDUP_32_XPTR_IIDX_8__X6 LONG $0x74120FF2; WORD $0x20D6 // MOVDDUP 32(SI)(DX*8), X6
|
||||
#define MOVDDUP_48_XPTR_IIDX_8__X8 LONG $0x120F44F2; WORD $0xD644; BYTE $0x30 // MOVDDUP 48(SI)(DX*8), X8
|
||||
|
||||
#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3
|
||||
#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5
|
||||
#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7
|
||||
#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
|
||||
|
||||
#define X_PTR SI
|
||||
#define Y_PTR DI
|
||||
#define LEN CX
|
||||
#define TAIL BX
|
||||
#define SUM X0
|
||||
#define P_SUM X1
|
||||
#define IDX AX
|
||||
#define I_IDX DX
|
||||
#define NEG1 X15
|
||||
#define P_NEG1 X14
|
||||
|
||||
// func DotcUnitary(x, y []complex128) (sum complex128)
|
||||
TEXT ·DotcUnitary(SB), NOSPLIT, $0 // sum = Σ conj(x[i]) * y[i] for i in [0, min(len(x), len(y)))
|
||||
MOVQ x_base+0(FP), X_PTR // X_PTR = &x
|
||||
MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
|
||||
MOVQ x_len+8(FP), LEN // LEN = min( len(x), len(y) )
|
||||
CMPQ y_len+32(FP), LEN
|
||||
CMOVQLE y_len+32(FP), LEN
|
||||
PXOR SUM, SUM // sum = 0
|
||||
CMPQ LEN, $0 // if LEN == 0 { return }
|
||||
JE dot_end
|
||||
XORPS P_SUM, P_SUM // psum = 0
|
||||
MOVSD $(-1.0), NEG1
|
||||
SHUFPD $0, NEG1, NEG1 // { -1, -1 }
|
||||
XORQ IDX, IDX // i := 0 // float64 slot of real(x[0])
|
||||
MOVQ $1, I_IDX // j := 1 // float64 slot of imag(x[0])
|
||||
MOVQ LEN, TAIL
|
||||
ANDQ $3, TAIL // TAIL = n % 4
|
||||
SHRQ $2, LEN // LEN = floor( n / 4 )
|
||||
JZ dot_tail // if LEN == 0 { goto dot_tail }
|
||||
|
||||
MOVAPS NEG1, P_NEG1 // Copy NEG1 to P_NEG1 for pipelining
|
||||
|
||||
dot_loop: // do { // four elements per iteration
|
||||
MOVDDUP_XPTR_IDX_8__X3 // X_(i+1) = { real(x[i]), real(x[i]) }
|
||||
MOVDDUP_16_XPTR_IDX_8__X5
|
||||
MOVDDUP_32_XPTR_IDX_8__X7
|
||||
MOVDDUP_48_XPTR_IDX_8__X9
|
||||
|
||||
MOVDDUP_XPTR_IIDX_8__X2 // X_i = { imag(x[i]), imag(x[i]) }
|
||||
MOVDDUP_16_XPTR_IIDX_8__X4
|
||||
MOVDDUP_32_XPTR_IIDX_8__X6
|
||||
MOVDDUP_48_XPTR_IIDX_8__X8
|
||||
|
||||
// X_i = { -imag(x[i]), -imag(x[i]) }
|
||||
MULPD NEG1, X2
|
||||
MULPD P_NEG1, X4
|
||||
MULPD NEG1, X6
|
||||
MULPD P_NEG1, X8
|
||||
|
||||
// X_j = { imag(y[i]), real(y[i]) }
|
||||
MOVUPS (Y_PTR)(IDX*8), X10
|
||||
MOVUPS 16(Y_PTR)(IDX*8), X11
|
||||
MOVUPS 32(Y_PTR)(IDX*8), X12
|
||||
MOVUPS 48(Y_PTR)(IDX*8), X13
|
||||
|
||||
// X_(i+1) = { imag(y[i]) * real(x[i]), real(y[i]) * real(x[i]) }
|
||||
MULPD X10, X3
|
||||
MULPD X11, X5
|
||||
MULPD X12, X7
|
||||
MULPD X13, X9
|
||||
|
||||
// X_j = { real(y[i]), imag(y[i]) }
|
||||
SHUFPD $0x1, X10, X10
|
||||
SHUFPD $0x1, X11, X11
|
||||
SHUFPD $0x1, X12, X12
|
||||
SHUFPD $0x1, X13, X13
|
||||
|
||||
// X_i = { real(y[i]) * -imag(x[i]), imag(y[i]) * -imag(x[i]) }
|
||||
MULPD X10, X2
|
||||
MULPD X11, X4
|
||||
MULPD X12, X6
|
||||
MULPD X13, X8
|
||||
|
||||
// X_(i+1) = {
|
||||
// imag(result[i]): imag(y[i])*real(x[i]) - real(y[i])*imag(x[i]),
|
||||
// real(result[i]): real(y[i])*real(x[i]) + imag(y[i])*imag(x[i])
|
||||
// }
|
||||
ADDSUBPD_X2_X3
|
||||
ADDSUBPD_X4_X5
|
||||
ADDSUBPD_X6_X7
|
||||
ADDSUBPD_X8_X9
|
||||
|
||||
// sum / psum += result[i] (two accumulators, alternated)
|
||||
ADDPD X3, SUM
|
||||
ADDPD X5, P_SUM
|
||||
ADDPD X7, SUM
|
||||
ADDPD X9, P_SUM
|
||||
|
||||
ADDQ $8, IDX // IDX += 8 // 4 complex128 = 8 float64 slots
|
||||
ADDQ $8, I_IDX // I_IDX += 8
|
||||
DECQ LEN
|
||||
JNZ dot_loop // } while --LEN > 0
|
||||
ADDPD P_SUM, SUM // sum += psum
|
||||
CMPQ TAIL, $0 // if TAIL == 0 { return }
|
||||
JE dot_end
|
||||
|
||||
dot_tail: // do { // one element per iteration
|
||||
MOVDDUP_XPTR_IDX_8__X3 // X_(i+1) = { real(x[i]) , real(x[i]) }
|
||||
MOVDDUP_XPTR_IIDX_8__X2 // X_i = { imag(x[i]) , imag(x[i]) }
|
||||
MULPD NEG1, X2 // X_i = { -imag(x[i]) , -imag(x[i]) }
|
||||
MOVUPS (Y_PTR)(IDX*8), X10 // X_j = { imag(y[i]) , real(y[i]) }
|
||||
MULPD X10, X3 // X_(i+1) = { imag(y[i]) * real(x[i]), real(y[i]) * real(x[i]) }
|
||||
SHUFPD $0x1, X10, X10 // X_j = { real(y[i]) , imag(y[i]) }
|
||||
MULPD X10, X2 // X_i = { real(y[i]) * -imag(x[i]), imag(y[i]) * -imag(x[i]) }
|
||||
|
||||
// X_(i+1) = {
|
||||
// imag(result[i]): imag(y[i])*real(x[i]) - real(y[i])*imag(x[i]),
|
||||
// real(result[i]): real(y[i])*real(x[i]) + imag(y[i])*imag(x[i])
|
||||
// }
|
||||
ADDSUBPD_X2_X3
|
||||
ADDPD X3, SUM // SUM += result[i]
|
||||
ADDQ $2, IDX // IDX += 2
|
||||
ADDQ $2, I_IDX // I_IDX += 2
|
||||
DECQ TAIL
|
||||
JNZ dot_tail // } while --TAIL > 0
|
||||
|
||||
dot_end:
|
||||
MOVUPS SUM, sum+48(FP) // return sum
|
||||
RET
|
141
vendor/gonum.org/v1/gonum/internal/asm/c128/dotuinc_amd64.s
generated
vendored
Normal file
141
vendor/gonum.org/v1/gonum/internal/asm/c128/dotuinc_amd64.s
generated
vendored
Normal file
@@ -0,0 +1,141 @@
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//+build !noasm,!appengine,!safe
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
#define MOVDDUP_XPTR__X3 LONG $0x1E120FF2 // MOVDDUP (SI), X3
|
||||
#define MOVDDUP_XPTR_INCX__X5 LONG $0x120F42F2; WORD $0x062C // MOVDDUP (SI)(R8*1), X5
|
||||
#define MOVDDUP_XPTR_INCX_2__X7 LONG $0x120F42F2; WORD $0x463C // MOVDDUP (SI)(R8*2), X7
|
||||
#define MOVDDUP_XPTR_INCx3X__X9 LONG $0x120F46F2; WORD $0x0E0C // MOVDDUP (SI)(R9*1), X9
|
||||
|
||||
#define MOVDDUP_8_XPTR__X2 LONG $0x56120FF2; BYTE $0x08 // MOVDDUP 8(SI), X2
|
||||
#define MOVDDUP_8_XPTR_INCX__X4 LONG $0x120F42F2; WORD $0x0664; BYTE $0x08 // MOVDDUP 8(SI)(R8*1), X4
|
||||
#define MOVDDUP_8_XPTR_INCX_2__X6 LONG $0x120F42F2; WORD $0x4674; BYTE $0x08 // MOVDDUP 8(SI)(R8*2), X6
|
||||
#define MOVDDUP_8_XPTR_INCx3X__X8 LONG $0x120F46F2; WORD $0x0E44; BYTE $0x08 // MOVDDUP 8(SI)(R9*1), X8
|
||||
|
||||
#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3
|
||||
#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5
|
||||
#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7
|
||||
#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
|
||||
|
||||
#define X_PTR SI
|
||||
#define Y_PTR DI
|
||||
#define LEN CX
|
||||
#define TAIL BX
|
||||
#define SUM X0
|
||||
#define P_SUM X1
|
||||
#define INC_X R8
|
||||
#define INCx3_X R9
|
||||
#define INC_Y R10
|
||||
#define INCx3_Y R11
|
||||
|
||||
// DotuInc accumulates the unconjugated dot product sum += y[iy] * x[ix] over
// n elements, starting at indices ix/iy and stepping by incX/incY elements.
// The main loop handles four elements per iteration using two accumulators
// (SUM and P_SUM); the tail loop handles the remaining n % 4 elements.
//
// func DotuInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128)
TEXT ·DotuInc(SB), NOSPLIT, $0
	MOVQ x_base+0(FP), X_PTR // X_PTR = &x
	MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
	MOVQ n+48(FP), LEN // LEN = n
	PXOR SUM, SUM // sum = 0
	CMPQ LEN, $0 // if LEN == 0 { return }
	JE dot_end
	MOVQ ix+72(FP), INC_X // INC_X = ix * sizeof(complex128)
	SHLQ $4, INC_X
	MOVQ iy+80(FP), INC_Y // INC_Y = iy * sizeof(complex128)
	SHLQ $4, INC_Y
	LEAQ (X_PTR)(INC_X*1), X_PTR // X_PTR = &(X_PTR[ix])
	LEAQ (Y_PTR)(INC_Y*1), Y_PTR // Y_PTR = &(Y_PTR[iy])
	MOVQ incX+56(FP), INC_X // INC_X = incX
	SHLQ $4, INC_X // INC_X *= sizeof(complex128)
	MOVQ incY+64(FP), INC_Y // INC_Y = incY
	SHLQ $4, INC_Y // INC_Y *= sizeof(complex128)
	MOVQ LEN, TAIL
	ANDQ $3, TAIL // TAIL = LEN % 4
	SHRQ $2, LEN // LEN = floor( LEN / 4 )
	JZ dot_tail // if LEN == 0 { goto dot_tail }
	PXOR P_SUM, P_SUM // psum = 0
	LEAQ (INC_X)(INC_X*2), INCx3_X // INCx3_X = 3 * incX * sizeof(complex128)
	LEAQ (INC_Y)(INC_Y*2), INCx3_Y // INCx3_Y = 3 * incY * sizeof(complex128)

dot_loop: // do {
	MOVDDUP_XPTR__X3 // X_(i+1) = { real(x[i]), real(x[i]) }
	MOVDDUP_XPTR_INCX__X5
	MOVDDUP_XPTR_INCX_2__X7
	MOVDDUP_XPTR_INCx3X__X9

	MOVDDUP_8_XPTR__X2 // X_i = { imag(x[i]), imag(x[i]) }
	MOVDDUP_8_XPTR_INCX__X4
	MOVDDUP_8_XPTR_INCX_2__X6
	MOVDDUP_8_XPTR_INCx3X__X8

	// X_j = { imag(y[i]), real(y[i]) }
	MOVUPS (Y_PTR), X10
	MOVUPS (Y_PTR)(INC_Y*1), X11
	MOVUPS (Y_PTR)(INC_Y*2), X12
	MOVUPS (Y_PTR)(INCx3_Y*1), X13

	// X_(i+1) = { imag(y[i]) * real(x[i]), real(y[i]) * real(x[i]) }
	MULPD X10, X3
	MULPD X11, X5
	MULPD X12, X7
	MULPD X13, X9

	// X_j = { real(y[i]), imag(y[i]) }
	SHUFPD $0x1, X10, X10
	SHUFPD $0x1, X11, X11
	SHUFPD $0x1, X12, X12
	SHUFPD $0x1, X13, X13

	// X_i = { real(y[i]) * imag(x[i]), imag(y[i]) * imag(x[i]) }
	MULPD X10, X2
	MULPD X11, X4
	MULPD X12, X6
	MULPD X13, X8

	// X_(i+1) = {
	//	imag(result[i]): imag(y[i])*real(x[i]) + real(y[i])*imag(x[i]),
	//	real(result[i]): real(y[i])*real(x[i]) - imag(y[i])*imag(x[i])
	// }
	ADDSUBPD_X2_X3
	ADDSUBPD_X4_X5
	ADDSUBPD_X6_X7
	ADDSUBPD_X8_X9

	// Alternate between the two accumulators: sum/psum += result[i]
	ADDPD X3, SUM
	ADDPD X5, P_SUM
	ADDPD X7, SUM
	ADDPD X9, P_SUM

	LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(X_PTR[incX*4])
	LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(Y_PTR[incY*4])

	DECQ LEN
	JNZ dot_loop // } while --LEN > 0
	ADDPD P_SUM, SUM // sum += psum
	CMPQ TAIL, $0 // if TAIL == 0 { return }
	JE dot_end

dot_tail: // do {
	MOVDDUP_XPTR__X3 // X_(i+1) = { real(x[i]), real(x[i]) }
	MOVDDUP_8_XPTR__X2 // X_i = { imag(x[i]), imag(x[i]) }
	MOVUPS (Y_PTR), X10 // X_j = { imag(y[i]), real(y[i]) }
	MULPD X10, X3 // X_(i+1) = { imag(y[i]) * real(x[i]), real(y[i]) * real(x[i]) }
	SHUFPD $0x1, X10, X10 // X_j = { real(y[i]), imag(y[i]) }
	MULPD X10, X2 // X_i = { real(y[i]) * imag(x[i]), imag(y[i]) * imag(x[i]) }

	// X_(i+1) = {
	//	imag(result[i]): imag(y[i])*real(x[i]) + real(y[i])*imag(x[i]),
	//	real(result[i]): real(y[i])*real(x[i]) - imag(y[i])*imag(x[i])
	// }
	ADDSUBPD_X2_X3
	ADDPD X3, SUM // sum += result[i]
	ADDQ INC_X, X_PTR // X_PTR += incX
	ADDQ INC_Y, Y_PTR // Y_PTR += incY
	DECQ TAIL // --TAIL
	JNZ dot_tail // } while TAIL > 0

dot_end:
	MOVUPS SUM, sum+88(FP)
	RET
|
130
vendor/gonum.org/v1/gonum/internal/asm/c128/dotuunitary_amd64.s
generated
vendored
Normal file
130
vendor/gonum.org/v1/gonum/internal/asm/c128/dotuunitary_amd64.s
generated
vendored
Normal file
@@ -0,0 +1,130 @@
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//+build !noasm,!appengine,!safe
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
#define MOVDDUP_XPTR_IDX_8__X3 LONG $0x1C120FF2; BYTE $0xC6 // MOVDDUP (SI)(AX*8), X3
|
||||
#define MOVDDUP_16_XPTR_IDX_8__X5 LONG $0x6C120FF2; WORD $0x10C6 // MOVDDUP 16(SI)(AX*8), X5
|
||||
#define MOVDDUP_32_XPTR_IDX_8__X7 LONG $0x7C120FF2; WORD $0x20C6 // MOVDDUP 32(SI)(AX*8), X7
|
||||
#define MOVDDUP_48_XPTR_IDX_8__X9 LONG $0x120F44F2; WORD $0xC64C; BYTE $0x30 // MOVDDUP 48(SI)(AX*8), X9
|
||||
|
||||
#define MOVDDUP_XPTR_IIDX_8__X2 LONG $0x14120FF2; BYTE $0xD6 // MOVDDUP (SI)(DX*8), X2
|
||||
#define MOVDDUP_16_XPTR_IIDX_8__X4 LONG $0x64120FF2; WORD $0x10D6 // MOVDDUP 16(SI)(DX*8), X4
|
||||
#define MOVDDUP_32_XPTR_IIDX_8__X6 LONG $0x74120FF2; WORD $0x20D6 // MOVDDUP 32(SI)(DX*8), X6
|
||||
#define MOVDDUP_48_XPTR_IIDX_8__X8 LONG $0x120F44F2; WORD $0xD644; BYTE $0x30 // MOVDDUP 48(SI)(DX*8), X8
|
||||
|
||||
#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3
|
||||
#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5
|
||||
#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7
|
||||
#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
|
||||
|
||||
#define X_PTR SI
|
||||
#define Y_PTR DI
|
||||
#define LEN CX
|
||||
#define TAIL BX
|
||||
#define SUM X0
|
||||
#define P_SUM X1
|
||||
#define IDX AX
|
||||
#define I_IDX DX
|
||||
|
||||
// DotuUnitary accumulates the unconjugated dot product sum += y[i] * x[i]
// over the first min(len(x), len(y)) elements. IDX indexes the real part and
// I_IDX the imaginary part of x[i], both in units of 8 bytes. The main loop
// handles four elements per iteration; the tail loop handles the rest.
//
// func DotuUnitary(x, y []complex128) (sum complex128)
TEXT ·DotuUnitary(SB), NOSPLIT, $0
	MOVQ x_base+0(FP), X_PTR // X_PTR = &x
	MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
	MOVQ x_len+8(FP), LEN // LEN = min( len(x), len(y) )
	CMPQ y_len+32(FP), LEN
	CMOVQLE y_len+32(FP), LEN
	PXOR SUM, SUM // SUM = 0
	CMPQ LEN, $0 // if LEN == 0 { return }
	JE dot_end
	PXOR P_SUM, P_SUM // P_SUM = 0
	XORQ IDX, IDX // IDX = 0
	MOVQ $1, DX // I_IDX = 1 (DX; offset of the imaginary part)
	MOVQ LEN, TAIL
	ANDQ $3, TAIL // TAIL = LEN % 4
	SHRQ $2, LEN // LEN = floor( LEN / 4 )
	JZ dot_tail // if LEN == 0 { goto dot_tail }

dot_loop: // do {
	MOVDDUP_XPTR_IDX_8__X3 // X_(i+1) = { real(x[i]), real(x[i]) }
	MOVDDUP_16_XPTR_IDX_8__X5
	MOVDDUP_32_XPTR_IDX_8__X7
	MOVDDUP_48_XPTR_IDX_8__X9

	MOVDDUP_XPTR_IIDX_8__X2 // X_i = { imag(x[i]), imag(x[i]) }
	MOVDDUP_16_XPTR_IIDX_8__X4
	MOVDDUP_32_XPTR_IIDX_8__X6
	MOVDDUP_48_XPTR_IIDX_8__X8

	// X_j = { imag(y[i]), real(y[i]) }
	MOVUPS (Y_PTR)(IDX*8), X10
	MOVUPS 16(Y_PTR)(IDX*8), X11
	MOVUPS 32(Y_PTR)(IDX*8), X12
	MOVUPS 48(Y_PTR)(IDX*8), X13

	// X_(i+1) = { imag(y[i]) * real(x[i]), real(y[i]) * real(x[i]) }
	MULPD X10, X3
	MULPD X11, X5
	MULPD X12, X7
	MULPD X13, X9

	// X_j = { real(y[i]), imag(y[i]) }
	SHUFPD $0x1, X10, X10
	SHUFPD $0x1, X11, X11
	SHUFPD $0x1, X12, X12
	SHUFPD $0x1, X13, X13

	// X_i = { real(y[i]) * imag(x[i]), imag(y[i]) * imag(x[i]) }
	MULPD X10, X2
	MULPD X11, X4
	MULPD X12, X6
	MULPD X13, X8

	// X_(i+1) = {
	//	imag(result[i]): imag(y[i])*real(x[i]) + real(y[i])*imag(x[i]),
	//	real(result[i]): real(y[i])*real(x[i]) - imag(y[i])*imag(x[i])
	// }
	ADDSUBPD_X2_X3
	ADDSUBPD_X4_X5
	ADDSUBPD_X6_X7
	ADDSUBPD_X8_X9

	// Alternate between the two accumulators: SUM/P_SUM += result[i]
	ADDPD X3, SUM
	ADDPD X5, P_SUM
	ADDPD X7, SUM
	ADDPD X9, P_SUM

	ADDQ $8, IDX // IDX += 8
	ADDQ $8, I_IDX // I_IDX += 8
	DECQ LEN
	JNZ dot_loop // } while --LEN > 0
	ADDPD P_SUM, SUM // SUM += P_SUM
	CMPQ TAIL, $0 // if TAIL == 0 { return }
	JE dot_end

dot_tail: // do {
	MOVDDUP_XPTR_IDX_8__X3 // X_(i+1) = { real(x[i]), real(x[i]) }
	MOVDDUP_XPTR_IIDX_8__X2 // X_i = { imag(x[i]), imag(x[i]) }
	MOVUPS (Y_PTR)(IDX*8), X10 // X_j = { imag(y[i]), real(y[i]) }
	MULPD X10, X3 // X_(i+1) = { imag(y[i]) * real(x[i]), real(y[i]) * real(x[i]) }
	SHUFPD $0x1, X10, X10 // X_j = { real(y[i]), imag(y[i]) }
	MULPD X10, X2 // X_i = { real(y[i]) * imag(x[i]), imag(y[i]) * imag(x[i]) }

	// X_(i+1) = {
	//	imag(result[i]): imag(y[i])*real(x[i]) + real(y[i])*imag(x[i]),
	//	real(result[i]): real(y[i])*real(x[i]) - imag(y[i])*imag(x[i])
	// }
	ADDSUBPD_X2_X3
	ADDPD X3, SUM // SUM += result[i]
	ADDQ $2, IDX // IDX += 2
	ADDQ $2, I_IDX // I_IDX += 2
	DECQ TAIL // --TAIL
	JNZ dot_tail // } while TAIL > 0

dot_end:
	MOVUPS SUM, sum+48(FP)
	RET
|
69
vendor/gonum.org/v1/gonum/internal/asm/c128/dscalinc_amd64.s
generated
vendored
Normal file
69
vendor/gonum.org/v1/gonum/internal/asm/c128/dscalinc_amd64.s
generated
vendored
Normal file
@@ -0,0 +1,69 @@
|
||||
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//+build !noasm,!appengine,!safe
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
#define SRC SI
|
||||
#define DST SI
|
||||
#define LEN CX
|
||||
#define TAIL BX
|
||||
#define INC R9
|
||||
#define INC3 R10
|
||||
#define ALPHA X0
|
||||
#define ALPHA_2 X1
|
||||
|
||||
#define MOVDDUP_ALPHA LONG $0x44120FF2; WORD $0x0824 // MOVDDUP 8(SP), X0
|
||||
|
||||
// DscalInc multiplies both the real and imaginary parts of n elements of x
// by the real scalar alpha, in place, stepping by inc elements. SRC and DST
// are the same register, so each element is loaded, scaled and stored back.
// The main loop handles four elements per iteration; the tail loop handles
// the remaining n % 4 elements.
//
// func DscalInc(alpha float64, x []complex128, n, inc uintptr)
TEXT ·DscalInc(SB), NOSPLIT, $0
	MOVQ x_base+8(FP), SRC // SRC = &x
	MOVQ n+32(FP), LEN // LEN = n
	CMPQ LEN, $0 // if LEN == 0 { return }
	JE dscal_end

	MOVDDUP_ALPHA // ALPHA = { alpha, alpha }
	MOVQ inc+40(FP), INC // INC = inc
	SHLQ $4, INC // INC = INC * sizeof(complex128)
	LEAQ (INC)(INC*2), INC3 // INC3 = 3 * INC
	MOVUPS ALPHA, ALPHA_2 // Copy ALPHA to ALPHA_2 for pipelining
	MOVQ LEN, TAIL // TAIL = LEN
	SHRQ $2, LEN // LEN = floor( n / 4 )
	JZ dscal_tail // if LEN == 0 { goto dscal_tail }

dscal_loop: // do {
	MOVUPS (SRC), X2 // X_i = x[i]
	MOVUPS (SRC)(INC*1), X3
	MOVUPS (SRC)(INC*2), X4
	MOVUPS (SRC)(INC3*1), X5

	MULPD ALPHA, X2 // X_i *= ALPHA
	MULPD ALPHA_2, X3
	MULPD ALPHA, X4
	MULPD ALPHA_2, X5

	MOVUPS X2, (DST) // x[i] = X_i
	MOVUPS X3, (DST)(INC*1)
	MOVUPS X4, (DST)(INC*2)
	MOVUPS X5, (DST)(INC3*1)

	LEAQ (SRC)(INC*4), SRC // SRC += INC*4
	DECQ LEN
	JNZ dscal_loop // } while --LEN > 0

dscal_tail:
	ANDQ $3, TAIL // TAIL = TAIL % 4
	JE dscal_end // if TAIL == 0 { return }

dscal_tail_loop: // do {
	MOVUPS (SRC), X2 // X_i = x[i]
	MULPD ALPHA, X2 // X_i *= ALPHA
	MOVUPS X2, (DST) // x[i] = X_i
	ADDQ INC, SRC // SRC += INC
	DECQ TAIL
	JNZ dscal_tail_loop // } while --TAIL > 0

dscal_end:
	RET
|
66
vendor/gonum.org/v1/gonum/internal/asm/c128/dscalunitary_amd64.s
generated
vendored
Normal file
66
vendor/gonum.org/v1/gonum/internal/asm/c128/dscalunitary_amd64.s
generated
vendored
Normal file
@@ -0,0 +1,66 @@
|
||||
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//+build !noasm,!appengine,!safe
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
#define SRC SI
|
||||
#define DST SI
|
||||
#define LEN CX
|
||||
#define IDX AX
|
||||
#define TAIL BX
|
||||
#define ALPHA X0
|
||||
#define ALPHA_2 X1
|
||||
|
||||
#define MOVDDUP_ALPHA LONG $0x44120FF2; WORD $0x0824 // MOVDDUP 8(SP), X0
|
||||
|
||||
// DscalUnitary multiplies both the real and imaginary parts of every element
// of x by the real scalar alpha, in place. SRC and DST are the same register.
// IDX counts in units of 8 bytes, so one complex128 advances it by 2. The
// main loop handles four elements per iteration; the tail loop handles the
// remaining len(x) % 4 elements.
//
// func DscalUnitary(alpha float64, x []complex128)
TEXT ·DscalUnitary(SB), NOSPLIT, $0
	MOVQ x_base+8(FP), SRC // SRC = &x
	MOVQ x_len+16(FP), LEN // LEN = len(x)
	CMPQ LEN, $0 // if LEN == 0 { return }
	JE dscal_end

	MOVDDUP_ALPHA // ALPHA = { alpha, alpha }
	XORQ IDX, IDX // IDX = 0
	MOVUPS ALPHA, ALPHA_2 // Copy ALPHA to ALPHA_2 for pipelining
	MOVQ LEN, TAIL // TAIL = LEN
	SHRQ $2, LEN // LEN = floor( len(x) / 4 )
	JZ dscal_tail // if LEN == 0 { goto dscal_tail }

dscal_loop: // do {
	MOVUPS (SRC)(IDX*8), X2 // X_i = x[i]
	MOVUPS 16(SRC)(IDX*8), X3
	MOVUPS 32(SRC)(IDX*8), X4
	MOVUPS 48(SRC)(IDX*8), X5

	MULPD ALPHA, X2 // X_i *= ALPHA
	MULPD ALPHA_2, X3
	MULPD ALPHA, X4
	MULPD ALPHA_2, X5

	MOVUPS X2, (DST)(IDX*8) // x[i] = X_i
	MOVUPS X3, 16(DST)(IDX*8)
	MOVUPS X4, 32(DST)(IDX*8)
	MOVUPS X5, 48(DST)(IDX*8)

	ADDQ $8, IDX // IDX += 8
	DECQ LEN
	JNZ dscal_loop // } while --LEN > 0

dscal_tail:
	ANDQ $3, TAIL // TAIL = TAIL % 4
	JZ dscal_end // if TAIL == 0 { return }

dscal_tail_loop: // do {
	MOVUPS (SRC)(IDX*8), X2 // X_i = x[i]
	MULPD ALPHA, X2 // X_i *= ALPHA
	MOVUPS X2, (DST)(IDX*8) // x[i] = X_i
	ADDQ $2, IDX // IDX += 2
	DECQ TAIL
	JNZ dscal_tail_loop // } while --TAIL > 0

dscal_end:
	RET
|
31
vendor/gonum.org/v1/gonum/internal/asm/c128/scal.go
generated
vendored
Normal file
31
vendor/gonum.org/v1/gonum/internal/asm/c128/scal.go
generated
vendored
Normal file
@@ -0,0 +1,31 @@
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package c128
|
||||
|
||||
// ScalUnitaryTo is
//
//	for i, v := range x {
//		dst[i] = alpha * v
//	}
//
// dst must be at least as long as x; a shorter dst causes an index panic.
func ScalUnitaryTo(dst []complex128, alpha complex128, x []complex128) {
	for i, v := range x {
		dst[i] = alpha * v
	}
}
|
||||
|
||||
// ScalIncTo is
//
//	var idst, ix uintptr
//	for i := 0; i < int(n); i++ {
//		dst[idst] = alpha * x[ix]
//		ix += incX
//		idst += incDst
//	}
//
// Both dst and x are read/written starting at index 0 and advanced by their
// respective increments on every iteration.
func ScalIncTo(dst []complex128, incDst uintptr, alpha complex128, x []complex128, n, incX uintptr) {
	var idst, ix uintptr
	for i := 0; i < int(n); i++ {
		dst[idst] = alpha * x[ix]
		ix += incX
		idst += incDst
	}
}
|
116
vendor/gonum.org/v1/gonum/internal/asm/c128/scalUnitary_amd64.s
generated
vendored
Normal file
116
vendor/gonum.org/v1/gonum/internal/asm/c128/scalUnitary_amd64.s
generated
vendored
Normal file
@@ -0,0 +1,116 @@
|
||||
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//+build !noasm,!appengine,!safe
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
#define SRC SI
|
||||
#define DST SI
|
||||
#define LEN CX
|
||||
#define IDX AX
|
||||
#define TAIL BX
|
||||
#define ALPHA X0
|
||||
#define ALPHA_C X1
|
||||
#define ALPHA2 X10
|
||||
#define ALPHA_C2 X11
|
||||
|
||||
#define MOVDDUP_X2_X3 LONG $0xDA120FF2 // MOVDDUP X2, X3
|
||||
#define MOVDDUP_X4_X5 LONG $0xEC120FF2 // MOVDDUP X4, X5
|
||||
#define MOVDDUP_X6_X7 LONG $0xFE120FF2 // MOVDDUP X6, X7
|
||||
#define MOVDDUP_X8_X9 LONG $0x120F45F2; BYTE $0xC8 // MOVDDUP X8, X9
|
||||
|
||||
#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3
|
||||
#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5
|
||||
#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7
|
||||
#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
|
||||
|
||||
// ScalUnitary multiplies every element of x by the complex scalar alpha, in
// place (x[i] *= alpha). SRC and DST are the same register. IDX counts in
// units of 8 bytes, so one complex128 advances it by 2. The main loop handles
// four elements per iteration; the tail loop handles the remaining
// len(x) % 4 elements.
//
// func ScalUnitary(alpha complex128, x []complex128)
TEXT ·ScalUnitary(SB), NOSPLIT, $0
	MOVQ x_base+16(FP), SRC // SRC = &x
	MOVQ x_len+24(FP), LEN // LEN = len(x)
	CMPQ LEN, $0 // if LEN == 0 { return }
	JE scal_end

	MOVUPS alpha+0(FP), ALPHA // ALPHA = { imag(alpha), real(alpha) }
	MOVAPS ALPHA, ALPHA_C
	SHUFPD $0x1, ALPHA_C, ALPHA_C // ALPHA_C = { real(alpha), imag(alpha) }

	XORQ IDX, IDX // IDX = 0
	MOVAPS ALPHA, ALPHA2 // Copy ALPHA and ALPHA_C for pipelining
	MOVAPS ALPHA_C, ALPHA_C2
	MOVQ LEN, TAIL
	SHRQ $2, LEN // LEN = floor( len(x) / 4 )
	JZ scal_tail // if LEN == 0 { goto scal_tail }

scal_loop: // do {
	MOVUPS (SRC)(IDX*8), X2 // X_i = { imag(x[i]), real(x[i]) }
	MOVUPS 16(SRC)(IDX*8), X4
	MOVUPS 32(SRC)(IDX*8), X6
	MOVUPS 48(SRC)(IDX*8), X8

	// X_(i+1) = { real(x[i]), real(x[i]) }
	MOVDDUP_X2_X3
	MOVDDUP_X4_X5
	MOVDDUP_X6_X7
	MOVDDUP_X8_X9

	// X_i = { imag(x[i]), imag(x[i]) }
	SHUFPD $0x3, X2, X2
	SHUFPD $0x3, X4, X4
	SHUFPD $0x3, X6, X6
	SHUFPD $0x3, X8, X8

	// X_i = { real(ALPHA) * imag(x[i]), imag(ALPHA) * imag(x[i]) }
	// X_(i+1) = { imag(ALPHA) * real(x[i]), real(ALPHA) * real(x[i]) }
	MULPD ALPHA_C, X2
	MULPD ALPHA, X3
	MULPD ALPHA_C2, X4
	MULPD ALPHA2, X5
	MULPD ALPHA_C, X6
	MULPD ALPHA, X7
	MULPD ALPHA_C2, X8
	MULPD ALPHA2, X9

	// X_(i+1) = {
	//	imag(result[i]): imag(ALPHA)*real(x[i]) + real(ALPHA)*imag(x[i]),
	//	real(result[i]): real(ALPHA)*real(x[i]) - imag(ALPHA)*imag(x[i])
	// }
	ADDSUBPD_X2_X3
	ADDSUBPD_X4_X5
	ADDSUBPD_X6_X7
	ADDSUBPD_X8_X9

	MOVUPS X3, (DST)(IDX*8) // x[i] = X_(i+1)
	MOVUPS X5, 16(DST)(IDX*8)
	MOVUPS X7, 32(DST)(IDX*8)
	MOVUPS X9, 48(DST)(IDX*8)
	ADDQ $8, IDX // IDX += 8
	DECQ LEN
	JNZ scal_loop // } while --LEN > 0

scal_tail:
	ANDQ $3, TAIL // TAIL = TAIL % 4
	JZ scal_end // if TAIL == 0 { return }

scal_tail_loop: // do {
	MOVUPS (SRC)(IDX*8), X2 // X_i = { imag(x[i]), real(x[i]) }
	MOVDDUP_X2_X3 // X_(i+1) = { real(x[i]), real(x[i]) }
	SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) }
	MULPD ALPHA_C, X2 // X_i = { real(ALPHA) * imag(x[i]), imag(ALPHA) * imag(x[i]) }
	MULPD ALPHA, X3 // X_(i+1) = { imag(ALPHA) * real(x[i]), real(ALPHA) * real(x[i]) }

	// X_(i+1) = {
	//	imag(result[i]): imag(ALPHA)*real(x[i]) + real(ALPHA)*imag(x[i]),
	//	real(result[i]): real(ALPHA)*real(x[i]) - imag(ALPHA)*imag(x[i])
	// }
	ADDSUBPD_X2_X3

	MOVUPS X3, (DST)(IDX*8) // x[i] = X_(i+1)
	ADDQ $2, IDX // IDX += 2
	DECQ TAIL
	JNZ scal_tail_loop // } while --TAIL > 0

scal_end:
	RET
|
121
vendor/gonum.org/v1/gonum/internal/asm/c128/scalinc_amd64.s
generated
vendored
Normal file
121
vendor/gonum.org/v1/gonum/internal/asm/c128/scalinc_amd64.s
generated
vendored
Normal file
@@ -0,0 +1,121 @@
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//+build !noasm,!appengine,!safe
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
#define SRC SI
|
||||
#define DST SI
|
||||
#define LEN CX
|
||||
#define TAIL BX
|
||||
#define INC R9
|
||||
#define INC3 R10
|
||||
#define ALPHA X0
|
||||
#define ALPHA_C X1
|
||||
#define ALPHA2 X10
|
||||
#define ALPHA_C2 X11
|
||||
|
||||
#define MOVDDUP_X2_X3 LONG $0xDA120FF2 // MOVDDUP X2, X3
|
||||
#define MOVDDUP_X4_X5 LONG $0xEC120FF2 // MOVDDUP X4, X5
|
||||
#define MOVDDUP_X6_X7 LONG $0xFE120FF2 // MOVDDUP X6, X7
|
||||
#define MOVDDUP_X8_X9 LONG $0x120F45F2; BYTE $0xC8 // MOVDDUP X8, X9
|
||||
|
||||
#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3
|
||||
#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5
|
||||
#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7
|
||||
#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
|
||||
|
||||
// ScalInc multiplies n elements of x by the complex scalar alpha, in place
// (x[ix] *= alpha), stepping by inc elements. SRC and DST are the same
// register. The main loop handles four elements per iteration; the tail loop
// handles the remaining n % 4 elements.
//
// func ScalInc(alpha complex128, x []complex128, n, inc uintptr)
TEXT ·ScalInc(SB), NOSPLIT, $0
	MOVQ x_base+16(FP), SRC // SRC = &x
	MOVQ n+40(FP), LEN // LEN = n
	CMPQ LEN, $0
	JE scal_end // if LEN == 0 { return }

	MOVQ inc+48(FP), INC // INC = inc
	SHLQ $4, INC // INC = INC * sizeof(complex128)
	LEAQ (INC)(INC*2), INC3 // INC3 = 3 * INC

	MOVUPS alpha+0(FP), ALPHA // ALPHA = { imag(alpha), real(alpha) }
	MOVAPS ALPHA, ALPHA_C
	SHUFPD $0x1, ALPHA_C, ALPHA_C // ALPHA_C = { real(alpha), imag(alpha) }

	MOVAPS ALPHA, ALPHA2 // Copy ALPHA and ALPHA_C for pipelining
	MOVAPS ALPHA_C, ALPHA_C2
	MOVQ LEN, TAIL
	SHRQ $2, LEN // LEN = floor( n / 4 )
	JZ scal_tail // if LEN == 0 { goto scal_tail }

scal_loop: // do {
	MOVUPS (SRC), X2 // X_i = { imag(x[i]), real(x[i]) }
	MOVUPS (SRC)(INC*1), X4
	MOVUPS (SRC)(INC*2), X6
	MOVUPS (SRC)(INC3*1), X8

	// X_(i+1) = { real(x[i]), real(x[i]) }
	MOVDDUP_X2_X3
	MOVDDUP_X4_X5
	MOVDDUP_X6_X7
	MOVDDUP_X8_X9

	// X_i = { imag(x[i]), imag(x[i]) }
	SHUFPD $0x3, X2, X2
	SHUFPD $0x3, X4, X4
	SHUFPD $0x3, X6, X6
	SHUFPD $0x3, X8, X8

	// X_i = { real(ALPHA) * imag(x[i]), imag(ALPHA) * imag(x[i]) }
	// X_(i+1) = { imag(ALPHA) * real(x[i]), real(ALPHA) * real(x[i]) }
	MULPD ALPHA_C, X2
	MULPD ALPHA, X3
	MULPD ALPHA_C2, X4
	MULPD ALPHA2, X5
	MULPD ALPHA_C, X6
	MULPD ALPHA, X7
	MULPD ALPHA_C2, X8
	MULPD ALPHA2, X9

	// X_(i+1) = {
	//	imag(result[i]): imag(ALPHA)*real(x[i]) + real(ALPHA)*imag(x[i]),
	//	real(result[i]): real(ALPHA)*real(x[i]) - imag(ALPHA)*imag(x[i])
	// }
	ADDSUBPD_X2_X3
	ADDSUBPD_X4_X5
	ADDSUBPD_X6_X7
	ADDSUBPD_X8_X9

	MOVUPS X3, (DST) // x[i] = X_(i+1)
	MOVUPS X5, (DST)(INC*1)
	MOVUPS X7, (DST)(INC*2)
	MOVUPS X9, (DST)(INC3*1)

	LEAQ (SRC)(INC*4), SRC // SRC = &(SRC[inc*4])
	DECQ LEN
	JNZ scal_loop // } while --LEN > 0

scal_tail:
	ANDQ $3, TAIL // TAIL = TAIL % 4
	JE scal_end // if TAIL == 0 { return }

scal_tail_loop: // do {
	MOVUPS (SRC), X2 // X_i = { imag(x[i]), real(x[i]) }
	MOVDDUP_X2_X3 // X_(i+1) = { real(x[i]), real(x[i]) }
	SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) }
	MULPD ALPHA_C, X2 // X_i = { real(ALPHA) * imag(x[i]), imag(ALPHA) * imag(x[i]) }
	MULPD ALPHA, X3 // X_(i+1) = { imag(ALPHA) * real(x[i]), real(ALPHA) * real(x[i]) }

	// X_(i+1) = {
	//	imag(result[i]): imag(ALPHA)*real(x[i]) + real(ALPHA)*imag(x[i]),
	//	real(result[i]): real(ALPHA)*real(x[i]) - imag(ALPHA)*imag(x[i])
	// }
	ADDSUBPD_X2_X3

	MOVUPS X3, (DST) // x[i] = X_(i+1)
	ADDQ INC, SRC // SRC = &(SRC[inc])
	DECQ TAIL
	JNZ scal_tail_loop // } while --TAIL > 0

scal_end:
	RET
|
96
vendor/gonum.org/v1/gonum/internal/asm/c128/stubs_amd64.go
generated
vendored
Normal file
96
vendor/gonum.org/v1/gonum/internal/asm/c128/stubs_amd64.go
generated
vendored
Normal file
@@ -0,0 +1,96 @@
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !noasm,!appengine,!safe
|
||||
|
||||
package c128
|
||||
|
||||
// AxpyUnitary is
//
//	for i, v := range x {
//		y[i] += alpha * v
//	}
//
// This declaration has no Go body; the implementation is supplied externally.
func AxpyUnitary(alpha complex128, x, y []complex128)
|
||||
|
||||
// AxpyUnitaryTo is
//
//	for i, v := range x {
//		dst[i] = alpha*v + y[i]
//	}
//
// This declaration has no Go body; the implementation is supplied externally.
func AxpyUnitaryTo(dst []complex128, alpha complex128, x, y []complex128)
|
||||
|
||||
// AxpyInc is
//
//	for i := 0; i < int(n); i++ {
//		y[iy] += alpha * x[ix]
//		ix += incX
//		iy += incY
//	}
//
// This declaration has no Go body; the implementation is supplied externally.
func AxpyInc(alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr)
|
||||
|
||||
// AxpyIncTo is
//
//	for i := 0; i < int(n); i++ {
//		dst[idst] = alpha*x[ix] + y[iy]
//		ix += incX
//		iy += incY
//		idst += incDst
//	}
//
// This declaration has no Go body; the implementation is supplied externally.
func AxpyIncTo(dst []complex128, incDst, idst uintptr, alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr)
|
||||
|
||||
// DscalUnitary is
//
//	for i, v := range x {
//		x[i] = complex(real(v)*alpha, imag(v)*alpha)
//	}
//
// This declaration has no Go body; the implementation is supplied externally.
func DscalUnitary(alpha float64, x []complex128)
|
||||
|
||||
// DscalInc is
//
//	var ix uintptr
//	for i := 0; i < int(n); i++ {
//		x[ix] = complex(real(x[ix])*alpha, imag(x[ix])*alpha)
//		ix += inc
//	}
//
// This declaration has no Go body; the implementation is supplied externally.
func DscalInc(alpha float64, x []complex128, n, inc uintptr)
|
||||
|
||||
// ScalInc is
//
//	var ix uintptr
//	for i := 0; i < int(n); i++ {
//		x[ix] *= alpha
//		ix += inc
//	}
//
// This declaration has no Go body; the implementation is supplied externally.
func ScalInc(alpha complex128, x []complex128, n, inc uintptr)
|
||||
|
||||
// ScalUnitary is
//
//	for i := range x {
//		x[i] *= alpha
//	}
//
// This declaration has no Go body; the implementation is supplied externally.
func ScalUnitary(alpha complex128, x []complex128)
|
||||
|
||||
// DotcUnitary is
//
//	for i, v := range x {
//		sum += y[i] * cmplx.Conj(v)
//	}
//	return sum
//
// This declaration has no Go body; the implementation is supplied externally.
func DotcUnitary(x, y []complex128) (sum complex128)
|
||||
|
||||
// DotcInc is
//
//	for i := 0; i < int(n); i++ {
//		sum += y[iy] * cmplx.Conj(x[ix])
//		ix += incX
//		iy += incY
//	}
//	return sum
//
// This declaration has no Go body; the implementation is supplied externally.
func DotcInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128)
|
||||
|
||||
// DotuUnitary is
//
//	for i, v := range x {
//		sum += y[i] * v
//	}
//	return sum
//
// This declaration has no Go body; the implementation is supplied externally.
// The amd64 assembly implementation iterates over min(len(x), len(y))
// elements rather than len(x).
func DotuUnitary(x, y []complex128) (sum complex128)
|
||||
|
||||
// DotuInc is
//
//	for i := 0; i < int(n); i++ {
//		sum += y[iy] * x[ix]
//		ix += incX
//		iy += incY
//	}
//	return sum
//
// This declaration has no Go body; the implementation is supplied externally.
func DotuInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128)
|
163
vendor/gonum.org/v1/gonum/internal/asm/c128/stubs_noasm.go
generated
vendored
Normal file
163
vendor/gonum.org/v1/gonum/internal/asm/c128/stubs_noasm.go
generated
vendored
Normal file
@@ -0,0 +1,163 @@
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !amd64 noasm appengine safe
|
||||
|
||||
package c128
|
||||
|
||||
import "math/cmplx"
|
||||
|
||||
// AxpyUnitary is
//
//	for i, v := range x {
//		y[i] += alpha * v
//	}
//
// y must be at least as long as x; a shorter y causes an index panic.
func AxpyUnitary(alpha complex128, x, y []complex128) {
	for i, v := range x {
		y[i] += alpha * v
	}
}
|
||||
|
||||
// AxpyUnitaryTo is
//
//	for i, v := range x {
//		dst[i] = alpha*v + y[i]
//	}
//
// dst and y must be at least as long as x; y is only read.
func AxpyUnitaryTo(dst []complex128, alpha complex128, x, y []complex128) {
	for i, v := range x {
		dst[i] = alpha*v + y[i]
	}
}
|
||||
|
||||
// AxpyInc is
//
//	for i := 0; i < int(n); i++ {
//		y[iy] += alpha * x[ix]
//		ix += incX
//		iy += incY
//	}
//
// ix and iy are the starting indices into x and y; incX and incY are the
// per-iteration strides.
func AxpyInc(alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr) {
	for i := 0; i < int(n); i++ {
		y[iy] += alpha * x[ix]
		ix += incX
		iy += incY
	}
}
|
||||
|
||||
// AxpyIncTo is
//
//	for i := 0; i < int(n); i++ {
//		dst[idst] = alpha*x[ix] + y[iy]
//		ix += incX
//		iy += incY
//		idst += incDst
//	}
//
// idst, ix and iy are the starting indices into dst, x and y; incDst, incX
// and incY are the per-iteration strides.
func AxpyIncTo(dst []complex128, incDst, idst uintptr, alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr) {
	for i := 0; i < int(n); i++ {
		dst[idst] = alpha*x[ix] + y[iy]
		ix += incX
		iy += incY
		idst += incDst
	}
}
|
||||
|
||||
// DscalUnitary is
//
//	for i, v := range x {
//		x[i] = complex(real(v)*alpha, imag(v)*alpha)
//	}
//
// The real and imaginary parts are scaled independently by the real alpha.
func DscalUnitary(alpha float64, x []complex128) {
	for i, v := range x {
		x[i] = complex(real(v)*alpha, imag(v)*alpha)
	}
}
|
||||
|
||||
// DscalInc is
//
//	var ix uintptr
//	for i := 0; i < int(n); i++ {
//		x[ix] = complex(real(x[ix])*alpha, imag(x[ix])*alpha)
//		ix += inc
//	}
//
// Scaling starts at x[0] and touches every inc-th element, n times.
func DscalInc(alpha float64, x []complex128, n, inc uintptr) {
	var ix uintptr
	for i := 0; i < int(n); i++ {
		x[ix] = complex(real(x[ix])*alpha, imag(x[ix])*alpha)
		ix += inc
	}
}
|
||||
|
||||
// ScalInc is
//
//	var ix uintptr
//	for i := 0; i < int(n); i++ {
//		x[ix] *= alpha
//		ix += inc
//	}
//
// Scaling starts at x[0] and touches every inc-th element, n times.
func ScalInc(alpha complex128, x []complex128, n, inc uintptr) {
	var ix uintptr
	for i := 0; i < int(n); i++ {
		x[ix] *= alpha
		ix += inc
	}
}
|
||||
|
||||
// ScalUnitary is
//
//	for i := range x {
//		x[i] *= alpha
//	}
//
// Every element of x is multiplied by alpha in place.
func ScalUnitary(alpha complex128, x []complex128) {
	for i := range x {
		x[i] *= alpha
	}
}
|
||||
|
||||
// DotcUnitary is
//
//	for i, v := range x {
//		sum += y[i] * cmplx.Conj(v)
//	}
//	return sum
//
// The elements of x are conjugated; y must be at least as long as x.
func DotcUnitary(x, y []complex128) (sum complex128) {
	for i, v := range x {
		sum += y[i] * cmplx.Conj(v)
	}
	return sum
}
|
||||
|
||||
// DotcInc is
//
//	for i := 0; i < int(n); i++ {
//		sum += y[iy] * cmplx.Conj(x[ix])
//		ix += incX
//		iy += incY
//	}
//	return sum
//
// The elements of x are conjugated. ix and iy are the starting indices;
// incX and incY are the per-iteration strides.
func DotcInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128) {
	for i := 0; i < int(n); i++ {
		sum += y[iy] * cmplx.Conj(x[ix])
		ix += incX
		iy += incY
	}
	return sum
}
|
||||
|
||||
// DotuUnitary is
//
//	for i, v := range x {
//		sum += y[i] * v
//	}
//	return sum
//
// No conjugation is applied; y must be at least as long as x.
func DotuUnitary(x, y []complex128) (sum complex128) {
	for i, v := range x {
		sum += y[i] * v
	}
	return sum
}
|
||||
|
||||
// DotuInc is
//
//	for i := 0; i < int(n); i++ {
//		sum += y[iy] * x[ix]
//		ix += incX
//		iy += incY
//	}
//	return sum
//
// No conjugation is applied. ix and iy are the starting indices; incX and
// incY are the per-iteration strides.
func DotuInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128) {
	for i := 0; i < int(n); i++ {
		sum += y[iy] * x[ix]
		ix += incX
		iy += incY
	}
	return sum
}
|
Reference in New Issue
Block a user