1. update clientset, deepcopy using code-generator

2. add a dummy file tools.go to force "go mod vendor" to see
code-generator as a dependency
3. add a script to update CRD
4. add a README to document CRD updating steps
run go mod tidy
update README
This commit is contained in:
xiangqian
2019-12-03 01:22:21 -08:00
parent 90533183e4
commit 728e29aa7e
1128 changed files with 167705 additions and 5135 deletions

47
vendor/gonum.org/v1/gonum/blas/README.md generated vendored Normal file
View File

@@ -0,0 +1,47 @@
# Gonum BLAS [![GoDoc](https://godoc.org/gonum.org/v1/gonum/blas?status.svg)](https://godoc.org/gonum.org/v1/gonum/blas)
A collection of packages to provide BLAS functionality for the [Go programming
language](http://golang.org)
## Installation
```sh
go get gonum.org/v1/gonum/blas/...
```
## Packages
### blas
Defines [BLAS API](http://www.netlib.org/blas/blast-forum/cinterface.pdf) split in several
interfaces.
### blas/gonum
Go implementation of the BLAS API (incomplete, implements the `float32` and `float64` API).
### blas/blas64 and blas/blas32
Wrappers for an implementation of the double (i.e., `float64`) and single (`float32`)
precision real parts of the BLAS API.
```Go
package main
import (
"fmt"
"gonum.org/v1/gonum/blas/blas64"
)
func main() {
v := blas64.Vector{N: 3, Inc: 1, Data: []float64{1, 1, 1}}
fmt.Println("v has length:", blas64.Nrm2(v))
}
```
### blas/cblas128 and blas/cblas64
Wrappers for an implementation of the double (i.e., `complex128`) and single (`complex64`)
precision complex parts of the BLAS API.
Currently blas/cblas64 and blas/cblas128 require gonum.org/v1/netlib/blas.

283
vendor/gonum.org/v1/gonum/blas/blas.go generated vendored Normal file
View File

@@ -0,0 +1,283 @@
// Copyright ©2013 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate ./conversions.bash
package blas
// Flag constants indicate Givens transformation H matrix state.
//
// A Flag is embedded in SrotmParams and DrotmParams next to the
// column-major 2 by 2 matrix H that it describes.
type Flag int
const (
Identity Flag = -2 // H is the identity matrix; no rotation is needed.
Rescaling Flag = -1 // H specifies rescaling.
OffDiagonal Flag = 0 // Off-diagonal elements of H are non-unit.
Diagonal Flag = 1 // Diagonal elements of H are non-unit.
)
// SrotmParams contains Givens transformation parameters. It is returned
// by the Float32 Srotmg method and passed to the Float32 Srotm method.
type SrotmParams struct {
// Flag describes the state of H.
Flag
H [4]float32 // Column-major 2 by 2 matrix.
}
// DrotmParams contains Givens transformation parameters. It is returned
// by the Float64 Drotmg method and passed to the Float64 Drotm method.
type DrotmParams struct {
// Flag describes the state of H.
Flag
H [4]float64 // Column-major 2 by 2 matrix.
}
// Transpose specifies the transposition operation of a matrix.
// Each value is the ASCII character shown in its declaration.
type Transpose byte
const (
// NoTrans selects the matrix as stored.
NoTrans Transpose = 'N'
// Trans selects the transpose of the matrix.
Trans Transpose = 'T'
// ConjTrans is the 'C' option; the real-valued blas64 wrappers
// document it as equivalent to Trans.
ConjTrans Transpose = 'C'
)
// Uplo specifies whether a matrix is upper or lower triangular.
// Each value is the ASCII character shown in its declaration.
type Uplo byte
const (
Upper Uplo = 'U'
Lower Uplo = 'L'
// All is the 'A' option.
// NOTE(review): presumably selects the whole matrix rather than one
// triangle; none of the code in this package uses it — confirm.
All Uplo = 'A'
)
// Diag specifies whether a matrix is unit triangular.
// Each value is the ASCII character shown in its declaration.
type Diag byte
const (
// NonUnit marks a triangular matrix that is not unit triangular.
NonUnit Diag = 'N'
// Unit marks a unit triangular matrix.
Unit Diag = 'U'
)
// Side specifies from which side a multiplication operation is performed.
// Each value is the ASCII character shown in its declaration.
type Side byte
const (
// Left multiplies from the left, e.g. A * B.
Left Side = 'L'
// Right multiplies from the right, e.g. B * A.
Right Side = 'R'
)
// Float32 implements the single precision real BLAS routines.
// It is the union of the Level 1, Level 2 and Level 3 interfaces.
type Float32 interface {
Float32Level1
Float32Level2
Float32Level3
}
// Float32Level1 implements the single precision real BLAS Level 1 routines.
//
// Vector arguments are passed as a slice together with an increment
// (incX, incY); n is the number of elements operated on.
type Float32Level1 interface {
Sdsdot(n int, alpha float32, x []float32, incX int, y []float32, incY int) float32
Dsdot(n int, x []float32, incX int, y []float32, incY int) float64
Sdot(n int, x []float32, incX int, y []float32, incY int) float32
Snrm2(n int, x []float32, incX int) float32
Sasum(n int, x []float32, incX int) float32
Isamax(n int, x []float32, incX int) int
Sswap(n int, x []float32, incX int, y []float32, incY int)
Scopy(n int, x []float32, incX int, y []float32, incY int)
Saxpy(n int, alpha float32, x []float32, incX int, y []float32, incY int)
Srotg(a, b float32) (c, s, r, z float32)
Srotmg(d1, d2, b1, b2 float32) (p SrotmParams, rd1, rd2, rb1 float32)
Srot(n int, x []float32, incX int, y []float32, incY int, c, s float32)
Srotm(n int, x []float32, incX int, y []float32, incY int, p SrotmParams)
Sscal(n int, alpha float32, x []float32, incX int)
}
// Float32Level2 implements the single precision real BLAS Level 2 routines.
//
// Matrices are passed as a flat slice; all storage schemes except the
// packed one (ap) are accompanied by an lda argument.
type Float32Level2 interface {
Sgemv(tA Transpose, m, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int)
Sgbmv(tA Transpose, m, n, kL, kU int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int)
Strmv(ul Uplo, tA Transpose, d Diag, n int, a []float32, lda int, x []float32, incX int)
Stbmv(ul Uplo, tA Transpose, d Diag, n, k int, a []float32, lda int, x []float32, incX int)
Stpmv(ul Uplo, tA Transpose, d Diag, n int, ap []float32, x []float32, incX int)
Strsv(ul Uplo, tA Transpose, d Diag, n int, a []float32, lda int, x []float32, incX int)
Stbsv(ul Uplo, tA Transpose, d Diag, n, k int, a []float32, lda int, x []float32, incX int)
Stpsv(ul Uplo, tA Transpose, d Diag, n int, ap []float32, x []float32, incX int)
Ssymv(ul Uplo, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int)
Ssbmv(ul Uplo, n, k int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int)
Sspmv(ul Uplo, n int, alpha float32, ap []float32, x []float32, incX int, beta float32, y []float32, incY int)
Sger(m, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32, lda int)
Ssyr(ul Uplo, n int, alpha float32, x []float32, incX int, a []float32, lda int)
Sspr(ul Uplo, n int, alpha float32, x []float32, incX int, ap []float32)
Ssyr2(ul Uplo, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32, lda int)
Sspr2(ul Uplo, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32)
}
// Float32Level3 implements the single precision real BLAS Level 3 routines.
//
// Each matrix is passed as a flat slice followed by its own lda, ldb or
// ldc argument.
type Float32Level3 interface {
Sgemm(tA, tB Transpose, m, n, k int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int)
Ssymm(s Side, ul Uplo, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int)
Ssyrk(ul Uplo, t Transpose, n, k int, alpha float32, a []float32, lda int, beta float32, c []float32, ldc int)
Ssyr2k(ul Uplo, t Transpose, n, k int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int)
Strmm(s Side, ul Uplo, tA Transpose, d Diag, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int)
Strsm(s Side, ul Uplo, tA Transpose, d Diag, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int)
}
// Float64 implements the double precision real BLAS routines.
// It is the union of the Level 1, Level 2 and Level 3 interfaces.
type Float64 interface {
Float64Level1
Float64Level2
Float64Level3
}
// Float64Level1 implements the double precision real BLAS Level 1 routines.
//
// Vector arguments are passed as a slice together with an increment
// (incX, incY); n is the number of elements operated on.
type Float64Level1 interface {
Ddot(n int, x []float64, incX int, y []float64, incY int) float64
Dnrm2(n int, x []float64, incX int) float64
Dasum(n int, x []float64, incX int) float64
Idamax(n int, x []float64, incX int) int
Dswap(n int, x []float64, incX int, y []float64, incY int)
Dcopy(n int, x []float64, incX int, y []float64, incY int)
Daxpy(n int, alpha float64, x []float64, incX int, y []float64, incY int)
Drotg(a, b float64) (c, s, r, z float64)
Drotmg(d1, d2, b1, b2 float64) (p DrotmParams, rd1, rd2, rb1 float64)
Drot(n int, x []float64, incX int, y []float64, incY int, c float64, s float64)
Drotm(n int, x []float64, incX int, y []float64, incY int, p DrotmParams)
Dscal(n int, alpha float64, x []float64, incX int)
}
// Float64Level2 implements the double precision real BLAS Level 2 routines.
//
// Matrices are passed as a flat slice; all storage schemes except the
// packed one (ap) are accompanied by an lda argument.
type Float64Level2 interface {
Dgemv(tA Transpose, m, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int)
Dgbmv(tA Transpose, m, n, kL, kU int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int)
Dtrmv(ul Uplo, tA Transpose, d Diag, n int, a []float64, lda int, x []float64, incX int)
Dtbmv(ul Uplo, tA Transpose, d Diag, n, k int, a []float64, lda int, x []float64, incX int)
Dtpmv(ul Uplo, tA Transpose, d Diag, n int, ap []float64, x []float64, incX int)
Dtrsv(ul Uplo, tA Transpose, d Diag, n int, a []float64, lda int, x []float64, incX int)
Dtbsv(ul Uplo, tA Transpose, d Diag, n, k int, a []float64, lda int, x []float64, incX int)
Dtpsv(ul Uplo, tA Transpose, d Diag, n int, ap []float64, x []float64, incX int)
Dsymv(ul Uplo, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int)
Dsbmv(ul Uplo, n, k int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int)
Dspmv(ul Uplo, n int, alpha float64, ap []float64, x []float64, incX int, beta float64, y []float64, incY int)
Dger(m, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64, lda int)
Dsyr(ul Uplo, n int, alpha float64, x []float64, incX int, a []float64, lda int)
Dspr(ul Uplo, n int, alpha float64, x []float64, incX int, ap []float64)
Dsyr2(ul Uplo, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64, lda int)
Dspr2(ul Uplo, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64)
}
// Float64Level3 implements the double precision real BLAS Level 3 routines.
//
// Each matrix is passed as a flat slice followed by its own lda, ldb or
// ldc argument.
type Float64Level3 interface {
Dgemm(tA, tB Transpose, m, n, k int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int)
Dsymm(s Side, ul Uplo, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int)
Dsyrk(ul Uplo, t Transpose, n, k int, alpha float64, a []float64, lda int, beta float64, c []float64, ldc int)
Dsyr2k(ul Uplo, t Transpose, n, k int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int)
Dtrmm(s Side, ul Uplo, tA Transpose, d Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int)
Dtrsm(s Side, ul Uplo, tA Transpose, d Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int)
}
// Complex64 implements the single precision complex BLAS routines.
// It is the union of the Level 1, Level 2 and Level 3 interfaces.
type Complex64 interface {
Complex64Level1
Complex64Level2
Complex64Level3
}
// Complex64Level1 implements the single precision complex BLAS Level 1 routines.
//
// Vector arguments are passed as a slice together with an increment
// (incX, incY); n is the number of elements operated on.
type Complex64Level1 interface {
Cdotu(n int, x []complex64, incX int, y []complex64, incY int) (dotu complex64)
Cdotc(n int, x []complex64, incX int, y []complex64, incY int) (dotc complex64)
Scnrm2(n int, x []complex64, incX int) float32
Scasum(n int, x []complex64, incX int) float32
Icamax(n int, x []complex64, incX int) int
Cswap(n int, x []complex64, incX int, y []complex64, incY int)
Ccopy(n int, x []complex64, incX int, y []complex64, incY int)
Caxpy(n int, alpha complex64, x []complex64, incX int, y []complex64, incY int)
Cscal(n int, alpha complex64, x []complex64, incX int)
Csscal(n int, alpha float32, x []complex64, incX int)
}
// Complex64Level2 implements the single precision complex BLAS Level 2 routines.
//
// Matrices are passed as a flat slice, in most methods accompanied by an
// lda argument. Cher and Chpr take a real (float32) alpha.
type Complex64Level2 interface {
Cgemv(tA Transpose, m, n int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int)
Cgbmv(tA Transpose, m, n, kL, kU int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int)
Ctrmv(ul Uplo, tA Transpose, d Diag, n int, a []complex64, lda int, x []complex64, incX int)
Ctbmv(ul Uplo, tA Transpose, d Diag, n, k int, a []complex64, lda int, x []complex64, incX int)
Ctpmv(ul Uplo, tA Transpose, d Diag, n int, ap []complex64, x []complex64, incX int)
Ctrsv(ul Uplo, tA Transpose, d Diag, n int, a []complex64, lda int, x []complex64, incX int)
Ctbsv(ul Uplo, tA Transpose, d Diag, n, k int, a []complex64, lda int, x []complex64, incX int)
Ctpsv(ul Uplo, tA Transpose, d Diag, n int, ap []complex64, x []complex64, incX int)
Chemv(ul Uplo, n int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int)
Chbmv(ul Uplo, n, k int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int)
Chpmv(ul Uplo, n int, alpha complex64, ap []complex64, x []complex64, incX int, beta complex64, y []complex64, incY int)
Cgeru(m, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int)
Cgerc(m, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int)
Cher(ul Uplo, n int, alpha float32, x []complex64, incX int, a []complex64, lda int)
Chpr(ul Uplo, n int, alpha float32, x []complex64, incX int, a []complex64)
Cher2(ul Uplo, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int)
Chpr2(ul Uplo, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, ap []complex64)
}
// Complex64Level3 implements the single precision complex BLAS Level 3 routines.
//
// Each matrix is passed as a flat slice followed by its own lda, ldb or
// ldc argument. Cherk takes real (float32) alpha and beta, and Cher2k
// takes a real beta.
type Complex64Level3 interface {
Cgemm(tA, tB Transpose, m, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int)
Csymm(s Side, ul Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int)
Csyrk(ul Uplo, t Transpose, n, k int, alpha complex64, a []complex64, lda int, beta complex64, c []complex64, ldc int)
Csyr2k(ul Uplo, t Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int)
Ctrmm(s Side, ul Uplo, tA Transpose, d Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int)
Ctrsm(s Side, ul Uplo, tA Transpose, d Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int)
Chemm(s Side, ul Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int)
Cherk(ul Uplo, t Transpose, n, k int, alpha float32, a []complex64, lda int, beta float32, c []complex64, ldc int)
Cher2k(ul Uplo, t Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta float32, c []complex64, ldc int)
}
// Complex128 implements the double precision complex BLAS routines.
// It is the union of the Level 1, Level 2 and Level 3 interfaces.
type Complex128 interface {
Complex128Level1
Complex128Level2
Complex128Level3
}
// Complex128Level1 implements the double precision complex BLAS Level 1 routines.
//
// Vector arguments are passed as a slice together with an increment
// (incX, incY); n is the number of elements operated on.
type Complex128Level1 interface {
Zdotu(n int, x []complex128, incX int, y []complex128, incY int) (dotu complex128)
Zdotc(n int, x []complex128, incX int, y []complex128, incY int) (dotc complex128)
Dznrm2(n int, x []complex128, incX int) float64
Dzasum(n int, x []complex128, incX int) float64
Izamax(n int, x []complex128, incX int) int
Zswap(n int, x []complex128, incX int, y []complex128, incY int)
Zcopy(n int, x []complex128, incX int, y []complex128, incY int)
Zaxpy(n int, alpha complex128, x []complex128, incX int, y []complex128, incY int)
Zscal(n int, alpha complex128, x []complex128, incX int)
Zdscal(n int, alpha float64, x []complex128, incX int)
}
// Complex128Level2 implements the double precision complex BLAS Level 2 routines.
//
// Matrices are passed as a flat slice, in most methods accompanied by an
// lda argument. Zher and Zhpr take a real (float64) alpha.
type Complex128Level2 interface {
Zgemv(tA Transpose, m, n int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int)
Zgbmv(tA Transpose, m, n int, kL int, kU int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int)
Ztrmv(ul Uplo, tA Transpose, d Diag, n int, a []complex128, lda int, x []complex128, incX int)
Ztbmv(ul Uplo, tA Transpose, d Diag, n, k int, a []complex128, lda int, x []complex128, incX int)
Ztpmv(ul Uplo, tA Transpose, d Diag, n int, ap []complex128, x []complex128, incX int)
Ztrsv(ul Uplo, tA Transpose, d Diag, n int, a []complex128, lda int, x []complex128, incX int)
Ztbsv(ul Uplo, tA Transpose, d Diag, n, k int, a []complex128, lda int, x []complex128, incX int)
Ztpsv(ul Uplo, tA Transpose, d Diag, n int, ap []complex128, x []complex128, incX int)
Zhemv(ul Uplo, n int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int)
Zhbmv(ul Uplo, n, k int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int)
Zhpmv(ul Uplo, n int, alpha complex128, ap []complex128, x []complex128, incX int, beta complex128, y []complex128, incY int)
Zgeru(m, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, a []complex128, lda int)
Zgerc(m, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, a []complex128, lda int)
Zher(ul Uplo, n int, alpha float64, x []complex128, incX int, a []complex128, lda int)
Zhpr(ul Uplo, n int, alpha float64, x []complex128, incX int, a []complex128)
Zher2(ul Uplo, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, a []complex128, lda int)
Zhpr2(ul Uplo, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, ap []complex128)
}
// Complex128Level3 implements the double precision complex BLAS Level 3 routines.
//
// Each matrix is passed as a flat slice followed by its own lda, ldb or
// ldc argument. Zherk takes real (float64) alpha and beta, and Zher2k
// takes a real beta.
type Complex128Level3 interface {
Zgemm(tA, tB Transpose, m, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int)
Zsymm(s Side, ul Uplo, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int)
Zsyrk(ul Uplo, t Transpose, n, k int, alpha complex128, a []complex128, lda int, beta complex128, c []complex128, ldc int)
Zsyr2k(ul Uplo, t Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int)
Ztrmm(s Side, ul Uplo, tA Transpose, d Diag, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int)
Ztrsm(s Side, ul Uplo, tA Transpose, d Diag, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int)
Zhemm(s Side, ul Uplo, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int)
Zherk(ul Uplo, t Transpose, n, k int, alpha float64, a []complex128, lda int, beta float64, c []complex128, ldc int)
Zher2k(ul Uplo, t Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta float64, c []complex128, ldc int)
}

469
vendor/gonum.org/v1/gonum/blas/blas64/blas64.go generated vendored Normal file
View File

@@ -0,0 +1,469 @@
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package blas64
import (
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/blas/gonum"
)
// blas64 is the package-level BLAS float64 implementation that the
// wrappers in this package dispatch to. It starts out as
// gonum.Implementation{} and is replaced by Use.
var blas64 blas.Float64 = gonum.Implementation{}
// Use sets the BLAS float64 implementation to be used by subsequent BLAS calls.
// The default implementation is
// gonum.org/v1/gonum/blas/gonum.Implementation.
//
// NOTE(review): the assignment below is a plain write to a package-level
// variable with no visible synchronization; presumably Use is meant to be
// called before any concurrent use of the package — confirm.
func Use(b blas.Float64) {
blas64 = b
}
// Implementation returns the current BLAS float64 implementation.
//
// Implementation allows direct calls to the current BLAS float64
// implementation, giving finer control of parameters.
func Implementation() blas.Float64 {
return blas64
}
// Vector represents a vector with an associated element increment.
type Vector struct {
// N is the number of elements; the Level 1 wrappers pass it as n.
N int
Data []float64
// Inc is the element increment; the wrappers pass it as incX or incY.
Inc int
}
// General represents a matrix using the conventional storage scheme.
type General struct {
// Rows and Cols are the matrix dimensions; Gemv and Ger pass them as m and n.
Rows, Cols int
Data []float64
// Stride is passed to BLAS as the lda, ldb or ldc argument.
Stride int
}
// Band represents a band matrix using the band storage scheme.
type Band struct {
// Rows and Cols are the matrix dimensions; Gbmv passes them as m and n.
Rows, Cols int
// KL and KU are passed by Gbmv as the kL and kU arguments.
KL, KU int
Data []float64
// Stride is passed to BLAS as the lda argument.
Stride int
}
// Triangular represents a triangular matrix using the conventional storage scheme.
type Triangular struct {
// Uplo and Diag are forwarded as the ul and d arguments of the
// triangular routines (see Trmv, Trsv, Trmm and Trsm).
Uplo blas.Uplo
Diag blas.Diag
// N is the order of the matrix; Trmv and Trsv pass it as n.
N int
Data []float64
// Stride is passed to BLAS as the lda argument.
Stride int
}
// TriangularBand represents a triangular matrix using the band storage scheme.
type TriangularBand struct {
// Uplo and Diag are forwarded as the ul and d arguments (see Tbmv and Tbsv).
Uplo blas.Uplo
Diag blas.Diag
// N and K are passed by Tbmv and Tbsv as the n and k arguments.
N, K int
Data []float64
// Stride is passed to BLAS as the lda argument.
Stride int
}
// TriangularPacked represents a triangular matrix using the packed storage scheme.
// It has no Stride field; Data is passed as the ap argument (see Tpmv and Tpsv).
type TriangularPacked struct {
// Uplo and Diag are forwarded as the ul and d arguments.
Uplo blas.Uplo
Diag blas.Diag
// N is the order of the matrix, passed as n.
N int
Data []float64
}
// Symmetric represents a symmetric matrix using the conventional storage scheme.
type Symmetric struct {
// Uplo is forwarded as the ul argument (see Symv, Syr, Syr2, Symm, Syrk and Syr2k).
Uplo blas.Uplo
// N is the order of the matrix.
N int
Data []float64
// Stride is passed to BLAS as the lda or ldc argument.
Stride int
}
// SymmetricBand represents a symmetric matrix using the band storage scheme.
type SymmetricBand struct {
// Uplo is forwarded as the ul argument (see Sbmv).
Uplo blas.Uplo
// N and K are passed by Sbmv as the n and k arguments.
N, K int
Data []float64
// Stride is passed to BLAS as the lda argument.
Stride int
}
// SymmetricPacked represents a symmetric matrix using the packed storage scheme.
// It has no Stride field; Data is passed to BLAS on its own (see Spmv, Spr and Spr2).
type SymmetricPacked struct {
// Uplo is forwarded as the ul argument.
Uplo blas.Uplo
// N is the order of the matrix, passed as n.
N int
Data []float64
}
// Level 1
// Panic messages raised by the Level 1 wrappers before they call into BLAS.
const (
// negInc is the panic value used when a vector's Inc is negative
// (Nrm2, Asum, Iamax and Scal).
negInc = "blas64: negative vector increment"
// badLength is the panic value used when two vectors differ in N.
badLength = "blas64: vector length mismatch"
)
// Dot returns the dot product of the vectors x and y:
//  \sum_i x[i]*y[i].
//
// Dot panics if x and y do not have the same length.
func Dot(x, y Vector) float64 {
	n := x.N
	if n != y.N {
		panic(badLength)
	}
	return blas64.Ddot(n, x.Data, x.Inc, y.Data, y.Inc)
}
// Nrm2 returns the Euclidean norm of the vector x:
//  sqrt(\sum_i x[i]*x[i]).
//
// Nrm2 will panic if the vector increment is negative.
func Nrm2(x Vector) float64 {
	inc := x.Inc
	if inc < 0 {
		panic(negInc)
	}
	return blas64.Dnrm2(x.N, x.Data, inc)
}
// Asum returns the sum of the absolute values of the elements of x:
//  \sum_i |x[i]|.
//
// Asum will panic if the vector increment is negative.
func Asum(x Vector) float64 {
	inc := x.Inc
	if inc < 0 {
		panic(negInc)
	}
	return blas64.Dasum(x.N, x.Data, inc)
}
// Iamax returns the index of an element of x with the largest absolute value.
// If several elements share that value, the earliest index is returned.
// Iamax returns -1 if n == 0.
//
// Iamax will panic if the vector increment is negative.
func Iamax(x Vector) int {
	inc := x.Inc
	if inc < 0 {
		panic(negInc)
	}
	return blas64.Idamax(x.N, x.Data, inc)
}
// Swap exchanges the elements of the two vectors:
//  x[i], y[i] = y[i], x[i] for all i.
//
// Swap panics if x and y do not have the same length.
func Swap(x, y Vector) {
	n := x.N
	if n != y.N {
		panic(badLength)
	}
	blas64.Dswap(n, x.Data, x.Inc, y.Data, y.Inc)
}
// Copy copies the elements of x into the elements of y:
//  y[i] = x[i] for all i.
//
// Copy requires that the lengths of x and y match and will panic otherwise.
func Copy(x, y Vector) {
	n := x.N
	if n != y.N {
		panic(badLength)
	}
	blas64.Dcopy(n, x.Data, x.Inc, y.Data, y.Inc)
}
// Axpy adds x scaled by alpha to y:
//  y[i] += alpha*x[i] for all i.
//
// Axpy panics if x and y do not have the same length.
func Axpy(alpha float64, x, y Vector) {
	n := x.N
	if n != y.N {
		panic(badLength)
	}
	blas64.Daxpy(n, alpha, x.Data, x.Inc, y.Data, y.Inc)
}
// Rotg computes the parameters of a Givens plane rotation so that
//  ⎡ c s⎤   ⎡a⎤   ⎡r⎤
//  ⎣-s c⎦ * ⎣b⎦ = ⎣0⎦
// where a and b are the Cartesian coordinates of a given point.
// c, s, and r are defined as
//  r = ±Sqrt(a^2 + b^2),
//  c = a/r, the cosine of the rotation angle,
//  s = b/r, the sine of the rotation angle,
// and z is defined such that
//  if |a| > |b|,        z = s,
//  otherwise if c != 0, z = 1/c,
//  otherwise            z = 1.
func Rotg(a, b float64) (c, s, r, z float64) {
return blas64.Drotg(a, b)
}
// Rotmg computes the modified Givens rotation. See
// http://www.netlib.org/lapack/explore-html/df/deb/drotmg_8f.html
// for more details.
//
// The returned p can be passed to Rotm; all four results are returned
// unchanged from the current implementation's Drotmg.
func Rotmg(d1, d2, b1, b2 float64) (p blas.DrotmParams, rd1, rd2, rb1 float64) {
return blas64.Drotmg(d1, d2, b1, b2)
}
// Rot applies a plane transformation to n points represented by the vectors x
// and y:
//  x[i] =  c*x[i] + s*y[i],
//  y[i] = -s*x[i] + c*y[i], for all i.
//
// Rot panics if x and y do not have the same length.
func Rot(x, y Vector, c, s float64) {
	n := x.N
	if n != y.N {
		panic(badLength)
	}
	blas64.Drot(n, x.Data, x.Inc, y.Data, y.Inc, c, s)
}
// Rotm applies the modified Givens rotation described by p to n points
// represented by the vectors x and y.
//
// Rotm panics if x and y do not have the same length.
func Rotm(x, y Vector, p blas.DrotmParams) {
	n := x.N
	if n != y.N {
		panic(badLength)
	}
	blas64.Drotm(n, x.Data, x.Inc, y.Data, y.Inc, p)
}
// Scal scales the vector x by alpha:
//  x[i] *= alpha for all i.
//
// Scal will panic if the vector increment is negative.
func Scal(alpha float64, x Vector) {
	inc := x.Inc
	if inc < 0 {
		panic(negInc)
	}
	blas64.Dscal(x.N, alpha, x.Data, inc)
}
// Level 2

// Gemv computes
//  y = alpha * A * x + beta * y,   if t == blas.NoTrans,
//  y = alpha * A^T * x + beta * y, if t == blas.Trans or blas.ConjTrans,
// where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars.
func Gemv(t blas.Transpose, alpha float64, a General, x Vector, beta float64, y Vector) {
	m, n := a.Rows, a.Cols
	blas64.Dgemv(t, m, n, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc)
}
// Gbmv computes
//  y = alpha * A * x + beta * y,   if t == blas.NoTrans,
//  y = alpha * A^T * x + beta * y, if t == blas.Trans or blas.ConjTrans,
// where A is an m×n band matrix, x and y are vectors, and alpha and beta are scalars.
func Gbmv(t blas.Transpose, alpha float64, a Band, x Vector, beta float64, y Vector) {
	m, n := a.Rows, a.Cols
	kl, ku := a.KL, a.KU
	blas64.Dgbmv(t, m, n, kl, ku, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc)
}
// Trmv computes
//  x = A * x,   if t == blas.NoTrans,
//  x = A^T * x, if t == blas.Trans or blas.ConjTrans,
// where A is an n×n triangular matrix, and x is a vector.
func Trmv(t blas.Transpose, a Triangular, x Vector) {
	uplo, diag := a.Uplo, a.Diag
	blas64.Dtrmv(uplo, t, diag, a.N, a.Data, a.Stride, x.Data, x.Inc)
}
// Tbmv computes
//  x = A * x,   if t == blas.NoTrans,
//  x = A^T * x, if t == blas.Trans or blas.ConjTrans,
// where A is an n×n triangular band matrix, and x is a vector.
func Tbmv(t blas.Transpose, a TriangularBand, x Vector) {
	uplo, diag := a.Uplo, a.Diag
	n, k := a.N, a.K
	blas64.Dtbmv(uplo, t, diag, n, k, a.Data, a.Stride, x.Data, x.Inc)
}
// Tpmv computes
//  x = A * x,   if t == blas.NoTrans,
//  x = A^T * x, if t == blas.Trans or blas.ConjTrans,
// where A is an n×n triangular matrix in packed format, and x is a vector.
func Tpmv(t blas.Transpose, a TriangularPacked, x Vector) {
	uplo, diag := a.Uplo, a.Diag
	blas64.Dtpmv(uplo, t, diag, a.N, a.Data, x.Data, x.Inc)
}
// Trsv solves
//  A * x = b,   if t == blas.NoTrans,
//  A^T * x = b, if t == blas.Trans or blas.ConjTrans,
// where A is an n×n triangular matrix, and x and b are vectors.
//
// On entry x holds the values of b; the solution is written back into x
// in place.
//
// No test for singularity or near-singularity is included in this
// routine. Such tests must be performed before calling this routine.
func Trsv(t blas.Transpose, a Triangular, x Vector) {
	uplo, diag := a.Uplo, a.Diag
	blas64.Dtrsv(uplo, t, diag, a.N, a.Data, a.Stride, x.Data, x.Inc)
}
// Tbsv solves
//  A * x = b,   if t == blas.NoTrans,
//  A^T * x = b, if t == blas.Trans or blas.ConjTrans,
// where A is an n×n triangular band matrix, and x and b are vectors.
//
// On entry x holds the values of b; the solution is written back into x
// in place.
//
// No test for singularity or near-singularity is included in this
// routine. Such tests must be performed before calling this routine.
func Tbsv(t blas.Transpose, a TriangularBand, x Vector) {
	uplo, diag := a.Uplo, a.Diag
	n, k := a.N, a.K
	blas64.Dtbsv(uplo, t, diag, n, k, a.Data, a.Stride, x.Data, x.Inc)
}
// Tpsv solves
//  A * x = b,   if t == blas.NoTrans,
//  A^T * x = b, if t == blas.Trans or blas.ConjTrans,
// where A is an n×n triangular matrix in packed format, and x and b are
// vectors.
//
// On entry x holds the values of b; the solution is written back into x
// in place.
//
// No test for singularity or near-singularity is included in this
// routine. Such tests must be performed before calling this routine.
func Tpsv(t blas.Transpose, a TriangularPacked, x Vector) {
	uplo, diag := a.Uplo, a.Diag
	blas64.Dtpsv(uplo, t, diag, a.N, a.Data, x.Data, x.Inc)
}
// Symv computes
//  y = alpha * A * x + beta * y,
// where A is an n×n symmetric matrix, x and y are vectors, and alpha and
// beta are scalars.
func Symv(alpha float64, a Symmetric, x Vector, beta float64, y Vector) {
	uplo, n := a.Uplo, a.N
	blas64.Dsymv(uplo, n, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc)
}
// Sbmv performs
//  y = alpha * A * x + beta * y,
// where A is an n×n symmetric band matrix, x and y are vectors, and alpha
// and beta are scalars.
func Sbmv(alpha float64, a SymmetricBand, x Vector, beta float64, y Vector) {
	n, k := a.N, a.K
	blas64.Dsbmv(a.Uplo, n, k, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc)
}
// Spmv performs
//  y = alpha * A * x + beta * y,
// where A is an n×n symmetric matrix in packed format, x and y are vectors,
// and alpha and beta are scalars.
func Spmv(alpha float64, a SymmetricPacked, x Vector, beta float64, y Vector) {
	uplo, n := a.Uplo, a.N
	blas64.Dspmv(uplo, n, alpha, a.Data, x.Data, x.Inc, beta, y.Data, y.Inc)
}
// Ger performs a rank-1 update
//  A += alpha * x * y^T,
// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.
func Ger(alpha float64, x, y Vector, a General) {
	m, n := a.Rows, a.Cols
	blas64.Dger(m, n, alpha, x.Data, x.Inc, y.Data, y.Inc, a.Data, a.Stride)
}
// Syr performs a rank-1 update
//  A += alpha * x * x^T,
// where A is an n×n symmetric matrix, x is a vector, and alpha is a scalar.
func Syr(alpha float64, x Vector, a Symmetric) {
	uplo, n := a.Uplo, a.N
	blas64.Dsyr(uplo, n, alpha, x.Data, x.Inc, a.Data, a.Stride)
}
// Spr performs the rank-1 update
//  A += alpha * x * x^T,
// where A is an n×n symmetric matrix in packed format, x is a vector, and
// alpha is a scalar.
func Spr(alpha float64, x Vector, a SymmetricPacked) {
	uplo, n := a.Uplo, a.N
	blas64.Dspr(uplo, n, alpha, x.Data, x.Inc, a.Data)
}
// Syr2 performs a rank-2 update
//  A += alpha * x * y^T + alpha * y * x^T,
// where A is a symmetric n×n matrix, x and y are vectors, and alpha is a scalar.
func Syr2(alpha float64, x, y Vector, a Symmetric) {
	uplo, n := a.Uplo, a.N
	blas64.Dsyr2(uplo, n, alpha, x.Data, x.Inc, y.Data, y.Inc, a.Data, a.Stride)
}
// Spr2 performs a rank-2 update
//  A += alpha * x * y^T + alpha * y * x^T,
// where A is an n×n symmetric matrix in packed format, x and y are vectors,
// and alpha is a scalar.
func Spr2(alpha float64, x, y Vector, a SymmetricPacked) {
	uplo, n := a.Uplo, a.N
	blas64.Dspr2(uplo, n, alpha, x.Data, x.Inc, y.Data, y.Inc, a.Data)
}
// Level 3

// Gemm computes
//  C = alpha * A * B + beta * C,
// where A, B, and C are dense matrices, and alpha and beta are scalars.
// tA and tB specify whether A or B are transposed.
func Gemm(tA, tB blas.Transpose, alpha float64, a, b General, beta float64, c General) {
	// Start from the untransposed shapes and adjust for each operand
	// that is used transposed.
	m, k := a.Rows, a.Cols
	if tA != blas.NoTrans {
		m, k = k, m
	}
	n := b.Cols
	if tB != blas.NoTrans {
		n = b.Rows
	}
	blas64.Dgemm(tA, tB, m, n, k, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride)
}
// Symm performs
//  C = alpha * A * B + beta * C, if s == blas.Left,
//  C = alpha * B * A + beta * C, if s == blas.Right,
// where A is an n×n or m×m symmetric matrix, B and C are m×n matrices, and
// alpha and beta are scalars.
func Symm(s blas.Side, alpha float64, a Symmetric, b General, beta float64, c General) {
	// A multiplies from the right unless s says otherwise, in which case
	// its order supplies the row count instead of the column count.
	m, n := b.Rows, a.N
	if s == blas.Left {
		m, n = a.N, b.Cols
	}
	blas64.Dsymm(s, a.Uplo, m, n, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride)
}
// Syrk performs a symmetric rank-k update
//  C = alpha * A * A^T + beta * C, if t == blas.NoTrans,
//  C = alpha * A^T * A + beta * C, if t == blas.Trans or blas.ConjTrans,
// where C is an n×n symmetric matrix, A is an n×k matrix if t == blas.NoTrans and
// a k×n matrix otherwise, and alpha and beta are scalars.
func Syrk(t blas.Transpose, alpha float64, a General, beta float64, c Symmetric) {
	n, k := a.Rows, a.Cols
	if t != blas.NoTrans {
		n, k = k, n
	}
	blas64.Dsyrk(c.Uplo, t, n, k, alpha, a.Data, a.Stride, beta, c.Data, c.Stride)
}
// Syr2k performs a symmetric rank-2k update
//  C = alpha * A * B^T + alpha * B * A^T + beta * C, if t == blas.NoTrans,
//  C = alpha * A^T * B + alpha * B^T * A + beta * C, if t == blas.Trans or blas.ConjTrans,
// where C is an n×n symmetric matrix, A and B are n×k matrices if t == NoTrans
// and k×n matrices otherwise, and alpha and beta are scalars.
//
// n and k are taken from the dimensions of a.
func Syr2k(t blas.Transpose, alpha float64, a, b General, beta float64, c Symmetric) {
	n, k := a.Rows, a.Cols
	if t != blas.NoTrans {
		n, k = k, n
	}
	blas64.Dsyr2k(c.Uplo, t, n, k, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride)
}
// Trmm performs
//  B = alpha * A * B,   if tA == blas.NoTrans and s == blas.Left,
//  B = alpha * A^T * B, if tA == blas.Trans or blas.ConjTrans, and s == blas.Left,
//  B = alpha * B * A,   if tA == blas.NoTrans and s == blas.Right,
//  B = alpha * B * A^T, if tA == blas.Trans or blas.ConjTrans, and s == blas.Right,
// where A is an n×n or m×m triangular matrix, B is an m×n matrix, and alpha is
// a scalar.
//
// The sizes m and n passed to the implementation are b.Rows and b.Cols.
func Trmm(s blas.Side, tA blas.Transpose, alpha float64, a Triangular, b General) {
blas64.Dtrmm(s, a.Uplo, tA, a.Diag, b.Rows, b.Cols, alpha, a.Data, a.Stride, b.Data, b.Stride)
}
// Trsm solves
//  A * X = alpha * B,   if tA == blas.NoTrans and s == blas.Left,
//  A^T * X = alpha * B, if tA == blas.Trans or blas.ConjTrans, and s == blas.Left,
//  X * A = alpha * B,   if tA == blas.NoTrans and s == blas.Right,
//  X * A^T = alpha * B, if tA == blas.Trans or blas.ConjTrans, and s == blas.Right,
// where A is an n×n or m×m triangular matrix, X and B are m×n matrices, and
// alpha is a scalar.
//
// At entry to the function, b contains the values of B, and the result is
// stored in-place into b.
//
// The sizes m and n passed to the implementation are b.Rows and b.Cols.
//
// No check is made that A is invertible.
func Trsm(s blas.Side, tA blas.Transpose, alpha float64, a Triangular, b General) {
blas64.Dtrsm(s, a.Uplo, tA, a.Diag, b.Rows, b.Cols, alpha, a.Data, a.Stride, b.Data, b.Stride)
}

277
vendor/gonum.org/v1/gonum/blas/blas64/conv.go generated vendored Normal file
View File

@@ -0,0 +1,277 @@
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package blas64
import "gonum.org/v1/gonum/blas"
// GeneralCols represents a matrix using the conventional column-major storage scheme.
type GeneralCols General

// From copies the elements of the row-major matrix a into the column-major
// receiver. The receiver must have the same dimensions as a and have
// adequate backing data storage.
//
// From panics if the dimensions differ or if the receiver's data slice is
// too short for its dimensions and stride.
func (t GeneralCols) From(a General) {
	switch {
	case t.Rows != a.Rows || t.Cols != a.Cols:
		panic("blas64: mismatched dimension")
	case len(t.Data) < (t.Cols-1)*t.Stride+t.Rows:
		panic("blas64: short data slice")
	}
	for i := 0; i < a.Rows; i++ {
		// row is the i-th row of the source.
		row := a.Data[i*a.Stride : i*a.Stride+a.Cols]
		for j := range row {
			t.Data[i+j*t.Stride] = row[j]
		}
	}
}
// From copies the elements of the column-major matrix a into the row-major
// receiver. The receiver must have the same dimensions as a and have
// adequate backing data storage.
//
// From panics if the dimensions differ or if the receiver's data slice is
// too short for its dimensions and stride.
func (t General) From(a GeneralCols) {
	switch {
	case t.Rows != a.Rows || t.Cols != a.Cols:
		panic("blas64: mismatched dimension")
	case len(t.Data) < (t.Rows-1)*t.Stride+t.Cols:
		panic("blas64: short data slice")
	}
	for j := 0; j < a.Cols; j++ {
		// col is the j-th column of the source.
		col := a.Data[j*a.Stride : j*a.Stride+a.Rows]
		for i := range col {
			t.Data[i*t.Stride+j] = col[i]
		}
	}
}
// TriangularCols represents a triangular matrix using the conventional
// column-major storage scheme.
type TriangularCols Triangular

// From copies the stored triangle of the row-major matrix a into the
// column-major receiver. The receiver must have the same dimensions, uplo
// and diag as a and have adequate backing data storage.
//
// From panics if N, Uplo or Diag differ, or if Uplo is not one of
// blas.Upper, blas.Lower or blas.All.
func (t TriangularCols) From(a Triangular) {
	switch {
	case t.N != a.N:
		panic("blas64: mismatched dimension")
	case t.Uplo != a.Uplo:
		panic("blas64: mismatched BLAS uplo")
	case t.Diag != a.Diag:
		panic("blas64: mismatched BLAS diag")
	}
	switch a.Uplo {
	case blas.Upper, blas.Lower, blas.All:
	default:
		panic("blas64: bad BLAS uplo")
	}
	for i := 0; i < a.N; i++ {
		// [first, end) is the range of columns copied for row i.
		first, end := 0, a.N
		switch a.Uplo {
		case blas.Upper:
			first = i
		case blas.Lower:
			end = i + 1
		}
		for j := first; j < end; j++ {
			t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j]
		}
	}
}
// From copies the stored triangle of the column-major matrix a into the
// row-major receiver. The receiver must have the same dimensions, uplo and
// diag as a and have adequate backing data storage.
//
// From panics if N, Uplo or Diag differ, or if Uplo is not one of
// blas.Upper, blas.Lower or blas.All.
func (t Triangular) From(a TriangularCols) {
	switch {
	case t.N != a.N:
		panic("blas64: mismatched dimension")
	case t.Uplo != a.Uplo:
		panic("blas64: mismatched BLAS uplo")
	case t.Diag != a.Diag:
		panic("blas64: mismatched BLAS diag")
	}
	switch a.Uplo {
	case blas.Upper, blas.Lower, blas.All:
	default:
		panic("blas64: bad BLAS uplo")
	}
	for i := 0; i < a.N; i++ {
		// [first, end) is the range of columns copied for row i.
		first, end := 0, a.N
		switch a.Uplo {
		case blas.Upper:
			first = i
		case blas.Lower:
			end = i + 1
		}
		for j := first; j < end; j++ {
			t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride]
		}
	}
}
// BandCols represents a matrix using the band column-major storage scheme.
type BandCols Band

// From copies the band of the row-major band matrix a into the column-major
// receiver. The receiver must have the same dimensions and bandwidth as a
// and have adequate backing data storage.
//
// From panics if the dimensions or bandwidths differ, or if either stride is
// smaller than KL+KU+1.
func (t BandCols) From(a Band) {
	switch {
	case t.Rows != a.Rows || t.Cols != a.Cols:
		panic("blas64: mismatched dimension")
	case t.KL != a.KL || t.KU != a.KU:
		panic("blas64: mismatched bandwidth")
	case a.Stride < a.KL+a.KU+1:
		panic("blas64: short stride for source")
	case t.Stride < t.KL+t.KU+1:
		panic("blas64: short stride for destination")
	}
	for i := 0; i < a.Rows; i++ {
		// [first, end) is the range of columns inside the band on row i.
		first := max(0, i-a.KL)
		end := min(i+a.KU+1, a.Cols)
		for j := first; j < end; j++ {
			t.Data[i+t.KU-j+j*t.Stride] = a.Data[j+a.KL-i+i*a.Stride]
		}
	}
}
// From copies the band of the column-major band matrix a into the row-major
// receiver. The receiver must have the same dimensions and bandwidth as a
// and have adequate backing data storage.
//
// From panics if the dimensions or bandwidths differ, or if either stride is
// smaller than KL+KU+1.
func (t Band) From(a BandCols) {
	if t.Rows != a.Rows || t.Cols != a.Cols {
		panic("blas64: mismatched dimension")
	}
	if t.KL != a.KL || t.KU != a.KU {
		panic("blas64: mismatched bandwidth")
	}
	if a.Stride < a.KL+a.KU+1 {
		panic("blas64: short stride for source")
	}
	if t.Stride < t.KL+t.KU+1 {
		panic("blas64: short stride for destination")
	}
	for j := 0; j < a.Cols; j++ {
		for i := max(0, j-a.KU); i < min(j+a.KL+1, a.Rows); i++ {
			// Index each matrix with its own stride: the destination and
			// source strides are validated independently above and may
			// differ, so they must not be interchanged.
			t.Data[j+t.KL-i+i*t.Stride] = a.Data[i+a.KU-j+j*a.Stride]
		}
	}
}
// TriangularBandCols represents a triangular matrix using the band column-major storage scheme.
type TriangularBandCols TriangularBand

// From copies the band of the row-major triangular band matrix a into the
// column-major receiver. The receiver must have the same dimensions,
// bandwidth, uplo and diag as a and have adequate backing data storage.
//
// From panics if any of those properties differ, if either stride is smaller
// than K+1, or if Uplo is neither blas.Upper nor blas.Lower.
func (t TriangularBandCols) From(a TriangularBand) {
	switch {
	case t.N != a.N:
		panic("blas64: mismatched dimension")
	case t.K != a.K:
		panic("blas64: mismatched bandwidth")
	case a.Stride < a.K+1:
		panic("blas64: short stride for source")
	case t.Stride < t.K+1:
		panic("blas64: short stride for destination")
	case t.Uplo != a.Uplo:
		panic("blas64: mismatched BLAS uplo")
	case t.Diag != a.Diag:
		panic("blas64: mismatched BLAS diag")
	}
	// The K diagonals lie above the main diagonal for an upper triangle and
	// below it for a lower one; t.K equals a.K at this point.
	var kl, ku int
	switch a.Uplo {
	case blas.Upper:
		ku = a.K
	case blas.Lower:
		kl = a.K
	default:
		panic("blas64: bad BLAS uplo")
	}
	// View both matrices as general band matrices and reuse that conversion.
	src := Band{
		Rows: a.N, Cols: a.N,
		KL: kl, KU: ku,
		Stride: a.Stride,
		Data:   a.Data,
	}
	dst := BandCols{
		Rows: t.N, Cols: t.N,
		KL: kl, KU: ku,
		Stride: t.Stride,
		Data:   t.Data,
	}
	dst.From(src)
}
// From copies the band of the column-major triangular band matrix a into the
// row-major receiver. The receiver must have the same dimensions, bandwidth,
// uplo and diag as a and have adequate backing data storage.
//
// From panics if any of those properties differ, if either stride is smaller
// than K+1, or if Uplo is neither blas.Upper nor blas.Lower.
func (t TriangularBand) From(a TriangularBandCols) {
	switch {
	case t.N != a.N:
		panic("blas64: mismatched dimension")
	case t.K != a.K:
		panic("blas64: mismatched bandwidth")
	case a.Stride < a.K+1:
		panic("blas64: short stride for source")
	case t.Stride < t.K+1:
		panic("blas64: short stride for destination")
	case t.Uplo != a.Uplo:
		panic("blas64: mismatched BLAS uplo")
	case t.Diag != a.Diag:
		panic("blas64: mismatched BLAS diag")
	}
	// The K diagonals lie above the main diagonal for an upper triangle and
	// below it for a lower one; t.K equals a.K at this point.
	var kl, ku int
	switch a.Uplo {
	case blas.Upper:
		ku = a.K
	case blas.Lower:
		kl = a.K
	default:
		panic("blas64: bad BLAS uplo")
	}
	// View both matrices as general band matrices and reuse that conversion.
	src := BandCols{
		Rows: a.N, Cols: a.N,
		KL: kl, KU: ku,
		Stride: a.Stride,
		Data:   a.Data,
	}
	dst := Band{
		Rows: t.N, Cols: t.N,
		KL: kl, KU: ku,
		Stride: t.Stride,
		Data:   t.Data,
	}
	dst.From(src)
}
// min returns the smaller of a and b.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
// max returns the larger of a and b.
func max(a, b int) int {
	if b > a {
		return b
	}
	return a
}

153
vendor/gonum.org/v1/gonum/blas/blas64/conv_symmetric.go generated vendored Normal file
View File

@@ -0,0 +1,153 @@
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package blas64
import "gonum.org/v1/gonum/blas"
// SymmetricCols represents a symmetric matrix using the conventional
// column-major storage scheme.
type SymmetricCols Symmetric

// From copies the stored triangle of the row-major matrix a into the
// column-major receiver. The receiver must have the same dimensions and uplo
// as a and have adequate backing data storage.
//
// From panics if N or Uplo differ, or if Uplo is neither blas.Upper nor
// blas.Lower.
func (t SymmetricCols) From(a Symmetric) {
	switch {
	case t.N != a.N:
		panic("blas64: mismatched dimension")
	case t.Uplo != a.Uplo:
		panic("blas64: mismatched BLAS uplo")
	}
	switch a.Uplo {
	case blas.Upper, blas.Lower:
	default:
		panic("blas64: bad BLAS uplo")
	}
	for i := 0; i < a.N; i++ {
		// [first, end) is the range of stored columns on row i.
		first, end := 0, i+1
		if a.Uplo == blas.Upper {
			first, end = i, a.N
		}
		for j := first; j < end; j++ {
			t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j]
		}
	}
}
// From copies the stored triangle of the column-major matrix a into the
// row-major receiver. The receiver must have the same dimensions and uplo as
// a and have adequate backing data storage.
//
// From panics if N or Uplo differ, or if Uplo is neither blas.Upper nor
// blas.Lower.
func (t Symmetric) From(a SymmetricCols) {
	switch {
	case t.N != a.N:
		panic("blas64: mismatched dimension")
	case t.Uplo != a.Uplo:
		panic("blas64: mismatched BLAS uplo")
	}
	switch a.Uplo {
	case blas.Upper, blas.Lower:
	default:
		panic("blas64: bad BLAS uplo")
	}
	for i := 0; i < a.N; i++ {
		// [first, end) is the range of stored columns on row i.
		first, end := 0, i+1
		if a.Uplo == blas.Upper {
			first, end = i, a.N
		}
		for j := first; j < end; j++ {
			t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride]
		}
	}
}
// SymmetricBandCols represents a symmetric matrix using the band column-major storage scheme.
type SymmetricBandCols SymmetricBand

// From copies the band of the row-major symmetric band matrix a into the
// column-major receiver. The receiver must have the same dimensions,
// bandwidth and uplo as a and have adequate backing data storage.
//
// From panics if any of those properties differ, if either stride is smaller
// than K+1, or if Uplo is neither blas.Upper nor blas.Lower.
func (t SymmetricBandCols) From(a SymmetricBand) {
	switch {
	case t.N != a.N:
		panic("blas64: mismatched dimension")
	case t.K != a.K:
		panic("blas64: mismatched bandwidth")
	case a.Stride < a.K+1:
		panic("blas64: short stride for source")
	case t.Stride < t.K+1:
		panic("blas64: short stride for destination")
	case t.Uplo != a.Uplo:
		panic("blas64: mismatched BLAS uplo")
	}
	// The K diagonals lie above the main diagonal for an upper triangle and
	// below it for a lower one; t.K equals a.K at this point.
	var kl, ku int
	switch a.Uplo {
	case blas.Upper:
		ku = a.K
	case blas.Lower:
		kl = a.K
	default:
		panic("blas64: bad BLAS uplo")
	}
	// View both matrices as general band matrices and reuse that conversion.
	src := Band{
		Rows: a.N, Cols: a.N,
		KL: kl, KU: ku,
		Stride: a.Stride,
		Data:   a.Data,
	}
	dst := BandCols{
		Rows: t.N, Cols: t.N,
		KL: kl, KU: ku,
		Stride: t.Stride,
		Data:   t.Data,
	}
	dst.From(src)
}
// From copies the band of the column-major symmetric band matrix a into the
// row-major receiver. The receiver must have the same dimensions, bandwidth
// and uplo as a and have adequate backing data storage.
//
// From panics if any of those properties differ, if either stride is smaller
// than K+1, or if Uplo is neither blas.Upper nor blas.Lower.
func (t SymmetricBand) From(a SymmetricBandCols) {
	switch {
	case t.N != a.N:
		panic("blas64: mismatched dimension")
	case t.K != a.K:
		panic("blas64: mismatched bandwidth")
	case a.Stride < a.K+1:
		panic("blas64: short stride for source")
	case t.Stride < t.K+1:
		panic("blas64: short stride for destination")
	case t.Uplo != a.Uplo:
		panic("blas64: mismatched BLAS uplo")
	}
	// The K diagonals lie above the main diagonal for an upper triangle and
	// below it for a lower one; t.K equals a.K at this point.
	var kl, ku int
	switch a.Uplo {
	case blas.Upper:
		ku = a.K
	case blas.Lower:
		kl = a.K
	default:
		panic("blas64: bad BLAS uplo")
	}
	// View both matrices as general band matrices and reuse that conversion.
	src := BandCols{
		Rows: a.N, Cols: a.N,
		KL: kl, KU: ku,
		Stride: a.Stride,
		Data:   a.Data,
	}
	dst := Band{
		Rows: t.N, Cols: t.N,
		KL: kl, KU: ku,
		Stride: t.Stride,
		Data:   t.Data,
	}
	dst.From(src)
}

6
vendor/gonum.org/v1/gonum/blas/blas64/doc.go generated vendored Normal file
View File

@@ -0,0 +1,6 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package blas64 provides a simple interface to the float64 BLAS API.
package blas64 // import "gonum.org/v1/gonum/blas/blas64"

508
vendor/gonum.org/v1/gonum/blas/cblas128/cblas128.go generated vendored Normal file
View File

@@ -0,0 +1,508 @@
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cblas128
import (
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/blas/gonum"
)
// cblas128 is the BLAS complex128 implementation that the functions in this
// package forward to. It defaults to gonum.Implementation{}.
var cblas128 blas.Complex128 = gonum.Implementation{}
// Use sets the BLAS complex128 implementation to be used by subsequent BLAS calls.
// The default implementation is
// gonum.org/v1/gonum/blas/gonum.Implementation.
//
// Use replaces the package-level implementation, so the change applies to
// every later call made through this package.
func Use(b blas.Complex128) {
cblas128 = b
}
// Implementation returns the current BLAS complex128 implementation.
//
// Implementation allows direct calls to the current BLAS complex128
// implementation, giving finer control of parameters.
func Implementation() blas.Complex128 {
return cblas128
}
// Vector represents a vector with an associated element increment.
type Vector struct {
// Inc is the element increment; it is passed to the BLAS routines as the
// increment argument that accompanies Data.
Inc int
// Data is the backing storage of the vector.
Data []complex128
}
// General represents a matrix using the conventional storage scheme.
//
// The conversion methods of this package read element (i, j) from
// Data[i*Stride+j], i.e. the layout is row-major.
type General struct {
// Rows and Cols are the matrix dimensions.
Rows, Cols int
// Stride is the distance in Data between the starts of consecutive rows.
Stride int
// Data is the backing storage of the matrix.
Data []complex128
}
// Band represents a band matrix using the band storage scheme.
//
// The conversion methods of this package read element (i, j) of the band
// from Data[i*Stride+j+KL-i].
type Band struct {
// Rows and Cols are the matrix dimensions.
Rows, Cols int
// KL and KU are the numbers of diagonals stored below and above the main
// diagonal respectively.
KL, KU int
// Stride is the distance in Data between the starts of consecutive rows;
// the conversion methods require it to be at least KL+KU+1.
Stride int
// Data is the backing storage of the band.
Data []complex128
}
// Triangular represents a triangular matrix using the conventional storage scheme.
type Triangular struct {
// N is the order of the matrix.
N int
// Stride is the distance in Data between the starts of consecutive rows.
Stride int
// Data is the backing storage of the matrix.
Data []complex128
// Uplo selects which triangle of the matrix is used.
Uplo blas.Uplo
// Diag is forwarded to the BLAS routines as their diag argument.
// NOTE(review): presumably it marks a unit or non-unit diagonal; confirm
// against the blas.Diag documentation.
Diag blas.Diag
}
// TriangularBand represents a triangular matrix using the band storage scheme.
type TriangularBand struct {
// N is the order of the matrix and K is its bandwidth, the number of
// diagonals stored besides the main diagonal.
N, K int
// Stride is the distance in Data between the starts of consecutive rows
// of the band; the conversion methods require it to be at least K+1.
Stride int
// Data is the backing storage of the band.
Data []complex128
// Uplo selects which triangle of the matrix is used.
Uplo blas.Uplo
// Diag is forwarded to the BLAS routines as their diag argument.
// NOTE(review): presumably it marks a unit or non-unit diagonal; confirm
// against the blas.Diag documentation.
Diag blas.Diag
}
// TriangularPacked represents a triangular matrix using the packed storage scheme.
type TriangularPacked struct {
// N is the order of the matrix.
N int
// Data holds the packed elements. No stride is stored for this format.
// NOTE(review): the exact packed ordering is not visible here; confirm
// against the implementation's documentation.
Data []complex128
// Uplo selects which triangle of the matrix is used.
Uplo blas.Uplo
// Diag is forwarded to the BLAS routines as their diag argument.
Diag blas.Diag
}
// Symmetric represents a symmetric matrix using the conventional storage scheme.
type Symmetric struct {
// N is the order of the matrix.
N int
// Stride is the distance in Data between the starts of consecutive rows.
Stride int
// Data is the backing storage of the matrix.
Data []complex128
// Uplo selects which triangle of the matrix is used.
Uplo blas.Uplo
}
// SymmetricBand represents a symmetric matrix using the band storage scheme.
type SymmetricBand struct {
// N is the order of the matrix and K is its bandwidth, the number of
// diagonals stored besides the main diagonal.
N, K int
// Stride is the distance in Data between the starts of consecutive rows
// of the band.
Stride int
// Data is the backing storage of the band.
Data []complex128
// Uplo selects which triangle of the matrix is used.
Uplo blas.Uplo
}
// SymmetricPacked represents a symmetric matrix using the packed storage scheme.
type SymmetricPacked struct {
// N is the order of the matrix.
N int
// Data holds the packed elements. No stride is stored for this format.
// NOTE(review): the exact packed ordering is not visible here; confirm
// against the implementation's documentation.
Data []complex128
// Uplo selects which triangle of the matrix is used.
Uplo blas.Uplo
}
// Hermitian represents an Hermitian matrix using the conventional storage scheme.
// It has the same fields as Symmetric.
type Hermitian Symmetric
// HermitianBand represents an Hermitian matrix using the band storage scheme.
// It has the same fields as SymmetricBand.
type HermitianBand SymmetricBand
// HermitianPacked represents an Hermitian matrix using the packed storage scheme.
// It has the same fields as SymmetricPacked.
type HermitianPacked SymmetricPacked
// Level 1

// negInc is the panic message used by the functions in this package that
// reject a vector with a negative increment.
const negInc = "cblas128: negative vector increment"
// Dotu computes the dot product of the two vectors without
// complex conjugation:
//  x^T * y.
//
// Dotu forwards to the Zdotu routine of the current implementation.
func Dotu(n int, x, y Vector) complex128 {
return cblas128.Zdotu(n, x.Data, x.Inc, y.Data, y.Inc)
}
// Dotc computes the dot product of the two vectors with
// complex conjugation:
//  x^H * y.
//
// Dotc forwards to the Zdotc routine of the current implementation.
func Dotc(n int, x, y Vector) complex128 {
return cblas128.Zdotc(n, x.Data, x.Inc, y.Data, y.Inc)
}
// Nrm2 computes the Euclidean norm of the vector x:
//  sqrt(\sum_i conj(x[i]) * x[i]).
//
// Nrm2 will panic if the vector increment is negative.
func Nrm2(n int, x Vector) float64 {
// The increment is validated here, before the implementation is called.
if x.Inc < 0 {
panic(negInc)
}
return cblas128.Dznrm2(n, x.Data, x.Inc)
}
// Asum computes the sum of magnitudes of the real and imaginary parts of
// elements of the vector x:
//  \sum_i (|Re x[i]| + |Im x[i]|).
//
// Asum will panic if the vector increment is negative.
func Asum(n int, x Vector) float64 {
// The increment is validated here, before the implementation is called.
if x.Inc < 0 {
panic(negInc)
}
return cblas128.Dzasum(n, x.Data, x.Inc)
}
// Iamax returns the index of an element of x with the largest sum of
// magnitudes of the real and imaginary parts (|Re x[i]|+|Im x[i]|).
// If there are multiple such indices, the earliest is returned.
//
// Iamax returns -1 if n == 0.
//
// Iamax will panic if the vector increment is negative.
func Iamax(n int, x Vector) int {
// The increment is validated here, before the implementation is called.
if x.Inc < 0 {
panic(negInc)
}
return cblas128.Izamax(n, x.Data, x.Inc)
}
// Swap exchanges the elements of two vectors:
//  x[i], y[i] = y[i], x[i] for all i.
//
// Swap forwards to the Zswap routine of the current implementation.
func Swap(n int, x, y Vector) {
cblas128.Zswap(n, x.Data, x.Inc, y.Data, y.Inc)
}
// Copy copies the elements of x into the elements of y:
//  y[i] = x[i] for all i.
//
// Copy forwards to the Zcopy routine of the current implementation.
func Copy(n int, x, y Vector) {
cblas128.Zcopy(n, x.Data, x.Inc, y.Data, y.Inc)
}
// Axpy computes
//  y = alpha * x + y,
// where x and y are vectors, and alpha is a scalar.
//
// Axpy forwards to the Zaxpy routine of the current implementation.
func Axpy(n int, alpha complex128, x, y Vector) {
cblas128.Zaxpy(n, alpha, x.Data, x.Inc, y.Data, y.Inc)
}
// Scal computes
//  x = alpha * x,
// where x is a vector, and alpha is a scalar.
//
// Scal will panic if the vector increment is negative.
func Scal(n int, alpha complex128, x Vector) {
// The increment is validated here, before the implementation is called.
if x.Inc < 0 {
panic(negInc)
}
cblas128.Zscal(n, alpha, x.Data, x.Inc)
}
// Dscal computes
//  x = alpha * x,
// where x is a vector, and alpha is a real scalar.
//
// Dscal will panic if the vector increment is negative.
func Dscal(n int, alpha float64, x Vector) {
// The increment is validated here, before the implementation is called.
if x.Inc < 0 {
panic(negInc)
}
cblas128.Zdscal(n, alpha, x.Data, x.Inc)
}
// Level 2

// Gemv computes
//  y = alpha * A * x + beta * y,   if t == blas.NoTrans,
//  y = alpha * A^T * x + beta * y, if t == blas.Trans,
//  y = alpha * A^H * x + beta * y, if t == blas.ConjTrans,
// where A is an m×n dense matrix, x and y are vectors, and alpha and beta are
// scalars.
//
// The sizes m and n passed to the implementation are a.Rows and a.Cols.
func Gemv(t blas.Transpose, alpha complex128, a General, x Vector, beta complex128, y Vector) {
cblas128.Zgemv(t, a.Rows, a.Cols, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc)
}
// Gbmv computes
//  y = alpha * A * x + beta * y,   if t == blas.NoTrans,
//  y = alpha * A^T * x + beta * y, if t == blas.Trans,
//  y = alpha * A^H * x + beta * y, if t == blas.ConjTrans,
// where A is an m×n band matrix, x and y are vectors, and alpha and beta are
// scalars.
//
// The dimensions and bandwidths passed to the implementation are a.Rows,
// a.Cols, a.KL and a.KU.
func Gbmv(t blas.Transpose, alpha complex128, a Band, x Vector, beta complex128, y Vector) {
cblas128.Zgbmv(t, a.Rows, a.Cols, a.KL, a.KU, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc)
}
// Trmv computes
//  x = A * x,   if t == blas.NoTrans,
//  x = A^T * x, if t == blas.Trans,
//  x = A^H * x, if t == blas.ConjTrans,
// where A is an n×n triangular matrix, and x is a vector.
//
// The result overwrites x.
func Trmv(t blas.Transpose, a Triangular, x Vector) {
cblas128.Ztrmv(a.Uplo, t, a.Diag, a.N, a.Data, a.Stride, x.Data, x.Inc)
}
// Tbmv computes
//  x = A * x,   if t == blas.NoTrans,
//  x = A^T * x, if t == blas.Trans,
//  x = A^H * x, if t == blas.ConjTrans,
// where A is an n×n triangular band matrix, and x is a vector.
//
// The result overwrites x.
func Tbmv(t blas.Transpose, a TriangularBand, x Vector) {
cblas128.Ztbmv(a.Uplo, t, a.Diag, a.N, a.K, a.Data, a.Stride, x.Data, x.Inc)
}
// Tpmv computes
//  x = A * x,   if t == blas.NoTrans,
//  x = A^T * x, if t == blas.Trans,
//  x = A^H * x, if t == blas.ConjTrans,
// where A is an n×n triangular matrix in packed format, and x is a vector.
//
// The result overwrites x.
func Tpmv(t blas.Transpose, a TriangularPacked, x Vector) {
cblas128.Ztpmv(a.Uplo, t, a.Diag, a.N, a.Data, x.Data, x.Inc)
}
// Trsv solves
//  A * x = b,   if t == blas.NoTrans,
//  A^T * x = b, if t == blas.Trans,
//  A^H * x = b, if t == blas.ConjTrans,
// where A is an n×n triangular matrix and x is a vector.
//
// At entry to the function, x contains the values of b, and the result is
// stored in-place into x.
//
// No test for singularity or near-singularity is included in this
// routine. Such tests must be performed before calling this routine.
func Trsv(t blas.Transpose, a Triangular, x Vector) {
cblas128.Ztrsv(a.Uplo, t, a.Diag, a.N, a.Data, a.Stride, x.Data, x.Inc)
}
// Tbsv solves
//  A * x = b,   if t == blas.NoTrans,
//  A^T * x = b, if t == blas.Trans,
//  A^H * x = b, if t == blas.ConjTrans,
// where A is an n×n triangular band matrix, and x is a vector.
//
// At entry to the function, x contains the values of b, and the result is
// stored in-place into x.
//
// No test for singularity or near-singularity is included in this
// routine. Such tests must be performed before calling this routine.
func Tbsv(t blas.Transpose, a TriangularBand, x Vector) {
cblas128.Ztbsv(a.Uplo, t, a.Diag, a.N, a.K, a.Data, a.Stride, x.Data, x.Inc)
}
// Tpsv solves
//  A * x = b,   if t == blas.NoTrans,
//  A^T * x = b, if t == blas.Trans,
//  A^H * x = b, if t == blas.ConjTrans,
// where A is an n×n triangular matrix in packed format and x is a vector.
//
// At entry to the function, x contains the values of b, and the result is
// stored in-place into x.
//
// No test for singularity or near-singularity is included in this
// routine. Such tests must be performed before calling this routine.
func Tpsv(t blas.Transpose, a TriangularPacked, x Vector) {
cblas128.Ztpsv(a.Uplo, t, a.Diag, a.N, a.Data, x.Data, x.Inc)
}
// Hemv computes
//  y = alpha * A * x + beta * y,
// where A is an n×n Hermitian matrix, x and y are vectors, and alpha and
// beta are scalars.
//
// The result overwrites y.
func Hemv(alpha complex128, a Hermitian, x Vector, beta complex128, y Vector) {
cblas128.Zhemv(a.Uplo, a.N, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc)
}
// Hbmv performs
//  y = alpha * A * x + beta * y,
// where A is an n×n Hermitian band matrix, x and y are vectors, and alpha
// and beta are scalars.
//
// The result overwrites y.
func Hbmv(alpha complex128, a HermitianBand, x Vector, beta complex128, y Vector) {
cblas128.Zhbmv(a.Uplo, a.N, a.K, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc)
}
// Hpmv performs
//  y = alpha * A * x + beta * y,
// where A is an n×n Hermitian matrix in packed format, x and y are vectors,
// and alpha and beta are scalars.
//
// The result overwrites y.
func Hpmv(alpha complex128, a HermitianPacked, x Vector, beta complex128, y Vector) {
cblas128.Zhpmv(a.Uplo, a.N, alpha, a.Data, x.Data, x.Inc, beta, y.Data, y.Inc)
}
// Geru performs a rank-1 update
//  A += alpha * x * y^T,
// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.
//
// The sizes m and n passed to the implementation are a.Rows and a.Cols.
func Geru(alpha complex128, x, y Vector, a General) {
cblas128.Zgeru(a.Rows, a.Cols, alpha, x.Data, x.Inc, y.Data, y.Inc, a.Data, a.Stride)
}
// Gerc performs a rank-1 update
//  A += alpha * x * y^H,
// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.
//
// The sizes m and n passed to the implementation are a.Rows and a.Cols.
func Gerc(alpha complex128, x, y Vector, a General) {
cblas128.Zgerc(a.Rows, a.Cols, alpha, x.Data, x.Inc, y.Data, y.Inc, a.Data, a.Stride)
}
// Her performs a rank-1 update
//  A += alpha * x * x^H,
// where A is an n×n Hermitian matrix, x is a vector, and alpha is a real
// scalar.
func Her(alpha float64, x Vector, a Hermitian) {
cblas128.Zher(a.Uplo, a.N, alpha, x.Data, x.Inc, a.Data, a.Stride)
}
// Hpr performs a rank-1 update
//  A += alpha * x * x^H,
// where A is an n×n Hermitian matrix in packed format, x is a vector, and
// alpha is a real scalar.
func Hpr(alpha float64, x Vector, a HermitianPacked) {
cblas128.Zhpr(a.Uplo, a.N, alpha, x.Data, x.Inc, a.Data)
}
// Her2 performs a rank-2 update
//  A += alpha * x * y^H + conj(alpha) * y * x^H,
// where A is an n×n Hermitian matrix, x and y are vectors, and alpha is a scalar.
//
// Her2 forwards to the Zher2 routine of the current implementation.
func Her2(alpha complex128, x, y Vector, a Hermitian) {
cblas128.Zher2(a.Uplo, a.N, alpha, x.Data, x.Inc, y.Data, y.Inc, a.Data, a.Stride)
}
// Hpr2 performs a rank-2 update
//  A += alpha * x * y^H + conj(alpha) * y * x^H,
// where A is an n×n Hermitian matrix in packed format, x and y are vectors,
// and alpha is a scalar.
//
// Hpr2 forwards to the Zhpr2 routine of the current implementation.
func Hpr2(alpha complex128, x, y Vector, a HermitianPacked) {
cblas128.Zhpr2(a.Uplo, a.N, alpha, x.Data, x.Inc, y.Data, y.Inc, a.Data)
}
// Level 3

// Gemm computes the general matrix-matrix product
//  C = alpha * A * B + beta * C,
// where A, B, and C are dense matrices, and alpha and beta are scalars.
// tA and tB specify whether A or B are transposed or conjugated.
//
// The operation sizes m, n and k are derived from the shapes of a and b;
// only the data and stride of c are used.
func Gemm(tA, tB blas.Transpose, alpha complex128, a, b General, beta complex128, c General) {
	// Start from the stored shape of a and swap it when a is transposed.
	m, k := a.Rows, a.Cols
	if tA != blas.NoTrans {
		m, k = k, m
	}
	// n is the column count of b as it takes part in the product.
	n := b.Cols
	if tB != blas.NoTrans {
		n = b.Rows
	}
	cblas128.Zgemm(tA, tB, m, n, k, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride)
}
// Symm performs one of the symmetric matrix-matrix operations
//  C = alpha * A * B + beta * C, if s == blas.Left,
//  C = alpha * B * A + beta * C, if s == blas.Right,
// where A is an n×n or m×m symmetric matrix, B and C are m×n matrices, and
// alpha and beta are scalars.
func Symm(s blas.Side, alpha complex128, a Symmetric, b General, beta complex128, c General) {
	// By default A multiplies from the right: b supplies m and a supplies n.
	m, n := b.Rows, a.N
	if s == blas.Left {
		// A multiplies from the left: a supplies m and b supplies n.
		m, n = a.N, b.Cols
	}
	cblas128.Zsymm(s, a.Uplo, m, n, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride)
}
// Syrk performs a symmetric rank-k update
//  C = alpha * A * A^T + beta * C, if t == blas.NoTrans,
//  C = alpha * A^T * A + beta * C, if t == blas.Trans,
// where C is an n×n symmetric matrix, A is an n×k matrix if t == blas.NoTrans
// and a k×n matrix otherwise, and alpha and beta are scalars.
func Syrk(t blas.Transpose, alpha complex128, a General, beta complex128, c Symmetric) {
	// a is stored as n×k when not transposed; otherwise its shape is swapped.
	n, k := a.Rows, a.Cols
	if t != blas.NoTrans {
		n, k = k, n
	}
	cblas128.Zsyrk(c.Uplo, t, n, k, alpha, a.Data, a.Stride, beta, c.Data, c.Stride)
}
// Syr2k performs a symmetric rank-2k update
//  C = alpha * A * B^T + alpha * B * A^T + beta * C, if t == blas.NoTrans,
//  C = alpha * A^T * B + alpha * B^T * A + beta * C, if t == blas.Trans,
// where C is an n×n symmetric matrix, A and B are n×k matrices if
// t == blas.NoTrans and k×n otherwise, and alpha and beta are scalars.
//
// The sizes n and k are taken from the shape of a only.
func Syr2k(t blas.Transpose, alpha complex128, a, b General, beta complex128, c Symmetric) {
	// a is stored as n×k when not transposed; otherwise its shape is swapped.
	n, k := a.Rows, a.Cols
	if t != blas.NoTrans {
		n, k = k, n
	}
	cblas128.Zsyr2k(c.Uplo, t, n, k, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride)
}
// Trmm performs
//  B = alpha * A * B,   if tA == blas.NoTrans and s == blas.Left,
//  B = alpha * A^T * B, if tA == blas.Trans and s == blas.Left,
//  B = alpha * A^H * B, if tA == blas.ConjTrans and s == blas.Left,
//  B = alpha * B * A,   if tA == blas.NoTrans and s == blas.Right,
//  B = alpha * B * A^T, if tA == blas.Trans and s == blas.Right,
//  B = alpha * B * A^H, if tA == blas.ConjTrans and s == blas.Right,
// where A is an n×n or m×m triangular matrix, B is an m×n matrix, and alpha is
// a scalar.
//
// The sizes m and n passed to the implementation are b.Rows and b.Cols.
func Trmm(s blas.Side, tA blas.Transpose, alpha complex128, a Triangular, b General) {
cblas128.Ztrmm(s, a.Uplo, tA, a.Diag, b.Rows, b.Cols, alpha, a.Data, a.Stride, b.Data, b.Stride)
}
// Trsm solves
//  A * X = alpha * B,   if tA == blas.NoTrans and s == blas.Left,
//  A^T * X = alpha * B, if tA == blas.Trans and s == blas.Left,
//  A^H * X = alpha * B, if tA == blas.ConjTrans and s == blas.Left,
//  X * A = alpha * B,   if tA == blas.NoTrans and s == blas.Right,
//  X * A^T = alpha * B, if tA == blas.Trans and s == blas.Right,
//  X * A^H = alpha * B, if tA == blas.ConjTrans and s == blas.Right,
// where A is an n×n or m×m triangular matrix, X and B are m×n matrices, and
// alpha is a scalar.
//
// At entry to the function, b contains the values of B, and the result is
// stored in-place into b.
//
// The sizes m and n passed to the implementation are b.Rows and b.Cols.
//
// No check is made that A is invertible.
func Trsm(s blas.Side, tA blas.Transpose, alpha complex128, a Triangular, b General) {
cblas128.Ztrsm(s, a.Uplo, tA, a.Diag, b.Rows, b.Cols, alpha, a.Data, a.Stride, b.Data, b.Stride)
}
// Hemm performs one of the Hermitian matrix-matrix operations
//  C = alpha * A * B + beta * C, if s == blas.Left,
//  C = alpha * B * A + beta * C, if s == blas.Right,
// where A is an n×n or m×m Hermitian matrix, B and C are m×n matrices, and
// alpha and beta are scalars.
func Hemm(s blas.Side, alpha complex128, a Hermitian, b General, beta complex128, c General) {
	// By default A multiplies from the right: b supplies m and a supplies n.
	m, n := b.Rows, a.N
	if s == blas.Left {
		// A multiplies from the left: a supplies m and b supplies n.
		m, n = a.N, b.Cols
	}
	cblas128.Zhemm(s, a.Uplo, m, n, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride)
}
// Herk performs the Hermitian rank-k update
//  C = alpha * A * A^H + beta*C, if t == blas.NoTrans,
//  C = alpha * A^H * A + beta*C, if t == blas.ConjTrans,
// where C is an n×n Hermitian matrix, A is an n×k matrix if t == blas.NoTrans
// and a k×n matrix otherwise, and alpha and beta are scalars.
func Herk(t blas.Transpose, alpha float64, a General, beta float64, c Hermitian) {
	// a is stored as n×k when not transposed; otherwise its shape is swapped.
	n, k := a.Rows, a.Cols
	if t != blas.NoTrans {
		n, k = k, n
	}
	cblas128.Zherk(c.Uplo, t, n, k, alpha, a.Data, a.Stride, beta, c.Data, c.Stride)
}
// Her2k performs the Hermitian rank-2k update
//  C = alpha * A * B^H + conj(alpha) * B * A^H + beta * C, if t == blas.NoTrans,
//  C = alpha * A^H * B + conj(alpha) * B^H * A + beta * C, if t == blas.ConjTrans,
// where C is an n×n Hermitian matrix, A and B are n×k matrices if t == NoTrans
// and k×n matrices otherwise, and alpha and beta are scalars.
//
// The sizes n and k are taken from the shape of a only.
func Her2k(t blas.Transpose, alpha complex128, a, b General, beta float64, c Hermitian) {
	// a is stored as n×k when not transposed; otherwise its shape is swapped.
	n, k := a.Rows, a.Cols
	if t != blas.NoTrans {
		n, k = k, n
	}
	cblas128.Zher2k(c.Uplo, t, n, k, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride)
}

279
vendor/gonum.org/v1/gonum/blas/cblas128/conv.go generated vendored Normal file
View File

@@ -0,0 +1,279 @@
// Code generated by "go generate gonum.org/v1/gonum/blas"; DO NOT EDIT.
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cblas128
import "gonum.org/v1/gonum/blas"
// GeneralCols represents a matrix using the conventional column-major storage scheme.
type GeneralCols General

// From copies the elements of the row-major matrix a into the column-major
// receiver. The receiver must have the same dimensions as a and have
// adequate backing data storage.
//
// From panics if the dimensions differ or if the receiver's data slice is
// too short for its dimensions and stride.
func (t GeneralCols) From(a General) {
	switch {
	case t.Rows != a.Rows || t.Cols != a.Cols:
		panic("cblas128: mismatched dimension")
	case len(t.Data) < (t.Cols-1)*t.Stride+t.Rows:
		panic("cblas128: short data slice")
	}
	for i := 0; i < a.Rows; i++ {
		// row is the i-th row of the source.
		row := a.Data[i*a.Stride : i*a.Stride+a.Cols]
		for j := range row {
			t.Data[i+j*t.Stride] = row[j]
		}
	}
}
// From copies the elements of the column-major matrix a into the row-major
// receiver. The receiver must have the same dimensions as a and have
// adequate backing data storage.
//
// From panics if the dimensions differ or if the receiver's data slice is
// too short for its dimensions and stride.
func (t General) From(a GeneralCols) {
	switch {
	case t.Rows != a.Rows || t.Cols != a.Cols:
		panic("cblas128: mismatched dimension")
	case len(t.Data) < (t.Rows-1)*t.Stride+t.Cols:
		panic("cblas128: short data slice")
	}
	for j := 0; j < a.Cols; j++ {
		// col is the j-th column of the source.
		col := a.Data[j*a.Stride : j*a.Stride+a.Rows]
		for i := range col {
			t.Data[i*t.Stride+j] = col[i]
		}
	}
}
// TriangularCols represents a triangular matrix using the conventional
// column-major storage scheme.
type TriangularCols Triangular

// From copies the stored triangle of the row-major matrix a into the
// column-major receiver. The receiver must have the same dimensions, uplo
// and diag as a and have adequate backing data storage.
//
// From panics if N, Uplo or Diag differ, or if Uplo is not one of
// blas.Upper, blas.Lower or blas.All.
func (t TriangularCols) From(a Triangular) {
	switch {
	case t.N != a.N:
		panic("cblas128: mismatched dimension")
	case t.Uplo != a.Uplo:
		panic("cblas128: mismatched BLAS uplo")
	case t.Diag != a.Diag:
		panic("cblas128: mismatched BLAS diag")
	}
	switch a.Uplo {
	case blas.Upper, blas.Lower, blas.All:
	default:
		panic("cblas128: bad BLAS uplo")
	}
	for i := 0; i < a.N; i++ {
		// [first, end) is the range of columns copied for row i.
		first, end := 0, a.N
		switch a.Uplo {
		case blas.Upper:
			first = i
		case blas.Lower:
			end = i + 1
		}
		for j := first; j < end; j++ {
			t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j]
		}
	}
}
// From copies the stored triangle of the column-major matrix a into the
// row-major receiver. The receiver must have the same dimensions, uplo and
// diag as a and have adequate backing data storage.
//
// From panics if N, Uplo or Diag differ, or if Uplo is not one of
// blas.Upper, blas.Lower or blas.All.
func (t Triangular) From(a TriangularCols) {
	switch {
	case t.N != a.N:
		panic("cblas128: mismatched dimension")
	case t.Uplo != a.Uplo:
		panic("cblas128: mismatched BLAS uplo")
	case t.Diag != a.Diag:
		panic("cblas128: mismatched BLAS diag")
	}
	switch a.Uplo {
	case blas.Upper, blas.Lower, blas.All:
	default:
		panic("cblas128: bad BLAS uplo")
	}
	for i := 0; i < a.N; i++ {
		// [first, end) is the range of columns copied for row i.
		first, end := 0, a.N
		switch a.Uplo {
		case blas.Upper:
			first = i
		case blas.Lower:
			end = i + 1
		}
		for j := first; j < end; j++ {
			t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride]
		}
	}
}
// BandCols represents a banded matrix using the band column-major storage
// scheme. It has the same fields as Band.
type BandCols Band
// From copies the in-band elements of the row-major band matrix a into the
// column-major receiver.
//
// From panics if the receiver and a differ in Rows, Cols, KL or KU, or if
// either Stride is smaller than KL+KU+1. The receiver's backing data must be
// large enough to hold the copied elements.
func (t BandCols) From(a Band) {
	switch {
	case t.Rows != a.Rows || t.Cols != a.Cols:
		panic("cblas128: mismatched dimension")
	case t.KL != a.KL || t.KU != a.KU:
		panic("cblas128: mismatched bandwidth")
	case a.Stride < a.KL+a.KU+1:
		panic("cblas128: short stride for source")
	case t.Stride < t.KL+t.KU+1:
		panic("cblas128: short stride for destination")
	}
	for row := 0; row < a.Rows; row++ {
		// Columns [first, last) are the ones inside the band on this row.
		first := max(0, row-a.KL)
		last := min(row+a.KU+1, a.Cols)
		// Offset such that srcBase+col addresses element (row, col) of a.
		srcBase := row*a.Stride + a.KL - row
		for col := first; col < last; col++ {
			t.Data[col*t.Stride+t.KU-col+row] = a.Data[srcBase+col]
		}
	}
}
// From copies the in-band elements of the column-major band matrix a into
// the row-major receiver.
//
// From panics if the receiver and a differ in Rows, Cols, KL or KU, or if
// either Stride is smaller than KL+KU+1. The receiver's backing data must be
// large enough to hold the copied elements.
func (t Band) From(a BandCols) {
	if t.Rows != a.Rows || t.Cols != a.Cols {
		panic("cblas128: mismatched dimension")
	}
	if t.KL != a.KL || t.KU != a.KU {
		panic("cblas128: mismatched bandwidth")
	}
	if a.Stride < a.KL+a.KU+1 {
		panic("cblas128: short stride for source")
	}
	if t.Stride < t.KL+t.KU+1 {
		panic("cblas128: short stride for destination")
	}
	for j := 0; j < a.Cols; j++ {
		for i := max(0, j-a.KU); i < min(j+a.KL+1, a.Rows); i++ {
			// Each side must be addressed with its own stride: the receiver
			// and a are only required to agree on dimensions and bandwidth,
			// not on Stride.
			t.Data[j+t.KL-i+i*t.Stride] = a.Data[i+a.KU-j+j*a.Stride]
		}
	}
}
// TriangularBandCols represents a triangular matrix using the band
// column-major storage scheme. It has the same fields as TriangularBand.
type TriangularBandCols TriangularBand
// From copies the elements of the row-major triangular band matrix a into
// the column-major receiver. The copy is delegated to BandCols.From, with K
// used as the upper bandwidth for blas.Upper and as the lower bandwidth for
// blas.Lower.
//
// From panics if the receiver and a differ in N, K, Uplo or Diag, if either
// Stride is smaller than K+1, or if Uplo is neither blas.Upper nor blas.Lower.
func (t TriangularBandCols) From(a TriangularBand) {
	switch {
	case t.N != a.N:
		panic("cblas128: mismatched dimension")
	case t.K != a.K:
		panic("cblas128: mismatched bandwidth")
	case a.Stride < a.K+1:
		panic("cblas128: short stride for source")
	case t.Stride < t.K+1:
		panic("cblas128: short stride for destination")
	case t.Uplo != a.Uplo:
		panic("cblas128: mismatched BLAS uplo")
	case t.Diag != a.Diag:
		panic("cblas128: mismatched BLAS diag")
	}
	// Exactly one of the two bandwidths is non-zero for a triangular band.
	var kl, ku int
	switch a.Uplo {
	case blas.Upper:
		ku = a.K
	case blas.Lower:
		kl = a.K
	default:
		panic("cblas128: bad BLAS uplo")
	}
	colMajor := BandCols{Rows: t.N, Cols: t.N, KL: kl, KU: ku, Stride: t.Stride, Data: t.Data}
	rowMajor := Band{Rows: a.N, Cols: a.N, KL: kl, KU: ku, Stride: a.Stride, Data: a.Data}
	colMajor.From(rowMajor)
}
// From copies the elements of the column-major triangular band matrix a into
// the row-major receiver. The copy is delegated to Band.From, with K used as
// the upper bandwidth for blas.Upper and as the lower bandwidth for
// blas.Lower.
//
// From panics if the receiver and a differ in N, K, Uplo or Diag, if either
// Stride is smaller than K+1, or if Uplo is neither blas.Upper nor blas.Lower.
func (t TriangularBand) From(a TriangularBandCols) {
	switch {
	case t.N != a.N:
		panic("cblas128: mismatched dimension")
	case t.K != a.K:
		panic("cblas128: mismatched bandwidth")
	case a.Stride < a.K+1:
		panic("cblas128: short stride for source")
	case t.Stride < t.K+1:
		panic("cblas128: short stride for destination")
	case t.Uplo != a.Uplo:
		panic("cblas128: mismatched BLAS uplo")
	case t.Diag != a.Diag:
		panic("cblas128: mismatched BLAS diag")
	}
	// Exactly one of the two bandwidths is non-zero for a triangular band.
	var kl, ku int
	switch a.Uplo {
	case blas.Upper:
		ku = a.K
	case blas.Lower:
		kl = a.K
	default:
		panic("cblas128: bad BLAS uplo")
	}
	rowMajor := Band{Rows: t.N, Cols: t.N, KL: kl, KU: ku, Stride: t.Stride, Data: t.Data}
	colMajor := BandCols{Rows: a.N, Cols: a.N, KL: kl, KU: ku, Stride: a.Stride, Data: a.Data}
	rowMajor.From(colMajor)
}
// min returns the smaller of a and b.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
// max returns the larger of a and b.
func max(a, b int) int {
	if b > a {
		return b
	}
	return a
}

View File

@@ -0,0 +1,155 @@
// Code generated by "go generate gonum.org/v1/gonum/blas”; DO NOT EDIT.
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cblas128
import "gonum.org/v1/gonum/blas"
// HermitianCols represents a Hermitian matrix using the conventional
// column-major storage scheme. It has the same fields as Hermitian.
type HermitianCols Hermitian
// From copies the triangle of the row-major matrix a selected by a.Uplo into
// the column-major receiver.
//
// From panics if the receiver and a differ in N or Uplo, or if Uplo is
// neither blas.Upper nor blas.Lower. The receiver's backing data must be
// large enough to hold the copied elements.
func (t HermitianCols) From(a Hermitian) {
	switch {
	case t.N != a.N:
		panic("cblas128: mismatched dimension")
	case t.Uplo != a.Uplo:
		panic("cblas128: mismatched BLAS uplo")
	}
	switch a.Uplo {
	case blas.Upper, blas.Lower:
	default:
		panic("cblas128: bad BLAS uplo")
	}
	n := a.N
	for row := 0; row < n; row++ {
		// Column range [first, last) of the stored triangle on this row.
		first, last := 0, row+1
		if a.Uplo == blas.Upper {
			first, last = row, n
		}
		for col := first; col < last; col++ {
			t.Data[col*t.Stride+row] = a.Data[row*a.Stride+col]
		}
	}
}
// From copies the triangle of the column-major matrix a selected by a.Uplo
// into the row-major receiver.
//
// From panics if the receiver and a differ in N or Uplo, or if Uplo is
// neither blas.Upper nor blas.Lower. The receiver's backing data must be
// large enough to hold the copied elements.
func (t Hermitian) From(a HermitianCols) {
	switch {
	case t.N != a.N:
		panic("cblas128: mismatched dimension")
	case t.Uplo != a.Uplo:
		panic("cblas128: mismatched BLAS uplo")
	}
	switch a.Uplo {
	case blas.Upper, blas.Lower:
	default:
		panic("cblas128: bad BLAS uplo")
	}
	n := a.N
	for row := 0; row < n; row++ {
		// Column range [first, last) of the stored triangle on this row.
		first, last := 0, row+1
		if a.Uplo == blas.Upper {
			first, last = row, n
		}
		for col := first; col < last; col++ {
			t.Data[row*t.Stride+col] = a.Data[col*a.Stride+row]
		}
	}
}
// HermitianBandCols represents a Hermitian matrix using the band
// column-major storage scheme. It has the same fields as HermitianBand.
type HermitianBandCols HermitianBand
// From copies the elements of the row-major Hermitian band matrix a into the
// column-major receiver. The copy is delegated to BandCols.From, with K used
// as the upper bandwidth for blas.Upper and as the lower bandwidth for
// blas.Lower.
//
// From panics if the receiver and a differ in N, K or Uplo, if either Stride
// is smaller than K+1, or if Uplo is neither blas.Upper nor blas.Lower.
func (t HermitianBandCols) From(a HermitianBand) {
	switch {
	case t.N != a.N:
		panic("cblas128: mismatched dimension")
	case t.K != a.K:
		panic("cblas128: mismatched bandwidth")
	case a.Stride < a.K+1:
		panic("cblas128: short stride for source")
	case t.Stride < t.K+1:
		panic("cblas128: short stride for destination")
	case t.Uplo != a.Uplo:
		panic("cblas128: mismatched BLAS uplo")
	}
	// Only the stored triangle has a non-zero bandwidth.
	var kl, ku int
	switch a.Uplo {
	case blas.Upper:
		ku = a.K
	case blas.Lower:
		kl = a.K
	default:
		panic("cblas128: bad BLAS uplo")
	}
	colMajor := BandCols{Rows: t.N, Cols: t.N, KL: kl, KU: ku, Stride: t.Stride, Data: t.Data}
	rowMajor := Band{Rows: a.N, Cols: a.N, KL: kl, KU: ku, Stride: a.Stride, Data: a.Data}
	colMajor.From(rowMajor)
}
// From copies the elements of the column-major Hermitian band matrix a into
// the row-major receiver. The copy is delegated to Band.From, with K used as
// the upper bandwidth for blas.Upper and as the lower bandwidth for
// blas.Lower.
//
// From panics if the receiver and a differ in N, K or Uplo, if either Stride
// is smaller than K+1, or if Uplo is neither blas.Upper nor blas.Lower.
func (t HermitianBand) From(a HermitianBandCols) {
	switch {
	case t.N != a.N:
		panic("cblas128: mismatched dimension")
	case t.K != a.K:
		panic("cblas128: mismatched bandwidth")
	case a.Stride < a.K+1:
		panic("cblas128: short stride for source")
	case t.Stride < t.K+1:
		panic("cblas128: short stride for destination")
	case t.Uplo != a.Uplo:
		panic("cblas128: mismatched BLAS uplo")
	}
	// Only the stored triangle has a non-zero bandwidth.
	var kl, ku int
	switch a.Uplo {
	case blas.Upper:
		ku = a.K
	case blas.Lower:
		kl = a.K
	default:
		panic("cblas128: bad BLAS uplo")
	}
	rowMajor := Band{Rows: t.N, Cols: t.N, KL: kl, KU: ku, Stride: t.Stride, Data: t.Data}
	colMajor := BandCols{Rows: a.N, Cols: a.N, KL: kl, KU: ku, Stride: a.Stride, Data: a.Data}
	rowMajor.From(colMajor)
}

View File

@@ -0,0 +1,155 @@
// Code generated by "go generate gonum.org/v1/gonum/blas”; DO NOT EDIT.
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cblas128
import "gonum.org/v1/gonum/blas"
// SymmetricCols represents a symmetric matrix using the conventional
// column-major storage scheme. It has the same fields as Symmetric.
type SymmetricCols Symmetric
// From copies the triangle of the row-major matrix a selected by a.Uplo into
// the column-major receiver.
//
// From panics if the receiver and a differ in N or Uplo, or if Uplo is
// neither blas.Upper nor blas.Lower. The receiver's backing data must be
// large enough to hold the copied elements.
func (t SymmetricCols) From(a Symmetric) {
	switch {
	case t.N != a.N:
		panic("cblas128: mismatched dimension")
	case t.Uplo != a.Uplo:
		panic("cblas128: mismatched BLAS uplo")
	}
	switch a.Uplo {
	case blas.Upper, blas.Lower:
	default:
		panic("cblas128: bad BLAS uplo")
	}
	n := a.N
	for row := 0; row < n; row++ {
		// Column range [first, last) of the stored triangle on this row.
		first, last := 0, row+1
		if a.Uplo == blas.Upper {
			first, last = row, n
		}
		for col := first; col < last; col++ {
			t.Data[col*t.Stride+row] = a.Data[row*a.Stride+col]
		}
	}
}
// From copies the triangle of the column-major matrix a selected by a.Uplo
// into the row-major receiver.
//
// From panics if the receiver and a differ in N or Uplo, or if Uplo is
// neither blas.Upper nor blas.Lower. The receiver's backing data must be
// large enough to hold the copied elements.
func (t Symmetric) From(a SymmetricCols) {
	switch {
	case t.N != a.N:
		panic("cblas128: mismatched dimension")
	case t.Uplo != a.Uplo:
		panic("cblas128: mismatched BLAS uplo")
	}
	switch a.Uplo {
	case blas.Upper, blas.Lower:
	default:
		panic("cblas128: bad BLAS uplo")
	}
	n := a.N
	for row := 0; row < n; row++ {
		// Column range [first, last) of the stored triangle on this row.
		first, last := 0, row+1
		if a.Uplo == blas.Upper {
			first, last = row, n
		}
		for col := first; col < last; col++ {
			t.Data[row*t.Stride+col] = a.Data[col*a.Stride+row]
		}
	}
}
// SymmetricBandCols represents a symmetric matrix using the band
// column-major storage scheme. It has the same fields as SymmetricBand.
type SymmetricBandCols SymmetricBand
// From copies the elements of the row-major symmetric band matrix a into the
// column-major receiver. The copy is delegated to BandCols.From, with K used
// as the upper bandwidth for blas.Upper and as the lower bandwidth for
// blas.Lower.
//
// From panics if the receiver and a differ in N, K or Uplo, if either Stride
// is smaller than K+1, or if Uplo is neither blas.Upper nor blas.Lower.
func (t SymmetricBandCols) From(a SymmetricBand) {
	switch {
	case t.N != a.N:
		panic("cblas128: mismatched dimension")
	case t.K != a.K:
		panic("cblas128: mismatched bandwidth")
	case a.Stride < a.K+1:
		panic("cblas128: short stride for source")
	case t.Stride < t.K+1:
		panic("cblas128: short stride for destination")
	case t.Uplo != a.Uplo:
		panic("cblas128: mismatched BLAS uplo")
	}
	// Only the stored triangle has a non-zero bandwidth.
	var kl, ku int
	switch a.Uplo {
	case blas.Upper:
		ku = a.K
	case blas.Lower:
		kl = a.K
	default:
		panic("cblas128: bad BLAS uplo")
	}
	colMajor := BandCols{Rows: t.N, Cols: t.N, KL: kl, KU: ku, Stride: t.Stride, Data: t.Data}
	rowMajor := Band{Rows: a.N, Cols: a.N, KL: kl, KU: ku, Stride: a.Stride, Data: a.Data}
	colMajor.From(rowMajor)
}
// From copies the elements of the column-major symmetric band matrix a into
// the row-major receiver. The copy is delegated to Band.From, with K used as
// the upper bandwidth for blas.Upper and as the lower bandwidth for
// blas.Lower.
//
// From panics if the receiver and a differ in N, K or Uplo, if either Stride
// is smaller than K+1, or if Uplo is neither blas.Upper nor blas.Lower.
func (t SymmetricBand) From(a SymmetricBandCols) {
	switch {
	case t.N != a.N:
		panic("cblas128: mismatched dimension")
	case t.K != a.K:
		panic("cblas128: mismatched bandwidth")
	case a.Stride < a.K+1:
		panic("cblas128: short stride for source")
	case t.Stride < t.K+1:
		panic("cblas128: short stride for destination")
	case t.Uplo != a.Uplo:
		panic("cblas128: mismatched BLAS uplo")
	}
	// Only the stored triangle has a non-zero bandwidth.
	var kl, ku int
	switch a.Uplo {
	case blas.Upper:
		ku = a.K
	case blas.Lower:
		kl = a.K
	default:
		panic("cblas128: bad BLAS uplo")
	}
	rowMajor := Band{Rows: t.N, Cols: t.N, KL: kl, KU: ku, Stride: t.Stride, Data: t.Data}
	colMajor := BandCols{Rows: a.N, Cols: a.N, KL: kl, KU: ku, Stride: a.Stride, Data: a.Data}
	rowMajor.From(colMajor)
}

6
vendor/gonum.org/v1/gonum/blas/cblas128/doc.go generated vendored Normal file
View File

@@ -0,0 +1,6 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package cblas128 provides a simple interface to the complex128 BLAS API.
package cblas128 // import "gonum.org/v1/gonum/blas/cblas128"

159
vendor/gonum.org/v1/gonum/blas/conversions.bash generated vendored Normal file
View File

@@ -0,0 +1,159 @@
#!/usr/bin/env bash

# Copyright ©2017 The Gonum Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.

# Derives the conversion code and tests of the blas32, cblas128 and cblas64
# packages from the blas64 sources. All paths are relative, so the script
# expects to be run from the directory that contains those package
# directories.

# generate SRC DST TYPE PKG [SED_ARG...]
#
# Writes the "Code generated" header to DST, then appends SRC after rewriting
# float64 to TYPE with gofmt and the first "blas64" on each line to PKG with
# sed. Any extra arguments are passed to the same sed invocation, after the
# package rename.
generate() {
	local src=$1 dst=$2 typ=$3 pkg=$4
	shift 4

	echo Generating "${dst}"
	echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas”; DO NOT EDIT.\n' > "${dst}"
	cat "${src}" \
	| gofmt -r "float64 -> ${typ}" \
	| sed -e "s/blas64/${pkg}/" "$@" \
	>> "${dst}"
}

# sed arguments that turn the symmetric sources into their Hermitian variants.
hermitian=(
	-e 's/Symmetric/Hermitian/g'
	-e 's/a symmetric/an Hermitian/g'
	-e 's/symmetric/hermitian/g'
	-e 's/Sym/Herm/g'
)

# sed arguments that swap the "math" import of the tests for the package
# matching the generated element type.
math32=(-e 's_"math"_math "gonum.org/v1/gonum/internal/math32"_')
cmplx128=(-e 's_"math"_math "math/cmplx"_')
cmplx64=(-e 's_"math"_math "gonum.org/v1/gonum/internal/cmplx64"_')

# Generate code for blas32.
generate blas64/conv.go                blas32/conv.go                float32 blas32
generate blas64/conv_test.go           blas32/conv_test.go           float32 blas32 "${math32[@]}"
generate blas64/conv_symmetric.go      blas32/conv_symmetric.go      float32 blas32
generate blas64/conv_symmetric_test.go blas32/conv_symmetric_test.go float32 blas32 "${math32[@]}"

# Generate code for cblas128.
generate blas64/conv.go                cblas128/conv.go                complex128 cblas128
generate blas64/conv_test.go           cblas128/conv_test.go           complex128 cblas128 "${cmplx128[@]}"
generate blas64/conv_symmetric.go      cblas128/conv_symmetric.go      complex128 cblas128
generate blas64/conv_symmetric_test.go cblas128/conv_symmetric_test.go complex128 cblas128 "${cmplx128[@]}"
generate blas64/conv_symmetric.go      cblas128/conv_hermitian.go      complex128 cblas128 "${hermitian[@]}"
generate blas64/conv_symmetric_test.go cblas128/conv_hermitian_test.go complex128 cblas128 "${hermitian[@]}" "${cmplx128[@]}"

# Generate code for cblas64.
generate blas64/conv.go                cblas64/conv.go                complex64 cblas64
generate blas64/conv_test.go           cblas64/conv_test.go           complex64 cblas64 "${cmplx64[@]}"
generate blas64/conv_symmetric.go      cblas64/conv_hermitian.go      complex64 cblas64 "${hermitian[@]}"
generate blas64/conv_symmetric_test.go cblas64/conv_hermitian_test.go complex64 cblas64 "${hermitian[@]}" "${cmplx64[@]}"

108
vendor/gonum.org/v1/gonum/blas/doc.go generated vendored Normal file
View File

@@ -0,0 +1,108 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
Package blas provides interfaces for the BLAS linear algebra standard.
All methods must perform appropriate parameter checking and panic if
provided parameters that do not conform to the requirements specified
by the BLAS standard.
Quick Reference Guide to the BLAS from http://www.netlib.org/lapack/lug/node145.html
This version is modified to remove the "order" option. All matrix operations are
on row-order matrices.
Level 1 BLAS
dim scalar vector vector scalars 5-element prefixes
struct
_rotg ( a, b ) S, D
_rotmg( d1, d2, a, b ) S, D
_rot ( n, x, incX, y, incY, c, s ) S, D
_rotm ( n, x, incX, y, incY, param ) S, D
_swap ( n, x, incX, y, incY ) S, D, C, Z
_scal ( n, alpha, x, incX ) S, D, C, Z, Cs, Zd
_copy ( n, x, incX, y, incY ) S, D, C, Z
_axpy ( n, alpha, x, incX, y, incY ) S, D, C, Z
_dot ( n, x, incX, y, incY ) S, D, Ds
_dotu ( n, x, incX, y, incY ) C, Z
_dotc ( n, x, incX, y, incY ) C, Z
__dot ( n, alpha, x, incX, y, incY ) Sds
_nrm2 ( n, x, incX ) S, D, Sc, Dz
_asum ( n, x, incX ) S, D, Sc, Dz
I_amax( n, x, incX ) s, d, c, z
Level 2 BLAS
options dim b-width scalar matrix vector scalar vector prefixes
_gemv ( trans, m, n, alpha, a, lda, x, incX, beta, y, incY ) S, D, C, Z
_gbmv ( trans, m, n, kL, kU, alpha, a, lda, x, incX, beta, y, incY ) S, D, C, Z
_hemv ( uplo, n, alpha, a, lda, x, incX, beta, y, incY ) C, Z
_hbmv ( uplo, n, k, alpha, a, lda, x, incX, beta, y, incY ) C, Z
_hpmv ( uplo, n, alpha, ap, x, incX, beta, y, incY ) C, Z
_symv ( uplo, n, alpha, a, lda, x, incX, beta, y, incY ) S, D
_sbmv ( uplo, n, k, alpha, a, lda, x, incX, beta, y, incY ) S, D
_spmv ( uplo, n, alpha, ap, x, incX, beta, y, incY ) S, D
_trmv ( uplo, trans, diag, n, a, lda, x, incX ) S, D, C, Z
_tbmv ( uplo, trans, diag, n, k, a, lda, x, incX ) S, D, C, Z
_tpmv ( uplo, trans, diag, n, ap, x, incX ) S, D, C, Z
_trsv ( uplo, trans, diag, n, a, lda, x, incX ) S, D, C, Z
_tbsv ( uplo, trans, diag, n, k, a, lda, x, incX ) S, D, C, Z
_tpsv ( uplo, trans, diag, n, ap, x, incX ) S, D, C, Z
options dim scalar vector vector matrix prefixes
_ger ( m, n, alpha, x, incX, y, incY, a, lda ) S, D
_geru ( m, n, alpha, x, incX, y, incY, a, lda ) C, Z
_gerc ( m, n, alpha, x, incX, y, incY, a, lda ) C, Z
_her ( uplo, n, alpha, x, incX, a, lda ) C, Z
_hpr ( uplo, n, alpha, x, incX, ap ) C, Z
_her2 ( uplo, n, alpha, x, incX, y, incY, a, lda ) C, Z
_hpr2 ( uplo, n, alpha, x, incX, y, incY, ap ) C, Z
_syr ( uplo, n, alpha, x, incX, a, lda ) S, D
_spr ( uplo, n, alpha, x, incX, ap ) S, D
_syr2 ( uplo, n, alpha, x, incX, y, incY, a, lda ) S, D
_spr2 ( uplo, n, alpha, x, incX, y, incY, ap ) S, D
Level 3 BLAS
options dim scalar matrix matrix scalar matrix prefixes
_gemm ( transA, transB, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc ) S, D, C, Z
_symm ( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc ) S, D, C, Z
_hemm ( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc ) C, Z
_syrk ( uplo, trans, n, k, alpha, a, lda, beta, c, ldc ) S, D, C, Z
_herk ( uplo, trans, n, k, alpha, a, lda, beta, c, ldc ) C, Z
_syr2k( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc ) S, D, C, Z
_her2k( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc ) C, Z
_trmm ( side, uplo, transA, diag, m, n, alpha, a, lda, b, ldb ) S, D, C, Z
_trsm ( side, uplo, transA, diag, m, n, alpha, a, lda, b, ldb ) S, D, C, Z
Meaning of prefixes
S - float32 C - complex64
D - float64 Z - complex128
Matrix types
GE - GEneral GB - General Band
SY - SYmmetric SB - Symmetric Band SP - Symmetric Packed
HE - HErmitian HB - Hermitian Band HP - Hermitian Packed
TR - TRiangular TB - Triangular Band TP - Triangular Packed
Options
trans = NoTrans, Trans, ConjTrans
uplo = Upper, Lower
diag = Nonunit, Unit
side = Left, Right (A or op(A) on the left, or A or op(A) on the right)
For real matrices, Trans and ConjTrans have the same meaning.
For Hermitian matrices, trans = Trans is not allowed.
For complex symmetric matrices, trans = ConjTrans is not allowed.
*/
package blas // import "gonum.org/v1/gonum/blas"

314
vendor/gonum.org/v1/gonum/blas/gonum/dgemm.go generated vendored Normal file
View File

@@ -0,0 +1,314 @@
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import (
"runtime"
"sync"
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/internal/asm/f64"
)
// Dgemm performs one of the matrix-matrix operations
//  C = alpha * A * B + beta * C
//  C = alpha * A^T * B + beta * C
//  C = alpha * A * B^T + beta * C
//  C = alpha * A^T * B^T + beta * C
// where A is an m×k or k×m dense matrix, B is an n×k or k×n dense matrix, C is
// an m×n matrix, and alpha and beta are scalars. tA and tB specify whether A or
// B are transposed.
//
// Dgemm panics if tA or tB is not a valid transpose value, if m, n or k is
// negative, if a leading dimension is too small for its matrix, or if a, b
// or c is too short to hold its matrix.
func (Implementation) Dgemm(tA, tB blas.Transpose, m, n, k int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) {
	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
		panic(badTranspose)
	}
	if tB != blas.NoTrans && tB != blas.Trans && tB != blas.ConjTrans {
		panic(badTranspose)
	}
	switch {
	case m < 0:
		panic(mLT0)
	case n < 0:
		panic(nLT0)
	case k < 0:
		panic(kLT0)
	}

	// Both values were validated above, so anything other than NoTrans is a
	// (conjugate) transpose.
	aTrans := tA != blas.NoTrans
	bTrans := tB != blas.NoTrans

	// Dimensions of a and b as they are laid out in memory.
	rowsA, colsA := m, k
	if aTrans {
		rowsA, colsA = k, m
	}
	rowsB, colsB := k, n
	if bTrans {
		rowsB, colsB = n, k
	}

	switch {
	case lda < max(1, colsA):
		panic(badLdA)
	case ldb < max(1, colsB):
		panic(badLdB)
	case ldc < max(1, n):
		panic(badLdC)
	}

	// Quick return if possible.
	if m == 0 || n == 0 {
		return
	}

	// For zero matrix size the following slice length checks are trivially satisfied.
	switch {
	case len(a) < (rowsA-1)*lda+colsA:
		panic(shortA)
	case len(b) < (rowsB-1)*ldb+colsB:
		panic(shortB)
	case len(c) < (m-1)*ldc+n:
		panic(shortC)
	}

	// Quick return if possible.
	if beta == 1 && (alpha == 0 || k == 0) {
		return
	}

	// Scale c by beta. A zero beta overwrites c instead of multiplying it.
	if beta != 1 {
		for i := 0; i < m; i++ {
			row := c[i*ldc : i*ldc+n]
			if beta == 0 {
				for j := range row {
					row[j] = 0
				}
				continue
			}
			for j := range row {
				row[j] *= beta
			}
		}
	}

	dgemmParallel(aTrans, bTrans, m, n, k, a, lda, b, ldb, c, ldc, alpha)
}
// dgemmParallel updates c with the product of the optionally transposed
// matrices a and b, scaled by alpha. The work is split into blocks of c that
// are handed to a pool of worker goroutines over a channel. When the number
// of blocks is below minParBlock the whole product is computed by a single
// call to dgemmSerial instead.
func dgemmParallel(aTrans, bTrans bool, m, n, k int, a []float64, lda int, b []float64, ldb int, c []float64, ldc int, alpha float64) {
// dgemmParallel computes a parallel matrix multiplication by partitioning
// a and b into sub-blocks, and updating c with the multiplication of the sub-block
// In all cases,
// A = [ A_11 A_12 ... A_1j
// A_21 A_22 ... A_2j
// ...
// A_i1 A_i2 ... A_ij]
//
// and same for B. All of the submatrix sizes are blockSize×blockSize except
// at the edges.
//
// In all cases, there is one dimension for each matrix along which
// C must be updated sequentially.
// Cij = \sum_k Aik Bkj, (A * B)
// Cij = \sum_k Aki Bkj, (A^T * B)
// Cij = \sum_k Aik Bjk, (A * B^T)
// Cij = \sum_k Aki Bjk, (A^T * B^T)
//
// This code computes one {i, j} block sequentially along the k dimension,
// and computes all of the {i, j} blocks concurrently. This
// partitioning allows Cij to be updated in-place without race-conditions.
// Instead of launching a goroutine for each possible concurrent computation,
// a number of worker goroutines are created and channels are used to pass
// available and completed cases.
//
// http://alexkr.com/docs/matrixmult.pdf is a good reference on matrix-matrix
// multiplies, though this code does not copy matrices to attempt to eliminate
// cache misses.
// maxKLen keeps the full inner dimension available inside the workers, where
// the name k is reused as the block loop index.
maxKLen := k
parBlocks := blocks(m, blockSize) * blocks(n, blockSize)
if parBlocks < minParBlock {
// The matrix multiplication is small in the dimensions where it can be
// computed concurrently. Just do it in serial.
dgemmSerial(aTrans, bTrans, m, n, k, a, lda, b, ldb, c, ldc, alpha)
return
}
// Use at most one worker per block of c.
nWorkers := runtime.GOMAXPROCS(0)
if parBlocks < nWorkers {
nWorkers = parBlocks
}
// There is a tradeoff between the workers having to wait for work
// and a large buffer making operations slow.
buf := buffMul * nWorkers
if buf > parBlocks {
buf = parBlocks
}
sendChan := make(chan subMul, buf)
// Launch workers. A worker receives an {i, j} submatrix of c, and computes
// A_ik B_kj (or the transposed version) storing the result in c_ij. When the
// channel is finally closed, it signals to the waitgroup that it has finished
// computing.
var wg sync.WaitGroup
for i := 0; i < nWorkers; i++ {
wg.Add(1)
go func() {
defer wg.Done()
for sub := range sendChan {
i := sub.i
j := sub.j
// Blocks on the bottom and right edges may be smaller than blockSize.
leni := blockSize
if i+leni > m {
leni = m - i
}
lenj := blockSize
if j+lenj > n {
lenj = n - j
}
cSub := sliceView64(c, ldc, i, j, leni, lenj)
// Compute A_ik B_kj for all k
for k := 0; k < maxKLen; k += blockSize {
lenk := blockSize
if k+lenk > maxKLen {
lenk = maxKLen - k
}
// The row and column offsets of the views swap when the matrix is
// used transposed.
var aSub, bSub []float64
if aTrans {
aSub = sliceView64(a, lda, k, i, lenk, leni)
} else {
aSub = sliceView64(a, lda, i, k, leni, lenk)
}
if bTrans {
bSub = sliceView64(b, ldb, j, k, lenj, lenk)
} else {
bSub = sliceView64(b, ldb, k, j, lenk, lenj)
}
dgemmSerial(aTrans, bTrans, leni, lenj, lenk, aSub, lda, bSub, ldb, cSub, ldc, alpha)
}
}
}()
}
// Send out all of the {i, j} subblocks for computation.
for i := 0; i < m; i += blockSize {
for j := 0; j < n; j += blockSize {
sendChan <- subMul{
i: i,
j: j,
}
}
}
// Closing the channel ends the workers' range loops; wait for them to drain it.
close(sendChan)
wg.Wait()
}
// dgemmSerial is serial matrix multiply
func dgemmSerial(aTrans, bTrans bool, m, n, k int, a []float64, lda int, b []float64, ldb int, c []float64, ldc int, alpha float64) {
switch {
case !aTrans && !bTrans:
dgemmSerialNotNot(m, n, k, a, lda, b, ldb, c, ldc, alpha)
return
case aTrans && !bTrans:
dgemmSerialTransNot(m, n, k, a, lda, b, ldb, c, ldc, alpha)
return
case !aTrans && bTrans:
dgemmSerialNotTrans(m, n, k, a, lda, b, ldb, c, ldc, alpha)
return
case aTrans && bTrans:
dgemmSerialTransTrans(m, n, k, a, lda, b, ldb, c, ldc, alpha)
return
default:
panic("unreachable")
}
}
// dgemmSerialNotNot is the dgemmSerial kernel for the case where neither a
// nor b is transposed.
func dgemmSerialNotNot(m, n, k int, a []float64, lda int, b []float64, ldb int, c []float64, ldc int, alpha float64) {
	// Rows are sliced out up front instead of indexing with [i*stride+j];
	// this was measured as approximately 5 times faster as of go 1.3.
	for i := 0; i < m; i++ {
		cRow := c[i*ldc : i*ldc+n]
		aRow := a[i*lda : i*lda+k]
		for l := range aRow {
			scaled := alpha * aRow[l]
			if scaled == 0 {
				// A zero coefficient contributes nothing to this row of c.
				continue
			}
			f64.AxpyUnitary(scaled, b[l*ldb:l*ldb+n], cRow)
		}
	}
}
// dgemmSerialTransNot is the dgemmSerial kernel for the case where a is
// transposed and b is not.
func dgemmSerialTransNot(m, n, k int, a []float64, lda int, b []float64, ldb int, c []float64, ldc int, alpha float64) {
	// Rows are sliced out up front instead of indexing with [i*stride+j];
	// this was measured as approximately 5 times faster as of go 1.3.
	for l := 0; l < k; l++ {
		bRow := b[l*ldb : l*ldb+n]
		aRow := a[l*lda : l*lda+m]
		for i := range aRow {
			scaled := alpha * aRow[i]
			if scaled == 0 {
				// A zero coefficient contributes nothing to row i of c.
				continue
			}
			f64.AxpyUnitary(scaled, bRow, c[i*ldc:i*ldc+n])
		}
	}
}
// dgemmSerialNotTrans is the dgemmSerial kernel for the case where a is not
// transposed and b is.
func dgemmSerialNotTrans(m, n, k int, a []float64, lda int, b []float64, ldb int, c []float64, ldc int, alpha float64) {
	// Rows are sliced out up front instead of indexing with [i*stride+j];
	// this was measured as approximately 5 times faster as of go 1.3.
	for i := 0; i < m; i++ {
		aRow := a[i*lda : i*lda+k]
		cRow := c[i*ldc : i*ldc+n]
		for j := range cRow {
			// Row i of a is paired with row j of b, both of length k.
			cRow[j] += alpha * f64.DotUnitary(aRow, b[j*ldb:j*ldb+k])
		}
	}
}
// dgemmSerialTransTrans is the dgemmSerial kernel for the case where both a
// and b are transposed.
func dgemmSerialTransTrans(m, n, k int, a []float64, lda int, b []float64, ldb int, c []float64, ldc int, alpha float64) {
	// Rows are sliced out up front instead of indexing with [i*stride+j];
	// this was measured as approximately 5 times faster as of go 1.3.
	for l := 0; l < k; l++ {
		aRow := a[l*lda : l*lda+m]
		for i := range aRow {
			scaled := alpha * aRow[i]
			if scaled == 0 {
				// A zero coefficient contributes nothing to row i of c.
				continue
			}
			// b[l:] is passed with increment ldb and the row of c with
			// increment 1.
			f64.AxpyInc(scaled, b[l:], c[i*ldc:i*ldc+n], uintptr(n), uintptr(ldb), 1, 0, 0)
		}
	}
}
// sliceView64 returns the part of a that backs the r×c submatrix whose
// top-left element is at row i, column j of a row-major matrix with leading
// dimension lda. The result runs from element (i, j) through the last
// element of the submatrix and shares storage with a.
func sliceView64(a []float64, lda, i, j, r, c int) []float64 {
	start := i*lda + j
	end := start + (r-1)*lda + c
	return a[start:end]
}

88
vendor/gonum.org/v1/gonum/blas/gonum/doc.go generated vendored Normal file
View File

@@ -0,0 +1,88 @@
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Ensure changes made to blas/native are reflected in blas/cgo where relevant.
/*
Package gonum is a Go implementation of the BLAS API. This implementation
panics when the input arguments are invalid as per the standard, for example
if a vector increment is zero. Note that the treatment of NaN values
is not specified, and differs among the BLAS implementations.
gonum.org/v1/gonum/blas/blas64 provides helpful wrapper functions to the BLAS
interface. The rest of this text describes the layout of the data for the input types.
Note that in the function documentation, x[i] refers to the i^th element
of the vector, which will be different from the i^th element of the slice if
incX != 1.
See http://www.netlib.org/lapack/explore-html/d4/de1/_l_i_c_e_n_s_e_source.html
for more license information.
Vector arguments are effectively strided slices. They have two input arguments,
a number of elements, n, and an increment, incX. The increment specifies the
distance between elements of the vector. The actual Go slice may be longer
than necessary.
The increment may be positive or negative, except in functions with only
a single vector argument where the increment may only be positive. If the increment
is negative, s[0] is the last element in the slice. Note that this is not the same
as counting backward from the end of the slice, as len(s) may be longer than
necessary. So, for example, if n = 5 and incX = 3, the elements of s are
[0 * * 1 * * 2 * * 3 * * 4 * * * ...]
where * elements are never accessed. If incX = -3, the same elements are
accessed, just in reverse order (4, 3, 2, 1, 0).
Dense matrices are specified by a number of rows, a number of columns, and a stride.
The stride specifies the number of entries in the slice between the first element
of successive rows. The stride must be at least as large as the number of columns
but may be longer.
[a00 ... a0n a0* ... a1stride-1 a21 ... amn am* ... amstride-1]
Thus, dense[i*ld + j] refers to the {i, j}th element of the matrix.
Symmetric and triangular matrices (non-packed) are stored identically to Dense,
except that only elements in one triangle of the matrix are accessed.
Packed symmetric and packed triangular matrices are laid out with the entries
condensed such that all of the unreferenced elements are removed. So, the upper triangular
matrix
[
1 2 3
0 4 5
0 0 6
]
and the lower-triangular matrix
[
1 0 0
2 3 0
4 5 6
]
will both be compacted as [1 2 3 4 5 6]. The (i, j) element of the original
dense matrix can be found at element i*n - (i-1)*i/2 + j for upper triangular,
and at element i * (i+1) /2 + j for lower triangular.
Banded matrices are laid out in a compact format, constructed by removing the
zeros in the rows and aligning the diagonals. For example, the matrix
[
1 2 3 0 0 0
4 5 6 7 0 0
0 8 9 10 11 0
0 0 12 13 14 15
0 0 0 16 17 18
0 0 0 0 19 20
]
implicitly becomes (* entries are never accessed)
[
* 1 2 3
4 5 6 7
8 9 10 11
12 13 14 15
16 17 18 *
19 20 * *
]
which is given to the BLAS routine as [ 1 2 3 4 ...].
See http://www.crest.iu.edu/research/mtl/reference/html/banded.html
for more information
*/
package gonum // import "gonum.org/v1/gonum/blas/gonum"

35
vendor/gonum.org/v1/gonum/blas/gonum/errors.go generated vendored Normal file
View File

@@ -0,0 +1,35 @@
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
// Panic strings used during parameter checks.
// This list is duplicated in netlib/blas/netlib. Keep in sync.
const (
// Vector increment checks.
zeroIncX = "blas: zero x index increment"
zeroIncY = "blas: zero y index increment"
// Dimension and bandwidth checks.
mLT0 = "blas: m < 0"
nLT0 = "blas: n < 0"
kLT0 = "blas: k < 0"
kLLT0 = "blas: kL < 0"
kULT0 = "blas: kU < 0"
// Option argument checks.
badUplo = "blas: illegal triangle"
badTranspose = "blas: illegal transpose"
badDiag = "blas: illegal diagonal"
badSide = "blas: illegal side"
badFlag = "blas: illegal rotm flag"
// Leading dimension checks.
badLdA = "blas: bad leading dimension of A"
badLdB = "blas: bad leading dimension of B"
badLdC = "blas: bad leading dimension of C"
// Slice length checks.
shortX = "blas: insufficient length of x"
shortY = "blas: insufficient length of y"
shortAP = "blas: insufficient length of ap"
shortA = "blas: insufficient length of a"
shortB = "blas: insufficient length of b"
shortC = "blas: insufficient length of c"
)

190
vendor/gonum.org/v1/gonum/blas/gonum/gemv.go generated vendored Normal file
View File

@@ -0,0 +1,190 @@
// Copyright ©2018 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import (
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/internal/asm/f32"
"gonum.org/v1/gonum/internal/asm/f64"
)
// TODO(Kunde21): Merge these methods back into level2double/level2single when Sgemv assembly kernels are merged into f32.
// Dgemv computes
//  y = alpha * A * x + beta * y if tA = blas.NoTrans
//  y = alpha * A^T * x + beta * y if tA = blas.Trans or blas.ConjTrans
// where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars.
//
// Dgemv panics if tA is not one of the three transpose flags, if m or n is
// negative, if lda < max(1, n), or if incX or incY is zero. If both m and n
// are non-zero it additionally panics when x, y or a is too short for the
// requested operation. When m or n is zero it returns without touching y.
func (Implementation) Dgemv(tA blas.Transpose, m, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) {
if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
panic(badTranspose)
}
if m < 0 {
panic(mLT0)
}
if n < 0 {
panic(nLT0)
}
if lda < max(1, n) {
panic(badLdA)
}
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
// Set up indexes
// x has one element per column of op(A) and y one per row of op(A), so
// the logical lengths swap between the transposed and untransposed cases.
lenX := m
lenY := n
if tA == blas.NoTrans {
lenX = n
lenY = m
}
// Quick return if possible
if m == 0 || n == 0 {
return
}
// For a negative increment the largest index touched is (1-len)*inc.
if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) {
panic(shortX)
}
if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) {
panic(shortY)
}
// The last row of a only needs n elements, not a full lda stride.
if len(a) < lda*(m-1)+n {
panic(shortA)
}
// Quick return if possible
if alpha == 0 && beta == 1 {
return
}
if alpha == 0 {
// First form y = beta * y
// Dscal is always given a positive increment here.
// NOTE(review): presumably because Dscal, like Sscal, does nothing for
// a negative increment — confirm against Dscal.
if incY > 0 {
Implementation{}.Dscal(lenY, beta, y, incY)
} else {
Implementation{}.Dscal(lenY, beta, y, -incY)
}
return
}
// Form y = alpha * A * x + y
// beta is passed to the f64 kernels, so the beta * y scaling is presumably
// applied inside them — confirm against f64.GemvN and f64.GemvT.
if tA == blas.NoTrans {
f64.GemvN(uintptr(m), uintptr(n), alpha, a, uintptr(lda), x, uintptr(incX), beta, y, uintptr(incY))
return
}
// Cases where a is transposed.
f64.GemvT(uintptr(m), uintptr(n), alpha, a, uintptr(lda), x, uintptr(incX), beta, y, uintptr(incY))
}
// Sgemv computes
//  y = alpha * A * x + beta * y if tA = blas.NoTrans
//  y = alpha * A^T * x + beta * y if tA = blas.Trans or blas.ConjTrans
// where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars.
//
// Sgemv panics if tA is not one of the three transpose flags, if m or n is
// negative, if lda < max(1, n), or if incX or incY is zero. If both m and n
// are non-zero it additionally panics when x, y or a is too short for the
// requested operation. When m or n is zero it returns without touching y.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Sgemv(tA blas.Transpose, m, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) {
if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
panic(badTranspose)
}
if m < 0 {
panic(mLT0)
}
if n < 0 {
panic(nLT0)
}
if lda < max(1, n) {
panic(badLdA)
}
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
// Quick return if possible.
if m == 0 || n == 0 {
return
}
// Set up indexes
// x has one element per column of op(A) and y one per row of op(A), so
// the logical lengths swap between the transposed and untransposed cases.
lenX := m
lenY := n
if tA == blas.NoTrans {
lenX = n
lenY = m
}
// For a negative increment the largest index touched is (1-len)*inc.
if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) {
panic(shortX)
}
if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) {
panic(shortY)
}
// The last row of a only needs n elements, not a full lda stride.
if len(a) < lda*(m-1)+n {
panic(shortA)
}
// Quick return if possible.
if alpha == 0 && beta == 1 {
return
}
// First form y = beta * y
// Sscal is always given a positive increment here.
// NOTE(review): presumably because Sscal is documented as having no effect
// for a negative increment — confirm against Sscal.
if incY > 0 {
Implementation{}.Sscal(lenY, beta, y, incY)
} else {
Implementation{}.Sscal(lenY, beta, y, -incY)
}
if alpha == 0 {
return
}
// kx and ky are the starting offsets into x and y; for a negative
// increment traversal starts at the far end of the slice.
var kx, ky int
if incX < 0 {
kx = -(lenX - 1) * incX
}
if incY < 0 {
ky = -(lenY - 1) * incY
}
// Form y = alpha * A * x + y
if tA == blas.NoTrans {
if incX == 1 && incY == 1 {
// Each y[i] accumulates alpha times the dot product of row i of a with x.
for i := 0; i < m; i++ {
y[i] += alpha * f32.DotUnitary(a[lda*i:lda*i+n], x[:n])
}
return
}
iy := ky
for i := 0; i < m; i++ {
y[iy] += alpha * f32.DotInc(x, a[lda*i:lda*i+n], uintptr(n), uintptr(incX), 1, uintptr(kx), 0)
iy += incY
}
return
}
// Cases where a is transposed.
if incX == 1 && incY == 1 {
// y accumulates alpha*x[i] times row i of a; rows with a zero
// multiplier are skipped.
for i := 0; i < m; i++ {
tmp := alpha * x[i]
if tmp != 0 {
f32.AxpyUnitaryTo(y, tmp, a[lda*i:lda*i+n], y[:n])
}
}
return
}
ix := kx
for i := 0; i < m; i++ {
tmp := alpha * x[ix]
if tmp != 0 {
f32.AxpyInc(tmp, a[lda*i:lda*i+n], y, uintptr(n), 1, uintptr(incY), 0, uintptr(ky))
}
ix += incX
}
}

58
vendor/gonum.org/v1/gonum/blas/gonum/gonum.go generated vendored Normal file
View File

@@ -0,0 +1,58 @@
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate ./single_precision.bash
package gonum
import (
"math"
"gonum.org/v1/gonum/internal/math32"
)
// Implementation is the stateless receiver type on which the BLAS routines
// of this package are defined.
type Implementation struct{}
// [SD]gemm behavior constants. These are kept here to keep them out of the
// way during single precision code generation.
const (
blockSize = 64 // b x b matrix
minParBlock = 4 // minimum number of blocks needed to go parallel
buffMul = 4 // how big is the buffer relative to the number of workers
)
// subMul is a common type shared by [SD]gemm. Its two fields together give
// the index of a block.
type subMul struct {
	i int // first component of the block index
	j int // second component of the block index
}
// max returns the larger of a and b.
func max(a, b int) int {
	if b >= a {
		return b
	}
	return a
}
// min returns the smaller of a and b.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
// blocks reports how many blocks of size bsize are needed to cover a
// dimension of length dim, i.e. dim/bsize rounded up.
func blocks(dim, bsize int) int {
	padded := dim + bsize - 1
	return padded / bsize
}
// dcabs1 returns |real(z)|+|imag(z)|.
func dcabs1(z complex128) float64 {
	re, im := real(z), imag(z)
	return math.Abs(re) + math.Abs(im)
}
// scabs1 returns |real(z)|+|imag(z)|.
func scabs1(z complex64) float32 {
	re, im := real(z), imag(z)
	return math32.Abs(re) + math32.Abs(im)
}

445
vendor/gonum.org/v1/gonum/blas/gonum/level1cmplx128.go generated vendored Normal file
View File

@@ -0,0 +1,445 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import (
"math"
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/internal/asm/c128"
)
var _ blas.Complex128Level1 = Implementation{}
// Dzasum returns the sum of the absolute values of the elements of x
//  \sum_i |Re(x[i])| + |Im(x[i])|
// Dzasum returns 0 if incX is negative.
//
// Dzasum panics if n is negative, if incX is zero, or if x is too short
// for n elements at stride incX.
func (Implementation) Dzasum(n int, x []complex128, incX int) float64 {
	switch {
	case n < 0:
		panic(nLT0)
	case incX == 0:
		panic(zeroIncX)
	case incX < 0:
		return 0
	}

	var total float64
	if incX != 1 {
		// Strided access: the last element read is x[(n-1)*incX].
		if len(x) <= (n-1)*incX {
			panic(shortX)
		}
		for k, ix := 0, 0; k < n; k, ix = k+1, ix+incX {
			total += dcabs1(x[ix])
		}
		return total
	}

	if n > len(x) {
		panic(shortX)
	}
	for i := 0; i < n; i++ {
		total += dcabs1(x[i])
	}
	return total
}
// Dznrm2 computes the Euclidean norm of the complex vector x,
//  ‖x‖_2 = sqrt(\sum_i x[i] * conj(x[i])).
// This function returns 0 if incX is negative.
//
// Dznrm2 panics if incX is zero, if n is negative, or if x is too short for
// n elements at stride incX. It returns 0 when n is zero.
func (Implementation) Dznrm2(n int, x []complex128, incX int) float64 {
if incX < 1 {
if incX == 0 {
panic(zeroIncX)
}
return 0
}
if n < 1 {
if n == 0 {
return 0
}
panic(nLT0)
}
if (n-1)*incX >= len(x) {
panic(shortX)
}
// The norm is accumulated as scale * sqrt(ssq): scale tracks the largest
// absolute component seen so far and ssq the sum of squares of all
// components divided by scale. Squares are therefore only taken of ratios
// that are at most 1 (presumably to avoid overflow and underflow).
var (
scale float64
ssq float64 = 1
)
if incX == 1 {
for _, v := range x[:n] {
re, im := math.Abs(real(v)), math.Abs(imag(v))
if re != 0 {
if re > scale {
// New maximum: rescale the running sum to the new scale.
ssq = 1 + ssq*(scale/re)*(scale/re)
scale = re
} else {
ssq += (re / scale) * (re / scale)
}
}
if im != 0 {
if im > scale {
ssq = 1 + ssq*(scale/im)*(scale/im)
scale = im
} else {
ssq += (im / scale) * (im / scale)
}
}
}
// An infinite component yields +Inf regardless of the value of ssq.
if math.IsInf(scale, 1) {
return math.Inf(1)
}
return scale * math.Sqrt(ssq)
}
// Strided variant of the loop above.
for ix := 0; ix < n*incX; ix += incX {
re, im := math.Abs(real(x[ix])), math.Abs(imag(x[ix]))
if re != 0 {
if re > scale {
ssq = 1 + ssq*(scale/re)*(scale/re)
scale = re
} else {
ssq += (re / scale) * (re / scale)
}
}
if im != 0 {
if im > scale {
ssq = 1 + ssq*(scale/im)*(scale/im)
scale = im
} else {
ssq += (im / scale) * (im / scale)
}
}
}
if math.IsInf(scale, 1) {
return math.Inf(1)
}
return scale * math.Sqrt(ssq)
}
// Izamax returns the index of the first element of x having largest |Re(·)|+|Im(·)|.
// Izamax returns -1 if n is 0 or incX is negative.
//
// Izamax panics if incX is zero, if n is negative, or if x is too short
// for n elements at stride incX.
func (Implementation) Izamax(n int, x []complex128, incX int) int {
	switch {
	case incX == 0:
		panic(zeroIncX)
	case incX < 0:
		// Return invalid index.
		return -1
	case n == 0:
		// Return invalid index.
		return -1
	case n < 0:
		panic(nLT0)
	case (n-1)*incX >= len(x):
		panic(shortX)
	}

	// The comparison is strict, so the earliest maximal element wins.
	best, bestAbs := 0, dcabs1(x[0])
	if incX == 1 {
		for i := 1; i < n; i++ {
			if a := dcabs1(x[i]); a > bestAbs {
				best, bestAbs = i, a
			}
		}
		return best
	}
	for i, ix := 1, incX; i < n; i, ix = i+1, ix+incX {
		if a := dcabs1(x[ix]); a > bestAbs {
			best, bestAbs = i, a
		}
	}
	return best
}
// Zaxpy adds alpha times x to y:
//  y[i] += alpha * x[i] for all i
//
// Zaxpy panics if incX or incY is zero, if n is negative, or if x or y is
// too short for n elements at the given stride. It does nothing when n is
// zero or alpha is zero.
func (Implementation) Zaxpy(n int, alpha complex128, x []complex128, incX int, y []complex128, incY int) {
	switch {
	case incX == 0:
		panic(zeroIncX)
	case incY == 0:
		panic(zeroIncY)
	case n == 0:
		return
	case n < 0:
		panic(nLT0)
	}

	// spanX and spanY are the largest indices touched in x and y.
	spanX, spanY := (n-1)*incX, (n-1)*incY
	if incX < 0 {
		spanX = -spanX
	}
	if incY < 0 {
		spanY = -spanY
	}
	if spanX >= len(x) {
		panic(shortX)
	}
	if spanY >= len(y) {
		panic(shortY)
	}

	if alpha == 0 {
		return
	}
	if incX == 1 && incY == 1 {
		c128.AxpyUnitary(alpha, x[:n], y[:n])
		return
	}

	// A negative increment starts at the far end of the vector.
	var startX, startY int
	if incX < 0 {
		startX = spanX
	}
	if incY < 0 {
		startY = spanY
	}
	c128.AxpyInc(alpha, x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(startX), uintptr(startY))
}
// Zcopy copies the vector x to vector y.
//
// Zcopy panics if incX or incY is zero, if n is negative, or if x or y is
// too short for n elements at the given stride. It does nothing when n is
// zero.
func (Implementation) Zcopy(n int, x []complex128, incX int, y []complex128, incY int) {
	switch {
	case incX == 0:
		panic(zeroIncX)
	case incY == 0:
		panic(zeroIncY)
	case n == 0:
		return
	case n < 0:
		panic(nLT0)
	}

	// spanX and spanY are the largest indices touched in x and y.
	spanX, spanY := (n-1)*incX, (n-1)*incY
	if incX < 0 {
		spanX = -spanX
	}
	if incY < 0 {
		spanY = -spanY
	}
	if spanX >= len(x) {
		panic(shortX)
	}
	if spanY >= len(y) {
		panic(shortY)
	}

	if incX == 1 && incY == 1 {
		copy(y[:n], x[:n])
		return
	}

	// A negative increment starts at the far end of the vector.
	var ix, iy int
	if incX < 0 {
		ix = spanX
	}
	if incY < 0 {
		iy = spanY
	}
	for k := n; k > 0; k-- {
		y[iy] = x[ix]
		ix, iy = ix+incX, iy+incY
	}
}
// Zdotc computes the dot product
//  x^H · y
// of two complex vectors x and y.
//
// Zdotc panics if incX or incY is zero, if n is negative, or if x or y is
// too short for n elements at the given stride. It returns 0 when n is zero.
func (Implementation) Zdotc(n int, x []complex128, incX int, y []complex128, incY int) complex128 {
	switch {
	case incX == 0:
		panic(zeroIncX)
	case incY == 0:
		panic(zeroIncY)
	case n == 0:
		return 0
	case n < 0:
		panic(nLT0)
	}

	if incX == 1 && incY == 1 {
		switch {
		case n > len(x):
			panic(shortX)
		case n > len(y):
			panic(shortY)
		}
		return c128.DotcUnitary(x[:n], y[:n])
	}

	// A negative increment starts at the far end of the vector.
	startX, startY := 0, 0
	if incX < 0 {
		startX = (1 - n) * incX
	}
	if incY < 0 {
		startY = (1 - n) * incY
	}
	if len(x) <= startX || len(x) <= (n-1)*incX {
		panic(shortX)
	}
	if len(y) <= startY || len(y) <= (n-1)*incY {
		panic(shortY)
	}
	return c128.DotcInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(startX), uintptr(startY))
}
// Zdotu computes the dot product
//  x^T · y
// of two complex vectors x and y.
//
// Zdotu panics if incX or incY is zero, if n is negative, or if x or y is
// too short for n elements at the given stride. It returns 0 when n is zero.
func (Implementation) Zdotu(n int, x []complex128, incX int, y []complex128, incY int) complex128 {
	switch {
	case incX == 0:
		panic(zeroIncX)
	case incY == 0:
		panic(zeroIncY)
	case n == 0:
		return 0
	case n < 0:
		panic(nLT0)
	}

	if incX == 1 && incY == 1 {
		switch {
		case n > len(x):
			panic(shortX)
		case n > len(y):
			panic(shortY)
		}
		return c128.DotuUnitary(x[:n], y[:n])
	}

	// A negative increment starts at the far end of the vector.
	startX, startY := 0, 0
	if incX < 0 {
		startX = (1 - n) * incX
	}
	if incY < 0 {
		startY = (1 - n) * incY
	}
	if len(x) <= startX || len(x) <= (n-1)*incX {
		panic(shortX)
	}
	if len(y) <= startY || len(y) <= (n-1)*incY {
		panic(shortY)
	}
	return c128.DotuInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(startX), uintptr(startY))
}
// Zdscal scales the vector x by a real scalar alpha.
// Zdscal has no effect if incX < 0.
//
// Zdscal panics if incX is zero, if n is negative, or if x is too short for
// n elements at stride incX. When alpha is zero the elements are set to
// exactly zero rather than multiplied.
func (Implementation) Zdscal(n int, alpha float64, x []complex128, incX int) {
	switch {
	case incX == 0:
		panic(zeroIncX)
	case incX < 0:
		return
	case (n-1)*incX >= len(x):
		panic(shortX)
	case n == 0:
		return
	case n < 0:
		panic(nLT0)
	}

	if incX == 1 {
		head := x[:n]
		if alpha == 0 {
			for i := range head {
				head[i] = 0
			}
			return
		}
		for i := range head {
			head[i] = complex(alpha*real(head[i]), alpha*imag(head[i]))
		}
		return
	}

	end := n * incX
	if alpha == 0 {
		for ix := 0; ix < end; ix += incX {
			x[ix] = 0
		}
		return
	}
	for ix := 0; ix < end; ix += incX {
		x[ix] = complex(alpha*real(x[ix]), alpha*imag(x[ix]))
	}
}
// Zscal scales the vector x by a complex scalar alpha.
// Zscal has no effect if incX < 0.
//
// Zscal panics if incX is zero, if n is negative, or if x is too short for
// n elements at stride incX. When alpha is zero the elements are set to
// exactly zero rather than multiplied.
func (Implementation) Zscal(n int, alpha complex128, x []complex128, incX int) {
	switch {
	case incX == 0:
		panic(zeroIncX)
	case incX < 0:
		return
	case (n-1)*incX >= len(x):
		panic(shortX)
	case n == 0:
		return
	case n < 0:
		panic(nLT0)
	}

	unit := incX == 1
	switch {
	case alpha != 0 && unit:
		c128.ScalUnitary(alpha, x[:n])
	case alpha != 0:
		c128.ScalInc(alpha, x, uintptr(n), uintptr(incX))
	case unit:
		for i := 0; i < n; i++ {
			x[i] = 0
		}
	default:
		for ix, end := 0, n*incX; ix < end; ix += incX {
			x[ix] = 0
		}
	}
}
// Zswap exchanges the elements of two complex vectors x and y.
//
// Zswap panics if incX or incY is zero, if n is negative, or if x or y is
// too short for n elements at the given stride. It does nothing when n is
// zero.
func (Implementation) Zswap(n int, x []complex128, incX int, y []complex128, incY int) {
	switch {
	case incX == 0:
		panic(zeroIncX)
	case incY == 0:
		panic(zeroIncY)
	case n == 0:
		return
	case n < 0:
		panic(nLT0)
	}

	// spanX and spanY are the largest indices touched in x and y.
	spanX, spanY := (n-1)*incX, (n-1)*incY
	if incX < 0 {
		spanX = -spanX
	}
	if incY < 0 {
		spanY = -spanY
	}
	if spanX >= len(x) {
		panic(shortX)
	}
	if spanY >= len(y) {
		panic(shortY)
	}

	if incX == 1 && incY == 1 {
		for i := 0; i < n; i++ {
			x[i], y[i] = y[i], x[i]
		}
		return
	}

	// A negative increment starts at the far end of the vector.
	var ix, iy int
	if incX < 0 {
		ix = spanX
	}
	if incY < 0 {
		iy = spanY
	}
	for k := n; k > 0; k-- {
		x[ix], y[iy] = y[iy], x[ix]
		ix, iy = ix+incX, iy+incY
	}
}

467
vendor/gonum.org/v1/gonum/blas/gonum/level1cmplx64.go generated vendored Normal file
View File

@@ -0,0 +1,467 @@
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import (
math "gonum.org/v1/gonum/internal/math32"
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/internal/asm/c64"
)
var _ blas.Complex64Level1 = Implementation{}
// Scasum returns the sum of the absolute values of the elements of x
//  \sum_i |Re(x[i])| + |Im(x[i])|
// Scasum returns 0 if incX is negative.
//
// Scasum panics if n is negative, if incX is zero, or if x is too short
// for n elements at stride incX.
//
// Complex64 implementations are autogenerated and not directly tested.
func (Implementation) Scasum(n int, x []complex64, incX int) float32 {
	switch {
	case n < 0:
		panic(nLT0)
	case incX == 0:
		panic(zeroIncX)
	case incX < 0:
		return 0
	}

	var total float32
	if incX != 1 {
		// Strided access: the last element read is x[(n-1)*incX].
		if len(x) <= (n-1)*incX {
			panic(shortX)
		}
		for k, ix := 0, 0; k < n; k, ix = k+1, ix+incX {
			total += scabs1(x[ix])
		}
		return total
	}

	if n > len(x) {
		panic(shortX)
	}
	for i := 0; i < n; i++ {
		total += scabs1(x[i])
	}
	return total
}
// Scnrm2 computes the Euclidean norm of the complex vector x,
//  ‖x‖_2 = sqrt(\sum_i x[i] * conj(x[i])).
// This function returns 0 if incX is negative.
//
// Scnrm2 panics if incX is zero, if n is negative, or if x is too short for
// n elements at stride incX. It returns 0 when n is zero.
//
// Complex64 implementations are autogenerated and not directly tested.
func (Implementation) Scnrm2(n int, x []complex64, incX int) float32 {
if incX < 1 {
if incX == 0 {
panic(zeroIncX)
}
return 0
}
if n < 1 {
if n == 0 {
return 0
}
panic(nLT0)
}
if (n-1)*incX >= len(x) {
panic(shortX)
}
// The norm is accumulated as scale * sqrt(ssq): scale tracks the largest
// absolute component seen so far and ssq the sum of squares of all
// components divided by scale. Squares are therefore only taken of ratios
// that are at most 1 (presumably to avoid overflow and underflow).
var (
scale float32
ssq float32 = 1
)
if incX == 1 {
for _, v := range x[:n] {
re, im := math.Abs(real(v)), math.Abs(imag(v))
if re != 0 {
if re > scale {
// New maximum: rescale the running sum to the new scale.
ssq = 1 + ssq*(scale/re)*(scale/re)
scale = re
} else {
ssq += (re / scale) * (re / scale)
}
}
if im != 0 {
if im > scale {
ssq = 1 + ssq*(scale/im)*(scale/im)
scale = im
} else {
ssq += (im / scale) * (im / scale)
}
}
}
// An infinite component yields +Inf regardless of the value of ssq.
if math.IsInf(scale, 1) {
return math.Inf(1)
}
return scale * math.Sqrt(ssq)
}
// Strided variant of the loop above.
for ix := 0; ix < n*incX; ix += incX {
re, im := math.Abs(real(x[ix])), math.Abs(imag(x[ix]))
if re != 0 {
if re > scale {
ssq = 1 + ssq*(scale/re)*(scale/re)
scale = re
} else {
ssq += (re / scale) * (re / scale)
}
}
if im != 0 {
if im > scale {
ssq = 1 + ssq*(scale/im)*(scale/im)
scale = im
} else {
ssq += (im / scale) * (im / scale)
}
}
}
if math.IsInf(scale, 1) {
return math.Inf(1)
}
return scale * math.Sqrt(ssq)
}
// Icamax returns the index of the first element of x having largest |Re(·)|+|Im(·)|.
// Icamax returns -1 if n is 0 or incX is negative.
//
// Icamax panics if incX is zero, if n is negative, or if x is too short
// for n elements at stride incX.
//
// Complex64 implementations are autogenerated and not directly tested.
func (Implementation) Icamax(n int, x []complex64, incX int) int {
	switch {
	case incX == 0:
		panic(zeroIncX)
	case incX < 0:
		// Return invalid index.
		return -1
	case n == 0:
		// Return invalid index.
		return -1
	case n < 0:
		panic(nLT0)
	case (n-1)*incX >= len(x):
		panic(shortX)
	}

	// The comparison is strict, so the earliest maximal element wins.
	best, bestAbs := 0, scabs1(x[0])
	if incX == 1 {
		for i := 1; i < n; i++ {
			if a := scabs1(x[i]); a > bestAbs {
				best, bestAbs = i, a
			}
		}
		return best
	}
	for i, ix := 1, incX; i < n; i, ix = i+1, ix+incX {
		if a := scabs1(x[ix]); a > bestAbs {
			best, bestAbs = i, a
		}
	}
	return best
}
// Caxpy adds alpha times x to y:
//  y[i] += alpha * x[i] for all i
//
// Caxpy panics if incX or incY is zero, if n is negative, or if x or y is
// too short for n elements at the given stride. It does nothing when n is
// zero or alpha is zero.
//
// Complex64 implementations are autogenerated and not directly tested.
func (Implementation) Caxpy(n int, alpha complex64, x []complex64, incX int, y []complex64, incY int) {
	switch {
	case incX == 0:
		panic(zeroIncX)
	case incY == 0:
		panic(zeroIncY)
	case n == 0:
		return
	case n < 0:
		panic(nLT0)
	}

	// spanX and spanY are the largest indices touched in x and y.
	spanX, spanY := (n-1)*incX, (n-1)*incY
	if incX < 0 {
		spanX = -spanX
	}
	if incY < 0 {
		spanY = -spanY
	}
	if spanX >= len(x) {
		panic(shortX)
	}
	if spanY >= len(y) {
		panic(shortY)
	}

	if alpha == 0 {
		return
	}
	if incX == 1 && incY == 1 {
		c64.AxpyUnitary(alpha, x[:n], y[:n])
		return
	}

	// A negative increment starts at the far end of the vector.
	var startX, startY int
	if incX < 0 {
		startX = spanX
	}
	if incY < 0 {
		startY = spanY
	}
	c64.AxpyInc(alpha, x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(startX), uintptr(startY))
}
// Ccopy copies the vector x to vector y.
//
// Ccopy panics if incX or incY is zero, if n is negative, or if x or y is
// too short for n elements at the given stride. It does nothing when n is
// zero.
//
// Complex64 implementations are autogenerated and not directly tested.
func (Implementation) Ccopy(n int, x []complex64, incX int, y []complex64, incY int) {
	switch {
	case incX == 0:
		panic(zeroIncX)
	case incY == 0:
		panic(zeroIncY)
	case n == 0:
		return
	case n < 0:
		panic(nLT0)
	}

	// spanX and spanY are the largest indices touched in x and y.
	spanX, spanY := (n-1)*incX, (n-1)*incY
	if incX < 0 {
		spanX = -spanX
	}
	if incY < 0 {
		spanY = -spanY
	}
	if spanX >= len(x) {
		panic(shortX)
	}
	if spanY >= len(y) {
		panic(shortY)
	}

	if incX == 1 && incY == 1 {
		copy(y[:n], x[:n])
		return
	}

	// A negative increment starts at the far end of the vector.
	var ix, iy int
	if incX < 0 {
		ix = spanX
	}
	if incY < 0 {
		iy = spanY
	}
	for k := n; k > 0; k-- {
		y[iy] = x[ix]
		ix, iy = ix+incX, iy+incY
	}
}
// Cdotc computes the dot product
//  x^H · y
// of two complex vectors x and y.
//
// Cdotc panics if incX or incY is zero, if n is negative, or if x or y is
// too short for n elements at the given stride. It returns 0 when n is zero.
//
// Complex64 implementations are autogenerated and not directly tested.
func (Implementation) Cdotc(n int, x []complex64, incX int, y []complex64, incY int) complex64 {
	switch {
	case incX == 0:
		panic(zeroIncX)
	case incY == 0:
		panic(zeroIncY)
	case n == 0:
		return 0
	case n < 0:
		panic(nLT0)
	}

	if incX == 1 && incY == 1 {
		switch {
		case n > len(x):
			panic(shortX)
		case n > len(y):
			panic(shortY)
		}
		return c64.DotcUnitary(x[:n], y[:n])
	}

	// A negative increment starts at the far end of the vector.
	startX, startY := 0, 0
	if incX < 0 {
		startX = (1 - n) * incX
	}
	if incY < 0 {
		startY = (1 - n) * incY
	}
	if len(x) <= startX || len(x) <= (n-1)*incX {
		panic(shortX)
	}
	if len(y) <= startY || len(y) <= (n-1)*incY {
		panic(shortY)
	}
	return c64.DotcInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(startX), uintptr(startY))
}
// Cdotu computes the dot product
//  x^T · y
// of two complex vectors x and y.
//
// Cdotu panics if incX or incY is zero, if n is negative, or if x or y is
// too short for n elements at the given stride. It returns 0 when n is zero.
//
// Complex64 implementations are autogenerated and not directly tested.
func (Implementation) Cdotu(n int, x []complex64, incX int, y []complex64, incY int) complex64 {
	switch {
	case incX == 0:
		panic(zeroIncX)
	case incY == 0:
		panic(zeroIncY)
	case n == 0:
		return 0
	case n < 0:
		panic(nLT0)
	}

	if incX == 1 && incY == 1 {
		switch {
		case n > len(x):
			panic(shortX)
		case n > len(y):
			panic(shortY)
		}
		return c64.DotuUnitary(x[:n], y[:n])
	}

	// A negative increment starts at the far end of the vector.
	startX, startY := 0, 0
	if incX < 0 {
		startX = (1 - n) * incX
	}
	if incY < 0 {
		startY = (1 - n) * incY
	}
	if len(x) <= startX || len(x) <= (n-1)*incX {
		panic(shortX)
	}
	if len(y) <= startY || len(y) <= (n-1)*incY {
		panic(shortY)
	}
	return c64.DotuInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(startX), uintptr(startY))
}
// Csscal scales the vector x by a real scalar alpha.
// Csscal has no effect if incX < 0.
//
// Csscal panics if incX is zero, if n is negative, or if x is too short for
// n elements at stride incX. When alpha is zero the elements are set to
// exactly zero rather than multiplied.
//
// Complex64 implementations are autogenerated and not directly tested.
func (Implementation) Csscal(n int, alpha float32, x []complex64, incX int) {
	switch {
	case incX == 0:
		panic(zeroIncX)
	case incX < 0:
		return
	case (n-1)*incX >= len(x):
		panic(shortX)
	case n == 0:
		return
	case n < 0:
		panic(nLT0)
	}

	if incX == 1 {
		head := x[:n]
		if alpha == 0 {
			for i := range head {
				head[i] = 0
			}
			return
		}
		for i := range head {
			head[i] = complex(alpha*real(head[i]), alpha*imag(head[i]))
		}
		return
	}

	end := n * incX
	if alpha == 0 {
		for ix := 0; ix < end; ix += incX {
			x[ix] = 0
		}
		return
	}
	for ix := 0; ix < end; ix += incX {
		x[ix] = complex(alpha*real(x[ix]), alpha*imag(x[ix]))
	}
}
// Cscal scales the vector x by a complex scalar alpha.
// Cscal has no effect if incX < 0.
//
// Cscal panics if incX is zero, if n is negative, or if x is too short for
// n elements at stride incX. When alpha is zero the elements are set to
// exactly zero rather than multiplied.
//
// Complex64 implementations are autogenerated and not directly tested.
func (Implementation) Cscal(n int, alpha complex64, x []complex64, incX int) {
	switch {
	case incX == 0:
		panic(zeroIncX)
	case incX < 0:
		return
	case (n-1)*incX >= len(x):
		panic(shortX)
	case n == 0:
		return
	case n < 0:
		panic(nLT0)
	}

	unit := incX == 1
	switch {
	case alpha != 0 && unit:
		c64.ScalUnitary(alpha, x[:n])
	case alpha != 0:
		c64.ScalInc(alpha, x, uintptr(n), uintptr(incX))
	case unit:
		for i := 0; i < n; i++ {
			x[i] = 0
		}
	default:
		for ix, end := 0, n*incX; ix < end; ix += incX {
			x[ix] = 0
		}
	}
}
// Cswap exchanges the elements of two complex vectors x and y.
//
// Cswap panics if incX or incY is zero, if n is negative, or if x or y is
// too short for n elements at the given stride. It does nothing when n is
// zero.
//
// Complex64 implementations are autogenerated and not directly tested.
func (Implementation) Cswap(n int, x []complex64, incX int, y []complex64, incY int) {
	switch {
	case incX == 0:
		panic(zeroIncX)
	case incY == 0:
		panic(zeroIncY)
	case n == 0:
		return
	case n < 0:
		panic(nLT0)
	}

	// spanX and spanY are the largest indices touched in x and y.
	spanX, spanY := (n-1)*incX, (n-1)*incY
	if incX < 0 {
		spanX = -spanX
	}
	if incY < 0 {
		spanY = -spanY
	}
	if spanX >= len(x) {
		panic(shortX)
	}
	if spanY >= len(y) {
		panic(shortY)
	}

	if incX == 1 && incY == 1 {
		for i := 0; i < n; i++ {
			x[i], y[i] = y[i], x[i]
		}
		return
	}

	// A negative increment starts at the far end of the vector.
	var ix, iy int
	if incX < 0 {
		ix = spanX
	}
	if incY < 0 {
		iy = spanY
	}
	for k := n; k > 0; k-- {
		x[ix], y[iy] = y[iy], x[ix]
		ix, iy = ix+incX, iy+incY
	}
}

644
vendor/gonum.org/v1/gonum/blas/gonum/level1float32.go generated vendored Normal file
View File

@@ -0,0 +1,644 @@
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import (
math "gonum.org/v1/gonum/internal/math32"
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/internal/asm/f32"
)
var _ blas.Float32Level1 = Implementation{}
// Snrm2 computes the Euclidean norm of a vector,
//  sqrt(\sum_i x[i] * x[i]).
// This function returns 0 if incX is negative.
//
// Snrm2 panics if incX is zero, if n is negative, or if x is too short for
// n elements at stride incX. It returns 0 when n is zero and NaN as soon as
// a NaN element is encountered.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Snrm2(n int, x []float32, incX int) float32 {
if incX < 1 {
if incX == 0 {
panic(zeroIncX)
}
return 0
}
if len(x) <= (n-1)*incX {
panic(shortX)
}
if n < 2 {
if n == 1 {
return math.Abs(x[0])
}
if n == 0 {
return 0
}
panic(nLT0)
}
// The norm is accumulated as scale * sqrt(sumSquares): scale tracks the
// largest absolute value seen so far and sumSquares the sum of squares of
// all values divided by scale. Squares are therefore only taken of ratios
// that are at most 1 (presumably to avoid overflow and underflow).
var (
scale float32 = 0
sumSquares float32 = 1
)
if incX == 1 {
x = x[:n]
for _, v := range x {
if v == 0 {
continue
}
absxi := math.Abs(v)
if math.IsNaN(absxi) {
return math.NaN()
}
if scale < absxi {
// New maximum: rescale the running sum to the new scale.
sumSquares = 1 + sumSquares*(scale/absxi)*(scale/absxi)
scale = absxi
} else {
sumSquares = sumSquares + (absxi/scale)*(absxi/scale)
}
}
// An infinite element yields +Inf regardless of the value of sumSquares.
if math.IsInf(scale, 1) {
return math.Inf(1)
}
return scale * math.Sqrt(sumSquares)
}
// Strided variant of the loop above.
for ix := 0; ix < n*incX; ix += incX {
val := x[ix]
if val == 0 {
continue
}
absxi := math.Abs(val)
if math.IsNaN(absxi) {
return math.NaN()
}
if scale < absxi {
sumSquares = 1 + sumSquares*(scale/absxi)*(scale/absxi)
scale = absxi
} else {
sumSquares = sumSquares + (absxi/scale)*(absxi/scale)
}
}
if math.IsInf(scale, 1) {
return math.Inf(1)
}
return scale * math.Sqrt(sumSquares)
}
// Sasum computes the sum of the absolute values of the elements of x.
//  \sum_i |x[i]|
// Sasum returns 0 if incX is negative.
//
// Sasum panics if n is negative, if incX is zero, or if x is too short for
// n elements at stride incX.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Sasum(n int, x []float32, incX int) float32 {
	switch {
	case n < 0:
		panic(nLT0)
	case incX == 0:
		panic(zeroIncX)
	case incX < 0:
		return 0
	case (n-1)*incX >= len(x):
		panic(shortX)
	}

	var total float32
	if incX != 1 {
		for k, ix := 0, 0; k < n; k, ix = k+1, ix+incX {
			total += math.Abs(x[ix])
		}
		return total
	}
	for i := 0; i < n; i++ {
		total += math.Abs(x[i])
	}
	return total
}
// Isamax returns the index of an element of x with the largest absolute value.
// If there are multiple such indices the earliest is returned.
// Isamax returns -1 if n == 0 or if incX is negative.
//
// Isamax panics if incX is zero, if n is negative, or if x is too short
// for n elements at stride incX.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Isamax(n int, x []float32, incX int) int {
	switch {
	case incX == 0:
		panic(zeroIncX)
	case incX < 0:
		return -1
	case len(x) <= (n-1)*incX:
		panic(shortX)
	case n == 1:
		return 0
	case n == 0:
		return -1 // Netlib returns invalid index when n == 0.
	case n < 0:
		panic(nLT0)
	}

	// The comparison is strict, so the earliest maximal element wins.
	best, bestAbs := 0, math.Abs(x[0])
	if incX == 1 {
		for i := 1; i < n; i++ {
			if a := math.Abs(x[i]); a > bestAbs {
				best, bestAbs = i, a
			}
		}
		return best
	}
	for i, ix := 1, incX; i < n; i, ix = i+1, ix+incX {
		if a := math.Abs(x[ix]); a > bestAbs {
			best, bestAbs = i, a
		}
	}
	return best
}
// Sswap exchanges the elements of two vectors.
//  x[i], y[i] = y[i], x[i] for all i
//
// Sswap panics if incX or incY is zero, if n is negative, or if x or y is
// too short for n elements at the given stride. It does nothing when n is
// zero.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Sswap(n int, x []float32, incX int, y []float32, incY int) {
	switch {
	case incX == 0:
		panic(zeroIncX)
	case incY == 0:
		panic(zeroIncY)
	case n == 0:
		return
	case n < 0:
		panic(nLT0)
	}

	// spanX and spanY are the largest indices touched in x and y.
	spanX, spanY := (n-1)*incX, (n-1)*incY
	if incX < 0 {
		spanX = -spanX
	}
	if incY < 0 {
		spanY = -spanY
	}
	if spanX >= len(x) {
		panic(shortX)
	}
	if spanY >= len(y) {
		panic(shortY)
	}

	if incX == 1 && incY == 1 {
		for i := 0; i < n; i++ {
			x[i], y[i] = y[i], x[i]
		}
		return
	}

	// A negative increment starts at the far end of the vector.
	var ix, iy int
	if incX < 0 {
		ix = spanX
	}
	if incY < 0 {
		iy = spanY
	}
	for k := n; k > 0; k-- {
		x[ix], y[iy] = y[iy], x[ix]
		ix, iy = ix+incX, iy+incY
	}
}
// Scopy copies the elements of x into the elements of y.
//  y[i] = x[i] for all i
//
// Scopy panics if incX or incY is zero, if n is negative, or if x or y is
// too short for n elements at the given stride. It does nothing when n is
// zero.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Scopy(n int, x []float32, incX int, y []float32, incY int) {
	switch {
	case incX == 0:
		panic(zeroIncX)
	case incY == 0:
		panic(zeroIncY)
	case n == 0:
		return
	case n < 0:
		panic(nLT0)
	}

	// spanX and spanY are the largest indices touched in x and y.
	spanX, spanY := (n-1)*incX, (n-1)*incY
	if incX < 0 {
		spanX = -spanX
	}
	if incY < 0 {
		spanY = -spanY
	}
	if spanX >= len(x) {
		panic(shortX)
	}
	if spanY >= len(y) {
		panic(shortY)
	}

	if incX == 1 && incY == 1 {
		copy(y[:n], x[:n])
		return
	}

	// A negative increment starts at the far end of the vector.
	var ix, iy int
	if incX < 0 {
		ix = spanX
	}
	if incY < 0 {
		iy = spanY
	}
	for k := n; k > 0; k-- {
		y[iy] = x[ix]
		ix, iy = ix+incX, iy+incY
	}
}
// Saxpy adds alpha times x to y
//  y[i] += alpha * x[i] for all i
//
// Saxpy panics if incX or incY is zero, if n is negative, or if x or y is
// too short for n elements at the given stride. It does nothing when n is
// zero or alpha is zero.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Saxpy(n int, alpha float32, x []float32, incX int, y []float32, incY int) {
	switch {
	case incX == 0:
		panic(zeroIncX)
	case incY == 0:
		panic(zeroIncY)
	case n == 0:
		return
	case n < 0:
		panic(nLT0)
	}

	// spanX and spanY are the largest indices touched in x and y.
	spanX, spanY := (n-1)*incX, (n-1)*incY
	if incX < 0 {
		spanX = -spanX
	}
	if incY < 0 {
		spanY = -spanY
	}
	if spanX >= len(x) {
		panic(shortX)
	}
	if spanY >= len(y) {
		panic(shortY)
	}

	if alpha == 0 {
		return
	}
	if incX == 1 && incY == 1 {
		f32.AxpyUnitary(alpha, x[:n], y[:n])
		return
	}

	// A negative increment starts at the far end of the vector.
	var startX, startY int
	if incX < 0 {
		startX = spanX
	}
	if incY < 0 {
		startY = spanY
	}
	f32.AxpyInc(alpha, x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(startX), uintptr(startY))
}
// Srotg computes the plane rotation
//   _    _      _ _       _ _
//  |  c s |    | a |     | r |
//  | -s c |  * | b |   = | 0 |
//   ‾    ‾      ‾ ‾       ‾ ‾
// where
//  r = ±√(a^2 + b^2)
//  c = a/r, the cosine of the plane rotation
//  s = b/r, the sine of the plane rotation
//
// The sign of r is taken from whichever of a and b has the larger absolute
// value (b on a tie). When both a and b are zero, Srotg returns c = 1,
// s = 0, r = a and z = 0.
//
// NOTE: There is a discrepancy between the reference implementation and the BLAS
// technical manual regarding the sign for r when a or b are zero.
// Srotg agrees with the definition in the manual and other
// common BLAS implementations.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Srotg(a, b float32) (c, s, r, z float32) {
	if a == 0 && b == 0 {
		return 1, 0, a, 0
	}

	aDominates := math.Abs(a) > math.Abs(b)
	signSource := b
	if aDominates {
		signSource = a
	}
	r = math.Copysign(math.Hypot(a, b), signSource)
	c, s = a/r, b/r

	switch {
	case aDominates:
		z = s
	case c != 0: // r == 0 case handled above
		z = 1 / c
	default:
		z = 1
	}
	return c, s, r, z
}
// Srotmg computes the modified Givens rotation. See
// http://www.netlib.org/lapack/explore-html/df/deb/drotmg_8f.html
// for more details.
//
// It returns the rotation parameters p together with the updated values of
// d1, d2 and x1 (rd1, rd2 and rx1). In the error states marked below it
// returns p with Flag set to blas.Rescaling and a zero H, and zeros for the
// other three results.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Srotmg(d1, d2, x1, y1 float32) (p blas.SrotmParams, rd1, rd2, rx1 float32) {
// The implementation of Drotmg used here is taken from Hopkins 1997
// Appendix A: https://doi.org/10.1145/289251.289253
// with the exception of the gam constants below.
const (
gam = 4096.0
gamsq = gam * gam
rgamsq = 1.0 / gamsq
)
if d1 < 0 {
p.Flag = blas.Rescaling // Error state.
return p, 0, 0, 0
}
// Nothing to rotate: return the inputs unchanged with the identity flag.
if d2 == 0 || y1 == 0 {
p.Flag = blas.Identity
return p, d1, d2, x1
}
var h11, h12, h21, h22 float32
if (d1 == 0 || x1 == 0) && d2 > 0 {
p.Flag = blas.Diagonal
h12 = 1
h21 = -1
x1 = y1
d1, d2 = d2, d1
} else {
p2 := d2 * y1
p1 := d1 * x1
q2 := p2 * y1
q1 := p1 * x1
if math.Abs(q1) > math.Abs(q2) {
p.Flag = blas.OffDiagonal
h11 = 1
h22 = 1
h21 = -y1 / x1
h12 = p2 / p1
u := 1 - h12*h21
if u <= 0 {
p.Flag = blas.Rescaling // Error state.
return p, 0, 0, 0
}
d1 /= u
d2 /= u
x1 *= u
} else {
if q2 < 0 {
p.Flag = blas.Rescaling // Error state.
return p, 0, 0, 0
}
p.Flag = blas.Diagonal
h21 = -1
h12 = 1
h11 = p1 / p2
h22 = x1 / y1
u := 1 + h11*h22
d1, d2 = d2/u, d1/u
x1 = y1 * u
}
}
// Rescale until d1 and |d2| are zero or lie in (rgamsq, gamsq]. Any
// rescaling switches the flag to blas.Rescaling so that all four entries
// of H are stored below.
for d1 <= rgamsq && d1 != 0 {
p.Flag = blas.Rescaling
d1 = (d1 * gam) * gam
x1 /= gam
h11 /= gam
h12 /= gam
}
for d1 > gamsq {
p.Flag = blas.Rescaling
d1 = (d1 / gam) / gam
x1 *= gam
h11 *= gam
h12 *= gam
}
for math.Abs(d2) <= rgamsq && d2 != 0 {
p.Flag = blas.Rescaling
d2 = (d2 * gam) * gam
h21 /= gam
h22 /= gam
}
for math.Abs(d2) > gamsq {
p.Flag = blas.Rescaling
d2 = (d2 / gam) / gam
h21 *= gam
h22 *= gam
}
// Only the entries of H that are not implied by the flag are stored; H is
// laid out as {h11, h21, h12, h22}.
switch p.Flag {
case blas.Diagonal:
p.H = [4]float32{0: h11, 3: h22}
case blas.OffDiagonal:
p.H = [4]float32{1: h21, 2: h12}
case blas.Rescaling:
p.H = [4]float32{h11, h21, h12, h22}
default:
panic(badFlag)
}
return p, d1, d2, x1
}
// Srot applies a plane transformation.
//  x[i] = c * x[i] + s * y[i]
//  y[i] = c * y[i] - s * x[i]
//
// Srot panics if incX or incY is zero, if n is negative, or if x or y is
// too short for n elements at the given stride. It does nothing when n is
// zero.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Srot(n int, x []float32, incX int, y []float32, incY int, c float32, s float32) {
	switch {
	case incX == 0:
		panic(zeroIncX)
	case incY == 0:
		panic(zeroIncY)
	case n == 0:
		return
	case n < 0:
		panic(nLT0)
	}

	// spanX and spanY are the largest indices touched in x and y.
	spanX, spanY := (n-1)*incX, (n-1)*incY
	if incX < 0 {
		spanX = -spanX
	}
	if incY < 0 {
		spanY = -spanY
	}
	if spanX >= len(x) {
		panic(shortX)
	}
	if spanY >= len(y) {
		panic(shortY)
	}

	if incX == 1 && incY == 1 {
		for i := 0; i < n; i++ {
			vx, vy := x[i], y[i]
			x[i], y[i] = c*vx+s*vy, c*vy-s*vx
		}
		return
	}

	// A negative increment starts at the far end of the vector.
	var ix, iy int
	if incX < 0 {
		ix = spanX
	}
	if incY < 0 {
		iy = spanY
	}
	for k := n; k > 0; k-- {
		vx, vy := x[ix], y[iy]
		x[ix], y[iy] = c*vx+s*vy, c*vy-s*vx
		ix, iy = ix+incX, iy+incY
	}
}
// Srotm applies the modified Givens rotation to the 2×n matrix.
//
// Each pair (x[i], y[i]) is replaced by H applied to it, where the form of H
// is selected by p.Flag:
//  blas.Identity:    x and y are left unchanged
//  blas.Rescaling:   all four entries of H are read from p.H
//  blas.OffDiagonal: h11 = h22 = 1; h21 and h12 are read from p.H
//  blas.Diagonal:    h12 = 1, h21 = -1; h11 and h22 are read from p.H
// p.H is laid out as {h11, h21, h12, h22}. Any other flag value leaves x and
// y unchanged.
//
// Srotm panics if incX or incY is zero, if n is negative, or if x or y is
// too short for n elements at the given stride. It does nothing when n is
// zero.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Srotm(n int, x []float32, incX int, y []float32, incY int, p blas.SrotmParams) {
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
if n <= 0 {
if n == 0 {
return
}
panic(nLT0)
}
// For a negative increment the largest index touched is (1-n)*inc.
if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
panic(shortX)
}
if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) {
panic(shortY)
}
if p.Flag == blas.Identity {
return
}
switch p.Flag {
case blas.Rescaling:
// Full 2×2 matrix.
h11 := p.H[0]
h12 := p.H[2]
h21 := p.H[1]
h22 := p.H[3]
if incX == 1 && incY == 1 {
x = x[:n]
for i, vx := range x {
vy := y[i]
x[i], y[i] = vx*h11+vy*h12, vx*h21+vy*h22
}
return
}
// A negative increment starts at the far end of the vector.
var ix, iy int
if incX < 0 {
ix = (-n + 1) * incX
}
if incY < 0 {
iy = (-n + 1) * incY
}
for i := 0; i < n; i++ {
vx := x[ix]
vy := y[iy]
x[ix], y[iy] = vx*h11+vy*h12, vx*h21+vy*h22
ix += incX
iy += incY
}
case blas.OffDiagonal:
// Unit diagonal: only the off-diagonal entries are stored.
h12 := p.H[2]
h21 := p.H[1]
if incX == 1 && incY == 1 {
x = x[:n]
for i, vx := range x {
vy := y[i]
x[i], y[i] = vx+vy*h12, vx*h21+vy
}
return
}
var ix, iy int
if incX < 0 {
ix = (-n + 1) * incX
}
if incY < 0 {
iy = (-n + 1) * incY
}
for i := 0; i < n; i++ {
vx := x[ix]
vy := y[iy]
x[ix], y[iy] = vx+vy*h12, vx*h21+vy
ix += incX
iy += incY
}
case blas.Diagonal:
// Off-diagonal entries are fixed at h12 = 1 and h21 = -1.
h11 := p.H[0]
h22 := p.H[3]
if incX == 1 && incY == 1 {
x = x[:n]
for i, vx := range x {
vy := y[i]
x[i], y[i] = vx*h11+vy, -vx+vy*h22
}
return
}
var ix, iy int
if incX < 0 {
ix = (-n + 1) * incX
}
if incY < 0 {
iy = (-n + 1) * incY
}
for i := 0; i < n; i++ {
vx := x[ix]
vy := y[iy]
x[ix], y[iy] = vx*h11+vy, -vx+vy*h22
ix += incX
iy += incY
}
}
}
// Sscal multiplies n elements of x, taken with stride incX, by alpha.
//  x[i] *= alpha
// When alpha is zero the selected elements are set to zero directly rather
// than multiplied.
//
// Sscal has no effect if incX < 0 or n == 0. It panics if incX is zero, if n
// is negative, or if x is too short to hold n elements at stride incX.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Sscal(n int, alpha float32, x []float32, incX int) {
	switch {
	case incX == 0:
		panic(zeroIncX)
	case incX < 0:
		return
	case n < 0:
		panic(nLT0)
	case n == 0:
		return
	}
	if len(x) <= (n-1)*incX {
		panic(shortX)
	}

	unit := incX == 1
	switch {
	case alpha == 0 && unit:
		head := x[:n]
		for i := range head {
			head[i] = 0
		}
	case alpha == 0:
		end := n * incX
		for ix := 0; ix < end; ix += incX {
			x[ix] = 0
		}
	case unit:
		f32.ScalUnitary(alpha, x[:n])
	default:
		f32.ScalInc(alpha, x, uintptr(n), uintptr(incX))
	}
}

View File

@@ -0,0 +1,53 @@
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum"; DO NOT EDIT.
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import (
"gonum.org/v1/gonum/internal/asm/f32"
)
// Dsdot computes the dot product of two float32 vectors and returns it as a
// float64
//  \sum_i x[i]*y[i]
// The n elements of each vector are selected with strides incX and incY.
//
// Dsdot returns 0 when n is zero. It panics if incX or incY is zero, if n is
// negative, or if x or y is too short to hold n elements at its stride.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Dsdot(n int, x []float32, incX int, y []float32, incY int) float64 {
	switch {
	case incX == 0:
		panic(zeroIncX)
	case incY == 0:
		panic(zeroIncY)
	case n < 0:
		panic(nLT0)
	case n == 0:
		return 0
	}

	if incX == 1 && incY == 1 {
		switch {
		case len(x) < n:
			panic(shortX)
		case len(y) < n:
			panic(shortY)
		}
		return f32.DdotUnitary(x[:n], y[:n])
	}

	// A negative stride starts at the far end of the vector.
	var startX, startY int
	if incX < 0 {
		startX = (-n + 1) * incX
	}
	if incY < 0 {
		startY = (-n + 1) * incY
	}
	if startX >= len(x) || startX+(n-1)*incX >= len(x) {
		panic(shortX)
	}
	if startY >= len(y) || startY+(n-1)*incY >= len(y) {
		panic(shortY)
	}
	return f32.DdotInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(startX), uintptr(startY))
}

View File

@@ -0,0 +1,53 @@
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum"; DO NOT EDIT.
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import (
"gonum.org/v1/gonum/internal/asm/f32"
)
// Sdot computes the dot product of two float32 vectors
//  \sum_i x[i]*y[i]
// The n elements of each vector are selected with strides incX and incY.
//
// Sdot returns 0 when n is zero. It panics if incX or incY is zero, if n is
// negative, or if x or y is too short to hold n elements at its stride.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Sdot(n int, x []float32, incX int, y []float32, incY int) float32 {
	switch {
	case incX == 0:
		panic(zeroIncX)
	case incY == 0:
		panic(zeroIncY)
	case n < 0:
		panic(nLT0)
	case n == 0:
		return 0
	}

	if incX == 1 && incY == 1 {
		switch {
		case len(x) < n:
			panic(shortX)
		case len(y) < n:
			panic(shortY)
		}
		return f32.DotUnitary(x[:n], y[:n])
	}

	// A negative stride starts at the far end of the vector.
	var startX, startY int
	if incX < 0 {
		startX = (-n + 1) * incX
	}
	if incY < 0 {
		startY = (-n + 1) * incY
	}
	if startX >= len(x) || startX+(n-1)*incX >= len(x) {
		panic(shortX)
	}
	if startY >= len(y) || startY+(n-1)*incY >= len(y) {
		panic(shortY)
	}
	return f32.DotInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(startX), uintptr(startY))
}

View File

@@ -0,0 +1,53 @@
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum"; DO NOT EDIT.
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import (
"gonum.org/v1/gonum/internal/asm/f32"
)
// Sdsdot computes the dot product of the two vectors plus a constant
//  alpha + \sum_i x[i]*y[i]
// The products are accumulated by the float64 dot kernels and the sum is
// converted to float32 before alpha is added.
//
// Sdsdot returns alpha when n is zero, since the sum is then empty. It panics
// if incX or incY is zero, if n is negative, or if x or y is too short to
// hold n elements at its stride.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Sdsdot(n int, alpha float32, x []float32, incX int, y []float32, incY int) float32 {
	if incX == 0 {
		panic(zeroIncX)
	}
	if incY == 0 {
		panic(zeroIncY)
	}
	if n <= 0 {
		if n == 0 {
			// Empty sum: the result is the additive constant alone.
			return alpha
		}
		panic(nLT0)
	}
	if incX == 1 && incY == 1 {
		if len(x) < n {
			panic(shortX)
		}
		if len(y) < n {
			panic(shortY)
		}
		return alpha + float32(f32.DdotUnitary(x[:n], y[:n]))
	}
	// A negative stride starts at the far end of the vector.
	var ix, iy int
	if incX < 0 {
		ix = (-n + 1) * incX
	}
	if incY < 0 {
		iy = (-n + 1) * incY
	}
	if ix >= len(x) || ix+(n-1)*incX >= len(x) {
		panic(shortX)
	}
	if iy >= len(y) || iy+(n-1)*incY >= len(y) {
		panic(shortY)
	}
	return alpha + float32(f32.DdotInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy)))
}

620
vendor/gonum.org/v1/gonum/blas/gonum/level1float64.go generated vendored Normal file
View File

@@ -0,0 +1,620 @@
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import (
"math"
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/internal/asm/f64"
)
var _ blas.Float64Level1 = Implementation{}
// Dnrm2 computes the Euclidean norm of n elements of x taken with stride incX,
//  sqrt(\sum_i x[i] * x[i]).
// The sum is kept as scale^2 * sumSquares, with scale tracking the largest
// magnitude seen so far, so that the squares are always formed from ratios
// no larger than one.
//
// Dnrm2 returns 0 if incX is negative or n is zero, NaN as soon as a NaN
// element is met, and +Inf if an infinite element is met without a NaN. It
// panics if incX is zero, if n is negative, or if x is too short to hold n
// elements at stride incX.
func (Implementation) Dnrm2(n int, x []float64, incX int) float64 {
	switch {
	case incX == 0:
		panic(zeroIncX)
	case incX < 0:
		return 0
	}
	if len(x) <= (n-1)*incX {
		panic(shortX)
	}
	switch {
	case n < 0:
		panic(nLT0)
	case n == 0:
		return 0
	case n == 1:
		return math.Abs(x[0])
	}

	scale, sumSquares := 0.0, 1.0
	end := n * incX
	for ix := 0; ix < end; ix += incX {
		v := x[ix]
		if v == 0 {
			continue
		}
		mag := math.Abs(v)
		if math.IsNaN(mag) {
			return math.NaN()
		}
		if scale < mag {
			// New largest magnitude: re-express the running sum
			// relative to it.
			sumSquares = 1 + sumSquares*(scale/mag)*(scale/mag)
			scale = mag
		} else {
			sumSquares = sumSquares + (mag/scale)*(mag/scale)
		}
	}
	if math.IsInf(scale, 1) {
		return math.Inf(1)
	}
	return scale * math.Sqrt(sumSquares)
}
// Dasum computes the sum of the absolute values of n elements of x taken
// with stride incX.
//  \sum_i |x[i]|
//
// Dasum returns 0 if incX is negative or n is zero. It panics if n is
// negative, if incX is zero, or if x is too short to hold n elements at
// stride incX.
func (Implementation) Dasum(n int, x []float64, incX int) float64 {
	switch {
	case n < 0:
		panic(nLT0)
	case incX == 0:
		panic(zeroIncX)
	case incX < 0:
		return 0
	}
	if len(x) <= (n-1)*incX {
		panic(shortX)
	}

	var total float64
	for i, ix := 0, 0; i < n; i, ix = i+1, ix+incX {
		total += math.Abs(x[ix])
	}
	return total
}
// Idamax returns the index of an element of x with the largest absolute value
// among the n elements taken with stride incX. The returned index counts
// elements, not positions in x. If there are multiple such indices the
// earliest is returned.
//
// Idamax returns -1 if n == 0 or if incX is negative. It panics if incX is
// zero, if n is negative, or if x is too short to hold n elements at stride
// incX.
func (Implementation) Idamax(n int, x []float64, incX int) int {
	switch {
	case incX == 0:
		panic(zeroIncX)
	case incX < 0:
		return -1
	}
	if len(x) <= (n-1)*incX {
		panic(shortX)
	}
	switch {
	case n < 0:
		panic(nLT0)
	case n == 0:
		return -1 // Netlib returns invalid index when n == 0.
	case n == 1:
		return 0
	}

	best := 0
	largest := math.Abs(x[0])
	// Only a strictly larger magnitude replaces the current best, which
	// keeps the earliest index on ties.
	for i, ix := 1, incX; i < n; i, ix = i+1, ix+incX {
		if mag := math.Abs(x[ix]); mag > largest {
			largest = mag
			best = i
		}
	}
	return best
}
// Dswap exchanges n elements of x, taken with stride incX, with n elements
// of y, taken with stride incY.
//  x[i], y[i] = y[i], x[i] for all i
//
// Dswap does nothing when n is zero. It panics if incX or incY is zero, if n
// is negative, or if x or y is too short to hold n elements at its stride.
func (Implementation) Dswap(n int, x []float64, incX int, y []float64, incY int) {
	switch {
	case incX == 0:
		panic(zeroIncX)
	case incY == 0:
		panic(zeroIncY)
	case n < 0:
		panic(nLT0)
	case n == 0:
		return
	}

	// Largest index visited in each vector; also the starting index when
	// the corresponding stride is negative.
	lastX := (n - 1) * incX
	if incX < 0 {
		lastX = (1 - n) * incX
	}
	if len(x) <= lastX {
		panic(shortX)
	}
	lastY := (n - 1) * incY
	if incY < 0 {
		lastY = (1 - n) * incY
	}
	if len(y) <= lastY {
		panic(shortY)
	}

	if incX == 1 && incY == 1 {
		xs := x[:n]
		for i := range xs {
			xs[i], y[i] = y[i], xs[i]
		}
		return
	}

	px, py := 0, 0
	if incX < 0 {
		px = lastX
	}
	if incY < 0 {
		py = lastY
	}
	for i := 0; i < n; i, px, py = i+1, px+incX, py+incY {
		x[px], y[py] = y[py], x[px]
	}
}
// Dcopy copies n elements of x, taken with stride incX, into n elements of
// y, taken with stride incY.
//  y[i] = x[i] for all i
//
// Dcopy does nothing when n is zero. It panics if incX or incY is zero, if n
// is negative, or if x or y is too short to hold n elements at its stride.
func (Implementation) Dcopy(n int, x []float64, incX int, y []float64, incY int) {
	switch {
	case incX == 0:
		panic(zeroIncX)
	case incY == 0:
		panic(zeroIncY)
	case n < 0:
		panic(nLT0)
	case n == 0:
		return
	}

	// Largest index visited in each vector; also the starting index when
	// the corresponding stride is negative.
	lastX := (n - 1) * incX
	if incX < 0 {
		lastX = (1 - n) * incX
	}
	if len(x) <= lastX {
		panic(shortX)
	}
	lastY := (n - 1) * incY
	if incY < 0 {
		lastY = (1 - n) * incY
	}
	if len(y) <= lastY {
		panic(shortY)
	}

	if incX == 1 && incY == 1 {
		copy(y[:n], x[:n])
		return
	}

	src, dst := 0, 0
	if incX < 0 {
		src = lastX
	}
	if incY < 0 {
		dst = lastY
	}
	for i := 0; i < n; i, src, dst = i+1, src+incX, dst+incY {
		y[dst] = x[src]
	}
}
// Daxpy adds alpha times x to y, using n elements of each vector taken with
// strides incX and incY.
//  y[i] += alpha * x[i] for all i
//
// Daxpy leaves y unchanged when n is zero or alpha is zero. It panics if incX
// or incY is zero, if n is negative, or if x or y is too short to hold n
// elements at its stride; these checks run before the alpha == 0 shortcut.
func (Implementation) Daxpy(n int, alpha float64, x []float64, incX int, y []float64, incY int) {
	switch {
	case incX == 0:
		panic(zeroIncX)
	case incY == 0:
		panic(zeroIncY)
	case n < 0:
		panic(nLT0)
	case n == 0:
		return
	}

	// Largest index visited in each vector; also the starting index when
	// the corresponding stride is negative.
	lastX := (n - 1) * incX
	if incX < 0 {
		lastX = (1 - n) * incX
	}
	if len(x) <= lastX {
		panic(shortX)
	}
	lastY := (n - 1) * incY
	if incY < 0 {
		lastY = (1 - n) * incY
	}
	if len(y) <= lastY {
		panic(shortY)
	}

	if alpha == 0 {
		return
	}
	if incX == 1 && incY == 1 {
		f64.AxpyUnitary(alpha, x[:n], y[:n])
		return
	}

	startX, startY := 0, 0
	if incX < 0 {
		startX = lastX
	}
	if incY < 0 {
		startY = lastY
	}
	f64.AxpyInc(alpha, x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(startX), uintptr(startY))
}
// Drotg computes the plane rotation
//   _    _      _ _       _ _
//  |  c s |    | a |     | r |
//  | -s c |  * | b |   = | 0 |
//   ‾    ‾      ‾ ‾       ‾ ‾
// where
//  r = ±√(a^2 + b^2)
//  c = a/r, the cosine of the plane rotation
//  s = b/r, the sine of the plane rotation
// r takes the sign of whichever of a and b has the larger magnitude, and of b
// when the magnitudes are equal. The fourth result z is s when |a| > |b|,
// otherwise 1/c when c is non-zero, and 1 when c is zero. When both a and b
// are zero the results are c = 1, s = 0, r = a and z = 0.
//
// NOTE: There is a discrepancy between the reference implementation and the BLAS
// technical manual regarding the sign for r when a or b are zero.
// Drotg agrees with the definition in the manual and other
// common BLAS implementations.
func (Implementation) Drotg(a, b float64) (c, s, r, z float64) {
	if a == 0 && b == 0 {
		return 1, 0, a, 0
	}

	aDominates := math.Abs(a) > math.Abs(b)
	signSource := b
	if aDominates {
		signSource = a
	}
	r = math.Copysign(math.Hypot(a, b), signSource)
	c, s = a/r, b/r

	switch {
	case aDominates:
		z = s
	case c != 0: // r == 0 case handled above
		z = 1 / c
	default:
		z = 1
	}
	return c, s, r, z
}
// Drotmg computes the modified Givens rotation. See
// http://www.netlib.org/lapack/explore-html/df/deb/drotmg_8f.html
// for more details.
//
// The returned p carries the flag and the entries of H; rd1, rd2 and rx1 are
// the updated values of d1, d2 and x1. On the error paths (d1 < 0, a
// non-positive u in the off-diagonal case, or q2 < 0 in the diagonal case)
// p.Flag is blas.Rescaling, p.H is left zero, and rd1, rd2 and rx1 are zero.
func (Implementation) Drotmg(d1, d2, x1, y1 float64) (p blas.DrotmParams, rd1, rd2, rx1 float64) {
	// The implementation of Drotmg used here is taken from Hopkins 1997
	// Appendix A: https://doi.org/10.1145/289251.289253
	// with the exception of the gam constants below.
	const (
		gamma      = 4096.0
		gammaSq    = gamma * gamma
		invGammaSq = 1.0 / gammaSq
	)

	switch {
	case d1 < 0:
		p.Flag = blas.Rescaling // Error state.
		return p, 0, 0, 0
	case d2 == 0 || y1 == 0:
		p.Flag = blas.Identity
		return p, d1, d2, x1
	}

	var h11, h12, h21, h22 float64
	if (d1 == 0 || x1 == 0) && d2 > 0 {
		p.Flag = blas.Diagonal
		h12, h21 = 1, -1
		x1 = y1
		d1, d2 = d2, d1
	} else {
		py := d2 * y1
		px := d1 * x1
		qy := py * y1
		qx := px * x1
		switch {
		case math.Abs(qx) > math.Abs(qy):
			p.Flag = blas.OffDiagonal
			h11, h22 = 1, 1
			h21 = -y1 / x1
			h12 = py / px
			u := 1 - h12*h21
			if u <= 0 {
				p.Flag = blas.Rescaling // Error state.
				return p, 0, 0, 0
			}
			d1 /= u
			d2 /= u
			x1 *= u
		case qy < 0:
			p.Flag = blas.Rescaling // Error state.
			return p, 0, 0, 0
		default:
			p.Flag = blas.Diagonal
			h21, h12 = -1, 1
			h11 = px / py
			h22 = x1 / y1
			u := 1 + h11*h22
			d1, d2 = d2/u, d1/u
			x1 = y1 * u
		}
	}

	// Bring d1 and then |d2| into the range (invGammaSq, gammaSq] by
	// powers of gamma^2, compensating in x1 and the matching row of H.
	// Any rescaling step switches the flag to blas.Rescaling so that the
	// full H is reported.
	for d1 <= invGammaSq && d1 != 0 {
		p.Flag = blas.Rescaling
		d1 = (d1 * gamma) * gamma
		x1 /= gamma
		h11 /= gamma
		h12 /= gamma
	}
	for d1 > gammaSq {
		p.Flag = blas.Rescaling
		d1 = (d1 / gamma) / gamma
		x1 *= gamma
		h11 *= gamma
		h12 *= gamma
	}
	for math.Abs(d2) <= invGammaSq && d2 != 0 {
		p.Flag = blas.Rescaling
		d2 = (d2 * gamma) * gamma
		h21 /= gamma
		h22 /= gamma
	}
	for math.Abs(d2) > gammaSq {
		p.Flag = blas.Rescaling
		d2 = (d2 / gamma) / gamma
		h21 *= gamma
		h22 *= gamma
	}

	// p.H is laid out as {h11, h21, h12, h22}; entries implied by the flag
	// are stored as zero.
	switch p.Flag {
	case blas.Rescaling:
		p.H = [4]float64{h11, h21, h12, h22}
	case blas.OffDiagonal:
		p.H = [4]float64{0, h21, h12, 0}
	case blas.Diagonal:
		p.H = [4]float64{h11, 0, 0, h22}
	default:
		panic(badFlag)
	}
	return p, d1, d2, x1
}
// Drot applies a plane rotation to the n element pairs selected from x and y
// by the strides incX and incY:
//  x[i] = c * x[i] + s * y[i]
//  y[i] = c * y[i] - s * x[i]
// Both right-hand sides are evaluated with the values held before the update.
//
// Drot panics if incX or incY is zero, if n is negative, or if x or y is too
// short to hold n elements at its stride. It returns without touching x or y
// when n is zero.
func (Implementation) Drot(n int, x []float64, incX int, y []float64, incY int, c float64, s float64) {
	switch {
	case incX == 0:
		panic(zeroIncX)
	case incY == 0:
		panic(zeroIncY)
	case n < 0:
		panic(nLT0)
	case n == 0:
		return
	}

	// Largest index visited in each vector. With a negative stride the
	// traversal starts at that index and walks towards zero.
	lastX := (n - 1) * incX
	if incX < 0 {
		lastX = (1 - n) * incX
	}
	if len(x) <= lastX {
		panic(shortX)
	}
	lastY := (n - 1) * incY
	if incY < 0 {
		lastY = (1 - n) * incY
	}
	if len(y) <= lastY {
		panic(shortY)
	}

	if incX == 1 && incY == 1 {
		xs := x[:n]
		for i := range xs {
			xi, yi := xs[i], y[i]
			xs[i] = c*xi + s*yi
			y[i] = c*yi - s*xi
		}
		return
	}

	px, py := 0, 0
	if incX < 0 {
		px = lastX
	}
	if incY < 0 {
		py = lastY
	}
	for i := 0; i < n; i, px, py = i+1, px+incX, py+incY {
		xi, yi := x[px], y[py]
		x[px] = c*xi + s*yi
		y[py] = c*yi - s*xi
	}
}
// Drotm applies the modified Givens rotation described by p to the 2×n matrix
// whose rows are the n strided elements of x and of y. For every column
//  x[i] = h11*x[i] + h12*y[i]
//  y[i] = h21*x[i] + h22*y[i]
// where the entries of H depend on p.Flag:
//  blas.Identity:    x and y are left unchanged
//  blas.Rescaling:   h11 = p.H[0], h21 = p.H[1], h12 = p.H[2], h22 = p.H[3]
//  blas.OffDiagonal: h11 = h22 = 1, h21 = p.H[1], h12 = p.H[2]
//  blas.Diagonal:    h11 = p.H[0], h22 = p.H[3], h12 = 1, h21 = -1
// Any other flag value leaves x and y unchanged.
//
// Drotm panics if incX or incY is zero, if n is negative, or if x or y is too
// short to hold n elements at its stride.
func (Implementation) Drotm(n int, x []float64, incX int, y []float64, incY int, p blas.DrotmParams) {
	switch {
	case incX == 0:
		panic(zeroIncX)
	case incY == 0:
		panic(zeroIncY)
	case n < 0:
		panic(nLT0)
	case n == 0:
		return
	}

	// Largest index visited in each vector; also the starting index when
	// the corresponding stride is negative.
	lastX := (n - 1) * incX
	if incX < 0 {
		lastX = (1 - n) * incX
	}
	if len(x) <= lastX {
		panic(shortX)
	}
	lastY := (n - 1) * incY
	if incY < 0 {
		lastY = (1 - n) * incY
	}
	if len(y) <= lastY {
		panic(shortY)
	}

	unitStride := incX == 1 && incY == 1
	startX, startY := 0, 0
	if incX < 0 {
		startX = lastX
	}
	if incY < 0 {
		startY = lastY
	}

	switch p.Flag {
	case blas.Identity:
		return

	case blas.Rescaling:
		h11, h21, h12, h22 := p.H[0], p.H[1], p.H[2], p.H[3]
		if unitStride {
			xs := x[:n]
			for i := range xs {
				xi, yi := xs[i], y[i]
				xs[i] = xi*h11 + yi*h12
				y[i] = xi*h21 + yi*h22
			}
			return
		}
		for i, px, py := 0, startX, startY; i < n; i, px, py = i+1, px+incX, py+incY {
			xi, yi := x[px], y[py]
			x[px] = xi*h11 + yi*h12
			y[py] = xi*h21 + yi*h22
		}

	case blas.OffDiagonal:
		h21, h12 := p.H[1], p.H[2]
		if unitStride {
			xs := x[:n]
			for i := range xs {
				xi, yi := xs[i], y[i]
				xs[i] = xi + yi*h12
				y[i] = xi*h21 + yi
			}
			return
		}
		for i, px, py := 0, startX, startY; i < n; i, px, py = i+1, px+incX, py+incY {
			xi, yi := x[px], y[py]
			x[px] = xi + yi*h12
			y[py] = xi*h21 + yi
		}

	case blas.Diagonal:
		h11, h22 := p.H[0], p.H[3]
		if unitStride {
			xs := x[:n]
			for i := range xs {
				xi, yi := xs[i], y[i]
				xs[i] = xi*h11 + yi
				y[i] = -xi + yi*h22
			}
			return
		}
		for i, px, py := 0, startX, startY; i < n; i, px, py = i+1, px+incX, py+incY {
			xi, yi := x[px], y[py]
			x[px] = xi*h11 + yi
			y[py] = -xi + yi*h22
		}
	}
}
// Dscal multiplies n elements of x, taken with stride incX, by alpha.
//  x[i] *= alpha
// When alpha is zero the selected elements are set to zero directly rather
// than multiplied.
//
// Dscal has no effect if incX < 0 or n == 0. It panics if incX is zero, if n
// is negative, or if x is too short to hold n elements at stride incX.
func (Implementation) Dscal(n int, alpha float64, x []float64, incX int) {
	switch {
	case incX == 0:
		panic(zeroIncX)
	case incX < 0:
		return
	case n < 0:
		panic(nLT0)
	case n == 0:
		return
	}
	if len(x) <= (n-1)*incX {
		panic(shortX)
	}

	unit := incX == 1
	switch {
	case alpha == 0 && unit:
		head := x[:n]
		for i := range head {
			head[i] = 0
		}
	case alpha == 0:
		end := n * incX
		for ix := 0; ix < end; ix += incX {
			x[ix] = 0
		}
	case unit:
		f64.ScalUnitary(alpha, x[:n])
	default:
		f64.ScalInc(alpha, x, uintptr(n), uintptr(incX))
	}
}

View File

@@ -0,0 +1,49 @@
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import (
"gonum.org/v1/gonum/internal/asm/f64"
)
// Ddot computes the dot product of two float64 vectors
//  \sum_i x[i]*y[i]
// The n elements of each vector are selected with strides incX and incY.
//
// Ddot returns 0 when n is zero. It panics if incX or incY is zero, if n is
// negative, or if x or y is too short to hold n elements at its stride.
func (Implementation) Ddot(n int, x []float64, incX int, y []float64, incY int) float64 {
	switch {
	case incX == 0:
		panic(zeroIncX)
	case incY == 0:
		panic(zeroIncY)
	case n < 0:
		panic(nLT0)
	case n == 0:
		return 0
	}

	if incX == 1 && incY == 1 {
		switch {
		case len(x) < n:
			panic(shortX)
		case len(y) < n:
			panic(shortY)
		}
		return f64.DotUnitary(x[:n], y[:n])
	}

	// A negative stride starts at the far end of the vector.
	var startX, startY int
	if incX < 0 {
		startX = (-n + 1) * incX
	}
	if incY < 0 {
		startY = (-n + 1) * incY
	}
	if startX >= len(x) || startX+(n-1)*incX >= len(x) {
		panic(shortX)
	}
	if startY >= len(y) || startY+(n-1)*incY >= len(y) {
		panic(shortY)
	}
	return f64.DotInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(startX), uintptr(startY))
}

2906
vendor/gonum.org/v1/gonum/blas/gonum/level2cmplx128.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

2942
vendor/gonum.org/v1/gonum/blas/gonum/level2cmplx64.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

2296
vendor/gonum.org/v1/gonum/blas/gonum/level2float32.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

2264
vendor/gonum.org/v1/gonum/blas/gonum/level2float64.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

1715
vendor/gonum.org/v1/gonum/blas/gonum/level3cmplx128.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

1735
vendor/gonum.org/v1/gonum/blas/gonum/level3cmplx64.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

876
vendor/gonum.org/v1/gonum/blas/gonum/level3float32.go generated vendored Normal file
View File

@@ -0,0 +1,876 @@
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum"; DO NOT EDIT.
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import (
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/internal/asm/f32"
)
var _ blas.Float32Level3 = Implementation{}
// Strsm solves one of the matrix equations
//  A * X = alpha * B    if tA == blas.NoTrans and side == blas.Left
//  A^T * X = alpha * B  if tA == blas.Trans or blas.ConjTrans, and side == blas.Left
//  X * A = alpha * B    if tA == blas.NoTrans and side == blas.Right
//  X * A^T = alpha * B  if tA == blas.Trans or blas.ConjTrans, and side == blas.Right
// where A is an n×n or m×m triangular matrix, X and B are m×n matrices, and alpha is a
// scalar.
//
// At entry to the function, X contains the values of B, and the result is
// stored in-place into X.
//
// Matrices are stored row-major: element (i, j) of A is a[i*lda+j] and
// element (i, j) of B is b[i*ldb+j]. When d is blas.Unit the diagonal of A is
// not read. When alpha is zero, B is set to zero and A is not read.
//
// No check is made that A is invertible.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Strsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int) {
	switch {
	case s != blas.Left && s != blas.Right:
		panic(badSide)
	case ul != blas.Lower && ul != blas.Upper:
		panic(badUplo)
	case tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans:
		panic(badTranspose)
	case d != blas.NonUnit && d != blas.Unit:
		panic(badDiag)
	case m < 0:
		panic(mLT0)
	case n < 0:
		panic(nLT0)
	}

	onLeft := s == blas.Left
	// order is the dimension of the square matrix A.
	order := n
	if onLeft {
		order = m
	}
	switch {
	case lda < max(1, order):
		panic(badLdA)
	case ldb < max(1, n):
		panic(badLdB)
	}

	// Quick return if possible.
	if m == 0 || n == 0 {
		return
	}

	// For zero matrix size the following slice length checks are trivially satisfied.
	switch {
	case len(a) < lda*(order-1)+order:
		panic(shortA)
	case len(b) < ldb*(m-1)+n:
		panic(shortB)
	}

	if alpha == 0 {
		for i := 0; i < m; i++ {
			row := b[i*ldb : i*ldb+n]
			for j := range row {
				row[j] = 0
			}
		}
		return
	}

	var (
		nonUnit = d == blas.NonUnit
		noTrans = tA == blas.NoTrans
		upper   = ul == blas.Upper
	)

	switch {
	case onLeft && noTrans && upper:
		// Back substitution over the rows of B, last row first.
		for i := m - 1; i >= 0; i-- {
			row := b[i*ldb : i*ldb+n]
			if alpha != 1 {
				f32.ScalUnitary(alpha, row)
			}
			for off, av := range a[i*lda+i+1 : i*lda+m] {
				if av == 0 {
					continue
				}
				r := i + 1 + off
				f32.AxpyUnitary(-av, b[r*ldb:r*ldb+n], row)
			}
			if nonUnit {
				f32.ScalUnitary(1/a[i*lda+i], row)
			}
		}

	case onLeft && noTrans:
		// Forward substitution over the rows of B, first row first.
		for i := 0; i < m; i++ {
			row := b[i*ldb : i*ldb+n]
			if alpha != 1 {
				f32.ScalUnitary(alpha, row)
			}
			for r, av := range a[i*lda : i*lda+i] {
				if av == 0 {
					continue
				}
				f32.AxpyUnitary(-av, b[r*ldb:r*ldb+n], row)
			}
			if nonUnit {
				f32.ScalUnitary(1/a[i*lda+i], row)
			}
		}

	case onLeft && upper:
		// A is transposed: row r of B is finalised and then eliminated
		// from the rows below it. alpha is applied to a row only after
		// it has been used for elimination.
		for r := 0; r < m; r++ {
			pivot := b[r*ldb : r*ldb+n]
			if nonUnit {
				f32.ScalUnitary(1/a[r*lda+r], pivot)
			}
			for off, av := range a[r*lda+r+1 : r*lda+m] {
				if av == 0 {
					continue
				}
				i := r + 1 + off
				f32.AxpyUnitary(-av, pivot, b[i*ldb:i*ldb+n])
			}
			if alpha != 1 {
				f32.ScalUnitary(alpha, pivot)
			}
		}

	case onLeft:
		// A is transposed and lower triangular: as above, but from the
		// last row up, eliminating from the rows above.
		for r := m - 1; r >= 0; r-- {
			pivot := b[r*ldb : r*ldb+n]
			if nonUnit {
				f32.ScalUnitary(1/a[r*lda+r], pivot)
			}
			for i, av := range a[r*lda : r*lda+r] {
				if av == 0 {
					continue
				}
				f32.AxpyUnitary(-av, pivot, b[i*ldb:i*ldb+n])
			}
			if alpha != 1 {
				f32.ScalUnitary(alpha, pivot)
			}
		}

	case noTrans && upper:
		// A is to the right of X: each row of B is solved independently,
		// sweeping left to right.
		for i := 0; i < m; i++ {
			row := b[i*ldb : i*ldb+n]
			if alpha != 1 {
				f32.ScalUnitary(alpha, row)
			}
			for j := 0; j < n; j++ {
				if row[j] == 0 {
					continue
				}
				if nonUnit {
					row[j] /= a[j*lda+j]
				}
				f32.AxpyUnitary(-row[j], a[j*lda+j+1:j*lda+n], row[j+1:n])
			}
		}

	case noTrans:
		// As above for lower triangular A, sweeping right to left.
		for i := 0; i < m; i++ {
			row := b[i*ldb : i*ldb+n]
			if alpha != 1 {
				f32.ScalUnitary(alpha, row)
			}
			for j := n - 1; j >= 0; j-- {
				if row[j] == 0 {
					continue
				}
				if nonUnit {
					row[j] /= a[j*lda+j]
				}
				f32.AxpyUnitary(-row[j], a[j*lda:j*lda+j], row[:j])
			}
		}

	case upper:
		// A is transposed and to the right of X: each entry is obtained
		// from a dot product with the entries already solved to its right.
		for i := 0; i < m; i++ {
			row := b[i*ldb : i*ldb+n]
			for j := n - 1; j >= 0; j-- {
				v := alpha*row[j] - f32.DotUnitary(a[j*lda+j+1:j*lda+n], row[j+1:])
				if nonUnit {
					v /= a[j*lda+j]
				}
				row[j] = v
			}
		}

	default:
		// As above for lower triangular A, using the entries already
		// solved to the left.
		for i := 0; i < m; i++ {
			row := b[i*ldb : i*ldb+n]
			for j := 0; j < n; j++ {
				v := alpha*row[j] - f32.DotUnitary(a[j*lda:j*lda+j], row[:j])
				if nonUnit {
					v /= a[j*lda+j]
				}
				row[j] = v
			}
		}
	}
}
// Ssymm performs one of the matrix-matrix operations
//  C = alpha * A * B + beta * C  if side == blas.Left
//  C = alpha * B * A + beta * C  if side == blas.Right
// where A is an n×n or m×m symmetric matrix, B and C are m×n matrices, and alpha
// and beta are scalars.
//
// Matrices are stored row-major with leading dimensions lda, ldb and ldc.
// Only the triangle of A selected by ul is read. When beta is zero, C is
// overwritten rather than scaled, so values held in C on entry (including
// NaN and Inf) do not affect the result.
//
// Ssymm panics if s or ul is invalid, if m or n is negative, if a leading
// dimension is too small, or if a, b or c is too short for its matrix.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Ssymm(s blas.Side, ul blas.Uplo, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int) {
	if s != blas.Right && s != blas.Left {
		panic(badSide)
	}
	if ul != blas.Lower && ul != blas.Upper {
		panic(badUplo)
	}
	if m < 0 {
		panic(mLT0)
	}
	if n < 0 {
		panic(nLT0)
	}
	// k is the order of the symmetric matrix A.
	k := n
	if s == blas.Left {
		k = m
	}
	if lda < max(1, k) {
		panic(badLdA)
	}
	if ldb < max(1, n) {
		panic(badLdB)
	}
	if ldc < max(1, n) {
		panic(badLdC)
	}

	// Quick return if possible.
	if m == 0 || n == 0 {
		return
	}

	// For zero matrix size the following slice length checks are trivially satisfied.
	if len(a) < lda*(k-1)+k {
		panic(shortA)
	}
	if len(b) < ldb*(m-1)+n {
		panic(shortB)
	}
	if len(c) < ldc*(m-1)+n {
		panic(shortC)
	}

	// Quick return if possible.
	if alpha == 0 && beta == 1 {
		return
	}

	if alpha == 0 {
		if beta == 0 {
			for i := 0; i < m; i++ {
				ctmp := c[i*ldc : i*ldc+n]
				for j := range ctmp {
					ctmp[j] = 0
				}
			}
			return
		}
		for i := 0; i < m; i++ {
			ctmp := c[i*ldc : i*ldc+n]
			for j := 0; j < n; j++ {
				ctmp[j] *= beta
			}
		}
		return
	}

	isUpper := ul == blas.Upper
	if s == blas.Left {
		for i := 0; i < m; i++ {
			atmp := alpha * a[i*lda+i]
			btmp := b[i*ldb : i*ldb+n]
			ctmp := c[i*ldc : i*ldc+n]
			// Diagonal contribution. With beta == 0 the row of C is
			// overwritten so that its previous contents are ignored.
			if beta == 0 {
				for j, v := range btmp {
					ctmp[j] = atmp * v
				}
			} else {
				for j, v := range btmp {
					ctmp[j] *= beta
					ctmp[j] += atmp * v
				}
			}

			// Off-diagonal contributions, reading A only from the
			// triangle selected by ul.
			for k := 0; k < i; k++ {
				var atmp float32
				if isUpper {
					atmp = a[k*lda+i]
				} else {
					atmp = a[i*lda+k]
				}
				atmp *= alpha
				f32.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ctmp)
			}
			for k := i + 1; k < m; k++ {
				var atmp float32
				if isUpper {
					atmp = a[i*lda+k]
				} else {
					atmp = a[k*lda+i]
				}
				atmp *= alpha
				f32.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ctmp)
			}
		}
		return
	}

	// A is to the right of B. Column j of each row of C is scaled by beta
	// only when it is reached; the entries updated through ctmp at that
	// point have already been scaled in an earlier iteration.
	if isUpper {
		for i := 0; i < m; i++ {
			for j := n - 1; j >= 0; j-- {
				tmp := alpha * b[i*ldb+j]
				var tmp2 float32
				atmp := a[j*lda+j+1 : j*lda+n]
				btmp := b[i*ldb+j+1 : i*ldb+n]
				ctmp := c[i*ldc+j+1 : i*ldc+n]
				for k, v := range atmp {
					ctmp[k] += tmp * v
					tmp2 += btmp[k] * v
				}
				if beta == 0 {
					c[i*ldc+j] = 0
				} else {
					c[i*ldc+j] *= beta
				}
				c[i*ldc+j] += tmp*a[j*lda+j] + alpha*tmp2
			}
		}
		return
	}
	for i := 0; i < m; i++ {
		for j := 0; j < n; j++ {
			tmp := alpha * b[i*ldb+j]
			var tmp2 float32
			atmp := a[j*lda : j*lda+j]
			btmp := b[i*ldb : i*ldb+j]
			ctmp := c[i*ldc : i*ldc+j]
			for k, v := range atmp {
				ctmp[k] += tmp * v
				tmp2 += btmp[k] * v
			}
			if beta == 0 {
				c[i*ldc+j] = 0
			} else {
				c[i*ldc+j] *= beta
			}
			c[i*ldc+j] += tmp*a[j*lda+j] + alpha*tmp2
		}
	}
}
// Ssyrk performs one of the symmetric rank-k operations
//  C = alpha * A * A^T + beta * C  if tA == blas.NoTrans
//  C = alpha * A^T * A + beta * C  if tA == blas.Trans or tA == blas.ConjTrans
// where A is an n×k or k×n matrix, C is an n×n symmetric matrix, and alpha and
// beta are scalars.
//
// Matrices are stored row-major with leading dimensions lda and ldc. Only the
// triangle of C selected by ul is read and written. When beta is zero, that
// triangle is overwritten rather than scaled, so values held in C on entry
// (including NaN and Inf) do not affect the result.
//
// Ssyrk panics if ul or tA is invalid, if n or k is negative, if a leading
// dimension is too small, or if a or c is too short for its matrix.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Ssyrk(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float32, a []float32, lda int, beta float32, c []float32, ldc int) {
	if ul != blas.Lower && ul != blas.Upper {
		panic(badUplo)
	}
	if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans {
		panic(badTranspose)
	}
	if n < 0 {
		panic(nLT0)
	}
	if k < 0 {
		panic(kLT0)
	}
	// Stored shape of A: n×k when not transposed, k×n otherwise.
	row, col := k, n
	if tA == blas.NoTrans {
		row, col = n, k
	}
	if lda < max(1, col) {
		panic(badLdA)
	}
	if ldc < max(1, n) {
		panic(badLdC)
	}

	// Quick return if possible.
	if n == 0 {
		return
	}

	// For zero matrix size the following slice length checks are trivially satisfied.
	if len(a) < lda*(row-1)+col {
		panic(shortA)
	}
	if len(c) < ldc*(n-1)+n {
		panic(shortC)
	}

	if alpha == 0 {
		if beta == 0 {
			if ul == blas.Upper {
				for i := 0; i < n; i++ {
					ctmp := c[i*ldc+i : i*ldc+n]
					for j := range ctmp {
						ctmp[j] = 0
					}
				}
				return
			}
			for i := 0; i < n; i++ {
				ctmp := c[i*ldc : i*ldc+i+1]
				for j := range ctmp {
					ctmp[j] = 0
				}
			}
			return
		}
		if ul == blas.Upper {
			for i := 0; i < n; i++ {
				ctmp := c[i*ldc+i : i*ldc+n]
				for j := range ctmp {
					ctmp[j] *= beta
				}
			}
			return
		}
		for i := 0; i < n; i++ {
			ctmp := c[i*ldc : i*ldc+i+1]
			for j := range ctmp {
				ctmp[j] *= beta
			}
		}
		return
	}

	if tA == blas.NoTrans {
		// C[i][j] is the dot product of rows i and j of A.
		if ul == blas.Upper {
			for i := 0; i < n; i++ {
				ctmp := c[i*ldc+i : i*ldc+n]
				atmp := a[i*lda : i*lda+k]
				if beta == 0 {
					for jc := range ctmp {
						j := jc + i
						ctmp[jc] = alpha * f32.DotUnitary(atmp, a[j*lda:j*lda+k])
					}
				} else {
					for jc, vc := range ctmp {
						j := jc + i
						ctmp[jc] = vc*beta + alpha*f32.DotUnitary(atmp, a[j*lda:j*lda+k])
					}
				}
			}
			return
		}
		for i := 0; i < n; i++ {
			ctmp := c[i*ldc : i*ldc+i+1]
			atmp := a[i*lda : i*lda+k]
			if beta == 0 {
				for j := range ctmp {
					ctmp[j] = alpha * f32.DotUnitary(a[j*lda:j*lda+k], atmp)
				}
			} else {
				for j, vc := range ctmp {
					ctmp[j] = vc*beta + alpha*f32.DotUnitary(a[j*lda:j*lda+k], atmp)
				}
			}
		}
		return
	}

	// Cases where a is transposed. Each row of the selected triangle of C
	// is first scaled (or cleared when beta == 0) and then accumulates one
	// scaled row segment of A per l.
	if ul == blas.Upper {
		for i := 0; i < n; i++ {
			ctmp := c[i*ldc+i : i*ldc+n]
			if beta == 0 {
				for j := range ctmp {
					ctmp[j] = 0
				}
			} else if beta != 1 {
				for j := range ctmp {
					ctmp[j] *= beta
				}
			}
			for l := 0; l < k; l++ {
				tmp := alpha * a[l*lda+i]
				if tmp != 0 {
					f32.AxpyUnitary(tmp, a[l*lda+i:l*lda+n], ctmp)
				}
			}
		}
		return
	}
	for i := 0; i < n; i++ {
		ctmp := c[i*ldc : i*ldc+i+1]
		// Clear on beta == 0, as in the upper-triangle case, so that
		// NaN or Inf in C is not carried into the result.
		if beta == 0 {
			for j := range ctmp {
				ctmp[j] = 0
			}
		} else if beta != 1 {
			for j := range ctmp {
				ctmp[j] *= beta
			}
		}
		for l := 0; l < k; l++ {
			tmp := alpha * a[l*lda+i]
			if tmp != 0 {
				f32.AxpyUnitary(tmp, a[l*lda:l*lda+i+1], ctmp)
			}
		}
	}
}
// Ssyr2k performs one of the symmetric rank 2k operations
//  C = alpha * A * B^T + alpha * B * A^T + beta * C  if tA == blas.NoTrans
//  C = alpha * A^T * B + alpha * B^T * A + beta * C  if tA == blas.Trans or tA == blas.ConjTrans
// where A and B are n×k or k×n matrices, C is an n×n symmetric matrix, and
// alpha and beta are scalars.
//
// Matrices are stored row-major with leading dimensions lda, ldb and ldc.
// Only the triangle of C selected by ul is read and written. When beta is
// zero, that triangle is overwritten rather than scaled, so values held in C
// on entry (including NaN and Inf) do not affect the result.
//
// Ssyr2k panics if ul or tA is invalid, if n or k is negative, if a leading
// dimension is too small, or if a, b or c is too short for its matrix.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Ssyr2k(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int) {
	if ul != blas.Lower && ul != blas.Upper {
		panic(badUplo)
	}
	if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans {
		panic(badTranspose)
	}
	if n < 0 {
		panic(nLT0)
	}
	if k < 0 {
		panic(kLT0)
	}
	// Stored shape of A and B: n×k when not transposed, k×n otherwise.
	row, col := k, n
	if tA == blas.NoTrans {
		row, col = n, k
	}
	if lda < max(1, col) {
		panic(badLdA)
	}
	if ldb < max(1, col) {
		panic(badLdB)
	}
	if ldc < max(1, n) {
		panic(badLdC)
	}

	// Quick return if possible.
	if n == 0 {
		return
	}

	// For zero matrix size the following slice length checks are trivially satisfied.
	if len(a) < lda*(row-1)+col {
		panic(shortA)
	}
	if len(b) < ldb*(row-1)+col {
		panic(shortB)
	}
	if len(c) < ldc*(n-1)+n {
		panic(shortC)
	}

	if alpha == 0 {
		if beta == 0 {
			if ul == blas.Upper {
				for i := 0; i < n; i++ {
					ctmp := c[i*ldc+i : i*ldc+n]
					for j := range ctmp {
						ctmp[j] = 0
					}
				}
				return
			}
			for i := 0; i < n; i++ {
				ctmp := c[i*ldc : i*ldc+i+1]
				for j := range ctmp {
					ctmp[j] = 0
				}
			}
			return
		}
		if ul == blas.Upper {
			for i := 0; i < n; i++ {
				ctmp := c[i*ldc+i : i*ldc+n]
				for j := range ctmp {
					ctmp[j] *= beta
				}
			}
			return
		}
		for i := 0; i < n; i++ {
			ctmp := c[i*ldc : i*ldc+i+1]
			for j := range ctmp {
				ctmp[j] *= beta
			}
		}
		return
	}

	if tA == blas.NoTrans {
		// C[i][j] receives alpha times the sum of the dot products
		// A[j]·B[i] and A[i]·B[j] of the stored rows.
		if ul == blas.Upper {
			for i := 0; i < n; i++ {
				atmp := a[i*lda : i*lda+k]
				btmp := b[i*ldb : i*ldb+k]
				ctmp := c[i*ldc+i : i*ldc+n]
				for jc := range ctmp {
					j := i + jc
					var tmp1, tmp2 float32
					binner := b[j*ldb : j*ldb+k]
					for l, v := range a[j*lda : j*lda+k] {
						tmp1 += v * btmp[l]
						tmp2 += atmp[l] * binner[l]
					}
					// Overwrite on beta == 0 so that the old
					// value of C is ignored.
					if beta == 0 {
						ctmp[jc] = alpha * (tmp1 + tmp2)
					} else {
						ctmp[jc] *= beta
						ctmp[jc] += alpha * (tmp1 + tmp2)
					}
				}
			}
			return
		}
		for i := 0; i < n; i++ {
			atmp := a[i*lda : i*lda+k]
			btmp := b[i*ldb : i*ldb+k]
			ctmp := c[i*ldc : i*ldc+i+1]
			for j := 0; j <= i; j++ {
				var tmp1, tmp2 float32
				binner := b[j*ldb : j*ldb+k]
				for l, v := range a[j*lda : j*lda+k] {
					tmp1 += v * btmp[l]
					tmp2 += atmp[l] * binner[l]
				}
				if beta == 0 {
					ctmp[j] = alpha * (tmp1 + tmp2)
				} else {
					ctmp[j] *= beta
					ctmp[j] += alpha * (tmp1 + tmp2)
				}
			}
		}
		return
	}

	// Cases where a and b are transposed. Each row of the selected
	// triangle of C is first scaled (or cleared when beta == 0) and then
	// accumulates one pair of scaled row segments of A and B per l.
	if ul == blas.Upper {
		for i := 0; i < n; i++ {
			ctmp := c[i*ldc+i : i*ldc+n]
			if beta == 0 {
				for j := range ctmp {
					ctmp[j] = 0
				}
			} else if beta != 1 {
				for j := range ctmp {
					ctmp[j] *= beta
				}
			}
			for l := 0; l < k; l++ {
				tmp1 := alpha * b[l*ldb+i]
				tmp2 := alpha * a[l*lda+i]
				btmp := b[l*ldb+i : l*ldb+n]
				if tmp1 != 0 || tmp2 != 0 {
					for j, v := range a[l*lda+i : l*lda+n] {
						ctmp[j] += v*tmp1 + btmp[j]*tmp2
					}
				}
			}
		}
		return
	}
	for i := 0; i < n; i++ {
		ctmp := c[i*ldc : i*ldc+i+1]
		if beta == 0 {
			for j := range ctmp {
				ctmp[j] = 0
			}
		} else if beta != 1 {
			for j := range ctmp {
				ctmp[j] *= beta
			}
		}
		for l := 0; l < k; l++ {
			tmp1 := alpha * b[l*ldb+i]
			tmp2 := alpha * a[l*lda+i]
			btmp := b[l*ldb : l*ldb+i+1]
			if tmp1 != 0 || tmp2 != 0 {
				for j, v := range a[l*lda : l*lda+i+1] {
					ctmp[j] += v*tmp1 + btmp[j]*tmp2
				}
			}
		}
	}
}
// Strmm performs one of the matrix-matrix operations
//  B = alpha * A * B    if tA == blas.NoTrans and side == blas.Left
//  B = alpha * A^T * B  if tA == blas.Trans or blas.ConjTrans, and side == blas.Left
//  B = alpha * B * A    if tA == blas.NoTrans and side == blas.Right
//  B = alpha * B * A^T  if tA == blas.Trans or blas.ConjTrans, and side == blas.Right
// where A is an n×n or m×m triangular matrix, B is an m×n matrix, and alpha is a scalar.
//
// A is m×m when s == blas.Left and n×n when s == blas.Right. Only the
// triangle of a selected by ul is read, and when d == blas.Unit the diagonal
// of a is not read and is treated as one. Row i of B occupies
// b[i*ldb : i*ldb+n]; the result overwrites b in place.
//
// Strmm panics if s, ul, tA or d is not a valid value, if m or n is
// negative, if lda or ldb is smaller than the corresponding row length, or if
// a or b is too short for the given dimensions.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Strmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int) {
	if s != blas.Left && s != blas.Right {
		panic(badSide)
	}
	if ul != blas.Lower && ul != blas.Upper {
		panic(badUplo)
	}
	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
		panic(badTranspose)
	}
	if d != blas.NonUnit && d != blas.Unit {
		panic(badDiag)
	}
	if m < 0 {
		panic(mLT0)
	}
	if n < 0 {
		panic(nLT0)
	}
	// k is the order of the triangular matrix A: m when A is applied from
	// the left, n when it is applied from the right.
	k := n
	if s == blas.Left {
		k = m
	}
	if lda < max(1, k) {
		panic(badLdA)
	}
	if ldb < max(1, n) {
		panic(badLdB)
	}
	// Quick return if possible.
	if m == 0 || n == 0 {
		return
	}
	// For zero matrix size the following slice length checks are trivially satisfied.
	if len(a) < lda*(k-1)+k {
		panic(shortA)
	}
	if len(b) < ldb*(m-1)+n {
		panic(shortB)
	}
	// With alpha == 0 the result is the zero matrix; a is not read.
	if alpha == 0 {
		for i := 0; i < m; i++ {
			btmp := b[i*ldb : i*ldb+n]
			for j := range btmp {
				btmp[j] = 0
			}
		}
		return
	}
	nonUnit := d == blas.NonUnit
	if s == blas.Left {
		if tA == blas.NoTrans {
			if ul == blas.Upper {
				// Row i of the result depends on rows i..m-1 of B, so
				// walking i upwards only reads rows that are still
				// unmodified.
				for i := 0; i < m; i++ {
					tmp := alpha
					if nonUnit {
						tmp *= a[i*lda+i]
					}
					btmp := b[i*ldb : i*ldb+n]
					f32.ScalUnitary(tmp, btmp)
					for ka, va := range a[i*lda+i+1 : i*lda+m] {
						// ka indexes the strictly upper part of row i;
						// k is the matching row of B.
						k := ka + i + 1
						if va != 0 {
							f32.AxpyUnitary(alpha*va, b[k*ldb:k*ldb+n], btmp)
						}
					}
				}
				return
			}
			// Lower triangular: row i depends on rows 0..i, so walk i
			// downwards to keep the rows it reads unmodified.
			for i := m - 1; i >= 0; i-- {
				tmp := alpha
				if nonUnit {
					tmp *= a[i*lda+i]
				}
				btmp := b[i*ldb : i*ldb+n]
				f32.ScalUnitary(tmp, btmp)
				for k, va := range a[i*lda : i*lda+i] {
					if va != 0 {
						f32.AxpyUnitary(alpha*va, b[k*ldb:k*ldb+n], btmp)
					}
				}
			}
			return
		}
		// Cases where a is transposed.
		if ul == blas.Upper {
			// Row k of B is scattered into rows k+1..m-1 before it is
			// scaled by its own diagonal term; k runs downwards so that
			// every row receiving a contribution has already been
			// finalized.
			for k := m - 1; k >= 0; k-- {
				btmpk := b[k*ldb : k*ldb+n]
				for ia, va := range a[k*lda+k+1 : k*lda+m] {
					i := ia + k + 1
					btmp := b[i*ldb : i*ldb+n]
					if va != 0 {
						f32.AxpyUnitary(alpha*va, btmpk, btmp)
					}
				}
				tmp := alpha
				if nonUnit {
					tmp *= a[k*lda+k]
				}
				if tmp != 1 {
					f32.ScalUnitary(tmp, btmpk)
				}
			}
			return
		}
		// Transposed lower triangular: row k is scattered into rows
		// 0..k-1, so k runs upwards.
		for k := 0; k < m; k++ {
			btmpk := b[k*ldb : k*ldb+n]
			for i, va := range a[k*lda : k*lda+k] {
				btmp := b[i*ldb : i*ldb+n]
				if va != 0 {
					f32.AxpyUnitary(alpha*va, btmpk, btmp)
				}
			}
			tmp := alpha
			if nonUnit {
				tmp *= a[k*lda+k]
			}
			if tmp != 1 {
				f32.ScalUnitary(tmp, btmpk)
			}
		}
		return
	}
	// Cases where a is on the right
	if tA == blas.NoTrans {
		if ul == blas.Upper {
			// Each row of B is updated independently. Element k is
			// scattered into elements k+1..n-1, so k runs downwards to
			// read only original values.
			for i := 0; i < m; i++ {
				btmp := b[i*ldb : i*ldb+n]
				for k := n - 1; k >= 0; k-- {
					tmp := alpha * btmp[k]
					if tmp == 0 {
						// btmp[k] is left untouched when the scaled
						// value is zero.
						continue
					}
					btmp[k] = tmp
					if nonUnit {
						btmp[k] *= a[k*lda+k]
					}
					f32.AxpyUnitary(tmp, a[k*lda+k+1:k*lda+n], btmp[k+1:n])
				}
			}
			return
		}
		// Lower triangular: element k is scattered into elements 0..k-1,
		// so k runs upwards.
		for i := 0; i < m; i++ {
			btmp := b[i*ldb : i*ldb+n]
			for k := 0; k < n; k++ {
				tmp := alpha * btmp[k]
				if tmp == 0 {
					continue
				}
				btmp[k] = tmp
				if nonUnit {
					btmp[k] *= a[k*lda+k]
				}
				f32.AxpyUnitary(tmp, a[k*lda:k*lda+k], btmp[:k])
			}
		}
		return
	}
	// Cases where a is transposed.
	if ul == blas.Upper {
		// Element j gathers from elements j..n-1 of the same row, so j
		// runs upwards and the elements it reads are still original.
		for i := 0; i < m; i++ {
			btmp := b[i*ldb : i*ldb+n]
			for j, vb := range btmp {
				tmp := vb
				if nonUnit {
					tmp *= a[j*lda+j]
				}
				tmp += f32.DotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:n])
				btmp[j] = alpha * tmp
			}
		}
		return
	}
	// Transposed lower triangular: element j gathers from elements 0..j,
	// so j runs downwards.
	for i := 0; i < m; i++ {
		btmp := b[i*ldb : i*ldb+n]
		for j := n - 1; j >= 0; j-- {
			tmp := btmp[j]
			if nonUnit {
				tmp *= a[j*lda+j]
			}
			tmp += f32.DotUnitary(a[j*lda:j*lda+j], btmp[:j])
			btmp[j] = alpha * tmp
		}
	}
}

864
vendor/gonum.org/v1/gonum/blas/gonum/level3float64.go generated vendored Normal file
View File

@@ -0,0 +1,864 @@
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import (
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/internal/asm/f64"
)
var _ blas.Float64Level3 = Implementation{}
// Dtrsm solves one of the matrix equations
//  A * X = alpha * B    if tA == blas.NoTrans and side == blas.Left
//  A^T * X = alpha * B  if tA == blas.Trans or blas.ConjTrans, and side == blas.Left
//  X * A = alpha * B    if tA == blas.NoTrans and side == blas.Right
//  X * A^T = alpha * B  if tA == blas.Trans or blas.ConjTrans, and side == blas.Right
// where A is an n×n or m×m triangular matrix, X and B are m×n matrices, and alpha is a
// scalar.
//
// A is m×m when s == blas.Left and n×n when s == blas.Right. Only the
// triangle of a selected by ul is read, and when d == blas.Unit the diagonal
// of a is not read and is treated as one.
//
// At entry to the function, X contains the values of B, and the result is
// stored in-place into X.
//
// Dtrsm panics if s, ul, tA or d is not a valid value, if m or n is
// negative, if lda or ldb is smaller than the corresponding row length, or if
// a or b is too short for the given dimensions.
//
// No check is made that A is invertible.
func (Implementation) Dtrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int) {
	if s != blas.Left && s != blas.Right {
		panic(badSide)
	}
	if ul != blas.Lower && ul != blas.Upper {
		panic(badUplo)
	}
	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
		panic(badTranspose)
	}
	if d != blas.NonUnit && d != blas.Unit {
		panic(badDiag)
	}
	if m < 0 {
		panic(mLT0)
	}
	if n < 0 {
		panic(nLT0)
	}
	// k is the order of the triangular matrix A: m when A is on the left,
	// n when it is on the right.
	k := n
	if s == blas.Left {
		k = m
	}
	if lda < max(1, k) {
		panic(badLdA)
	}
	if ldb < max(1, n) {
		panic(badLdB)
	}
	// Quick return if possible.
	if m == 0 || n == 0 {
		return
	}
	// For zero matrix size the following slice length checks are trivially satisfied.
	if len(a) < lda*(k-1)+k {
		panic(shortA)
	}
	if len(b) < ldb*(m-1)+n {
		panic(shortB)
	}
	// With alpha == 0 the right-hand side is zero, so X is set to zero
	// without reading a.
	if alpha == 0 {
		for i := 0; i < m; i++ {
			btmp := b[i*ldb : i*ldb+n]
			for j := range btmp {
				btmp[j] = 0
			}
		}
		return
	}
	nonUnit := d == blas.NonUnit
	if s == blas.Left {
		if tA == blas.NoTrans {
			if ul == blas.Upper {
				// Back substitution: rows are solved from the last to
				// the first, each subtracting the already solved rows
				// below it before dividing by its diagonal element.
				for i := m - 1; i >= 0; i-- {
					btmp := b[i*ldb : i*ldb+n]
					if alpha != 1 {
						f64.ScalUnitary(alpha, btmp)
					}
					for ka, va := range a[i*lda+i+1 : i*lda+m] {
						if va != 0 {
							k := ka + i + 1
							f64.AxpyUnitary(-va, b[k*ldb:k*ldb+n], btmp)
						}
					}
					if nonUnit {
						tmp := 1 / a[i*lda+i]
						f64.ScalUnitary(tmp, btmp)
					}
				}
				return
			}
			// Forward substitution: rows are solved from the first to the
			// last, each subtracting the already solved rows above it.
			for i := 0; i < m; i++ {
				btmp := b[i*ldb : i*ldb+n]
				if alpha != 1 {
					f64.ScalUnitary(alpha, btmp)
				}
				for k, va := range a[i*lda : i*lda+i] {
					if va != 0 {
						f64.AxpyUnitary(-va, b[k*ldb:k*ldb+n], btmp)
					}
				}
				if nonUnit {
					tmp := 1 / a[i*lda+i]
					f64.ScalUnitary(tmp, btmp)
				}
			}
			return
		}
		// Cases where a is transposed
		if ul == blas.Upper {
			// Row k is divided by its diagonal element and then eliminated
			// from the rows after it. The scaling by alpha is applied to
			// row k last, after the unscaled row has been propagated.
			for k := 0; k < m; k++ {
				btmpk := b[k*ldb : k*ldb+n]
				if nonUnit {
					tmp := 1 / a[k*lda+k]
					f64.ScalUnitary(tmp, btmpk)
				}
				for ia, va := range a[k*lda+k+1 : k*lda+m] {
					if va != 0 {
						i := ia + k + 1
						f64.AxpyUnitary(-va, btmpk, b[i*ldb:i*ldb+n])
					}
				}
				if alpha != 1 {
					f64.ScalUnitary(alpha, btmpk)
				}
			}
			return
		}
		// Transposed lower triangular: same scheme, with k running from
		// the last row to the first and row k eliminated from the rows
		// before it.
		for k := m - 1; k >= 0; k-- {
			btmpk := b[k*ldb : k*ldb+n]
			if nonUnit {
				tmp := 1 / a[k*lda+k]
				f64.ScalUnitary(tmp, btmpk)
			}
			for i, va := range a[k*lda : k*lda+k] {
				if va != 0 {
					f64.AxpyUnitary(-va, btmpk, b[i*ldb:i*ldb+n])
				}
			}
			if alpha != 1 {
				f64.ScalUnitary(alpha, btmpk)
			}
		}
		return
	}
	// Cases where a is to the right of X.
	if tA == blas.NoTrans {
		if ul == blas.Upper {
			// Each row of X is solved independently, element by element
			// from left to right; a solved element is eliminated from the
			// elements to its right.
			for i := 0; i < m; i++ {
				btmp := b[i*ldb : i*ldb+n]
				if alpha != 1 {
					f64.ScalUnitary(alpha, btmp)
				}
				for k, vb := range btmp {
					// vb is read at iteration k, so it already includes
					// the updates made by earlier iterations.
					if vb == 0 {
						continue
					}
					if nonUnit {
						btmp[k] /= a[k*lda+k]
					}
					f64.AxpyUnitary(-btmp[k], a[k*lda+k+1:k*lda+n], btmp[k+1:n])
				}
			}
			return
		}
		// Lower triangular: elements are solved from right to left and
		// eliminated from the elements to their left.
		for i := 0; i < m; i++ {
			btmp := b[i*ldb : i*ldb+n]
			if alpha != 1 {
				f64.ScalUnitary(alpha, btmp)
			}
			for k := n - 1; k >= 0; k-- {
				if btmp[k] == 0 {
					continue
				}
				if nonUnit {
					btmp[k] /= a[k*lda+k]
				}
				f64.AxpyUnitary(-btmp[k], a[k*lda:k*lda+k], btmp[:k])
			}
		}
		return
	}
	// Cases where a is transposed.
	if ul == blas.Upper {
		// Element j is computed from the already solved elements to its
		// right, so j runs from the last column to the first.
		for i := 0; i < m; i++ {
			btmp := b[i*ldb : i*ldb+n]
			for j := n - 1; j >= 0; j-- {
				tmp := alpha*btmp[j] - f64.DotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:])
				if nonUnit {
					tmp /= a[j*lda+j]
				}
				btmp[j] = tmp
			}
		}
		return
	}
	// Transposed lower triangular: element j is computed from the already
	// solved elements to its left, so j runs from the first column to the
	// last.
	for i := 0; i < m; i++ {
		btmp := b[i*ldb : i*ldb+n]
		for j := 0; j < n; j++ {
			tmp := alpha*btmp[j] - f64.DotUnitary(a[j*lda:j*lda+j], btmp[:j])
			if nonUnit {
				tmp /= a[j*lda+j]
			}
			btmp[j] = tmp
		}
	}
}
// Dsymm performs one of the matrix-matrix operations
//  C = alpha * A * B + beta * C  if side == blas.Left
//  C = alpha * B * A + beta * C  if side == blas.Right
// where A is an n×n or m×m symmetric matrix, B and C are m×n matrices, and alpha
// is a scalar.
//
// A is m×m when s == blas.Left and n×n when s == blas.Right, and only the
// triangle of a selected by ul is read. The result overwrites c.
//
// Dsymm panics if s or ul is not a valid value, if m or n is negative, if a
// leading dimension is too small, or if a, b or c is too short for the given
// dimensions.
func (Implementation) Dsymm(s blas.Side, ul blas.Uplo, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) {
	// order is the size of the symmetric matrix A.
	order := n
	if s == blas.Left {
		order = m
	}
	switch {
	case s != blas.Right && s != blas.Left:
		panic(badSide)
	case ul != blas.Lower && ul != blas.Upper:
		panic(badUplo)
	case m < 0:
		panic(mLT0)
	case n < 0:
		panic(nLT0)
	case lda < max(1, order):
		panic(badLdA)
	case ldb < max(1, n):
		panic(badLdB)
	case ldc < max(1, n):
		panic(badLdC)
	}

	// Nothing to do for an empty C.
	if m == 0 || n == 0 {
		return
	}

	// The slice length checks only make sense for non-empty matrices.
	switch {
	case len(a) < lda*(order-1)+order:
		panic(shortA)
	case len(b) < ldb*(m-1)+n:
		panic(shortB)
	case len(c) < ldc*(m-1)+n:
		panic(shortC)
	}

	// Without a product term only the scaling of C remains.
	if alpha == 0 {
		switch beta {
		case 1:
			// C is left as it is.
		case 0:
			for i := 0; i < m; i++ {
				cRow := c[i*ldc : i*ldc+n]
				for j := range cRow {
					cRow[j] = 0
				}
			}
		default:
			for i := 0; i < m; i++ {
				cRow := c[i*ldc : i*ldc+n]
				for j := range cRow {
					cRow[j] *= beta
				}
			}
		}
		return
	}

	upper := ul == blas.Upper

	if s == blas.Left {
		for i := 0; i < m; i++ {
			cRow := c[i*ldc : i*ldc+n]

			// Diagonal term, combined with the scaling of C.
			diag := alpha * a[i*lda+i]
			for j, v := range b[i*ldb : i*ldb+n] {
				cRow[j] *= beta
				cRow[j] += diag * v
			}

			// Off-diagonal terms. A[i][k] is fetched from whichever of
			// (i, k) and (k, i) lies in the stored triangle.
			for k := 0; k < m; k++ {
				if k == i {
					continue
				}
				r, col := i, k
				if (k < i) == upper {
					r, col = k, i
				}
				coef := a[r*lda+col]
				coef *= alpha
				f64.AxpyUnitary(coef, b[k*ldb:k*ldb+n], cRow)
			}
		}
		return
	}

	if upper {
		for i := 0; i < m; i++ {
			bRow := b[i*ldb : i*ldb+n]
			cRow := c[i*ldc : i*ldc+n]
			// j runs downwards so that the entries of C to the right of
			// j have already been scaled by beta when they are updated.
			for j := n - 1; j >= 0; j-- {
				scaled := alpha * bRow[j]
				var dot float64
				bTail := bRow[j+1:]
				cTail := cRow[j+1:]
				for k, v := range a[j*lda+j+1 : j*lda+n] {
					cTail[k] += scaled * v
					dot += bTail[k] * v
				}
				cRow[j] *= beta
				cRow[j] += scaled*a[j*lda+j] + alpha*dot
			}
		}
		return
	}

	for i := 0; i < m; i++ {
		bRow := b[i*ldb : i*ldb+n]
		cRow := c[i*ldc : i*ldc+n]
		// j runs upwards so that the entries of C to the left of j have
		// already been scaled by beta when they are updated.
		for j := 0; j < n; j++ {
			scaled := alpha * bRow[j]
			var dot float64
			bHead := bRow[:j]
			cHead := cRow[:j]
			for k, v := range a[j*lda : j*lda+j] {
				cHead[k] += scaled * v
				dot += bHead[k] * v
			}
			cRow[j] *= beta
			cRow[j] += scaled*a[j*lda+j] + alpha*dot
		}
	}
}
// Dsyrk performs one of the symmetric rank-k operations
//  C = alpha * A * A^T + beta * C  if tA == blas.NoTrans
//  C = alpha * A^T * A + beta * C  if tA == blas.Trans or tA == blas.ConjTrans
// where A is an n×k or k×n matrix, C is an n×n symmetric matrix, and alpha and
// beta are scalars.
//
// Only the triangle of c selected by ul is read and written. When beta is
// zero the previous contents of that triangle are not used, so NaN or Inf
// values in c do not propagate into the result.
//
// Dsyrk panics if ul or tA is not a valid value, if n or k is negative, if
// lda or ldc is too small, or if a or c is too short for the given
// dimensions.
func (Implementation) Dsyrk(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float64, a []float64, lda int, beta float64, c []float64, ldc int) {
	if ul != blas.Lower && ul != blas.Upper {
		panic(badUplo)
	}
	if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans {
		panic(badTranspose)
	}
	if n < 0 {
		panic(nLT0)
	}
	if k < 0 {
		panic(kLT0)
	}
	// A is stored as n×k when not transposed and as k×n otherwise.
	row, col := k, n
	if tA == blas.NoTrans {
		row, col = n, k
	}
	if lda < max(1, col) {
		panic(badLdA)
	}
	if ldc < max(1, n) {
		panic(badLdC)
	}

	// Quick return if possible.
	if n == 0 {
		return
	}

	// For zero matrix size the following slice length checks are trivially satisfied.
	if len(a) < lda*(row-1)+col {
		panic(shortA)
	}
	if len(c) < ldc*(n-1)+n {
		panic(shortC)
	}

	// Without a product term only the scaling of the triangle remains.
	if alpha == 0 {
		if beta == 0 {
			if ul == blas.Upper {
				for i := 0; i < n; i++ {
					ctmp := c[i*ldc+i : i*ldc+n]
					for j := range ctmp {
						ctmp[j] = 0
					}
				}
				return
			}
			for i := 0; i < n; i++ {
				ctmp := c[i*ldc : i*ldc+i+1]
				for j := range ctmp {
					ctmp[j] = 0
				}
			}
			return
		}
		if ul == blas.Upper {
			for i := 0; i < n; i++ {
				ctmp := c[i*ldc+i : i*ldc+n]
				for j := range ctmp {
					ctmp[j] *= beta
				}
			}
			return
		}
		for i := 0; i < n; i++ {
			ctmp := c[i*ldc : i*ldc+i+1]
			for j := range ctmp {
				ctmp[j] *= beta
			}
		}
		return
	}

	if tA == blas.NoTrans {
		// C[i][j] is the dot product of rows i and j of A.
		if ul == blas.Upper {
			for i := 0; i < n; i++ {
				ctmp := c[i*ldc+i : i*ldc+n]
				atmp := a[i*lda : i*lda+k]
				if beta == 0 {
					for jc := range ctmp {
						j := jc + i
						ctmp[jc] = alpha * f64.DotUnitary(atmp, a[j*lda:j*lda+k])
					}
				} else {
					for jc, vc := range ctmp {
						j := jc + i
						ctmp[jc] = vc*beta + alpha*f64.DotUnitary(atmp, a[j*lda:j*lda+k])
					}
				}
			}
			return
		}
		for i := 0; i < n; i++ {
			ctmp := c[i*ldc : i*ldc+i+1]
			atmp := a[i*lda : i*lda+k]
			if beta == 0 {
				for j := range ctmp {
					ctmp[j] = alpha * f64.DotUnitary(a[j*lda:j*lda+k], atmp)
				}
			} else {
				for j, vc := range ctmp {
					ctmp[j] = vc*beta + alpha*f64.DotUnitary(a[j*lda:j*lda+k], atmp)
				}
			}
		}
		return
	}

	// Cases where a is transposed. Each row of the triangle is first scaled
	// by beta and then accumulates one scaled row segment of A per l.
	if ul == blas.Upper {
		for i := 0; i < n; i++ {
			ctmp := c[i*ldc+i : i*ldc+n]
			if beta == 0 {
				for j := range ctmp {
					ctmp[j] = 0
				}
			} else if beta != 1 {
				for j := range ctmp {
					ctmp[j] *= beta
				}
			}
			for l := 0; l < k; l++ {
				tmp := alpha * a[l*lda+i]
				if tmp != 0 {
					f64.AxpyUnitary(tmp, a[l*lda+i:l*lda+n], ctmp)
				}
			}
		}
		return
	}
	for i := 0; i < n; i++ {
		ctmp := c[i*ldc : i*ldc+i+1]
		// beta == 0 must overwrite rather than multiply, matching the upper
		// triangular branch: 0*NaN and 0*Inf are NaN.
		if beta == 0 {
			for j := range ctmp {
				ctmp[j] = 0
			}
		} else if beta != 1 {
			for j := range ctmp {
				ctmp[j] *= beta
			}
		}
		for l := 0; l < k; l++ {
			tmp := alpha * a[l*lda+i]
			if tmp != 0 {
				f64.AxpyUnitary(tmp, a[l*lda:l*lda+i+1], ctmp)
			}
		}
	}
}
// Dsyr2k performs one of the symmetric rank 2k operations
//  C = alpha * A * B^T + alpha * B * A^T + beta * C  if tA == blas.NoTrans
//  C = alpha * A^T * B + alpha * B^T * A + beta * C  if tA == blas.Trans or tA == blas.ConjTrans
// where A and B are n×k or k×n matrices, C is an n×n symmetric matrix, and
// alpha and beta are scalars.
//
// Only the triangle of c selected by ul is read and written. When beta is
// zero the previous contents of that triangle are not used, so NaN or Inf
// values in c do not propagate into the result.
//
// Dsyr2k panics if ul or tA is not a valid value, if n or k is negative, if
// a leading dimension is too small, or if a, b or c is too short for the
// given dimensions.
func (Implementation) Dsyr2k(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) {
	if ul != blas.Lower && ul != blas.Upper {
		panic(badUplo)
	}
	if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans {
		panic(badTranspose)
	}
	if n < 0 {
		panic(nLT0)
	}
	if k < 0 {
		panic(kLT0)
	}
	// A and B are stored as n×k when not transposed and as k×n otherwise.
	row, col := k, n
	if tA == blas.NoTrans {
		row, col = n, k
	}
	if lda < max(1, col) {
		panic(badLdA)
	}
	if ldb < max(1, col) {
		panic(badLdB)
	}
	if ldc < max(1, n) {
		panic(badLdC)
	}

	// Quick return if possible.
	if n == 0 {
		return
	}

	// For zero matrix size the following slice length checks are trivially satisfied.
	if len(a) < lda*(row-1)+col {
		panic(shortA)
	}
	if len(b) < ldb*(row-1)+col {
		panic(shortB)
	}
	if len(c) < ldc*(n-1)+n {
		panic(shortC)
	}

	// Without a product term only the scaling of the triangle remains.
	if alpha == 0 {
		if beta == 0 {
			if ul == blas.Upper {
				for i := 0; i < n; i++ {
					ctmp := c[i*ldc+i : i*ldc+n]
					for j := range ctmp {
						ctmp[j] = 0
					}
				}
				return
			}
			for i := 0; i < n; i++ {
				ctmp := c[i*ldc : i*ldc+i+1]
				for j := range ctmp {
					ctmp[j] = 0
				}
			}
			return
		}
		if ul == blas.Upper {
			for i := 0; i < n; i++ {
				ctmp := c[i*ldc+i : i*ldc+n]
				for j := range ctmp {
					ctmp[j] *= beta
				}
			}
			return
		}
		for i := 0; i < n; i++ {
			ctmp := c[i*ldc : i*ldc+i+1]
			for j := range ctmp {
				ctmp[j] *= beta
			}
		}
		return
	}

	if tA == blas.NoTrans {
		// C[i][j] receives the sum of two dot products: row j of A with
		// row i of B, and row i of A with row j of B.
		if ul == blas.Upper {
			for i := 0; i < n; i++ {
				atmp := a[i*lda : i*lda+k]
				btmp := b[i*ldb : i*ldb+k]
				ctmp := c[i*ldc+i : i*ldc+n]
				for jc := range ctmp {
					j := i + jc
					var tmp1, tmp2 float64
					binner := b[j*ldb : j*ldb+k]
					for l, v := range a[j*lda : j*lda+k] {
						tmp1 += v * btmp[l]
						tmp2 += atmp[l] * binner[l]
					}
					if beta == 0 {
						// Overwrite instead of multiplying so that NaN
						// and Inf in c are discarded.
						ctmp[jc] = alpha * (tmp1 + tmp2)
					} else {
						ctmp[jc] *= beta
						ctmp[jc] += alpha * (tmp1 + tmp2)
					}
				}
			}
			return
		}
		for i := 0; i < n; i++ {
			atmp := a[i*lda : i*lda+k]
			btmp := b[i*ldb : i*ldb+k]
			ctmp := c[i*ldc : i*ldc+i+1]
			for j := 0; j <= i; j++ {
				var tmp1, tmp2 float64
				binner := b[j*ldb : j*ldb+k]
				for l, v := range a[j*lda : j*lda+k] {
					tmp1 += v * btmp[l]
					tmp2 += atmp[l] * binner[l]
				}
				if beta == 0 {
					ctmp[j] = alpha * (tmp1 + tmp2)
				} else {
					ctmp[j] *= beta
					ctmp[j] += alpha * (tmp1 + tmp2)
				}
			}
		}
		return
	}

	// Cases where a and b are transposed. Each row of the triangle is first
	// scaled by beta and then accumulates one rank-2 contribution per l.
	if ul == blas.Upper {
		for i := 0; i < n; i++ {
			ctmp := c[i*ldc+i : i*ldc+n]
			switch beta {
			case 0:
				for j := range ctmp {
					ctmp[j] = 0
				}
			case 1:
				// The row is used as it is.
			default:
				for j := range ctmp {
					ctmp[j] *= beta
				}
			}
			for l := 0; l < k; l++ {
				tmp1 := alpha * b[l*ldb+i]
				tmp2 := alpha * a[l*lda+i]
				btmp := b[l*ldb+i : l*ldb+n]
				if tmp1 != 0 || tmp2 != 0 {
					for j, v := range a[l*lda+i : l*lda+n] {
						ctmp[j] += v*tmp1 + btmp[j]*tmp2
					}
				}
			}
		}
		return
	}
	for i := 0; i < n; i++ {
		ctmp := c[i*ldc : i*ldc+i+1]
		switch beta {
		case 0:
			for j := range ctmp {
				ctmp[j] = 0
			}
		case 1:
			// The row is used as it is.
		default:
			for j := range ctmp {
				ctmp[j] *= beta
			}
		}
		for l := 0; l < k; l++ {
			tmp1 := alpha * b[l*ldb+i]
			tmp2 := alpha * a[l*lda+i]
			btmp := b[l*ldb : l*ldb+i+1]
			if tmp1 != 0 || tmp2 != 0 {
				for j, v := range a[l*lda : l*lda+i+1] {
					ctmp[j] += v*tmp1 + btmp[j]*tmp2
				}
			}
		}
	}
}
// Dtrmm performs one of the matrix-matrix operations
//  B = alpha * A * B    if tA == blas.NoTrans and side == blas.Left
//  B = alpha * A^T * B  if tA == blas.Trans or blas.ConjTrans, and side == blas.Left
//  B = alpha * B * A    if tA == blas.NoTrans and side == blas.Right
//  B = alpha * B * A^T  if tA == blas.Trans or blas.ConjTrans, and side == blas.Right
// where A is an n×n or m×m triangular matrix, B is an m×n matrix, and alpha is a scalar.
//
// A is m×m when s == blas.Left and n×n when s == blas.Right. Only the
// triangle of a selected by ul is read, and when d == blas.Unit the diagonal
// of a is not read and is treated as one. The result overwrites b in place.
//
// Dtrmm panics if s, ul, tA or d is not a valid value, if m or n is
// negative, if lda or ldb is too small, or if a or b is too short for the
// given dimensions.
func (Implementation) Dtrmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int) {
	// order is the size of the triangular matrix A.
	order := n
	if s == blas.Left {
		order = m
	}
	switch {
	case s != blas.Left && s != blas.Right:
		panic(badSide)
	case ul != blas.Lower && ul != blas.Upper:
		panic(badUplo)
	case tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans:
		panic(badTranspose)
	case d != blas.NonUnit && d != blas.Unit:
		panic(badDiag)
	case m < 0:
		panic(mLT0)
	case n < 0:
		panic(nLT0)
	case lda < max(1, order):
		panic(badLdA)
	case ldb < max(1, n):
		panic(badLdB)
	}

	// Nothing to do for an empty B.
	if m == 0 || n == 0 {
		return
	}

	// The slice length checks only make sense for non-empty matrices.
	switch {
	case len(a) < lda*(order-1)+order:
		panic(shortA)
	case len(b) < ldb*(m-1)+n:
		panic(shortB)
	}

	// alpha == 0 yields the zero matrix without reading a.
	if alpha == 0 {
		for i := 0; i < m; i++ {
			row := b[i*ldb : i*ldb+n]
			for j := range row {
				row[j] = 0
			}
		}
		return
	}

	var (
		nonUnit = d == blas.NonUnit
		left    = s == blas.Left
		noTrans = tA == blas.NoTrans
		upper   = ul == blas.Upper
	)

	switch {
	case left && noTrans && upper:
		// Row i combines rows i..m-1 of B; ascending i reads only rows
		// that have not been overwritten yet.
		for i := 0; i < m; i++ {
			diag := alpha
			if nonUnit {
				diag *= a[i*lda+i]
			}
			dst := b[i*ldb : i*ldb+n]
			f64.ScalUnitary(diag, dst)
			aRow := a[i*lda : i*lda+m]
			for k := i + 1; k < m; k++ {
				if va := aRow[k]; va != 0 {
					f64.AxpyUnitary(alpha*va, b[k*ldb:k*ldb+n], dst)
				}
			}
		}

	case left && noTrans:
		// Row i combines rows 0..i of B; descending i reads only rows
		// that have not been overwritten yet.
		for i := m - 1; i >= 0; i-- {
			diag := alpha
			if nonUnit {
				diag *= a[i*lda+i]
			}
			dst := b[i*ldb : i*ldb+n]
			f64.ScalUnitary(diag, dst)
			aRow := a[i*lda : i*lda+i]
			for k := 0; k < i; k++ {
				if va := aRow[k]; va != 0 {
					f64.AxpyUnitary(alpha*va, b[k*ldb:k*ldb+n], dst)
				}
			}
		}

	case left && upper:
		// A transposed: row k of B is added into rows k+1..m-1 and only
		// then scaled by its own diagonal term.
		for k := m - 1; k >= 0; k-- {
			src := b[k*ldb : k*ldb+n]
			aRow := a[k*lda : k*lda+m]
			for i := k + 1; i < m; i++ {
				if va := aRow[i]; va != 0 {
					f64.AxpyUnitary(alpha*va, src, b[i*ldb:i*ldb+n])
				}
			}
			diag := alpha
			if nonUnit {
				diag *= a[k*lda+k]
			}
			if diag != 1 {
				f64.ScalUnitary(diag, src)
			}
		}

	case left:
		// A transposed: row k of B is added into rows 0..k-1 and only
		// then scaled by its own diagonal term.
		for k := 0; k < m; k++ {
			src := b[k*ldb : k*ldb+n]
			aRow := a[k*lda : k*lda+k]
			for i := 0; i < k; i++ {
				if va := aRow[i]; va != 0 {
					f64.AxpyUnitary(alpha*va, src, b[i*ldb:i*ldb+n])
				}
			}
			diag := alpha
			if nonUnit {
				diag *= a[k*lda+k]
			}
			if diag != 1 {
				f64.ScalUnitary(diag, src)
			}
		}

	case noTrans && upper:
		// A on the right: every row of B is updated on its own. Element k
		// is added into elements k+1..n-1, so k descends.
		for i := 0; i < m; i++ {
			row := b[i*ldb : i*ldb+n]
			for k := n - 1; k >= 0; k-- {
				scaled := alpha * row[k]
				if scaled == 0 {
					continue
				}
				row[k] = scaled
				if nonUnit {
					row[k] *= a[k*lda+k]
				}
				f64.AxpyUnitary(scaled, a[k*lda+k+1:k*lda+n], row[k+1:n])
			}
		}

	case noTrans:
		// Element k is added into elements 0..k-1, so k ascends.
		for i := 0; i < m; i++ {
			row := b[i*ldb : i*ldb+n]
			for k := 0; k < n; k++ {
				scaled := alpha * row[k]
				if scaled == 0 {
					continue
				}
				row[k] = scaled
				if nonUnit {
					row[k] *= a[k*lda+k]
				}
				f64.AxpyUnitary(scaled, a[k*lda:k*lda+k], row[:k])
			}
		}

	case upper:
		// A transposed on the right: element j gathers from elements
		// j..n-1 of the same row, so j ascends.
		for i := 0; i < m; i++ {
			row := b[i*ldb : i*ldb+n]
			for j := 0; j < n; j++ {
				acc := row[j]
				if nonUnit {
					acc *= a[j*lda+j]
				}
				acc += f64.DotUnitary(a[j*lda+j+1:j*lda+n], row[j+1:n])
				row[j] = alpha * acc
			}
		}

	default:
		// Element j gathers from elements 0..j of the same row, so j
		// descends.
		for i := 0; i < m; i++ {
			row := b[i*ldb : i*ldb+n]
			for j := n - 1; j >= 0; j-- {
				acc := row[j]
				if nonUnit {
					acc *= a[j*lda+j]
				}
				acc += f64.DotUnitary(a[j*lda:j*lda+j], row[:j])
				row[j] = alpha * acc
			}
		}
	}
}

318
vendor/gonum.org/v1/gonum/blas/gonum/sgemm.go generated vendored Normal file
View File

@@ -0,0 +1,318 @@
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum"; DO NOT EDIT.
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import (
"runtime"
"sync"
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/internal/asm/f32"
)
// Sgemm performs one of the matrix-matrix operations
//  C = alpha * A * B + beta * C
//  C = alpha * A^T * B + beta * C
//  C = alpha * A * B^T + beta * C
//  C = alpha * A^T * B^T + beta * C
// where A is an m×k or k×m dense matrix, B is an n×k or k×n dense matrix, C is
// an m×n matrix, and alpha and beta are scalars. tA and tB specify whether A or
// B are transposed.
//
// Sgemm panics if tA or tB is not a valid value, if m, n or k is negative,
// if a leading dimension is too small, or if a, b or c is too short for the
// given dimensions.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Sgemm(tA, tB blas.Transpose, m, n, k int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int) {
	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
		panic(badTranspose)
	}
	if tB != blas.NoTrans && tB != blas.Trans && tB != blas.ConjTrans {
		panic(badTranspose)
	}
	switch {
	case m < 0:
		panic(mLT0)
	case n < 0:
		panic(nLT0)
	case k < 0:
		panic(kLT0)
	}

	// Stored shapes of A and B: A is m×k (k×m when transposed) and B is
	// k×n (n×k when transposed).
	aTrans := tA != blas.NoTrans
	aRows, aCols := m, k
	if aTrans {
		aRows, aCols = k, m
	}
	bTrans := tB != blas.NoTrans
	bRows, bCols := k, n
	if bTrans {
		bRows, bCols = n, k
	}

	switch {
	case lda < max(1, aCols):
		panic(badLdA)
	case ldb < max(1, bCols):
		panic(badLdB)
	case ldc < max(1, n):
		panic(badLdC)
	}

	// Nothing to do for an empty C.
	if m == 0 || n == 0 {
		return
	}

	// The slice length checks only make sense for non-empty matrices.
	switch {
	case len(a) < (aRows-1)*lda+aCols:
		panic(shortA)
	case len(b) < (bRows-1)*ldb+bCols:
		panic(shortB)
	case len(c) < (m-1)*ldc+n:
		panic(shortC)
	}

	// C is unchanged when there is no product term and beta is one.
	if beta == 1 && (alpha == 0 || k == 0) {
		return
	}

	// Apply beta to C up front; the product is then accumulated into it.
	switch beta {
	case 1:
		// C is used as it is.
	case 0:
		for i := 0; i < m; i++ {
			row := c[i*ldc : i*ldc+n]
			for j := range row {
				row[j] = 0
			}
		}
	default:
		for i := 0; i < m; i++ {
			row := c[i*ldc : i*ldc+n]
			for j := range row {
				row[j] *= beta
			}
		}
	}

	sgemmParallel(aTrans, bTrans, m, n, k, a, lda, b, ldb, c, ldc, alpha)
}
// sgemmParallel adds alpha * op(A) * op(B) to c, splitting the work into
// blockSize×blockSize blocks of c that are processed by a pool of worker
// goroutines. Problems with fewer than minParBlock blocks are handed to
// sgemmSerial directly.
func sgemmParallel(aTrans, bTrans bool, m, n, k int, a []float32, lda int, b []float32, ldb int, c []float32, ldc int, alpha float32) {
	// The matrices are viewed as grids of sub-blocks,
	//  A = [ A_11 A_12 ... A_1j
	//        A_21 A_22 ... A_2j
	//                ...
	//        A_i1 A_i2 ... A_ij]
	//
	// and likewise for B and C. Every sub-block is blockSize×blockSize
	// except at the edges.
	//
	// Whatever the transposition, each block of C is a sum over the shared
	// dimension that has to be accumulated in sequence:
	//  Cij = \sum_k Aik Bkj, (A * B)
	//  Cij = \sum_k Aki Bkj, (A^T * B)
	//  Cij = \sum_k Aik Bjk, (A * B^T)
	//  Cij = \sum_k Aki Bjk, (A^T * B^T)
	//
	// One worker therefore owns an {i, j} block of C for the whole sum over
	// k, while different {i, j} blocks are computed concurrently. Because no
	// two workers ever write the same block, C can be updated in place
	// without races. The blocks are handed to a fixed set of workers through
	// a channel instead of starting one goroutine per block.
	//
	// http://alexkr.com/docs/matrixmult.pdf is a good reference on
	// matrix-matrix multiplies, though this code does not copy matrices to
	// attempt to eliminate cache misses.
	nBlocks := blocks(m, blockSize) * blocks(n, blockSize)
	if nBlocks < minParBlock {
		// Too few independent blocks to be worth the coordination cost.
		sgemmSerial(aTrans, bTrans, m, n, k, a, lda, b, ldb, c, ldc, alpha)
		return
	}

	workers := runtime.GOMAXPROCS(0)
	if workers > nBlocks {
		workers = nBlocks
	}

	// The queue length trades workers idling for work against the cost of
	// a large channel buffer.
	queueLen := buffMul * workers
	if queueLen > nBlocks {
		queueLen = nBlocks
	}
	jobs := make(chan subMul, queueLen)

	// Each worker takes the origin of a block of c from the queue and
	// accumulates the matching products into it until the queue is closed.
	var wg sync.WaitGroup
	wg.Add(workers)
	for w := 0; w < workers; w++ {
		go func() {
			defer wg.Done()
			for job := range jobs {
				i0, j0 := job.i, job.j

				// Clip the block at the bottom and right edges of c.
				rows := m - i0
				if rows > blockSize {
					rows = blockSize
				}
				cols := n - j0
				if cols > blockSize {
					cols = blockSize
				}
				cBlock := sliceView32(c, ldc, i0, j0, rows, cols)

				// Walk the shared dimension one block at a time.
				for k0 := 0; k0 < k; k0 += blockSize {
					depth := k - k0
					if depth > blockSize {
						depth = blockSize
					}

					var aBlock, bBlock []float32
					if aTrans {
						aBlock = sliceView32(a, lda, k0, i0, depth, rows)
					} else {
						aBlock = sliceView32(a, lda, i0, k0, rows, depth)
					}
					if bTrans {
						bBlock = sliceView32(b, ldb, j0, k0, cols, depth)
					} else {
						bBlock = sliceView32(b, ldb, k0, j0, depth, cols)
					}
					sgemmSerial(aTrans, bTrans, rows, cols, depth, aBlock, lda, bBlock, ldb, cBlock, ldc, alpha)
				}
			}
		}()
	}

	// Queue the origin of every block of c, then wait for the workers.
	for i := 0; i < m; i += blockSize {
		for j := 0; j < n; j += blockSize {
			jobs <- subMul{i: i, j: j}
		}
	}
	close(jobs)
	wg.Wait()
}
// sgemmSerial is serial matrix multiply
func sgemmSerial(aTrans, bTrans bool, m, n, k int, a []float32, lda int, b []float32, ldb int, c []float32, ldc int, alpha float32) {
switch {
case !aTrans && !bTrans:
sgemmSerialNotNot(m, n, k, a, lda, b, ldb, c, ldc, alpha)
return
case aTrans && !bTrans:
sgemmSerialTransNot(m, n, k, a, lda, b, ldb, c, ldc, alpha)
return
case !aTrans && bTrans:
sgemmSerialNotTrans(m, n, k, a, lda, b, ldb, c, ldc, alpha)
return
case aTrans && bTrans:
sgemmSerialTransTrans(m, n, k, a, lda, b, ldb, c, ldc, alpha)
return
default:
panic("unreachable")
}
}
// sgemmSerialNotNot is sgemmSerial for the case where neither a nor b is
// transposed. It adds alpha * A * B to c.
func sgemmSerialNotNot(m, n, k int, a []float32, lda int, b []float32, ldb int, c []float32, ldc int, alpha float32) {
	// Rows are sliced once and then indexed, rather than indexing with
	// [i*stride+j] directly, because this was approximately 5 times faster
	// as of go 1.3.
	for i := 0; i < m; i++ {
		cRow := c[i*ldc : i*ldc+n]
		aRow := a[i*lda : i*lda+k]
		for l := 0; l < k; l++ {
			scale := alpha * aRow[l]
			if scale == 0 {
				continue
			}
			f32.AxpyUnitary(scale, b[l*ldb:l*ldb+n], cRow)
		}
	}
}
// sgemmSerialTransNot is sgemmSerial for the case where a is transposed and
// b is not. It adds alpha * A^T * B to c.
func sgemmSerialTransNot(m, n, k int, a []float32, lda int, b []float32, ldb int, c []float32, ldc int, alpha float32) {
	// Rows are sliced once and then indexed, rather than indexing with
	// [i*stride+j] directly, because this was approximately 5 times faster
	// as of go 1.3.
	for l := 0; l < k; l++ {
		bRow := b[l*ldb : l*ldb+n]
		aRow := a[l*lda : l*lda+m]
		for i := 0; i < m; i++ {
			scale := alpha * aRow[i]
			if scale == 0 {
				continue
			}
			f32.AxpyUnitary(scale, bRow, c[i*ldc:i*ldc+n])
		}
	}
}
// sgemmSerialNotTrans is sgemmSerial for the case where a is not transposed
// and b is. It adds alpha * A * B^T to c.
func sgemmSerialNotTrans(m, n, k int, a []float32, lda int, b []float32, ldb int, c []float32, ldc int, alpha float32) {
	// Rows are sliced once and then indexed, rather than indexing with
	// [i*stride+j] directly, because this was approximately 5 times faster
	// as of go 1.3.
	for i := 0; i < m; i++ {
		aRow := a[i*lda : i*lda+k]
		cRow := c[i*ldc : i*ldc+n]
		// Entry (i, j) of the product is the dot product of row i of a
		// with row j of b.
		for j := range cRow {
			cRow[j] += alpha * f32.DotUnitary(aRow, b[j*ldb:j*ldb+k])
		}
	}
}
// sgemmSerialTransTrans is sgemmSerial for the case where both a and b are
// transposed. It adds alpha * A^T * B^T to c.
func sgemmSerialTransTrans(m, n, k int, a []float32, lda int, b []float32, ldb int, c []float32, ldc int, alpha float32) {
	// Rows are sliced once and then indexed, rather than indexing with
	// [i*stride+j] directly, because this was approximately 5 times faster
	// as of go 1.3.
	count, bStride := uintptr(n), uintptr(ldb)
	for l := 0; l < k; l++ {
		aRow := a[l*lda : l*lda+m]
		for i := 0; i < m; i++ {
			scale := alpha * aRow[i]
			if scale == 0 {
				continue
			}
			// Column l of b is read with stride ldb and added into the
			// contiguous row i of c.
			f32.AxpyInc(scale, b[l:], c[i*ldc:i*ldc+n], count, bStride, 1, 0, 0)
		}
	}
}
// sliceView32 returns the part of a that spans the r×c sub-matrix whose top
// left element is at row i and column j, where consecutive rows of a are lda
// elements apart. The returned slice shares its backing array with a and
// keeps the stride lda.
func sliceView32(a []float32, lda, i, j, r, c int) []float32 {
	first := i*lda + j
	// One past the last element of the final row of the view.
	end := first + (r-1)*lda + c
	return a[first:end]
}

View File

@@ -0,0 +1,218 @@
#!/usr/bin/env bash
# Copyright ©2015 The Gonum Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
# WARNINGF32 is the text the sed commands below splice in ahead of each
# generated float32 method: an empty "//" comment line followed by the
# notice. Every line of the value ends in a backslash-newline, which is how a
# line break is written inside a sed replacement, so the value must keep this
# exact multi-line shape.
WARNINGF32='//\
// Float32 implementations are autogenerated and not directly tested.\
'
# WARNINGC64 is the equivalent notice for the generated complex64 methods.
WARNINGC64='//\
// Complex64 implementations are autogenerated and not directly tested.\
'
# Level1 routines.

# Generate level1float32.go from level1float64.go.
# The header line is written with ">" (truncating any previous output) and the
# converted source is appended with ">>". gofmt -r rewrites the interface
# name, the float64 type, the Drotm parameter type and the f64 asm calls to
# their float32 counterparts; sed then renames the D*/Id* methods and their
# doc comments to S*/Is*, prepends $WARNINGF32 to each method, and swaps the
# f64 and math imports for f32 and internal/math32.
echo Generating level1float32.go
echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas/gonum"; DO NOT EDIT.\n' > level1float32.go
cat level1float64.go \
| gofmt -r 'blas.Float64Level1 -> blas.Float32Level1' \
\
| gofmt -r 'float64 -> float32' \
| gofmt -r 'blas.DrotmParams -> blas.SrotmParams' \
\
| gofmt -r 'f64.AxpyInc -> f32.AxpyInc' \
| gofmt -r 'f64.AxpyUnitary -> f32.AxpyUnitary' \
| gofmt -r 'f64.DotUnitary -> f32.DotUnitary' \
| gofmt -r 'f64.ScalInc -> f32.ScalInc' \
| gofmt -r 'f64.ScalUnitary -> f32.ScalUnitary' \
\
| sed -e "s_^\(func (Implementation) \)D\(.*\)\$_$WARNINGF32\1S\2_" \
	-e 's_^// D_// S_' \
	-e "s_^\(func (Implementation) \)Id\(.*\)\$_$WARNINGF32\1Is\2_" \
	-e 's_^// Id_// Is_' \
	-e 's_"gonum.org/v1/gonum/internal/asm/f64"_"gonum.org/v1/gonum/internal/asm/f32"_' \
	-e 's_"math"_math "gonum.org/v1/gonum/internal/math32"_' \
	>> level1float32.go
# Generate level1cmplx64.go from level1cmplx128.go.
# gofmt -r rewrites the interface name, the float64/complex128 types, the
# c128 asm calls and dcabs1 to their single-precision counterparts. sed then
# renames the methods and their doc comments and prepends $WARNINGC64 to each
# method. The Zdscal rule runs before the generic Z rule so that Zdscal
# becomes Csscal rather than Cdscal; Iz* becomes Ic* and Dz* becomes Sc*.
# Finally the c128 and math imports are swapped for c64 and internal/math32.
echo Generating level1cmplx64.go
echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas/gonum"; DO NOT EDIT.\n' > level1cmplx64.go
cat level1cmplx128.go \
| gofmt -r 'blas.Complex128Level1 -> blas.Complex64Level1' \
\
| gofmt -r 'float64 -> float32' \
| gofmt -r 'complex128 -> complex64' \
\
| gofmt -r 'c128.AxpyInc -> c64.AxpyInc' \
| gofmt -r 'c128.AxpyUnitary -> c64.AxpyUnitary' \
| gofmt -r 'c128.DotcInc -> c64.DotcInc' \
| gofmt -r 'c128.DotcUnitary -> c64.DotcUnitary' \
| gofmt -r 'c128.DotuInc -> c64.DotuInc' \
| gofmt -r 'c128.DotuUnitary -> c64.DotuUnitary' \
| gofmt -r 'c128.ScalInc -> c64.ScalInc' \
| gofmt -r 'c128.ScalUnitary -> c64.ScalUnitary' \
| gofmt -r 'dcabs1 -> scabs1' \
\
| sed -e "s_^\(func (Implementation) \)Zdot\(.*\)\$_$WARNINGC64\1Cdot\2_" \
	-e 's_^// Zdot_// Cdot_' \
	-e "s_^\(func (Implementation) \)Zdscal\(.*\)\$_$WARNINGC64\1Csscal\2_" \
	-e 's_^// Zdscal_// Csscal_' \
	-e "s_^\(func (Implementation) \)Z\(.*\)\$_$WARNINGC64\1C\2_" \
	-e 's_^// Z_// C_' \
	-e "s_^\(func (Implementation) \)Iz\(.*\)\$_$WARNINGC64\1Ic\2_" \
	-e 's_^// Iz_// Ic_' \
	-e "s_^\(func (Implementation) \)Dz\(.*\)\$_$WARNINGC64\1Sc\2_" \
	-e 's_^// Dz_// Sc_' \
	-e 's_"gonum.org/v1/gonum/internal/asm/c128"_"gonum.org/v1/gonum/internal/asm/c64"_' \
	-e 's_"math"_math "gonum.org/v1/gonum/internal/math32"_' \
	>> level1cmplx64.go
# Generate level1float32_sdot.go from level1float64_ddot.go.
# gofmt -r rewrites float64 and the f64 dot kernels to float32/f32; sed
# renames the D* method and its doc comment to S*, prepends $WARNINGF32, and
# swaps the f64 import for f32.
echo Generating level1float32_sdot.go
echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas/gonum"; DO NOT EDIT.\n' > level1float32_sdot.go
cat level1float64_ddot.go \
| gofmt -r 'float64 -> float32' \
\
| gofmt -r 'f64.DotInc -> f32.DotInc' \
| gofmt -r 'f64.DotUnitary -> f32.DotUnitary' \
\
| sed -e "s_^\(func (Implementation) \)D\(.*\)\$_$WARNINGF32\1S\2_" \
	-e 's_^// D_// S_' \
	-e 's_"gonum.org/v1/gonum/internal/asm/f64"_"gonum.org/v1/gonum/internal/asm/f32"_' \
	>> level1float32_sdot.go
# Generate level1float32_dsdot.go from level1float64_ddot.go.
# Only the slice type []float64 is rewritten to []float32 here (scalar
# float64 is left alone), and the f64 dot kernels are replaced with the
# f32.Ddot* variants. sed renames the D* method and its doc comment to Ds*,
# prepends $WARNINGF32, and swaps the f64 import for f32.
echo Generating level1float32_dsdot.go
echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas/gonum"; DO NOT EDIT.\n' > level1float32_dsdot.go
cat level1float64_ddot.go \
| gofmt -r '[]float64 -> []float32' \
\
| gofmt -r 'f64.DotInc -> f32.DdotInc' \
| gofmt -r 'f64.DotUnitary -> f32.DdotUnitary' \
\
| sed -e "s_^\(func (Implementation) \)D\(.*\)\$_$WARNINGF32\1Ds\2_" \
	-e 's_^// D_// Ds_' \
	-e 's_"gonum.org/v1/gonum/internal/asm/f64"_"gonum.org/v1/gonum/internal/asm/f32"_' \
	>> level1float32_dsdot.go
# Generate level1float32_sdsdot.go from level1float64_ddot.go.
# gofmt -r rewrites float64 to float32 and wraps each f64 dot kernel call as
# alpha + float32(f32.Ddot*(...)). sed renames the D* method to Sds*,
# prepends $WARNINGF32, appends " plus a constant" to the doc comment line,
# turns "\sum" into "alpha + \sum", adds the alpha parameter after "n int",
# and swaps the f64 import for f32.
echo Generating level1float32_sdsdot.go
echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas/gonum"; DO NOT EDIT.\n' > level1float32_sdsdot.go
cat level1float64_ddot.go \
| gofmt -r 'float64 -> float32' \
\
| gofmt -r 'f64.DotInc(x, y, f(n), f(incX), f(incY), f(ix), f(iy)) -> alpha + float32(f32.DdotInc(x, y, f(n), f(incX), f(incY), f(ix), f(iy)))' \
| gofmt -r 'f64.DotUnitary(a, b) -> alpha + float32(f32.DdotUnitary(a, b))' \
\
| sed -e "s_^\(func (Implementation) \)D\(.*\)\$_$WARNINGF32\1Sds\2_" \
	-e 's_^// D\(.*\)$_// Sds\1 plus a constant_' \
	-e 's_\\sum_alpha + \\sum_' \
	-e 's/n int/n int, alpha float32/' \
	-e 's_"gonum.org/v1/gonum/internal/asm/f64"_"gonum.org/v1/gonum/internal/asm/f32"_' \
	>> level1float32_sdsdot.go
# Level2 routines.

# Generate level2float32.go from level2float64.go.
# gofmt -r rewrites the interface name, float64 and the f64 asm calls to
# their float32/f32 counterparts; sed renames the D* methods and their doc
# comments to S*, prepends $WARNINGF32 to each method, and swaps the f64
# import for f32.
echo Generating level2float32.go
echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas/gonum"; DO NOT EDIT.\n' > level2float32.go
cat level2float64.go \
| gofmt -r 'blas.Float64Level2 -> blas.Float32Level2' \
\
| gofmt -r 'float64 -> float32' \
\
| gofmt -r 'f64.AxpyInc -> f32.AxpyInc' \
| gofmt -r 'f64.AxpyIncTo -> f32.AxpyIncTo' \
| gofmt -r 'f64.AxpyUnitary -> f32.AxpyUnitary' \
| gofmt -r 'f64.AxpyUnitaryTo -> f32.AxpyUnitaryTo' \
| gofmt -r 'f64.DotInc -> f32.DotInc' \
| gofmt -r 'f64.DotUnitary -> f32.DotUnitary' \
| gofmt -r 'f64.ScalInc -> f32.ScalInc' \
| gofmt -r 'f64.ScalUnitary -> f32.ScalUnitary' \
| gofmt -r 'f64.Ger -> f32.Ger' \
\
| sed -e "s_^\(func (Implementation) \)D\(.*\)\$_$WARNINGF32\1S\2_" \
	-e 's_^// D_// S_' \
	-e 's_"gonum.org/v1/gonum/internal/asm/f64"_"gonum.org/v1/gonum/internal/asm/f32"_' \
	>> level2float32.go
# Generate level2cmplx64.go from level2cmplx128.go.
# gofmt -r rewrites the interface name, the complex128/float64 types and the
# c128 asm calls to their single-precision counterparts; sed renames the Z*
# methods and their doc comments to C*, prepends $WARNINGC64 to each method,
# and swaps the c128 and math/cmplx imports for c64 and internal/cmplx64.
echo Generating level2cmplx64.go
echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas/gonum"; DO NOT EDIT.\n' > level2cmplx64.go
cat level2cmplx128.go \
| gofmt -r 'blas.Complex128Level2 -> blas.Complex64Level2' \
\
| gofmt -r 'complex128 -> complex64' \
| gofmt -r 'float64 -> float32' \
\
| gofmt -r 'c128.AxpyInc -> c64.AxpyInc' \
| gofmt -r 'c128.AxpyUnitary -> c64.AxpyUnitary' \
| gofmt -r 'c128.DotuInc -> c64.DotuInc' \
| gofmt -r 'c128.DotuUnitary -> c64.DotuUnitary' \
| gofmt -r 'c128.ScalInc -> c64.ScalInc' \
| gofmt -r 'c128.ScalUnitary -> c64.ScalUnitary' \
\
| sed -e "s_^\(func (Implementation) \)Z\(.*\)\$_$WARNINGC64\1C\2_" \
	-e 's_^// Z_// C_' \
	-e 's_"gonum.org/v1/gonum/internal/asm/c128"_"gonum.org/v1/gonum/internal/asm/c64"_' \
	-e 's_"math/cmplx"_cmplx "gonum.org/v1/gonum/internal/cmplx64"_' \
	>> level2cmplx64.go
# Level3 routines.

# Generate level3float32.go from level3float64.go.
# gofmt -r rewrites the interface name, float64 and the f64 asm calls to
# their float32/f32 counterparts; sed renames the D* methods and their doc
# comments to S*, prepends $WARNINGF32 to each method, and swaps the f64
# import for f32.
echo Generating level3float32.go
echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas/gonum"; DO NOT EDIT.\n' > level3float32.go
cat level3float64.go \
| gofmt -r 'blas.Float64Level3 -> blas.Float32Level3' \
\
| gofmt -r 'float64 -> float32' \
\
| gofmt -r 'f64.AxpyUnitaryTo -> f32.AxpyUnitaryTo' \
| gofmt -r 'f64.AxpyUnitary -> f32.AxpyUnitary' \
| gofmt -r 'f64.DotUnitary -> f32.DotUnitary' \
| gofmt -r 'f64.ScalUnitary -> f32.ScalUnitary' \
\
| sed -e "s_^\(func (Implementation) \)D\(.*\)\$_$WARNINGF32\1S\2_" \
	-e 's_^// D_// S_' \
	-e 's_"gonum.org/v1/gonum/internal/asm/f64"_"gonum.org/v1/gonum/internal/asm/f32"_' \
	>> level3float32.go
# Generate sgemm.go from dgemm.go.
# gofmt -r rewrites float64, the sliceView64/computeNumBlocks64 helpers, the
# dgemm* helper names and the f64 asm calls to their float32 counterparts.
# sed renames the D* method to S*, prepends $WARNINGF32, and swaps the f64
# import for f32. It also rewrites every comment line starting with "// D" or
# "// d" to start with "// S" or "// s", which is what renames the helper doc
# comments (any other comment beginning with those letters is affected too).
echo Generating sgemm.go
echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas/gonum"; DO NOT EDIT.\n' > sgemm.go
cat dgemm.go \
| gofmt -r 'float64 -> float32' \
| gofmt -r 'sliceView64 -> sliceView32' \
\
| gofmt -r 'dgemmParallel -> sgemmParallel' \
| gofmt -r 'computeNumBlocks64 -> computeNumBlocks32' \
| gofmt -r 'dgemmSerial -> sgemmSerial' \
| gofmt -r 'dgemmSerialNotNot -> sgemmSerialNotNot' \
| gofmt -r 'dgemmSerialTransNot -> sgemmSerialTransNot' \
| gofmt -r 'dgemmSerialNotTrans -> sgemmSerialNotTrans' \
| gofmt -r 'dgemmSerialTransTrans -> sgemmSerialTransTrans' \
\
| gofmt -r 'f64.AxpyInc -> f32.AxpyInc' \
| gofmt -r 'f64.AxpyUnitary -> f32.AxpyUnitary' \
| gofmt -r 'f64.DotUnitary -> f32.DotUnitary' \
\
| sed -e "s_^\(func (Implementation) \)D\(.*\)\$_$WARNINGF32\1S\2_" \
	-e 's_^// D_// S_' \
	-e 's_^// d_// s_' \
	-e 's_"gonum.org/v1/gonum/internal/asm/f64"_"gonum.org/v1/gonum/internal/asm/f32"_' \
	>> sgemm.go
# Generate level3cmplx64.go from level3cmplx128.go.
# gofmt -r rewrites the interface name, the float64/complex128 types and the
# c128 asm calls (including DscalUnitary -> SscalUnitary) to their
# single-precision counterparts; sed renames the Z* methods and their doc
# comments to C*, prepends $WARNINGC64 to each method, and swaps the c128 and
# math/cmplx imports for c64 and internal/cmplx64.
echo Generating level3cmplx64.go
echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas/gonum"; DO NOT EDIT.\n' > level3cmplx64.go
cat level3cmplx128.go \
| gofmt -r 'blas.Complex128Level3 -> blas.Complex64Level3' \
\
| gofmt -r 'float64 -> float32' \
| gofmt -r 'complex128 -> complex64' \
\
| gofmt -r 'c128.ScalUnitary -> c64.ScalUnitary' \
| gofmt -r 'c128.DscalUnitary -> c64.SscalUnitary' \
| gofmt -r 'c128.DotcUnitary -> c64.DotcUnitary' \
| gofmt -r 'c128.AxpyUnitary -> c64.AxpyUnitary' \
| gofmt -r 'c128.DotuUnitary -> c64.DotuUnitary' \
\
| sed -e "s_^\(func (Implementation) \)Z\(.*\)\$_$WARNINGC64\1C\2_" \
	-e 's_^// Z_// C_' \
	-e 's_"gonum.org/v1/gonum/internal/asm/c128"_"gonum.org/v1/gonum/internal/asm/c64"_' \
	-e 's_"math/cmplx"_cmplx "gonum.org/v1/gonum/internal/cmplx64"_' \
	>> level3cmplx64.go