-
Notifications
You must be signed in to change notification settings - Fork 0
/
performance.go
93 lines (81 loc) · 2.16 KB
/
performance.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
package main
// #include <simd.c>
//#cgo LDFLAGS: -Wl,--allow-multiple-definition
import "C"
import (
"fmt"
"github.com/bjwbell/gensimd/simd"
"time"
"unsafe"
)
// Benchmark inputs shared by every measurement in this file. They start out
// empty and are filled in by init before main runs.
var (
	// a and b are the scalar operand slices.
	a, b []float32

	// x and y hold the contents of a and b packed four values per vector.
	x, y []simd.F32x4

	// arrLen is the number of float32 values generated for each of a and b.
	arrLen = 100000000
)
// init builds the benchmark inputs before main runs so that none of the setup
// cost falls inside a timed region.
//
// a and b each receive arrLen values, with element i equal to float32(i).
// x and y receive the same values packed four per simd.F32x4, so the
// conversion is not done in the performance loop.
//
// The packing loop reads four consecutive elements per step and therefore
// requires arrLen to be a multiple of 4; any other value makes it index past
// the end of a and b.
func init() {
	// The final sizes are known, so allocate once. Growing these slices with
	// append from empty would repeatedly reallocate and copy backing arrays
	// that hold up to arrLen elements.
	a = make([]float32, arrLen)
	b = make([]float32, arrLen)
	for i := range a {
		v := float32(i)
		a[i] = v
		b[i] = v
	}

	// One vector per group of four scalars.
	x = make([]simd.F32x4, 0, arrLen/4)
	y = make([]simd.F32x4, 0, arrLen/4)
	for i := 0; i < arrLen; i += 4 {
		x = append(x, simd.F32x4{a[i], a[i+1], a[i+2], a[i+3]})
		y = append(y, simd.F32x4{b[i], b[i+1], b[i+2], b[i+3]})
	}
}
// main runs each benchmark once, in a fixed order. Every benchmark prints its
// own throughput and checksum lines.
func main() {
	// simdIntrinsics goes last: it passes a's storage to C code.
	// NOTE(review): the C routine may overwrite a, which would change the
	// inputs of any benchmark run after it — confirm before reordering.
	benchmarks := []func(){
		unrolled,
		unrolled_noboundchecking,
		simdGensimd,
		simdIntrinsics,
	}
	for _, run := range benchmarks {
		run()
	}
}
// simdIntrinsics times a single call to the C function add_arrays and prints
// the resulting throughput in billions of elements per second, followed by
// the sum of the first four elements of a as read after the call.
//
// add_arrays receives pointers to the first elements of a and b plus len(a).
// NOTE(review): what add_arrays computes, and whether it writes into a, is
// defined in the included C source and is not visible here.
//
// Panics if a or b is empty, because the address of element 0 is taken.
func simdIntrinsics() {
	begin := time.Now()
	lhs := (*C.float)(unsafe.Pointer(&a[0]))
	rhs := (*C.float)(unsafe.Pointer(&b[0]))
	C.add_arrays(lhs, rhs, C.int(len(a)))
	elapsed := time.Since(begin).Seconds()

	rate := float64(arrLen) / elapsed / 1000000000
	checksum := a[0] + a[1] + a[2] + a[3]
	fmt.Printf("SIMD Intrinsics - Bil ops/second: %v\nSum:%v\n", rate, checksum)
}
// simdGensimd times one pass over the packed vectors x and y, calling
// simd.MulF32x4 on each pair, then prints the throughput in billions of
// elements per second and the accumulated checksum.
//
// Only element 0 of each result vector is added to the checksum, so the
// printed sum is not comparable with the sums printed by the scalar loops.
// NOTE(review): confirm whether accumulating a single lane is intentional.
func simdGensimd() {
	begin := time.Now()
	var sum float32
	for i := range x {
		product := simd.MulF32x4(x[i], y[i])
		sum += product[0]
	}
	elapsed := time.Since(begin).Seconds()

	rate := float64(arrLen) / elapsed / 1000000000
	fmt.Printf("SIMD gensimd - Bil ops/second: %v\nSum:%v\n", rate, sum)
}
// unrolled times a scalar multiply-and-accumulate over a and b with the loop
// body written out four elements at a time. It prints the throughput in
// billions of elements per second and the accumulated sum of products.
//
// The loop indexes up to three elements past its counter, so len(a) must be a
// multiple of 4 and b must be at least as long as a; otherwise an index is
// out of range.
func unrolled() {
	begin := time.Now()
	var sum float32
	for base := 0; base < len(a); base += 4 {
		p0 := a[base] * b[base]
		p1 := a[base+1] * b[base+1]
		p2 := a[base+2] * b[base+2]
		p3 := a[base+3] * b[base+3]
		sum += p0 + p1 + p2 + p3
	}
	elapsed := time.Since(begin).Seconds()

	rate := float64(arrLen) / elapsed / 1000000000
	fmt.Printf("Unrolled - Bil ops/second: %v\nSum:%v\n", rate, sum)
}
// unrolled_noboundchecking is the four-at-a-time scalar loop again, but each
// step first re-slices a and b to a fixed four-element window and indexes the
// windows with constants. It prints the throughput in billions of elements
// per second and the accumulated sum of products.
//
// Panics before timing starts if a and b differ in length or if the length is
// not a multiple of 4.
func unrolled_noboundchecking() {
	switch {
	case len(a) != len(b):
		panic("slices must have equal lengths")
	case len(a)%4 != 0:
		panic("slice length must be multiple of 4")
	}

	begin := time.Now()
	var sum float32
	for base := 0; base < len(a); base += 4 {
		// Length and capacity are both pinned to 4, so the constant indices
		// below are known to lie inside each window.
		left := a[base : base+4 : base+4]
		right := b[base : base+4 : base+4]
		p0 := left[0] * right[0]
		p1 := left[1] * right[1]
		p2 := left[2] * right[2]
		p3 := left[3] * right[3]
		sum += p0 + p1 + p2 + p3
	}
	elapsed := time.Since(begin).Seconds()

	rate := float64(arrLen) / elapsed / 1000000000
	fmt.Printf("Unrolled no bound checking - Bil ops/second: %v\nSum:%v\n", rate, sum)
}