-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathgemmManaged.cu
87 lines (74 loc) · 2.06 KB
/
gemmManaged.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#include <stdlib.h>
#include <stdio.h>
#include <cublasXt.h>
#include <cuda_runtime.h>
#include "common.hh"
/*
 * Abort the program if any of the first `n` elements of a, b or c is
 * negative.
 *
 * Besides acting as a sanity check, reading every element from the host
 * forces each array to be touched on the CPU, which is what the timed
 * "host: access" sections in main() are meant to measure.
 */
static void
verify_non_negative(const float *a, const float *b, const float *c, size_t n)
{
    for (size_t i = 0; i < n; i++) {
        if (a[i] < 0 || b[i] < 0 || c[i] < 0) {
            fprintf(stderr, "unexpected result a: %f b: %f c: %f\n",
                    a[i], b[i], c[i]);
            exit(1);
        }
    }
}

/*
 * Benchmark a single-precision N x N matrix multiplication (c = a * b)
 * with cuBLAS, keeping all three matrices in CUDA managed memory.
 *
 * Usage: prog [N]      (N defaults to 1000)
 *
 * Each phase (allocation + host init, SGEMM, two host read passes, free)
 * is timed with clock() and reported through log().
 * NOTE(review): check(), log(), checked_mul() and checked_strtosize() come
 * from common.hh; check() presumably aborts on a non-success status and the
 * checked_* helpers presumably abort on overflow / bad input -- confirm.
 */
int
main(int argc, char *argv[])
{
    size_t N = 1000;
    clock_t start_program, end_program;
    clock_t start, end;
    cublasHandle_t handle;
    float *a, *b, *c;
    const float alpha = 1.0f;
    const float beta = 0.0f;
    size_t count, nn;

    if (argc == 2) {
        N = checked_strtosize(argv[1]);
    }

    nn = checked_mul(N, N);                 /* elements per matrix */
    count = checked_mul(nn, sizeof(float)); /* bytes per matrix */

    start_program = clock();

    check(cublasCreate(&handle));

    start = clock();
    check(cudaMallocManaged(&a, count));
    check(cudaMallocManaged(&b, count));
    check(cudaMallocManaged(&c, count));
    /* Use the overflow-checked element count rather than recomputing N*N. */
    for (size_t i = 0; i < nn; i++) {
        a[i] = (float)(i / 37.0);
        b[i] = (float)(i / 101.0);
    }
    end = clock();
    log("host: MallocManaged+init", start, end);

    start = clock();
    {
        /*
         * cuBLAS takes its dimensions as int.
         * NOTE(review): assumes N fits in int; for a 64-bit size_t any
         * larger N should already have failed in checked_mul() -- confirm.
         */
        const int n = (int)N;

        check(cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N,
                          n, n, n,
                          &alpha,
                          a, n,
                          b, n,
                          &beta,
                          c, n));
    }
    /*
     * Wait for the GPU so the timing covers the actual computation and the
     * host can safely read the managed buffers afterwards.
     */
    check(cudaDeviceSynchronize());
    end = clock();
    log("cublasSgemm", start, end);

    /*
     * First host pass over ALL nn elements of each array (the original code
     * only visited the first N, contradicting the log label).
     */
    start = clock();
    verify_non_negative(a, b, c, nn);
    end = clock();
    log("host: access all arrays", start, end);

    /* Second pass: same work, for comparison with the first pass. */
    start = clock();
    verify_non_negative(a, b, c, nn);
    end = clock();
    log("host: access all arrays a second time", start, end);

    start = clock();
    check(cudaFree(a));
    check(cudaFree(b));
    check(cudaFree(c));
    end = clock();
    log("host: free", start, end);

    /* Release the cuBLAS context created at the top of main(). */
    check(cublasDestroy(handle));

    end_program = clock();
    log("total", start_program, end_program);

    return 0;
}