-
Notifications
You must be signed in to change notification settings - Fork 53
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Removed ROCM_USE_FLOAT16 and the linking against hip::device, as the default compiler may not support them (e.g. GCC on x86_64), and our samples don't need them anyway. Updated the C++ sample to match the C sample's improved use of const.
- Loading branch information
Showing
5 changed files
with
120 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
#include <stdio.h> // for printf | ||
#include <stdlib.h> // for malloc | ||
#include <hip/hip_runtime_api.h> // for hip functions | ||
#include <rocsolver.h> // for all the rocsolver C interfaces and type declarations | ||
|
||
// Example: Compute the QR Factorization of a matrix on the GPU | ||
|
||
// Builds the small 3-by-3 example matrix used by this sample.
//
// Writes the row count, column count and leading dimension through M_out,
// N_out and lda_out, and returns a newly malloc'ed array of lda*N doubles
// holding the matrix in column-major order.
//
// Ownership: the caller owns the returned buffer and must release it with
// free(). Returns NULL if the allocation fails; the three output dimensions
// are still written in that case.
double* create_example_matrix(rocblas_int *M_out,
                              rocblas_int *N_out,
                              rocblas_int *lda_out) {
  // a *very* small example input; not a very efficient use of the API
  const double A[3][3] = { {  12, -51,   4},
                           {   6, 167, -68},
                           {  -4,  24, -41} };
  const rocblas_int M = 3;
  const rocblas_int N = 3;
  const rocblas_int lda = 3;
  *M_out = M;
  *N_out = N;
  *lda_out = lda;

  // note: rocsolver matrices must be stored in column major format,
  //       i.e. entry (i,j) should be accessed by hA[i + j*lda]
  double *hA = malloc(sizeof *hA * (size_t)lda * (size_t)N);
  if (hA == NULL) {
    // let the caller decide how to report the out-of-memory condition
    return NULL;
  }

  // indices use the same signed type as the bounds to avoid a
  // signed/unsigned comparison; they are widened only when forming the offset
  for (rocblas_int i = 0; i < M; ++i) {
    for (rocblas_int j = 0; j < N; ++j) {
      // copy A (2D array) into hA (1D array, column-major)
      hA[(size_t)i + (size_t)j * (size_t)lda] = A[i][j];
    }
  }
  return hA;
}
|
||
// We use rocsolver_dgeqrf to factor a real M-by-N matrix, A. | ||
// See https://rocsolver.readthedocs.io/en/latest/userguide_api.html#_CPPv416rocsolver_dgeqrf14rocblas_handleK11rocblas_intK11rocblas_intPdK11rocblas_intPd | ||
// and https://www.netlib.org/lapack/explore-html/df/dc5/group__variants_g_ecomputational_ga3766ea903391b5cf9008132f7440ec7b.html | ||
int main() { | ||
rocblas_int M; // rows | ||
rocblas_int N; // cols | ||
rocblas_int lda; // leading dimension | ||
double* hA = create_example_matrix(&M, &N, &lda); // input matrix on CPU | ||
|
||
// let's print the input matrix, just to see it | ||
printf("A = [\n"); | ||
for (size_t i = 0; i < M; ++i) { | ||
printf(" "); | ||
for (size_t j = 0; j < N; ++j) { | ||
printf("% .3f ", hA[i + j*lda]); | ||
} | ||
printf(";\n"); | ||
} | ||
printf("]\n"); | ||
|
||
// initialization | ||
rocblas_handle handle; | ||
rocblas_create_handle(&handle); | ||
|
||
// calculate the sizes of our arrays | ||
size_t size_A = lda * (size_t)N; // count of elements in matrix A | ||
size_t size_piv = (M < N) ? M : N; // count of Householder scalars | ||
|
||
// allocate memory on GPU | ||
double *dA, *dIpiv; | ||
hipMalloc((void**)&dA, sizeof(double)*size_A); | ||
hipMalloc((void**)&dIpiv, sizeof(double)*size_piv); | ||
|
||
// copy data to GPU | ||
hipMemcpy(dA, hA, sizeof(double)*size_A, hipMemcpyHostToDevice); | ||
|
||
// compute the QR factorization on the GPU | ||
rocsolver_dgeqrf(handle, M, N, dA, lda, dIpiv); | ||
|
||
// copy the results back to CPU | ||
double* hIpiv = malloc(sizeof(double)*size_piv); // array for householder scalars on CPU | ||
hipMemcpy(hA, dA, sizeof(double)*size_A, hipMemcpyDeviceToHost); | ||
hipMemcpy(hIpiv, dIpiv, sizeof(double)*size_piv, hipMemcpyDeviceToHost); | ||
|
||
// the results are now in hA and hIpiv | ||
// we can print some of the results if we want to see them | ||
printf("R = [\n"); | ||
for (size_t i = 0; i < M; ++i) { | ||
printf(" "); | ||
for (size_t j = 0; j < N; ++j) { | ||
printf("% .3f ", (i <= j) ? hA[i + j*lda] : 0); | ||
} | ||
printf(";\n"); | ||
} | ||
printf("]\n"); | ||
|
||
// clean up | ||
free(hIpiv); | ||
hipFree(dA); | ||
hipFree(dIpiv); | ||
free(hA); | ||
rocblas_destroy_handle(handle); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters