Skip to content

Commit

Permalink
Add C sample program (#130)
Browse files Browse the repository at this point in the history
Removed ROCM_USE_FLOAT16 and the linking against hip::device, as the
default compiler may not support them (e.g. GCC on x86_64), and our
samples don't need them anyway.

Updated the C++ sample to match the C sample's improved use of const.
  • Loading branch information
cgmb authored Aug 6, 2020
1 parent dde7521 commit afade51
Show file tree
Hide file tree
Showing 5 changed files with 120 additions and 11 deletions.
2 changes: 1 addition & 1 deletion rocsolver/clients/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ if( NOT DEFINED CMAKE_CONFIGURATION_TYPES AND NOT DEFINED CMAKE_BUILD_TYPE )
endif()

# This project may compile dependencies for clients
project( rocsolver-clients LANGUAGES CXX Fortran )
project( rocsolver-clients LANGUAGES C CXX Fortran )

if(OS_ID_rhel OR OS_ID_centos OR OS_ID_sles)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp=libgomp -pthread")
Expand Down
22 changes: 18 additions & 4 deletions rocsolver/clients/samples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,29 +7,35 @@ add_library( rocblas_module OBJECT
"${ROCBLAS_INCLUDE_DIR}/rocblas_module.f90"
)

add_executable( example-basic
# declare sample programs
add_executable( example-c-basic
example_basic.c
)
add_executable( example-cpp-basic
example_basic.cpp
)
add_executable( example-fortran-basic
example_basic.f90
$<TARGET_OBJECTS:rocblas_module>
)

# group sample programs by language
set( c_samples
example-c-basic
)
set( cpp_samples
example-basic
example-cpp-basic
)
set( fortran_samples
example-fortran-basic
)

# set flags for building the sample programs
foreach( exe ${c_samples} ${cpp_samples} ${fortran_samples} )
target_link_libraries( ${exe} PRIVATE roc::rocsolver roc::rocblas )

set_target_properties( ${exe} PROPERTIES CXX_EXTENSIONS NO )
set_target_properties( ${exe} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" )
target_compile_definitions( ${exe} PRIVATE ROCM_USE_FLOAT16 )

target_include_directories( ${exe}
PRIVATE
Expand Down Expand Up @@ -72,12 +78,20 @@ foreach( exe ${c_samples} ${cpp_samples} ${fortran_samples} )
endif()
endforeach( )

foreach( exe ${c_samples} ${cpp_samples} )
foreach( exe ${cpp_samples} )
if( NOT CUDA_FOUND )
target_link_libraries( ${exe} PRIVATE hip::device )
endif()
endforeach( )

# build the C samples as C11, with the standard required and compiler extensions disabled
foreach( exe ${c_samples} )
  set_target_properties( ${exe} PROPERTIES C_STANDARD 11 )
  set_target_properties( ${exe} PROPERTIES C_STANDARD_REQUIRED ON )
  set_target_properties( ${exe} PROPERTIES C_EXTENSIONS OFF )
endforeach( )

if( CMAKE_CXX_COMPILER MATCHES ".*/hcc$" )
# include order workaround to force /opt/rocm/include later in order to ignore installed rocblas
set(CMAKE_CXX_FLAGS "-isystem /opt/rocm/include ${CMAKE_CXX_FLAGS}")
Expand Down
95 changes: 95 additions & 0 deletions rocsolver/clients/samples/example_basic.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#include <stdio.h> // for printf
#include <stdlib.h> // for malloc
#include <hip/hip_runtime_api.h> // for hip functions
#include <rocsolver.h> // for all the rocsolver C interfaces and type declarations

// Example: Compute the QR Factorization of a matrix on the GPU

// Builds the example input matrix on the CPU.
//
// The row count, column count and leading dimension are written through
// M_out, N_out and lda_out. The return value is a malloc'ed array of lda*N
// doubles holding the matrix in column-major order; the caller must free() it.
// Returns NULL if the allocation fails (the three outputs are still written).
double* create_example_matrix(rocblas_int *M_out,
                              rocblas_int *N_out,
                              rocblas_int *lda_out) {
  // a *very* small example input; not a very efficient use of the API
  const double A[3][3] = { {  12, -51,   4},
                           {   6, 167, -68},
                           {  -4,  24, -41} };
  const rocblas_int M = 3;
  const rocblas_int N = 3;
  const rocblas_int lda = 3;
  *M_out = M;
  *N_out = N;
  *lda_out = lda;
  // note: rocsolver matrices must be stored in column major format,
  //       i.e. entry (i,j) should be accessed by hA[i + j*lda]
  double* hA = malloc(sizeof(double)*lda*N);
  if (hA == NULL) {
    // out of memory: report it to the caller instead of writing through NULL
    return NULL;
  }
  // the counters share the (signed) type of their bounds, which avoids a
  // signed/unsigned comparison; the index itself is computed as size_t
  for (rocblas_int i = 0; i < M; ++i) {
    for (rocblas_int j = 0; j < N; ++j) {
      // copy A (2D array) into hA (1D array, column-major)
      hA[(size_t)i + (size_t)j*lda] = A[i][j];
    }
  }
  return hA;
}

// We use rocsolver_dgeqrf to factor a real M-by-N matrix, A.
// See https://rocsolver.readthedocs.io/en/latest/userguide_api.html#_CPPv416rocsolver_dgeqrf14rocblas_handleK11rocblas_intK11rocblas_intPdK11rocblas_intPd
// and https://www.netlib.org/lapack/explore-html/df/dc5/group__variants_g_ecomputational_ga3766ea903391b5cf9008132f7440ec7b.html
int main() {
rocblas_int M; // rows
rocblas_int N; // cols
rocblas_int lda; // leading dimension
double* hA = create_example_matrix(&M, &N, &lda); // input matrix on CPU

// let's print the input matrix, just to see it
printf("A = [\n");
for (size_t i = 0; i < M; ++i) {
printf(" ");
for (size_t j = 0; j < N; ++j) {
printf("% .3f ", hA[i + j*lda]);
}
printf(";\n");
}
printf("]\n");

// initialization
rocblas_handle handle;
rocblas_create_handle(&handle);

// calculate the sizes of our arrays
size_t size_A = lda * (size_t)N; // count of elements in matrix A
size_t size_piv = (M < N) ? M : N; // count of Householder scalars

// allocate memory on GPU
double *dA, *dIpiv;
hipMalloc((void**)&dA, sizeof(double)*size_A);
hipMalloc((void**)&dIpiv, sizeof(double)*size_piv);

// copy data to GPU
hipMemcpy(dA, hA, sizeof(double)*size_A, hipMemcpyHostToDevice);

// compute the QR factorization on the GPU
rocsolver_dgeqrf(handle, M, N, dA, lda, dIpiv);

// copy the results back to CPU
double* hIpiv = malloc(sizeof(double)*size_piv); // array for householder scalars on CPU
hipMemcpy(hA, dA, sizeof(double)*size_A, hipMemcpyDeviceToHost);
hipMemcpy(hIpiv, dIpiv, sizeof(double)*size_piv, hipMemcpyDeviceToHost);

// the results are now in hA and hIpiv
// we can print some of the results if we want to see them
printf("R = [\n");
for (size_t i = 0; i < M; ++i) {
printf(" ");
for (size_t j = 0; j < N; ++j) {
printf("% .3f ", (i <= j) ? hA[i + j*lda] : 0);
}
printf(";\n");
}
printf("]\n");

// clean up
free(hIpiv);
hipFree(dA);
hipFree(dIpiv);
free(hA);
rocblas_destroy_handle(handle);
}
6 changes: 3 additions & 3 deletions rocsolver/clients/samples/example_basic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ void get_example_matrix(std::vector<double>& hA,
rocblas_int& N,
rocblas_int& lda) {
// a *very* small example input; not a very efficient use of the API
double A[3][3] = { { 12, -51, 4},
{ 6, 167, -68},
{ -4, 24, -41} };
const double A[3][3] = { { 12, -51, 4},
{ 6, 167, -68},
{ -4, 24, -41} };
M = 3;
N = 3;
lda = 3;
Expand Down
6 changes: 3 additions & 3 deletions rocsolver/docs/source/userguide_examples.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ For a full description of the used rocSOLVER routine, see the API documentation
rocblas_int& N,
rocblas_int& lda) {
// a *very* small example input; not a very efficient use of the API
double A[3][3] = { { 12, -51, 4},
{ 6, 167, -68},
{ -4, 24, -41} };
const double A[3][3] = { { 12, -51, 4},
{ 6, 167, -68},
{ -4, 24, -41} };
M = 3;
N = 3;
lda = 3;
Expand Down

0 comments on commit afade51

Please sign in to comment.