diff --git a/External/HIP/CMakeLists.txt b/External/HIP/CMakeLists.txt index 11d92fd127..31ed02461f 100644 --- a/External/HIP/CMakeLists.txt +++ b/External/HIP/CMakeLists.txt @@ -22,6 +22,7 @@ macro(create_local_hip_tests VariantSuffix) list(APPEND HIP_LOCAL_TESTS saxpy) list(APPEND HIP_LOCAL_TESTS memmove) list(APPEND HIP_LOCAL_TESTS split-kernel-args) + list(APPEND HIP_LOCAL_TESTS builtin-logb-scalbn) # TODO: Re-enable InOneWeekend after it is fixed #list(APPEND HIP_LOCAL_TESTS InOneWeekend) diff --git a/External/HIP/builtin-logb-scalbn.hip b/External/HIP/builtin-logb-scalbn.hip new file mode 100644 index 0000000000..a923e5a51e --- /dev/null +++ b/External/HIP/builtin-logb-scalbn.hip @@ -0,0 +1,105 @@ +#include +#include +#include +#include + +// Simple error check macro +#define HIP_CHECK(call) \ + do { \ + hipError_t err = call; \ + if (err != hipSuccess) { \ + std::cerr << "HIP error: " << hipGetErrorString(err) \ + << " at " << __FILE__ << ":" << __LINE__ << std::endl; \ + std::exit(EXIT_FAILURE); \ + } \ + } while (0) + +__global__ void my_kernel(float a[], int alen, int exp[], int explen, float *t_res) { + for (int i = 0; i < alen; i++) { + t_res[4*i] = logbf(a[i]); + t_res[4*i + 1] = logb(a[i]); + t_res[4*i + 2] = __builtin_logbf(a[i]); + t_res[4*i + 3] = __builtin_logb(a[i]); + } + + for (int i = 0; i < alen; i++) { + for (int j = 0; j < explen; j++) { + t_res[4*alen + 4*explen*i + 4*j] = scalbnf(a[i], exp[j]); + t_res[4*alen + 4*explen*i + 4*j + 1] = scalbn(a[i], exp[j]); + t_res[4*alen + 4*explen*i + 4*j + 2] = __builtin_scalbnf(a[i], exp[j]); + t_res[4*alen + 4*explen*i + 4*j + 3] = __builtin_scalbn(a[i], exp[j]); + } + } +} + +void __attribute__((noinline)) test(float a[], int alen, int exp[], int explen, float *h_res) { + for (int i = 0; i < alen; i++) { + h_res[4*i] = logbf(a[i]); + h_res[4*i + 1] = logb(a[i]); + h_res[4*i + 2] = __builtin_logbf(a[i]); + h_res[4*i + 3] = __builtin_logb(a[i]); + } + + for (int i = 0; i < alen; i++) { + for (int j = 0; j < explen; j++) { + h_res[4*alen + 4*explen*i + 4*j] = scalbnf(a[i], exp[j]); + h_res[4*alen + 4*explen*i + 4*j + 1] = scalbn(a[i], exp[j]); + h_res[4*alen + 4*explen*i + 4*j + 2] = __builtin_scalbnf(a[i], exp[j]); + h_res[4*alen + 4*explen*i + 4*j + 3] = __builtin_scalbn(a[i], exp[j]); + } + } +} + +int main(int argc, char **argv) { + // Init input data + float a[] = {16.0f, 3.14f, 0.0f, -0.0f, INFINITY, NAN}; + int alen = sizeof(a) / sizeof(a[0]); + int exp[] = {10, 0, -5}; + int explen = sizeof(exp) / sizeof(exp[0]); + + // Compute on CPU + int res_len = 4 * alen + 4 * alen * explen; // logb + scalbn + int res_bsize = sizeof(float) * res_len; + float *h_res = (float *)malloc(res_bsize); + test(a, alen, exp, explen, h_res); + + // Make a copy for GPU + float *d_a; + int *d_exp; + float *t_res; + HIP_CHECK(hipMalloc((void**)&d_a, sizeof(a))); + HIP_CHECK(hipMalloc((void**)&d_exp, sizeof(exp))); + HIP_CHECK(hipMalloc((void**)&t_res, res_bsize)); + HIP_CHECK(hipMemcpy(d_a, a, sizeof(a), hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(d_exp, exp, sizeof(exp), hipMemcpyHostToDevice)); + HIP_CHECK(hipMemset(t_res, 0, res_bsize)); + + // Launch a GPU kernel + my_kernel<<<1,1>>>(d_a, alen, d_exp, explen, t_res); + + // Copy the device results to host + float *d_res = (float *)malloc(res_bsize); + HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK(hipMemcpy(d_res, t_res, res_bsize, hipMemcpyDeviceToHost)); + + // Verify the results match CPU. + int errs = 0; + for(int i = 0; i < res_len; i++) { + if (fabs(h_res[i] - d_res[i]) > fabs(h_res[i] * 0.0001f)) { + printf("found error i=%i h=%f d=%f\n", i, h_res[i], d_res[i]); + errs++; + } + } + if (errs != 0) + printf("%i errors\n", errs); + else + printf("PASSED!\n"); + + free(h_res); + HIP_CHECK(hipFree(d_a)); + HIP_CHECK(hipFree(d_exp)); + HIP_CHECK(hipFree(t_res)); + free(d_res); + return errs; +} + diff --git a/External/HIP/builtin-logb-scalbn.reference_output b/External/HIP/builtin-logb-scalbn.reference_output new file mode 100644 index 0000000000..391efdf648 --- /dev/null +++ b/External/HIP/builtin-logb-scalbn.reference_output @@ -0,0 +1,2 @@ +PASSED! +exit 0