Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

xe, benchdnn: fix large buffer ip and add nighly gpu tests #2573

Merged
merged 3 commits into from
Feb 3, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions src/gpu/gpu_inner_product_list.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2021-2024 Intel Corporation
* Copyright 2021-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -19,7 +19,6 @@
#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL
#include "gpu/intel/ocl/convolution_inner_product.hpp"
#include "gpu/intel/ocl/gemm_inner_product.hpp"
#include "gpu/intel/ocl/gemm_post_ops_inner_product.hpp"
#include "gpu/intel/ocl/ref_inner_product.hpp"
#endif

4 changes: 4 additions & 0 deletions src/gpu/intel/compute/kernel_ctx.hpp
Original file line number Diff line number Diff line change
@@ -72,6 +72,10 @@ class kernel_ctx_t {
if (size > INT_MAX) use_int32_offset(false);
}

void register_buffer_size(const memory_desc_wrapper &mdw) {
register_buffer_size(mdw.size());
}

// Enable various optimizations when all buffers are < 2GB in size. In this
// case, int32_t types can be used for data offsets and avoid int64_t
// operations when native 64-bit operations are unsupported.
63 changes: 0 additions & 63 deletions src/gpu/intel/ocl/gemm_post_ops_inner_product.cl

This file was deleted.

83 changes: 0 additions & 83 deletions src/gpu/intel/ocl/gemm_post_ops_inner_product.cpp

This file was deleted.

222 changes: 0 additions & 222 deletions src/gpu/intel/ocl/gemm_post_ops_inner_product.hpp

This file was deleted.

66 changes: 34 additions & 32 deletions src/gpu/intel/ocl/ref_inner_product.cl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2019-2024 Intel Corporation
* Copyright 2019-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -26,21 +26,23 @@ __kernel void ref_inner_product_fwd(__global SRC_DATA_T *src,
__global DST_DATA_T *dst POST_OP_ARGS, __global float *src_scales,
__global float *wei_scales, __global float *dst_scales) {

const int mb = GWS_GET_MB();
const int oc = GWS_GET_OC();
const off_t mb = GWS_GET_MB();
const off_t oc = GWS_GET_OC();

if (mb >= MB || oc >= OC) return;

ACC_DATA_T d = 0;
#if HAS_SPATIAL == 1
for (int ic = 0; ic < IC; ++ic)
for (int kd = 0; kd < KD; ++kd)
for (int kh = 0; kh < KH; ++kh)
for (int kw = 0; kw < KW; ++kw) {
const uint src_off = SRC_OFF(mb, ic, kd, kh, kw);
const uint wei_off = WEI_OFF(0, oc, ic, kd, kh, kw);
for (off_t ic = 0; ic < IC; ++ic)
for (off_t kd = 0; kd < KD; ++kd)
for (off_t kh = 0; kh < KH; ++kh)
for (off_t kw = 0; kw < KW; ++kw) {
const off_t src_off = SRC_OFF(mb, ic, kd, kh, kw);
const off_t wei_off = WEI_OFF(0, oc, ic, kd, kh, kw);
#else
for (int ic = 0; ic < IC_TOTAL; ++ic) {
const uint src_off = mb * IC_TOTAL + ic;
const uint wei_off = oc * IC_TOTAL + ic;
for (off_t ic = 0; ic < IC_TOTAL; ++ic) {
const off_t src_off = mb * IC_TOTAL + ic;
const off_t wei_off = oc * IC_TOTAL + ic;
#endif
d += SRC_TO_REF(src[src_off]) * WEI_TO_REF(wei[wei_off]);
}
@@ -81,19 +83,19 @@ KERNEL_ATTR
__kernel void ref_inner_product_bwd_data(__global SRC_DATA_T *diff_src,
__global WEI_DATA_T *wei, __global DST_DATA_T *diff_dst) {

const int mb = GWS_GET_MB_IC() / IC;
const int ic = GWS_GET_MB_IC() % IC;
const int kd = GWS_GET_KD();
const int kh = GWS_GET_KH();
const int kw = GWS_GET_KW();
const off_t mb = GWS_GET_MB_IC() / IC;
const off_t ic = GWS_GET_MB_IC() % IC;
const off_t kd = GWS_GET_KD();
const off_t kh = GWS_GET_KH();
const off_t kw = GWS_GET_KW();

float ds = 0.0f;
for (int oc = 0; oc < OC; ++oc) {
const uint diff_dst_off = DST_OFF(mb, oc, 0, 0, 0);
const uint wei_off = WEI_OFF(0, oc, ic, kd, kh, kw);
for (off_t oc = 0; oc < OC; ++oc) {
const off_t diff_dst_off = DST_OFF(mb, oc, 0, 0, 0);
const off_t wei_off = WEI_OFF(0, oc, ic, kd, kh, kw);
ds += DST_TO_REF(diff_dst[diff_dst_off]) * WEI_TO_REF(wei[wei_off]);
}
const uint diff_src_off = SRC_OFF(mb, ic, kd, kh, kw);
const off_t diff_src_off = SRC_OFF(mb, ic, kd, kh, kw);
diff_src[diff_src_off] = REF_TO_SRC(ds);
}
#endif
@@ -104,25 +106,25 @@ __kernel void ref_inner_product_bwd_weights(__global SRC_DATA_T *src,
__global WEI_DATA_T *diff_wei, __global BIA_DATA_T *diff_bias,
__global DST_DATA_T *diff_dst) {

const int oc = GWS_GET_OC();
const int ic = GWS_GET_IC();
const int kd = GWS_GET_KD();
const int kh = GWS_GET_KH();
const int kw = GWS_GET_KW();
const off_t oc = GWS_GET_OC();
const off_t ic = GWS_GET_IC();
const off_t kd = GWS_GET_KD();
const off_t kh = GWS_GET_KH();
const off_t kw = GWS_GET_KW();

float ds = 0.0f;
for (int mb = 0; mb < MB; ++mb) {
const uint diff_dst_off = DST_OFF(mb, oc, 0, 0, 0);
const uint src_off = SRC_OFF(mb, ic, kd, kh, kw);
for (off_t mb = 0; mb < MB; ++mb) {
const off_t diff_dst_off = DST_OFF(mb, oc, 0, 0, 0);
const off_t src_off = SRC_OFF(mb, ic, kd, kh, kw);
ds += DST_TO_REF(diff_dst[diff_dst_off]) * SRC_TO_REF(src[src_off]);
}
const uint diff_wei_off = WEI_OFF(0, oc, ic, kd, kh, kw);
const off_t diff_wei_off = WEI_OFF(0, oc, ic, kd, kh, kw);
diff_wei[diff_wei_off] = REF_TO_WEI(ds);
#if WITH_BIAS == 1
if (ic == 0) {
float db = 0.0f;
for (int mb = 0; mb < MB; ++mb) {
const uint diff_dst_off = DST_OFF(mb, oc, 0, 0, 0);
for (off_t mb = 0; mb < MB; ++mb) {
const off_t diff_dst_off = DST_OFF(mb, oc, 0, 0, 0);
db += DST_TO_REF(diff_dst[diff_dst_off]);
}
diff_bias[oc] = REF_TO_BIA(db);
16 changes: 15 additions & 1 deletion src/gpu/intel/ocl/ref_inner_product.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2019-2024 Intel Corporation
* Copyright 2019-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -169,6 +169,12 @@ status_t ref_inner_product_fwd_t::pd_t::init_conf(impl::engine_t *engine) {

status_t ref_inner_product_fwd_t::pd_t::init_kernel_ctx(
compute::kernel_ctx_t &kernel_ctx) const {

kernel_ctx.register_buffer_size(*src_md());
kernel_ctx.register_buffer_size(*dst_md());
kernel_ctx.register_buffer_size(*weights_md(0));
kernel_ctx.register_buffer_size(*weights_md(1));

return init_kernel_ctx_common(kernel_ctx, conf, off, *this);
}

@@ -213,6 +219,10 @@ status_t ref_inner_product_bwd_data_t::pd_t::init_conf(impl::engine_t *engine) {

status_t ref_inner_product_bwd_data_t::pd_t::init_kernel_ctx(
compute::kernel_ctx_t &kernel_ctx) const {
kernel_ctx.register_buffer_size(*diff_src_md());
kernel_ctx.register_buffer_size(*diff_dst_md());
kernel_ctx.register_buffer_size(*weights_md(0));
kernel_ctx.register_buffer_size(*weights_md(1));
return init_kernel_ctx_common(kernel_ctx, conf, off, *this);
}

@@ -246,6 +256,10 @@ status_t ref_inner_product_bwd_weights_t::pd_t::init_conf(

status_t ref_inner_product_bwd_weights_t::pd_t::init_kernel_ctx(
compute::kernel_ctx_t &kernel_ctx) const {
kernel_ctx.register_buffer_size(*src_md());
kernel_ctx.register_buffer_size(*diff_dst_md());
kernel_ctx.register_buffer_size(*diff_weights_md(0));
kernel_ctx.register_buffer_size(*diff_weights_md(1));
return init_kernel_ctx_common(kernel_ctx, conf, off, *this);
}

26 changes: 26 additions & 0 deletions tests/benchdnn/inputs/ip/test_ip_large_gpu
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
--reset --stag=ab --wtag=any --dtag=ab --dt=bf16:bf16:f32 mb21ic13oc133326203
--reset --stag=ab --wtag=ab --dtag=ab --dt=u8:s8:u8 mb2270183416ic1oc2
--reset --stag=ab --wtag=any --dtag=ab --dt=f32:f32:f32 --dir=BWD_W mb1423ic1oc2481123
--reset --stag=any --wtag=any --dtag=any --dt=u8:s8:u8 mb1723182ic130oc2836
--reset --stag=ab --wtag=ab --dtag=ab --dt=f32:f32:f32 mb99ic7oc15106073
--reset --stag=any --wtag=any --dtag=any --dt=f16:f16:f32 mb53ic1oc26062789
--reset --stag=ab --wtag=ab --dtag=ab --dt=f16:f16:f16 --dir=BWD_D mb106514950ic2oc50
--reset --stag=any --wtag=any --dtag=any --dt=bf16:bf16:bf16 mb3391793ic328oc1
--reset --stag=any --wtag=any --dtag=any --dt=bf16:bf16:bf16 --dir=BWD_W mb24ic670oc3662320
--reset --stag=ab --wtag=any --dtag=ab --dt=bf16:bf16:bf16 mb39ic24oc32170728
--reset --stag=ab --wtag=any --dtag=ab --dt=s8:s8:u8 mb10873493ic6oc1247
--reset --stag=ab --wtag=any --dtag=ab --dt=s8:s8:s32 mb17905ic610oc200127
--reset --stag=ab --wtag=any --dtag=ab --dt=bf16:bf16:bf16 --dir=BWD_D mb53378853ic54oc88
--reset --stag=any --wtag=any --dtag=any --dt=bf16:bf16:f32 mb873846ic4oc928
--reset --stag=ab --wtag=any --dtag=ab --dt=u8:s8:u8 mb64ic9oc176379826
--reset --stag=any --wtag=any --dtag=any --dt=s8:s8:s8 mb16660ic2oc198414
--reset --stag=ab --wtag=any --dtag=ab --dt=u8:s8:s32 mb66884057ic49oc1
--reset --stag=any --wtag=any --dtag=any --dt=u8:s8:s32 mb186ic5oc3888311
--reset --stag=ab --wtag=ab --dtag=ab --dt=bf16:bf16:bf16 mb31ic76oc55185860
--reset --stag=ab --wtag=ab --dtag=ab --dt=f16:f16:f16 mb15058ic4oc275579
--reset --stag=ab --wtag=ab --dtag=ab --dt=f16:f16:f16 --dir=BWD_W mb1ic27oc44024665
--reset --stag=ab --wtag=ab --dtag=ab --dt=u8:s8:s8 mb3276ic812oc1058399
--reset --stag=ab --wtag=any --dtag=ab --dt=f32:f32:f32 mb20135ic369oc47033
--reset --stag=ab --wtag=ab --dtag=ab --dt=bf16:bf16:bf16 --dir=BWD_W mb1ic15oc149971097
--reset --stag=ab --wtag=any --dtag=ab --dt=u8:s8:s8 mb526ic2oc12112924
--reset --stag=ab --wtag=any --dtag=ab --dt=f16:f16:f32 mb1481323ic1oc1577