Skip to content

Commit

Permalink
forward direction working draft
Browse files Browse the repository at this point in the history
  • Loading branch information
kikimych committed Dec 8, 2023
1 parent 8ab0787 commit a9337e6
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 32 deletions.
34 changes: 22 additions & 12 deletions src/include/miopen/rnn_util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,18 +142,20 @@ struct RNNWeightOffsets

struct GruWeightOffsets : public RNNWeightOffsets
{
GruWeightOffsets(int input_vector_sz, int hidden_vec_sz, int layers_cnt, int bias_cnt)
: weight_stride(matrixes::Count * hidden_vec_sz),
GruWeightOffsets(int input_vector_sz, int hidden_vec_sz, int layers_cnt, int bias_cnt, int bidirectional_scale)
: weight_stride(matrixes::Count * hidden_vec_sz * bidirectional_scale),
uni_stride(matrixes::Count * hidden_vec_sz),
in_vec_sz(input_vector_sz),
h_vec_sz(hidden_vec_sz),
num_layers(layers_cnt),
bias_count(bias_cnt)
bias_count(bias_cnt),
bi_scale(bidirectional_scale)
{
}

int input_offset(int layer)
{
return layer == 0 ? 0 : first_layer_offset() + h_vec_sz * 2 * weight_stride * (layer - 1);
return layer == 0 ? 0 : first_layer_offset() + (h_vec_sz * bi_scale + h_vec_sz) * weight_stride * (layer - 1);
}

int hidden_offset(int layer)
Expand All @@ -169,12 +171,13 @@ struct GruWeightOffsets : public RNNWeightOffsets
}
int bias_off(int layer_id) const { return bias_off() + layer_id * bias_count * weight_stride; }
int weight_stride;
int uni_stride;

private:
const int in_vec_sz, h_vec_sz;
const int num_layers;
[[maybe_unused]] const int bi_scale = 0;
const int bias_count = 0;
[[maybe_unused]] const int bi_scale = 1;
enum matrixes
{
Z = 0,
Expand Down Expand Up @@ -203,16 +206,16 @@ struct RNNOffsets
struct GRUOffsets : public RNNOffsets
{
public:
GRUOffsets(int h_vec_size, int layers_cnt, int total_batch_size)
: hidden_size(h_vec_size), batches_per_layer(total_batch_size), num_layers(layers_cnt)
GRUOffsets(int h_vec_size, int layers_cnt, int total_batch_size, int bidirect_scale)
: hidden_size(h_vec_size), batches_per_layer(total_batch_size), num_layers(layers_cnt), bi_scale(bidirect_scale)
{
}

size_t layer_offset(int layer_id) const { return layer_id * layer_stride(); }

size_t layer_stride() const { return gemm_write_stride() * batches_per_layer; }

int gemm_write_size() const { return hidden_size; }
int gemm_write_size() const { return hidden_size * bi_scale; }

size_t gemm_write_stride() const { return (size_t)save_point::Count * gemm_write_size(); }

Expand All @@ -228,19 +231,26 @@ struct GRUOffsets : public RNNOffsets

public:
const int batches_per_layer;
// Layout:
// r
// z
// c
// r_reverse
// z_reverse
// c_reverse
int r_offset(int direction) const { return save_point::R * hidden_size + save_point::Ht * hidden_size * direction; }

int r_offset() const { return save_point::R * gemm_write_size(); }
int z_offset(int direction) const { return save_point::Z * hidden_size + save_point::Ht * hidden_size * direction; }

int z_offset() const { return save_point::Z * gemm_write_size(); }

int c_offset() const { return save_point::C * gemm_write_size(); }
int c_offset(int direction) const { return save_point::C * hidden_size + save_point::Ht * hidden_size * direction; }

int activated_offset() const { return layer_stride() * num_layers; }

size_t network_stride() const { return layer_stride() * num_layers; }

private:
int num_layers;
int bi_scale = 1;

enum save_point
{
Expand Down
47 changes: 27 additions & 20 deletions src/ocl/rnnocl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ void RNNDescriptor::RNNForwardTrainingGRU(Handle& handle,
int in_vec_size = xDesc.GetLengths()[1];
int out_vec_size = yDesc.GetLengths()[1];

GruWeightOffsets WeiBuf(in_vec_size, hidden_size, nLayers, biasMode * 2);
int bi = dirMode != 0u ? 2 : 1;
GruWeightOffsets WeiBuf(in_vec_size, hidden_size, nLayers, biasMode * 2, bi);

ActivationDescriptor sigDesc = {miopenActivationLOGISTIC, 1, 0, 1};
ActivationDescriptor tanhDesc = {miopenActivationTANH, 1, 1, 1};
Expand All @@ -84,9 +85,7 @@ void RNNDescriptor::RNNForwardTrainingGRU(Handle& handle,

bacc_per_time[seq_len] = total_batch_size;

GRUOffsets RBuff(hidden_size, nLayers, total_batch_size);

int bi = dirMode != 0u ? 2 : 1;
GRUOffsets RBuff(hidden_size, nLayers, total_batch_size, bi);

auto get_HxBuff_offset = [&bi, hidden_size, max_batch](int layer_id, int reverse = 0) {
return (static_cast<size_t>(hidden_size) * max_batch) * (bi * layer_id + reverse);
Expand All @@ -102,7 +101,8 @@ void RNNDescriptor::RNNForwardTrainingGRU(Handle& handle,
&wDesc,
reserveSpace,
x,
w](int layer_id, float beta_t = 1) {
w,
bi](int layer_id, float beta_t = 1) {
// n = Rx,Zx,Cx
const int m = RBuff.batches_per_layer, n = WeiBuf.weight_stride,
k = layer_id > 0 ? hidden_size : in_vec_size;
Expand Down Expand Up @@ -211,12 +211,16 @@ void RNNDescriptor::RNNForwardTrainingGRU(Handle& handle,
float alpha1 = 0;
beta_t = 0;

//li * batch_n * hy_stride
//hid_shift + bs * wei_len + 2 * hy_h,
//hid_shift + hid_off + bs * hy_h);

CopyTensor(handle,
desc,
reserveSpace,
desc,
reserveSpace,
RBuff.layer_offset(layer_id) + RBuff.c_offset(),
RBuff.layer_offset(layer_id) + RBuff.c_offset(rnn_direction::Forward),
RBuff.layer_offset(layer_id) + RBuff.hidden_offset());

OpTensor(handle,
Expand All @@ -230,9 +234,9 @@ void RNNDescriptor::RNNForwardTrainingGRU(Handle& handle,
&beta_t,
desc,
reserveSpace,
RBuff.layer_offset(layer_id) + RBuff.c_offset(),
RBuff.layer_offset(layer_id) + RBuff.c_offset(),
RBuff.layer_offset(layer_id) + RBuff.c_offset());
RBuff.layer_offset(layer_id) + RBuff.c_offset(rnn_direction::Forward),
RBuff.layer_offset(layer_id) + RBuff.c_offset(rnn_direction::Forward),
RBuff.layer_offset(layer_id) + RBuff.c_offset(rnn_direction::Forward));
};

auto call_gru_bias_add =
Expand Down Expand Up @@ -332,7 +336,7 @@ void RNNDescriptor::RNNForwardTrainingGRU(Handle& handle,
if(cur_time == 0 && hx == nullptr)
return;

const int m = batches.at(cur_time), n = WeiBuf.weight_stride, k = hidden_size;
const int m = batches.at(cur_time), n = WeiBuf.uni_stride, k = hidden_size;

const int lda = (cur_time == 0) ? hidden_size : RBuff.gemm_write_stride();

Expand Down Expand Up @@ -364,6 +368,8 @@ void RNNDescriptor::RNNForwardTrainingGRU(Handle& handle,
const auto ht_ptr = cur_time > 0 ? reserveSpace : hx;

const auto result_offset = RBuff.gemm_write_offset(layer, bacc_per_time[cur_time]);

std::cout << "call_gru_hidden_gemm " << "\n";

const miopenStatus_t gemm_status = CallGemm(handle,
gemm_desc_hx,
Expand Down Expand Up @@ -404,7 +410,7 @@ void RNNDescriptor::RNNForwardTrainingGRU(Handle& handle,

auto r_offset = RBuff.gemm_write_offset(layer_id, bacc_per_time[time_id]);
auto r_act_offset = r_offset + RBuff.activated_offset();

std::cout << "call_gru_activate_rz " << "\n";
sigDesc.Forward(handle,
&alpha,
// input tensor descriptor
Expand All @@ -426,12 +432,12 @@ void RNNDescriptor::RNNForwardTrainingGRU(Handle& handle,
[&RBuff, &bacc_per_time, &batches, &handle, &wDesc, reserveSpace, hidden_size](
int layer_id, int time_id) {
auto c_offset =
RBuff.gemm_write_offset(layer_id, bacc_per_time[time_id]) + RBuff.c_offset();
RBuff.gemm_write_offset(layer_id, bacc_per_time[time_id]) + RBuff.c_offset(rnn_direction::Forward);
auto hidden_offset =
RBuff.gemm_write_offset(layer_id, bacc_per_time[time_id]) + RBuff.hidden_offset();
auto hidden_act_offset = hidden_offset + RBuff.activated_offset();
auto r_act_offset = RBuff.gemm_write_offset(layer_id, bacc_per_time[time_id]) +
RBuff.r_offset() + RBuff.activated_offset();
RBuff.r_offset(rnn_direction::Forward) + RBuff.activated_offset();

const std::vector<size_t> tensor_size{
1, static_cast<size_t>(batches.at(time_id)), static_cast<size_t>(hidden_size)};
Expand Down Expand Up @@ -496,9 +502,10 @@ void RNNDescriptor::RNNForwardTrainingGRU(Handle& handle,
auto dst_desc = miopen::TensorDescriptor(wDesc.GetType(), tensor_size, tensor_stride);

auto c_offset =
RBuff.gemm_write_offset(layer_id, bacc_per_time[time_id]) + RBuff.c_offset();
RBuff.gemm_write_offset(layer_id, bacc_per_time[time_id]) + RBuff.c_offset(rnn_direction::Forward);
auto c_act_offset = c_offset + RBuff.activated_offset();


std::cout << "call_gru_activate_c_gate " << "\n";
tanhDesc.Forward(handle,
&alpha,
// input tensor descriptor
Expand Down Expand Up @@ -531,9 +538,9 @@ void RNNDescriptor::RNNForwardTrainingGRU(Handle& handle,
auto hidden_offset =
RBuff.gemm_write_offset(layer_id, bacc_per_time[time_id]) + RBuff.hidden_offset();
auto zact_offset = RBuff.gemm_write_offset(layer_id, bacc_per_time[time_id]) +
RBuff.z_offset() + RBuff.activated_offset();
RBuff.z_offset(rnn_direction::Forward) + RBuff.activated_offset();
auto cact_offset = RBuff.gemm_write_offset(layer_id, bacc_per_time[time_id]) +
RBuff.c_offset() + RBuff.activated_offset();
RBuff.c_offset(rnn_direction::Forward) + RBuff.activated_offset();

const std::vector<size_t> hidden_tensor_size{
1, static_cast<size_t>(batches.at(time_id)), static_cast<size_t>(hidden_size)};
Expand Down Expand Up @@ -3252,8 +3259,8 @@ void RNNDescriptor::RNNForwardTrainingPackedTensors(
return;
}

/*
if((rnnMode == miopenGRU) && !use_dropout && nLayers > 0 && dirMode == miopenRNNunidirection
if((rnnMode == miopenGRU) && !use_dropout && nLayers > 0
//&& dirMode == miopenRNNunidirection
&& inputMode != miopenRNNskip && !(miopen::IsDisabled(ENV(MIOPEN_RNNFWD_exp))))
{
RNNForwardTrainingGRU(
Expand All @@ -3268,7 +3275,7 @@ void RNNDescriptor::RNNForwardTrainingPackedTensors(
}
return;
}
*/


#endif // MIOPEN_USE_GEMM&& MIOPEN_BACKEND_HIP

Expand Down

0 comments on commit a9337e6

Please sign in to comment.