Skip to content

Commit

Permalink
#15642: Update shapes
Browse files Browse the repository at this point in the history
  • Loading branch information
mouliraj-mcw committed Dec 13, 2024
1 parent 636a095 commit 621f848
Show file tree
Hide file tree
Showing 10 changed files with 56 additions and 56 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -437,9 +437,9 @@ Tensor _scatter(const Tensor& input_a, const Tensor& input_b, const std::optiona
* by running reshape.
*/
Tensor _outer(const Tensor& input_a, const Tensor& input_b, const std::optional<MemoryConfig>& output_mem_config) {
const tt::tt_metal::LegacyShape s_a = input_a.get_legacy_shape();
const tt::tt_metal::LegacyShape s_b = input_b.get_legacy_shape();
auto num_ones = [](const tt::tt_metal::LegacyShape& s) -> uint32_t {
const ttnn::SimpleShape s_a = input_a.get_padded_shape();
const ttnn::SimpleShape s_b = input_b.get_padded_shape();
auto num_ones = [](const ttnn::SimpleShape& s) -> uint32_t {
uint32_t num1s = 0;
for (uint32_t idx = 0; idx < 4; idx++) {
num1s += (uint32_t)(s[idx] == 1);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ BinaryDeviceOperation ::BroadcastHeightMultiCore::create(
auto& output = tensor_return_value;
auto bcast_math = binary_op_type_to_bcast_op_math(operation_attributes.binary_op_type);

const auto ashape = a.get_legacy_shape();
const auto bshape = b->get_legacy_shape();
const auto ashape = a.get_padded_shape();
const auto bshape = b->get_padded_shape();
uint32_t N = ashape.rank() >= 4 ? ashape[-4] : 1;
uint32_t C = ashape.rank() >= 3 ? ashape[-3] : 1;
uint32_t H = ashape[-2];
Expand Down Expand Up @@ -238,8 +238,8 @@ void BinaryDeviceOperation ::BroadcastHeightMultiCore::override_runtime_argument

auto dst_dram_buffer = output_tensor.buffer();

const auto ashape = input_tensor_a.get_legacy_shape();
const auto bshape = input_tensor_b->get_legacy_shape();
const auto ashape = input_tensor_a.get_padded_shape();
const auto bshape = input_tensor_b->get_padded_shape();
uint32_t N = ashape.rank() >= 4 ? ashape[-4] : 1;
uint32_t C = ashape.rank() >= 3 ? ashape[-3] : 1;
uint32_t H = ashape[-2];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ BinaryDeviceOperation::BroadcastHeightMultiCoreShardedOptimized::create(
auto& output = tensor_return_value;
auto bcast_math = binary_op_type_to_bcast_op_math(operation_attributes.binary_op_type);

const auto ashape = a.get_legacy_shape();
const auto bshape = b->get_legacy_shape();
const auto ashape = a.get_padded_shape();
const auto bshape = b->get_padded_shape();
uint32_t N = ashape.rank() >= 4 ? ashape[-4] : 1;
uint32_t C = ashape.rank() >= 3 ? ashape[-3] : 1;
uint32_t H = ashape[-2];
Expand Down Expand Up @@ -267,9 +267,9 @@ void BinaryDeviceOperation ::BroadcastHeightMultiCoreShardedOptimized::override_
auto all_cores = shard_spec.grid;
uint32_t ncores = shard_spec.num_cores();
uint32_t Wt = 0, Ht = 0;
const auto ashape = input_tensor_a.get_legacy_shape();
const auto ashape = input_tensor_a.get_padded_shape();
uint32_t N = ashape[0], C = ashape[1], H = ashape[2], W = ashape[3];
uint32_t bN = input_tensor_b->get_legacy_shape()[0];
uint32_t bN = input_tensor_b->get_padded_shape()[0];
uint32_t NC = N * C;
if (a.memory_config().memory_layout == TensorMemoryLayout::BLOCK_SHARDED) {
Wt = shard_spec.shape[1] / TILE_WIDTH;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ BinaryDeviceOperation::BroadcastHeightMultiCoreSharded::create(
auto& output = tensor_return_value;
auto bcast_math = binary_op_type_to_bcast_op_math(operation_attributes.binary_op_type);

const auto ashape = a.get_legacy_shape();
const auto bshape = b->get_legacy_shape();
const auto ashape = a.get_padded_shape();
const auto bshape = b->get_padded_shape();
uint32_t N = ashape.rank() >= 4 ? ashape[-4] : 1;
uint32_t C = ashape.rank() >= 3 ? ashape[-3] : 1;
uint32_t H = ashape[-2];
Expand Down Expand Up @@ -127,7 +127,7 @@ BinaryDeviceOperation::BroadcastHeightMultiCoreSharded::create(
.set_globally_allocated_address(*output.buffer());
auto out_cb = tt_metal::CreateCircularBuffer(program, all_cores, output_cb_config);

uint32_t num_input_tiles = (b->get_legacy_shape()[-1] * output.element_size() + TILE_HW - 1) / TILE_HW;
uint32_t num_input_tiles = (b->get_padded_shape()[-1] * output.element_size() + TILE_HW - 1) / TILE_HW;
uint32_t src1_cb_index = tt::CBIndex::c_1;
tt_metal::CircularBufferConfig src1_cb_config =
tt_metal::CircularBufferConfig(num_input_tiles * input1_tile_size, {{src1_cb_index, b_df}})
Expand Down Expand Up @@ -249,9 +249,9 @@ void BinaryDeviceOperation ::BroadcastHeightMultiCoreSharded::override_runtime_a
auto all_cores = shard_spec.grid;
uint32_t ncores = shard_spec.num_cores();
uint32_t Wt = 0, Ht = 0;
const auto ashape = input_tensor_a.get_legacy_shape();
const auto ashape = input_tensor_a.get_padded_shape();
uint32_t N = ashape[0], C = ashape[1], H = ashape[2], W = ashape[3];
uint32_t bN = input_tensor_b->get_legacy_shape()[0];
uint32_t bN = input_tensor_b->get_padded_shape()[0];
uint32_t NC = N * C;
if (a.memory_config().memory_layout == TensorMemoryLayout::BLOCK_SHARDED) {
Wt = shard_spec.shape[1] / TILE_WIDTH;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ BinaryDeviceOperation::BroadcastWidthMultiCore::cached_program_t BinaryDeviceOpe
auto& output = tensor_return_value;
auto bcast_math = binary_op_type_to_bcast_op_math(operation_attributes.binary_op_type);

const auto ashape = a.get_legacy_shape();
const auto bshape = b->get_legacy_shape();
const auto ashape = a.get_padded_shape();
const auto bshape = b->get_padded_shape();
uint32_t N = ashape.rank() >= 4 ? ashape[-4] : 1;
uint32_t C = ashape.rank() >= 3 ? ashape[-3] : 1;
uint32_t H = ashape[-2];
Expand Down Expand Up @@ -240,8 +240,8 @@ void BinaryDeviceOperation::BroadcastWidthMultiCore::override_runtime_arguments(

auto dst_dram_buffer = output_tensor.buffer();

const auto ashape = input_tensor_a.get_legacy_shape();
const auto bshape = input_tensor_b->get_legacy_shape();
const auto ashape = input_tensor_a.get_padded_shape();
const auto bshape = input_tensor_b->get_padded_shape();
uint32_t N = ashape.rank() >= 4 ? ashape[-4] : 1;
uint32_t C = ashape.rank() >= 3 ? ashape[-3] : 1;
uint32_t H = ashape[-2];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,8 @@ inline __attribute__((always_inline)) void set_eltwise_binary_runtime_args(
if (block_or_width_sharded) {
block_size = block_width * block_height;
end_core = (*shard_spec.value().grid.ranges().begin()).end_coord;
output_width = output.get_legacy_shape()[-1] / TILE_WIDTH;
uint32_t output_height = output.volume() / output.get_legacy_shape()[-1] / TILE_HEIGHT;
output_width = output.get_padded_shape()[-1] / TILE_WIDTH;
uint32_t output_height = output.volume() / output.get_padded_shape()[-1] / TILE_HEIGHT;
last_unpadded_block_height = block_height - (round_up(output_height, block_height) - output_height);
last_unpadded_block_width = block_width - (round_up(output_width, block_width) - output_width);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -632,10 +632,10 @@ std::vector<std::optional<Tensor>> ExecuteBackwardConcat::invoke(
if (are_required_outputs[0]) {
ttnn::SmallVector<uint32_t> start_index = {0, 0, 0, 0};
ttnn::SmallVector<uint32_t> end_index = {
input.get_legacy_shape()[0],
input.get_legacy_shape()[1],
input.get_legacy_shape()[2],
input.get_legacy_shape()[3]};
input.get_padded_shape()[0],
input.get_padded_shape()[1],
input.get_padded_shape()[2],
input.get_padded_shape()[3]};
ttnn::SmallVector<uint32_t> step = {1, 1, 1, 1};
ttnn::slice(queue_id, grad, start_index, end_index, step, std::nullopt, input_grad);
grad_tensor[0] = input_grad;
Expand All @@ -644,19 +644,19 @@ std::vector<std::optional<Tensor>> ExecuteBackwardConcat::invoke(
if (are_required_outputs[1]) {
ttnn::SmallVector<uint32_t> start_index_2 = {0, 0, 0, 0};
if (dim == 0) {
start_index_2 = {input.get_legacy_shape()[0], 0, 0, 0};
start_index_2 = {input.get_padded_shape()[0], 0, 0, 0};
} else if (dim == 1) {
start_index_2 = {0, input.get_legacy_shape()[1], 0, 0};
start_index_2 = {0, input.get_padded_shape()[1], 0, 0};
} else if (dim == 2) {
start_index_2 = {0, 0, input.get_legacy_shape()[2], 0};
start_index_2 = {0, 0, input.get_padded_shape()[2], 0};
} else if (dim == 3) {
start_index_2 = {0, 0, 0, input.get_legacy_shape()[3]};
start_index_2 = {0, 0, 0, input.get_padded_shape()[3]};
}
ttnn::SmallVector<uint32_t> end_index_2 = {
grad.get_legacy_shape()[0],
grad.get_legacy_shape()[1],
grad.get_legacy_shape()[2],
grad.get_legacy_shape()[3]};
grad.get_padded_shape()[0],
grad.get_padded_shape()[1],
grad.get_padded_shape()[2],
grad.get_padded_shape()[3]};
ttnn::SmallVector<uint32_t> step_2 = {1, 1, 1, 1};
ttnn::slice(queue_id, grad, start_index_2, end_index_2, step_2, std::nullopt, other_grad);
grad_tensor[1] = other_grad;
Expand Down
2 changes: 1 addition & 1 deletion ttnn/cpp/ttnn/operations/eltwise/complex/complex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ void ComplexTensor::deallocate() {
}

// Builds a ComplexTensor from separate real and imaginary component tensors.
// The two components must have identical padded shapes so that elementwise
// complex arithmetic operates on matching tiles.
// NOTE: the diff artifact duplicating the old get_legacy_shape() assert has
// been removed; only the padded-shape check from commit 621f848 remains.
ComplexTensor CreateComplexTensor::invoke(const Tensor& real, const Tensor& imag) {
    TT_ASSERT(real.get_padded_shape() == imag.get_padded_shape(), "Tensor shapes of real and imag should be identical");
    return ComplexTensor({real, imag});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,7 @@ Tensor _variance_impl(
const std::optional<MemoryConfig>& output_mem_config) {
ttnn::SmallVector<int> dims = {2, 3};
constexpr float correction = 0.0f;
auto shape_wh = y.get_legacy_shape();
auto shape_wh = y.get_padded_shape();
float scale = 1.0f / ((float)(shape_wh[3] * shape_wh[2]) - correction);
Tensor sqr_y_minus_mean_y = ttnn::square(y_minus_mean_y, output_mem_config);
return ttnn::sum(sqr_y_minus_mean_y, dims, true, std::nullopt, std::nullopt, scale);
Expand Down Expand Up @@ -601,10 +601,10 @@ Tensor ExecuteUnaryCompositeThreshold::invoke(
std::vector<Tensor> split_tensor_for_glu(
const Tensor& input_a, int32_t dim, const std::optional<MemoryConfig>& output_mem_config) {
std::vector<Tensor> t_split;
tt::tt_metal::LegacyShape inshape(input_a.get_legacy_shape());
ttnn::SimpleShape inshape(input_a.get_padded_shape());
TT_FATAL(((inshape[dim] / 2) % tt::constants::TILE_WIDTH == 0), "Split tensor dimension should be in full tile");
ttnn::SmallVector<uint32_t> s_a = {0, 0, 0, 0};
ttnn::SmallVector<uint32_t> e_a = {input_a.get_legacy_shape()[0], inshape[1], inshape[2], inshape[3] / 2};
ttnn::SmallVector<uint32_t> e_a = {input_a.get_padded_shape()[0], inshape[1], inshape[2], inshape[3] / 2};

ttnn::SmallVector<uint32_t> s_b = {0, 0, 0, inshape[3] / 2};
ttnn::SmallVector<uint32_t> e_b = {inshape[0], inshape[1], inshape[2], inshape[3]};
Expand Down
36 changes: 18 additions & 18 deletions ttnn/cpp/ttnn/operations/eltwise/unary_backward/unary_backward.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1780,10 +1780,10 @@ std::vector<Tensor> ExecuteUnaryBackwardRepeat::invoke(
auto output_memory_config = output_mem_config.value_or(
input.memory_config()); // TODO: Remove after ternary forward ops migration is completed

auto shape_wh = input.get_legacy_shape();
auto shape_wh = input.get_padded_shape();
TT_FATAL(shape_wh[0] == 1 && "input shape[0] should be 1", "Error");
auto ttnn_device = input.device();
// input.get_legacy_shape()[0]
// input.get_padded_shape()[0]
// If repeat shape has 0's, it returns zeros of given input
if (shape[0] == 0 || shape[1] == 0 || shape[2] == 0 || shape[3] == 0) {
Tensor zero_tensor =
Expand Down Expand Up @@ -1880,7 +1880,7 @@ std::vector<Tensor> ExecuteUnaryBackwardProd::invoke(
Tensor required = ttnn::permute(grad, after_permute_dims, output_memory_config);
ttnn::SmallVector<uint32_t> start_index = {0, 0, 0, 0};
ttnn::SmallVector<uint32_t> end_index = {
grad.get_legacy_shape()[0], 1, grad.get_legacy_shape()[1], grad.get_legacy_shape()[2]};
grad.get_padded_shape()[0], 1, grad.get_padded_shape()[1], grad.get_padded_shape()[2]};
Tensor new_slice_tensor = ttnn::slice(DefaultQueueId, required, start_index, end_index, step, std::nullopt);
after_permute_dims = {0, 2, 3, 1};
updated_grad = ttnn::permute(new_slice_tensor, after_permute_dims, output_memory_config);
Expand All @@ -1895,7 +1895,7 @@ std::vector<Tensor> ExecuteUnaryBackwardProd::invoke(
Tensor required = ttnn::permute(grad, after_permute_dims, output_memory_config);
ttnn::SmallVector<uint32_t> start_index = {0, 0, 0, 0};
ttnn::SmallVector<uint32_t> end_index = {
grad.get_legacy_shape()[0], 1, grad.get_legacy_shape()[1], grad.get_legacy_shape()[3]};
grad.get_padded_shape()[0], 1, grad.get_padded_shape()[1], grad.get_padded_shape()[3]};
Tensor new_slice_tensor = ttnn::slice(DefaultQueueId, required, start_index, end_index, step, std::nullopt);
updated_grad = ttnn::permute(new_slice_tensor, after_permute_dims, output_memory_config);
if (updated_grad.get_layout() == Layout::ROW_MAJOR) {
Expand Down Expand Up @@ -1925,9 +1925,9 @@ std::vector<Tensor> ExecuteUnaryBackwardProd::invoke(
return grad_tensor;
} else if (dim == 1 || dim == -3) {
Tensor tensor_1_temp = reciprocal_input;
if (reciprocal_input.get_legacy_shape()[1] % 32 != 0) {
if (reciprocal_input.get_padded_shape()[1] % 32 != 0) {
ttnn::SmallVector<std::pair<uint32_t, uint32_t>> padding = {
{0, 0}, {0, 32 - (reciprocal_input.get_legacy_shape()[1] % 32)}, {0, 0}, {0, 0}};
{0, 0}, {0, 32 - (reciprocal_input.get_padded_shape()[1] % 32)}, {0, 0}, {0, 0}};
tensor_1_temp = ttnn::pad(0, reciprocal_input, padding, 0, true, std::nullopt);
}
ttnn::SmallVector<int64_t> after_permute_dims = {0, 2, 3, 1};
Expand All @@ -1945,13 +1945,13 @@ std::vector<Tensor> ExecuteUnaryBackwardProd::invoke(
after_permute_dims,
output_memory_config);
Tensor grad_result = result;
if (reciprocal_input.get_legacy_shape()[1] % 32 != 0) {
if (reciprocal_input.get_padded_shape()[1] % 32 != 0) {
ttnn::SmallVector<uint32_t> start_index = {0, 0, 0, 0};
ttnn::SmallVector<uint32_t> end_index = {
input.get_legacy_shape()[0],
input.get_legacy_shape()[1],
input.get_legacy_shape()[2],
input.get_legacy_shape()[3]};
input.get_padded_shape()[0],
input.get_padded_shape()[1],
input.get_padded_shape()[2],
input.get_padded_shape()[3]};
auto step = ttnn::SmallVector<uint32_t>({1, 1, 1, 1});
grad_result = ttnn::slice(DefaultQueueId, result, start_index, end_index, step, std::nullopt);
}
Expand All @@ -1960,9 +1960,9 @@ std::vector<Tensor> ExecuteUnaryBackwardProd::invoke(
}
// dim 0
Tensor tensor_1_temp = reciprocal_input;
if (reciprocal_input.get_legacy_shape()[0] % 32 != 0) {
if (reciprocal_input.get_padded_shape()[0] % 32 != 0) {
ttnn::SmallVector<std::pair<uint32_t, uint32_t>> padding = {
{0, (32 - (reciprocal_input.get_legacy_shape()[0] % 32))}, {0, 0}, {0, 0}, {0, 0}};
{0, (32 - (reciprocal_input.get_padded_shape()[0] % 32))}, {0, 0}, {0, 0}, {0, 0}};
tensor_1_temp = ttnn::pad(0, reciprocal_input, padding, 0, false, std::nullopt);
}
ttnn::SmallVector<int64_t> after_permute_dims = {3, 1, 2, 0};
Expand All @@ -1979,13 +1979,13 @@ std::vector<Tensor> ExecuteUnaryBackwardProd::invoke(
after_permute_dims,
output_memory_config);
Tensor grad_result = result;
if (reciprocal_input.get_legacy_shape()[0] % 32 != 0) {
if (reciprocal_input.get_padded_shape()[0] % 32 != 0) {
ttnn::SmallVector<uint32_t> start_index = {0, 0, 0, 0};
ttnn::SmallVector<uint32_t> end_index = {
input.get_legacy_shape()[0],
input.get_legacy_shape()[1],
input.get_legacy_shape()[2],
input.get_legacy_shape()[3]};
input.get_padded_shape()[0],
input.get_padded_shape()[1],
input.get_padded_shape()[2],
input.get_padded_shape()[3]};
grad_result = ttnn::slice(DefaultQueueId, result, start_index, end_index, step, std::nullopt);
}
grad_tensor.emplace_back(grad_result);
Expand Down

0 comments on commit 621f848

Please sign in to comment.