Skip to content

Commit

Permalink
Squashed 'vision' changes from 63ab88e..b1c2f1c (#972)
Browse files Browse the repository at this point in the history
b1c2f1c Removed compare, compareS tests
7b3be51 Update releasenotesxfopencv.rst
386de98 Merge pull request #644 from mounikk/next
5ce5940 created compare and compareS test cases
8a020a6 Merge pull request #643 from yuanqian/add_stacksize_1
2e30063 try to fix Software emulation of compute unit(s) exited unexpectedly
805a939 Moved aie-ml tests to aie_dev2 branch
aa01097 Merge pull request #573 from turrahma/rgba2grey
671266d Merge pull request #575 from turrahma/pixelwise
3597e08 Merge pull request #641 from mounikk/next
089e8fc udpated doc
7c68f11 udpated doc
f3115cc clang format applied
632c5c6 updated ltm constructor
9b24c67 udpated doc file
6507025 Added pixelwise select with background pl case
70f441c Added pixelwise select no background pl case
7e9da9b Updated test names in GMIO cases
230b188 Clang formatting applied
3fec785 Updated description.json
31fbe6c Added pixelwise select gmio test with background
24b7f5c Added pixelwise select gmio test and kernel
75c2c1e clanfg formatted
66b1f96 Clang formatted.
70964e1 Removed print statements
6247c08 added rgba2grey for aie-ml

Co-authored-by: sdausr <[email protected]>
  • Loading branch information
2 people authored and GitHub Enterprise committed Oct 19, 2023
1 parent fbdea14 commit acd9a9e
Show file tree
Hide file tree
Showing 6 changed files with 488 additions and 138 deletions.
153 changes: 153 additions & 0 deletions vision/L1/include/aie-ml/imgproc/xf_pixelwise_select.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
/*
* Copyright 2021 Xilinx, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef __XF_PIXELWISE_SELECT_
#define __XF_PIXELWISE_SELECT_

#include <adf.h>
#include <algorithm>
#include <aie_api/utils.hpp>
#include <aie_api/aie.hpp>

#include <common/xf_aie_hw_utils.hpp>

namespace xf {
namespace cv {
namespace aie {

/**
 * Pixel-wise select kernel for AIE-ML working on uint8_t tiles.
 *
 * Two variants are provided, each as a buffer-level entry point (runImpl)
 * and as a raw-pointer kernel (xf_pixel_wise_select):
 *   - frame + mask: the output is the element-wise product of frame and mask;
 *   - in_frame + mask + bg_frame: each output pixel is chosen between
 *     in_frame and bg_frame depending on whether the mask pixel is > 0.
 */
class PixelwiseSelect {
   public:
    /// Buffer entry point for the mask-only variant.
    void runImpl(adf::input_buffer<uint8_t>& frame, adf::input_buffer<uint8_t>& mask, adf::output_buffer<uint8_t>& output);

    /// Buffer entry point for the variant with a background frame.
    void runImpl(adf::input_buffer<uint8_t>& in_frame,
                 adf::input_buffer<uint8_t>& mask,
                 adf::input_buffer<uint8_t>& bg_frame,
                 adf::output_buffer<uint8_t>& output);

    /// Raw-pointer kernel for the mask-only variant (processes 32 pixels per vector).
    void xf_pixel_wise_select(uint8_t* frame, uint8_t* mask, int16 height, int16 width, uint8_t* output);

    /// Raw-pointer kernel for the variant with a background frame (processes 32 pixels per vector).
    void xf_pixel_wise_select(uint8_t* in_frame,
                              uint8_t* mask,
                              uint8_t* bg_frame,
                              int16 height,
                              int16 width,
                              uint8_t* output);
};

__attribute__((noinline)) void PixelwiseSelect::xf_pixel_wise_select(
uint8_t* frame, uint8_t* mask, int16 height, int16 width, uint8_t* output) {
const int16 image_width = width;
const int16 image_height = height;

uint8_t* restrict _frame = (uint8_t*)(frame);
uint8_t* restrict _mask = (uint8_t*)(mask);
uint8_t* restrict _output = (uint8_t*)(output);
int16_t num_vectors = image_width >> 5;

::aie::vector<uint8_t, 32> vec_x;
::aie::vector<uint8_t, 32> vec_x1;
::aie::vector<uint8_t, 32> ones = ::aie::broadcast<uint8, 32>(1);
::aie::vector<uint8_t, 32> t1;

::aie::accum<acc32, 32> acc_x;

for (int i = 0; i < image_height * num_vectors; i++) chess_prepare_for_pipelining chess_loop_range(1, ) {
vec_x = ::aie::load_v<32>(_frame);
vec_x1 = ::aie::load_v<32>(_mask);
acc_x = ::aie::mul(vec_x, vec_x1);
::aie::store_v(_output, acc_x.template to_vector<uint8>(0));
_frame += 32;
_mask += 32;
_output += 32;
}
}

__attribute__((noinline)) void PixelwiseSelect::xf_pixel_wise_select(
uint8_t* in_frame, uint8_t* mask, uint8_t* bg_frame, int16 height, int16 width, uint8_t* output) {
const int16 image_width = width;
const int16 image_height = height;

uint8_t* restrict _in_frame = (uint8_t*)(in_frame);
uint8_t* restrict _bg_frame = (uint8_t*)(bg_frame);
uint8_t* restrict _mask = (uint8_t*)(mask);
uint8_t* restrict _output = (uint8_t*)(output);
int16_t num_vectors = image_width >> 5;

::aie::vector<uint8_t, 32> vec_in;
::aie::vector<uint8_t, 32> vec_bg;
::aie::vector<uint8_t, 32> vec_m;
::aie::vector<uint8_t, 32> vec_out;

for (int i = 0; i < image_height * num_vectors; i++) chess_prepare_for_pipelining chess_loop_range(1, ) {
vec_in = ::aie::load_v<32>(_in_frame);
vec_bg = ::aie::load_v<32>(_bg_frame);
vec_m = ::aie::load_v<32>(_mask);
auto mask_val = ::aie::gt(vec_m, (uint8_t)0);
vec_out = ::aie::select(vec_bg, vec_in, mask_val);
::aie::store_v(_output, vec_out);
_in_frame += 32;
_bg_frame += 32;
_mask += 32;
_output += 32;
}
}

/**
 * Buffer-level entry point for the mask-only pixel-wise select.
 *
 * Reads the tile height and width from the frame buffer, copies the frame's
 * metadata to the output buffer, and runs the kernel on the image data
 * pointers of the three buffers.
 *
 * @param frame  Input tile buffer; also the source of the tile dimensions and metadata.
 * @param mask   Mask tile buffer.
 * @param output Output tile buffer; receives the metadata copy and the result.
 */
void PixelwiseSelect::runImpl(adf::input_buffer<uint8_t>& frame,
                              adf::input_buffer<uint8_t>& mask,
                              adf::output_buffer<uint8_t>& output) {
    uint8_t* f = (uint8_t*)::aie::begin(frame);
    uint8_t* m = (uint8_t*)::aie::begin(mask);
    uint8_t* o = (uint8_t*)::aie::begin(output);

    const int height = xfGetTileHeight(f);
    const int width = xfGetTileWidth(f);

    xfCopyMetaData(f, o);

    uint8_t* f_ptr = (uint8_t*)xfGetImgDataPtr(f);
    uint8_t* m_ptr = (uint8_t*)xfGetImgDataPtr(m);
    uint8_t* o_ptr = (uint8_t*)xfGetImgDataPtr(o);

    // No diagnostic printing here: the kernel path stays free of debug
    // output, matching the overload that takes a background frame.
    xf_pixel_wise_select(f_ptr, m_ptr, height, width, o_ptr);
}

/**
 * Buffer-level entry point for the pixel-wise select with a background frame.
 *
 * Takes the tile dimensions from in_frame, copies in_frame's metadata to the
 * output buffer, then hands the image data pointers of all four buffers to
 * the raw-pointer kernel.
 *
 * @param in_frame Foreground tile buffer; also the source of dimensions and metadata.
 * @param mask     Mask tile buffer.
 * @param bg_frame Background tile buffer.
 * @param output   Output tile buffer; receives the metadata copy and the result.
 */
void PixelwiseSelect::runImpl(adf::input_buffer<uint8_t>& in_frame,
                              adf::input_buffer<uint8_t>& mask,
                              adf::input_buffer<uint8_t>& bg_frame,
                              adf::output_buffer<uint8_t>& output) {
    // Start of each tile buffer.
    uint8_t* fg_tile = (uint8_t*)::aie::begin(in_frame);
    uint8_t* mask_tile = (uint8_t*)::aie::begin(mask);
    uint8_t* bg_tile = (uint8_t*)::aie::begin(bg_frame);
    uint8_t* out_tile = (uint8_t*)::aie::begin(output);

    // Dimensions come from the foreground tile.
    int tile_rows = xfGetTileHeight(fg_tile);
    int tile_cols = xfGetTileWidth(fg_tile);

    // The output tile inherits the foreground tile's metadata.
    xfCopyMetaData(fg_tile, out_tile);

    // Image data pointers of each tile.
    uint8_t* fg_data = (uint8_t*)xfGetImgDataPtr(fg_tile);
    uint8_t* mask_data = (uint8_t*)xfGetImgDataPtr(mask_tile);
    uint8_t* bg_data = (uint8_t*)xfGetImgDataPtr(bg_tile);
    uint8_t* out_data = (uint8_t*)xfGetImgDataPtr(out_tile);

    xf_pixel_wise_select(fg_data, mask_data, bg_data, tile_rows, tile_cols, out_data);
}

} // namespace aie
} // namespace cv
} // namespace xf

#endif
136 changes: 136 additions & 0 deletions vision/L1/include/aie-ml/imgproc/xf_rgba2gray.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
/*
* Copyright 2022 Xilinx, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <adf.h>
#include <aie_api/utils.hpp>
#include <aie_api/aie.hpp>
#include <common/xf_aie_hw_utils.hpp>
//#include <common/xf_aie_hw_utils.hpp>
// #include <stdio.h>
// #include <iostream>

#ifndef __XF_RGBA2GRAY__HPP__
#define __XF_RGBA2GRAY__HPP__

namespace xf {
namespace cv {
namespace aie {

/**
 * RGBA to gray conversion kernel for AIE-ML.
 *
 * runImpl is the buffer-level entry point; xf_rgba2gray is the raw-pointer
 * kernel that produces VECTORIZATION_FACTOR gray pixels per iteration.
 */
class Rgba2Gray {
   private:
    /// Number of pixels produced per vector iteration.
    static constexpr int VECTORIZATION_FACTOR = 32;

   public:
    /// Buffer-level entry point: reads tile metadata from `in` and writes the gray tile to `out`.
    void runImpl(adf::input_buffer<uint8_t>& in, adf::output_buffer<uint8_t>& out);

    /// Raw-pointer kernel converting tile_width * tile_height pixels from ptr1 into out_ptr.
    void xf_rgba2gray(uint8_t* ptr1,
                      uint8_t* out_ptr,
                      uint16_t tile_width,
                      uint16_t tile_height);
};

/**
 * Converts a tile of interleaved 4-byte pixels into one gray byte per pixel,
 * producing 32 output pixels per vector iteration.
 *
 * Each result is accumulated from the blue vector times 29 plus an
 * ::aie::accumulate over the red and green vectors using the coefficient
 * vector wt (77, 150, ...), and is shifted right by 8 when converted to
 * uint8_t.
 * NOTE(review): presumably gray = (77*R + 150*G + 29*B) >> 8 — confirm the
 * coefficient/vector pairing against the ::aie::accumulate documentation.
 *
 * @param ptr1        Input pixel data; the main loop uses ::aie::load_v, the tail uses the unaligned loads.
 * @param ptr_out     Output gray data; the main loop uses ::aie::store_v, the tail uses the unaligned store.
 * @param tile_width  Tile width in pixels.
 * @param tile_height Tile height in pixels.
 */
__attribute__((noinline)) void Rgba2Gray::xf_rgba2gray(uint8_t* restrict ptr1,
uint8_t* restrict ptr_out,
uint16_t tile_width,
uint16_t tile_height) {
// Coefficient vector handed to ::aie::accumulate; only the first three entries are non-zero.
::aie::vector<uint8_t, 16> wt(77, 150, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
// Blue weight replicated across all lanes for the initial multiply.
::aie::vector<uint8_t, 32> wt_blue = ::aie::broadcast<uint8_t, 32>(29);
::aie::vector<uint8_t, 32> rgba_channel0, rgba_channel1, rgba_channel3, rgba_channel2;
::aie::vector<uint8_t, 32> r, g, b, gray;
::aie::accum<acc32, VECTORIZATION_FACTOR> acc;
uint16_t more_pixels = 0, loop_count;
// Number of complete 32-pixel groups in the tile (the shift by 5 assumes VECTORIZATION_FACTOR == 32).
// NOTE(review): loop_count is uint16_t — assumes (tile_height * tile_width) / 32 fits in 16 bits; confirm.
loop_count = (tile_height * tile_width) >> 5;

for (int j = 0; j < loop_count; j += 1) {
// Read four 32-byte vectors = 128 bytes = 32 pixels of 4 bytes each (1024 bits in total).
rgba_channel0 = ::aie::load_v<32>(ptr1);
ptr1 += 32;
rgba_channel1 = ::aie::load_v<32>(ptr1);
ptr1 += 32;
rgba_channel2 = ::aie::load_v<32>(ptr1);
ptr1 += 32;
rgba_channel3 = ::aie::load_v<32>(ptr1);
ptr1 += 32;

// Unzip the interleaved channels.
// NOTE(review): presumably rg_temp holds the R/G byte pairs and ba_temp the B/A byte pairs,
// assuming R,G,B,A byte order in memory — confirm.
auto[rg_temp, ba_temp] = ::aie::interleave_unzip(::aie::concat(rgba_channel0, rgba_channel1),
::aie::concat(rgba_channel2, rgba_channel3), 2);
r = ::aie::filter_even(rg_temp, 1);
g = ::aie::filter_odd(rg_temp, 1);
b = ::aie::filter_even(ba_temp, 1);

// Multiply-accumulate: start with blue * 29, add the red/green contribution, then
// shift right by 8 while converting to uint8_t and store 32 gray pixels.
acc = ::aie::mul(b, wt_blue);
acc = ::aie::accumulate<VECTORIZATION_FACTOR>(acc, wt, 0, r, g);
gray = acc.template to_vector<uint8_t>(8);
::aie::store_v((uint8_t*)ptr_out, gray);
ptr_out = ptr_out + VECTORIZATION_FACTOR;
}

// Number of pixels left over after the complete 32-pixel groups.
more_pixels = (tile_height * tile_width) - (loop_count * VECTORIZATION_FACTOR);

// If pixels remain, step the pointers back so that a full group of 32 pixels ending at the
// last pixel can be processed; the overlapping pixels are simply recomputed.
// NOTE(review): when the tile holds fewer than 32 pixels (loop_count == 0) the pointers have
// not advanced, so this steps back before the start of the data — confirm callers never pass
// such tiles.
if (more_pixels != 0) {
// Pixel shift required to process 32 pixels at once.
more_pixels = VECTORIZATION_FACTOR - more_pixels;

// Each input pixel is 32 bits (4 uint8_t), so the input pointer moves back by (pixel-shift * 4).
ptr1 = ptr1 - (more_pixels << 2);

// Each output pixel is 8 bits (1 uint8_t), so the output pointer moves back by (pixel-shift * 1).
ptr_out = ptr_out - more_pixels;

// Same processing as the main loop, using unaligned loads/stores because the
// stepped-back pointers are no longer at 32-byte group boundaries.
rgba_channel0 = ::aie::load_unaligned_v<32>(ptr1);
ptr1 += 32;
rgba_channel1 = ::aie::load_unaligned_v<32>(ptr1);
ptr1 += 32;
rgba_channel2 = ::aie::load_unaligned_v<32>(ptr1);
ptr1 += 32;
rgba_channel3 = ::aie::load_unaligned_v<32>(ptr1);
auto[rg_temp, ba_temp] = ::aie::interleave_unzip(::aie::concat(rgba_channel0, rgba_channel1),
::aie::concat(rgba_channel2, rgba_channel3), 2);
r = ::aie::filter_even(rg_temp, 1);
g = ::aie::filter_odd(rg_temp, 1);
b = ::aie::filter_even(ba_temp, 1);

acc = ::aie::mul(b, wt_blue);
acc = ::aie::accumulate<VECTORIZATION_FACTOR>(acc, wt, 0, r, g);
gray = acc.template to_vector<uint8_t>(8);
::aie::store_unaligned_v((uint8_t*)ptr_out, gray);
}
}

/**
 * Buffer-level entry point for the RGBA to gray conversion.
 *
 * Reads the tile dimensions from the input buffer and returns immediately,
 * leaving the output untouched, if either is zero. Otherwise copies the input
 * metadata to the output, sets the output tile width, applies
 * xfUnsignedSaturation to the output tile and runs the kernel on the image
 * data pointers.
 *
 * @param in  Input tile buffer (source of dimensions, metadata and pixel data).
 * @param out Output tile buffer (receives metadata and the gray pixels).
 */
void Rgba2Gray::runImpl(adf::input_buffer<uint8_t>& in, adf::output_buffer<uint8_t>& out) {
    uint8_t* src_tile = (uint8_t*)::aie::begin(in);
    uint8_t* dst_tile = (uint8_t*)::aie::begin(out);

    int16_t cols = xfGetTileWidth(src_tile);
    int16_t rows = xfGetTileHeight(src_tile);

    // Nothing to convert for an empty tile.
    const bool empty_tile = (cols == 0) || (rows == 0);
    if (empty_tile) {
        return;
    }

    // Prepare the output tile's metadata before writing any pixels.
    xfCopyMetaData(src_tile, dst_tile);
    xfSetTileWidth(dst_tile, cols);

    xfUnsignedSaturation(dst_tile);

    uint8_t* src_pixels = (uint8_t*)xfGetImgDataPtr(src_tile);
    uint8_t* dst_pixels = (uint8_t*)xfGetImgDataPtr(dst_tile);

    xf_rgba2gray(src_pixels, dst_pixels, cols, rows);
}
} // aie
} // cv
} // xf
#endif
20 changes: 20 additions & 0 deletions vision/L1/include/imgproc/xf_ltm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,26 @@ class LTM {

// Default constructor: only checks (in builds with assertions enabled) that
// OUT_TYPE is not a floating-point type.
LTM() {
    assert(!is_floating_point<OUT_TYPE>::value);
}

// Convenience constructor: runs process() on construction with the given
// input image, min/max arrays and output image.
//
// Performs the same check as the default constructor (OUT_TYPE must not be a
// floating-point type) so that every way of constructing an LTM enforces it.
LTM(xf::cv::Mat<IN_TYPE, ROWS, COLS, NPC, XFCVDEPTH_IN_1>& in,
    XF_CTUNAME(IN_TYPE, NPC) omin_r[MinMaxVArrSize][MinMaxHArrSize],
    XF_CTUNAME(IN_TYPE, NPC) omax_r[MinMaxVArrSize][MinMaxHArrSize],
    XF_CTUNAME(IN_TYPE, NPC) omin_w[MinMaxVArrSize][MinMaxHArrSize],
    XF_CTUNAME(IN_TYPE, NPC) omax_w[MinMaxVArrSize][MinMaxHArrSize],
    xf::cv::Mat<OUT_TYPE, ROWS, COLS, NPC, XFCVDEPTH_OUT_1>& out) {
    assert(!is_floating_point<OUT_TYPE>::value);
    process(in, omin_r, omax_r, omin_w, omax_w, out);
}

// Convenience constructor: runs process() on construction with the given
// input image, block dimensions (block_rows x block_cols), min/max arrays
// and output image.
//
// Performs the same check as the default constructor (OUT_TYPE must not be a
// floating-point type) so that every way of constructing an LTM enforces it.
LTM(xf::cv::Mat<IN_TYPE, ROWS, COLS, NPC, XFCVDEPTH_IN_1>& in,
    int block_rows,
    int block_cols,
    XF_CTUNAME(IN_TYPE, NPC) omin_r[MinMaxVArrSize][MinMaxHArrSize],
    XF_CTUNAME(IN_TYPE, NPC) omax_r[MinMaxVArrSize][MinMaxHArrSize],
    XF_CTUNAME(IN_TYPE, NPC) omin_w[MinMaxVArrSize][MinMaxHArrSize],
    XF_CTUNAME(IN_TYPE, NPC) omax_w[MinMaxVArrSize][MinMaxHArrSize],
    xf::cv::Mat<OUT_TYPE, ROWS, COLS, NPC, XFCVDEPTH_OUT_1>& out) {
    assert(!is_floating_point<OUT_TYPE>::value);
    process(in, block_rows, block_cols, omin_r, omax_r, omin_w, omax_w, out);
}

// Limit implementation SFINAE principal [[
template <int T = IN_TYPE, typename std::enable_if<!is_floating_point<T>::value>::type* = nullptr>
static constexpr XF_CTUNAME(IN_TYPE, NPC) LOW() {
Expand Down
1 change: 1 addition & 0 deletions vision/L3/examples/isp_24bit_decompand/description.json
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@
],
"testinfo": {
"disable": false,
"stacksize": 16384,
"jobs": [
{
"index": 0,
Expand Down
Loading

0 comments on commit acd9a9e

Please sign in to comment.