Squashed 'vision' changes from 63ab88e..b1c2f1c (#972)

b1c2f1c Removed compare, compareS tests 7b3be51 Update releasenotesxfopencv.rst 386de98 Merge pull request #644 from mounikk/next 5ce5940 created compare and compareS test cases 8a020a6 Merge pull request #643 from yuanqian/add_stacksize_1 2e30063 try to fix Software emulation of compute unit(s) exited unexpectedly 805a939 Moved aie-ml tests to aie_dev2 branch aa01097 Merge pull request #573 from turrahma/rgba2grey 671266d Merge pull request #575 from turrahma/pixelwise 3597e08 Merge pull request #641 from mounikk/next 089e8fc udpated doc 7c68f11 udpated doc f3115cc clang format applied 632c5c6 updated ltm constructor 9b24c67 udpated doc file 6507025 Added pixelwise select with background pl case 70f441c Added pixelwise select no background pl case 7e9da9b Updated test names in GMIO cases 230b188 Clang formatting applied 3fec785 Updated description.json 31fbe6c Added pixelwise select gmio test with background 24b7f5c Added pixelwise select gmio test and kernel 75c2c1e clanfg formatted 66b1f96 Clang formatted. 70964e1 Removed print statements 6247c08 added rgba2grey for aie-ml Co-authored-by: sdausr <[email protected]>
Xilinx · Oct 19, 2023 · acd9a9e · acd9a9e
1 parent fbdea14
commit acd9a9e
Show file tree

Hide file tree

Showing 6 changed files with 488 additions and 138 deletions.
diff --git a/vision/L1/include/aie-ml/imgproc/xf_pixelwise_select.hpp b/vision/L1/include/aie-ml/imgproc/xf_pixelwise_select.hpp
@@ -0,0 +1,153 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __XF_PIXELWISE_SELECT_
+#define __XF_PIXELWISE_SELECT_
+
+#include <adf.h>
+#include <algorithm>
+#include <aie_api/utils.hpp>
+#include <aie_api/aie.hpp>
+
+#include <common/xf_aie_hw_utils.hpp>
+
+namespace xf {
+namespace cv {
+namespace aie {
+
+/**
+ * Pixel-wise select kernel for AIE-ML.
+ *
+ * Two flavours exist, each as a buffer-level entry point (runImpl) and a
+ * raw-pointer worker (xf_pixel_wise_select):
+ *   - without background: output is the element-wise product frame * mask;
+ *   - with background: each output pixel is taken from either the input frame
+ *     or the background frame, depending on whether the mask pixel is > 0.
+ */
+class PixelwiseSelect {
+   public:
+    // Worker, no background: stores frame[i] * mask[i] into output, 32 pixels per step.
+    void xf_pixel_wise_select(uint8_t* frame, uint8_t* mask, int16 height, int16 width, uint8_t* output);
+
+    // Worker with background: chooses between in_frame and bg_frame per pixel based on mask.
+    void xf_pixel_wise_select(
+        uint8_t* in_frame, uint8_t* mask, uint8_t* bg_frame, int16 height, int16 width, uint8_t* output);
+
+    // Buffer-level entry point, no background: reads the tile size from the frame
+    // buffer, copies its metadata to the output and runs the matching worker.
+    void runImpl(adf::input_buffer<uint8_t>& frame,
+                 adf::input_buffer<uint8_t>& mask,
+                 adf::output_buffer<uint8_t>& output);
+
+    // Buffer-level entry point with a background frame.
+    void runImpl(adf::input_buffer<uint8_t>& in_frame,
+                 adf::input_buffer<uint8_t>& mask,
+                 adf::input_buffer<uint8_t>& bg_frame,
+                 adf::output_buffer<uint8_t>& output);
+};
+
+/**
+ * Pixel-wise select without a background frame.
+ *
+ * Each step loads 32 frame pixels and 32 mask pixels, multiplies them
+ * element-wise and stores the product, converted back to uint8 with a shift
+ * of 0, into the output.
+ *
+ * NOTE(review): because the result is a plain product, this presumably expects
+ * mask values of 0 or 1 - confirm against the caller / reference model.
+ * NOTE(review): only (width >> 5) whole 32-pixel vectors are counted per row;
+ * a width that is not a multiple of 32 leaves the remaining pixels unprocessed.
+ *
+ * @param frame   input pixel data
+ * @param mask    mask pixel data, one byte per pixel
+ * @param height  tile height in rows
+ * @param width   tile width in pixels
+ * @param output  destination pixel data
+ */
+__attribute__((noinline)) void PixelwiseSelect::xf_pixel_wise_select(
+    uint8_t* frame, uint8_t* mask, int16 height, int16 width, uint8_t* output) {
+    const int16 image_width = width;
+    const int16 image_height = height;
+
+    uint8_t* restrict _frame = (uint8_t*)(frame);
+    uint8_t* restrict _mask = (uint8_t*)(mask);
+    uint8_t* restrict _output = (uint8_t*)(output);
+    int16_t num_vectors = image_width >> 5; // 32-pixel vectors per row
+
+    ::aie::vector<uint8_t, 32> vec_x;
+    ::aie::vector<uint8_t, 32> vec_x1;
+
+    ::aie::accum<acc32, 32> acc_x;
+
+    for (int i = 0; i < image_height * num_vectors; i++) chess_prepare_for_pipelining chess_loop_range(1, ) {
+            vec_x = ::aie::load_v<32>(_frame);
+            vec_x1 = ::aie::load_v<32>(_mask);
+            acc_x = ::aie::mul(vec_x, vec_x1);
+            ::aie::store_v(_output, acc_x.template to_vector<uint8>(0));
+            _frame += 32;
+            _mask += 32;
+            _output += 32;
+        }
+}
+
+/**
+ * Pixel-wise select with a background frame.
+ *
+ * Works on 32 pixels per step: the mask vector is compared against 0 and the
+ * comparison result drives ::aie::select between the background and the input
+ * frame (input-frame pixel where mask > 0, background pixel otherwise, per the
+ * AIE API select() convention).
+ *
+ * Only (width >> 5) whole vectors are counted per row.
+ *
+ * @param in_frame  foreground pixel data
+ * @param mask      mask pixel data, one byte per pixel
+ * @param bg_frame  background pixel data
+ * @param height    tile height in rows
+ * @param width     tile width in pixels
+ * @param output    destination pixel data
+ */
+__attribute__((noinline)) void PixelwiseSelect::xf_pixel_wise_select(
+    uint8_t* in_frame, uint8_t* mask, uint8_t* bg_frame, int16 height, int16 width, uint8_t* output) {
+    uint8_t* restrict fg_ptr = (uint8_t*)(in_frame);
+    uint8_t* restrict bg_ptr = (uint8_t*)(bg_frame);
+    uint8_t* restrict sel_ptr = (uint8_t*)(mask);
+    uint8_t* restrict out_ptr = (uint8_t*)(output);
+
+    const int16_t vecs_per_row = width >> 5;
+    const int total_vecs = height * vecs_per_row;
+
+    for (int v = 0; v < total_vecs; v++) chess_prepare_for_pipelining chess_loop_range(1, ) {
+            ::aie::vector<uint8_t, 32> fg = ::aie::load_v<32>(fg_ptr);
+            ::aie::vector<uint8_t, 32> bg = ::aie::load_v<32>(bg_ptr);
+            ::aie::vector<uint8_t, 32> sel = ::aie::load_v<32>(sel_ptr);
+
+            // Lanes whose mask byte is non-zero take the foreground pixel.
+            auto take_fg = ::aie::gt(sel, (uint8_t)0);
+            ::aie::vector<uint8_t, 32> picked = ::aie::select(bg, fg, take_fg);
+            ::aie::store_v(out_ptr, picked);
+
+            fg_ptr += 32;
+            bg_ptr += 32;
+            sel_ptr += 32;
+            out_ptr += 32;
+        }
+}
+
+/**
+ * Buffer-level entry point for pixel-wise select without a background frame.
+ *
+ * Reads the tile height and width from the frame buffer, copies the frame
+ * buffer's metadata to the output buffer, then runs the raw-pointer worker on
+ * the image data of the three buffers.
+ *
+ * @param frame   input frame buffer (source of tile size and metadata)
+ * @param mask    mask buffer
+ * @param output  output buffer
+ */
+void PixelwiseSelect::runImpl(adf::input_buffer<uint8_t>& frame,
+                              adf::input_buffer<uint8_t>& mask,
+                              adf::output_buffer<uint8_t>& output) {
+    uint8_t* f = (uint8_t*)::aie::begin(frame);
+    uint8_t* m = (uint8_t*)::aie::begin(mask);
+    uint8_t* o = (uint8_t*)::aie::begin(output);
+
+    int height = xfGetTileHeight(f);
+    int width = xfGetTileWidth(f);
+
+    xfCopyMetaData(f, o);
+
+    uint8_t* f_ptr = (uint8_t*)xfGetImgDataPtr(f);
+    uint8_t* m_ptr = (uint8_t*)xfGetImgDataPtr(m);
+    uint8_t* o_ptr = (uint8_t*)xfGetImgDataPtr(o);
+
+    // No debug printing here: this runs once per tile, and the overload with a
+    // background frame does not print either.
+    xf_pixel_wise_select(f_ptr, m_ptr, height, width, o_ptr);
+}
+
+/**
+ * Buffer-level entry point for pixel-wise select with a background frame.
+ *
+ * The tile size is read from the input frame buffer, whose metadata is also
+ * copied to the output buffer; the raw-pointer worker then runs on the image
+ * data of all four buffers.
+ *
+ * @param in_frame  foreground frame buffer (source of tile size and metadata)
+ * @param mask      mask buffer
+ * @param bg_frame  background frame buffer
+ * @param output    output buffer
+ */
+void PixelwiseSelect::runImpl(adf::input_buffer<uint8_t>& in_frame,
+                              adf::input_buffer<uint8_t>& mask,
+                              adf::input_buffer<uint8_t>& bg_frame,
+                              adf::output_buffer<uint8_t>& output) {
+    // Start-of-buffer pointers (metadata included).
+    uint8_t* fg_buf = (uint8_t*)::aie::begin(in_frame);
+    uint8_t* mask_buf = (uint8_t*)::aie::begin(mask);
+    uint8_t* bg_buf = (uint8_t*)::aie::begin(bg_frame);
+    uint8_t* out_buf = (uint8_t*)::aie::begin(output);
+
+    int tile_rows = xfGetTileHeight(fg_buf);
+    int tile_cols = xfGetTileWidth(fg_buf);
+
+    xfCopyMetaData(fg_buf, out_buf);
+
+    // Pointers to the pixel payload of each buffer.
+    uint8_t* fg_px = (uint8_t*)xfGetImgDataPtr(fg_buf);
+    uint8_t* mask_px = (uint8_t*)xfGetImgDataPtr(mask_buf);
+    uint8_t* bg_px = (uint8_t*)xfGetImgDataPtr(bg_buf);
+    uint8_t* out_px = (uint8_t*)xfGetImgDataPtr(out_buf);
+
+    xf_pixel_wise_select(fg_px, mask_px, bg_px, tile_rows, tile_cols, out_px);
+}
+
+} // namespace aie
+} // namespace cv
+} // namespace xf
+
+#endif
diff --git a/vision/L1/include/aie-ml/imgproc/xf_rgba2gray.hpp b/vision/L1/include/aie-ml/imgproc/xf_rgba2gray.hpp
@@ -0,0 +1,136 @@
+/*
+ * Copyright 2022 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <adf.h>
+#include <aie_api/utils.hpp>
+#include <aie_api/aie.hpp>
+#include <common/xf_aie_hw_utils.hpp>
+//#include <common/xf_aie_hw_utils.hpp>
+// #include <stdio.h>
+// #include <iostream>
+
+#ifndef __XF_RGBA2GRAY__HPP__
+#define __XF_RGBA2GRAY__HPP__
+
+namespace xf {
+namespace cv {
+namespace aie {
+
+/**
+ * RGBA to gray conversion kernel for AIE-ML.
+ *
+ * runImpl is the buffer-level entry point; xf_rgba2gray is the raw-pointer
+ * worker that converts 32 pixels per step.
+ */
+class Rgba2Gray {
+   public:
+    // Raw-pointer worker: converts tile_width * tile_height RGBA pixels at ptr1 to gray bytes at out_ptr.
+    void xf_rgba2gray(uint8_t* ptr1, uint8_t* out_ptr, uint16_t tile_width, uint16_t tile_height);
+
+    // Buffer-level entry point: reads the tile size from `in`, prepares `out` and runs the worker.
+    void runImpl(adf::input_buffer<uint8_t>& in, adf::output_buffer<uint8_t>& out);
+
+   private:
+    // Number of pixels converted per step.
+    static constexpr int VECTORIZATION_FACTOR = 32;
+};
+
+/**
+ * Convert a tile of interleaved RGBA pixels (4 bytes per pixel) to 8-bit gray.
+ *
+ * Every step consumes 128 input bytes (32 pixels) and produces 32 gray bytes.
+ * The channels are de-interleaved, blue is multiplied by 29, red and green are
+ * accumulated on top using the first two entries of the weight vector (77 and
+ * 150 - presumably wt[0] * R + wt[1] * G; confirm against the AIE API
+ * accumulate() documentation), and the sum is shifted right by 8.
+ *
+ * When the pixel count is not a multiple of 32, one extra step is run with
+ * both pointers moved back so that it again covers 32 pixels, re-converting
+ * some pixels already written; that step uses unaligned loads and stores.
+ *
+ * @param ptr1         input pixel data, 4 bytes per pixel
+ * @param ptr_out      output pixel data, 1 byte per pixel
+ * @param tile_width   tile width in pixels
+ * @param tile_height  tile height in pixels
+ */
+__attribute__((noinline)) void Rgba2Gray::xf_rgba2gray(uint8_t* restrict ptr1,
+                                                       uint8_t* restrict ptr_out,
+                                                       uint16_t tile_width,
+                                                       uint16_t tile_height) {
+    ::aie::vector<uint8_t, 16> wt(77, 150, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+    ::aie::vector<uint8_t, 32> wt_blue = ::aie::broadcast<uint8_t, 32>(29);
+
+    const int total_pixels = tile_height * tile_width;
+    // Whole 32-pixel steps; the shift assumes VECTORIZATION_FACTOR == 32.
+    const uint16_t full_steps = total_pixels >> 5;
+
+    for (int step = 0; step < full_steps; ++step) {
+        // 4 x 32 bytes = 32 interleaved RGBA pixels.
+        ::aie::vector<uint8_t, 32> quad0 = ::aie::load_v<32>(ptr1);
+        ::aie::vector<uint8_t, 32> quad1 = ::aie::load_v<32>(ptr1 + 32);
+        ::aie::vector<uint8_t, 32> quad2 = ::aie::load_v<32>(ptr1 + 64);
+        ::aie::vector<uint8_t, 32> quad3 = ::aie::load_v<32>(ptr1 + 96);
+        ptr1 += 128;
+
+        // Split the interleaved stream into an R/G vector and a B/A vector.
+        auto[rg_pairs, ba_pairs] =
+            ::aie::interleave_unzip(::aie::concat(quad0, quad1), ::aie::concat(quad2, quad3), 2);
+        ::aie::vector<uint8_t, 32> red = ::aie::filter_even(rg_pairs, 1);
+        ::aie::vector<uint8_t, 32> green = ::aie::filter_odd(rg_pairs, 1);
+        ::aie::vector<uint8_t, 32> blue = ::aie::filter_even(ba_pairs, 1);
+
+        // Weighted sum, then scale back down to 8 bits.
+        ::aie::accum<acc32, VECTORIZATION_FACTOR> sum = ::aie::mul(blue, wt_blue);
+        sum = ::aie::accumulate<VECTORIZATION_FACTOR>(sum, wt, 0, red, green);
+        ::aie::vector<uint8_t, 32> gray_px = sum.template to_vector<uint8_t>(8);
+        ::aie::store_v((uint8_t*)ptr_out, gray_px);
+        ptr_out = ptr_out + VECTORIZATION_FACTOR;
+    }
+
+    // Pixels left over after the whole steps.
+    const uint16_t leftover = total_pixels - (full_steps * VECTORIZATION_FACTOR);
+
+    if (leftover != 0) {
+        // Number of pixels to step back so the last step spans a full 32 pixels.
+        const uint16_t rewind = VECTORIZATION_FACTOR - leftover;
+
+        // Input is 4 bytes per pixel, output is 1 byte per pixel.
+        ptr1 = ptr1 - (rewind << 2);
+        ptr_out = ptr_out - rewind;
+
+        // Same conversion as in the loop, with unaligned memory accesses.
+        ::aie::vector<uint8_t, 32> quad0 = ::aie::load_unaligned_v<32>(ptr1);
+        ::aie::vector<uint8_t, 32> quad1 = ::aie::load_unaligned_v<32>(ptr1 + 32);
+        ::aie::vector<uint8_t, 32> quad2 = ::aie::load_unaligned_v<32>(ptr1 + 64);
+        ::aie::vector<uint8_t, 32> quad3 = ::aie::load_unaligned_v<32>(ptr1 + 96);
+
+        auto[rg_pairs, ba_pairs] =
+            ::aie::interleave_unzip(::aie::concat(quad0, quad1), ::aie::concat(quad2, quad3), 2);
+        ::aie::vector<uint8_t, 32> red = ::aie::filter_even(rg_pairs, 1);
+        ::aie::vector<uint8_t, 32> green = ::aie::filter_odd(rg_pairs, 1);
+        ::aie::vector<uint8_t, 32> blue = ::aie::filter_even(ba_pairs, 1);
+
+        ::aie::accum<acc32, VECTORIZATION_FACTOR> sum = ::aie::mul(blue, wt_blue);
+        sum = ::aie::accumulate<VECTORIZATION_FACTOR>(sum, wt, 0, red, green);
+        ::aie::vector<uint8_t, 32> gray_px = sum.template to_vector<uint8_t>(8);
+        ::aie::store_unaligned_v((uint8_t*)ptr_out, gray_px);
+    }
+}
+
+/**
+ * Buffer-level entry point for RGBA to gray conversion.
+ *
+ * Reads the tile size from the input buffer and returns without touching the
+ * output when either dimension is 0. Otherwise it copies the input metadata to
+ * the output, sets the output tile width, calls xfUnsignedSaturation on the
+ * output buffer and runs the raw-pointer worker on the image data.
+ *
+ * @param in   input buffer holding the RGBA tile
+ * @param out  output buffer receiving the gray tile
+ */
+void Rgba2Gray::runImpl(adf::input_buffer<uint8_t>& in, adf::output_buffer<uint8_t>& out) {
+    uint8_t* src_buf = (uint8_t*)::aie::begin(in);
+    uint8_t* dst_buf = (uint8_t*)::aie::begin(out);
+
+    int16_t cols = xfGetTileWidth(src_buf);
+    int16_t rows = xfGetTileHeight(src_buf);
+
+    // Nothing to convert for an empty tile.
+    const bool empty_tile = (cols == 0) || (rows == 0);
+    if (empty_tile) {
+        return;
+    }
+
+    xfCopyMetaData(src_buf, dst_buf);
+    xfSetTileWidth(dst_buf, cols);
+
+    xfUnsignedSaturation(dst_buf);
+
+    uint8_t* src_px = (uint8_t*)xfGetImgDataPtr(src_buf);
+    uint8_t* dst_px = (uint8_t*)xfGetImgDataPtr(dst_buf);
+
+    xf_rgba2gray(src_px, dst_px, cols, rows);
+}
+} // aie
+} // cv
+} // xf
+#endif
diff --git a/vision/L1/include/imgproc/xf_ltm.hpp b/vision/L1/include/imgproc/xf_ltm.hpp
@@ -218,6 +218,26 @@ class LTM {
 
     LTM() { assert(!is_floating_point<OUT_TYPE>::value); }
 
+    // Convenience constructor: forwards every argument to process(), so
+    // constructing the object runs the operation on `in` and writes `out`.
+    // The omin_*/omax_* arrays are passed through unchanged.
+    LTM(xf::cv::Mat<IN_TYPE, ROWS, COLS, NPC, XFCVDEPTH_IN_1>& in,
+        XF_CTUNAME(IN_TYPE, NPC) omin_r[MinMaxVArrSize][MinMaxHArrSize],
+        XF_CTUNAME(IN_TYPE, NPC) omax_r[MinMaxVArrSize][MinMaxHArrSize],
+        XF_CTUNAME(IN_TYPE, NPC) omin_w[MinMaxVArrSize][MinMaxHArrSize],
+        XF_CTUNAME(IN_TYPE, NPC) omax_w[MinMaxVArrSize][MinMaxHArrSize],
+        xf::cv::Mat<OUT_TYPE, ROWS, COLS, NPC, XFCVDEPTH_OUT_1>& out) {
+        process(in,
+                omin_r,
+                omax_r,
+                omin_w,
+                omax_w,
+                out);
+    }
+
+    // Convenience constructor taking explicit block dimensions: forwards every
+    // argument, including block_rows and block_cols, to process(), so
+    // constructing the object runs the operation on `in` and writes `out`.
+    LTM(xf::cv::Mat<IN_TYPE, ROWS, COLS, NPC, XFCVDEPTH_IN_1>& in,
+        int block_rows,
+        int block_cols,
+        XF_CTUNAME(IN_TYPE, NPC) omin_r[MinMaxVArrSize][MinMaxHArrSize],
+        XF_CTUNAME(IN_TYPE, NPC) omax_r[MinMaxVArrSize][MinMaxHArrSize],
+        XF_CTUNAME(IN_TYPE, NPC) omin_w[MinMaxVArrSize][MinMaxHArrSize],
+        XF_CTUNAME(IN_TYPE, NPC) omax_w[MinMaxVArrSize][MinMaxHArrSize],
+        xf::cv::Mat<OUT_TYPE, ROWS, COLS, NPC, XFCVDEPTH_OUT_1>& out) {
+        process(in,
+                block_rows,
+                block_cols,
+                omin_r,
+                omax_r,
+                omin_w,
+                omax_w,
+                out);
+    }
+
     // Limit implementation SFINAE principal [[
     template <int T = IN_TYPE, typename std::enable_if<!is_floating_point<T>::value>::type* = nullptr>
     static constexpr XF_CTUNAME(IN_TYPE, NPC) LOW() {

diff --git a/vision/L3/examples/isp_24bit_decompand/description.json b/vision/L3/examples/isp_24bit_decompand/description.json
@@ -182,6 +182,7 @@
     ], 
     "testinfo": {
         "disable": false, 
+        "stacksize": 16384,
         "jobs": [
             {
                 "index": 0,