Merge branch 'develop' into gru_refacotr
kikimych committed Dec 12, 2023
2 parents a9337e6 + dfaff6a commit 25a1c90
Showing 25 changed files with 705 additions and 251 deletions.
1 change: 1 addition & 0 deletions .github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1 @@
blank_issues_enabled: true
216 changes: 216 additions & 0 deletions .github/ISSUE_TEMPLATE/issue_report.yml
@@ -0,0 +1,216 @@
name: Issue Report
description: File a report for ROCm related issues on Linux and Windows. For documentation issues or other non-bug reports, please open a blank issue below.
title: "[Issue]: "

body:
  - type: markdown
    attributes:
      value: |
        Thank you for taking the time to fill out this report!
        You can acquire your OS, CPU, GPU (for filling out this report) with the following commands:
        Linux:
          echo "OS:" && cat /etc/os-release | grep -E "^(NAME=|VERSION=)";
          echo "CPU: " && cat /proc/cpuinfo | grep "model name" | sort --unique;
          echo "GPU:" && /opt/rocm/bin/rocminfo | grep -E "^\s*(Name|Marketing Name)";
        Windows:
          (Get-WmiObject Win32_OperatingSystem).Version
          (Get-WmiObject win32_Processor).Name
          (Get-WmiObject win32_VideoController).Name
  - type: textarea
    attributes:
      label: Problem Description
      description: Describe the issue you encountered.
    validations:
      required: true
  - type: input
    attributes:
      label: Operating System
      description: What is the name and version number of the OS?
      placeholder: "e.g. Ubuntu 22.04.3 LTS (Jammy Jellyfish)"
    validations:
      required: true
  - type: input
    attributes:
      label: CPU
      description: What CPU did you encounter the issue on?
      placeholder: "e.g. AMD Ryzen 9 5900HX with Radeon Graphics"
    validations:
      required: true
  - type: dropdown
    attributes:
      label: GPU
      description: What GPU(s) did you encounter the issue on? (You can select multiple GPUs from the list.)
      multiple: true
      options:
        - AMD Instinct MI250X
        - AMD Instinct MI250
        - AMD Instinct MI210
        - AMD Instinct MI100
        - AMD Instinct MI50
        - AMD Instinct MI25
        - AMD Radeon Pro V620
        - AMD Radeon Pro VII
        - AMD Radeon RX 7900 XTX
        - AMD Radeon VII
        - AMD Radeon Pro W7900
        - AMD Radeon Pro W7800
        - AMD Radeon Pro W6800
        - AMD Radeon Pro W6600
        - AMD Radeon Pro W5500
        - AMD Radeon RX 7900 XT
        - AMD Radeon RX 7600
        - AMD Radeon RX 6950 XT
        - AMD Radeon RX 6900 XT
        - AMD Radeon RX 6800 XT
        - AMD Radeon RX 6800
        - AMD Radeon RX 6750
        - AMD Radeon RX 6700 XT
        - AMD Radeon RX 6700
        - AMD Radeon RX 6650 XT
        - AMD Radeon RX 6600 XT
        - AMD Radeon RX 6600
        - Other
    validations:
      required: true
  - type: input
    attributes:
      label: Other
      description: If you selected Other, please specify
  - type: dropdown
    attributes:
      label: ROCm Version
      description: What version(s) of ROCm did you encounter the issue on?
      multiple: true
      options:
        - ROCm 5.7.1
        - ROCm 5.7.0
        - ROCm 5.6.0
        - ROCm 5.5.1
        - ROCm 5.5.0
    validations:
      required: true
  - type: dropdown
    attributes:
      label: ROCm Component
      description: (Optional) If this issue relates to a specific ROCm component, it can be mentioned here.
      multiple: true
      options:
        - Other
        - AMD Common Language Runtime
        - AMD MIGraphX
        - AMD System Management Interface
        - amdgpu KCL/autoconf
        - amdgpu Kernel-mode GPU Driver
        - amdgpu-install
        - AOMP
        - AOMP Extras
        - AqlProfile
        - build-infra
        - chelsio
        - clang-ocl
        - Composable Kernel
        - dkms
        - docker / ROCm-docker
        - flang
        - gpuburn
        - half
        - HIP
        - HIP Examples
        - hipBLAS
        - hipBLASLt
        - HIPCC
        - hipCUB
        - hip-examples-private
        - hipFFT
        - hipfort
        - HIPIFY
        - hipRAND
        - hipSOLVER
        - hipSPARSE
        - hipSPARSELt
        - hipTensor
        - hip-tests
        - HSA Runtime
        - infrastructure
        - jenkins-utils
        - libdrm
        - Linux BPI packaging framework
        - llvm-project
        - Mesa
        - meta
        - MIOpen
        - MIVisionX
        - ml-framework-ci
        - MLSEQA_TestRepo
        - OpenCL API C++ Bindings
        - OpenCL API Headers
        - OpenCL Conformance Test Suite
        - OpenCL ICD Loader
        - perftest-p2p
        - prototype
        - RCCL
        - rccl-rdma-sharp-plugins
        - rocALUTION
        - rocBLAS
        - ROCdbgapi
        - ROCdebug-agent
        - rocFFT
        - ROCgdb
        - ROCK
        - ROCm Documentation/Website
        - ROCm Data Center Tool
        - ROCm Examples
        - ROCm for Windows
        - ROCm Performance Primitives
        - ROCm System Management Interface Library
        - ROCm Thrust
        - ROCm Validation Suite
        - rocm_bandwidth_test
        - rocm-cmake
        - rocm-core
        - rocm-docs-core
        - rocminfo
        - rocMLIR
        - rocmtools
        - rocPRIM
        - rocprofiler
        - rocRAND
        - ROCR-Runtime
        - rocSOLVER
        - rocSPARSE
        - roctracer
        - ROCT-Thunk-Interface
        - rocWMMA
        - Tensile
        - umr
        - ibv_rc_pingpong-amd
        - mellanox
        - mpitest
        - Pytorch
        - Tensorflow
        - APEX
        - torchvision
        - Magma
  - type: textarea
    attributes:
      label: Steps to Reproduce
      description: (Optional) Detailed steps to reproduce the issue.
    validations:
      required: false

  - type: textarea
    attributes:
      label: (Optional for Linux users) Output of /opt/rocm/bin/rocminfo --support
      description: The output of rocminfo --support could help to better address the problem.
    validations:
      required: false

  - type: textarea
    attributes:
      label: Additional Information
      description: (Optional) Any additional information that is relevant, e.g. relevant environment variables, dockerfiles, log files, dmesg output (on Linux), etc.
    validations:
      required: false
9 changes: 9 additions & 0 deletions docs/doxygen/Doxyfile
@@ -2452,3 +2452,12 @@ GENERATE_LEGEND = YES
# This tag requires that the tag HAVE_DOT is set to YES.

DOT_CLEANUP = YES

# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when
# a warning is encountered. If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS
# then doxygen will continue running as if WARN_AS_ERROR tag is set to NO, but
# at the end of the doxygen process doxygen will return with a non-zero status.
# Possible values are: NO, YES and FAIL_ON_WARNINGS.
# The default value is: NO.

WARN_AS_ERROR = YES
2 changes: 1 addition & 1 deletion docs/sphinx/requirements.in
@@ -1 +1 @@
rocm-docs-core==0.30.0
rocm-docs-core==0.30.1
2 changes: 1 addition & 1 deletion docs/sphinx/requirements.txt
@@ -100,7 +100,7 @@ requests==2.31.0
# via
# pygithub
# sphinx
rocm-docs-core==0.30.0
rocm-docs-core==0.30.1
# via -r requirements.in
smmap==5.0.0
# via gitdb
32 changes: 25 additions & 7 deletions include/miopen/miopen.h
@@ -2994,9 +2994,10 @@ MIOPEN_EXPORT miopenStatus_t miopenFusionPlanGetOp(miopenFusionPlanDescriptor_t
miopenFusionOpDescriptor_t* op);

/*! @brief Query the workspace size required for the fusion plan
 *
 * @param handle MIOpen handle (input)
 * @param fusePlanDesc A fusion plan descriptor (input)
 * @param workSpaceSize Pointer to memory to return size in bytes (output)
 * @param algo Algorithm selected (input)
 * @return miopenStatus_t
 */
MIOPEN_EXPORT miopenStatus_t
@@ -3327,12 +3328,29 @@ miopenExecuteFusionPlan(const miopenHandle_t handle,
void* output,
miopenOperatorArgs_t args);

/*! @brief Prepares and executes the Convlution+Bias+Activation Fusion
*
*
* @param handle MIOpen handle (input)
* @return miopenStatus_t
/*! @brief Prepares and executes the Convolution+Bias+Activation Fusion.
 *
 * @param handle MIOpen handle (input)
 * @param alpha1 Floating point scaling factor, allocated on the host (input)
 * @param xDesc Tensor descriptor for input data tensor x (input)
 * @param x Data tensor x (input)
 * @param wDesc Tensor descriptor for weight tensor w (input)
 * @param w Weights tensor w (input)
 * @param convDesc Convolution layer descriptor (input)
 * @param algo Algorithm selected (input)
 * @param workspace Pointer to workspace required (input)
 * @param workspaceSizeInBytes Size of the memory in bytes pointed to by workspace above (input)
 * @param alpha2 Floating point scaling factor, allocated on the host (input)
 * @param zDesc Tensor descriptor for tensor z (input)
 * @param z Data tensor z (input)
 * @param biasDesc Tensor descriptor for bias tensor (input)
 * @param bias Data tensor bias (input)
 * @param activationDesc Activation descriptor that specifies the activation mode (input)
 * @param yDesc Tensor descriptor for output data tensor y (input)
 * @param y Output data tensor (output)
 * @return miopenStatus_t
 */

MIOPEN_EXPORT miopenStatus_t
miopenConvolutionBiasActivationForward(miopenHandle_t handle,
const void* alpha1,
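For context, a minimal usage sketch of this fused call follows. The variable names are hypothetical; the handle, descriptors, device buffers, and algorithm choice are assumed to be created and populated elsewhere, so this is an illustration rather than part of the commit:

    #include <miopen/miopen.h>

    // Hedged sketch: conceptually computes
    //   y = activation(alpha1 * conv(x, w) + alpha2 * z + bias)
    float alpha1 = 1.0f; // scales the convolution output
    float alpha2 = 0.0f; // scales the optional residual tensor z
    miopenStatus_t rc = miopenConvolutionBiasActivationForward(
        handle,
        &alpha1, xDesc, x,               // input tensor
        wDesc, w, convDesc, algo,        // weights plus convolution setup
        workspace, workspaceSizeInBytes, // scratch space for the chosen algo
        &alpha2, zDesc, z,               // residual input, ignored when alpha2 == 0
        biasDesc, bias,                  // per-channel bias
        activationDesc,                  // e.g. configured for ReLU
        yDesc, y);                       // output tensor
    if(rc != miopenStatusSuccess)
    {
        // fall back to separate convolution, bias, and activation calls
    }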
@@ -4255,7 +4273,7 @@ MIOPEN_EXPORT miopenStatus_t miopenGetRNNPaddingMode(miopenRNNDescriptor_t rnnDe
*
* @param handle MIOpen handle (input)
* @param rnnDesc RNN layer descriptor type (input)
*
* @param fwdMode Specifies in which mode the buffers will be used.
* @param xDesc An input tensor descriptor for sequenced RNN data. This
miopenSeqTensorDescriptor_t should be initialized by the `miopenSetRNNDataSeqTensorDescriptor`
function. (input)
2 changes: 1 addition & 1 deletion requirements.txt
@@ -6,4 +6,4 @@ nlohmann/[email protected] -DJSON_MultipleHeaders=ON -DJSON_BuildTests=Off
ROCmSoftwarePlatform/[email protected]
ROCmSoftwarePlatform/[email protected]
ROCmSoftwarePlatform/frugally-deep@9683d557eb672ee2304f80f6682c51242d748a50
ROCmSoftwarePlatform/composable_kernel@e9047ab94bc7a35cd8140c0406bb9037f8438df6 -DCMAKE_BUILD_TYPE=Release -DINSTANCES_ONLY=ON
ROCmSoftwarePlatform/composable_kernel@df467969684505876ef3a95fef94b77645836494 -DCMAKE_BUILD_TYPE=Release -DINSTANCES_ONLY=ON
4 changes: 4 additions & 0 deletions src/CMakeLists.txt
@@ -906,7 +906,11 @@ else()
file(GLOB PERF_DB_FILES kernels/*.db)
file(GLOB KERN_DB_FILES kernels/*.kdb)
list(APPEND FIND_DB_FILES ${PERF_DB_FILES})
# Install the kdb files for the kdb sync test only; a better approach would be
# to install the kdb package instead
if(MIOPEN_TEST_DBSYNC)
list(APPEND FIND_DB_FILES ${KERN_DB_FILES})
endif()
if(NOT MIOPEN_DISABLE_SYSDB)
if( NOT ENABLE_ASAN_PACKAGING )
install(FILES
30 changes: 17 additions & 13 deletions src/conv/heuristics/ai_heuristics.cpp
@@ -436,7 +436,8 @@ Metadata::Metadata(const std::string& arch, const std::string& solver)
const nlohmann::json metadata =
common::LoadJSON(GetSystemDbPath() + "/" + arch + "_" + solver + "_metadata.ktn.model");
num_tuning_params = metadata["num_tuning_params"].get<std::size_t>();
tuning_decodings = metadata["decodings"]["tunings"].get<std::unordered_map<std::string, int>>();
tuning_decodings =
metadata["decodings"]["tunings"].get<std::unordered_map<std::string, std::string>>();
}

class Model
@@ -450,9 +451,11 @@ class Model
{
}
virtual ~Model() = default;
fdeep::tensors Encode(const std::vector<float>& features, std::size_t dim) const
fdeep::tensors Encode(const std::vector<float>& features, std::size_t dim, bool transform) const
{
fdeep::tensor input_tensor = fdeep::tensor(fdeep::tensor_shape(dim, dim), features);
const auto tensor_shape_depth = transform ? dim : 1;
fdeep::tensor input_tensor =
fdeep::tensor(fdeep::tensor_shape(dim, tensor_shape_depth), features);
return encoder.predict({input_tensor});
}
fdeep::tensors Decode(const float prev_token, const fdeep::tensors& context) const
@@ -488,10 +491,6 @@ class Model

std::shared_ptr<Model> GetModel(const std::string& arch, const std::string& solver)
{
static const std::string prevArch{arch};

if(prevArch != arch)
MIOPEN_THROW("Cannot use AI tuning models for multiple gpu architectures");
static std::map<std::string, std::shared_ptr<Model>> models;
auto it = models.find(solver);
if(it == models.end())
@@ -509,11 +508,16 @@ std::shared_ptr<Model> GetModel(const std::string& arch, const std::string& solv
bool ModelSetParams(const std::string& arch,
const std::string& solver,
const std::vector<float>& features,
std::function<bool(int, int)> validator)
bool transform_features,
std::function<bool(std::size_t, std::string)> validator)
{
auto model = GetModel(arch, solver);
int dim = std::sqrt(features.size());
fdeep::tensors context = model->Encode(features, dim);
auto model = GetModel(arch, solver);
int dim = 0;
if(transform_features)
dim = std::sqrt(features.size());
else
dim = features.size();
fdeep::tensors context = model->Encode(features, dim, transform_features);
float decoder_input = 0.0;
for(std::size_t i = 0; i < model->metadata.num_tuning_params; ++i)
{
@@ -529,9 +533,9 @@ bool ModelSetParams(const std::string& arch,
{
int token = pq.top().second;
// convert index to token value
int value = model->metadata.tuning_decodings[std::to_string(token)];
std::string value = model->metadata.tuning_decodings[std::to_string(token)];
pq.pop();
if(value < 0)
if(value == "-1")
return false;
if(validator(i, value))
{
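Taken together, these changes decode tuning parameters as strings rather than ints and let callers choose whether the feature vector is reshaped into a square map before encoding. A hedged caller sketch follows; the architecture string, solver name, and helper functions are illustrative assumptions, not the repository's actual call sites:

    // Hypothetical caller of the revised interface. When transform_features is
    // false the features are passed as a flat vector (dim = features.size());
    // when true they are treated as a dim x dim map (dim = sqrt(features.size())).
    std::vector<float> features = BuildFeatureVector(problem); // assumed helper
    bool ok = ModelSetParams(
        "gfx90a",                      // GPU architecture the model was trained on
        "ConvAsm1x1U",                 // illustrative solver name
        features,
        /*transform_features=*/false,
        [&](std::size_t i, std::string value) {
            // Values now arrive as decoded strings; "-1" (invalid) is rejected
            // inside ModelSetParams before the validator is invoked.
            return params.TrySetValue(i, value); // assumed helper
        });
    // ok == true means every tuning parameter was set to a valid decoded value.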