diff --git a/src/ocl/convolutionocl.cpp b/src/ocl/convolutionocl.cpp
index 73e17380ae..94251cc6d0 100644
--- a/src/ocl/convolutionocl.cpp
+++ b/src/ocl/convolutionocl.cpp
@@ -131,6 +131,18 @@ static inline void ValidateGroupCount(const TensorDescriptor& x,
         MIOPEN_THROW(miopenStatusBadParm, "Invalid group number");
 }
 
+static inline void ValidateWorkspace(Data_t workSpace, const size_t workSpaceSize)
+{
+
+    [[maybe_unused]] bool x = (workSpace != nullptr);
+    [[maybe_unused]] bool y = (workSpaceSize != 0);
+
+    assert(((x && y) || (!x && !y)) && "workspace pointer and size don't match. Either both should "
+                                       "be zero or both should be non-zero");
+
+    /// \todo could add a check here that workSpace points to GPU memory
+}
+
 static Invoker PrepareInvoker(ExecutionContext ctx,
                               const conv::ProblemDescription& problem,
                               const NetworkConfig& config,
@@ -260,6 +272,7 @@ void ConvolutionDescriptor::FindConvFwdAlgorithm(Handle& handle,
                                                  bool exhaustiveSearch) const
 {
     MIOPEN_LOG_I("requestAlgoCount = " << requestAlgoCount << ", workspace = " << workSpaceSize);
+    ValidateWorkspace(workSpace, workSpaceSize);
     if(x == nullptr || w == nullptr || y == nullptr)
         MIOPEN_THROW(miopenStatusBadParm, "Buffers cannot be NULL");
     if(returnedAlgoCount == nullptr)
@@ -495,6 +508,7 @@ void ConvolutionDescriptor::ConvolutionForward(Handle& handle,
                                                size_t workSpaceSize) const
 {
     MIOPEN_LOG_I("algo = " << algo << ", workspace = " << workSpaceSize);
+    ValidateWorkspace(workSpace, workSpaceSize);
 
     const auto tensors = ConvFwdTensors{xDesc, x, wDesc, w, yDesc, y};
     ValidateTensors(tensors);
@@ -812,6 +826,7 @@ void ConvolutionDescriptor::ConvolutionForwardImmediate(Handle& handle,
                                                         const solver::Id solver_id) const
 {
     MIOPEN_LOG_I("solver_id = " << solver_id.ToString() << ", workspace = " << workSpaceSize);
+    ValidateWorkspace(workSpace, workSpaceSize);
     const auto tensors = ConvFwdTensors{xDesc, x, wDesc, w, yDesc, y};
 
     ValidateTensors(tensors);
@@ -846,6 +861,7 @@ void ConvolutionDescriptor::FindConvBwdDataAlgorithm(Handle& handle,
                                                      bool exhaustiveSearch) const
 {
     MIOPEN_LOG_I("requestAlgoCount = " << requestAlgoCount << ", workspace = " << workSpaceSize);
+    ValidateWorkspace(workSpace, workSpaceSize);
     if(dx == nullptr || w == nullptr || dy == nullptr)
         MIOPEN_THROW(miopenStatusBadParm, "Buffers cannot be NULL");
     if(returnedAlgoCount == nullptr)
@@ -944,6 +960,7 @@ void ConvolutionDescriptor::ConvolutionBackwardData(Handle& handle,
                                                     size_t workSpaceSize) const
 {
     MIOPEN_LOG_I("algo = " << algo << ", workspace = " << workSpaceSize);
+    ValidateWorkspace(workSpace, workSpaceSize);
 
     auto tensors = ConvBwdTensors{dyDesc, dy, wDesc, w, dxDesc, dx};
 
@@ -1015,6 +1032,7 @@ void ConvolutionDescriptor::ConvolutionBackwardImmediate(Handle& handle,
                                                          solver::Id solver_id) const
 {
     MIOPEN_LOG_I("solver_id = " << solver_id.ToString() << ", workspace = " << workSpaceSize);
+    ValidateWorkspace(workSpace, workSpaceSize);
     auto tensors = ConvBwdTensors{dyDesc, dy, wDesc, w, dxDesc, dx};
 
     ValidateTensors(tensors);
@@ -1055,6 +1073,7 @@ void ConvolutionDescriptor::FindConvBwdWeightsAlgorithm(Handle& handle,
                                                         bool exhaustiveSearch) const
 {
     MIOPEN_LOG_I("requestAlgoCount = " << requestAlgoCount << ", workspace = " << workSpaceSize);
+    ValidateWorkspace(workSpace, workSpaceSize);
     if(x == nullptr || dw == nullptr || dy == nullptr)
         MIOPEN_THROW(miopenStatusBadParm, "Buffers cannot be NULL");
     if(returnedAlgoCount == nullptr)
@@ -1151,6 +1170,7 @@ void ConvolutionDescriptor::ConvolutionBackwardWeights(const Handle& handle,
                                                        size_t workSpaceSize) const
 {
     MIOPEN_LOG_I("algo = " << algo << ", workspace = " << workSpaceSize);
+    ValidateWorkspace(workSpace, workSpaceSize);
     decltype(auto) tensors = ConvWrwTensors{dyDesc, dy, xDesc, x, dwDesc, dw};
     ValidateTensors(tensors);
     ValidateAlphaBeta(alpha, beta);
@@ -1218,6 +1238,7 @@ void ConvolutionDescriptor::ConvolutionWrwImmediate(Handle& handle,
                                                     solver::Id solver_id) const
 {
     MIOPEN_LOG_I("solver_id = " << solver_id.ToString() << ", workspace = " << workSpaceSize);
+    ValidateWorkspace(workSpace, workSpaceSize);
     auto tensors = ConvWrwTensors{dyDesc, dy, xDesc, x, dwDesc, dw};
     ValidateTensors(tensors);
 
diff --git a/test/conv_common.hpp b/test/conv_common.hpp
index efaf4d8f0a..e387e98b5b 100644
--- a/test/conv_common.hpp
+++ b/test/conv_common.hpp
@@ -25,6 +25,7 @@
  *******************************************************************************/
 #pragma once
 #include "test.hpp"
+#include "workspace.hpp"
 #include <array>
 #include <iostream>
 #include <iterator>
@@ -330,13 +331,11 @@ struct conv_base
             EXPECT_EQUAL(miopenStatusSuccess,
                          miopenGetSolutionWorkspaceSize(solution, &workspace_size));
 
-            const auto workspace_dev = workspace_size != 0
-                                           ? get_handle().Write(std::vector<char>(workspace_size))
-                                           : nullptr;
+            Workspace wspace{workspace_size};
 
-            EXPECT_EQUAL(miopenStatusSuccess,
-                         miopenRunSolution(
-                             handle, solution, 3, arguments, workspace_dev.get(), workspace_size));
+            EXPECT_EQUAL(
+                miopenStatusSuccess,
+                miopenRunSolution(handle, solution, 3, arguments, wspace.ptr(), wspace.size()));
         }
 
         const auto& solution_deref = miopen::deref(solutions.front());
@@ -596,19 +595,6 @@ struct verify_forward_conv : conv_base<T, Tout>
         return rout;
     }
 
-    void resize_workspace(miopen::Handle& h,
-                          const std::size_t sz,
-                          std::vector<char>& ws,
-                          miopen::Allocator::ManageDataPtr& ws_dev) const
-    {
-        ws_dev.reset();
-        if(sz > 0)
-        {
-            ws.resize(sz);
-            ws_dev = h.Write(ws);
-        }
-    }
-
     tensor<Tout> gpu()
     {
         auto&& handle = get_handle();
@@ -629,8 +615,7 @@ struct verify_forward_conv : conv_base<T, Tout>
         bool fallback_path_taken      = false;
         std::size_t count             = 0;
 
-        std::vector<char> ws;
-        miopen::Allocator::ManageDataPtr ws_dev = nullptr;
+        Workspace wspace{};
 
         const auto ctx     = ExecutionContext{&handle};
         const auto problem = ConvProblemDescription{
@@ -649,8 +634,7 @@ struct verify_forward_conv : conv_base<T, Tout>
                 {
                     int ret_algo_count;
                     miopenConvAlgoPerf_t perf;
-                    const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem);
-                    resize_workspace(handle, workspace_size, ws, ws_dev);
+                    wspace.resize(filter.GetWorkSpaceSize(ctx, problem));
 
                     filter.FindConvBwdDataAlgorithm(handle,
                                                     input.desc,
@@ -662,8 +646,8 @@ struct verify_forward_conv : conv_base<T, Tout>
                                                     1,
                                                     &ret_algo_count,
                                                     &perf,
-                                                    ws_dev.get(),
-                                                    workspace_size,
+                                                    wspace.ptr(),
+                                                    wspace.size(),
                                                     search);
                 }
                 count = filter.GetSolutionCount(ctx, problem);
@@ -696,7 +680,7 @@ struct verify_forward_conv : conv_base<T, Tout>
                                   << " != " << ws_size << std::endl;
                     }
                 }
-                resize_workspace(handle, selected.workspace_size, ws, ws_dev);
+                wspace.resize(selected.workspace_size);
 
                 filter.CompileSolution(ctx, problem, selected.solution_id);
 
@@ -707,8 +691,8 @@ struct verify_forward_conv : conv_base<T, Tout>
                                                     wei_dev.get(),
                                                     rout.desc,
                                                     out_dev.get(),
-                                                    ws_dev.get(),
-                                                    selected.workspace_size,
+                                                    wspace.ptr(),
+                                                    wspace.size(),
                                                     selected.solution_id);
             }
             else
@@ -717,8 +701,7 @@ struct verify_forward_conv : conv_base<T, Tout>
                 {
                     int ret_algo_count;
                     miopenConvAlgoPerf_t perf;
-                    const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem);
-                    resize_workspace(handle, workspace_size, ws, ws_dev);
+                    wspace.resize(filter.GetWorkSpaceSize(ctx, problem));
 
                     filter.FindConvFwdAlgorithm(handle,
                                                 input.desc,
@@ -730,8 +713,8 @@ struct verify_forward_conv : conv_base<T, Tout>
                                                 1,
                                                 &ret_algo_count,
                                                 &perf,
-                                                ws_dev.get(),
-                                                workspace_size,
+                                                wspace.ptr(),
+                                                wspace.size(),
                                                 search);
                 }
 
@@ -765,7 +748,7 @@ struct verify_forward_conv : conv_base<T, Tout>
                                   << " != " << ws_size << std::endl;
                     }
                 }
-                resize_workspace(handle, selected.workspace_size, ws, ws_dev);
+                wspace.resize(selected.workspace_size);
 
                 filter.CompileSolution(ctx, problem, selected.solution_id);
 
@@ -776,8 +759,8 @@ struct verify_forward_conv : conv_base<T, Tout>
                                                    in_dev.get(),
                                                    rout.desc,
                                                    out_dev.get(),
-                                                   ws_dev.get(),
-                                                   selected.workspace_size,
+                                                   wspace.ptr(),
+                                                   wspace.size(),
                                                    selected.solution_id);
             }
             break;
@@ -832,9 +815,7 @@ struct verify_forward_conv : conv_base<T, Tout>
 
                 if(api == ConvApi::Find_1_0)
                 {
-                    const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem);
-                    std::vector<char> workspace(workspace_size);
-                    auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr;
+                    wspace.resize(filter.GetWorkSpaceSize(ctx, problem));
 
                     int ret_algo_count;
                     miopenConvAlgoPerf_t perf;
@@ -851,15 +832,13 @@ struct verify_forward_conv : conv_base<T, Tout>
                                                 1,
                                                 &ret_algo_count,
                                                 &perf,
-                                                workspace_dev.get(),
-                                                workspace_size,
+                                                wspace.ptr(),
+                                                wspace.size(),
                                                 search);
 
-                    workspace_dev.reset();
                     if(perf.memory > 0)
                     {
-                        workspace.resize(perf.memory);
-                        workspace_dev = handle.Write(workspace);
+                        wspace.resize(perf.memory);
                     }
 
                     filter.ConvolutionForward(handle,
@@ -872,8 +851,8 @@ struct verify_forward_conv : conv_base<T, Tout>
                                               &beta,
                                               rout.desc,
                                               out_dev.get(),
-                                              workspace_dev.get(),
-                                              workspace_size);
+                                              wspace.ptr(),
+                                              wspace.size());
 
                     /// \ref read_solver_name
                     auto solutions = filter.GetSolutions(ctx, problem, 1, &fallback_path_taken);
@@ -904,9 +883,7 @@ struct verify_forward_conv : conv_base<T, Tout>
             {
                 if(api == ConvApi::Find_1_0)
                 {
-                    const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem);
-                    std::vector<char> workspace(workspace_size);
-                    auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr;
+                    wspace.resize(filter.GetWorkSpaceSize(ctx, problem));
 
                     int ret_algo_count;
                     miopenConvAlgoPerf_t perf;
@@ -925,15 +902,13 @@ struct verify_forward_conv : conv_base<T, Tout>
                                                         1,
                                                         &ret_algo_count,
                                                         &perf,
-                                                        workspace_dev.get(),
-                                                        workspace_size,
+                                                        wspace.ptr(),
+                                                        wspace.size(),
                                                         search);
 
-                        workspace_dev.reset();
                         if(perf.memory > 0)
                         {
-                            workspace.resize(perf.memory);
-                            workspace_dev = handle.Write(workspace);
+                            wspace.resize(perf.memory);
                         }
 
                         filter.ConvolutionBackwardData(handle,
@@ -946,8 +921,8 @@ struct verify_forward_conv : conv_base<T, Tout>
                                                        &beta,
                                                        rout.desc,
                                                        out_dev.get(),
-                                                       workspace_dev.get(),
-                                                       workspace_size);
+                                                       wspace.ptr(),
+                                                       wspace.size());
                     }
                     else
                     {
@@ -961,15 +936,13 @@ struct verify_forward_conv : conv_base<T, Tout>
                                                     1,
                                                     &ret_algo_count,
                                                     &perf,
-                                                    workspace_dev.get(),
-                                                    workspace_size,
+                                                    wspace.ptr(),
+                                                    wspace.size(),
                                                     search);
 
-                        workspace_dev.reset();
                         if(perf.memory > 0)
                         {
-                            workspace.resize(perf.memory);
-                            workspace_dev = handle.Write(workspace);
+                            wspace.resize(perf.memory);
                         }
 
                         filter.ConvolutionForward(handle,
@@ -982,8 +955,8 @@ struct verify_forward_conv : conv_base<T, Tout>
                                                   &beta,
                                                   rout.desc,
                                                   out_dev.get(),
-                                                  workspace_dev.get(),
-                                                  workspace_size);
+                                                  wspace.ptr(),
+                                                  wspace.size());
                     }
 
                     /// \ref read_solver_name
@@ -1118,6 +1091,8 @@ struct verify_backward_conv : conv_base<T>
         auto wei_dev = handle.Write(weights.data);
         auto in_dev  = handle.Write(rinput.data);
 
+        Workspace wspace{};
+
         miopenConvSolution_t selected;
         bool fallback_path_taken = false;
         std::size_t count        = 0;
@@ -1133,9 +1108,7 @@ struct verify_backward_conv : conv_base<T>
         switch(api)
         {
         case ConvApi::Immediate: {
-            const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem);
-            std::vector<char> workspace(workspace_size);
-            auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr;
+            wspace.resize(filter.GetWorkSpaceSize(ctx, problem));
 
             int ret_algo_count;
             miopenConvAlgoPerf_t perf;
@@ -1154,8 +1127,8 @@ struct verify_backward_conv : conv_base<T>
                                                 1,
                                                 &ret_algo_count,
                                                 &perf,
-                                                workspace_dev.get(),
-                                                workspace_size,
+                                                wspace.ptr(),
+                                                wspace.size(),
                                                 search);
                 }
                 count = filter.GetSolutionCount(ctx, problem);
@@ -1183,18 +1156,14 @@ struct verify_backward_conv : conv_base<T>
                 });
                 selected = std::move(solutions.front());
 
-                std::size_t ws_size;
-
-                ws_size = filter.GetForwardSolutionWorkspaceSize(
+                [[maybe_unused]] std::size_t ws_size = filter.GetForwardSolutionWorkspaceSize(
                     handle, weights.desc, out.desc, rinput.desc, selected.solution_id);
 
                 filter.CompileSolution(ctx, problem, selected.solution_id);
 
-                workspace_dev.reset();
                 if(selected.workspace_size > 0)
                 {
-                    workspace.resize(selected.workspace_size);
-                    workspace_dev = handle.Write(workspace);
+                    wspace.resize(selected.workspace_size);
                 }
 
                 filter.ConvolutionForwardImmediate(handle,
@@ -1204,8 +1173,8 @@ struct verify_backward_conv : conv_base<T>
                                                    out_dev.get(),
                                                    rinput.desc,
                                                    in_dev.get(),
-                                                   workspace_dev.get(),
-                                                   ws_size,
+                                                   wspace.ptr(),
+                                                   wspace.size(),
                                                    selected.solution_id);
             }
             else
@@ -1222,8 +1191,8 @@ struct verify_backward_conv : conv_base<T>
                                                     1,
                                                     &ret_algo_count,
                                                     &perf,
-                                                    workspace_dev.get(),
-                                                    workspace_size,
+                                                    wspace.ptr(),
+                                                    wspace.size(),
                                                     search);
                 }
                 count = filter.GetSolutionCount(ctx, problem);
@@ -1250,18 +1219,14 @@ struct verify_backward_conv : conv_base<T>
                 });
                 selected = std::move(solutions.front());
 
-                std::size_t ws_size;
-
-                ws_size = filter.GetBackwardSolutionWorkspaceSize(
+                [[maybe_unused]] std::size_t ws_size = filter.GetBackwardSolutionWorkspaceSize(
                     handle, out.desc, weights.desc, rinput.desc, selected.solution_id);
 
                 filter.CompileSolution(ctx, problem, selected.solution_id);
 
-                workspace_dev.reset();
                 if(selected.workspace_size > 0)
                 {
-                    workspace.resize(selected.workspace_size);
-                    workspace_dev = handle.Write(workspace);
+                    wspace.resize(selected.workspace_size);
                 }
 
                 filter.ConvolutionBackwardImmediate(handle,
@@ -1271,16 +1236,14 @@ struct verify_backward_conv : conv_base<T>
                                                     wei_dev.get(),
                                                     rinput.desc,
                                                     in_dev.get(),
-                                                    workspace_dev.get(),
-                                                    ws_size,
+                                                    wspace.ptr(),
+                                                    wspace.size(),
                                                     selected.solution_id);
             }
             break;
         }
         case ConvApi::Find_1_0: {
-            const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem);
-            std::vector<char> workspace(workspace_size);
-            auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr;
+            wspace.resize(filter.GetWorkSpaceSize(ctx, problem));
 
             int ret_algo_count;
             miopenConvAlgoPerf_t perf;
@@ -1299,15 +1262,13 @@ struct verify_backward_conv : conv_base<T>
                                             1,
                                             &ret_algo_count,
                                             &perf,
-                                            workspace_dev.get(),
-                                            workspace_size,
+                                            wspace.ptr(),
+                                            wspace.size(),
                                             search);
 
-                workspace_dev.reset();
                 if(perf.memory > 0)
                 {
-                    workspace.resize(perf.memory);
-                    workspace_dev = handle.Write(workspace);
+                    wspace.resize(perf.memory);
                 }
 
                 filter.ConvolutionForward(handle,
@@ -1320,8 +1281,8 @@ struct verify_backward_conv : conv_base<T>
                                           &beta,
                                           rinput.desc,
                                           in_dev.get(),
-                                          workspace_dev.get(),
-                                          workspace_size);
+                                          wspace.ptr(),
+                                          wspace.size());
             }
             else
             {
@@ -1335,15 +1296,13 @@ struct verify_backward_conv : conv_base<T>
                                                 1,
                                                 &ret_algo_count,
                                                 &perf,
-                                                workspace_dev.get(),
-                                                workspace_size,
+                                                wspace.ptr(),
+                                                wspace.size(),
                                                 search);
 
-                workspace_dev.reset();
                 if(perf.memory > 0)
                 {
-                    workspace.resize(perf.memory);
-                    workspace_dev = handle.Write(workspace);
+                    wspace.resize(perf.memory);
                 }
 
                 filter.ConvolutionBackwardData(handle,
@@ -1356,8 +1315,8 @@ struct verify_backward_conv : conv_base<T>
                                                &beta,
                                                rinput.desc,
                                                in_dev.get(),
-                                               workspace_dev.get(),
-                                               workspace_size);
+                                               wspace.ptr(),
+                                               wspace.size());
             }
 
             /// \ref read_solver_name
@@ -1487,6 +1446,7 @@ struct verify_backward_weights_conv : conv_base<T>
         auto out_dev = handle.Write(out.data);
         auto wei_dev = handle.Write(rweights.data);
         auto in_dev  = handle.Write(input.data);
+        Workspace wspace{};
 
         miopenConvSolution_t selected;
         bool fallback_path_taken = false;
@@ -1503,9 +1463,7 @@ struct verify_backward_weights_conv : conv_base<T>
         switch(api)
         {
         case ConvApi::Immediate: {
-            const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem);
-            std::vector<char> workspace(workspace_size);
-            auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr;
+            wspace.resize(filter.GetWorkSpaceSize(ctx, problem));
 
             int ret_algo_count;
             miopenConvAlgoPerf_t perf;
@@ -1523,8 +1481,8 @@ struct verify_backward_weights_conv : conv_base<T>
                     1,
                     &ret_algo_count,
                     &perf,
-                    workspace_dev.get(),
-                    workspace_size,
+                    wspace.ptr(),
+                    wspace.size(),
                     search);
             }
 
@@ -1551,9 +1509,7 @@ struct verify_backward_weights_conv : conv_base<T>
             });
             selected = std::move(solutions.front());
 
-            std::size_t ws_size;
-
-            ws_size = filter.GetWrwSolutionWorkspaceSize(
+            [[maybe_unused]] std::size_t ws_size = filter.GetWrwSolutionWorkspaceSize(
                 handle,
                 filter.mode == miopenTranspose ? input.desc : out.desc,
                 filter.mode == miopenTranspose ? out.desc : input.desc,
@@ -1562,11 +1518,9 @@ struct verify_backward_weights_conv : conv_base<T>
 
             filter.CompileSolution(ctx, problem, selected.solution_id);
 
-            workspace_dev.reset();
             if(selected.workspace_size > 0)
             {
-                workspace.resize(selected.workspace_size);
-                workspace_dev = handle.Write(workspace);
+                wspace.resize(selected.workspace_size);
             }
 
             filter.ConvolutionWrwImmediate(
@@ -1577,16 +1531,14 @@ struct verify_backward_weights_conv : conv_base<T>
                 filter.mode == miopenTranspose ? out_dev.get() : in_dev.get(),
                 rweights.desc,
                 wei_dev.get(),
-                workspace_dev.get(),
-                ws_size,
+                wspace.ptr(),
+                wspace.size(),
                 selected.solution_id);
             break;
         }
         case ConvApi::Find_1_0: {
 
-            const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem);
-            std::vector<char> workspace(workspace_size);
-            auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr;
+            wspace.resize(filter.GetWorkSpaceSize(ctx, problem));
 
             int ret_algo_count;
             miopenConvAlgoPerf_t perf;
@@ -1603,15 +1555,13 @@ struct verify_backward_weights_conv : conv_base<T>
                 1,
                 &ret_algo_count,
                 &perf,
-                workspace_dev.get(),
-                workspace_size,
+                wspace.ptr(),
+                wspace.size(),
                 search);
 
-            workspace_dev.reset();
             if(perf.memory > 0)
             {
-                workspace.resize(perf.memory);
-                workspace_dev = handle.Write(workspace);
+                wspace.resize(perf.memory);
             }
 
             filter.ConvolutionBackwardWeights(
@@ -1625,8 +1575,8 @@ struct verify_backward_weights_conv : conv_base<T>
                 &beta,
                 rweights.desc,
                 wei_dev.get(),
-                workspace_dev.get(),
-                workspace_size);
+                wspace.ptr(),
+                wspace.size());
 
             /// \ref read_solver_name
             const auto solutions = filter.GetSolutions(ctx, problem, 1, &fallback_path_taken);
@@ -1778,9 +1728,7 @@ struct verify_forward_conv_int8 : conv_base<T>
                                     wei_vpad_dev.get());
         }
 
-        const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem);
-        std::vector<char> workspace(workspace_size);
-        auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr;
+        Workspace wspace{filter.GetWorkSpaceSize(ctx, problem)};
 
         int ret_algo_count;
         miopenConvAlgoPerf_t perf;
@@ -1797,8 +1745,8 @@ struct verify_forward_conv_int8 : conv_base<T>
                                         1,
                                         &ret_algo_count,
                                         &perf,
-                                        workspace_dev.get(),
-                                        workspace_size,
+                                        wspace.ptr(),
+                                        wspace.size(),
                                         search);
         }
 
@@ -1826,9 +1774,7 @@ struct verify_forward_conv_int8 : conv_base<T>
         });
         auto selected = std::move(solutions.front());
 
-        std::size_t ws_size;
-
-        ws_size =
+        [[maybe_unused]] std::size_t ws_size =
             filter.GetForwardSolutionWorkspaceSize(handle,
                                                    (is_transform ? weight_vpad_desc : weights.desc),
                                                    (is_transform ? input_vpad_desc : input.desc),
@@ -1837,11 +1783,9 @@ struct verify_forward_conv_int8 : conv_base<T>
 
         filter.CompileSolution(ctx, problem, selected.solution_id);
 
-        workspace_dev.reset();
         if(selected.workspace_size > 0)
         {
-            workspace.resize(selected.workspace_size);
-            workspace_dev = handle.Write(workspace);
+            wspace.resize(selected.workspace_size);
         }
 
         filter.ConvolutionForwardImmediate(handle,
@@ -1851,8 +1795,8 @@ struct verify_forward_conv_int8 : conv_base<T>
                                            (is_transform ? in_vpad_dev.get() : in_dev.get()),
                                            rout.desc,
                                            out_dev.get(),
-                                           workspace_dev.get(),
-                                           ws_size,
+                                           wspace.ptr(),
+                                           wspace.size(),
                                            selected.solution_id);
 
         if(count != 0)
diff --git a/test/ctc.cpp b/test/ctc.cpp
index d812cef801..1c759220f2 100644
--- a/test/ctc.cpp
+++ b/test/ctc.cpp
@@ -29,6 +29,7 @@
 #include "tensor_holder.hpp"
 #include "test.hpp"
 #include "verify.hpp"
+#include "workspace.hpp"
 #include "rnn_util.hpp"
 #include "random.hpp"
 #include <array>
@@ -651,8 +652,7 @@ struct verify_ctcloss
                                                                    inputLengths.data(),
                                                                    miopenCTCLossAlgo_t(0));
 
-        auto workSpace     = tensor<T>{workSpaceSize / sizeof(T)};
-        auto workSpace_dev = handle.Write(workSpace.data);
+        Workspace wspace{workSpaceSize};
 
         auto losses_gpu = losses;
         auto grads_gpu  = grads;
@@ -671,8 +671,8 @@ struct verify_ctcloss
                             grads.desc,
                             grads_dev.get(),
                             miopenCTCLossAlgo_t(0),
-                            workSpace_dev.get(),
-                            workSpaceSize);
+                            wspace.ptr(),
+                            wspace.size());
 
         losses_gpu.data = handle.Read<T>(losses_dev, losses_gpu.data.size());
         grads_gpu.data  = handle.Read<T>(grads_dev, grads_gpu.data.size());
diff --git a/test/find_2_conv.cpp b/test/find_2_conv.cpp
index a89ce942a7..6e636e265f 100644
--- a/test/find_2_conv.cpp
+++ b/test/find_2_conv.cpp
@@ -27,6 +27,7 @@
 #include "test.hpp"
 #include "driver.hpp"
 #include "get_handle.hpp"
+#include "workspace.hpp"
 
 #include <miopen/miopen.h>
 
@@ -210,13 +211,11 @@ struct Find2Test : test_driver
                 }
 
                 const auto workspace_size = std::min(workspace_limit, workspace_max);
-                workspace_dev             = workspace_size != 0
-                                                ? miopen::deref(handle).Write(std::vector<char>(workspace_size))
-                                                : nullptr;
+                Workspace wspace{workspace_size};
 
-                EXPECT_EQUAL(miopenSetFindOptionPreallocatedWorkspace(
-                                 options, workspace_dev.get(), workspace_size),
-                             miopenStatusSuccess);
+                EXPECT_EQUAL(
+                    miopenSetFindOptionPreallocatedWorkspace(options, wspace.ptr(), wspace.size()),
+                    miopenStatusSuccess);
 
                 EXPECT_EQUAL(miopenSetFindOptionPreallocatedTensor(
                                  options, miopenTensorConvolutionX, x_dev.get()),
@@ -312,14 +311,11 @@ struct Find2Test : test_driver
     {
         std::cerr << "Running a solution..." << std::endl;
 
-        auto& handle_deref = get_handle();
-
         std::size_t workspace_size;
         EXPECT_EQUAL(miopenGetSolutionWorkspaceSize(solution, &workspace_size),
                      miopenStatusSuccess);
 
-        auto workspace_dev =
-            workspace_size != 0 ? handle_deref.Write(std::vector<char>(workspace_size)) : nullptr;
+        Workspace wspace{workspace_size};
 
         const auto checked_run_solution = [&](miopenTensorDescriptor_t* descriptors_) {
             auto arguments = std::make_unique<miopenTensorArgument_t[]>(num_arguments);
@@ -331,10 +327,9 @@ struct Find2Test : test_driver
                 arguments[i].buffer     = buffers[i];
             }
 
-            EXPECT_EQUAL(
-                miopenRunSolution(
-                    handle, solution, 3, arguments.get(), workspace_dev.get(), workspace_size),
-                miopenStatusSuccess);
+            EXPECT_EQUAL(miopenRunSolution(
+                             handle, solution, 3, arguments.get(), wspace.ptr(), wspace.size()),
+                         miopenStatusSuccess);
         };
 
         // Without descriptors
diff --git a/test/find_db.cpp b/test/find_db.cpp
index b07167f80e..47c485182e 100644
--- a/test/find_db.cpp
+++ b/test/find_db.cpp
@@ -27,6 +27,7 @@
 #include "test.hpp"
 #include "driver.hpp"
 #include "get_handle.hpp"
+#include "workspace.hpp"
 
 #include <miopen/convolution.hpp>
 #include <miopen/conv/problem_description.hpp>
@@ -111,10 +112,7 @@ struct FindDbTest : test_driver
         const auto ctx = ExecutionContext{&handle};
         const auto problem =
             conv::ProblemDescription{y.desc, w.desc, x.desc, filter, conv::Direction::BackwardData};
-        const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem);
-
-        auto workspace     = std::vector<char>(workspace_size);
-        auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr;
+        Workspace wspace{filter.GetWorkSpaceSize(ctx, problem)};
 
         auto filterCall = [&]() {
             int ret_algo_count;
@@ -130,8 +128,8 @@ struct FindDbTest : test_driver
                                             1,
                                             &ret_algo_count,
                                             perf,
-                                            workspace_dev.get(),
-                                            workspace_size,
+                                            wspace.ptr(),
+                                            wspace.size(),
                                             false);
         };
 
@@ -145,10 +143,7 @@ struct FindDbTest : test_driver
         const auto ctx = ExecutionContext{&handle};
         const auto problem =
             conv::ProblemDescription{x.desc, w.desc, y.desc, filter, conv::Direction::Forward};
-        const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem);
-
-        auto workspace     = std::vector<char>(workspace_size);
-        auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr;
+        Workspace wspace{filter.GetWorkSpaceSize(ctx, problem)};
 
         auto filterCall = [&]() {
             int ret_algo_count;
@@ -164,8 +159,8 @@ struct FindDbTest : test_driver
                                         1,
                                         &ret_algo_count,
                                         perf,
-                                        workspace_dev.get(),
-                                        workspace_size,
+                                        wspace.ptr(),
+                                        wspace.size(),
                                         false);
         };
 
@@ -179,10 +174,7 @@ struct FindDbTest : test_driver
         const auto ctx     = ExecutionContext{&handle};
         const auto problem = conv::ProblemDescription{
             y.desc, w.desc, x.desc, filter, conv::Direction::BackwardWeights};
-        const auto workspace_size = filter.GetWorkSpaceSize(ctx, problem);
-
-        auto workspace     = std::vector<char>(workspace_size);
-        auto workspace_dev = workspace_size != 0 ? handle.Write(workspace) : nullptr;
+        Workspace wspace{filter.GetWorkSpaceSize(ctx, problem)};
 
         auto filterCall = [&]() {
             int ret_algo_count;
@@ -198,8 +190,8 @@ struct FindDbTest : test_driver
                                                1,
                                                &ret_algo_count,
                                                perf,
-                                               workspace_dev.get(),
-                                               workspace_size,
+                                               wspace.ptr(),
+                                               wspace.size(),
                                                false);
         };
 
diff --git a/test/gru_common.hpp b/test/gru_common.hpp
index 51e3c09012..4df6209e9d 100644
--- a/test/gru_common.hpp
+++ b/test/gru_common.hpp
@@ -35,6 +35,7 @@
 #include "verify.hpp"
 #include "rnn_util.hpp"
 #include "random.hpp"
+#include "workspace.hpp"
 #include <array>
 #include <cmath>
 #include <ctime>
@@ -1963,8 +1964,8 @@ struct verify_forward_infer_gru
 #endif
         auto&& handle = get_handle();
 
-        size_t out_sz        = 0;
-        size_t workSpaceSize = 0;
+        size_t out_sz         = 0;
+        size_t workspace_size = 0;
 
         std::vector<miopen::TensorDescriptor> inputCPPDescs;
         std::vector<miopenTensorDescriptor_t> inputDescs;
@@ -1979,9 +1980,8 @@ struct verify_forward_infer_gru
                               hiddenSize * ((dirMode != 0) ? 2 : 1),
                               miopen::deref(rnnDesc).dataType);
 
-        miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize);
-
-        std::vector<T> workSpace(workSpaceSize / sizeof(T));
+        miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size);
+        Workspace wspace{workspace_size};
 
         auto input_dev = handle.Write(input);
 
@@ -1994,8 +1994,6 @@ struct verify_forward_infer_gru
         std::fill(hy.begin(), hy.end(), 0.);
         auto hy_dev = handle.Write(hy);
 
-        auto workSpace_dev = handle.Write(workSpace);
-
         std::vector<int> hlens(3, 0);
         hlens[0] = nLayers * (dirMode != 0 ? 2 : 1);
         hlens[1] = batch_seq[0];
@@ -2027,8 +2025,8 @@ struct verify_forward_infer_gru
                                   ((nohy) ? nullptr : hy_dev.get()),
                                   &hiddenDesc,
                                   nullptr,
-                                  workSpace_dev.get(),
-                                  workSpaceSize);
+                                  wspace.ptr(),
+                                  wspace.size());
 
 #if(MIO_GRU_TEST_DEBUG == 2)
         auto outdata = handle.Read<T>(output_dev, output.size());
@@ -2249,7 +2247,7 @@ struct verify_forward_train_gru
         auto&& handle = get_handle();
 
         size_t out_sz           = 0;
-        size_t workSpaceSize    = 0;
+        size_t workspace_size   = 0;
         size_t reserveSpaceSize = 0;
 
         std::vector<miopen::TensorDescriptor> inputCPPDescs;
@@ -2265,12 +2263,14 @@ struct verify_forward_train_gru
                               hiddenSize * ((dirMode != 0) ? 2 : 1),
                               miopen::deref(rnnDesc).dataType);
 
-        miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize);
+        miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size);
+        Workspace wspace{workspace_size};
+
         miopenGetRNNTrainingReserveSize(
             &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize);
-
-        std::vector<T> workSpace(workSpaceSize / sizeof(T));
-        std::vector<T> reserveSpace((reserveSpaceSize + sizeof(T) - 1) / sizeof(T));
+        reserveSpaceSize = (reserveSpaceSize + sizeof(T) - 1) & ~(sizeof(T) - 1);
+        assert(reserveSpaceSize % sizeof(T) == 0);
+        Workspace rspace{reserveSpaceSize};
 
         auto input_dev = handle.Write(input);
 
@@ -2284,9 +2284,6 @@ struct verify_forward_train_gru
         std::fill(hy.begin(), hy.end(), 0.);
         auto hy_dev = handle.Write(hy);
 
-        auto workSpace_dev    = handle.Write(workSpace);
-        auto reserveSpace_dev = handle.Write(reserveSpace);
-
         std::vector<int> hlens(3, 0);
         hlens[0] = nLayers * (dirMode != 0 ? 2 : 1);
         hlens[1] = batch_seq[0];
@@ -2318,10 +2315,10 @@ struct verify_forward_train_gru
                                  ((nohy) ? nullptr : hy_dev.get()),
                                  &hiddenDesc,
                                  nullptr,
-                                 workSpace_dev.get(),
-                                 workSpaceSize,
-                                 reserveSpace_dev.get(),
-                                 reserveSpaceSize);
+                                 wspace.ptr(),
+                                 wspace.size(),
+                                 rspace.ptr(),
+                                 rspace.size());
 
 #if(MIO_GRU_TEST_DEBUG == 2)
         auto outdata = handle.Read<T>(output_dev, output.size());
@@ -2331,10 +2328,9 @@ struct verify_forward_train_gru
         }
 #endif
 
-        auto retSet = std::make_tuple(
-            handle.Read<T>(output_dev, output.size()),
-            (nohy ? initHidden : handle.Read<T>(hy_dev, hy.size())),
-            handle.Read<T>(reserveSpace_dev, (reserveSpaceSize + sizeof(T) - 1) / sizeof(T)));
+        auto retSet = std::make_tuple(handle.Read<T>(output_dev, output.size()),
+                                      (nohy ? initHidden : handle.Read<T>(hy_dev, hy.size())),
+                                      rspace.Read<std::vector<T>>());
 
 #if(MIO_RNN_TIME_EVERYTHING == 1)
         auto t_end = std::chrono::high_resolution_clock::now();
@@ -2484,7 +2480,7 @@ struct verify_backward_data_gru
         int bi        = dirMode != 0 ? 2 : 1;
         int hy_h      = hiddenSize;
         int bi_stride = bi * hy_h;
-        size_t workSpaceSize;
+        size_t workspace_size;
 
         std::vector<miopen::TensorDescriptor> inputCPPDescs;
         std::vector<miopenTensorDescriptor_t> inputDescs;
@@ -2494,8 +2490,8 @@ struct verify_backward_data_gru
         // Outputs ----------
         size_t in_sz = 0;
         miopenGetRNNInputTensorSize(&handle, rnnDesc, seqLength, inputDescs.data(), &in_sz);
-        miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize);
-        std::vector<T> workSpace(workSpaceSize / sizeof(T));
+        miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size);
+        std::vector<T> workSpace(workspace_size / sizeof(T));
         std::vector<T> dx(in_sz / sizeof(T));
         std::vector<T> dhx(initHidden.size());
 
@@ -2562,8 +2558,7 @@ struct verify_backward_data_gru
 
         auto&& handle = get_handle();
 
-        size_t out_sz        = 0;
-        size_t workSpaceSize = 0;
+        size_t out_sz = 0;
 
         std::vector<miopen::TensorDescriptor> inputCPPDescs;
         std::vector<miopenTensorDescriptor_t> inputDescs;
@@ -2578,15 +2573,17 @@ struct verify_backward_data_gru
                               hiddenSize * ((dirMode != 0) ? 2 : 1),
                               miopen::deref(rnnDesc).dataType);
 
-        miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize);
-        std::vector<T> workSpace(workSpaceSize / sizeof(T));
-        auto workSpace_dev = handle.Write(workSpace);
+        size_t workspace_size = 0;
+        miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size);
+        Workspace wspace{workspace_size};
 
         miopenGetRNNInputTensorSize(&handle, rnnDesc, seqLength, outputDescs.data(), &out_sz);
-        auto yin_dev          = handle.Write(yin);
-        auto dyin_dev         = handle.Write(dy);
-        auto reserveSpace_dev = handle.Write(reserveSpace);
-        auto weights_dev      = handle.Write(weights);
+        auto yin_dev     = handle.Write(yin);
+        auto dyin_dev    = handle.Write(dy);
+        auto weights_dev = handle.Write(weights);
+
+        Workspace rspace{};
+        rspace.Write(reserveSpace);
 
         std::vector<int> hlens(3, 0);
         hlens[0] = nLayers * (dirMode != 0 ? 2 : 1);
@@ -2633,15 +2630,15 @@ struct verify_backward_data_gru
                               ((nodhx) ? nullptr : dhx_dev.get()),
                               &hiddenDesc,
                               nullptr,
-                              workSpace_dev.get(),
-                              workSpaceSize,
-                              reserveSpace_dev.get(),
-                              reserveSpace.size() * sizeof(T));
+                              wspace.ptr(),
+                              wspace.size(),
+                              rspace.ptr(),
+                              rspace.size());
 
         auto retSet = std::make_tuple(handle.Read<T>(dx_dev, dx.size()),
                                       (nodhx ? initHidden : handle.Read<T>(dhx_dev, dhx.size())),
-                                      handle.Read<T>(reserveSpace_dev, reserveSpace.size()),
-                                      handle.Read<T>(workSpace_dev, workSpace.size()));
+                                      rspace.Read<std::vector<T>>(),
+                                      wspace.Read<std::vector<T>>());
 
 #if(MIO_RNN_TIME_EVERYTHING == 1)
         auto t_end = std::chrono::high_resolution_clock::now();
@@ -2840,8 +2837,11 @@ struct verify_backward_weights_gru
                               hiddenSize * ((dirMode != 0) ? 2 : 1),
                               miopen::deref(rnnDesc).dataType);
 
-        auto workSpace_dev    = handle.Write(workSpace);
-        auto reserveSpace_dev = handle.Write(reserveSpace);
+        Workspace wspace{};
+        wspace.Write(workSpace);
+        Workspace rspace{};
+        rspace.Write(reserveSpace);
+
         std::vector<T> dweights(weightSize);
         auto dweights_dev = handle.Write(dweights);
         miopen::TensorDescriptor weightDesc(miopen::deref(rnnDesc).dataType, {weightSize});
@@ -2869,10 +2869,10 @@ struct verify_backward_weights_gru
                                  dy_dev.get(),
                                  &weightDesc,
                                  dweights_dev.get(),
-                                 workSpace_dev.get(),
-                                 workSpace.size() * sizeof(T),
-                                 reserveSpace_dev.get(),
-                                 reserveSpace.size() * sizeof(T));
+                                 wspace.ptr(),
+                                 wspace.size(),
+                                 rspace.ptr(),
+                                 rspace.size());
 
 #if(MIO_RNN_TIME_EVERYTHING == 1)
         auto t_end = std::chrono::high_resolution_clock::now();
@@ -3123,10 +3123,10 @@ struct gru_basic_driver : test_driver
         size_t reserveSpaceSize;
         miopenGetRNNTrainingReserveSize(
             &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize);
-        size_t workSpaceSize;
-        miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize);
+        size_t workspace_size;
+        miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size);
 
-        size_t total_mem = statesSizeInBytes + reserveSpaceSize + workSpaceSize + 2 * out_sz +
+        size_t total_mem = statesSizeInBytes + reserveSpaceSize + workspace_size + 2 * out_sz +
                            (in_sz + wei_sz + (nohx ? 0 : hx_sz) + (nohy ? 0 : hx_sz) +
                             (nodhx ? 0 : hx_sz) + (nodhy ? 0 : hx_sz)) *
                                sizeof(T);
diff --git a/test/gtest/conv_api_strided_tensors.cpp b/test/gtest/conv_api_strided_tensors.cpp
index 9a2876b3f0..d4e49201f4 100644
--- a/test/gtest/conv_api_strided_tensors.cpp
+++ b/test/gtest/conv_api_strided_tensors.cpp
@@ -29,6 +29,7 @@
 #include <gtest/gtest.h>
 #include <miopen/miopen.h>
 #include "platform.hpp"
+#include "../workspace.hpp"
 
 #define MIOPEN_CHECK_RET(val) ASSERT_EQ(val, miopenStatusSuccess)
 
@@ -66,10 +67,12 @@ class ConvStridedTensors : public ::testing::Test
         MIOPEN_CHECK_RET(miopenSetConvolutionGroupCount(conv_descr, 1));
 
         // Workspace
+        size_t sz = 0;
         MIOPEN_CHECK_RET(miopenConvolutionForwardGetWorkSpaceSize(
-            handle, filter_descr, input_descr, conv_descr, output_descr, &workspace_size));
+            handle, filter_descr, input_descr, conv_descr, output_descr, &sz));
 
         // Data
+        wspace.resize(sz);
         h_input.resize(input_size);
         h_filter.resize(filter_size);
         h_output.resize(output_size);
@@ -125,7 +128,7 @@ class ConvStridedTensors : public ::testing::Test
     std::vector<int> dilation                = {1, 1, 1};
 
     // Workspace
-    size_t workspace_size;
+    Workspace wspace{};
 
     // Data
     const size_t input_size   = input_dims[0] * input_strides[0];
@@ -143,10 +146,9 @@ TEST_F(ConvStridedTensors, ConvStridedTensorsNotImplemented)
 {
     auto device = Device(handle);
 
-    auto d_workspace = device.Malloc(workspace_size);
-    auto d_input     = device.Malloc(input_bytes);
-    auto d_filter    = device.Malloc(filter_bytes);
-    auto d_output    = device.Malloc(output_bytes);
+    auto d_input  = device.Malloc(input_bytes);
+    auto d_filter = device.Malloc(filter_bytes);
+    auto d_output = device.Malloc(output_bytes);
 
     std::fill_n(h_input.begin(), h_input.size(), 1.f);
     ASSERT_TRUE(d_input.CopyToDevice(h_input.data(), input_bytes));
@@ -168,8 +170,8 @@ TEST_F(ConvStridedTensors, ConvStridedTensorsNotImplemented)
                                                     sizeof(perf_results) / sizeof(perf_results[0]),
                                                     &perf_results_count,
                                                     perf_results,
-                                                    d_workspace.Data(),
-                                                    workspace_size,
+                                                    wspace.ptr(),
+                                                    wspace.size(),
                                                     true),
               miopenStatusSuccess);
     ASSERT_GT(perf_results_count, 0);
@@ -189,8 +191,8 @@ TEST_F(ConvStridedTensors, ConvStridedTensorsNotImplemented)
                                        &beta,
                                        output_descr,
                                        d_output.Data(),
-                                       d_workspace.Data(),
-                                       workspace_size),
+                                       wspace.ptr(),
+                                       wspace.size()),
               miopenStatusSuccess);
     ASSERT_TRUE(device.Synchronize());
 }
diff --git a/test/gtest/solver_bwd.hpp b/test/gtest/solver_bwd.hpp
index cb55a5951b..6511800ce6 100644
--- a/test/gtest/solver_bwd.hpp
+++ b/test/gtest/solver_bwd.hpp
@@ -40,6 +40,7 @@
 #include "conv_tensor_gen.hpp"
 
 #include "get_solver.hpp"
+#include "../workspace.hpp"
 
 template <typename T = float, typename Tref = float, bool use_cpu_ref = false>
 struct ConvBwdSolverTest
@@ -76,15 +77,11 @@ struct ConvBwdSolverTest
         if(solv.MayNeedWorkspace())
         {
             const auto cur_sol_ws = solv.GetWorkspaceSize(ctx, problem);
-            workspace_dev         = handle.Create<T>(cur_sol_ws);
-            workspace_size        = cur_sol_ws;
+            wspace.resize(cur_sol_ws);
         }
 
-        const auto invoke_params =
-            miopen::conv::DataInvokeParams{tensors,
-                                           workspace_dev.get(),
-                                           workspace_size,
-                                           conv_desc.attribute.gfx90aFp16alt.GetBwd()};
+        const auto invoke_params = miopen::conv::DataInvokeParams{
+            tensors, wspace.ptr(), wspace.size(), conv_desc.attribute.gfx90aFp16alt.GetBwd()};
 
         auto sol = GetSolution(solv, ctx, problem);
         ASSERT_TRUE(sol.Succeeded());
@@ -177,8 +174,7 @@ struct ConvBwdSolverTest
     miopen::Allocator::ManageDataPtr in_dev;
     miopen::Allocator::ManageDataPtr wei_dev;
     miopen::Allocator::ManageDataPtr out_dev;
-    miopen::Allocator::ManageDataPtr workspace_dev;
-    size_t workspace_size;
+    Workspace wspace{};
     miopenConvFwdAlgorithm_t algo = miopenConvolutionFwdAlgoDirect;
     bool test_skipped             = false;
 };
diff --git a/test/gtest/solver_fwd.hpp b/test/gtest/solver_fwd.hpp
index 88fa9a9c55..20b16fcc32 100644
--- a/test/gtest/solver_fwd.hpp
+++ b/test/gtest/solver_fwd.hpp
@@ -38,6 +38,7 @@
 
 #include "conv_test_base.hpp"
 #include "get_solver.hpp"
+#include "../workspace.hpp"
 
 template <typename T = float, typename Tref = float, bool use_cpu_ref = false>
 struct ConvFwdSolverTest
@@ -77,15 +78,11 @@ struct ConvFwdSolverTest
         if(solv.MayNeedWorkspace())
         {
             const auto cur_sol_ws = solv.GetWorkspaceSize(ctx, problem);
-            workspace_dev         = handle.Create<T>(cur_sol_ws);
-            workspace_size        = cur_sol_ws;
+            wspace.resize(cur_sol_ws);
         }
 
-        const auto invoke_params =
-            miopen::conv::DataInvokeParams{tensors,
-                                           workspace_dev.get(),
-                                           workspace_size,
-                                           this->conv_desc.attribute.gfx90aFp16alt.GetFwd()};
+        const auto invoke_params = miopen::conv::DataInvokeParams{
+            tensors, wspace.ptr(), wspace.size(), this->conv_desc.attribute.gfx90aFp16alt.GetFwd()};
 
         // auto sol = solv.GetSolution(ctx, problem);
         // This is complicated due to the split between tunable and non-tunable solvers
@@ -116,8 +113,7 @@ struct ConvFwdSolverTest
     }
 
     ConvTestCase conv_config;
-    miopen::Allocator::ManageDataPtr workspace_dev;
-    size_t workspace_size;
+    Workspace wspace{};
     miopenConvFwdAlgorithm_t algo = miopenConvolutionFwdAlgoDirect;
     bool test_skipped             = false;
     miopenTensorLayout_t tensor_layout;
diff --git a/test/gtest/solver_wrw.hpp b/test/gtest/solver_wrw.hpp
index dde92e2071..dcf8311d83 100644
--- a/test/gtest/solver_wrw.hpp
+++ b/test/gtest/solver_wrw.hpp
@@ -40,6 +40,7 @@
 #include "conv_tensor_gen.hpp"
 
 #include "get_solver.hpp"
+#include "../workspace.hpp"
 
 template <typename T = float, typename Tref = float, bool use_cpu_ref = false>
 struct ConvWrwSolverTest
@@ -76,15 +77,11 @@ struct ConvWrwSolverTest
         if(solv.MayNeedWorkspace())
         {
             const auto cur_sol_ws = solv.GetWorkspaceSize(ctx, problem);
-            workspace_dev         = handle.Create<T>(cur_sol_ws);
-            workspace_size        = cur_sol_ws;
+            wspace.resize(cur_sol_ws);
         }
 
-        const auto invoke_params =
-            miopen::conv::WrWInvokeParams{tensors,
-                                          workspace_dev.get(),
-                                          workspace_size,
-                                          conv_desc.attribute.gfx90aFp16alt.GetBwd()};
+        const auto invoke_params = miopen::conv::WrWInvokeParams{
+            tensors, wspace.ptr(), wspace.size(), conv_desc.attribute.gfx90aFp16alt.GetBwd()};
 
         auto sol = GetSolution(solv, ctx, problem);
         ASSERT_TRUE(sol.Succeeded());
@@ -179,8 +176,7 @@ struct ConvWrwSolverTest
     miopen::Allocator::ManageDataPtr in_dev;
     miopen::Allocator::ManageDataPtr wei_dev;
     miopen::Allocator::ManageDataPtr out_dev;
-    miopen::Allocator::ManageDataPtr workspace_dev;
-    size_t workspace_size;
+    Workspace wspace{};
     miopenConvFwdAlgorithm_t algo = miopenConvolutionFwdAlgoDirect;
     bool test_skipped             = false;
 };
diff --git a/test/lstm_common.hpp b/test/lstm_common.hpp
index a09aab4209..d2b7d1a077 100644
--- a/test/lstm_common.hpp
+++ b/test/lstm_common.hpp
@@ -27,6 +27,7 @@
 #ifndef GUARD_MIOPEN_TEST_LSTM_COMMON_HPP
 #define GUARD_MIOPEN_TEST_LSTM_COMMON_HPP
 
+#include "workspace.hpp"
 #include "driver.hpp"
 #include "dropout_util.hpp"
 #include "get_handle.hpp"
@@ -96,6 +97,8 @@ struct verify_backward_data_lstm
     std::vector<T> initHidden; // HX
     std::vector<T> initCell;   // CX
     std::vector<T> weights;
+    std::vector<T>& RSVgpu;
+    std::vector<T>& RSVcpu;
     std::vector<int> batch_seq;
     int hiddenSize;
     int seqLength;
@@ -115,8 +118,6 @@ struct verify_backward_data_lstm
     bool nodcx;
     bool use_dropout;
     bool use_seqPadding;
-    typename std::vector<T>::iterator RSVgpu;
-    typename std::vector<T>::iterator RSVcpu;
 
     verify_backward_data_lstm(miopenRNNDescriptor_t pRD,
                               const std::vector<T>& py,
@@ -153,6 +154,8 @@ struct verify_backward_data_lstm
           initHidden(phx),
           initCell(pcx),
           weights(pW),
+          RSVgpu(pRSVgpu),
+          RSVcpu(pRSVcpu),
           batch_seq(pBS),
           hiddenSize(pHS),
           seqLength(pS),
@@ -171,9 +174,7 @@ struct verify_backward_data_lstm
           nodhx(pnodhx),
           nodcx(pnodcx),
           use_dropout(puse_dropout),
-          use_seqPadding(puse_seqPadding),
-          RSVgpu(pRSVgpu.begin()),
-          RSVcpu(pRSVcpu.begin())
+          use_seqPadding(puse_seqPadding)
     {
         if(!nohx)
             initHidden = phx; // this may be intentionally a nullptr
@@ -248,6 +249,8 @@ struct verify_backward_weights_lstm
     std::vector<T> input;      // Y
     std::vector<T> dy;         // dY
     std::vector<T> initHidden; // HX
+    std::vector<T> reserveSpace_gpu;
+    std::vector<T> reserveSpace_cpu;
     std::vector<T> workSpace;
     std::vector<int> batch_seq;
     int weightSize;
@@ -264,8 +267,6 @@ struct verify_backward_weights_lstm
     bool nohx;
     bool use_dropout;
     bool use_seqPadding;
-    typename std::vector<T> reserveSpace_gpu;
-    typename std::vector<T> reserveSpace_cpu;
 
     verify_backward_weights_lstm(miopenRNNDescriptor_t pRD,
                                  const std::vector<T>& px,
@@ -291,6 +292,8 @@ struct verify_backward_weights_lstm
         : input(px),
           dy(pdy),
           initHidden(phx),
+          reserveSpace_gpu(pRSVgpu),
+          reserveSpace_cpu(pRSVcpu),
           workSpace(pWS),
           batch_seq(pBS),
           weightSize(pW),
@@ -306,9 +309,7 @@ struct verify_backward_weights_lstm
           realHiddenSize(pHXZ),
           nohx(pnohx),
           use_dropout(puse_dropout),
-          use_seqPadding(puse_seqPadding),
-          reserveSpace_gpu(pRSVgpu),
-          reserveSpace_cpu(pRSVcpu)
+          use_seqPadding(puse_seqPadding)
     {
         if(!nohx)
             initHidden = phx; // this may be intentionally a nullptr
@@ -533,8 +534,7 @@ struct verify_forward_infer_lstm : verify_forward_lstm<T>
 
         auto&& handle = get_handle();
 
-        size_t out_sz        = 0;
-        size_t workSpaceSize = 0;
+        size_t out_sz = 0;
 
         std::vector<miopen::TensorDescriptor> inputCPPDescs;
         std::vector<miopenTensorDescriptor_t> inputDescs;
@@ -549,9 +549,10 @@ struct verify_forward_infer_lstm : verify_forward_lstm<T>
                               hiddenSize * ((dirMode != 0) ? 2 : 1),
                               miopen::deref(rnnDesc).dataType);
 
-        miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize);
+        size_t workspace_size = 0;
+        miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size);
 
-        std::vector<T> workSpace(workSpaceSize / sizeof(T));
+        Workspace wspace{workspace_size};
 
         auto input_dev = handle.Write(input);
 
@@ -565,8 +566,6 @@ struct verify_forward_infer_lstm : verify_forward_lstm<T>
         auto cy = initCell;
         std::fill(cy.begin(), cy.end(), 0.);
 
-        auto workSpace_dev = handle.Write(workSpace);
-
         std::vector<int> hlens(3, 0);
         hlens[0] = nLayers * (dirMode != 0 ? 2 : 1);
         hlens[1] = batch_seq[0];
@@ -577,6 +576,9 @@ struct verify_forward_infer_lstm : verify_forward_lstm<T>
         wlen[0] = weights.size();
         miopen::TensorDescriptor weightDesc(miopen::deref(rnnDesc).dataType, wlen);
 
+        /// \todo: fix the handle.Write() calls below because they generate
+        /// temporary objects that may get destroyed before the
+        /// miopenRNNForwardInference call happens
         miopenRNNForwardInference(&handle,
                                   rnnDesc,
                                   seqLength,
@@ -594,8 +596,8 @@ struct verify_forward_infer_lstm : verify_forward_lstm<T>
                                   ((nohy) ? nullptr : handle.Write(hy).get()),
                                   &hiddenDesc,
                                   ((nocy) ? nullptr : handle.Write(cy).get()),
-                                  workSpace_dev.get(),
-                                  workSpaceSize);
+                                  wspace.ptr(),
+                                  wspace.size());
 
 #if(MIO_LSTM_TEST_DEBUG == 2)
         auto outdata = handle.Read<T>(output_dev, output.size());
@@ -673,9 +675,10 @@ struct verify_forward_train_lstm : verify_forward_lstm<T>
     using verify_forward_lstm<T>::nocy;
     using verify_forward_lstm<T>::use_seqPadding;
 
+    std::vector<T>& RSVgpu;
+    std::vector<T>& RSVcpu;
+
     bool use_dropout;
-    typename std::vector<T>::iterator RSVgpu;
-    typename std::vector<T>::iterator RSVcpu;
 
     verify_forward_train_lstm(miopenRNNDescriptor_t pRD,
                               const std::vector<T>& px,
@@ -700,7 +703,7 @@ struct verify_forward_train_lstm : verify_forward_lstm<T>
                               const bool pnocy           = false,
                               const bool puse_dropout    = false,
                               const bool puse_seqPadding = false)
-        : RSVgpu(pRSVgpu.begin()), RSVcpu(pRSVcpu.begin())
+        : RSVgpu(pRSVgpu), RSVcpu(pRSVcpu)
     {
         input          = px;
         initHidden     = phx;
@@ -861,7 +864,11 @@ struct verify_forward_train_lstm : verify_forward_lstm<T>
             ChangeDataPadding(*packed_output, output, batch_seq, batch_seq[0], out_h, true);
         }
 
-        std::copy(reserveSpace.begin(), reserveSpace.end(), RSVcpu);
+        if(reserveSpace.size() != RSVcpu.size())
+        {
+            std::abort();
+        }
+        std::copy(reserveSpace.begin(), reserveSpace.end(), RSVcpu.begin());
 
         auto retSet = std::make_tuple(
             output, (nohy ? initHidden : hiddenState), (nocy ? initCell : cellState));
@@ -908,17 +915,15 @@ struct verify_forward_train_lstm : verify_forward_lstm<T>
         std::fill(output.begin(), output.end(), static_cast<T>(0));
         auto output_dev = handle.Write(output);
 
-        size_t workSpaceSize    = 0;
+        size_t workspace_size = 0;
+        miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size);
+        Workspace wspace{workspace_size};
+
         size_t reserveSpaceSize = 0;
-        miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize);
         miopenGetRNNTrainingReserveSize(
             &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize);
-
-        std::vector<T> workSpace(workSpaceSize / sizeof(T));
-        std::vector<T> reserveSpace((reserveSpaceSize + sizeof(T) - 1) / sizeof(T));
-
-        auto workSpace_dev    = handle.Write(workSpace);
-        auto reserveSpace_dev = handle.Write(reserveSpace);
+        reserveSpaceSize = (reserveSpaceSize + (sizeof(T) - 1)) & ~(sizeof(T) - 1);
+        Workspace rspace{reserveSpaceSize};
 
         auto weights_dev = handle.Write(weights);
 
@@ -957,10 +962,10 @@ struct verify_forward_train_lstm : verify_forward_lstm<T>
                                  ((nohy) ? nullptr : hy_dev.get()),
                                  &hiddenDesc,
                                  ((nocy) ? nullptr : cy_dev.get()),
-                                 workSpace_dev.get(),
-                                 workSpaceSize,
-                                 reserveSpace_dev.get(),
-                                 reserveSpaceSize);
+                                 wspace.ptr(),
+                                 wspace.size(),
+                                 rspace.ptr(),
+                                 rspace.size());
 
 #if(MIO_LSTM_TEST_DEBUG == 2)
         auto outdata = handle.Read<T>(output_dev, output.size());
@@ -969,9 +974,7 @@ struct verify_forward_train_lstm : verify_forward_lstm<T>
             printf("GPU outdata[%d]: %f\n", i, outdata[i]);
         }
 #endif
-        reserveSpace =
-            handle.Read<T>(reserveSpace_dev, (reserveSpaceSize + sizeof(T) - 1) / sizeof(T));
-        std::copy(reserveSpace.begin(), reserveSpace.end(), RSVgpu);
+        rspace.ReadTo(RSVgpu);
 
         std::vector<T> output_gpu = handle.Read<T>(output_dev, output.size());
 
@@ -1049,7 +1052,7 @@ verify_backward_data_lstm<T>::cpu() const
     int hy_h      = hiddenSize;
     int bi_stride = bi * hy_h;
     int out_h     = hiddenSize * ((dirMode != 0) ? 2 : 1);
-    size_t workSpaceSize;
+    size_t workspace_size;
 
     std::vector<miopen::TensorDescriptor> inputCPPDescs;
     std::vector<miopenTensorDescriptor_t> inputDescs;
@@ -1066,8 +1069,8 @@ verify_backward_data_lstm<T>::cpu() const
                                       true,
                                       use_seqPadding);
 
-    miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize);
-    std::vector<T> workSpace(workSpaceSize / sizeof(T));
+    miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size);
+    std::vector<T> workSpace(workspace_size / sizeof(T));
     std::vector<T> dx(in_sz);
     std::vector<T> dhx(initHidden.size());
     std::vector<T> dcx(initHidden.size());
@@ -1086,8 +1089,11 @@ verify_backward_data_lstm<T>::cpu() const
         reserveSpaceSize = (reserveSpaceSize + sizeof(T) - 1) / sizeof(T);
     }
 
-    std::vector<T> reserveSpace(reserveSpaceSize);
-    std::copy(RSVcpu, RSVcpu + reserveSpaceSize, reserveSpace.begin());
+    if(reserveSpaceSize != RSVcpu.size())
+    {
+        std::abort();
+    }
+    std::vector<T> reserveSpace(RSVcpu);
 
     std::vector<T> converted_dinput;
     std::vector<T> converted_output;
@@ -1119,7 +1125,7 @@ verify_backward_data_lstm<T>::cpu() const
         packed_doutput = &converted_doutput;
 
         // WA
-        wa_workSpace.resize(workSpaceSize / sizeof(T) - (packedXInSize + packedYOutSize));
+        wa_workSpace.resize(workspace_size / sizeof(T) - (packedXInSize + packedYOutSize));
         wa_shifted_workSpace = &wa_workSpace;
     }
     else
@@ -1186,7 +1192,7 @@ verify_backward_data_lstm<T>::cpu() const
                   workSpace.begin() + converted_doutput.size() + converted_dinput.size());
     }
 
-    std::copy(reserveSpace.begin(), reserveSpace.end(), RSVcpu);
+    std::copy(reserveSpace.begin(), reserveSpace.end(), RSVcpu.begin());
 
     // TODO: remove workSpace
     auto retSet =
@@ -1210,8 +1216,6 @@ verify_backward_data_lstm<T>::gpu() const
 
     auto&& handle = get_handle();
 
-    size_t workSpaceSize = 0;
-
     std::vector<miopen::TensorDescriptor> inputCPPDescs;
     std::vector<miopenTensorDescriptor_t> inputDescs;
     createTensorDescArray(
@@ -1225,20 +1229,37 @@ verify_backward_data_lstm<T>::gpu() const
                           hiddenSize * ((dirMode != 0) ? 2 : 1),
                           miopen::deref(rnnDesc).dataType);
 
-    miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize);
-    std::vector<T> workSpace(workSpaceSize / sizeof(T));
-    auto workSpace_dev = handle.Write(workSpace);
-
-    size_t reserveSpaceSize;
+    size_t workspace_size = 0;
+    miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size);
+    if(workspace_size % sizeof(T) != 0)
+    {
+        std::abort();
+    }
+    Workspace wspace{};
+    // Needed to zero out the workspace (happens in std::vector's constructor)
+    // or else this test fails verification when workspace is compared against the
+    // workspace returned by ::cpu method in this class
+    wspace.Write(std::vector<T>(workspace_size / sizeof(T)));
+    // wspace.resize(workspace_size);
+
+    size_t reserveSpaceSize = 0;
     miopenGetRNNTrainingReserveSize(
         &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize);
-    std::vector<T> reserveSpace((reserveSpaceSize + sizeof(T) - 1) / sizeof(T));
-    std::copy(RSVgpu, RSVgpu + reserveSpace.size(), reserveSpace.begin());
+    /// \todo: fix miopenGetRNNTrainingReserveSize to return a multiple of
+    /// sizeof(T)
+    // Needed because reserveSpaceSize returned is not a multiple of sizeof(T).
+    reserveSpaceSize = (reserveSpaceSize + (sizeof(T) - 1)) & ~(sizeof(T) - 1);
 
-    auto yin_dev          = handle.Write(yin);
-    auto dyin_dev         = handle.Write(dy);
-    auto reserveSpace_dev = handle.Write(reserveSpace);
-    auto weights_dev      = handle.Write(weights);
+    if(reserveSpaceSize != (RSVgpu.size() * sizeof(T)))
+    {
+        std::abort();
+    }
+    Workspace rspace{};
+    rspace.Write(RSVgpu);
+
+    auto yin_dev     = handle.Write(yin);
+    auto dyin_dev    = handle.Write(dy);
+    auto weights_dev = handle.Write(weights);
 
     std::vector<int> hlens(3, 0);
     hlens[0] = nLayers * (dirMode != 0 ? 2 : 1);
@@ -1284,18 +1305,18 @@ verify_backward_data_lstm<T>::gpu() const
                           ((nodhx) ? nullptr : dhx_dev.get()),
                           &hiddenDesc,
                           ((nodcx) ? nullptr : dcx_dev.get()),
-                          workSpace_dev.get(),
-                          workSpaceSize,
-                          reserveSpace_dev.get(),
-                          reserveSpace.size() * sizeof(T));
+                          wspace.ptr(),
+                          wspace.size(),
+                          rspace.ptr(),
+                          rspace.size());
 
-    reserveSpace = handle.Read<T>(reserveSpace_dev, reserveSpace.size());
-    std::copy(reserveSpace.begin(), reserveSpace.end(), RSVgpu);
+    assert(RSVgpu.size() * sizeof(T) == rspace.size());
+    rspace.ReadTo(RSVgpu);
     // TODO: remove workSpace
     auto retSet = std::make_tuple(handle.Read<T>(dx_dev, dx.size()),
                                   (nodhx ? initHidden : handle.Read<T>(dhx_dev, dhx.size())),
                                   (nodcx ? initCell : handle.Read<T>(dcx_dev, dcx.size())),
-                                  handle.Read<T>(workSpace_dev, workSpace.size()));
+                                  wspace.Read<std::vector<T>>());
 
 #if(MIO_RNN_TIME_EVERYTHING == 1)
     auto t_end = std::chrono::high_resolution_clock::now();
@@ -1427,8 +1448,12 @@ std::vector<T> verify_backward_weights_lstm<T>::gpu() const
                           hiddenSize * ((dirMode != 0) ? 2 : 1),
                           miopen::deref(rnnDesc).dataType);
 
-    auto workSpace_dev    = handle.Write(workSpace);
-    auto reserveSpace_dev = handle.Write(reserveSpace_gpu);
+    Workspace wspace{};
+    wspace.Write(workSpace);
+
+    Workspace rspace{};
+    rspace.Write(reserveSpace_gpu);
+
     std::vector<T> dweights(weightSize);
     auto dweights_dev = handle.Write(dweights);
     miopen::TensorDescriptor weightDesc(miopen::deref(rnnDesc).dataType, {weightSize});
@@ -1452,10 +1477,10 @@ std::vector<T> verify_backward_weights_lstm<T>::gpu() const
                              dy_dev.get(),
                              &weightDesc,
                              dweights_dev.get(),
-                             workSpace_dev.get(),
-                             workSpace.size() * sizeof(T),
-                             reserveSpace_dev.get(),
-                             reserveSpace_gpu.size() * sizeof(T));
+                             wspace.ptr(),
+                             wspace.size(),
+                             rspace.ptr(),
+                             rspace.size());
 
 #if(MIO_RNN_TIME_EVERYTHING == 1)
     auto t_end = std::chrono::high_resolution_clock::now();
@@ -1732,10 +1757,10 @@ struct lstm_basic_driver : test_driver
                                            false,
                                            usePadding);
 
-        size_t workSpaceSize;
-        miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize);
+        size_t workspace_size;
+        miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workspace_size);
 
-        size_t total_mem = statesSizeInBytes + reserveSpaceSize + workSpaceSize +
+        size_t total_mem = statesSizeInBytes + reserveSpaceSize + workspace_size +
                            (2 * out_sz + in_sz + wei_sz + (nohx ? 0 : hx_sz) + (nohy ? 0 : hx_sz) +
                             (nodhx ? 0 : hx_sz) + (nodhy ? 0 : hx_sz) + (nocx ? 0 : hx_sz) +
                             (nocy ? 0 : hx_sz) + (nodcx ? 0 : hx_sz) + (nodcy ? 0 : hx_sz)) *
@@ -1815,7 +1840,7 @@ struct lstm_basic_driver : test_driver
 #if(MIO_LSTM_TEST_DEBUG > 0)
         printf("Running backward weights LSTM.\n");
         printf("reserve sz: %d, workSpace sz: %d, weight sz: %d\n",
-               reserveSpaceBwdData.size(),
+               rsvcpu.size(),
                workSpaceBwdData.size(),
                wei_sz);
         fflush(nullptr);
diff --git a/test/main.cpp b/test/main.cpp
index b48705754d..a1efb2eff8 100644
--- a/test/main.cpp
+++ b/test/main.cpp
@@ -23,8 +23,10 @@
  * SOFTWARE.
  *
  *******************************************************************************/
+#include "get_handle.hpp"
 #include "test.hpp"
 #include "random.hpp"
+#include "workspace.hpp"
 #include <array>
 #include <iterator>
 #include <memory>
@@ -37,17 +39,8 @@
 struct handle_fixture
 {
     miopenHandle_t handle{};
-#if MIOPEN_BACKEND_OPENCL
-    cl_command_queue q{};
-#endif
 
-    handle_fixture()
-    {
-        miopenCreate(&handle);
-#if MIOPEN_BACKEND_OPENCL
-        miopenGetStream(handle, &q);
-#endif
-    }
+    handle_fixture() { miopenCreate(&handle); }
 
     ~handle_fixture() { miopenDestroy(handle); }
 };
@@ -167,8 +160,6 @@ struct conv_forward : output_tensor_fixture
     {
         float alpha = 1, beta = 0;
 
-        // Setup OpenCL buffers
-
         int n, h, c, w;
         STATUS(miopenGet4dTensorDescriptorLengths(inputTensor, &n, &c, &h, &w));
         size_t sz_in = static_cast<size_t>(n) * c * h * w;
@@ -182,15 +173,12 @@ struct conv_forward : output_tensor_fixture
         size_t sz_fwd_workspace;
         STATUS(miopenConvolutionForwardGetWorkSpaceSize(
             handle, convFilter, inputTensor, convDesc, outputTensor, &sz_fwd_workspace));
-        // OCL fails to allocate zero workspace. Let's allocate small workspace instead to simplify
-        // subsequent code.
-        if(sz_fwd_workspace == 0)
-            sz_fwd_workspace = 256;
+
+        Workspace wspace{sz_fwd_workspace};
 
         std::vector<float> in(sz_in);
         std::vector<float> wei(sz_wei);
         std::vector<float> out(sz_out);
-        std::vector<float> fwd_workspace(sz_fwd_workspace / 4);
 
         for(size_t i = 0; i < sz_in; i++)
         {
@@ -201,60 +189,16 @@ struct conv_forward : output_tensor_fixture
             wei[i] = prng::gen_A_to_B(-0.5f, 0.5f) * 0.001f;
         }
 
-#if MIOPEN_BACKEND_OPENCL
-
-        cl_context ctx;
-        clGetCommandQueueInfo(q, CL_QUEUE_CONTEXT, sizeof(cl_context), &ctx, nullptr);
-
-        cl_int status  = CL_SUCCESS;
-        cl_mem in_dev  = clCreateBuffer(ctx, CL_MEM_READ_ONLY, 4 * sz_in, nullptr, &status);
-        cl_mem wei_dev = clCreateBuffer(ctx, CL_MEM_READ_ONLY, 4 * sz_wei, nullptr, nullptr);
-        cl_mem out_dev = clCreateBuffer(ctx, CL_MEM_READ_WRITE, 4 * sz_out, nullptr, nullptr);
-        cl_mem fwd_workspace_dev =
-            clCreateBuffer(ctx, CL_MEM_READ_WRITE, sz_fwd_workspace, nullptr, nullptr);
-
-        status =
-            clEnqueueWriteBuffer(q, in_dev, CL_TRUE, 0, 4 * sz_in, in.data(), 0, nullptr, nullptr);
-        status |= clEnqueueWriteBuffer(
-            q, wei_dev, CL_TRUE, 0, 4 * sz_wei, wei.data(), 0, nullptr, nullptr);
-        status |= clEnqueueWriteBuffer(
-            q, out_dev, CL_TRUE, 0, 4 * sz_out, out.data(), 0, nullptr, nullptr);
-        status |= clEnqueueWriteBuffer(q,
-                                       fwd_workspace_dev,
-                                       CL_TRUE,
-                                       0,
-                                       sz_fwd_workspace,
-                                       fwd_workspace.data(),
-                                       0,
-                                       nullptr,
-                                       nullptr);
-        EXPECT(status == CL_SUCCESS);
-
-#elif MIOPEN_BACKEND_HIP
-
-        void* in_dev;
-        void* wei_dev;
-        void* out_dev;
-        void* fwd_workspace_dev;
-
-        EXPECT(hipMalloc(&in_dev, 4 * sz_in) == hipSuccess);
-        EXPECT(hipMalloc(&wei_dev, 4 * sz_wei) == hipSuccess);
-        EXPECT(hipMalloc(&out_dev, 4 * sz_out) == hipSuccess);
-        EXPECT(hipMalloc(&fwd_workspace_dev, sz_fwd_workspace) == hipSuccess);
-
-        EXPECT(hipMemcpy(in_dev, in.data(), 4 * sz_in, hipMemcpyHostToDevice) == hipSuccess);
-        EXPECT(hipMemcpy(wei_dev, wei.data(), 4 * sz_wei, hipMemcpyHostToDevice) == hipSuccess);
-        EXPECT(hipMemcpy(out_dev, out.data(), 4 * sz_out, hipMemcpyHostToDevice) == hipSuccess);
-        EXPECT(hipMemcpy(fwd_workspace_dev,
-                         fwd_workspace.data(),
-                         sz_fwd_workspace,
-                         hipMemcpyHostToDevice) == hipSuccess);
+        auto& mhand = get_handle();
+
+        auto in_dev  = mhand.Write(in);
+        auto wei_dev = mhand.Write(wei);
+        auto out_dev = mhand.Write(out);
 
-#endif
         int value = 10;
-        STATUS(miopenSetTensor(handle, inputTensor, in_dev, &value));
+        STATUS(miopenSetTensor(handle, inputTensor, in_dev.get(), &value));
 
-        STATUS(miopenScaleTensor(handle, inputTensor, in_dev, &alpha));
+        STATUS(miopenScaleTensor(handle, inputTensor, in_dev.get(), &alpha));
 
         float time;
 
@@ -276,32 +220,32 @@ struct conv_forward : output_tensor_fixture
             STATUS(miopenFindConvolutionForwardAlgorithm(
                 used_handle,
                 inputTensor,
-                in_dev,
+                in_dev.get(),
                 convFilter,
-                wei_dev,
+                wei_dev.get(),
                 convDesc,
                 outputTensor,
-                out_dev,
+                out_dev.get(),
                 1,
                 &ret_algo_count,
                 &perf,
-                fwd_workspace_dev,
-                sz_fwd_workspace,
+                wspace.ptr(),
+                wspace.size(),
                 0)); // MD: Not performing exhaustiveSearch by default for now
 
             STATUS(miopenConvolutionForward(used_handle,
                                             &alpha,
                                             inputTensor,
-                                            in_dev,
+                                            in_dev.get(),
                                             convFilter,
-                                            wei_dev,
+                                            wei_dev.get(),
                                             convDesc,
                                             perf.fwd_algo,
                                             &beta,
                                             outputTensor,
-                                            out_dev,
-                                            fwd_workspace_dev,
-                                            sz_fwd_workspace));
+                                            out_dev.get(),
+                                            wspace.ptr(),
+                                            wspace.size()));
 
             STATUS(miopenGetKernelTime(used_handle, &time));
 
@@ -318,20 +262,6 @@ struct conv_forward : output_tensor_fixture
         {
             CHECK(time == 0.0);
         }
-
-// Potential memory leak free memory at end of function
-#if MIOPEN_BACKEND_OPENCL
-        clReleaseMemObject(in_dev);
-        clReleaseMemObject(wei_dev);
-        clReleaseMemObject(out_dev);
-        clReleaseMemObject(fwd_workspace_dev);
-
-#elif MIOPEN_BACKEND_HIP
-        hipFree(in_dev);
-        hipFree(wei_dev);
-        hipFree(out_dev);
-        hipFree(fwd_workspace_dev);
-#endif
     }
 };
 
diff --git a/test/pooling_common.hpp b/test/pooling_common.hpp
index ae55b2e133..33bcb7164f 100644
--- a/test/pooling_common.hpp
+++ b/test/pooling_common.hpp
@@ -46,6 +46,7 @@
 #include "tensor_holder.hpp"
 #include "verify.hpp"
 #include "cpu_conv.hpp"
+#include "workspace.hpp"
 
 #define TEST_PADDING_MODE 0
 // NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global-variables)
@@ -199,9 +200,10 @@ struct verify_forward_pooling
         auto out      = get_output_tensor(filter, input);
         indices.resize(out.data.size(), 0);
 
-        auto in_dev        = handle.Write(input.data);
-        auto out_dev       = handle.Create<T>(out.data.size());
-        auto workspace_dev = handle.Write(indices);
+        auto in_dev  = handle.Write(input.data);
+        auto out_dev = handle.Create<T>(out.data.size());
+        Workspace wspace{};
+        wspace.Write(indices);
 
         float alpha = 1, beta = 0;
         filter.Forward(handle,
@@ -212,10 +214,10 @@ struct verify_forward_pooling
                        out.desc,
                        out_dev.get(),
                        true,
-                       workspace_dev.get(),
-                       indices.size() * sizeof(Index));
+                       wspace.ptr(),
+                       wspace.size());
 
-        indices  = handle.Read<Index>(workspace_dev, indices.size());
+        indices  = wspace.Read<std::vector<Index>>();
         out.data = handle.Read<T>(out_dev, out.data.size());
         return out;
     }
@@ -403,9 +405,8 @@ struct verify_backward_pooling
         auto out_dev  = handle.Write(out.data);
         auto din_dev  = handle.Create<T>(dinput.data.size());
 
-        // std::vector<char> workspace(filter.GetWorkSpaceSize(out.desc));
-        // auto workspace_dev = handle.Write(workspace);
-        auto workspace_dev = handle.Write(indices);
+        Workspace wspace{};
+        wspace.Write(indices);
 
         float alpha = 1, beta = 0;
         filter.Backward(handle,
@@ -423,7 +424,7 @@ struct verify_backward_pooling
                         // dx
                         dinput.desc,
                         din_dev.get(),
-                        workspace_dev.get());
+                        wspace.ptr());
 
         dinput.data = handle.Read<T>(din_dev, dinput.data.size());
         return dinput;
diff --git a/test/reduce_test.cpp b/test/reduce_test.cpp
index 3ca771fc64..dc92a20318 100644
--- a/test/reduce_test.cpp
+++ b/test/reduce_test.cpp
@@ -27,6 +27,7 @@
 #include "driver.hpp"
 #include "test.hpp"
 #include "verify.hpp"
+#include "workspace.hpp"
 #include "get_handle.hpp"
 #include "tensor_holder.hpp"
 #include "random.hpp"
@@ -60,28 +61,26 @@ struct verify_reduce_with_indices
     miopenReduceTensorIndices_t indicesOpt;
     miopenIndicesType_t indicesType;
 
-    verify_reduce_with_indices( // NOLINT (hicpp-member-init)
-        const miopen::ReduceTensorDescriptor& reduce_,
-        const tensor<T>& input_,
-        const tensor<T>& output_,
-        const tensor<T>& workspace_,
-        const tensor<int>& indices_,
-        float alpha_,
-        float beta_)
+    verify_reduce_with_indices(const miopen::ReduceTensorDescriptor& reduce_,
+                               const tensor<T>& input_,
+                               const tensor<T>& output_,
+                               const tensor<T>& workspace_,
+                               const tensor<int>& indices_,
+                               float alpha_,
+                               float beta_)
+        : reduce(reduce_),
+          input(input_),
+          output(output_),
+          workspace(workspace_),
+          indices(indices_),
+          alpha(alpha_),
+          beta(beta_),
+          reduceOp(reduce.reduceTensorOp_),
+          compTypeVal(reduce.reduceTensorCompType_),
+          nanOpt(reduce.reduceTensorNanOpt_),
+          indicesOpt(reduce.reduceTensorIndices_),
+          indicesType(reduce.reduceTensorIndicesType_)
     {
-        reduce    = reduce_;
-        input     = input_;
-        output    = output_;
-        workspace = workspace_;
-        indices   = indices_;
-        alpha     = alpha_;
-        beta      = beta_;
-
-        reduceOp    = reduce.reduceTensorOp_;
-        compTypeVal = reduce.reduceTensorCompType_;
-        nanOpt      = reduce.reduceTensorNanOpt_;
-        indicesOpt  = reduce.reduceTensorIndices_;
-        indicesType = reduce.reduceTensorIndicesType_;
     }
 
     tensor<float> cpu() const
@@ -343,10 +342,11 @@ struct verify_reduce_with_indices
         auto res         = output;
         auto res_indices = indices;
 
-        auto indices_dev = handle.Write(indices.data);
+        Workspace idxspace{};
+        idxspace.Write(indices.data);
 
-        std::size_t ws_sizeInBytes      = workspace.desc.GetElementSize() * sizeof(T);
-        std::size_t indices_sizeInBytes = indices.desc.GetElementSize() * sizeof(int);
+        Workspace wspace{};
+        wspace.Write(workspace.data);
 
         const double alpha64 = alpha;
         const double beta64  = beta;
@@ -358,15 +358,13 @@ struct verify_reduce_with_indices
                                          ? static_cast<const void*>(&beta64)
                                          : static_cast<const void*>(&beta);
 
-        if(ws_sizeInBytes > 0)
+        if(wspace.size() > 0)
         {
-            auto workspace_dev = handle.Write(workspace.data);
-
             reduce.ReduceTensor(get_handle(),
-                                indices_dev.get(),
-                                indices_sizeInBytes,
-                                workspace_dev.get(),
-                                ws_sizeInBytes,
+                                idxspace.ptr(),
+                                idxspace.size(),
+                                wspace.ptr(),
+                                wspace.size(),
                                 alphaPtr,
                                 input.desc,
                                 input_dev.get(),
@@ -377,8 +375,8 @@ struct verify_reduce_with_indices
         else
         {
             reduce.ReduceTensor(get_handle(),
-                                indices_dev.get(),
-                                indices_sizeInBytes,
+                                idxspace.ptr(),
+                                idxspace.size(),
                                 nullptr,
                                 0,
                                 alphaPtr,
@@ -390,7 +388,7 @@ struct verify_reduce_with_indices
         };
 
         res.data         = handle.Read<T>(output_dev, res.data.size());
-        res_indices.data = handle.Read<int>(indices_dev, res_indices.data.size());
+        res_indices.data = idxspace.Read<decltype(res_indices.data)>();
 
         return (std::make_tuple(res, res_indices));
     }
@@ -647,7 +645,8 @@ struct verify_reduce_no_indices
         // replicate
         auto res = output;
 
-        std::size_t ws_sizeInBytes = workspace.desc.GetElementSize() * sizeof(T);
+        Workspace wspace{};
+        wspace.Write(workspace.data);
 
         const double alpha64 = alpha;
         const double beta64  = beta;
@@ -659,15 +658,13 @@ struct verify_reduce_no_indices
                                          ? static_cast<const void*>(&beta64)
                                          : static_cast<const void*>(&beta);
 
-        if(ws_sizeInBytes > 0)
+        if(wspace.size() > 0)
         {
-            auto workspace_dev = handle.Write(workspace.data);
-
             reduce.ReduceTensor(get_handle(),
                                 nullptr,
                                 0,
-                                workspace_dev.get(),
-                                ws_sizeInBytes,
+                                wspace.ptr(),
+                                wspace.size(),
                                 alphaPtr,
                                 input.desc,
                                 input_dev.get(),
diff --git a/test/rnn_vanilla_common.hpp b/test/rnn_vanilla_common.hpp
index ef5c1088c3..854f682068 100644
--- a/test/rnn_vanilla_common.hpp
+++ b/test/rnn_vanilla_common.hpp
@@ -35,6 +35,7 @@
 #include "verify.hpp"
 #include "rnn_util.hpp"
 #include "random.hpp"
+#include "workspace.hpp"
 #include <array>
 #include <cmath>
 #include <ctime>
@@ -1436,8 +1437,7 @@ struct verify_forward_infer_rnn
                               miopen::deref(rnnDesc).dataType);
 
         miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize);
-
-        std::vector<T> workSpace(workSpaceSize / sizeof(T));
+        Workspace wspace{workSpaceSize};
 
         auto input_dev = handle.Write(input);
 
@@ -1450,8 +1450,6 @@ struct verify_forward_infer_rnn
         std::fill(hy.begin(), hy.end(), 0.);
         auto hy_dev = handle.Write(hy);
 
-        auto workSpace_dev = handle.Write(workSpace);
-
         std::vector<int> hlens(3, 0);
         hlens[0] = nLayers * ((dirMode != 0) ? 2 : 1);
         hlens[1] = batch_seq[0];
@@ -1479,8 +1477,8 @@ struct verify_forward_infer_rnn
                                   ((nohy) ? nullptr : hy_dev.get()),
                                   &hiddenDesc,
                                   nullptr,
-                                  workSpace_dev.get(),
-                                  workSpaceSize);
+                                  wspace.ptr(),
+                                  wspace.size());
 
 #if(MIO_RNN_TEST_DEBUG == 2)
         auto outdata = handle.Read<T>(output_dev, output.size());
@@ -1708,11 +1706,13 @@ struct verify_forward_train_rnn
                               miopen::deref(rnnDesc).dataType);
 
         miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize);
+        Workspace wspace{workSpaceSize};
+
         miopenGetRNNTrainingReserveSize(
             &handle, rnnDesc, seqLength, inputDescs.data(), &reserveSpaceSize);
+        reserveSpaceSize = (reserveSpaceSize + (sizeof(T) - 1)) & ~(sizeof(T) - 1);
 
-        std::vector<T> workSpace(workSpaceSize / sizeof(T));
-        std::vector<T> reserveSpace((reserveSpaceSize + sizeof(T) - 1) / sizeof(T));
+        Workspace rspace{reserveSpaceSize};
 
         auto input_dev = handle.Write(input);
 
@@ -1726,9 +1726,6 @@ struct verify_forward_train_rnn
         std::fill(hy.begin(), hy.end(), 0.);
         auto hy_dev = handle.Write(hy);
 
-        auto workSpace_dev    = handle.Write(workSpace);
-        auto reserveSpace_dev = handle.Write(reserveSpace);
-
         std::vector<int> hlens(3, 0);
         hlens[0] = nLayers * ((dirMode != 0) ? 2 : 1);
         hlens[1] = batch_seq[0];
@@ -1756,10 +1753,10 @@ struct verify_forward_train_rnn
                                  ((nohy) ? nullptr : hy_dev.get()),
                                  &hiddenDesc,
                                  nullptr,
-                                 workSpace_dev.get(),
-                                 workSpaceSize,
-                                 reserveSpace_dev.get(),
-                                 reserveSpaceSize);
+                                 wspace.ptr(),
+                                 wspace.size(),
+                                 rspace.ptr(),
+                                 rspace.size());
 
 #if(MIO_RNN_TEST_DEBUG == 2)
         auto outdata = handle.Read<T>(output_dev, output.size());
@@ -1769,10 +1766,9 @@ struct verify_forward_train_rnn
         }
 #endif
 
-        auto retSet = std::make_tuple(
-            handle.Read<T>(output_dev, output.size()),
-            (nohy ? initHidden : handle.Read<T>(hy_dev, hy.size())),
-            handle.Read<T>(reserveSpace_dev, (reserveSpaceSize + sizeof(T) - 1) / sizeof(T)));
+        auto retSet = std::make_tuple(handle.Read<T>(output_dev, output.size()),
+                                      (nohy ? initHidden : handle.Read<T>(hy_dev, hy.size())),
+                                      rspace.Read<std::vector<T>>());
 #if(MIO_RNN_TIME_EVERYTHING == 1)
         auto t_end = std::chrono::high_resolution_clock::now();
 
@@ -2002,15 +1998,15 @@ struct verify_backward_data_rnn
                               miopen::deref(rnnDesc).dataType);
 
         miopenGetRNNWorkspaceSize(&handle, rnnDesc, seqLength, inputDescs.data(), &workSpaceSize);
-        std::vector<T> workSpace(workSpaceSize / sizeof(T));
-        auto workSpace_dev = handle.Write(workSpace);
+        Workspace wspace{workSpaceSize};
 
         miopenGetRNNInputTensorSize(&handle, rnnDesc, seqLength, outputDescs.data(), &out_sz);
         auto yin_dev  = handle.Write(yin);
         auto dyin_dev = handle.Write(dy);
         // auto dhyin_dev        = handle.Write(dhy);
-        auto reserveSpace_dev = handle.Write(reserveSpace);
-        auto weights_dev      = handle.Write(weights);
+        Workspace rspace{};
+        rspace.Write(reserveSpace);
+        auto weights_dev = handle.Write(weights);
         // auto hx_dev           = handle.Write(initHidden);
 
         std::vector<int> hlens(3, 0);
@@ -2054,15 +2050,15 @@ struct verify_backward_data_rnn
                               ((nodhx) ? nullptr : dhx_dev.get()),
                               &hiddenDesc,
                               nullptr,
-                              workSpace_dev.get(),
-                              workSpaceSize,
-                              reserveSpace_dev.get(),
-                              reserveSpace.size() * sizeof(T));
+                              wspace.ptr(),
+                              wspace.size(),
+                              rspace.ptr(),
+                              rspace.size());
 
         auto retSet = std::make_tuple(handle.Read<T>(dx_dev, dx.size()),
                                       (nodhx ? initHidden : handle.Read<T>(dhx_dev, dhx.size())),
-                                      handle.Read<T>(reserveSpace_dev, reserveSpace.size()),
-                                      handle.Read<T>(workSpace_dev, workSpace.size()));
+                                      rspace.Read<std::vector<T>>(),
+                                      wspace.Read<std::vector<T>>());
 
 #if(MIO_RNN_TIME_EVERYTHING == 1)
         auto t_end = std::chrono::high_resolution_clock::now();
@@ -2257,8 +2253,11 @@ struct verify_backward_weights_rnn
                               hiddenSize * ((dirMode != 0) ? 2 : 1),
                               miopen::deref(rnnDesc).dataType);
 
-        auto workSpace_dev    = handle.Write(workSpace);
-        auto reserveSpace_dev = handle.Write(reserveSpace);
+        Workspace wspace{};
+        wspace.Write(workSpace);
+        Workspace rspace{};
+        rspace.Write(reserveSpace);
+
         std::vector<T> dweights(weightSize);
         auto dweights_dev = handle.Write(dweights);
         miopen::TensorDescriptor weightDesc(miopen::deref(rnnDesc).dataType, {weightSize});
@@ -2283,10 +2282,10 @@ struct verify_backward_weights_rnn
                                  dy_dev.get(),
                                  &weightDesc,
                                  dweights_dev.get(),
-                                 workSpace_dev.get(),
-                                 workSpace.size() * sizeof(T),
-                                 reserveSpace_dev.get(),
-                                 reserveSpace.size() * sizeof(T));
+                                 wspace.ptr(),
+                                 wspace.size(),
+                                 rspace.ptr(),
+                                 rspace.size());
 
 #if(MIO_RNN_TIME_EVERYTHING == 1)
         auto t_end = std::chrono::high_resolution_clock::now();
diff --git a/test/tensor_reorder.cpp b/test/tensor_reorder.cpp
index dc1a38f508..bf40e7ee38 100644
--- a/test/tensor_reorder.cpp
+++ b/test/tensor_reorder.cpp
@@ -38,6 +38,8 @@
 #include "test.hpp"
 #include "driver.hpp"
 #include "random.hpp"
+#include "get_handle.hpp"
+#include "workspace.hpp"
 
 template <>
 struct miopen_type<uint8_t> : std::integral_constant<miopenDataType_t, miopenInt8>
@@ -286,19 +288,6 @@ bool verify_tensor(tensor<T>& t_gpu, tensor<T>& t_cpu)
 
 struct tensor_reorder_base_driver : test_driver
 {
-    miopenHandle_t handle{};
-#if MIOPEN_BACKEND_OPENCL
-    cl_command_queue q{};
-#endif
-
-    tensor_reorder_base_driver()
-    {
-        miopenCreate(&handle);
-#if MIOPEN_BACKEND_OPENCL
-        miopenGetStream(handle, &q);
-#endif
-    }
-    ~tensor_reorder_base_driver() { miopenDestroy(handle); }
 
     static std::vector<uint32_t> get_dim_3_size() { return {1, 9}; }
     static std::vector<uint32_t> get_dim_2_size() { return {1, 9}; }
@@ -363,14 +352,14 @@ struct tensor_reorder_driver : tensor_reorder_base_driver
     // NOLINTBEGIN(clang-analyzer-cplusplus.NewDeleteLeaks)
     void run()
     {
-        auto run_reorder = [this](uint32_t dim_0,
-                                  uint32_t dim_1,
-                                  uint32_t dim_2,
-                                  uint32_t dim_3,
-                                  uint32_t order_0,
-                                  uint32_t order_1,
-                                  uint32_t order_2,
-                                  uint32_t order_3) {
+        auto run_reorder = [](uint32_t dim_0,
+                              uint32_t dim_1,
+                              uint32_t dim_2,
+                              uint32_t dim_3,
+                              uint32_t order_0,
+                              uint32_t order_1,
+                              uint32_t order_2,
+                              uint32_t order_3) {
             int tensor_sz = dim_0 * dim_1 * dim_2 * dim_3;
             std::vector<int> tensor_len({static_cast<int>(dim_0),
                                          static_cast<int>(dim_1),
@@ -392,8 +381,9 @@ struct tensor_reorder_driver : tensor_reorder_base_driver
             tensor<T> t_dst_gpu(tensor_len, tensor_strides);
             rand_tensor_integer(t_src);
 
+            auto& handle = get_handle();
             miopen::ExecutionContext ctx;
-            ctx.SetStream(&miopen::deref(this->handle));
+            ctx.SetStream(&handle);
             // ctx.SetupFloats();
             auto reorder_sol = MakeTensorReorderAttributes(ctx,
                                                            to_miopen_data_type<T>::get(),
@@ -406,36 +396,13 @@ struct tensor_reorder_driver : tensor_reorder_base_driver
                                                            order_2,
                                                            order_3);
             EXPECT(reorder_sol != nullptr);
-            size_t workspace = reorder_sol->IsSkippable() ? sizeof(T) * tensor_sz
-                                                          : reorder_sol->GetOutputTensorSize();
-#if MIOPEN_BACKEND_OPENCL
-            cl_context cl_ctx;
-            clGetCommandQueueInfo(q, CL_QUEUE_CONTEXT, sizeof(cl_context), &cl_ctx, nullptr);
-            cl_int status = CL_SUCCESS;
-            cl_mem src_dev =
-                clCreateBuffer(cl_ctx, CL_MEM_READ_WRITE, sizeof(T) * tensor_sz, nullptr, &status);
-            cl_mem dst_dev = clCreateBuffer(cl_ctx, CL_MEM_READ_WRITE, workspace, nullptr, nullptr);
-            status |= clEnqueueWriteBuffer(q,
-                                           src_dev,
-                                           CL_TRUE,
-                                           0,
-                                           sizeof(T) * tensor_sz,
-                                           t_src.data.data(),
-                                           0,
-                                           nullptr,
-                                           nullptr);
-            EXPECT(status == CL_SUCCESS);
-#elif MIOPEN_BACKEND_HIP
-            void* src_dev;
-            void* dst_dev;
-            EXPECT(hipMalloc(&src_dev, sizeof(T) * tensor_sz) == hipSuccess);
-            EXPECT(hipMalloc(&dst_dev, workspace) == hipSuccess);
-            EXPECT(hipMemcpy(
-                       src_dev, t_src.data.data(), sizeof(T) * tensor_sz, hipMemcpyHostToDevice) ==
-                   hipSuccess);
-#endif
-            const auto invoke_param = reorder_invoke_param{
-                DataCast(static_cast<const void*>(src_dev)), DataCast(dst_dev)};
+            size_t workspace_size = reorder_sol->IsSkippable() ? sizeof(T) * tensor_sz
+                                                               : reorder_sol->GetOutputTensorSize();
+            Workspace wspace{workspace_size};
+
+            auto src_dev = handle.Write(t_src.data);
+
+            const auto invoke_param         = reorder_invoke_param{src_dev.get(), wspace.ptr()};
             std::vector<OpKernelArg> opArgs = reorder_sol->GetKernelArg();
             boost::optional<miopen::InvokerFactory> invoker_factory(
                 [=](const std::vector<miopen::Kernel>& kernels) mutable {
@@ -451,10 +418,9 @@ struct tensor_reorder_driver : tensor_reorder_base_driver
                 });
             std::vector<miopen::solver::KernelInfo> construction_params{
                 reorder_sol->GetKernelInfo()};
-            const auto invoker =
-                miopen::deref(this->handle).PrepareInvoker(*invoker_factory, construction_params);
+            const auto invoker = handle.PrepareInvoker(*invoker_factory, construction_params);
             // run gpu
-            invoker(miopen::deref(this->handle), invoke_param);
+            invoker(handle, invoke_param);
             // run cpu
             cpu_reorder<T>::run(t_dst.data.data(),
                                 t_src.data.data(),
@@ -467,18 +433,9 @@ struct tensor_reorder_driver : tensor_reorder_base_driver
                                 order_2,
                                 order_3);
             invoker_factory = boost::none;
-#if MIOPEN_BACKEND_OPENCL
-            status = clEnqueueReadBuffer(
-                q, dst_dev, CL_TRUE, 0, workspace, t_dst_gpu.data.data(), 0, nullptr, nullptr);
-            EXPECT(status == CL_SUCCESS);
-            clReleaseMemObject(dst_dev);
-            clReleaseMemObject(src_dev);
-#elif MIOPEN_BACKEND_HIP
-            EXPECT(hipMemcpy(t_dst_gpu.data.data(), dst_dev, workspace, hipMemcpyDeviceToHost) ==
-                   hipSuccess);
-            hipFree(dst_dev);
-            hipFree(src_dev);
-#endif
+
+            t_dst_gpu.data = wspace.Read<decltype(t_dst_gpu.data)>();
+
             // we expect excact match, since use integer
             bool valid_result = verify_tensor(t_dst_gpu, t_dst);
             std::cout << "[" << reorder_str::get(order_0, order_1, order_2, order_3) << ", b"
diff --git a/test/workspace.hpp b/test/workspace.hpp
new file mode 100644
index 0000000000..93522b1cd4
--- /dev/null
+++ b/test/workspace.hpp
@@ -0,0 +1,163 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+#pragma once
+
+#include <hip/hip_runtime.h>
+
+#include "get_handle.hpp"
+
+#define HIP_CHECK(exp)                                                                 \
+    if((exp) != hipSuccess)                                                            \
+    {                                                                                  \
+        MIOPEN_LOG_E(#exp "failed at line: " << __LINE__ << " in file: " << __FILE__); \
+    }
+
+class Workspace
+{
+
+    // RAII class for hip allocations
+    class GPUBuffer
+    {
+    public:
+        GPUBuffer() = default;
+
+        explicit GPUBuffer(size_t num_bytes) : sz_(num_bytes)
+        {
+            if(num_bytes > 0)
+            {
+                HIP_CHECK(hipMalloc(&buf_, num_bytes));
+                assert(buf_ != nullptr);
+            }
+            else
+            {
+                buf_ = nullptr;
+            }
+        }
+
+        ~GPUBuffer() { FreeBuf(); }
+
+        void* ptr() { return buf_; }
+        void* ptr() const { return buf_; }
+
+        auto size() const { return sz_; }
+
+        GPUBuffer(const GPUBuffer&) = delete;
+        GPUBuffer& operator=(const GPUBuffer&) = delete;
+
+        GPUBuffer(GPUBuffer&& that) noexcept : buf_(that.buf_), sz_(that.sz_)
+        {
+            that.buf_ = nullptr; // take over ownership
+            that.sz_  = 0;
+        }
+
+        GPUBuffer& operator=(GPUBuffer&& that) noexcept
+        {
+            FreeBuf();
+            std::swap(this->buf_, that.buf_);
+            std::swap(this->sz_, that.sz_);
+            return *this;
+        }
+
+    private:
+        void FreeBuf()
+        {
+            HIP_CHECK(hipFree(buf_));
+            buf_ = nullptr;
+            sz_  = 0;
+        }
+
+        void* buf_ = nullptr;
+        size_t sz_ = 0;
+    };
+
+    // for use in miopen .*GetWorkSpaceSize() methods where a pointer to size_t is
+    // passed to capture the size. Must call AdjustToSize() after calling such a method
+    size_t* SizePtr() { return &sz_; }
+
+    void AdjustToSize()
+    {
+        if(sz_ != gpu_buf_.size())
+        {
+            gpu_buf_ = GPUBuffer(sz_);
+        }
+    }
+
+public:
+    explicit Workspace(size_t sz = 0) : sz_(sz) { AdjustToSize(); }
+
+    Workspace(const Workspace&) = delete;
+    Workspace& operator=(const Workspace&) = delete;
+    Workspace(Workspace&&)                 = default;
+    Workspace& operator=(Workspace&&) = default;
+
+    size_t size() const { return sz_; }
+
+    void resize(size_t sz_in_bytes)
+    {
+        sz_ = sz_in_bytes;
+        AdjustToSize();
+    }
+
+    auto ptr() const { return gpu_buf_.ptr(); }
+
+    auto ptr() { return gpu_buf_.ptr(); }
+
+    template <typename V>
+    void Write(const V& vec)
+    {
+        using T = typename V::value_type;
+        resize(vec.size() * sizeof(T));
+        HIP_CHECK(hipMemcpy(this->ptr(), &vec[0], size(), hipMemcpyHostToDevice));
+    }
+
+    template <typename V>
+    void ReadTo(V& vec) const
+    {
+        using T = typename V::value_type;
+        if(vec.size() * sizeof(T) != size())
+        {
+            MIOPEN_LOG_E("vector of wrong size passed");
+            std::abort();
+        }
+        HIP_CHECK(hipMemcpy(&vec[0], ptr(), size(), hipMemcpyDeviceToHost));
+    }
+
+    template <typename V>
+    V Read() const
+    {
+        using T         = typename V::value_type;
+        size_t num_elem = size() / sizeof(T);
+        V ret(num_elem);
+        ReadTo(ret);
+        return ret;
+    }
+
+private:
+    // miopen::Handle& handle_;
+    // miopen::Allocator::ManageDataPtr data_{};
+    GPUBuffer gpu_buf_{};
+    size_t sz_{};
+};