Commit d83f37b (1 parent: 1ba87b5)
14 changed files with 662 additions and 622 deletions.
// cuda_stream_manager.hpp
#pragma once
#include <cuda_runtime.h>
#include <memory>

class CudaStreamManager {
public:
    CudaStreamManager() {
        // Create a single CUDA stream shared by all pipeline stages
        cudaStreamCreate(&stream_);

        // Create CUDA events used to order the stages
        cudaEventCreate(&preprocess_done_);
        cudaEventCreate(&inference_done_);
    }

    ~CudaStreamManager() {
        // Destroy CUDA stream and events
        cudaStreamDestroy(stream_);
        cudaEventDestroy(preprocess_done_);
        cudaEventDestroy(inference_done_);
    }

    // Non-copyable: the manager owns raw CUDA handles, so a copy would
    // destroy the same stream and events twice.
    CudaStreamManager(const CudaStreamManager&) = delete;
    CudaStreamManager& operator=(const CudaStreamManager&) = delete;

    cudaStream_t getStream() const {
        return stream_;
    }

    cudaEvent_t& getPreprocessEvent() {
        return preprocess_done_;
    }

    cudaEvent_t& getInferenceEvent() {
        return inference_done_;
    }

private:
    cudaStream_t stream_;
    cudaEvent_t preprocess_done_, inference_done_;
};

using CudaStreamManagerPtr = std::shared_ptr<CudaStreamManager>;
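The manager funnels every stage through one stream, so work on the GPU is already ordered implicitly; the events become essential once stages run on separate streams. The pycuda sketch below shows the same record/wait pattern across two streams. It is a sketch only: pycuda is assumed installed, and the stream names and buffer sizes are illustrative, not part of this commit.

# two_stream_ordering.py - illustrative sketch, not part of this commit.
import pycuda.autoinit  # creates a CUDA context on import
import pycuda.driver as cuda
import numpy as np

preprocess_stream = cuda.Stream()
inference_stream = cuda.Stream()
preprocess_done = cuda.Event()

# "Preprocessing": enqueue an async host-to-device copy, then record the
# event on the same stream. (Use cuda.pagelocked_empty for a truly
# asynchronous copy; a plain numpy array degrades to a synchronous one.)
data = np.random.randn(1024, 1024).astype(np.float32)
gpu_buf = cuda.mem_alloc(data.nbytes)
cuda.memcpy_htod_async(gpu_buf, data, preprocess_stream)
preprocess_done.record(preprocess_stream)

# "Inference": the GPU will not run work queued on this stream past this
# point until the event has fired; the host thread is not blocked.
inference_stream.wait_for_event(preprocess_done)
# ... enqueue inference kernels on inference_stream here ...
inference_stream.synchronize()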
// preprocessing_node.cpp
#include <rclcpp/rclcpp.hpp>
#include "cuda_stream_manager.hpp"

class PreprocessingNode : public rclcpp::Node {
public:
    // The component container constructs nodes from NodeOptions alone, so
    // the shared manager defaults to a fresh instance; when composing
    // manually, pass the manager shared with the rest of the pipeline.
    explicit PreprocessingNode(
        const rclcpp::NodeOptions& options,
        CudaStreamManagerPtr cuda_manager = std::make_shared<CudaStreamManager>())
        : Node("preprocessing_node", options), cuda_manager_(cuda_manager) {}

    void preprocess() {
        // Perform GPU preprocessing here using cuda_manager_->getStream()

        // Signal that preprocessing is done
        cudaEventRecord(cuda_manager_->getPreprocessEvent(), cuda_manager_->getStream());
    }

private:
    CudaStreamManagerPtr cuda_manager_;
};

// Register as a composable node (the macro requires a constructor that
// accepts rclcpp::NodeOptions)
#include "rclcpp_components/register_node_macro.hpp"
RCLCPP_COMPONENTS_REGISTER_NODE(PreprocessingNode)
import pyzed.sl as sl

# Create a ZED Camera object
zed = sl.Camera()

# Create InitParameters object and set configuration parameters
init_params = sl.InitParameters()
init_params.camera_resolution = sl.RESOLUTION.HD720  # Set resolution
init_params.depth_mode = sl.DEPTH_MODE.ULTRA  # Set depth mode

# Open the camera
status = zed.open(init_params)
if status != sl.ERROR_CODE.SUCCESS:
    print(f"Camera failed to open: {status}")
    exit(1)

# Create a Mat object for the image (GPU memory type)
resolution = zed.get_camera_information().camera_resolution
image_gpu = sl.Mat(resolution.width, resolution.height,
                   sl.MAT_TYPE.U8_C4, sl.MEM.GPU)

# Capture an image frame
runtime_params = sl.RuntimeParameters()

if zed.grab(runtime_params) == sl.ERROR_CODE.SUCCESS:
    # Retrieve image directly into GPU memory
    zed.retrieve_image(image_gpu, sl.VIEW.LEFT, sl.MEM.GPU)

    # Now `image_gpu` holds the image in GPU memory
    print("Image captured and stored in CUDA memory")

    # Copy the image from GPU to CPU while the camera (and its CUDA
    # context) is still open
    image_cpu = sl.Mat()
    image_gpu.copy_to(image_cpu, sl.COPY_TYPE.GPU_CPU)

    # Save the image (this is in CPU memory now)
    image_cpu.write("image_from_cuda.png")

# Close the camera
zed.close()
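If the frame is needed for further CPU-side processing rather than just a file on disk, the CPU-side sl.Mat can be viewed as a numpy array via get_data(). A short sketch, assuming opencv-python is installed (the output file name is illustrative):

# Hand the CPU-side image to OpenCV; get_data() exposes the sl.Mat
# contents as a numpy array (ZED images are 4-channel BGRA).
import cv2

bgra = image_cpu.get_data()
bgr = cv2.cvtColor(bgra, cv2.COLOR_BGRA2BGR)
cv2.imwrite("image_from_cuda_cv.png", bgr)  # illustrative output name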
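The two nodes below import a cuda_manager module that is not included in this commit. Here is a minimal sketch of what it could look like, assuming it mirrors the C++ CudaStreamManager above and is backed by pycuda; the attribute names are inferred from the calls the nodes make:

# cuda_manager.py - sketch of the module imported below; assumed, not
# part of this commit.
import pycuda.autoinit  # creates a shared CUDA context on import
import pycuda.driver as cuda

class CudaStreamManager:
    def __init__(self):
        # Single CUDA stream shared by every pipeline stage
        self.stream = cuda.Stream()
        # Events used to order the stages on that stream
        self.preprocess_done = cuda.Event()
        self.inference_done = cuda.Event()

    def get_stream(self):
        return self.stream

    def get_preprocess_event(self):
        return self.preprocess_done

    def get_inference_event(self):
        return self.inference_done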
# inference_node.py
import rclpy
from rclpy.node import Node
from cuda_manager import CudaStreamManager
import pycuda.driver as cuda
import numpy as np

class InferenceNode(Node):
    def __init__(self, cuda_manager):
        super().__init__('inference_node')
        self.cuda_manager = cuda_manager

    def infer(self):
        self.get_logger().info("Waiting for preprocessing to complete...")
        self.cuda_manager.get_preprocess_event().synchronize()
        self.get_logger().info("Starting inference on GPU...")

        # Simulate inference on GPU
        data = np.random.randn(1024, 1024).astype(np.float32)
        gpu_data = cuda.mem_alloc(data.nbytes)
        cuda.memcpy_htod_async(gpu_data, data, self.cuda_manager.get_stream())

        # Signal inference completion
        self.cuda_manager.get_inference_event().record(self.cuda_manager.get_stream())
        self.get_logger().info("Inference complete.")


# postprocessing_node.py
import rclpy
from rclpy.node import Node
from cuda_manager import CudaStreamManager
import pycuda.driver as cuda
import numpy as np

class PostprocessingNode(Node):
    def __init__(self, cuda_manager):
        super().__init__('postprocessing_node')
        self.cuda_manager = cuda_manager

    def postprocess(self):
        self.get_logger().info("Waiting for inference to complete...")
        self.cuda_manager.get_inference_event().synchronize()
        self.get_logger().info("Starting postprocessing on GPU...")

        # Simulate postprocessing on GPU
        data = np.random.randn(1024, 1024).astype(np.float32)
        gpu_data = cuda.mem_alloc(data.nbytes)
        cuda.memcpy_htod_async(gpu_data, data, self.cuda_manager.get_stream())

        # Assume postprocessing is complete
        self.get_logger().info("Postprocessing complete.")