Commit d83f37b
dev container changes + poc code
Ishaan-Datta committed Oct 30, 2024
1 parent 1ba87b5 commit d83f37b
Showing 14 changed files with 662 additions and 622 deletions.
44 changes: 43 additions & 1 deletion .devcontainer/ROS/devcontainer.json
@@ -15,4 +15,46 @@
"-it"
],
"postCreateCommand": "bash -c 'echo source /opt/ros/humble/setup.bash >> ~/.bashrc && source /opt/ros/humble/setup.bash'"
}

// {
//   "name": "ROS 2 Development Container",
//   "privileged": true,
//   "remoteUser": "YOUR_USERNAME",
//   "build": {
//     "dockerfile": "Dockerfile",
//     "args": {
//       "USERNAME": "YOUR_USERNAME"
//     }
//   },
//   "workspaceFolder": "/home/ws",
//   "workspaceMount": "source=${localWorkspaceFolder},target=/home/ws,type=bind",
//   "customizations": {
//     "vscode": {
//       "extensions": [
//         "ms-vscode.cpptools",
//         "ms-vscode.cpptools-themes",
//         "twxs.cmake",
//         "donjayamanne.python-extension-pack",
//         "eamodio.gitlens",
//         "ms-iot.vscode-ros"
//       ]
//     }
//   },
//   "containerEnv": {
//     "DISPLAY": "unix:0",
//     "ROS_LOCALHOST_ONLY": "1",
//     "ROS_DOMAIN_ID": "42"
//   },
//   "runArgs": [
//     "--net=host",
//     "--pid=host",
//     "--ipc=host",
//     "-e", "DISPLAY=${env:DISPLAY}"
//   ],
//   "mounts": [
//     "source=/tmp/.X11-unix,target=/tmp/.X11-unix,type=bind,consistency=cached",
//     "source=/dev/dri,target=/dev/dri,type=bind,consistency=cached"
//   ],
//   "postCreateCommand": "sudo rosdep update && sudo rosdep install --from-paths src --ignore-src -y && sudo chown -R $(whoami) /home/ws/"
// }
82 changes: 41 additions & 41 deletions cpp_wip/cuda_stream_manager.cpp
@@ -1,41 +1,41 @@
// cuda_stream_manager.hpp
#pragma once
#include <cuda_runtime.h>
#include <memory>

class CudaStreamManager {
public:
    CudaStreamManager() {
        // Create a single CUDA stream
        cudaStreamCreate(&stream_);

        // Create CUDA events
        cudaEventCreate(&preprocess_done_);
        cudaEventCreate(&inference_done_);
    }

    ~CudaStreamManager() {
        // Destroy CUDA stream and events
        cudaStreamDestroy(stream_);
        cudaEventDestroy(preprocess_done_);
        cudaEventDestroy(inference_done_);
    }

    cudaStream_t getStream() const {
        return stream_;
    }

    cudaEvent_t& getPreprocessEvent() {
        return preprocess_done_;
    }

    cudaEvent_t& getInferenceEvent() {
        return inference_done_;
    }

private:
    cudaStream_t stream_;
    cudaEvent_t preprocess_done_, inference_done_;
};

using CudaStreamManagerPtr = std::shared_ptr<CudaStreamManager>;
46 changes: 23 additions & 23 deletions cpp_wip/preprocessing_node.cpp
@@ -1,23 +1,23 @@
// preprocessing_node.cpp
#include <rclcpp/rclcpp.hpp>
#include "cuda_stream_manager.hpp"

class PreprocessingNode : public rclcpp::Node {
public:
    explicit PreprocessingNode(const CudaStreamManagerPtr& cuda_manager)
        : Node("preprocessing_node"), cuda_manager_(cuda_manager) {}

    // The component container constructs nodes from NodeOptions, so the
    // registration macro below needs this constructor; it falls back to
    // creating its own stream manager.
    explicit PreprocessingNode(const rclcpp::NodeOptions& options)
        : Node("preprocessing_node", options),
          cuda_manager_(std::make_shared<CudaStreamManager>()) {}

    void preprocess() {
        // Perform GPU preprocessing here using cuda_manager_->getStream()

        // Signal that preprocessing is done
        cudaEventRecord(cuda_manager_->getPreprocessEvent(), cuda_manager_->getStream());
    }

private:
    CudaStreamManagerPtr cuda_manager_;
};

// Register as a composable node
#include "rclcpp_components/register_node_macro.hpp"
RCLCPP_COMPONENTS_REGISTER_NODE(PreprocessingNode)
84 changes: 42 additions & 42 deletions python_wip/cuda_download_1.py
@@ -1,42 +1,42 @@
import pyzed.sl as sl

# Create a ZED Camera object
zed = sl.Camera()

# Create InitParameters object and set configuration parameters
init_params = sl.InitParameters()
init_params.camera_resolution = sl.RESOLUTION.HD720  # Set resolution
init_params.depth_mode = sl.DEPTH_MODE.ULTRA  # Set depth mode

# Open the camera
status = zed.open(init_params)
if status != sl.ERROR_CODE.SUCCESS:
    print(f"Camera failed to open: {status}")
    exit(1)

# Create a Mat object for the image (GPU memory type)
image_gpu = sl.Mat(zed.get_camera_information().camera_resolution.width,
                   zed.get_camera_information().camera_resolution.height,
                   sl.MAT_TYPE.U8_C4, sl.MEM.GPU)

# Capture an image frame
runtime_params = sl.RuntimeParameters()

if zed.grab(runtime_params) == sl.ERROR_CODE.SUCCESS:
    # Retrieve image directly into GPU memory
    zed.retrieve_image(image_gpu, sl.VIEW.LEFT, sl.MEM.GPU)

    # Now `image_gpu` holds the image in GPU memory
    print("Image captured and stored in CUDA memory")

# Create a CPU Mat to store the image
image_cpu = sl.Mat()

# Copy image from GPU to CPU before closing the camera, since the GPU
# buffer lives in the SDK's CUDA context
image_gpu.copy_to(image_cpu)

# Save the image (this is in CPU memory now)
image_cpu.write("image_from_cuda.png")

# Close the camera
zed.close()
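Since the capture path above keeps the frame on the device, the natural next step for this POC is handing the raw CUDA address to other GPU code instead of downloading it. A minimal sketch, assuming the ZED Python API's Mat.get_pointer() accepts a memory type and returns a device address for a GPU-allocated Mat (the helper name device_pointer_of is ours):

import pyzed.sl as sl

def device_pointer_of(image_gpu: sl.Mat) -> int:
    # Assumption: get_pointer(sl.MEM.GPU) returns the raw CUDA device
    # address when the Mat was allocated with sl.MEM.GPU, as above.
    return image_gpu.get_pointer(sl.MEM.GPU)

# Hypothetical hand-off: pass the address (plus width/height/step) to a
# CUDA kernel or TensorRT binding rather than copying the frame to the CPU.
# ptr = device_pointer_of(image_gpu)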
102 changes: 51 additions & 51 deletions python_wip/cuda_stream_inference.py
@@ -1,51 +1,51 @@
# inference_node.py
import rclpy
from rclpy.node import Node
from cuda_manager import CudaStreamManager
import pycuda.driver as cuda
import numpy as np

class InferenceNode(Node):
    def __init__(self, cuda_manager):
        super().__init__('inference_node')
        self.cuda_manager = cuda_manager

    def infer(self):
        self.get_logger().info("Waiting for preprocessing to complete...")
        self.cuda_manager.get_preprocess_event().synchronize()
        self.get_logger().info("Starting inference on GPU...")

        # Simulate inference on GPU
        data = np.random.randn(1024, 1024).astype(np.float32)
        gpu_data = cuda.mem_alloc(data.nbytes)
        cuda.memcpy_htod_async(gpu_data, data, self.cuda_manager.get_stream())

        # Signal inference completion
        self.cuda_manager.get_inference_event().record(self.cuda_manager.get_stream())
        self.get_logger().info("Inference complete.")

# post processing:
# postprocessing_node.py
import rclpy
from rclpy.node import Node
from cuda_manager import CudaStreamManager
import pycuda.driver as cuda
import numpy as np

class PostprocessingNode(Node):
    def __init__(self, cuda_manager):
        super().__init__('postprocessing_node')
        self.cuda_manager = cuda_manager

    def postprocess(self):
        self.get_logger().info("Waiting for inference to complete...")
        self.cuda_manager.get_inference_event().synchronize()
        self.get_logger().info("Starting postprocessing on GPU...")

        # Simulate postprocessing on GPU
        data = np.random.randn(1024, 1024).astype(np.float32)
        gpu_data = cuda.mem_alloc(data.nbytes)
        cuda.memcpy_htod_async(gpu_data, data, self.cuda_manager.get_stream())

        # Assume postprocessing is complete
        self.get_logger().info("Postprocessing complete.")