diff --git a/Calibration Tools/recorder.py b/Calibration Tools/recorder.py deleted file mode 100644 index d3cbe17..0000000 --- a/Calibration Tools/recorder.py +++ /dev/null @@ -1,26 +0,0 @@ -import cv2 -import numpy as np -import os - -os.chdir("23-I-12_SysArch/Experiments/Calibration Utilities") - -def video_writer(frames, fps=20): - frame_width = frames[0].shape[1] - frame_height = frames[0].shape[0] - - fourcc = cv2.VideoWriter_fourcc(*'XVID') - output = cv2.VideoWriter('output.mp4', fourcc, fps, (frame_width, frame_height)) - - frames_list = frames.keys() - - timestamps = frames.items() - - time_intervals = [j-i for i, j in zip(timestamps[:-1], timestamps[1:])] # unix timestamps in seconds - - for frame, interval in zip(frames, time_intervals): - duplicates = int(round(interval * fps)) - - for _ in range(duplicates): - output.write(frame) - - output.release() \ No newline at end of file diff --git a/Experiments/utils/model_conversion_tools.py b/Experiments/utils/model_conversion_tools.py deleted file mode 100644 index 87a0b65..0000000 --- a/Experiments/utils/model_conversion_tools.py +++ /dev/null @@ -1,83 +0,0 @@ -import tensorflow as tf -import numpy as np -import tf2onnx -import onnx -import onnxruntime as ort -import torch - - -# assumes that the model only has a single input and a single output layer - -OPSET_VERS = 13 -# given a tensorflow model, convert it to onnx, save it to dest file and return an onnx inference session -# takes in a tensorflow model and a target path for the onnx model file -# onnx models are saved with a .onnx extension -def tf_to_onnx(model, dest_path): - input_signature = [tf.TensorSpec( model.input_shape, model.input.dtype, name="input")] - # Use from_function for tf functions - onnx_model, _ = tf2onnx.convert.from_keras(model, input_signature, opset=OPSET_VERS) - onnx.save(onnx_model, dest_path) - - # Convert the model to a serialized string format - onnx_model_str = onnx_model.SerializeToString() - - # Create an InferenceSession. This is the object that will run the model. 
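    # Note: ort.InferenceSession accepts either a path to a .onnx file or the
    # serialized model bytes, so the in-memory string can be passed to
    # loadOnnxModel directly instead of re-reading the file written to dest_path.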
- return loadOnnxModel(onnx_model_str) - - -# given a torch model, convert it to onnx, save it to dest file and return an onnx inference session -def torch_to_onnx(model, example_input, dest_path): - torch.onnx.export(model, # model being run - example_input, # model input (or a tuple for multiple inputs) - dest_path, # where to save the model (can be a file or file-like object) - export_params=True, # store the trained parameter weights inside the model file - opset_version=OPSET_VERS, # the ONNX version to export the model to - # do_constant_folding=True, # whether to execute constant folding for optimization - dynamic_axes={'input' : {0 : 'batch_size'}, # variable length axes - 'output' : {0 : 'batch_size'}}) - return loadOnnxModel(dest_path) - -def loadOnnxModel(path, providers=["CUDAExecutionProvider"]): - return ort.InferenceSession(path,providers=providers) - - -# given an array of test inputs and a path to onnx model or a session returns the predictions -def predictOnnx(x_test,session=None,dest_path=""): - if session is None and dest_path == "": - raise ValueError("No model or path provided, please specifiy one of them.") - if session is None: - session = loadOnnxModel(dest_path) - - results_ort = session.run([out.name for out in session.get_outputs()], {session.get_inputs()[0].name: x_test}) - return np.array(results_ort[0]) - - -# given the predictions from the original model and the converted model, check if they are consistent -# shape of predictions_original and converted_results should be the same -# only checks for the predicted class (aka the argmax) -# takes in two 2D arrays: first dimension is the number of samples, second dimension is the number of classes and values correspond to confidence -def checkPredictionConsistency(predictions_original, converted_results): - for n in range(predictions_original.shape[0]): - if np.argmax(predictions_original[n]) != np.argmax(converted_results[n]): - print(f"Original: {np.argmax(predictions_original[n])}, ONNX: {np.argmax(converted_results[n])}") - print(f"{predictions_original[n]}, \n{converted_results[n]}") - print("=====================================") - raise ValueError("Predictions are not consistent") - - print("All predictions are consistent") - -# given the predictions from the original model and the converted model, check if they are consistent -# shape of predictions_original and converted_results should be the same -# only checks for the difference in confidence -# takes in two 2D arrays: first dimension is the number of samples, second dimension is the number of classes and values correspond to confidence -# tolerance: the maximum difference in confidence that is allowed -def checkConfidenceConsistency(predictions_original, converted_results, tolerance=1e-5): - np.testing.assert_allclose(predictions_original, converted_results,atol=tolerance) - # for n in range(predictions_original.shape[0]): - # if not np.allclose(predictions_original[n], converted_results[n], atol=tolerance): - # print(f"Original: \t {predictions_original[n]}, \nONNX: \t{converted_results[n]}") - # print("=====================================") - # return - - print("All confidence percentages are consistent") - diff --git a/Experiments/valery_tests/.gitignore b/Experiments/valery_tests/.gitignore deleted file mode 100644 index 4233f44..0000000 --- a/Experiments/valery_tests/.gitignore +++ /dev/null @@ -1,6 +0,0 @@ -ros2_ws/build/ -ros2_ws/install/ -ros2_ws/log/ - -ros2_ws/src/onnx_cpp/src/images/ -*.onnx \ No newline at end of file diff --git 
a/Experiments/valery_tests/conversion util demo/conversion_utility_demo.ipynb b/Experiments/valery_tests/conversion util demo/conversion_utility_demo.ipynb deleted file mode 100644 index b75c439..0000000 --- a/Experiments/valery_tests/conversion util demo/conversion_utility_demo.ipynb +++ /dev/null @@ -1,389 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:From c:\\Users\\Valery\\miniconda3\\envs\\cpsc330\\lib\\site-packages\\keras\\src\\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.\n", - "\n", - "WARNING:tensorflow:From c:\\Users\\Valery\\miniconda3\\envs\\cpsc330\\lib\\site-packages\\tf2onnx\\tf_loader.py:68: The name tf.reset_default_graph is deprecated. Please use tf.compat.v1.reset_default_graph instead.\n", - "\n", - "WARNING:tensorflow:From c:\\Users\\Valery\\miniconda3\\envs\\cpsc330\\lib\\site-packages\\tf2onnx\\tf_loader.py:72: The name tf.train.import_meta_graph is deprecated. Please use tf.compat.v1.train.import_meta_graph instead.\n", - "\n" - ] - } - ], - "source": [ - "import tensorflow as tf\n", - "import numpy as np\n", - "\n", - "import sys\n", - "sys.path.append('./../../utils')\n", - "import model_conversion_tools as mct\n", - "from sample_pytorch_mnist import Net, train, test" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Part 1: sample Tensorflow project\n", - "\n", - "sample project from our yolo test model" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "# Step 1: Load and Preprocess Fashion MNIST Data\n", - "(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()\n", - "# Preprocess x_test\n", - "x_test = x_test.reshape((10000, 28, 28, 1)).astype('float32') / 255\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### training a sample tensorflow model:\n", - "\n", - "To get the model for this example, feel free to look at https://github.com/UBCAgroBot/AppliedAI/blob/13-conversion-tools/23-I-12_SysArch/Test%20Models/YOLO_testmodel.py " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### loading the model, converting to onnx and saving the model" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:From c:\\Users\\Valery\\miniconda3\\envs\\cpsc330\\lib\\site-packages\\keras\\src\\backend.py:1398: The name tf.executing_eagerly_outside_functions is deprecated. Please use tf.compat.v1.executing_eagerly_outside_functions instead.\n", - "\n", - "WARNING:tensorflow:From c:\\Users\\Valery\\miniconda3\\envs\\cpsc330\\lib\\site-packages\\keras\\src\\layers\\pooling\\max_pooling2d.py:161: The name tf.nn.max_pool is deprecated. 
Please use tf.nn.max_pool2d instead.\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "c:\\Users\\Valery\\miniconda3\\envs\\cpsc330\\lib\\site-packages\\onnxruntime\\capi\\onnxruntime_inference_collection.py:69: UserWarning: Specified provider 'CUDAExecutionProvider' is not in available provider names.Available providers: 'AzureExecutionProvider, CPUExecutionProvider'\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "# Later, if you want to load and evaluate the model without retraining\n", - "tf_model = tf.keras.models.load_model(\"fashion_mnist_model.h5\")\n", - "\n", - "tf_onnx_dest_path = \".\\\\tf_model.onnx\"\n", - "\n", - "onnx_sess = mct.tf_to_onnx(tf_model, tf_onnx_dest_path)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Running the model" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "313/313 [==============================] - 1s 2ms/step\n", - "All predictions are consistent\n", - "All confidence percentages are consistent\n" - ] - } - ], - "source": [ - "tf_predictions = tf_model.predict(x_test)\n", - "results_tf_ort = mct.predictOnnx(x_test, session=onnx_sess)\n", - "mct.checkPredictionConsistency(tf_predictions, results_tf_ort)\n", - "mct.checkConfidenceConsistency(tf_predictions, results_tf_ort)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Part 2: sample pyTorch workflow" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### creating sample pytorch model \n", - "\n", - "The example model is from this website: https://nextjournal.com/gkoehler/pytorch-mnist" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import torch\n", - "import torchvision\n", - "import torch.optim as optim\n", - "\n", - "batch_size_test = 1000\n", - "learning_rate = 0.01\n", - "momentum = 0.5\n", - "random_seed = 1\n", - "torch.backends.cudnn.enabled = False\n", - "torch.manual_seed(random_seed)\n", - "\n", - "test_loader = torch.utils.data.DataLoader(\n", - " torchvision.datasets.MNIST('/files/', train=False, download=True,\n", - " transform=torchvision.transforms.Compose([\n", - " torchvision.transforms.ToTensor(),\n", - " torchvision.transforms.Normalize(\n", - " (0.1307,), (0.3081,))\n", - " ])),\n", - " batch_size=batch_size_test, shuffle=True)\n", - "\n", - "examples = enumerate(test_loader)\n", - "batch_idx, (example_data, example_targets) = next(examples)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Train Epoch: 1 [0/60000 (0%)]\tLoss: 0.201695\n", - "Train Epoch: 1 [640/60000 (1%)]\tLoss: 0.402293\n", - "Train Epoch: 1 [1280/60000 (2%)]\tLoss: 0.514808\n", - "Train Epoch: 1 [1920/60000 (3%)]\tLoss: 0.204468\n", - "Train Epoch: 1 [2560/60000 (4%)]\tLoss: 0.443682\n", - "Train Epoch: 1 [3200/60000 (5%)]\tLoss: 0.483486\n", - "Train Epoch: 1 [3840/60000 (6%)]\tLoss: 0.182736\n", - "Train Epoch: 1 [4480/60000 (7%)]\tLoss: 0.376792\n", - "Train Epoch: 1 [5120/60000 (9%)]\tLoss: 0.436870\n", - "Train Epoch: 1 [5760/60000 (10%)]\tLoss: 0.322312\n", - "Train Epoch: 1 [6400/60000 (11%)]\tLoss: 0.328916\n", - "Train Epoch: 1 [7040/60000 (12%)]\tLoss: 0.346527\n", - "Train Epoch: 1 [7680/60000 (13%)]\tLoss: 0.353329\n", - "Train Epoch: 1 [8320/60000 (14%)]\tLoss: 0.643490\n", - 
"Train Epoch: 1 [8960/60000 (15%)]\tLoss: 0.310386\n", - "Train Epoch: 1 [9600/60000 (16%)]\tLoss: 0.170429\n", - "Train Epoch: 1 [10240/60000 (17%)]\tLoss: 0.542867\n", - "Train Epoch: 1 [10880/60000 (18%)]\tLoss: 0.262466\n", - "Train Epoch: 1 [11520/60000 (19%)]\tLoss: 0.291557\n", - "Train Epoch: 1 [12160/60000 (20%)]\tLoss: 0.175815\n", - "Train Epoch: 1 [12800/60000 (21%)]\tLoss: 0.280448\n", - "Train Epoch: 1 [13440/60000 (22%)]\tLoss: 0.237583\n", - "Train Epoch: 1 [14080/60000 (23%)]\tLoss: 0.476997\n", - "Train Epoch: 1 [14720/60000 (25%)]\tLoss: 0.298690\n", - "Train Epoch: 1 [15360/60000 (26%)]\tLoss: 0.455927\n", - "Train Epoch: 1 [16000/60000 (27%)]\tLoss: 0.544582\n", - "Train Epoch: 1 [16640/60000 (28%)]\tLoss: 0.389590\n", - "Train Epoch: 1 [17280/60000 (29%)]\tLoss: 0.381361\n", - "Train Epoch: 1 [17920/60000 (30%)]\tLoss: 0.529101\n", - "Train Epoch: 1 [18560/60000 (31%)]\tLoss: 0.397620\n", - "Train Epoch: 1 [19200/60000 (32%)]\tLoss: 0.275069\n", - "Train Epoch: 1 [19840/60000 (33%)]\tLoss: 0.131036\n", - "Train Epoch: 1 [20480/60000 (34%)]\tLoss: 0.313424\n", - "Train Epoch: 1 [21120/60000 (35%)]\tLoss: 0.159157\n", - "Train Epoch: 1 [21760/60000 (36%)]\tLoss: 0.187306\n", - "Train Epoch: 1 [22400/60000 (37%)]\tLoss: 0.216002\n", - "Train Epoch: 1 [23040/60000 (38%)]\tLoss: 0.205954\n", - "Train Epoch: 1 [23680/60000 (39%)]\tLoss: 0.246680\n", - "Train Epoch: 1 [24320/60000 (41%)]\tLoss: 0.413843\n", - "Train Epoch: 1 [24960/60000 (42%)]\tLoss: 0.144111\n", - "Train Epoch: 1 [25600/60000 (43%)]\tLoss: 0.283287\n", - "Train Epoch: 1 [26240/60000 (44%)]\tLoss: 0.373115\n", - "Train Epoch: 1 [26880/60000 (45%)]\tLoss: 0.301240\n", - "Train Epoch: 1 [27520/60000 (46%)]\tLoss: 0.197964\n", - "Train Epoch: 1 [28160/60000 (47%)]\tLoss: 0.236263\n", - "Train Epoch: 1 [28800/60000 (48%)]\tLoss: 0.328294\n", - "Train Epoch: 1 [29440/60000 (49%)]\tLoss: 0.208403\n", - "Train Epoch: 1 [30080/60000 (50%)]\tLoss: 0.307872\n", - "Train Epoch: 1 [30720/60000 (51%)]\tLoss: 0.238934\n", - "Train Epoch: 1 [31360/60000 (52%)]\tLoss: 0.212886\n", - "Train Epoch: 1 [32000/60000 (53%)]\tLoss: 0.127136\n", - "Train Epoch: 1 [32640/60000 (54%)]\tLoss: 0.564887\n", - "Train Epoch: 1 [33280/60000 (55%)]\tLoss: 0.333053\n", - "Train Epoch: 1 [33920/60000 (57%)]\tLoss: 0.146245\n", - "Train Epoch: 1 [34560/60000 (58%)]\tLoss: 0.153743\n", - "Train Epoch: 1 [35200/60000 (59%)]\tLoss: 0.335380\n", - "Train Epoch: 1 [35840/60000 (60%)]\tLoss: 0.273913\n", - "Train Epoch: 1 [36480/60000 (61%)]\tLoss: 0.194380\n", - "Train Epoch: 1 [37120/60000 (62%)]\tLoss: 0.306026\n", - "Train Epoch: 1 [37760/60000 (63%)]\tLoss: 0.205702\n", - "Train Epoch: 1 [38400/60000 (64%)]\tLoss: 0.411659\n", - "Train Epoch: 1 [39040/60000 (65%)]\tLoss: 0.370080\n", - "Train Epoch: 1 [39680/60000 (66%)]\tLoss: 0.114386\n", - "Train Epoch: 1 [40320/60000 (67%)]\tLoss: 0.374712\n", - "Train Epoch: 1 [40960/60000 (68%)]\tLoss: 0.429138\n", - "Train Epoch: 1 [41600/60000 (69%)]\tLoss: 0.338773\n", - "Train Epoch: 1 [42240/60000 (70%)]\tLoss: 0.333577\n", - "Train Epoch: 1 [42880/60000 (71%)]\tLoss: 0.172249\n", - "Train Epoch: 1 [43520/60000 (72%)]\tLoss: 0.172840\n", - "Train Epoch: 1 [44160/60000 (74%)]\tLoss: 0.222220\n", - "Train Epoch: 1 [44800/60000 (75%)]\tLoss: 0.211585\n", - "Train Epoch: 1 [45440/60000 (76%)]\tLoss: 0.192030\n", - "Train Epoch: 1 [46080/60000 (77%)]\tLoss: 0.394633\n", - "Train Epoch: 1 [46720/60000 (78%)]\tLoss: 0.291842\n", - "Train Epoch: 1 [47360/60000 (79%)]\tLoss: 0.288159\n", - "Train Epoch: 1 
[48000/60000 (80%)]\tLoss: 0.356571\n", - "Train Epoch: 1 [48640/60000 (81%)]\tLoss: 0.411447\n", - "Train Epoch: 1 [49280/60000 (82%)]\tLoss: 0.454328\n", - "Train Epoch: 1 [49920/60000 (83%)]\tLoss: 0.208373\n", - "Train Epoch: 1 [50560/60000 (84%)]\tLoss: 0.370765\n", - "Train Epoch: 1 [51200/60000 (85%)]\tLoss: 0.288299\n", - "Train Epoch: 1 [51840/60000 (86%)]\tLoss: 0.129866\n", - "Train Epoch: 1 [52480/60000 (87%)]\tLoss: 0.195489\n", - "Train Epoch: 1 [53120/60000 (88%)]\tLoss: 0.166831\n", - "Train Epoch: 1 [53760/60000 (90%)]\tLoss: 0.101241\n", - "Train Epoch: 1 [54400/60000 (91%)]\tLoss: 0.237279\n", - "Train Epoch: 1 [55040/60000 (92%)]\tLoss: 0.206515\n", - "Train Epoch: 1 [55680/60000 (93%)]\tLoss: 0.204575\n", - "Train Epoch: 1 [56320/60000 (94%)]\tLoss: 0.352887\n", - "Train Epoch: 1 [56960/60000 (95%)]\tLoss: 0.273244\n", - "Train Epoch: 1 [57600/60000 (96%)]\tLoss: 0.239463\n", - "Train Epoch: 1 [58240/60000 (97%)]\tLoss: 0.401538\n", - "Train Epoch: 1 [58880/60000 (98%)]\tLoss: 0.140155\n", - "Train Epoch: 1 [59520/60000 (99%)]\tLoss: 0.235840\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "c:\\Users\\Valery\\miniconda3\\envs\\cpsc330\\lib\\site-packages\\torch\\nn\\_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead.\n", - " warnings.warn(warning.format(ret))\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Test set: Avg. loss: 0.0907, Accuracy: 9714/10000 (97%)\n", - "\n" - ] - } - ], - "source": [ - "network = Net()\n", - "optimizer = optim.SGD(network.parameters(), lr=learning_rate,\n", - " momentum=momentum)\n", - "\n", - "for epoch in range(1,2):\n", - " train(epoch)\n", - " test()\n", - "\n", - "network.eval()\n", - "torch_out_load = network(example_data)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "c:\\Users\\Valery\\Documents\\GitHub\\AppliedAI\\23-I-12_SysArch\\Experiments\\valery_tests\\conversion util demo\\sample_pytorch_mnist.py:62: UserWarning: Implicit dimension choice for log_softmax has been deprecated. Change the call to include dim=X as an argument.\n", - " return F.log_softmax(x)\n", - "c:\\Users\\Valery\\miniconda3\\envs\\cpsc330\\lib\\site-packages\\onnxruntime\\capi\\onnxruntime_inference_collection.py:69: UserWarning: Specified provider 'CUDAExecutionProvider' is not in available provider names.Available providers: 'AzureExecutionProvider, CPUExecutionProvider'\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "mct.torch_to_onnx(network, example_data, \"torch_model.onnx\")\n", - "\n", - "ort_session = mct.loadOnnxModel(\"torch_model.onnx\")\n", - "ort_predictions = mct.predictOnnx(example_data.numpy(), session=ort_session)\n", - "\n", - "ort_predictions[0][0] = 3\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Original: 1, ONNX: 0\n", - "[-2.4549718 -2.1185706 -2.4687483 -2.2076657 -2.3239005 -2.2315974\n", - " -2.2770364 -2.3017619 -2.6148717 -2.1354787], \n", - "[ 3. 
-2.1185706 -2.4687483 -2.2076654 -2.3239005 -2.2315974\n", - " -2.2770364 -2.3017619 -2.6148717 -2.1354787]\n", - "=====================================\n", - "All predictions are consistent\n", - "Original: \t [-2.4549718 -2.1185706 -2.4687483 -2.2076657 -2.3239005 -2.2315974\n", - " -2.2770364 -2.3017619 -2.6148717 -2.1354787], \n", - "ONNX: \t[ 3. -2.1185706 -2.4687483 -2.2076654 -2.3239005 -2.2315974\n", - " -2.2770364 -2.3017619 -2.6148717 -2.1354787]\n", - "=====================================\n", - "All confidence percentages are consistent\n" - ] - } - ], - "source": [ - "mct.checkPredictionConsistency(torch_out_load.detach().numpy(), ort_predictions)\n", - "mct.checkConfidenceConsistency(torch_out_load.detach().numpy(), ort_predictions)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.0" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/Experiments/valery_tests/conversion util demo/sample_pytorch_mnist.py b/Experiments/valery_tests/conversion util demo/sample_pytorch_mnist.py deleted file mode 100644 index 939d94a..0000000 --- a/Experiments/valery_tests/conversion util demo/sample_pytorch_mnist.py +++ /dev/null @@ -1,105 +0,0 @@ -#creating sample pytorch model -#The example model is from this website: https://nextjournal.com/gkoehler/pytorch-mnist -dest_torch_path = "" - - -import torch -import torch.nn as nn -import torch.nn.functional as F - -import torch.optim as optim -import torchvision - -n_epochs = 3 -batch_size_train = 64 -batch_size_test = 1000 -learning_rate = 0.01 -momentum = 0.5 -log_interval = 10 - -random_seed = 1 -torch.backends.cudnn.enabled = False -torch.manual_seed(random_seed) - -train_loader = torch.utils.data.DataLoader( - torchvision.datasets.MNIST('/files/', train=True, download=True, - transform=torchvision.transforms.Compose([ - torchvision.transforms.ToTensor(), - torchvision.transforms.Normalize( - (0.1307,), (0.3081,)) - ])), - batch_size=batch_size_train, shuffle=True) - -test_loader = torch.utils.data.DataLoader( - torchvision.datasets.MNIST('/files/', train=False, download=True, - transform=torchvision.transforms.Compose([ - torchvision.transforms.ToTensor(), - torchvision.transforms.Normalize( - (0.1307,), (0.3081,)) - ])), - batch_size=batch_size_test, shuffle=True) - -examples = enumerate(test_loader) -batch_idx, (example_data, example_targets) = next(examples) - - -class Net(nn.Module): - def __init__(self): - super(Net, self).__init__() - self.conv1 = nn.Conv2d(1, 10, kernel_size=5) - self.conv2 = nn.Conv2d(10, 20, kernel_size=5) - self.conv2_drop = nn.Dropout2d() - self.fc1 = nn.Linear(320, 50) - self.fc2 = nn.Linear(50, 10) - - def forward(self, x): - x = F.relu(F.max_pool2d(self.conv1(x), 2)) - x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) - x = x.view(-1, 320) - x = F.relu(self.fc1(x)) - x = F.dropout(x, training=self.training) - x = self.fc2(x) - return F.log_softmax(x) - -network = Net() -optimizer = optim.SGD(network.parameters(), lr=learning_rate, - momentum=momentum) - -train_losses = [] -train_counter = [] -test_losses = [] -test_counter = [i*len(train_loader.dataset) for i in range(n_epochs + 1)] - -def train(epoch): - network.train() - for batch_idx, (data, target) in 
enumerate(train_loader): - optimizer.zero_grad() - output = network(data) - loss = F.nll_loss(output, target) - loss.backward() - optimizer.step() - if batch_idx % log_interval == 0: - print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( - epoch, batch_idx * len(data), len(train_loader.dataset), - 100. * batch_idx / len(train_loader), loss.item())) - train_losses.append(loss.item()) - train_counter.append( - (batch_idx*64) + ((epoch-1)*len(train_loader.dataset))) - torch.save(network.state_dict(), dest_torch_path + 'model.pth') - torch.save(optimizer.state_dict(), dest_torch_path + 'optimizer.pth') - -def test(): - network.eval() - test_loss = 0 - correct = 0 - with torch.no_grad(): - for data, target in test_loader: - output = network(data) - test_loss += F.nll_loss(output, target, size_average=False).item() - pred = output.data.max(1, keepdim=True)[1] - correct += pred.eq(target.data.view_as(pred)).sum() - test_loss /= len(test_loader.dataset) - test_losses.append(test_loss) - print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( - test_loss, correct, len(test_loader.dataset), - 100. * correct / len(test_loader.dataset))) diff --git a/Experiments/valery_tests/ros2_ws/README.MD b/Experiments/valery_tests/ros2_ws/README.MD deleted file mode 100644 index d0ff754..0000000 --- a/Experiments/valery_tests/ros2_ws/README.MD +++ /dev/null @@ -1,23 +0,0 @@ -## Testing the performance of the zed ros2 wrapper: - -### using top: -ros2 node list -top -c -p $(pgrep -d',' -f name_of_command) - - -### using rqt: -install rqt with - -`sudo apt install ros-humble-rqt*` -sudo apt install ros-${ROS_DISTRO}-rqt-top -to launch, just type `rqt` into the terminal - - -### visualizing the image topics -source: https://ros2jsguy.medium.com/4-data-visualization-using-ros-2-rviz2-pub-sub-communications-and-javascript-typescript-6e43cde75029 - - - - -### looking at the rviz2 display -`ros2 launch zed_display_rviz2 display_zed_cam.launch.py camera_model:=zed2` diff --git a/Experiments/valery_tests/ros2_ws/src/custom_interface/CMakeLists.txt b/Experiments/valery_tests/ros2_ws/src/custom_interface/CMakeLists.txt deleted file mode 100644 index 7cfe3db..0000000 --- a/Experiments/valery_tests/ros2_ws/src/custom_interface/CMakeLists.txt +++ /dev/null @@ -1,35 +0,0 @@ -cmake_minimum_required(VERSION 3.8) -project(custom_interface) - -if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") - add_compile_options(-Wall -Wextra -Wpedantic) -endif() - -# find dependencies - -find_package(geometry_msgs REQUIRED) -find_package(rosidl_default_generators REQUIRED) - -rosidl_generate_interfaces(${PROJECT_NAME} - "msg/BoundingBox.msg" -) - - -find_package(ament_cmake REQUIRED) -# uncomment the following section in order to fill in -# further dependencies manually. 
-# find_package( REQUIRED) - -if(BUILD_TESTING) - find_package(ament_lint_auto REQUIRED) - # the following line skips the linter which checks for copyrights - # comment the line when a copyright and license is added to all source files - set(ament_cmake_copyright_FOUND TRUE) - # the following line skips cpplint (only works in a git repo) - # comment the line when this package is in a git repo and when - # a copyright and license is added to all source files - set(ament_cmake_cpplint_FOUND TRUE) - ament_lint_auto_find_test_dependencies() -endif() - -ament_package() diff --git a/Experiments/valery_tests/ros2_ws/src/custom_interface/LICENSE b/Experiments/valery_tests/ros2_ws/src/custom_interface/LICENSE deleted file mode 100644 index d645695..0000000 --- a/Experiments/valery_tests/ros2_ws/src/custom_interface/LICENSE +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. 
The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/Experiments/valery_tests/ros2_ws/src/custom_interface/msg/BoundingBox.msg b/Experiments/valery_tests/ros2_ws/src/custom_interface/msg/BoundingBox.msg deleted file mode 100644 index 9a42642..0000000 --- a/Experiments/valery_tests/ros2_ws/src/custom_interface/msg/BoundingBox.msg +++ /dev/null @@ -1,9 +0,0 @@ -# std_msgs/Header header -# since ros messages can only be 1D arrays, the 2d array gets flattened - -# instead try int32[] xbox, ybox, wbox, hbox (in order of unpacking) - -int32 BOX_WIDTH=4 #width of each box -int32[] box # [x1, y1, x2, y2] top left bottom right -float32[] scores # confidence score of each box -int32[] class_num # class number of each box \ No newline at end of file diff --git a/Experiments/valery_tests/ros2_ws/src/custom_interface/package.xml b/Experiments/valery_tests/ros2_ws/src/custom_interface/package.xml deleted file mode 100644 index e9e7c66..0000000 --- a/Experiments/valery_tests/ros2_ws/src/custom_interface/package.xml +++ /dev/null @@ -1,20 +0,0 @@ - - - - custom_interface - 0.0.0 - TODO: Package description - valery - Apache-2.0 - - ament_cmake - rosidl_default_generators - rosidl_default_runtime - rosidl_interface_packages - ament_lint_auto - ament_lint_common - - - ament_cmake - - diff --git a/Experiments/valery_tests/ros2_ws/src/node_test/CMakeLists.txt b/Experiments/valery_tests/ros2_ws/src/node_test/CMakeLists.txt deleted file mode 100644 index eb681fd..0000000 --- a/Experiments/valery_tests/ros2_ws/src/node_test/CMakeLists.txt +++ /dev/null @@ -1,49 +0,0 @@ -cmake_minimum_required(VERSION 3.5) -project(node_test) - -if(NOT CMAKE_CXX_STANDARD) - set(CMAKE_CXX_STANDARD 14) -endif() - -if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") - add_compile_options(-Wall -Wextra -Wpedantic) -endif() - -find_package(custom_interface REQUIRED) # CHANGE - - -find_package(ament_cmake REQUIRED) -find_package(rclcpp REQUIRED) -find_package(std_msgs REQUIRED) - -find_package(CURL REQUIRED) -find_package( OpenCV REQUIRED ) -find_package(cv_bridge REQUIRED) -find_package(sensor_msgs REQUIRED) - -add_executable(camera_node src/camera_node.cpp) -ament_target_dependencies(camera_node -OpenCV -cv_bridge -rclcpp -sensor_msgs -std_msgs -) - -# /usr/local/include -add_executable(jetson_node src/jetson_node.cpp) -ament_target_dependencies(jetson_node rclcpp std_msgs -OpenCV -custom_interface 
-cv_bridge -sensor_msgs -) -include_directories(/usr/local/include) -target_link_libraries(jetson_node onnxruntime) - -install(TARGETS -camera_node - jetson_node - DESTINATION lib/${PROJECT_NAME}) - -ament_package() \ No newline at end of file diff --git a/Experiments/valery_tests/ros2_ws/src/node_test/README.md b/Experiments/valery_tests/ros2_ws/src/node_test/README.md deleted file mode 100644 index f67379d..0000000 --- a/Experiments/valery_tests/ros2_ws/src/node_test/README.md +++ /dev/null @@ -1,57 +0,0 @@ -# ROS2 Package For Testing Cpp implementation runtime - -This package is used to test the runtime of implementing a ros2 package with cpp. It's meant to be as bare bones as possible. The talker reads/publishes images in the images folder. The listener executable opens the `model.onnx` file and uses it to predict the target labels. - -| executable | publishes to | subscribes to | -| -------- | -------- | -------- | -| talker | camera_image | - | -| listener | bounding_box_coords | camera_image | - -## Running The onnx_cpp node -1. navigate to the `ros2_ws` directory -```bash -rosdep install --from-paths src -y --ignore-src -colcon build --packages-select custom_interface -colcon build --packages-select node_test -``` - -2. Open a new Terminal -```bash -. install/setup.bash -ros2 run node_test jetson_node -``` - -3. Open another new terminal -```bash -. install/setup.bash -ros2 run node_test camera_node -``` - - - - -## Common trouble shooting - -### Ros2 command not found -run `source /opt/ros/humble/setup.bash` to source the setup files - -### colcon build fails because it can't find onnxruntime: -1. Check if onnxruntime is already installed by running `ldconfig -p | grep onnxruntime` -2. If it's not installed, install it: -```bash -# Clone the ONNX Runtime repository -git clone https://github.com/microsoft/onnxruntime.git - -# Change directory to the cloned repository -cd onnxruntime - -# Build ONNX Runtime -./build.sh --config Release --build_shared_lib --parallel - -# Copy the built shared library to /usr/local/lib -sudo cp ./build/Linux/Release/libonnxruntime.so /usr/local/lib - -# Update the dynamic linker run-time bindings -sudo ldconfig -``` - diff --git a/Experiments/valery_tests/ros2_ws/src/node_test/package.xml b/Experiments/valery_tests/ros2_ws/src/node_test/package.xml deleted file mode 100644 index b05c1a3..0000000 --- a/Experiments/valery_tests/ros2_ws/src/node_test/package.xml +++ /dev/null @@ -1,27 +0,0 @@ - - - - node_test - 0.0.0 - TODO: Package description - valery - Apache-2.0 - - ament_cmake -custom_interface - -rclcpp -std_msgs - - cv_bridge - sensor_msgs - image_transport - - - ament_lint_auto - ament_lint_common - - - ament_cmake - - diff --git a/Experiments/valery_tests/ros2_ws/src/node_test/src/camera_node.cpp b/Experiments/valery_tests/ros2_ws/src/node_test/src/camera_node.cpp deleted file mode 100644 index c26b04d..0000000 --- a/Experiments/valery_tests/ros2_ws/src/node_test/src/camera_node.cpp +++ /dev/null @@ -1,91 +0,0 @@ -#include -#include -#include -#include - -#include "rclcpp/rclcpp.hpp" -#include "std_msgs/msg/string.hpp" - -#include // for image processing -#include -#include "sensor_msgs/msg/image.hpp" - -// #include - -#include - -using namespace std::chrono_literals; -using namespace cv; - -/* This example creates a subclass of Node and uses std::bind() to register a - * member function as a callback from the timer. 
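 * In this demo the publisher reads one of the bundled mnist_<N>.png images, publishes it
 * on the camera_image topic every 3 seconds, and logs its own resident memory
 * (via getrusage) after each publish.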
*/ - -class MinimalPublisher : public rclcpp::Node -{ -public: - MinimalPublisher() - : Node("camera_node"), count_(0) - { - camera_image_ = this->create_publisher("camera_image", 10); - timer_ = this->create_wall_timer( - 3000ms, std::bind(&MinimalPublisher::picture_publisher, this)); - } - -private: - void picture_publisher() - { - cv::Mat image = cv::imread("src/node_test/src/images/mnist_"+ std::to_string(count_ % 10)+ ".png"); - - if (image.empty()) { - RCLCPP_ERROR(this->get_logger(), "Image NOT found"); - return; - } - - std_msgs::msg::Header header = std_msgs::msg::Header(); // empty header - header.frame_id = "image_" + std::to_string(count_++ % 10); // time - - cv_bridge::CvImage img_bridge = cv_bridge::CvImage(header, sensor_msgs::image_encodings::BGR8, image); - - sensor_msgs::msg::Image out_image; // >> message to be sent - img_bridge.toImageMsg(out_image); // from cv_bridge to sensor_msgs::Image - - auto message = std_msgs::msg::String(); - RCLCPP_INFO(this->get_logger(), "Publishing and working: '%s'", header.frame_id.c_str()); - camera_image_->publish(out_image); - - checkRAMUsage(); - - } - - -// takes in image message and converts into datatype for onnx model - void checkRAMUsage() const{ - struct rusage r_usage; - - // Get resource usage - if (getrusage(RUSAGE_SELF, &r_usage) != 0) { - RCLCPP_INFO(this->get_logger(), "Error: Unable to get resource usage."); - } - - // Memory usage in kilobytes - long memory_usage = r_usage.ru_maxrss; - - // Convert memory usage to megabytes - double memory_usage_mb = static_cast(memory_usage) / 1024.0; - RCLCPP_INFO(this->get_logger(), "Memory Usage: %.2f", memory_usage_mb); -} - - - rclcpp::TimerBase::SharedPtr timer_; - rclcpp::Publisher::SharedPtr camera_image_; - size_t count_; -}; - - -int main(int argc, char * argv[]) -{ - rclcpp::init(argc, argv); - rclcpp::spin(std::make_shared()); - rclcpp::shutdown(); - return 0; -} diff --git a/Experiments/valery_tests/ros2_ws/src/node_test/src/classes.txt b/Experiments/valery_tests/ros2_ws/src/node_test/src/classes.txt deleted file mode 100644 index 16315f2..0000000 --- a/Experiments/valery_tests/ros2_ws/src/node_test/src/classes.txt +++ /dev/null @@ -1,80 +0,0 @@ -person -bicycle -car -motorbike -aeroplane -bus -train -truck -boat -traffic light -fire hydrant -stop sign -parking meter -bench -bird -cat -dog -horse -sheep -cow -elephant -bear -zebra -giraffe -backpack -umbrella -handbag -tie -suitcase -frisbee -skis -snowboard -sports ball -kite -baseball bat -baseball glove -skateboard -surfboard -tennis racket -bottle -wine glass -cup -fork -knife -spoon -bowl -banana -apple -sandwich -orange -broccoli -carrot -hot dog -pizza -donut -cake -chair -sofa -pottedplant -bed -diningtable -toilet -tvmonitor -laptop -mouse -remote -keyboard -cell phone -microwave -oven -toaster -sink -refrigerator -book -clock -vase -scissors -teddy bear -hair drier -toothbrush \ No newline at end of file diff --git a/Experiments/valery_tests/ros2_ws/src/node_test/src/engine.cpp b/Experiments/valery_tests/ros2_ws/src/node_test/src/engine.cpp deleted file mode 100644 index 28c7fc4..0000000 --- a/Experiments/valery_tests/ros2_ws/src/node_test/src/engine.cpp +++ /dev/null @@ -1,128 +0,0 @@ -#include "engine.h" -#include -#include -#include -#include -#include -#include - -using namespace nvinfer1; -using namespace Util; - -std::vector Util::getFilesInDirectory(const std::string &dirPath) { - std::vector filepaths; - for (const auto &entry : std::filesystem::directory_iterator(dirPath)) { - 
filepaths.emplace_back(entry.path().string()); - } - return filepaths; -} - -void Logger::log(Severity severity, const char *msg) noexcept { - // Would advise using a proper logging utility such as - // https://github.com/gabime/spdlog For the sake of this tutorial, will just - // log to the console. - - // Only log Warnings or more important. - if (severity <= Severity::kWARNING) { - std::cout << msg << std::endl; - } -} - -Int8EntropyCalibrator2::Int8EntropyCalibrator2(int32_t batchSize, int32_t inputW, int32_t inputH, const std::string &calibDataDirPath, - const std::string &calibTableName, const std::string &inputBlobName, - const std::array &subVals, const std::array &divVals, bool normalize, - bool readCache) - : m_batchSize(batchSize), m_inputW(inputW), m_inputH(inputH), m_imgIdx(0), m_calibTableName(calibTableName), - m_inputBlobName(inputBlobName), m_subVals(subVals), m_divVals(divVals), m_normalize(normalize), m_readCache(readCache) { - - // Allocate GPU memory to hold the entire batch - m_inputCount = 3 * inputW * inputH * batchSize; - checkCudaErrorCode(cudaMalloc(&m_deviceInput, m_inputCount * sizeof(float))); - - // Read the name of all the files in the specified directory. - if (!doesFileExist(calibDataDirPath)) { - throw std::runtime_error("Error, directory at provided path does not exist: " + calibDataDirPath); - } - - m_imgPaths = getFilesInDirectory(calibDataDirPath); - if (m_imgPaths.size() < static_cast(batchSize)) { - throw std::runtime_error("There are fewer calibration images than the specified batch size!"); - } - - // Randomize the calibration data - auto rd = std::random_device{}; - auto rng = std::default_random_engine{rd()}; - std::shuffle(std::begin(m_imgPaths), std::end(m_imgPaths), rng); -} - -int32_t Int8EntropyCalibrator2::getBatchSize() const noexcept { - // Return the batch size - return m_batchSize; -} - -bool Int8EntropyCalibrator2::getBatch(void **bindings, const char **names, int32_t nbBindings) noexcept { - // This method will read a batch of images into GPU memory, and place the - // pointer to the GPU memory in the bindings variable. - - if (m_imgIdx + m_batchSize > static_cast(m_imgPaths.size())) { - // There are not enough images left to satisfy an entire batch - return false; - } - - // Read the calibration images into memory for the current batch - std::vector inputImgs; - for (int i = m_imgIdx; i < m_imgIdx + m_batchSize; i++) { - std::cout << "Reading image " << i << ": " << m_imgPaths[i] << std::endl; - auto cpuImg = cv::imread(m_imgPaths[i]); - if (cpuImg.empty()) { - std::cout << "Fatal error: Unable to read image at path: " << m_imgPaths[i] << std::endl; - return false; - } - - cv::cuda::GpuMat gpuImg; - gpuImg.upload(cpuImg); - cv::cuda::cvtColor(gpuImg, gpuImg, cv::COLOR_BGR2RGB); - - // TODO: Define any preprocessing code here, such as resizing - auto resized = Engine::resizeKeepAspectRatioPadRightBottom(gpuImg, m_inputH, m_inputW); - - inputImgs.emplace_back(std::move(resized)); - } - - // Convert the batch from NHWC to NCHW - // ALso apply normalization, scaling, and mean subtraction - auto mfloat = Engine::blobFromGpuMats(inputImgs, m_subVals, m_divVals, m_normalize); - auto *dataPointer = mfloat.ptr(); - - // Copy the GPU buffer to member variable so that it persists - checkCudaErrorCode(cudaMemcpyAsync(m_deviceInput, dataPointer, m_inputCount * sizeof(float), cudaMemcpyDeviceToDevice)); - - m_imgIdx += m_batchSize; - if (std::string(names[0]) != m_inputBlobName) { - std::cout << "Error: Incorrect input name provided!" 
<< std::endl; - return false; - } - bindings[0] = m_deviceInput; - return true; -} - -void const *Int8EntropyCalibrator2::readCalibrationCache(size_t &length) noexcept { - std::cout << "Searching for calibration cache: " << m_calibTableName << std::endl; - m_calibCache.clear(); - std::ifstream input(m_calibTableName, std::ios::binary); - input >> std::noskipws; - if (m_readCache && input.good()) { - std::cout << "Reading calibration cache: " << m_calibTableName << std::endl; - std::copy(std::istream_iterator(input), std::istream_iterator(), std::back_inserter(m_calibCache)); - } - length = m_calibCache.size(); - return length ? m_calibCache.data() : nullptr; -} - -void Int8EntropyCalibrator2::writeCalibrationCache(const void *ptr, std::size_t length) noexcept { - std::cout << "Writing calib cache: " << m_calibTableName << " Size: " << length << " bytes" << std::endl; - std::ofstream output(m_calibTableName, std::ios::binary); - output.write(reinterpret_cast(ptr), length); -} - -Int8EntropyCalibrator2::~Int8EntropyCalibrator2() { checkCudaErrorCode(cudaFree(m_deviceInput)); }; diff --git a/Experiments/valery_tests/ros2_ws/src/node_test/src/engine.h b/Experiments/valery_tests/ros2_ws/src/node_test/src/engine.h deleted file mode 100644 index b7bdff7..0000000 --- a/Experiments/valery_tests/ros2_ws/src/node_test/src/engine.h +++ /dev/null @@ -1,787 +0,0 @@ -#pragma once - -#include "NvInfer.h" -#include "NvOnnxParser.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// Utility methods -namespace Util { -inline bool doesFileExist(const std::string &filepath) { - std::ifstream f(filepath.c_str()); - return f.good(); -} - -inline void checkCudaErrorCode(cudaError_t code) { - if (code != 0) { - std::string errMsg = "CUDA operation failed with code: " + std::to_string(code) + "(" + cudaGetErrorName(code) + - "), with message: " + cudaGetErrorString(code); - std::cout << errMsg << std::endl; - throw std::runtime_error(errMsg); - } -} - -std::vector getFilesInDirectory(const std::string &dirPath); -} // namespace Util -// Utility Timer -template class Stopwatch { - typename Clock::time_point start_point; - -public: - Stopwatch() : start_point(Clock::now()) {} - - // Returns elapsed time - template Rep elapsedTime() const { - std::atomic_thread_fence(std::memory_order_relaxed); - auto counted_time = std::chrono::duration_cast(Clock::now() - start_point).count(); - std::atomic_thread_fence(std::memory_order_relaxed); - return static_cast(counted_time); - } -}; - -using preciseStopwatch = Stopwatch<>; - -// Precision used for GPU inference -enum class Precision { - // Full precision floating point value - FP32, - // Half prevision floating point value - FP16, - // Int8 quantization. - // Has reduced dynamic range, may result in slight loss in accuracy. - // If INT8 is selected, must provide path to calibration dataset directory. - INT8, -}; - -// Options for the network -struct Options { - // Precision to use for GPU inference. - Precision precision = Precision::FP16; - // If INT8 precision is selected, must provide path to calibration dataset - // directory. - std::string calibrationDataDirectoryPath; - // The batch size to be used when computing calibration data for INT8 - // inference. Should be set to as large a batch number as your GPU will - // support. - int32_t calibrationBatchSize = 128; - // The batch size which should be optimized for. 
- int32_t optBatchSize = 1; - // Maximum allowable batch size - int32_t maxBatchSize = 16; - // GPU device index - int deviceIndex = 0; -}; - -// Class used for int8 calibration -class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 { -public: - Int8EntropyCalibrator2(int32_t batchSize, int32_t inputW, int32_t inputH, const std::string &calibDataDirPath, - const std::string &calibTableName, const std::string &inputBlobName, - const std::array &subVals = {0.f, 0.f, 0.f}, const std::array &divVals = {1.f, 1.f, 1.f}, - bool normalize = true, bool readCache = true); - virtual ~Int8EntropyCalibrator2(); - // Abstract base class methods which must be implemented - int32_t getBatchSize() const noexcept override; - bool getBatch(void *bindings[], char const *names[], int32_t nbBindings) noexcept override; - void const *readCalibrationCache(std::size_t &length) noexcept override; - void writeCalibrationCache(void const *ptr, std::size_t length) noexcept override; - -private: - const int32_t m_batchSize; - const int32_t m_inputW; - const int32_t m_inputH; - int32_t m_imgIdx; - std::vector m_imgPaths; - size_t m_inputCount; - const std::string m_calibTableName; - const std::string m_inputBlobName; - const std::array m_subVals; - const std::array m_divVals; - const bool m_normalize; - const bool m_readCache; - void *m_deviceInput; - std::vector m_calibCache; -}; - -// Class to extend TensorRT logger -class Logger : public nvinfer1::ILogger { - void log(Severity severity, const char *msg) noexcept override; -}; - -template class Engine { -public: - Engine(const Options &options); - ~Engine(); - - // Build the onnx model into a TensorRT engine file, cache the model to disk - // (to avoid rebuilding in future), and then load the model into memory The - // default implementation will normalize values between [0.f, 1.f] Setting the - // normalize flag to false will leave values between [0.f, 255.f] (some - // converted models may require this). If the model requires values to be - // normalized between [-1.f, 1.f], use the following params: - // subVals = {0.5f, 0.5f, 0.5f}; - // divVals = {0.5f, 0.5f, 0.5f}; - // normalize = true; - bool buildLoadNetwork(std::string onnxModelPath, const std::array &subVals = {0.f, 0.f, 0.f}, - const std::array &divVals = {1.f, 1.f, 1.f}, bool normalize = true); - - // Load a TensorRT engine file from disk into memory - // The default implementation will normalize values between [0.f, 1.f] - // Setting the normalize flag to false will leave values between [0.f, 255.f] - // (some converted models may require this). If the model requires values to - // be normalized between [-1.f, 1.f], use the following params: - // subVals = {0.5f, 0.5f, 0.5f}; - // divVals = {0.5f, 0.5f, 0.5f}; - // normalize = true; - bool loadNetwork(std::string trtModelPath, const std::array &subVals = {0.f, 0.f, 0.f}, - const std::array &divVals = {1.f, 1.f, 1.f}, bool normalize = true); - - // Run inference. - // Input format [input][batch][cv::cuda::GpuMat] - // Output format [batch][output][feature_vector] - bool runInference(const std::vector> &inputs, std::vector>> &featureVectors); - - // Utility method for resizing an image while maintaining the aspect ratio by - // adding padding to smaller dimension after scaling While letterbox padding - // normally adds padding to top & bottom, or left & right sides, this - // implementation only adds padding to the right or bottom side This is done - // so that it's easier to convert detected coordinates (ex. 
YOLO model) back - // to the original reference frame. - static cv::cuda::GpuMat resizeKeepAspectRatioPadRightBottom(const cv::cuda::GpuMat &input, size_t height, size_t width, - const cv::Scalar &bgcolor = cv::Scalar(0, 0, 0)); - - [[nodiscard]] const std::vector &getInputDims() const { return m_inputDims; }; - [[nodiscard]] const std::vector &getOutputDims() const { return m_outputDims; }; - - // Utility method for transforming triple nested output array into 2D array - // Should be used when the output batch size is 1, but there are multiple - // output feature vectors - static void transformOutput(std::vector>> &input, std::vector> &output); - - // Utility method for transforming triple nested output array into single - // array Should be used when the output batch size is 1, and there is only a - // single output feature vector - static void transformOutput(std::vector>> &input, std::vector &output); - // Convert NHWC to NCHW and apply scaling and mean subtraction - static cv::cuda::GpuMat blobFromGpuMats(const std::vector &batchInput, const std::array &subVals, - const std::array &divVals, bool normalize); - -private: - // Build the network - bool build(std::string onnxModelPath, const std::array &subVals, const std::array &divVals, bool normalize); - - // Converts the engine options into a string - std::string serializeEngineOptions(const Options &options, const std::string &onnxModelPath); - - void getDeviceNames(std::vector &deviceNames); - - void clearGpuBuffers(); - - // Normalization, scaling, and mean subtraction of inputs - std::array m_subVals{}; - std::array m_divVals{}; - bool m_normalize; - - // Holds pointers to the input and output GPU buffers - std::vector m_buffers; - std::vector m_outputLengths{}; - std::vector m_inputDims; - std::vector m_outputDims; - std::vector m_IOTensorNames; - int32_t m_inputBatchSize; - - // Must keep IRuntime around for inference, see: - // https://forums.developer.nvidia.com/t/is-it-safe-to-deallocate-nvinfer1-iruntime-after-creating-an-nvinfer1-icudaengine-but-before-running-inference-with-said-icudaengine/255381/2?u=cyruspk4w6 - std::unique_ptr m_runtime = nullptr; - std::unique_ptr m_calibrator = nullptr; - std::unique_ptr m_engine = nullptr; - std::unique_ptr m_context = nullptr; - const Options m_options; - Logger m_logger; -}; - -template Engine::Engine(const Options &options) : m_options(options) {} - -template Engine::~Engine() { clearGpuBuffers(); } - -template void Engine::clearGpuBuffers() { - if (!m_buffers.empty()) { - // Free GPU memory of outputs - const auto numInputs = m_inputDims.size(); - for (int32_t outputBinding = numInputs; outputBinding < m_engine->getNbIOTensors(); ++outputBinding) { - Util::checkCudaErrorCode(cudaFree(m_buffers[outputBinding])); - } - m_buffers.clear(); - } -} - -template -bool Engine::buildLoadNetwork(std::string onnxModelPath, const std::array &subVals, const std::array &divVals, - bool normalize) { - // Only regenerate the engine file if it has not already been generated for - // the specified options, otherwise load cached version from disk - const auto engineName = serializeEngineOptions(m_options, onnxModelPath); - std::cout << "Searching for engine file with name: " << engineName << std::endl; - - if (Util::doesFileExist(engineName)) { - std::cout << "Engine found, not regenerating..." 
<< std::endl; - } else { - if (!Util::doesFileExist(onnxModelPath)) { - throw std::runtime_error("Could not find onnx model at path: " + onnxModelPath); - } - - // Was not able to find the engine file, generate... - std::cout << "Engine not found, generating. This could take a while..." << std::endl; - - // Build the onnx model into a TensorRT engine - auto ret = build(onnxModelPath, subVals, divVals, normalize); - if (!ret) { - return false; - } - } - - // Load the TensorRT engine file into memory - return loadNetwork(engineName, subVals, divVals, normalize); -} - -template -bool Engine::loadNetwork(std::string trtModelPath, const std::array &subVals, const std::array &divVals, - bool normalize) { - m_subVals = subVals; - m_divVals = divVals; - m_normalize = normalize; - - // Read the serialized model from disk - if (!Util::doesFileExist(trtModelPath)) { - std::cout << "Error, unable to read TensorRT model at path: " + trtModelPath << std::endl; - return false; - } else { - std::cout << "Loading TensorRT engine file at path: " << trtModelPath << std::endl; - } - - std::ifstream file(trtModelPath, std::ios::binary | std::ios::ate); - std::streamsize size = file.tellg(); - file.seekg(0, std::ios::beg); - - std::vector buffer(size); - if (!file.read(buffer.data(), size)) { - throw std::runtime_error("Unable to read engine file"); - } - - // Create a runtime to deserialize the engine file. - m_runtime = std::unique_ptr{nvinfer1::createInferRuntime(m_logger)}; - if (!m_runtime) { - return false; - } - - // Set the device index - auto ret = cudaSetDevice(m_options.deviceIndex); - if (ret != 0) { - int numGPUs; - cudaGetDeviceCount(&numGPUs); - auto errMsg = "Unable to set GPU device index to: " + std::to_string(m_options.deviceIndex) + ". Note, your device has " + - std::to_string(numGPUs) + " CUDA-capable GPU(s)."; - throw std::runtime_error(errMsg); - } - - // Create an engine, a representation of the optimized model. - m_engine = std::unique_ptr(m_runtime->deserializeCudaEngine(buffer.data(), buffer.size())); - if (!m_engine) { - return false; - } - - // The execution context contains all of the state associated with a - // particular invocation - m_context = std::unique_ptr(m_engine->createExecutionContext()); - if (!m_context) { - return false; - } - - // Storage for holding the input and output buffers - // This will be passed to TensorRT for inference - clearGpuBuffers(); - m_buffers.resize(m_engine->getNbIOTensors()); - - m_outputLengths.clear(); - m_inputDims.clear(); - m_outputDims.clear(); - m_IOTensorNames.clear(); - - // Create a cuda stream - cudaStream_t stream; - Util::checkCudaErrorCode(cudaStreamCreate(&stream)); - - // Allocate GPU memory for input and output buffers - m_outputLengths.clear(); - for (int i = 0; i < m_engine->getNbIOTensors(); ++i) { - const auto tensorName = m_engine->getIOTensorName(i); - m_IOTensorNames.emplace_back(tensorName); - const auto tensorType = m_engine->getTensorIOMode(tensorName); - const auto tensorShape = m_engine->getTensorShape(tensorName); - const auto tensorDataType = m_engine->getTensorDataType(tensorName); - - if (tensorType == nvinfer1::TensorIOMode::kINPUT) { - // The implementation currently only supports inputs of type float - if (m_engine->getTensorDataType(tensorName) != nvinfer1::DataType::kFLOAT) { - throw std::runtime_error("Error, the implementation currently only supports float inputs"); - } - - // Don't need to allocate memory for inputs as we will be using the OpenCV - // GpuMat buffer directly. 
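// Note: the preprocessed blob produced later by blobFromGpuMats() already
// resides in GPU memory, so runInference() simply records that GpuMat's device
// pointer in m_buffers and registers it via setTensorAddress(); only the
// output bindings below receive a dedicated cudaMallocAsync() allocation.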
- - // Store the input dims for later use - m_inputDims.emplace_back(tensorShape.d[1], tensorShape.d[2], tensorShape.d[3]); - m_inputBatchSize = tensorShape.d[0]; - } else if (tensorType == nvinfer1::TensorIOMode::kOUTPUT) { - // Ensure the model output data type matches the template argument - // specified by the user - if (tensorDataType == nvinfer1::DataType::kFLOAT && !std::is_same::value) { - throw std::runtime_error("Error, the model has expected output of type float. Engine class " - "template parameter must be adjusted."); - } else if (tensorDataType == nvinfer1::DataType::kHALF && !std::is_same<__half, T>::value) { - throw std::runtime_error("Error, the model has expected output of type __half. Engine class " - "template parameter must be adjusted."); - } else if (tensorDataType == nvinfer1::DataType::kINT8 && !std::is_same::value) { - throw std::runtime_error("Error, the model has expected output of type int8_t. Engine class " - "template parameter must be adjusted."); - } else if (tensorDataType == nvinfer1::DataType::kINT32 && !std::is_same::value) { - throw std::runtime_error("Error, the model has expected output of type int32_t. Engine " - "class template parameter must be adjusted."); - } else if (tensorDataType == nvinfer1::DataType::kBOOL && !std::is_same::value) { - throw std::runtime_error("Error, the model has expected output of type bool. Engine class " - "template parameter must be adjusted."); - } else if (tensorDataType == nvinfer1::DataType::kUINT8 && !std::is_same::value) { - throw std::runtime_error("Error, the model has expected output of type uint8_t. Engine " - "class template parameter must be adjusted."); - } else if (tensorDataType == nvinfer1::DataType::kFP8) { - throw std::runtime_error("Error, model has unsupported output type"); - } - - // The binding is an output - uint32_t outputLength = 1; - m_outputDims.push_back(tensorShape); - - for (int j = 1; j < tensorShape.nbDims; ++j) { - // We ignore j = 0 because that is the batch size, and we will take that - // into account when sizing the buffer - outputLength *= tensorShape.d[j]; - } - - m_outputLengths.push_back(outputLength); - // Now size the output buffer appropriately, taking into account the max - // possible batch size (although we could actually end up using less - // memory) - Util::checkCudaErrorCode(cudaMallocAsync(&m_buffers[i], outputLength * m_options.maxBatchSize * sizeof(T), stream)); - } else { - throw std::runtime_error("Error, IO Tensor is neither an input or output!"); - } - } - - // Synchronize and destroy the cuda stream - Util::checkCudaErrorCode(cudaStreamSynchronize(stream)); - Util::checkCudaErrorCode(cudaStreamDestroy(stream)); - - return true; -} - -template -bool Engine::build(std::string onnxModelPath, const std::array &subVals, const std::array &divVals, bool normalize) { - // Create our engine builder. - auto builder = std::unique_ptr(nvinfer1::createInferBuilder(m_logger)); - if (!builder) { - return false; - } - - // Define an explicit batch size and then create the network (implicit batch - // size is deprecated). More info here: - // https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#explicit-implicit-batch - auto explicitBatch = 1U << static_cast(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); - auto network = std::unique_ptr(builder->createNetworkV2(explicitBatch)); - if (!network) { - return false; - } - - // Create a parser for reading the onnx file. 
- auto parser = std::unique_ptr(nvonnxparser::createParser(*network, m_logger)); - if (!parser) { - return false; - } - - // We are going to first read the onnx file into memory, then pass that buffer - // to the parser. Had our onnx model file been encrypted, this approach would - // allow us to first decrypt the buffer. - std::ifstream file(onnxModelPath, std::ios::binary | std::ios::ate); - std::streamsize size = file.tellg(); - file.seekg(0, std::ios::beg); - - std::vector buffer(size); - if (!file.read(buffer.data(), size)) { - throw std::runtime_error("Unable to read engine file"); - } - - // Parse the buffer we read into memory. - auto parsed = parser->parse(buffer.data(), buffer.size()); - if (!parsed) { - return false; - } - - // Ensure that all the inputs have the same batch size - const auto numInputs = network->getNbInputs(); - if (numInputs < 1) { - throw std::runtime_error("Error, model needs at least 1 input!"); - } - const auto input0Batch = network->getInput(0)->getDimensions().d[0]; - for (int32_t i = 1; i < numInputs; ++i) { - if (network->getInput(i)->getDimensions().d[0] != input0Batch) { - throw std::runtime_error("Error, the model has multiple inputs, each " - "with differing batch sizes!"); - } - } - - // Check to see if the model supports dynamic batch size or not - bool doesSupportDynamicBatch = false; - if (input0Batch == -1) { - doesSupportDynamicBatch = true; - std::cout << "Model supports dynamic batch size" << std::endl; - } else { - std::cout << "Model only supports fixed batch size of " << input0Batch << std::endl; - // If the model supports a fixed batch size, ensure that the maxBatchSize - // and optBatchSize were set correctly. - if (m_options.optBatchSize != input0Batch || m_options.maxBatchSize != input0Batch) { - throw std::runtime_error("Error, model only supports a fixed batch size of " + std::to_string(input0Batch) + - ". Must set Options.optBatchSize and Options.maxBatchSize to 1"); - } - } - - auto config = std::unique_ptr(builder->createBuilderConfig()); - if (!config) { - return false; - } - - // Register a single optimization profile - nvinfer1::IOptimizationProfile *optProfile = builder->createOptimizationProfile(); - for (int32_t i = 0; i < numInputs; ++i) { - // Must specify dimensions for all the inputs the model expects. 
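// The profile set up below assumes an NCHW input layout (d[1] = channels,
// d[2] = height, d[3] = width) and lets only the batch dimension vary:
// kMIN and kMAX bound the batch sizes the engine will accept at runtime,
// while kOPT is the batch size TensorRT tunes its kernels for.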
- const auto input = network->getInput(i); - const auto inputName = input->getName(); - const auto inputDims = input->getDimensions(); - int32_t inputC = inputDims.d[1]; - int32_t inputH = inputDims.d[2]; - int32_t inputW = inputDims.d[3]; - - // Specify the optimization profile` - if (doesSupportDynamicBatch) { - optProfile->setDimensions(inputName, nvinfer1::OptProfileSelector::kMIN, nvinfer1::Dims4(1, inputC, inputH, inputW)); - } else { - optProfile->setDimensions(inputName, nvinfer1::OptProfileSelector::kMIN, - nvinfer1::Dims4(m_options.optBatchSize, inputC, inputH, inputW)); - } - optProfile->setDimensions(inputName, nvinfer1::OptProfileSelector::kOPT, - nvinfer1::Dims4(m_options.optBatchSize, inputC, inputH, inputW)); - optProfile->setDimensions(inputName, nvinfer1::OptProfileSelector::kMAX, - nvinfer1::Dims4(m_options.maxBatchSize, inputC, inputH, inputW)); - } - config->addOptimizationProfile(optProfile); - - // Set the precision level - const auto engineName = serializeEngineOptions(m_options, onnxModelPath); - if (m_options.precision == Precision::FP16) { - // Ensure the GPU supports FP16 inference - if (!builder->platformHasFastFp16()) { - throw std::runtime_error("Error: GPU does not support FP16 precision"); - } - config->setFlag(nvinfer1::BuilderFlag::kFP16); - } else if (m_options.precision == Precision::INT8) { - if (numInputs > 1) { - throw std::runtime_error("Error, this implementation currently only supports INT8 " - "quantization for single input models"); - } - - // Ensure the GPU supports INT8 Quantization - if (!builder->platformHasFastInt8()) { - throw std::runtime_error("Error: GPU does not support INT8 precision"); - } - - // Ensure the user has provided path to calibration data directory - if (m_options.calibrationDataDirectoryPath.empty()) { - throw std::runtime_error("Error: If INT8 precision is selected, must provide path to " - "calibration data directory to Engine::build method"); - } - - config->setFlag((nvinfer1::BuilderFlag::kINT8)); - - const auto input = network->getInput(0); - const auto inputName = input->getName(); - const auto inputDims = input->getDimensions(); - const auto calibrationFileName = engineName + ".calibration"; - - m_calibrator = std::make_unique(m_options.calibrationBatchSize, inputDims.d[3], inputDims.d[2], - m_options.calibrationDataDirectoryPath, calibrationFileName, inputName, - subVals, divVals, normalize); - config->setInt8Calibrator(m_calibrator.get()); - } - - // CUDA stream used for profiling by the builder. - cudaStream_t profileStream; - Util::checkCudaErrorCode(cudaStreamCreate(&profileStream)); - config->setProfileStream(profileStream); - - // Build the engine - // If this call fails, it is suggested to increase the logger verbosity to - // kVERBOSE and try rebuilding the engine. Doing so will provide you with more - // information on why exactly it is failing. 
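// For reference, a minimal sketch of what such a log() override could look
// like (the actual Logger implementation lives elsewhere in this repo, and the
// kMinSeverity name is only illustrative); lowering the threshold to
// Severity::kVERBOSE surfaces the detailed build diagnostics mentioned above:
//
//   void Logger::log(Severity severity, const char *msg) noexcept {
//       constexpr auto kMinSeverity = Severity::kWARNING; // set to kVERBOSE when debugging builds
//       if (severity <= kMinSeverity) {                    // lower enum value == more severe
//           std::cout << msg << std::endl;
//       }
//   }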
- std::unique_ptr plan{builder->buildSerializedNetwork(*network, *config)}; - if (!plan) { - return false; - } - - // Write the engine to disk - std::ofstream outfile(engineName, std::ofstream::binary); - outfile.write(reinterpret_cast(plan->data()), plan->size()); - - std::cout << "Success, saved engine to " << engineName << std::endl; - - Util::checkCudaErrorCode(cudaStreamDestroy(profileStream)); - return true; -} - -template -bool Engine::runInference(const std::vector> &inputs, - std::vector>> &featureVectors) { - // First we do some error checking - if (inputs.empty() || inputs[0].empty()) { - std::cout << "===== Error =====" << std::endl; - std::cout << "Provided input vector is empty!" << std::endl; - return false; - } - - const auto numInputs = m_inputDims.size(); - if (inputs.size() != numInputs) { - std::cout << "===== Error =====" << std::endl; - std::cout << "Incorrect number of inputs provided!" << std::endl; - return false; - } - - // Ensure the batch size does not exceed the max - if (inputs[0].size() > static_cast(m_options.maxBatchSize)) { - std::cout << "===== Error =====" << std::endl; - std::cout << "The batch size is larger than the model expects!" << std::endl; - std::cout << "Model max batch size: " << m_options.maxBatchSize << std::endl; - std::cout << "Batch size provided to call to runInference: " << inputs[0].size() << std::endl; - return false; - } - - // Ensure that if the model has a fixed batch size that is greater than 1, the - // input has the correct length - if (m_inputBatchSize != -1 && inputs[0].size() != static_cast(m_inputBatchSize)) { - std::cout << "===== Error =====" << std::endl; - std::cout << "The batch size is different from what the model expects!" << std::endl; - std::cout << "Model batch size: " << m_inputBatchSize << std::endl; - std::cout << "Batch size provided to call to runInference: " << inputs[0].size() << std::endl; - return false; - } - - const auto batchSize = static_cast(inputs[0].size()); - // Make sure the same batch size was provided for all inputs - for (size_t i = 1; i < inputs.size(); ++i) { - if (inputs[i].size() != static_cast(batchSize)) { - std::cout << "===== Error =====" << std::endl; - std::cout << "The batch size needs to be constant for all inputs!" << std::endl; - return false; - } - } - - // Create the cuda stream that will be used for inference - cudaStream_t inferenceCudaStream; - Util::checkCudaErrorCode(cudaStreamCreate(&inferenceCudaStream)); - - std::vector preprocessedInputs; - - // Preprocess all the inputs - for (size_t i = 0; i < numInputs; ++i) { - const auto &batchInput = inputs[i]; - const auto &dims = m_inputDims[i]; - - auto &input = batchInput[0]; - if (input.channels() != dims.d[0] || input.rows != dims.d[1] || input.cols != dims.d[2]) { - std::cout << "===== Error =====" << std::endl; - std::cout << "Input does not have correct size!" << std::endl; - std::cout << "Expected: (" << dims.d[0] << ", " << dims.d[1] << ", " << dims.d[2] << ")" << std::endl; - std::cout << "Got: (" << input.channels() << ", " << input.rows << ", " << input.cols << ")" << std::endl; - std::cout << "Ensure you resize your input image to the correct size" << std::endl; - return false; - } - - nvinfer1::Dims4 inputDims = {batchSize, dims.d[0], dims.d[1], dims.d[2]}; - m_context->setInputShape(m_IOTensorNames[i].c_str(), - inputDims); // Define the batch size - - // OpenCV reads images into memory in NHWC format, while TensorRT expects - // images in NCHW format. The following method converts NHWC to NCHW. 
Even - // though TensorRT expects NCHW at IO, during optimization, it can - // internally use NHWC to optimize cuda kernels See: - // https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#data-layout - // Copy over the input data and perform the preprocessing - auto mfloat = blobFromGpuMats(batchInput, m_subVals, m_divVals, m_normalize); - preprocessedInputs.push_back(mfloat); - m_buffers[i] = mfloat.ptr(); - } - - // Ensure all dynamic bindings have been defined. - if (!m_context->allInputDimensionsSpecified()) { - throw std::runtime_error("Error, not all required dimensions specified."); - } - - // Set the address of the input and output buffers - for (size_t i = 0; i < m_buffers.size(); ++i) { - bool status = m_context->setTensorAddress(m_IOTensorNames[i].c_str(), m_buffers[i]); - if (!status) { - return false; - } - } - - // Run inference. - bool status = m_context->enqueueV3(inferenceCudaStream); - if (!status) { - return false; - } - - // Copy the outputs back to CPU - featureVectors.clear(); - - for (int batch = 0; batch < batchSize; ++batch) { - // Batch - std::vector> batchOutputs{}; - for (int32_t outputBinding = numInputs; outputBinding < m_engine->getNbIOTensors(); ++outputBinding) { - // We start at index m_inputDims.size() to account for the inputs in our - // m_buffers - std::vector output; - auto outputLength = m_outputLengths[outputBinding - numInputs]; - output.resize(outputLength); - // Copy the output - Util::checkCudaErrorCode(cudaMemcpyAsync(output.data(), - static_cast(m_buffers[outputBinding]) + (batch * sizeof(T) * outputLength), - outputLength * sizeof(T), cudaMemcpyDeviceToHost, inferenceCudaStream)); - batchOutputs.emplace_back(std::move(output)); - } - featureVectors.emplace_back(std::move(batchOutputs)); - } - - // Synchronize the cuda stream - Util::checkCudaErrorCode(cudaStreamSynchronize(inferenceCudaStream)); - Util::checkCudaErrorCode(cudaStreamDestroy(inferenceCudaStream)); - return true; -} - -template -cv::cuda::GpuMat Engine::blobFromGpuMats(const std::vector &batchInput, const std::array &subVals, - const std::array &divVals, bool normalize) { - cv::cuda::GpuMat gpu_dst(1, batchInput[0].rows * batchInput[0].cols * batchInput.size(), CV_8UC3); - - size_t width = batchInput[0].cols * batchInput[0].rows; - for (size_t img = 0; img < batchInput.size(); img++) { - std::vector input_channels{ - cv::cuda::GpuMat(batchInput[0].rows, batchInput[0].cols, CV_8U, &(gpu_dst.ptr()[0 + width * 3 * img])), - cv::cuda::GpuMat(batchInput[0].rows, batchInput[0].cols, CV_8U, &(gpu_dst.ptr()[width + width * 3 * img])), - cv::cuda::GpuMat(batchInput[0].rows, batchInput[0].cols, CV_8U, &(gpu_dst.ptr()[width * 2 + width * 3 * img]))}; - cv::cuda::split(batchInput[img], input_channels); // HWC -> CHW - } - - cv::cuda::GpuMat mfloat; - if (normalize) { - // [0.f, 1.f] - gpu_dst.convertTo(mfloat, CV_32FC3, 1.f / 255.f); - } else { - // [0.f, 255.f] - gpu_dst.convertTo(mfloat, CV_32FC3); - } - - // Apply scaling and mean subtraction - cv::cuda::subtract(mfloat, cv::Scalar(subVals[0], subVals[1], subVals[2]), mfloat, cv::noArray(), -1); - cv::cuda::divide(mfloat, cv::Scalar(divVals[0], divVals[1], divVals[2]), mfloat, 1, -1); - - return mfloat; -} - -template std::string Engine::serializeEngineOptions(const Options &options, const std::string &onnxModelPath) { - const auto filenamePos = onnxModelPath.find_last_of('/') + 1; - std::string engineName = onnxModelPath.substr(filenamePos, onnxModelPath.find_last_of('.') - filenamePos) + ".engine"; - - // Add the GPU 
device name to the file to ensure that the model is only used - // on devices with the exact same GPU - std::vector deviceNames; - getDeviceNames(deviceNames); - - if (static_cast(options.deviceIndex) >= deviceNames.size()) { - throw std::runtime_error("Error, provided device index is out of range!"); - } - - auto deviceName = deviceNames[options.deviceIndex]; - // Remove spaces from the device name - deviceName.erase(std::remove_if(deviceName.begin(), deviceName.end(), ::isspace), deviceName.end()); - - engineName += "." + deviceName; - - // Serialize the specified options into the filename - if (options.precision == Precision::FP16) { - engineName += ".fp16"; - } else if (options.precision == Precision::FP32) { - engineName += ".fp32"; - } else { - engineName += ".int8"; - } - - engineName += "." + std::to_string(options.maxBatchSize); - engineName += "." + std::to_string(options.optBatchSize); - - return engineName; -} - -template void Engine::getDeviceNames(std::vector &deviceNames) { - int numGPUs; - cudaGetDeviceCount(&numGPUs); - - for (int device = 0; device < numGPUs; device++) { - cudaDeviceProp prop; - cudaGetDeviceProperties(&prop, device); - - deviceNames.push_back(std::string(prop.name)); - } -} - -template -cv::cuda::GpuMat Engine::resizeKeepAspectRatioPadRightBottom(const cv::cuda::GpuMat &input, size_t height, size_t width, - const cv::Scalar &bgcolor) { - float r = std::min(width / (input.cols * 1.0), height / (input.rows * 1.0)); - int unpad_w = r * input.cols; - int unpad_h = r * input.rows; - cv::cuda::GpuMat re(unpad_h, unpad_w, CV_8UC3); - cv::cuda::resize(input, re, re.size()); - cv::cuda::GpuMat out(height, width, CV_8UC3, bgcolor); - re.copyTo(out(cv::Rect(0, 0, re.cols, re.rows))); - return out; -} - -template -void Engine::transformOutput(std::vector>> &input, std::vector> &output) { - if (input.size() != 1) { - throw std::logic_error("The feature vector has incorrect dimensions!"); - } - - output = std::move(input[0]); -} - -template void Engine::transformOutput(std::vector>> &input, std::vector &output) { - if (input.size() != 1 || input[0].size() != 1) { - throw std::logic_error("The feature vector has incorrect dimensions!"); - } - - output = std::move(input[0][0]); -} diff --git a/Experiments/valery_tests/ros2_ws/src/node_test/src/jetson_node.cpp b/Experiments/valery_tests/ros2_ws/src/node_test/src/jetson_node.cpp deleted file mode 100644 index bcc5048..0000000 --- a/Experiments/valery_tests/ros2_ws/src/node_test/src/jetson_node.cpp +++ /dev/null @@ -1,218 +0,0 @@ -#include -#include -#include -#include -#include - -#include "rclcpp/rclcpp.hpp" -#include "std_msgs/msg/string.hpp" - -#include // for image processing -#include -#include "sensor_msgs/msg/image.hpp" -#include "custom_interface/msg/bounding_box.hpp" // CHANGE - -#include -#include - - -using std::placeholders::_1; -using namespace cv; - -class MinimalSubscriber : public rclcpp::Node -{ - const int64_t INPUT_WIDTH = 640.0; - const int64_t INPUT_HEIGHT = 640.0; - const float SCORE_THRESHOLD = 0.2; - const float NMS_THRESHOLD = 0.4; - const float CONFIDENCE_THRESHOLD = 0.3; - -public: - MinimalSubscriber() - : Node("jetson_node") - { - camera_image_ = this->create_subscription( - "camera_image", 10, std::bind(&MinimalSubscriber::topic_callback, this, _1)); - bounding_box_ = this->create_publisher("bounding_box", 10); - - class_list_ = std::make_unique>(load_class_list()); - create_ort_session(); - } - -private: - std::vector load_class_list() -{ - std::vector class_list; - std::ifstream 
ifs("src/node_test/src/classes.txt"); - std::string line; - while (getline(ifs, line)) - { - class_list.push_back(line); - } - return class_list; -} - - void create_ort_session() - { - Ort::Env env(ORT_LOGGING_LEVEL_ERROR, "ModelRunner"); - - std::string instanceName{"Image classifier inference"}; - environment_ = std::make_unique(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING, "ModelRunner"); - - Ort::SessionOptions session_options; - session_options.SetIntraOpNumThreads(1); - session_ = std::make_unique(env, "src/node_test/src/yolov5s.onnx", session_options); - } - - void topic_callback(const sensor_msgs::msg::Image &msg) const - - { - auto start = std::chrono::high_resolution_clock::now(); - - - std::vector input_vector = preprocess(msg); - auto after_preprocess = std::chrono::high_resolution_clock::now(); - - std::vector outputs = image_predict(input_vector); - - auto after_model = std::chrono::high_resolution_clock::now(); - - custom_interface::msg::BoundingBox out_box = postprocessing(msg,&outputs); - - auto after_postprocess = std::chrono::high_resolution_clock::now(); - - bounding_box_->publish(out_box); - - auto end = std::chrono::high_resolution_clock::now(); - - std::chrono::duration elapsed = end - start; - RCLCPP_INFO(this->get_logger(), "Total time:\t\t'%s'", std::to_string(std::chrono::duration(end-start).count()).c_str()); - RCLCPP_INFO(this->get_logger(), "Time for preprocess:\t\t%s'", std::to_string(std::chrono::duration(after_preprocess-start).count()).c_str()); - RCLCPP_INFO(this->get_logger(), "Time for model:\t\t%s'", std::to_string(std::chrono::duration(after_model-after_preprocess).count()).c_str()); - RCLCPP_INFO(this->get_logger(), "Time for postprocess:\t\t%s'", std::to_string(std::chrono::duration(after_postprocess-after_model).count()).c_str()); - - checkRAMUsage(); - - } - - custom_interface::msg::BoundingBox postprocessing(const sensor_msgs::msg::Image &image,std::vector *outputs) const { - float* data = (*outputs)[0].GetTensorMutableData(); - - std::vector boxes; - std::vector predictions; - std::vector class_nums; - - const int dimensions = 85; - const int rows = 25200; - - float x_factor = image.width / INPUT_WIDTH; - float y_factor = image.height / INPUT_HEIGHT; - - for (int i = 0; i < rows; ++i) { - float confidence = data[4]; - if (confidence >= CONFIDENCE_THRESHOLD) { - float * classes_scores = data + 5; - cv::Mat scores(1, class_list_->size(), CV_32FC1, classes_scores); - cv::Point class_id; - double max_class_score; - minMaxLoc(scores, 0, &max_class_score, 0, &class_id); - if (max_class_score > SCORE_THRESHOLD) { - predictions.push_back(confidence); - class_nums.push_back(class_id.x); - - float x = data[0]; - float y = data[1]; - float w = data[2]; - float h = data[3]; - int left = int((x - 0.5 * w) * x_factor); - int top = int((y - 0.5 * h) * y_factor); - int width = int(w * x_factor); - int height = int(h * y_factor); - std::vector box = {left, top, width, height}; - - boxes.insert(boxes.end(), box.begin(), box.end()); - } - } - data += dimensions; - } - - custom_interface::msg::BoundingBox out_box; - out_box.box = boxes; - out_box.scores = predictions; - out_box.class_num = class_nums; - - return out_box; - } - - std::vector image_predict(std::vector input_vector) const - { - // Get the input tensor shape - std::vector input_tensor_shape = {1,3, INPUT_WIDTH, INPUT_HEIGHT};// Adjust this to match your input shape - - // Create the input tensor object from the data - Ort::MemoryInfo memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, 
OrtMemTypeDefault); - Ort::Value input_tensor = Ort::Value::CreateTensor(memory_info, input_vector.data(), input_vector.size(), input_tensor_shape.data(), input_tensor_shape.size()); - - std::vector input_names = {"images"}; - std::vector input_tensors; - input_tensors.emplace_back(std::move(input_tensor)); - - std::vector output_names = {"output"}; - - std::vector output_tensors = session_->Run(Ort::RunOptions{nullptr}, input_names.data(), input_tensors.data(), input_tensors.size(), output_names.data(), output_names.size()); - - return output_tensors; - } - - // takes in image message and converts into datatype for onnx model - std::vector preprocess(sensor_msgs::msg::Image image) const - { - cv_bridge::CvImagePtr cv_ptr = cv_bridge::toCvCopy(image, sensor_msgs::image_encodings::BGR8); - cv::Mat image_raw = cv_ptr->image; - - cv::Mat resizedImage; - cv::resize(image_raw, resizedImage, cv::Size(640, 640)); - - resizedImage.convertTo(resizedImage, CV_32F, 2.0f / 255.0f, -1.0f); - - std::vector img_vector; - img_vector.assign((float *)resizedImage.datastart, (float *)resizedImage.dataend); - - return img_vector; - } - - // takes in image message and converts into datatype for onnx model - void checkRAMUsage() const - { - struct rusage r_usage; - - // Get resource usage - if (getrusage(RUSAGE_SELF, &r_usage) != 0) - { - RCLCPP_INFO(this->get_logger(), "Error: Unable to get resource usage."); - } - - // Memory usage in kilobytes - long memory_usage = r_usage.ru_maxrss; - - // Convert memory usage to megabytes - double memory_usage_mb = static_cast(memory_usage) / 1024.0; - RCLCPP_INFO(this->get_logger(), "Memory Usage: %.2f", memory_usage_mb); - } - - rclcpp::Publisher::SharedPtr bounding_box_; - rclcpp::Subscription::SharedPtr camera_image_; - - std::unique_ptr session_; - std::unique_ptr environment_; - - std::unique_ptr> class_list_; -}; - -int main(int argc, char *argv[]) -{ - rclcpp::init(argc, argv); - rclcpp::spin(std::make_shared()); - rclcpp::shutdown(); - return 0; -} diff --git a/Experiments/valery_tests/ros2_ws/src/node_test/src/new_jetson_node.cpp b/Experiments/valery_tests/ros2_ws/src/node_test/src/new_jetson_node.cpp deleted file mode 100644 index 28a1f11..0000000 --- a/Experiments/valery_tests/ros2_ws/src/node_test/src/new_jetson_node.cpp +++ /dev/null @@ -1,291 +0,0 @@ -#include -#include -#include -#include -#include - -#include "rclcpp/rclcpp.hpp" -#include "std_msgs/msg/string.hpp" - -#include // for image processing -#include -#include "sensor_msgs/msg/image.hpp" -#include "custom_interface/msg/bounding_box.hpp" // CHANGE - -#include -#include -#include - - -// TODO - merge existing CMAKE with that of https://github.com/cyrusbehr/tensorrt-cpp-api -using std::placeholders::_1; -using namespace cv; - -class MinimalSubscriber : public rclcpp::Node -{ - const int64_t INPUT_WIDTH = 640.0; - const int64_t INPUT_HEIGHT = 640.0; - const float SCORE_THRESHOLD = 0.2; - const float NMS_THRESHOLD = 0.4; - const float CONFIDENCE_THRESHOLD = 0.3; - -public: - MinimalSubscriber(char* arguments[]) - : Node("jetson_node") - { - camera_image_ = this->create_subscription( - "camera_image", 10, std::bind(&MinimalSubscriber::topic_callback, this, _1)); - bounding_box_ = this->create_publisher("bounding_box", 10); - class_list_ = std::make_unique>(load_class_list()); - create_ort_session(); - tensorrt_engine_ = create_tensorrt_engine(); - std::string onnx_model_path = arguments[3]; // TODO: might have to load paths from ENV variables - std::string tensor_rt_model_path = arguments[4]; // TODO: 
this might lead to accessing invalid place in array - load_tensorrt_engine_from_onnx(onnx_model_path, tensor_rt_model_path); - } - -private: - rclcpp::Publisher::SharedPtr bounding_box_; - rclcpp::Subscription::SharedPtr camera_image_; - Engine tensorrt_engine_; - - // TODO: cleanup ONNX code - std::unique_ptr session_; - std::unique_ptr environment_; - - std::unique_ptr> class_list_; - std::vector load_class_list() -{ - std::vector class_list; - std::ifstream ifs("src/node_test/src/classes.txt"); - std::string line; - while (getline(ifs, line)) - { - class_list.push_back(line); - } - return class_list; -} - - void load_tensorrt_engine_from_onnx(Options &options, std::string &onnx_path, std::string &tensor_rt_model_path) { - // TODO - can the following normalization setup for YOLO be triggered in response to an environment variable? - // We probably won't be switching the model we use while in-competition - const bool mock_env_var_for_yolo = true; - - std::array subVals{0.f, 0.f, 0.f}; - std::array divVals{1.f, 1.f, 1.f}; - bool normalize = true; - - // If an onnx path is specified, we want to generate a (new) trt file based on that onnx model - if (!onnx_path.empty())) { - bool succ = engine.buildLoadNetwork(onnx_path, subVals, divVals, normalize); - if (!succ) { - throw std::runtime_error("Unable to build or load TRT engine"); - } - } else { - bool succ = engine.loadNetwork(tensor_rt_model_path, subVals, divVals, normalize); - if (!succ) { - throw std::runtime_error("Unable to load TRT engine"); - } - } - } - - bool does_tensorrt_file_exist(Options &options, std::string &onnx_path) - { - const std::string tensorrt_path = get_tensorrt_path_from_oonx(options, onnx_path); - return Util::doesFileExist(tensorrt_path); - } - - // TODO - consider moving this to another file - Engine create_tensorrt_engine() - { - const options = get_tensorrt_options(); - // IMPROVEMENT: How can I make the type of the engine dyanmic here? - // A declaration and instantiation in one line?? My god... - Engine = engine(options); - return engine; - } - - Options create_tensorrt_engine() { - Options options; - // Go with FP::16 initially since we care about performance. - // TODO: Benchmark against FP::32 ? - options.precision = Precision::FP16; - // Specify path to calibration data if using INT8 precision - options.calibrationDataDirectoryPath = ""; - // Specify batch size to optimize for - // Q: For the purposes of the competition, will our batch size be equivalent to the number of - // frames per second? - options.optBatchSize = 1; - // Specify max batch size we plan to run inference on (for the purposes of allocating enough memory ahead of time) - // Q: What would this value be for the competition? 
- options.maxBatchSize = 1; - return options; - } - - void create_ort_session() - { - Ort::Env env(ORT_LOGGING_LEVEL_ERROR, "ModelRunner"); - - std::string instanceName{"Image classifier inference"}; - environment_ = std::make_unique(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING, "ModelRunner"); - - Ort::SessionOptions session_options; - session_options.SetIntraOpNumThreads(1); - session_ = std::make_unique(env, "src/node_test/src/yolov5s.onnx", session_options); - } - - void topic_callback(const sensor_msgs::msg::Image &msg) const - - { - auto start = std::chrono::high_resolution_clock::now(); - - - cv::Mat input_vector = preprocess(msg); - auto after_preprocess = std::chrono::high_resolution_clock::now(); - - std::vector> outputs = image_predict(input_vector); - - auto after_model = std::chrono::high_resolution_clock::now(); - - custom_interface::msg::BoundingBox out_box = postprocessing(msg,&outputs); - - auto after_postprocess = std::chrono::high_resolution_clock::now(); - - bounding_box_->publish(out_box); - - auto end = std::chrono::high_resolution_clock::now(); - - std::chrono::duration elapsed = end - start; - RCLCPP_INFO(this->get_logger(), "Total time:\t\t'%s'", std::to_string(std::chrono::duration(end-start).count()).c_str()); - RCLCPP_INFO(this->get_logger(), "Time for preprocess:\t\t%s'", std::to_string(std::chrono::duration(after_preprocess-start).count()).c_str()); - RCLCPP_INFO(this->get_logger(), "Time for model:\t\t%s'", std::to_string(std::chrono::duration(after_model-after_preprocess).count()).c_str()); - RCLCPP_INFO(this->get_logger(), "Time for postprocess:\t\t%s'", std::to_string(std::chrono::duration(after_postprocess-after_model).count()).c_str()); - - checkRAMUsage(); - - } - - custom_interface::msg::BoundingBox postprocessing(const sensor_msgs::msg::Image &image, std::vector>> *outputs) const { - std::vector flat_data; - flatten(*outputs, flat_data); - - float* data = flat_data.data(); - - std::vector boxes; - std::vector predictions; - std::vector class_nums; - - const int dimensions = 85; - const int rows = 25200; - - float x_factor = image.width / INPUT_WIDTH; - float y_factor = image.height / INPUT_HEIGHT; - - for (int i = 0; i < rows; ++i) { - float confidence = data[4]; - if (confidence >= CONFIDENCE_THRESHOLD) { - float * classes_scores = data + 5; - cv::Mat scores(1, class_list_->size(), CV_32FC1, classes_scores); - cv::Point class_id; - double max_class_score; - minMaxLoc(scores, 0, &max_class_score, 0, &class_id); - if (max_class_score > SCORE_THRESHOLD) { - predictions.push_back(confidence); - class_nums.push_back(class_id.x); - - float x = data[0]; - float y = data[1]; - float w = data[2]; - float h = data[3]; - int left = int((x - 0.5 * w) * x_factor); - int top = int((y - 0.5 * h) * y_factor); - int width = int(w * x_factor); - int height = int(h * y_factor); - std::vector box = {left, top, width, height}; - - boxes.insert(boxes.end(), box.begin(), box.end()); - } - } - data += dimensions; - } - - custom_interface::msg::BoundingBox out_box; - out_box.box = boxes; - out_box.scores = predictions; - out_box.class_num = class_nums; - - return out_box; - } - - std::vector<>> image_predict(cv::Mat input_vector) const - { - // Upload the image to GPU memory - cv::cuda::GpuMat img; - img.upload(input_vector); - - // Keep this line of code if model expects RGB image - cv::cuda::cvtColor(img, img, cv::COLOR_BGR2RGB); - - std::vector>> featureVectors; - - // TODO - implement batching for better performance - std::vector> inputs; - std::vector input; - 
input.emplace_back(std::move(img)) - inputs.emplace_back(std::move(input)) - engine.runInference(inputs, featureVectors) - - return featureVectors; - } - - // takes in image message and converts into datatype for onnx model - std::vector preprocess(sensor_msgs::msg::Image image) const - { - cv_bridge::CvImagePtr cv_ptr = cv_bridge::toCvCopy(image, sensor_msgs::image_encodings::BGR8); - cv::Mat image_raw = cv_ptr->image; - - cv::Mat resizedImage; - cv::resize(image_raw, resizedImage, cv::Size(640, 640)); - - resizedImage.convertTo(resizedImage, CV_32F, 2.0f / 255.0f, -1.0f); - - return resizedImage; - } - - // takes in image message and converts into datatype for onnx model - void checkRAMUsage() const - { - struct rusage r_usage; - - // Get resource usage - if (getrusage(RUSAGE_SELF, &r_usage) != 0) - { - RCLCPP_INFO(this->get_logger(), "Error: Unable to get resource usage."); - } - - // Returns peak memory in KB used by process during its execution - long memory_usage = r_usage.ru_maxrss; - - // Convert memory usage to megabytes - double memory_usage_mb = static_cast(memory_usage) / 1024.0; - RCLCPP_INFO(this->get_logger(), "Memory Usage: %.2f", memory_usage_mb); - } -}; - -void flatten(const std::vector>>& nested, std::vector& flat) { - for (const auto& outer : nested) { - for (const auto& inner : outer) { - flat.insert(flat.end(), inner.begin(), inner.end()); - } - } -} - -int main(int argc, char *argv[]) -{ - // Q: Can i pass a the onnx model path as an argument to the minimal subscriber constructor? - rclcpp::init(argc, argv); - // Assume that the onnx and tensorrt paths can be passed in the constructor here - rclcpp::spin(std::make_shared(argv)); - rclcpp::shutdown(); - return 0; -} diff --git a/Experiments/valery_tests/ros2_ws/src/node_test/src/preprocessing_node.cpp b/Experiments/valery_tests/ros2_ws/src/node_test/src/preprocessing_node.cpp deleted file mode 100644 index 190dfa7..0000000 --- a/Experiments/valery_tests/ros2_ws/src/node_test/src/preprocessing_node.cpp +++ /dev/null @@ -1,71 +0,0 @@ -#include -#include -#include - -class preprocessing_node : public rclcpp::Node -{ -public: - preprocessing_node() : Node("image_concatenator_node") - { - // Subscribe to left and right image topics - left_image_subscriber_ = create_subscription( - "left_image_data", 10, std::bind(&preprocessing_node::leftImageCallback, this, std::placeholders::_1)); - right_image_subscriber_ = create_subscription( - "right_image_data", 10, std::bind(&preprocessing_node::rightImageCallback, this, std::placeholders::_1)); - - // Advertise concatenated and resized image topic - concatenated_image_publisher_ = create_publisher("concatenated_image_data", 10); - - // Advertise preprocessed image topic - preprocessed_image_publisher_ = create_publisher("preprocessed_image", 10); - } - -private: - void leftImageCallback(const sensor_msgs::msg::Image::SharedPtr msg) - { - left_image_ = cv_bridge::toCvCopy(msg, "rgb8")->image; - processAndPublish(); - } - - void rightImageCallback(const sensor_msgs::msg::Image::SharedPtr msg) - { - right_image_ = cv_bridge::toCvCopy(msg, "rgb8")->image; - processAndPublish(); - } - - void processAndPublish() - { - if (left_image_.empty() || right_image_.empty()) - return; - - cv::Mat concatenated_image; - cv::vconcat(left_image_, right_image_, concatenated_image); - - cv::Size target_size(640, 480); - cv::resize(concatenated_image, concatenated_image, target_size); - - // Publish concatenated and resized image - sensor_msgs::msg::Image::SharedPtr output_msg = 
cv_bridge::CvImage(std_msgs::msg::Header(), "rgb8", concatenated_image).toImageMsg(); - concatenated_image_publisher_->publish(output_msg); - - // Publish preprocessed image - preprocessed_image_publisher_->publish(output_msg); - } - - rclcpp::Subscription::SharedPtr left_image_subscriber_; - rclcpp::Subscription::SharedPtr right_image_subscriber_; - rclcpp::Publisher::SharedPtr concatenated_image_publisher_; - rclcpp::Publisher::SharedPtr preprocessed_image_publisher_; - - cv::Mat left_image_; - cv::Mat right_image_; -}; - -int main(int argc, char *argv[]) -{ - rclcpp::init(argc, argv); - auto node = std::make_shared(); - rclcpp::spin(node); - rclcpp::shutdown(); - return 0; -} \ No newline at end of file diff --git a/Experiments/valery_tests/ros2_ws/src/onnx_cpp/CMakeLists.txt b/Experiments/valery_tests/ros2_ws/src/onnx_cpp/CMakeLists.txt deleted file mode 100644 index 1b262b8..0000000 --- a/Experiments/valery_tests/ros2_ws/src/onnx_cpp/CMakeLists.txt +++ /dev/null @@ -1,46 +0,0 @@ -cmake_minimum_required(VERSION 3.5) -project(onnx_cpp) - -if(NOT CMAKE_CXX_STANDARD) - set(CMAKE_CXX_STANDARD 14) -endif() - -if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") - add_compile_options(-Wall -Wextra -Wpedantic) -endif() - - -find_package(ament_cmake REQUIRED) -find_package(rclcpp REQUIRED) -find_package(std_msgs REQUIRED) - -find_package(CURL REQUIRED) -find_package( OpenCV REQUIRED ) -find_package(cv_bridge REQUIRED) -find_package(sensor_msgs REQUIRED) - -add_executable(talker src/talker.cpp) -ament_target_dependencies(talker -OpenCV -cv_bridge -rclcpp -sensor_msgs -std_msgs -) - -# /usr/local/include -add_executable(listener src/listener.cpp) -ament_target_dependencies(listener rclcpp std_msgs -OpenCV -cv_bridge -sensor_msgs -) -include_directories(/usr/local/include) -target_link_libraries(listener onnxruntime) - -install(TARGETS - talker - listener - DESTINATION lib/${PROJECT_NAME}) - -ament_package() \ No newline at end of file diff --git a/Experiments/valery_tests/ros2_ws/src/onnx_cpp/package.xml b/Experiments/valery_tests/ros2_ws/src/onnx_cpp/package.xml deleted file mode 100644 index 35a0963..0000000 --- a/Experiments/valery_tests/ros2_ws/src/onnx_cpp/package.xml +++ /dev/null @@ -1,25 +0,0 @@ - - - - onnx_cpp - 0.0.0 - TODO: Package description - valery - Apache-2.0 - - ament_cmake -rclcpp -std_msgs - - cv_bridge - sensor_msgs - image_transport - - - ament_lint_auto - ament_lint_common - - - ament_cmake - - diff --git a/Experiments/valery_tests/ros2_ws/src/onnx_cpp/src/listener.cpp b/Experiments/valery_tests/ros2_ws/src/onnx_cpp/src/listener.cpp deleted file mode 100644 index 5aa3f71..0000000 --- a/Experiments/valery_tests/ros2_ws/src/onnx_cpp/src/listener.cpp +++ /dev/null @@ -1,110 +0,0 @@ -#include -#include -#include -#include - -#include "rclcpp/rclcpp.hpp" -#include "std_msgs/msg/string.hpp" - -#include // for image processing -#include -#include "sensor_msgs/msg/image.hpp" - -#include - - -using std::placeholders::_1; -using namespace cv; - -std::string image_predict(sensor_msgs::msg::Image in_image); - -class MinimalSubscriber : public rclcpp::Node -{ -public: - MinimalSubscriber() - : Node("minimal_subscriber") - { - subscription_ = this->create_subscription( - "topic", 10, std::bind(&MinimalSubscriber::topic_callback, this, _1)); - Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "ModelRunner"); - - std::string instanceName{"Image classifier inference"}; - environment_ = std::make_unique(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING,"ModelRunner"); - - 
Ort::SessionOptions session_options; - session_options.SetIntraOpNumThreads(1); - session_ = std::make_unique(env, "/home/valery/ros2_cpp_publisher/src/onnx_cpp/src/model.onnx", session_options); - } - -private: - void topic_callback(const sensor_msgs::msg::Image & msg) const - - { - auto start = std::chrono::high_resolution_clock::now(); - std::string prediction = image_predict(msg); - // std::vector expected = {5,4,3,3,0,3,6,6,5,5}; - RCLCPP_INFO(this->get_logger(), "Image: '%s', %s", msg.header.frame_id.c_str(), prediction.c_str()); - auto end = std::chrono::high_resolution_clock::now(); - std::chrono::duration elapsed = end - start; - RCLCPP_INFO(this->get_logger(), "Time: '%s'", std::to_string(elapsed.count()).c_str()); - - - } - - std::string image_predict(sensor_msgs::msg::Image image) const{ - // convert the image into a better format: - std::vector input_vector = process_image(image); - - // Get the input tensor shape - std::vector input_tensor_shape = {1,32, 32, 3}; // Adjust this to match your input shape - - // Create the input tensor object from the data - Ort::MemoryInfo memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); - Ort::Value input_tensor = Ort::Value::CreateTensor(memory_info, input_vector.data(), input_vector.size(), input_tensor_shape.data(), input_tensor_shape.size()); - - std::vector input_names = { "args_0" }; - std::vector input_tensors; - input_tensors.emplace_back(std::move(input_tensor)); - - std::vector output_names = { "dense_4" }; - - std::vector output_tensors = session_->Run(Ort::RunOptions{ nullptr }, input_names.data(), input_tensors.data(), input_tensors.size(), output_names.data(), output_names.size()); - - // Get a pointer to the data in the first output tensor - float* floatarr = output_tensors[0].GetTensorMutableData(); - int cls_idx = std::max_element(floatarr, floatarr + 10) - floatarr; - - return "prediction: " + std::to_string(cls_idx); - } - -std::vector process_image(sensor_msgs::msg::Image image) const{ - cv_bridge::CvImagePtr cv_ptr = cv_bridge::toCvCopy(image, sensor_msgs::image_encodings::BGR8); - cv::Mat image_raw = cv_ptr->image; - - cv::Mat scaledImage,img_greyscale; - image_raw.convertTo(scaledImage, CV_32F, 2.0f / 255.0f, -1.0f); // Scale image pixels: [0 255] -> [-1, 1] - - std::vector img_vector; - - img_vector.assign((float*)scaledImage.datastart, (float*)scaledImage.dataend); - - return img_vector; -} - - rclcpp::Subscription::SharedPtr subscription_; - std::unique_ptr session_; - std::unique_ptr environment_; -}; - - - - -int main(int argc, char * argv[]) -{ - rclcpp::init(argc, argv); - rclcpp::spin(std::make_shared()); - rclcpp::shutdown(); - return 0; -} - - diff --git a/Experiments/valery_tests/ros2_ws/src/onnx_cpp/src/talker.cpp b/Experiments/valery_tests/ros2_ws/src/onnx_cpp/src/talker.cpp deleted file mode 100644 index 63d62be..0000000 --- a/Experiments/valery_tests/ros2_ws/src/onnx_cpp/src/talker.cpp +++ /dev/null @@ -1,68 +0,0 @@ -#include -#include -#include -#include - -#include "rclcpp/rclcpp.hpp" -#include "std_msgs/msg/string.hpp" - -#include // for image processing -#include -#include "sensor_msgs/msg/image.hpp" - -// #include - - -using namespace std::chrono_literals; -using namespace cv; - -/* This example creates a subclass of Node and uses std::bind() to register a - * member function as a callback from the timer. 
*/ - -class MinimalPublisher : public rclcpp::Node -{ -public: - MinimalPublisher() - : Node("minimal_publisher"), count_(0) - { - publisher_ = this->create_publisher("topic", 10); - timer_ = this->create_wall_timer( - 500ms, std::bind(&MinimalPublisher::timer_callback, this)); - } - -private: - void timer_callback() - { - cv::Mat image = cv::imread("ros2_ws/src/onnx_cpp/src/images/mnist_"+ std::to_string(count_ % 10)+ ".png"); - - if (image.empty()) { - RCLCPP_ERROR(this->get_logger(), "Image NOT found"); - return; - } - - std_msgs::msg::Header header = std_msgs::msg::Header(); // empty header - header.frame_id = "image_" + std::to_string(count_++ % 10); // time - - cv_bridge::CvImage img_bridge = cv_bridge::CvImage(header, sensor_msgs::image_encodings::BGR8, image); - - sensor_msgs::msg::Image out_image; // >> message to be sent - img_bridge.toImageMsg(out_image); // from cv_bridge to sensor_msgs::Image - - auto message = std_msgs::msg::String(); - RCLCPP_INFO(this->get_logger(), "Publishing and working: '%s'", header.frame_id.c_str()); - publisher_->publish(out_image); - - - } - rclcpp::TimerBase::SharedPtr timer_; - rclcpp::Publisher::SharedPtr publisher_; - size_t count_; -}; - -int main(int argc, char * argv[]) -{ - rclcpp::init(argc, argv); - rclcpp::spin(std::make_shared()); - rclcpp::shutdown(); - return 0; -} diff --git a/CI-CD Scripts/benchmarking.sh b/actions_scripts/benchmarking.sh similarity index 100% rename from CI-CD Scripts/benchmarking.sh rename to actions_scripts/benchmarking.sh diff --git a/CI-CD Scripts/delete-logs.sh b/actions_scripts/delete-logs.sh similarity index 100% rename from CI-CD Scripts/delete-logs.sh rename to actions_scripts/delete-logs.sh diff --git a/CI-CD Scripts/off_script.py b/actions_scripts/off_script.py similarity index 100% rename from CI-CD Scripts/off_script.py rename to actions_scripts/off_script.py diff --git a/CI-CD Scripts/post-job-script.sh b/actions_scripts/post-job-script.sh similarity index 100% rename from CI-CD Scripts/post-job-script.sh rename to actions_scripts/post-job-script.sh diff --git a/CI-CD Scripts/pre-job-script.sh b/actions_scripts/pre-job-script.sh similarity index 100% rename from CI-CD Scripts/pre-job-script.sh rename to actions_scripts/pre-job-script.sh diff --git a/CI-CD Scripts/system_metrics_benchmarking.py b/actions_scripts/system_metrics_benchmarking.py similarity index 100% rename from CI-CD Scripts/system_metrics_benchmarking.py rename to actions_scripts/system_metrics_benchmarking.py diff --git a/CI-CD Scripts/unit-tests.sh b/actions_scripts/unit-tests.sh similarity index 100% rename from CI-CD Scripts/unit-tests.sh rename to actions_scripts/unit-tests.sh diff --git a/Models/Maize Model/sample_maize_images/IMG_1822_14.JPG b/assets/maize/IMG_1822_14.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1822_14.JPG rename to assets/maize/IMG_1822_14.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_1822_14.txt b/assets/maize/IMG_1822_14.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1822_14.txt rename to assets/maize/IMG_1822_14.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_1828_02.JPG b/assets/maize/IMG_1828_02.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1828_02.JPG rename to assets/maize/IMG_1828_02.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_1828_02.txt b/assets/maize/IMG_1828_02.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1828_02.txt 
rename to assets/maize/IMG_1828_02.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_1830_03.JPG b/assets/maize/IMG_1830_03.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1830_03.JPG rename to assets/maize/IMG_1830_03.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_1830_03.txt b/assets/maize/IMG_1830_03.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1830_03.txt rename to assets/maize/IMG_1830_03.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_1831_20.JPG b/assets/maize/IMG_1831_20.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1831_20.JPG rename to assets/maize/IMG_1831_20.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_1831_20.txt b/assets/maize/IMG_1831_20.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1831_20.txt rename to assets/maize/IMG_1831_20.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_1832_13.JPG b/assets/maize/IMG_1832_13.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1832_13.JPG rename to assets/maize/IMG_1832_13.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_1832_13.txt b/assets/maize/IMG_1832_13.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1832_13.txt rename to assets/maize/IMG_1832_13.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_1843_12.JPG b/assets/maize/IMG_1843_12.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1843_12.JPG rename to assets/maize/IMG_1843_12.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_1843_12.txt b/assets/maize/IMG_1843_12.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1843_12.txt rename to assets/maize/IMG_1843_12.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_1843_30.JPG b/assets/maize/IMG_1843_30.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1843_30.JPG rename to assets/maize/IMG_1843_30.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_1843_30.txt b/assets/maize/IMG_1843_30.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1843_30.txt rename to assets/maize/IMG_1843_30.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_1848_27.JPG b/assets/maize/IMG_1848_27.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1848_27.JPG rename to assets/maize/IMG_1848_27.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_1848_27.txt b/assets/maize/IMG_1848_27.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1848_27.txt rename to assets/maize/IMG_1848_27.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_1851_02.JPG b/assets/maize/IMG_1851_02.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1851_02.JPG rename to assets/maize/IMG_1851_02.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_1851_02.txt b/assets/maize/IMG_1851_02.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1851_02.txt rename to assets/maize/IMG_1851_02.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_1851_07.JPG b/assets/maize/IMG_1851_07.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1851_07.JPG rename to assets/maize/IMG_1851_07.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_1851_07.txt b/assets/maize/IMG_1851_07.txt similarity 
index 100% rename from Models/Maize Model/sample_maize_images/IMG_1851_07.txt rename to assets/maize/IMG_1851_07.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_1852_06.JPG b/assets/maize/IMG_1852_06.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1852_06.JPG rename to assets/maize/IMG_1852_06.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_1852_06.txt b/assets/maize/IMG_1852_06.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1852_06.txt rename to assets/maize/IMG_1852_06.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_1861_04.JPG b/assets/maize/IMG_1861_04.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1861_04.JPG rename to assets/maize/IMG_1861_04.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_1861_04.txt b/assets/maize/IMG_1861_04.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1861_04.txt rename to assets/maize/IMG_1861_04.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_1867_33.JPG b/assets/maize/IMG_1867_33.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1867_33.JPG rename to assets/maize/IMG_1867_33.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_1867_33.txt b/assets/maize/IMG_1867_33.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1867_33.txt rename to assets/maize/IMG_1867_33.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_1874_23.JPG b/assets/maize/IMG_1874_23.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1874_23.JPG rename to assets/maize/IMG_1874_23.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_1874_23.txt b/assets/maize/IMG_1874_23.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1874_23.txt rename to assets/maize/IMG_1874_23.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_1875_21.JPG b/assets/maize/IMG_1875_21.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1875_21.JPG rename to assets/maize/IMG_1875_21.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_1875_21.txt b/assets/maize/IMG_1875_21.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1875_21.txt rename to assets/maize/IMG_1875_21.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_1877_39.JPG b/assets/maize/IMG_1877_39.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1877_39.JPG rename to assets/maize/IMG_1877_39.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_1877_39.txt b/assets/maize/IMG_1877_39.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1877_39.txt rename to assets/maize/IMG_1877_39.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_1878_28.JPG b/assets/maize/IMG_1878_28.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1878_28.JPG rename to assets/maize/IMG_1878_28.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_1878_28.txt b/assets/maize/IMG_1878_28.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1878_28.txt rename to assets/maize/IMG_1878_28.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_1880_28.JPG b/assets/maize/IMG_1880_28.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1880_28.JPG rename to assets/maize/IMG_1880_28.JPG diff --git a/Models/Maize 
Model/sample_maize_images/IMG_1880_28.txt b/assets/maize/IMG_1880_28.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1880_28.txt rename to assets/maize/IMG_1880_28.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_1882_17.JPG b/assets/maize/IMG_1882_17.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1882_17.JPG rename to assets/maize/IMG_1882_17.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_1882_17.txt b/assets/maize/IMG_1882_17.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1882_17.txt rename to assets/maize/IMG_1882_17.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_1883_04.JPG b/assets/maize/IMG_1883_04.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1883_04.JPG rename to assets/maize/IMG_1883_04.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_1883_04.txt b/assets/maize/IMG_1883_04.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1883_04.txt rename to assets/maize/IMG_1883_04.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_1884_28.JPG b/assets/maize/IMG_1884_28.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1884_28.JPG rename to assets/maize/IMG_1884_28.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_1884_28.txt b/assets/maize/IMG_1884_28.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1884_28.txt rename to assets/maize/IMG_1884_28.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_1884_36.JPG b/assets/maize/IMG_1884_36.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1884_36.JPG rename to assets/maize/IMG_1884_36.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_1884_36.txt b/assets/maize/IMG_1884_36.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1884_36.txt rename to assets/maize/IMG_1884_36.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_1886_38.JPG b/assets/maize/IMG_1886_38.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1886_38.JPG rename to assets/maize/IMG_1886_38.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_1886_38.txt b/assets/maize/IMG_1886_38.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1886_38.txt rename to assets/maize/IMG_1886_38.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_1890_08.JPG b/assets/maize/IMG_1890_08.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1890_08.JPG rename to assets/maize/IMG_1890_08.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_1890_08.txt b/assets/maize/IMG_1890_08.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1890_08.txt rename to assets/maize/IMG_1890_08.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_1892_39.JPG b/assets/maize/IMG_1892_39.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1892_39.JPG rename to assets/maize/IMG_1892_39.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_1892_39.txt b/assets/maize/IMG_1892_39.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1892_39.txt rename to assets/maize/IMG_1892_39.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_1892_42.JPG b/assets/maize/IMG_1892_42.JPG similarity index 100% rename from Models/Maize 
Model/sample_maize_images/IMG_1892_42.JPG rename to assets/maize/IMG_1892_42.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_1892_42.txt b/assets/maize/IMG_1892_42.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1892_42.txt rename to assets/maize/IMG_1892_42.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_1893_04.JPG b/assets/maize/IMG_1893_04.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1893_04.JPG rename to assets/maize/IMG_1893_04.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_1893_04.txt b/assets/maize/IMG_1893_04.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1893_04.txt rename to assets/maize/IMG_1893_04.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_1898_13.JPG b/assets/maize/IMG_1898_13.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1898_13.JPG rename to assets/maize/IMG_1898_13.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_1898_13.txt b/assets/maize/IMG_1898_13.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1898_13.txt rename to assets/maize/IMG_1898_13.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_1901_19.JPG b/assets/maize/IMG_1901_19.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1901_19.JPG rename to assets/maize/IMG_1901_19.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_1901_19.txt b/assets/maize/IMG_1901_19.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_1901_19.txt rename to assets/maize/IMG_1901_19.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_2085_42.JPG b/assets/maize/IMG_2085_42.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2085_42.JPG rename to assets/maize/IMG_2085_42.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_2085_42.txt b/assets/maize/IMG_2085_42.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2085_42.txt rename to assets/maize/IMG_2085_42.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_2087_22.JPG b/assets/maize/IMG_2087_22.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2087_22.JPG rename to assets/maize/IMG_2087_22.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_2087_22.txt b/assets/maize/IMG_2087_22.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2087_22.txt rename to assets/maize/IMG_2087_22.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_2090_41.JPG b/assets/maize/IMG_2090_41.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2090_41.JPG rename to assets/maize/IMG_2090_41.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_2090_41.txt b/assets/maize/IMG_2090_41.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2090_41.txt rename to assets/maize/IMG_2090_41.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_2091_39.JPG b/assets/maize/IMG_2091_39.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2091_39.JPG rename to assets/maize/IMG_2091_39.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_2091_39.txt b/assets/maize/IMG_2091_39.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2091_39.txt rename to assets/maize/IMG_2091_39.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_2101_37.JPG 
b/assets/maize/IMG_2101_37.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2101_37.JPG rename to assets/maize/IMG_2101_37.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_2101_37.txt b/assets/maize/IMG_2101_37.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2101_37.txt rename to assets/maize/IMG_2101_37.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_2541_29.JPG b/assets/maize/IMG_2541_29.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2541_29.JPG rename to assets/maize/IMG_2541_29.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_2541_29.txt b/assets/maize/IMG_2541_29.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2541_29.txt rename to assets/maize/IMG_2541_29.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_2543_12.JPG b/assets/maize/IMG_2543_12.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2543_12.JPG rename to assets/maize/IMG_2543_12.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_2543_12.txt b/assets/maize/IMG_2543_12.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2543_12.txt rename to assets/maize/IMG_2543_12.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_2543_17.JPG b/assets/maize/IMG_2543_17.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2543_17.JPG rename to assets/maize/IMG_2543_17.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_2543_17.txt b/assets/maize/IMG_2543_17.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2543_17.txt rename to assets/maize/IMG_2543_17.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_2544_26.JPG b/assets/maize/IMG_2544_26.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2544_26.JPG rename to assets/maize/IMG_2544_26.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_2544_26.txt b/assets/maize/IMG_2544_26.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2544_26.txt rename to assets/maize/IMG_2544_26.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_2574_01.JPG b/assets/maize/IMG_2574_01.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2574_01.JPG rename to assets/maize/IMG_2574_01.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_2574_01.txt b/assets/maize/IMG_2574_01.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2574_01.txt rename to assets/maize/IMG_2574_01.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_2575_21.JPG b/assets/maize/IMG_2575_21.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2575_21.JPG rename to assets/maize/IMG_2575_21.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_2575_21.txt b/assets/maize/IMG_2575_21.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2575_21.txt rename to assets/maize/IMG_2575_21.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_2577_18.JPG b/assets/maize/IMG_2577_18.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2577_18.JPG rename to assets/maize/IMG_2577_18.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_2577_18.txt b/assets/maize/IMG_2577_18.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2577_18.txt rename to 
assets/maize/IMG_2577_18.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_2577_32.JPG b/assets/maize/IMG_2577_32.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2577_32.JPG rename to assets/maize/IMG_2577_32.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_2577_32.txt b/assets/maize/IMG_2577_32.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2577_32.txt rename to assets/maize/IMG_2577_32.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_2642_02.JPG b/assets/maize/IMG_2642_02.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2642_02.JPG rename to assets/maize/IMG_2642_02.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_2642_02.txt b/assets/maize/IMG_2642_02.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2642_02.txt rename to assets/maize/IMG_2642_02.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_2656_35.JPG b/assets/maize/IMG_2656_35.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2656_35.JPG rename to assets/maize/IMG_2656_35.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_2656_35.txt b/assets/maize/IMG_2656_35.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2656_35.txt rename to assets/maize/IMG_2656_35.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_2663_23.JPG b/assets/maize/IMG_2663_23.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2663_23.JPG rename to assets/maize/IMG_2663_23.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_2663_23.txt b/assets/maize/IMG_2663_23.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2663_23.txt rename to assets/maize/IMG_2663_23.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_2717_22.JPG b/assets/maize/IMG_2717_22.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2717_22.JPG rename to assets/maize/IMG_2717_22.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_2717_22.txt b/assets/maize/IMG_2717_22.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2717_22.txt rename to assets/maize/IMG_2717_22.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_2807_28.JPG b/assets/maize/IMG_2807_28.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2807_28.JPG rename to assets/maize/IMG_2807_28.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_2807_28.txt b/assets/maize/IMG_2807_28.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2807_28.txt rename to assets/maize/IMG_2807_28.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_2883_23.JPG b/assets/maize/IMG_2883_23.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2883_23.JPG rename to assets/maize/IMG_2883_23.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_2883_23.txt b/assets/maize/IMG_2883_23.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2883_23.txt rename to assets/maize/IMG_2883_23.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_2884_07.JPG b/assets/maize/IMG_2884_07.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2884_07.JPG rename to assets/maize/IMG_2884_07.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_2884_07.txt b/assets/maize/IMG_2884_07.txt similarity index 100% 
rename from Models/Maize Model/sample_maize_images/IMG_2884_07.txt rename to assets/maize/IMG_2884_07.txt diff --git a/Models/Maize Model/sample_maize_images/IMG_2884_18.JPG b/assets/maize/IMG_2884_18.JPG similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2884_18.JPG rename to assets/maize/IMG_2884_18.JPG diff --git a/Models/Maize Model/sample_maize_images/IMG_2884_18.txt b/assets/maize/IMG_2884_18.txt similarity index 100% rename from Models/Maize Model/sample_maize_images/IMG_2884_18.txt rename to assets/maize/IMG_2884_18.txt diff --git a/Calibration Tools/camera_calibration.py b/calibration_tools/camera_calibration.py similarity index 91% rename from Calibration Tools/camera_calibration.py rename to calibration_tools/camera_calibration.py index 6f36244..6875159 100644 --- a/Calibration Tools/camera_calibration.py +++ b/calibration_tools/camera_calibration.py @@ -84,4 +84,10 @@ def onTrack4(val): cam.close() print("ZED Camera closed") -cv2.destroyAllWindows() \ No newline at end of file +cv2.destroyAllWindows() + +# # Get all connected cameras +# cameras = sl.Camera.get_device_list() +# for cam in cameras: +# # Create and open the camera for each serial number +# zed = self.open_camera(cam.serial_number) \ No newline at end of file diff --git a/Calibration Tools/colorspace.py b/calibration_tools/colorspace_calibration.py similarity index 100% rename from Calibration Tools/colorspace.py rename to calibration_tools/colorspace_calibration.py diff --git a/Calibration Tools/NMS_Calibration.py b/calibration_tools/nms_calibration.py similarity index 100% rename from Calibration Tools/NMS_Calibration.py rename to calibration_tools/nms_calibration.py diff --git a/Calibration Tools/camera_roi.py b/calibration_tools/roi_calibration.py similarity index 100% rename from Calibration Tools/camera_roi.py rename to calibration_tools/roi_calibration.py diff --git a/Calibration Tools/test.jpg b/calibration_tools/test.jpg similarity index 100% rename from Calibration Tools/test.jpg rename to calibration_tools/test.jpg diff --git a/Calibration Tools/values.csv b/calibration_tools/values.csv similarity index 100% rename from Calibration Tools/values.csv rename to calibration_tools/values.csv diff --git a/conversion_tools/ONNX_GS.py b/conversion_tools/ONNX_GS.py new file mode 100644 index 0000000..4fd7926 --- /dev/null +++ b/conversion_tools/ONNX_GS.py @@ -0,0 +1,28 @@ +import onnx_graphsurgeon as gs +import onnx +import argparse + +def optimize_onnx(model_path="/home/user/Downloads/model.onnx"): + print("Optimizing ONNX model") + model = onnx.load(model_path) + graph = gs.import_onnx(model) + + print("Graph nodes before optimization:") + for node in graph.nodes: + print(node) + + graph.cleanup().toposort() + graph.fold_constants() + + model_path = model_path.replace(".onnx", "_optimized.onnx") + onnx.save(gs.export_onnx(graph), model_path) + return model_path + +if __name__ == "__main__": + print("Usage: python3 ONNX_GS.py --model_path=/home/user/Downloads/model.onnx") + + parser = argparse.ArgumentParser(description='Optimize the ONNX model using GraphSurgeon') + parser.add_argument('--model_path', type=str, default="/home/user/Downloads/model.onnx", required=False, help='Path to the ONNX model file (.onnx)') + args = parser.parse_args() + + optimize_onnx(args.model_path) \ No newline at end of file diff --git a/conversion_tools/ONNX_TRT.py b/conversion_tools/ONNX_TRT.py index 603dcba..8549134 100644 --- a/conversion_tools/ONNX_TRT.py +++ b/conversion_tools/ONNX_TRT.py @@ -1,26 
+1,17 @@ +import os import argparse import onnx import tensorrt as trt -import numpy as np -# import pycuda.driver as cuda -# import pycuda.autoinit -def get_max_memory(): - total, free = cuda.mem_get_info() - max_mem = free * 0.95 - - print(f"Total GPU memory: {total / (1024**2)} MB") - print(f"Free GPU memory: {free / (1024**2)} MB") - print(f"Max memory to use: {max_mem / (1024**2)} MB") - return max_mem - -def convert_onnx_to_trt(model_path, output_path, FP16, INT8, strip_weights, batch_size, verbose): - # # Simplify the ONNX model (optional) - # print("Loading the ONNX model") - # onnx_model = onnx.load(model_path) - # graph = gs.import_onnx(onnx_model) - # graph.toposort() - # onnx_model = gs.export_onnx(graph) +def convert_onnx_to_trt(model_path="/home/user/Downloads/model.onnx", output_path="/home/user/Downloads/model.engine", FP16_mode=True, strip_weights=False, gs_optimize=False, verbose=False, verify=True): + if not os.path.exists(model_path): + raise ValueError(f"Model file not found at {model_path}") + if not output_path.endswith(".engine"): + raise ValueError("Output path should end with .engine") + + if gs_optimize: + from ONNX_GS import optimize_onnx + model_path = optimize_onnx(model_path) if verbose: TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) @@ -30,12 +21,10 @@ def convert_onnx_to_trt(model_path, output_path, FP16, INT8, strip_weights, batc builder = trt.Builder(TRT_LOGGER) config = builder.create_builder_config() - # Set cache - cache = config.create_timing_cache(b"") - config.set_timing_cache(cache, ignore_mismatch=False) + # cache = config.create_timing_cache(b"") + # config.set_timing_cache(cache, ignore_mismatch=False) - # Set max workspace - max_workspace = (1 << 30) # 15 + max_workspace = (1 << 33) # 8GB config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, max_workspace) network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) @@ -49,50 +38,45 @@ def convert_onnx_to_trt(model_path, output_path, FP16, INT8, strip_weights, batc inputs = [network.get_input(i) for i in range(network.num_inputs)] outputs = [network.get_output(i) for i in range(network.num_outputs)] + input_shape = inputs[0].shape for input in inputs: print(f"Model {input.name} shape: {input.shape} {input.dtype}") for output in outputs: print(f"Model {output.name} shape: {output.shape} {output.dtype}") - if FP16: + if FP16_mode: config.set_flag(trt.BuilderFlag.FP16) - elif INT8: - config.set_flag(trt.BuilderFlag.INT8) + # elif INT8: + # config.set_flag(trt.BuilderFlag.INT8) if strip_weights: config.set_flag(trt.BuilderFlag.STRIP_PLAN) - # if batch_size > 1: - # # https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#opt_profiles - # profile = builder.create_optimization_profile() - # min_shape = [1] + shape_input_model[-3:] - # opt_shape = [int(max_batch_size/2)] + shape_input_model[-3:] - # max_shape = shape_input_model - # for input in inputs: - # profile.set_shape(input.name, min_shape, opt_shape, max_shape) - # config.add_optimization_profile(profile) - print("Building TensorRT engine. 
This may take a few minutes.") engine_bytes = builder.build_serialized_network(network, config) with open(output_path, "wb") as f: f.write(engine_bytes) + if verify: + from TRT_Verify import verify_trt + verify_trt(model_path, output_path, FP16_mode, input_shape) + print("Engine built successfully") print(f"Converted TensorRT engine saved at {output_path}") - if __name__ == "__main__": - print("Usage: python3 ONNX_TRT.py --model_path=/home/user/Downloads/model.onnx --output_path=/home/user/Downloads/model.engine --FP16=False --INT8=False --strip_weights=False --batch_size=1 --verbose=False ") + print("Usage: python3 ONNX_TRT.py --model_path=/home/user/Downloads/model.onnx --output_path=/home/user/Downloads/model.engine --FP16=False --strip_weights=False --gs_optimize=False --verbose=False --verify=True") parser = argparse.ArgumentParser(description='Convert Onnx model to TensorRT') - parser.add_argument('--model_path', type=str, default="/home/user/Downloads/model.onnx", required=False, help='Path to the PyTorch model file (.pt)') - parser.add_argument('--output_path', type=str, default="/home/user/Downloads/model.engine", required=False, help='Path to save the converted TensorRT model file (.trt)') + parser.add_argument('--model_path', type=str, default="/home/user/Downloads/model.onnx", required=False, help='Path to the ONNX model file (.onnx)') + parser.add_argument('--output_path', type=str, default="/home/user/Downloads/model.engine", required=False, help='Path to save the converted TensorRT model file (.engine)') parser.add_argument('--FP16', type=bool, default=False, help="FP16 precision mode") - parser.add_argument('--INT8', type=bool, default=False, help="INT8 precision mode") + # parser.add_argument('--INT8', type=bool, default=False, help="INT8 precision mode") parser.add_argument('--strip_weights', type=bool, default=False, help="Strip unnecessary weights") - parser.add_argument('--batch_size', type=int, default=1, help='Batch size') + parser.add_argument('--gs_optimize', type=bool, default=False, help='Use ONNX GraphSurgeon to optimize model first') parser.add_argument('--verbose', type=bool, default=False, help="Verbose TensorRT logging") + parser.add_argument('--verify', type=bool, default=True, help="Verify the converted engine output") args = parser.parse_args() - convert_onnx_to_trt(args.model_path, args.output_path, args.FP16, args.INT8, args.strip_weights, args.batch_size, args.verbose) \ No newline at end of file + convert_onnx_to_trt(args.model_path, args.output_path, args.FP16, args.strip_weights, args.gs_optimize, args.verbose) \ No newline at end of file diff --git a/conversion_tools/ONNX_Verify.py b/conversion_tools/ONNX_Verify.py new file mode 100644 index 0000000..3ca7703 --- /dev/null +++ b/conversion_tools/ONNX_Verify.py @@ -0,0 +1,68 @@ +import time +import onnx +import onnxruntime as ort +import numpy as np + +# put random input shape into CUDA if using CUDA provider? 
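Note on the ONNX_TRT.py command line above: argparse's type=bool converts any non-empty string to True, so an invocation like --FP16=False still enables FP16. Below is a minimal sketch of a string-to-boolean helper the CLI could use instead; str2bool and the two example flags are illustrative assumptions, not code from the patch.

import argparse

def str2bool(value):
    # argparse's type=bool treats the string "False" as truthy; parse textual booleans explicitly.
    if isinstance(value, bool):
        return value
    if value.lower() in ("yes", "true", "t", "1"):
        return True
    if value.lower() in ("no", "false", "f", "0"):
        return False
    raise argparse.ArgumentTypeError(f"Expected a boolean value, got {value!r}")

parser = argparse.ArgumentParser(description="Convert ONNX model to TensorRT")
parser.add_argument("--FP16", type=str2bool, default=False, help="FP16 precision mode")
parser.add_argument("--verify", type=str2bool, default=True, help="Verify the converted engine output")
args = parser.parse_args()

The same pattern would apply to the boolean flags in the PT_ONNX.py and PT_TRT.py scripts later in this patch.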
+def verify_onnx(model_path, compared_outputs, model_dimensions, fp_16): + print("Verifying the converted model") + onnx_output, onnx_inference = predict_onnx(model_path, fp_16, model_dimensions) + + print("ONNX inference time:", onnx_inference, "ms") + + # Calculate MSE (Mean Squared Error) + mse = np.mean((onnx_output - compared_outputs) ** 2) + print("MSE between ONNX and TensorRT outputs:", mse) + + # Calculate MAE (Mean Absolute Error) + mae = np.mean(np.abs(onnx_output - compared_outputs)) + print("MAE between ONNX and TensorRT outputs:", mae) + return + +# any other chanes for fp_16 to work? +def predict_onnx(model_path, fp_16, input_shape): + onnx_session = ort.InferenceSession(model_path,providers=["CUDAExecutionProvider"]) + + if fp_16: + random_input = np.random.randn(input_shape).astype(np.float16) + else: + random_input = np.random.randn(input_shape).astype(np.float32) + + input_name = onnx_session.get_inputs()[0].name + tic = time.perf_counter_ns() + # results_ort = session.run([out.name for out in session.get_outputs()], {session.get_inputs()[0].name: x_test}) + # results_ort = onnx_session.run([out.name for out in session.get_outputs()], {session.get_inputs()[0].name: model_test}) + onnx_output = onnx_session.run(None, {input_name: random_input}) + toc = time.perf_counter_ns() + onnx_output = onnx_output[0] + # onnx_output= np.array(onnx_output) + return onnx_output, (toc - tic) / 1e6 + +# given the predictions from the original model and the converted model, check if they are consistent +# shape of predictions_original and converted_results should be the same +# only checks for the predicted class (aka the argmax) +# takes in two 2D arrays: first dimension is the number of samples, second dimension is the number of classes and values correspond to confidence +def checkPredictionConsistency(predictions_original, converted_results): + for n in range(predictions_original.shape[0]): + if np.argmax(predictions_original[n]) != np.argmax(converted_results[n]): + print(f"Original: {np.argmax(predictions_original[n])}, ONNX: {np.argmax(converted_results[n])}") + print(f"{predictions_original[n]}, \n{converted_results[n]}") + print("=====================================") + raise ValueError("Predictions are not consistent") + + print("All predictions are consistent") + +# given the predictions from the original model and the converted model, check if they are consistent +# shape of predictions_original and converted_results should be the same +# only checks for the difference in confidence +# takes in two 2D arrays: first dimension is the number of samples, second dimension is the number of classes and values correspond to confidence +# tolerance: the maximum difference in confidence that is allowed +def checkConfidenceConsistency(predictions_original, converted_results, tolerance=1e-5): + np.testing.assert_allclose(predictions_original, converted_results,atol=tolerance) + # for n in range(predictions_original.shape[0]): + # if not np.allclose(predictions_original[n], converted_results[n], atol=tolerance): + # print(f"Original: \t {predictions_original[n]}, \nONNX: \t{converted_results[n]}") + # print("=====================================") + # return + + print("All confidence percentages are consistent") \ No newline at end of file diff --git a/conversion_tools/PT_ONNX.py b/conversion_tools/PT_ONNX.py index 20b4ed7..9f95bcd 100644 --- a/conversion_tools/PT_ONNX.py +++ b/conversion_tools/PT_ONNX.py @@ -1,96 +1,76 @@ -import sys +import os +import time +import argparse import torch 
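Note on ONNX_Verify.py above: np.random.randn expects unpacked dimensions, so passing the shape tuple directly (np.random.randn(input_shape)) raises a TypeError, and a float16 input only makes sense when the exported model itself declares float16 inputs. Below is a hedged sketch of the helper as it appears to be intended; the function name and provider list mirror the patch, the body is illustrative.

import time
import numpy as np
import onnxruntime as ort

def predict_onnx(model_path, fp_16, input_shape):
    session = ort.InferenceSession(model_path, providers=["CUDAExecutionProvider"])
    # Unpack the shape tuple; the dtype must match the model's declared input type.
    dtype = np.float16 if fp_16 else np.float32
    random_input = np.random.randn(*input_shape).astype(dtype)
    input_name = session.get_inputs()[0].name
    tic = time.perf_counter_ns()
    onnx_output = session.run(None, {input_name: random_input})[0]
    toc = time.perf_counter_ns()
    return onnx_output, (toc - tic) / 1e6  # latency in milliseconds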
import torch.onnx import onnx -# import onnxruntime as ort import numpy as np - +from ultralytics import YOLO OPSET_VERS = 13 -# netowrk is the model -# network.eval() -# torch_out_load = network(example_data) -# mct.torch_to_onnx(network, example_data, "torch_model.onnx") -# ort_session = mct.loadOnnxModel("torch_model.onnx") -# ort_predictions = mct.predictOnnx(example_data.numpy(), session=ort_session) -# ort_predictions[0][0] = 3 -# mct.checkPredictionConsistency(torch_out_load.detach().numpy(), ort_predictions) -# mct.checkConfidenceConsistency(torch_out_load.detach().numpy(), ort_predictions) - -# given an array of test inputs and a path to onnx model or a session returns the predictions -def predictOnnx(x_test,session=None,dest_path=""): - if session is None and dest_path == "": - raise ValueError("No model or path provided, please specifiy one of them.") - if session is None: - session = loadOnnxModel(dest_path) - - results_ort = session.run([out.name for out in session.get_outputs()], {session.get_inputs()[0].name: x_test}) - return np.array(results_ort[0]) - -# given the predictions from the original model and the converted model, check if they are consistent -# shape of predictions_original and converted_results should be the same -# only checks for the predicted class (aka the argmax) -# takes in two 2D arrays: first dimension is the number of samples, second dimension is the number of classes and values correspond to confidence -def checkPredictionConsistency(predictions_original, converted_results): - for n in range(predictions_original.shape[0]): - if np.argmax(predictions_original[n]) != np.argmax(converted_results[n]): - print(f"Original: {np.argmax(predictions_original[n])}, ONNX: {np.argmax(converted_results[n])}") - print(f"{predictions_original[n]}, \n{converted_results[n]}") - print("=====================================") - raise ValueError("Predictions are not consistent") - - print("All predictions are consistent") - -# given the predictions from the original model and the converted model, check if they are consistent -# shape of predictions_original and converted_results should be the same -# only checks for the difference in confidence -# takes in two 2D arrays: first dimension is the number of samples, second dimension is the number of classes and values correspond to confidence -# tolerance: the maximum difference in confidence that is allowed -def checkConfidenceConsistency(predictions_original, converted_results, tolerance=1e-5): - np.testing.assert_allclose(predictions_original, converted_results,atol=tolerance) - # for n in range(predictions_original.shape[0]): - # if not np.allclose(predictions_original[n], converted_results[n], atol=tolerance): - # print(f"Original: \t {predictions_original[n]}, \nONNX: \t{converted_results[n]}") - # print("=====================================") - # return - - print("All confidence percentages are consistent") - -def convert_pytorch_to_onnx(model_path="./model.pt", output_path="./model.onnx", input_shape=(1, 3, 448, 1024), constant_folding=False): +def convert_pytorch_to_onnx(model_path="/home/user/Downloads/model.pt", output_path="/home/user/Downloads/model.onnx", FP16_mode=False, constant_folding=False, gs_optimize=False, model_dimensions=None, verify=True, verbose=False): + if not os.path.exists(model_path): + raise ValueError(f"Model file not found at {model_path}") + if not output_path.endswith(".onnx"): + raise ValueError("Output path should end with .onnx") + if verify and model_dimensions is None: + raise 
ValueError("Model dimensions are required for verification") + print("Loading the PyTorch model") - model = torch.load(model_path) - model.eval() - # traced_model = torch.jit.trace(model, torch.randn(input_shape)) + model = YOLO(model_path) + + model.eval().cuda() + input_data = torch.randn(model_dimensions).cuda() - input_data = torch.randn(input_shape).cuda() + if FP16_mode: + model.half() + input_data.half() - print("Exporting model to ONNX format") + tic = time.perf_counter_ns() + torch_out = model(input_data) + toc = time.perf_counter_ns() + + # names might be wrong + print("Exporting model to ONNX format") torch.onnx.export(model, input_data, output_path, - verbose=True, + verbose=verbose, opset_version=OPSET_VERS, export_params=True, do_constant_folding=constant_folding, input_names = ['input'], output_names = ['output'], - dynamic_axes={'input' : {0 : 'batch_size'}, - 'output' : {0 : 'batch_size'}} ) model = onnx.load(output_path) onnx.checker.check_model(model) - print("Model converted successfully") + + if gs_optimize: + from ONNX_GS import optimize_onnx + output_path = optimize_onnx(output_path) + print(model.graph) + print("Model converted successfully") + + if verify: + print("PyTorch inference time:", (toc - tic) / 1e6, "ms") + from ONNX_Verify import verify_onnx + verify_onnx(model_path, np.array(torch_out), model_dimensions) # torch.out.numpy() print(f"Converted ONNX model saved at {output_path}") - # return loadOnnxModel(output_path) - -def loadOnnxModel(path, providers=["CUDAExecutionProvider"]): - return ort.InferenceSession(path,providers=providers) + return if __name__ == "__main__": - if len(sys.argv) != 2: - print("Usage: python convert_to_trt.py ") - sys.exit(1) - - model_path = sys.argv[1] - convert_pytorch_to_onnx(model_path) \ No newline at end of file + print("Usage: python3 PT_ONNX.py --model_path=/home/user/Downloads/model.onnx --output_path=/home/user/Downloads/model.engine --FP16=False --constant_folding=True --gs_optimize=False --model_dimensions=(1, 3, 448, 1024) --verify=True --verbose=False") + + parser = argparse.ArgumentParser(description='Convert PyTorch model to ONNX') + parser.add_argument('--model_path', type=str, default="/home/user/Downloads/model.pt", required=False, help='Path to the PyTorch model file (.pt)') + parser.add_argument('--output_path', type=str, default="/home/user/Downloads/model.onnx", required=False, help='Path to save the converted ONNX model file (.onnx)') + parser.add_argument('--FP16', type=bool, default=False, help="FP16 precision mode") + parser.add_argument('--constant_folding', type=bool, default=False, help="Apply constant folding opreation") + parser.add_argument('--gs_optimize', type=bool, default=False, help='Use ONNX GraphSurgeon to optimize model after conversion') + parser.add_argument('--model_dimensions', type=tuple, default=False, help="Model input dimensions") + parser.add_argument('--verify', type=bool, default=True, help="Verify the converted model") + parser.add_argument('--verbose', type=bool, default=False, help="Verbose mode") + args = parser.parse_args() + + convert_pytorch_to_onnx(args.model_path, args.output_path, args.FP16, args.constant_folding, args.gs_optimize, args.model_dimensions, args.verify, args.verbose) \ No newline at end of file diff --git a/conversion_tools/PT_TRT.py b/conversion_tools/PT_TRT.py index e728c33..eae3858 100644 --- a/conversion_tools/PT_TRT.py +++ b/conversion_tools/PT_TRT.py @@ -1,66 +1,66 @@ +import os +import time import argparse import torch -import torch_tensorrt +from 
torch2trt import torch2trt import numpy as np -# import pycuda.driver as cuda -# import pycuda.autoinit +from ultralytics import YOLO -def get_max_memory(): - total, free = cuda.mem_get_info() - max_mem = free * 0.95 - - print(f"Total GPU memory: {total / (1024**2)} MB") - print(f"Free GPU memory: {free / (1024**2)} MB") - print(f"Max memory to use: {max_mem / (1024**2)} MB") - return max_mem - -# def convert_pt_to_trt(model_path='./model.pt', output_path='./model_trt.trt', FP16_mode=True, batch_size=1, input_shape=(1, 3, 224, 224)): -# print("Loading the PyTorch model") -# model = torch.load(model_path, weights_only=True) -# model.eval() - -# input_data = torch.randn(input_shape).cuda() -# print("Building TensorRT engine. This may take a few minutes.") -# model_trt = torch2trt(model, [input_data], fp16_mode=FP16_mode, max_batch_size=batch_size, max_workspace_size=15000000000) # get_max_memory() -# # torch.save(model_trt.state_dict(), output_file) - -# with open(output_path, 'wb') as f: -# f.write(model_trt.engine.serialize()) +def convert_pt_to_trt(model_path='/home/user/Downloads/model.pt', output_path='/home/user/Downloads/model.pth', FP16_mode=False, input_shape=(1, 3, 448, 1024), verify=True): + if not os.path.exists(model_path): + raise ValueError(f"Model file not found at {model_path}") + if not output_path.endswith(".pth"): + raise ValueError("Output path should end with .pth") + if input_shape is None: + raise ValueError("Input shape is required for conversion") -# print("Engine built successfully") -# print(f"Converted TensorRT engine saved at {output_path}") -# return model_trt - -def convert_pt_to_trt(model_path='./model.pt', output_path='./model_trt.trt', batch_size=1, input_shape=(1, 3, 448, 1024)): print("Loading the PyTorch model") - model = torch.load(model_path, weights_only=True) - model = model().eval().cuda() - - inputs = [torch.randn(input_shape).cuda()] - trt_gm = torch_tensorrt.compile(model, inputs, ir="dynamo") - torch_tensorrt.save(trt_gm, "trt.ep", inputs=inputs) - print("Building TensorRT engine. This may take a few minutes.") - model_trt = torch2trt(model, [input_data], max_batch_size=batch_size, max_workspace_size=15000000000) # get_max_memory() - # torch.save(model_trt.state_dict(), output_file) + model = YOLO(model_path).cuda().eval() + + if FP16_mode: + data = torch.randn(input_shape).cuda().half() + print("Building TensorRT engine. This may take a few minutes.") + model_trt = torch2trt(model, [data], fp16_mode=True) + model = model.half() + else: + data = torch.randn(input_shape).cuda() + print("Building TensorRT engine. 
This may take a few minutes.") + model_trt = torch2trt(model, [data]) + + print("Engine built successfully") + + if verify: + tic = time.perf_counter_ns() + output_trt = model_trt(data) + toc = time.perf_counter_ns() + print(f"TensorRT Inference time: {(toc - tic)/1e6}ms") + + tic = time.perf_counter_ns() + output = model(data) + toc = time.perf_counter_ns() + print(f"PyTorch Inference time: {(toc - tic)/1e6}ms") + + # Mean Squared Error (MSE) + mse = torch.mean((output - output_trt) ** 2) + print(f"MSE between PyTorch and TensorRT outputs: {mse.item()}") - with open(output_path, 'wb') as f: - f.write(model_trt.engine.serialize()) + # Mean Absolute Error (MAE) + mae = torch.mean(torch.abs(output - output_trt)) + print(f"MAE between PyTorch and TensorRT outputs: {mae.item()}") - print("Engine built successfully") + torch.save(model_trt.state_dict(), output_path) print(f"Converted TensorRT engine saved at {output_path}") - return model_trt + return if __name__ == "__main__": - # print the defaults, set them here rather than in the function - print("Usage: python3 PyTorch_TensorRT.py FP16_mode batch_size input_shape") - print("Example: python3 PyTorch_TensorRT.py ./model.pt ./model_trt.trt True 1 (1, 3, 224, 224)") + print("Usage: python3 PT_TRT.py --model_path=/home/user/Downloads/model.onnx --output_path=/home/user/Downloads/model.engine --FP16_mode=False --input_shape=(1, 3, 448, 1024) --verify=True") parser = argparse.ArgumentParser(description='Convert PyTorch model to TensorRT') parser.add_argument('--modelpath', type=str, required=False, help='Path to the PyTorch model file (.pt)') parser.add_argument('--outputpath', type=str, required=False, help='Path to save the converted TensorRT model file (.trt)') parser.add_argument('--FP16_mode', type=bool, default=True, help='FP16 mode for TensorRT') - parser.add_argument('--batch_size', type=int, default=1, help='Batch size for TensorRT') - parser.add_argument('--input_shape', type=tuple, default=(1, 3, 224, 224), help='Input shape for TensorRT') + parser.add_argument('--input_shape', type=tuple, default=(1, 3, 448, 1024), help='Input shape for TensorRT') + parser.add_argument('--verify', type=bool, default=True, help='Verify the converted model') args = parser.parse_args() - convert_pt_to_trt(args.modelpath, args.outputpath, args.FP16_mode, args.batch_size, args.input_shape) \ No newline at end of file + convert_pt_to_trt(args.modelpath, args.outputpath, args.FP16_mode, args.input_shape, args.verify) \ No newline at end of file diff --git a/conversion_tools/TF_ONNX.py b/conversion_tools/TF_ONNX.py deleted file mode 100644 index 7ef7e5b..0000000 --- a/conversion_tools/TF_ONNX.py +++ /dev/null @@ -1,86 +0,0 @@ -import sys -import tensorflow as tf -import tf2onnx -# from tensorflow.python.tools import optimize_for_inference_lib -import onnx -# import onnxruntime as ort -import numpy as np -import pycuda.driver as cuda - -OPSET_VERS = 13 - -# tf_predictions = tf_model.predict(x_test) -# results_tf_ort = mct.predictOnnx(x_test, session=onnx_sess) -# mct.checkPredictionConsistency(tf_predictions, results_tf_ort) -# mct.checkConfidenceConsistency(tf_predictions, results_tf_ort) -# given an array of test inputs and a path to onnx model or a session returns the predictions -def predictOnnx(x_test,session=None,dest_path=""): - if session is None and dest_path == "": - raise ValueError("No model or path provided, please specifiy one of them.") - if session is None: - session = loadOnnxModel(dest_path) - - results_ort = session.run([out.name for out in 
session.get_outputs()], {session.get_inputs()[0].name: x_test}) - return np.array(results_ort[0]) - -# given the predictions from the original model and the converted model, check if they are consistent -# shape of predictions_original and converted_results should be the same -# only checks for the predicted class (aka the argmax) -# takes in two 2D arrays: first dimension is the number of samples, second dimension is the number of classes and values correspond to confidence -def checkPredictionConsistency(predictions_original, converted_results): - for n in range(predictions_original.shape[0]): - if np.argmax(predictions_original[n]) != np.argmax(converted_results[n]): - print(f"Original: {np.argmax(predictions_original[n])}, ONNX: {np.argmax(converted_results[n])}") - print(f"{predictions_original[n]}, \n{converted_results[n]}") - print("=====================================") - raise ValueError("Predictions are not consistent") - - print("All predictions are consistent") - -# given the predictions from the original model and the converted model, check if they are consistent -# shape of predictions_original and converted_results should be the same -# only checks for the difference in confidence -# takes in two 2D arrays: first dimension is the number of samples, second dimension is the number of classes and values correspond to confidence -# tolerance: the maximum difference in confidence that is allowed -def checkConfidenceConsistency(predictions_original, converted_results, tolerance=1e-5): - np.testing.assert_allclose(predictions_original, converted_results,atol=tolerance) - # for n in range(predictions_original.shape[0]): - # if not np.allclose(predictions_original[n], converted_results[n], atol=tolerance): - # print(f"Original: \t {predictions_original[n]}, \nONNX: \t{converted_results[n]}") - # print("=====================================") - # return - - print("All confidence percentages are consistent") - -def convert_tf_to_onnx(model_path="./model.pb", output_path="./model.onnx", input_names=["input"], output_names=["output"], constant_folding=False): - print("Loading the TensorFlow model") - with tf.io.gfile.GFile(model_path, "rb") as f: - frozen_graph_def = tf.compat.v1.GraphDef() - frozen_graph_def.ParseFromString(f.read()) - - print("Exporting the model to ONNX format") - onnx_model, _ = tf2onnx.convert.from_graph_def( - frozen_graph_def, # The frozen graph definition - input_names=input_names, - output_names=output_names, - opset=13 - ) - - with open(output_path, "wb") as f: - f.write(onnx_model.SerializeToString()) - - model = onnx.load(output_path) - onnx.checker.check_model(model) - print("Model converted successfully") - print(model.graph) - - print(f"Converted ONNX model saved at {output_path}") - # return loadOnnxModel(onnx_model_str) - -if __name__ == "__main__": - if len(sys.argv) != 2: - print("Usage: python convert_to_trt.py ") - sys.exit(1) - - model_path = sys.argv[1] - convert_tf_to_onnx(model_path) \ No newline at end of file diff --git a/conversion_tools/TRT_Benchmark.py b/conversion_tools/TRT_Benchmark.py index 90e7206..b1fd44c 100644 --- a/conversion_tools/TRT_Benchmark.py +++ b/conversion_tools/TRT_Benchmark.py @@ -279,4 +279,267 @@ def run_benchmark(trt_model_path, test_images, ground_truth_path): # Update paths for the .trt model, test images, and ground truth file. # Ensure the bounding box coordinates are converted between the format of the ground truth and model output if necessary. 
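Note on the PT_ONNX.py and PT_TRT.py hunks above: torch.Tensor.half() and torch.nn.Module.half() return converted objects rather than converting in place, so input_data.half() without reassignment leaves the export input in float32, and verify_onnx is defined with an fp_16 parameter that the call site never passes. Below is a minimal sketch of the reassignment, assuming the loaded model behaves like a plain nn.Module; prepare_fp16 is an illustrative helper name, not part of the patch.

import torch

def prepare_fp16(model, input_data, fp16_mode):
    # .half() is not in-place; both results must be reassigned before tracing or export.
    if fp16_mode:
        model = model.half()
        input_data = input_data.half()
    return model, input_data

# Usage sketch inside convert_pytorch_to_onnx:
#     model, input_data = prepare_fp16(model, input_data, FP16_mode)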
-# Expand with more metrics such as precision, recall, or F1 score based on IoU thresholds if relevant. \ No newline at end of file +# Expand with more metrics such as precision, recall, or F1 score based on IoU thresholds if relevant. + + +import tensorrt as trt +import pycuda.driver as cuda +import pycuda.autoinit +import numpy as np + +def allocate_buffers(engine): + """ + Allocates input/output buffers for TensorRT engine inference. + Args: + engine: The TensorRT engine. + Returns: + inputs: List of input GPU buffers. + outputs: List of output GPU buffers. + bindings: List of bindings for the model. + stream: CUDA stream for the inference. + """ + inputs = [] + outputs = [] + bindings = [] + stream = cuda.Stream() + + for binding in engine: + size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size + dtype = trt.nptype(engine.get_binding_dtype(binding)) + + # Allocate host and device buffers + host_mem = cuda.pagelocked_empty(size, dtype) + device_mem = cuda.mem_alloc(host_mem.nbytes) + + # Append the device buffer to device bindings + bindings.append(int(device_mem)) + + # Append to the appropriate list + if engine.binding_is_input(binding): + inputs.append((host_mem, device_mem)) + else: + outputs.append((host_mem, device_mem)) + + return inputs, outputs, bindings, stream + +def infer(engine, inputs, outputs, bindings, stream, input_data): + """ + Performs inference on the input data using the TensorRT engine. + Args: + engine: The TensorRT engine. + inputs: List of input buffers. + outputs: List of output buffers. + bindings: List of bindings for the model. + stream: CUDA stream for the inference. + input_data: The data to be used as input for the model. + Returns: + output: The model's output. + """ + # Transfer input data to the device + np.copyto(inputs[0][0], input_data.ravel()) + cuda.memcpy_htod_async(inputs[0][1], inputs[0][0], stream) + + # Execute the model + context = engine.create_execution_context() + context.execute_async_v2(bindings=bindings, stream_handle=stream.handle) + + # Transfer predictions back from the GPU + cuda.memcpy_dtoh_async(outputs[0][0], outputs[0][1], stream) + + # Wait for the stream to complete the operation + stream.synchronize() + + return outputs[0][0] + +def test_trt_engine(trt_engine_path, input_shape, input_data=None, expected_output=None): + """ + Tests a TensorRT engine file by performing inference and checking outputs. + Args: + trt_engine_path: Path to the TensorRT engine file. + input_shape: Shape of the input data. + input_data: Optional input data. If None, random data will be generated. + expected_output: Optional expected output. If provided, it will be compared to the TensorRT inference result. + Returns: + True if the engine works and inference results match the expected output (if provided), otherwise False. 
+ """ + # Load the TensorRT engine from the file + with open(trt_engine_path, "rb") as f: + runtime = trt.Runtime(trt.Logger(trt.Logger.WARNING)) + engine = runtime.deserialize_cuda_engine(f.read()) + + # Allocate buffers for inference + inputs, outputs, bindings, stream = allocate_buffers(engine) + + # Generate random input data if not provided + if input_data is None: + input_data = np.random.rand(*input_shape).astype(np.float32) + + # Perform inference using the TensorRT engine + output = infer(engine, inputs, outputs, bindings, stream, input_data) + + # Print the inference result + print("Inference output:", output) + + # Compare with expected output if provided + if expected_output is not None: + if np.allclose(output, expected_output, rtol=1e-3, atol=1e-3): + print("The inference result matches the expected output.") + return True + else: + print("The inference result does not match the expected output.") + return False + else: + print("No expected output provided. Unable to verify accuracy.") + return True # Pass as long as inference ran without errors + +# Example usage: +# Test TensorRT engine using random input +trt_engine_path = "model.trt" # Path to your TensorRT engine file +input_shape = (1, 3, 224, 224) # Adjust based on your model's input shape + +test_trt_engine(trt_engine_path, input_shape) + +input_data = np.random.rand(1, 3, 224, 224).astype(np.float32) # Example input, replace with actual data +expected_output = np.random.rand(1, 1000).astype(np.float32) # Example expected output (optional) +test_trt_engine("path_to_your_model.trt", (1, 3, 224, 224), input_data=input_data, expected_output=expected_output) + +### new! + +import time +import numpy as np +import cv2 +import pycuda.driver as cuda +import pycuda.autoinit +import tensorrt as trt +from onnxruntime import InferenceSession + +# Helper functions +def load_ground_truth(file_path): + """Load ground truth bounding boxes from text file.""" + with open(file_path, 'r') as f: + bboxes = [] + for line in f: + tokens = line.strip().split() + cls, x_center, y_center, width, height = map(float, tokens) + bboxes.append((cls, x_center, y_center, width, height)) + return bboxes + +def iou(boxA, boxB): + """Compute Intersection Over Union (IoU) between two bounding boxes.""" + xA = max(boxA[0], boxB[0]) + yA = max(boxA[1], boxB[1]) + xB = min(boxA[2], boxB[2]) + yB = min(boxA[3], boxB[3]) + + interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1) + boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1) + boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1) + + iou = interArea / float(boxAArea + boxBArea - interArea) + return iou + +def calculate_centroid_offset(pred_box, gt_box): + """Calculate percentage centroid offset between two boxes.""" + pred_center = (pred_box[0] + pred_box[2]) / 2, (pred_box[1] + pred_box[3]) / 2 + gt_center = (gt_box[0] + gt_box[2]) / 2, (gt_box[1] + gt_box[3]) / 2 + offset_x = abs(pred_center[0] - gt_center[0]) / (gt_box[2] - gt_box[0]) + offset_y = abs(pred_center[1] - gt_center[1]) / (gt_box[3] - gt_box[1]) + return (offset_x + offset_y) / 2 * 100 + +# Load TensorRT model +TRT_LOGGER = trt.Logger(trt.Logger.WARNING) + +class TRTInference: + def __init__(self, engine_path): + self.engine = self.load_engine(engine_path) + self.context = self.engine.create_execution_context() + + def load_engine(self, engine_path): + with open(engine_path, 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime: + return runtime.deserialize_cuda_engine(f.read()) + + def allocate_buffers(self): + inputs = [] + 
outputs = [] + bindings = [] + stream = cuda.Stream() + + for binding in self.engine: + size = trt.volume(self.engine.get_binding_shape(binding)) * self.engine.max_batch_size + dtype = trt.nptype(self.engine.get_binding_dtype(binding)) + host_mem = cuda.pagelocked_empty(size, dtype) + device_mem = cuda.mem_alloc(host_mem.nbytes) + bindings.append(int(device_mem)) + + if self.engine.binding_is_input(binding): + inputs.append({'host': host_mem, 'device': device_mem}) + else: + outputs.append({'host': host_mem, 'device': device_mem}) + + return inputs, outputs, bindings, stream + + def infer(self, image, inputs, outputs, bindings, stream): + np.copyto(inputs[0]['host'], image.ravel()) + + # Transfer input data to the GPU. + cuda.memcpy_htod_async(inputs[0]['device'], inputs[0]['host'], stream) + + # Run inference. + self.context.execute_async_v2(bindings=bindings, stream_handle=stream.handle) + + # Transfer predictions back from GPU. + cuda.memcpy_dtoh_async(outputs[0]['host'], outputs[0]['device'], stream) + stream.synchronize() + + return outputs[0]['host'] + +def preprocess_image(image_path, input_shape): + """Preprocess image for inference.""" + image = cv2.imread(image_path) + image_resized = cv2.resize(image, (input_shape[1], input_shape[0])) + image = np.asarray(image_resized).astype(np.float32) + return np.transpose(image, (2, 0, 1)) / 255.0 # CHW format and normalized + +def run_benchmark(trt_model_path, test_images, ground_truth_path): + """Run benchmark on the model.""" + # Load ground truth + ground_truth_bboxes = load_ground_truth(ground_truth_path) + + # Initialize TensorRT inference + trt_infer = TRTInference(trt_model_path) + inputs, outputs, bindings, stream = trt_infer.allocate_buffers() + + inference_times = [] + iou_scores = [] + centroid_offsets = [] + + for idx, img_path in enumerate(test_images): + # Preprocess image + image = preprocess_image(img_path, (300, 300)) # Adjust size as needed + + # Perform inference and measure time + start_time = time.time() + pred_bbox = trt_infer.infer(image, inputs, outputs, bindings, stream) + inference_time = time.time() - start_time + + # Compute IoU, centroid offset + gt_bbox = ground_truth_bboxes[idx] + iou_score = iou(pred_bbox, gt_bbox) + offset = calculate_centroid_offset(pred_bbox, gt_bbox) + + # Store results + inference_times.append(inference_time) + iou_scores.append(iou_score) + centroid_offsets.append(offset) + + # Summary of benchmark + print(f"Average Inference Time: {np.mean(inference_times):.4f} seconds") + print(f"Average IoU: {np.mean(iou_scores) * 100:.2f}%") + print(f"Average Centroid Offset: {np.mean(centroid_offsets):.2f}%") + +if __name__ == "__main__": + trt_model_path = "model.trt" # Replace with your TensorRT model path + test_images = ["test1.jpg", "test2.jpg"] # Replace with your test images + ground_truth_path = "ground_truth.txt" # Replace with your ground truth file path + + run_benchmark(trt_model_path, test_images, ground_truth_path) \ No newline at end of file diff --git a/conversion_tools/TRT_Verify.py b/conversion_tools/TRT_Verify.py new file mode 100644 index 0000000..83bb731 --- /dev/null +++ b/conversion_tools/TRT_Verify.py @@ -0,0 +1,63 @@ +import time +import tensorrt as trt +import pycuda.driver as cuda +import pycuda.autoinit +import numpy as np + +# Load TensorRT engine +def load_engine(engine_file_path): + TRT_LOGGER = trt.Logger(trt.Logger.WARNING) + with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime: + return runtime.deserialize_cuda_engine(f.read()) + +# Set up 
CUDA context and allocate memory +def infer_with_tensorrt(engine, random_input): + context = engine.create_execution_context() + + # Allocate memory for input and output + input_shape = random_input.shape + input_size = trt.volume(input_shape) * random_input.itemsize + + d_input = cuda.mem_alloc(input_size) + h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=np.float32) + d_output = cuda.mem_alloc(h_output.nbytes) + + # Transfer input to GPU + cuda.memcpy_htod(d_input, random_input) + + # Run inference + tic = time.perf_counter_ns() + context.execute(bindings=[int(d_input), int(d_output)]) + toc = time.perf_counter_ns() + + # Transfer prediction back to host + cuda.memcpy_dtoh(h_output, d_output) + + return h_output, (toc - tic) / 1e6 + +def verify_trt(model_path, output_path, fp_16, input_shape): + print("Verifying the converted model") + if fp_16: + random_input = np.random.randn(*input_shape).astype(np.float16) + else: + random_input = np.random.randn(*input_shape).astype(np.float32) + + # Load the TensorRT engine + engine = load_engine(output_path) + + # Run inference + trt_output, trt_inference = infer_with_tensorrt(engine, random_input) + print("TensorRT inference time:", trt_inference, "ms") + + # Get ONNX output + from ONNX_Verify import predict_onnx + onnx_output, onnx_inference = predict_onnx(model_path, fp_16, input_shape) + print("ONNX inference time:", onnx_inference, "ms") + + # Calculate MSE (Mean Squared Error) + mse = np.mean((onnx_output - trt_output) ** 2) + print("MSE between ONNX and TensorRT outputs:", mse) + + # Calculate MAE (Mean Absolute Error) + mae = np.mean(np.abs(onnx_output - trt_output)) + print("MAE between ONNX and TensorRT outputs:", mae) \ No newline at end of file diff --git a/models/maize/Maize.onnx b/models/maize/Maize.onnx new file mode 100644 index 0000000..29fdf3e Binary files /dev/null and b/models/maize/Maize.onnx differ diff --git a/models/maize/Maize.pt b/models/maize/Maize.pt new file mode 100644 index 0000000..ff49ea3 Binary files /dev/null and b/models/maize/Maize.pt differ diff --git a/python_wip/Local_Test.py b/python_wip/Local_Test.py new file mode 100644 index 0000000..6420e0a --- /dev/null +++ b/python_wip/Local_Test.py @@ -0,0 +1,58 @@ +import cv2 +import os + +def draw_bounding_boxes(image_path, bboxes): + # Read the image using OpenCV + img = cv2.imread(image_path) + + if img is None: + print(f"Error loading image: {image_path}") + return + + # Get the dimensions of the image + height, width, _ = img.shape + print(height) + print(width) + + # Draw each bounding box on the image + for bbox in bboxes: + class_id, x_center, y_center, bbox_width, bbox_height = bbox + + # Convert normalized values to absolute pixel values + x_center_pixel = int(x_center * width) + y_center_pixel = int(y_center * height) + bbox_width_pixel = int(bbox_width * width) + bbox_height_pixel = int(bbox_height * height) + + # Calculate the top-left and bottom-right corners of the bounding box + top_left_x = int(x_center_pixel - bbox_width_pixel / 2) + top_left_y = int(y_center_pixel - bbox_height_pixel / 2) + bottom_right_x = int(x_center_pixel + bbox_width_pixel / 2) + bottom_right_y = int(y_center_pixel + bbox_height_pixel / 2) + + # Draw the bounding box (using green color and thickness of 2) + cv2.rectangle(img, (top_left_x, top_left_y), (bottom_right_x, bottom_right_y), (0, 255, 0), 2) + + # Show the image with bounding boxes (press any key to close) + cv2.imshow('Bounding Boxes', img) + cv2.waitKey(10000) + 
cv2.destroyAllWindows() + +def read_bounding_boxes(txt_file): + bboxes = [] + with open(txt_file, 'r') as file: + for line in file.readlines(): + values = line.strip().split() + class_id = int(values[0]) + x_center = float(values[1]) + y_center = float(values[2]) + bbox_width = float(values[3]) + bbox_height = float(values[4]) + bboxes.append((class_id, x_center, y_center, bbox_width, bbox_height)) + return bboxes + +os.chdir("C:/Users/Ishaan/Coding Projects/Applied-AI/ROS/Models/Maize Model/sample_maize_images") +print(os.getcwd()) +boxes = read_bounding_boxes("IMG_2884_18.txt") +print(boxes) +draw_bounding_boxes("IMG_2884_18.JPG", boxes) \ No newline at end of file diff --git a/Python POCs/Object Count Tracker.py b/python_wip/Object Count Tracker.py similarity index 100% rename from Python POCs/Object Count Tracker.py rename to python_wip/Object Count Tracker.py diff --git a/Python POCs/Python Unit Tests.py b/python_wip/Python Unit Tests.py similarity index 100% rename from Python POCs/Python Unit Tests.py rename to python_wip/Python Unit Tests.py diff --git a/Python POCs/TF Benchmarking Example.py b/python_wip/TF Benchmarking Example.py similarity index 100% rename from Python POCs/TF Benchmarking Example.py rename to python_wip/TF Benchmarking Example.py diff --git a/python_wip/TF_ONNX.py b/python_wip/TF_ONNX.py new file mode 100644 index 0000000..8fba178 --- /dev/null +++ b/python_wip/TF_ONNX.py @@ -0,0 +1,45 @@ +import argparse +import tensorflow as tf +import tf2onnx +from tensorflow.python.tools import optimize_for_inference_lib + +def convert_tf_to_onnx(model_path="/home/user/Downloads/model.pt", output_path="/home/user/Downloads/model.onnx", input_names=["input"], output_names=["output"], constant_folding=False): + # use os module to check if path exists... 
+ print("Loading the TensorFlow model") + with tf.io.gfile.GFile(model_path, "rb") as f: + frozen_graph_def = tf.compat.v1.GraphDef() + frozen_graph_def.ParseFromString(f.read()) + + print("Exporting the model to ONNX format") + onnx_model, _ = tf2onnx.convert.from_graph_def( + frozen_graph_def, + input_names=input_names, + output_names=output_names, + opset=13 + ) + + with open(output_path, "wb") as f: + f.write(onnx_model.SerializeToString()) + + import onnx # local import: onnx is not imported at the top of this file + model = onnx.load(output_path) + onnx.checker.check_model(model) + print("Model converted successfully") + print(model.graph) + + print(f"Converted ONNX model saved at {output_path}") + # return loadOnnxModel(onnx_model_str)
+ +if __name__ == "__main__": + print("Usage: python3 TF_ONNX.py --model_path=/home/user/Downloads/model.pb --output_path=/home/user/Downloads/model.onnx --constant_folding") + + parser = argparse.ArgumentParser(description='Convert a frozen TensorFlow graph to ONNX') + parser.add_argument('--model_path', type=str, default="/home/user/Downloads/model.pb", required=False, help='Path to the frozen TensorFlow graph (.pb)') + parser.add_argument('--output_path', type=str, default="/home/user/Downloads/model.onnx", required=False, help='Path to save the converted ONNX model file (.onnx)') + parser.add_argument('--input_names', type=str, nargs='+', default=["input"], help='Input tensor names') + parser.add_argument('--output_names', type=str, nargs='+', default=["output"], help='Output tensor names') + parser.add_argument('--constant_folding', action='store_true', help='Apply constant folding during conversion') + args = parser.parse_args() + + convert_tf_to_onnx(args.model_path, args.output_path, args.input_names, args.output_names, args.constant_folding) \ No newline at end of file
diff --git a/Python POCs/TF_TRT Inference.py b/python_wip/TF_TRT Inference.py similarity index 100% rename from Python POCs/TF_TRT Inference.py rename to python_wip/TF_TRT Inference.py diff --git a/conversion_tools/TF_TRT.py b/python_wip/TF_TRT.py similarity index 100% rename from conversion_tools/TF_TRT.py rename to python_wip/TF_TRT.py diff --git a/Python POCs/TRT Quantization Examples.py b/python_wip/TRT Quantization Examples.py similarity index 100% rename from Python POCs/TRT Quantization Examples.py rename to python_wip/TRT Quantization Examples.py diff --git a/Python POCs/composable_arch.py b/python_wip/composable_arch.py similarity index 100% rename from Python POCs/composable_arch.py rename to python_wip/composable_arch.py diff --git a/Python POCs/composable_arch_new.py b/python_wip/composable_arch_new.py similarity index 100% rename from Python POCs/composable_arch_new.py rename to python_wip/composable_arch_new.py diff --git a/python_wip/composable_lifecycle_launch.py b/python_wip/composable_lifecycle_launch.py new file mode 100644 index 0000000..2e0393e --- /dev/null +++ b/python_wip/composable_lifecycle_launch.py @@ -0,0 +1,66 @@ +from launch import LaunchDescription +from launch_ros.actions import LifecycleNode +from launch.actions import EmitEvent +from launch.events import matches_action +from launch_ros.events.lifecycle import ChangeState + +from lifecycle_msgs.msg import Transition + +def generate_launch_description(): + return LaunchDescription([ + LifecycleNode( + package='your_package', +
executable='your_lifecycle_node', + name='lifecycle_node_1', + namespace='', + output='screen' + ), + LifecycleNode( + package='another_package', + executable='another_lifecycle_node', + name='lifecycle_node_2', + namespace='', + output='screen' + ), + ]) + +## automatic lifecycle transition launch: +from launch import LaunchDescription +from launch_ros.actions import LifecycleNode +from launch.actions import EmitEvent +from launch.events import matches_action +from launch_ros.events.lifecycle import ChangeState +from lifecycle_msgs.msg import Transition + +def generate_launch_description(): + lifecycle_node_1 = LifecycleNode( + package='your_package', + executable='your_lifecycle_node', + name='lifecycle_node_1', + namespace='', + output='screen' + ) + + return LaunchDescription([ + # Launch the lifecycle node + lifecycle_node_1, + + # Emit an event to automatically configure the node + EmitEvent( + event=ChangeState( + lifecycle_node_matcher=matches_action(lifecycle_node_1), + transition_id=Transition.TRANSITION_CONFIGURE + ) + ), + + # Emit an event to automatically activate the node after configuring + EmitEvent( + event=ChangeState( + lifecycle_node_matcher=matches_action(lifecycle_node_1), + transition_id=Transition.TRANSITION_ACTIVATE + ) + ), + ]) + +# ComposableNodeContainer: This is the container where composable nodes are dynamically loaded. It's similar to a "node manager" that manages composable nodes. +# ComposableNode: This describes your node (in this case, the CameraNode) which will be loaded into the container. \ No newline at end of file diff --git a/python_wip/composable_node_launch.py b/python_wip/composable_node_launch.py new file mode 100644 index 0000000..8b239e0 --- /dev/null +++ b/python_wip/composable_node_launch.py @@ -0,0 +1,29 @@ +from launch import LaunchDescription +from launch_ros.actions import ComposableNodeContainer +from launch_ros.descriptions import ComposableNode + +def generate_launch_description(): + return LaunchDescription([ + ComposableNodeContainer( + name='camera_container', + namespace='', + package='rclcpp_components', + executable='component_container', + composable_node_descriptions=[ + ComposableNode( + package='your_package_name', + plugin='your_package_name.CameraNode', + name='camera_node', + parameters=[{ + 'source_type': 'zed', + 'static_image_path': '.../assets/', + 'video_path': '.../assets/video.mp4', + 'loop': 0, + 'frame_rate': 30, + 'model_type': 'maize', + }], + ), + ], + output='screen', + ), + ]) diff --git a/Python POCs/idk1.py b/python_wip/idk1.py similarity index 100% rename from Python POCs/idk1.py rename to python_wip/idk1.py diff --git a/Python POCs/idk2.py b/python_wip/idk2.py similarity index 100% rename from Python POCs/idk2.py rename to python_wip/idk2.py diff --git a/Python POCs/lifecycle_arch.py b/python_wip/lifecycle_arch.py similarity index 100% rename from Python POCs/lifecycle_arch.py rename to python_wip/lifecycle_arch.py diff --git a/Python POCs/lifecycle_arch_new.py b/python_wip/lifecycle_arch_new.py similarity index 100% rename from Python POCs/lifecycle_arch_new.py rename to python_wip/lifecycle_arch_new.py diff --git a/python_wip/lifecycle_idkold.py b/python_wip/lifecycle_idkold.py new file mode 100644 index 0000000..044c87c --- /dev/null +++ b/python_wip/lifecycle_idkold.py @@ -0,0 +1,129 @@ +import rclpy +from rclpy.lifecycle import Node as LifecycleNode +from rclpy.lifecycle import State +from rclpy.lifecycle import TransitionCallbackReturn +import pycuda.driver as cuda +import pycuda.autoinit 
+import tensorrt as trt +import numpy as np +import time + +# TensorRT logger +TRT_LOGGER = trt.Logger(trt.Logger.WARNING) + +class TRTLifecycleNode(LifecycleNode): + def __init__(self): + super().__init__('trt_lifecycle_node') + self.engine = None + self.context = None + self.inputs = None + self.outputs = None + self.bindings = None + self.stream = None + + def on_configure(self, state: State) -> TransitionCallbackReturn: + """Load and deserialize TensorRT engine and allocate buffers.""" + self.get_logger().info("Configuring... Loading TensorRT engine.") + + try: + # Load TensorRT engine + engine_path = '/path/to/your/model.trt' + self.engine = self.load_trt_engine(engine_path) + self.context = self.engine.create_execution_context() + + # Allocate buffers + self.inputs, self.outputs, self.bindings, self.stream = self.allocate_buffers() + + self.get_logger().info("TensorRT engine loaded and buffers allocated.") + return TransitionCallbackReturn.SUCCESS + + except Exception as e: + self.get_logger().error(f"Failed to load TensorRT engine: {e}") + return TransitionCallbackReturn.FAILURE + + def on_activate(self, state: State) -> TransitionCallbackReturn: + """Start inference process or any task in this state.""" + self.get_logger().info("Activating... ready for inference.") + return TransitionCallbackReturn.SUCCESS + + def on_deactivate(self, state: State) -> TransitionCallbackReturn: + """Handle node deactivation (optional).""" + self.get_logger().info("Deactivating...") + return TransitionCallbackReturn.SUCCESS + + def on_cleanup(self, state: State) -> TransitionCallbackReturn: + """Clean up and release resources.""" + self.get_logger().info("Cleaning up... releasing resources.") + self.context = None + self.engine = None + self.inputs = None + self.outputs = None + self.bindings = None + self.stream = None + return TransitionCallbackReturn.SUCCESS + + def on_shutdown(self, state: State) -> TransitionCallbackReturn: + """Shutdown node gracefully.""" + self.get_logger().info("Shutting down...") + return TransitionCallbackReturn.SUCCESS + + def load_trt_engine(self, engine_path): + """Load and deserialize the TensorRT engine.""" + with open(engine_path, 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime: + engine_data = f.read() + return runtime.deserialize_cuda_engine(engine_data) + + def allocate_buffers(self): + """Allocate input/output buffers for TensorRT engine.""" + inputs = [] + outputs = [] + bindings = [] + stream = cuda.Stream() + + for binding in self.engine: + binding_shape = self.engine.get_binding_shape(binding) + size = trt.volume(binding_shape) * self.engine.max_batch_size + dtype = trt.nptype(self.engine.get_binding_dtype(binding)) + + # Allocate host and device memory for inputs/outputs + host_mem = cuda.pagelocked_empty(size, dtype) + device_mem = cuda.mem_alloc(host_mem.nbytes) + + bindings.append(int(device_mem)) + if self.engine.binding_is_input(binding): + inputs.append({'host': host_mem, 'device': device_mem}) + else: + outputs.append({'host': host_mem, 'device': device_mem}) + + return inputs, outputs, bindings, stream + + def run_inference(self, input_data): + """Run inference using the loaded TensorRT engine.""" + np.copyto(self.inputs[0]['host'], input_data.ravel()) + + # Transfer input to device + cuda.memcpy_htod_async(self.inputs[0]['device'], self.inputs[0]['host'], self.stream) + + # Run inference + self.context.execute_async_v2(bindings=self.bindings, stream_handle=self.stream.handle) + + # Transfer predictions back from device + 
cuda.memcpy_dtoh_async(self.outputs[0]['host'], self.outputs[0]['device'], self.stream) + + # Synchronize stream + self.stream.synchronize() + + return self.outputs[0]['host'] + + +def main(args=None): + rclpy.init(args=args) + node = TRTLifecycleNode() + + # Spin the node until shutdown + rclpy.spin(node) + node.destroy_node() + rclpy.shutdown() + +if __name__ == '__main__': + main() diff --git a/Python POCs/overlap.py b/python_wip/overlap.py similarity index 100% rename from Python POCs/overlap.py rename to python_wip/overlap.py diff --git a/Calibration Tools/velocity.py b/python_wip/velocity.py similarity index 100% rename from Calibration Tools/velocity.py rename to python_wip/velocity.py diff --git a/Calibration Tools/velocityexample.py b/python_wip/velocityexample.py similarity index 100% rename from Calibration Tools/velocityexample.py rename to python_wip/velocityexample.py diff --git a/ros2_ws/src/python_workspace/python_workspace/jetson_node.py b/ros2_ws/src/python_workspace/python_workspace/jetson_node.py deleted file mode 100644 index 0af6b5d..0000000 --- a/ros2_ws/src/python_workspace/python_workspace/jetson_node.py +++ /dev/null @@ -1,164 +0,0 @@ -import time, sys, os -import cv2 -import numpy as np - -import tensorrt as trt -import pycuda.driver as cuda -# import pycuda.autoinit -cuda.init() -device = cuda.Device(0) -cuda_driver_context = device.make_context() - -import rclpy -from rclpy.time import Time -from rclpy.node import Node -from rclpy.executors import MultiThreadedExecutor -from sensor_msgs.msg import Image -from std_msgs.msg import Header, String -from cv_bridge import CvBridge, CvBridgeError - -class JetsonNode(Node): - def __init__(self): - super().__init__('jetson_node') - self.declare_parameter('engine_path', '/home/user/Downloads/model.engine') - self.engine_path = self.get_parameter('engine_path').get_parameter_value().string_value - - self.camera_subscriber = self.create_subscription(Image, 'image_data', self.image_callback, 10) - self.bbox_publisher = self.create_publisher(String, 'bounding_boxes', 10) - self.bridge = CvBridge() - - self.engine = self.load_engine() - self.allocate_buffers() - self.exec_context = (self.engine).create_execution_context() - self.arrival_time = 0 - - def load_engine(self): - if not os.path.exists(self.engine_path): - self.get_logger().error(f"Engine file not found at {self.engine_path}") - return None - - TRT_LOGGER = trt.Logger(trt.Logger.WARNING) - with open(self.engine_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime: - engine = runtime.deserialize_cuda_engine(f.read()) - self.get_logger().info(f"Successfully loaded engine from {self.engine_path}") - return engine - - # if strip_weights: - # engine = load_stripped_engine_and_refit(engine_path, path_onnx_model) - # else: - # engine = load_normal_engine(engine_path) - # def load_stripped_engine_and_refit(engine_path, onnx_model_path): - # runtime = trt.Runtime(TRT_LOGGER) - # with open(engine_path, "rb") as engine_file: - # engine = runtime.deserialize_cuda_engine(engine_file.read()) - # refitter = trt.Refitter(engine, TRT_LOGGER) - # parser_refitter = trt.OnnxParserRefitter(refitter, TRT_LOGGER) - # assert parser_refitter.refit_from_file(onnx_model_path) - # assert refitter.refit_cuda_engine() - # return engine - # def load_normal_engine(engine_path): - # runtime = trt.Runtime(TRT_LOGGER) - # with open(engine_path, "rb") as plan: - # engine = runtime.deserialize_cuda_engine(plan.read()) - # return engine - - def allocate_buffers(self): - engine = self.engine - # Allocate 
host (pinned) and device memory for input/output - # self.input_binding_idx = engine.get_binding_index("input_0") - # self.output_binding_idx = engine.get_binding_index("output_0") - - self.input_shape = engine.get_binding_shape(0) # self.input_binding_idx - self.output_shape = engine.get_binding_shape(1) # self.output_binding_idx - - # Allocate device memory for input/output - self.d_input = cuda.mem_alloc(trt.volume(self.input_shape) * np.dtype(np.float32).itemsize) - self.d_output = cuda.mem_alloc(trt.volume(self.output_shape) * np.dtype(np.float32).itemsize) - - # Allocate host pinned memory for input/output - self.h_input = cuda.pagelocked_empty(trt.volume(self.input_shape), dtype=np.float32) - self.h_output = cuda.pagelocked_empty(trt.volume(self.output_shape), dtype=np.float32) - - # Create a CUDA stream for async execution - self.stream = cuda.Stream() - - def image_callback(self, msg): - self.arrival_time = Time.from_msg(msg.header.stamp) - latency = self.get_clock().now() - Time.from_msg(msg.header.stamp) - self.get_logger().info(f"Latency: {latency.nanoseconds / 1e6} milliseconds") - - try: - image = self.bridge.imgmsg_to_cv2(msg, desired_encoding="rgb8") - except CvBridgeError as e: - print(e) - - self.preprocess(image) - - def preprocess(self, image): - tic = time.perf_counter_ns() - # Preprocess the image (e.g. normalize) - input_data = image.astype(np.float32) - input_data = np.transpose(input_data, (2, 0, 1)) # HWC to CHW - input_data = np.expand_dims(input_data, axis=0) # add batch dimension - # Copy input data to pinned memory (host side) - np.copyto(self.h_input, input_data.ravel()) - toc = time.perf_counter_ns() - self.get_logger().info(f"Preprocessing: {(toc-tic)/1e6} ") - self.run_inference() - - def run_inference(self): - tic = time.perf_counter_ns() - cuda_driver_context.push() - # Transfer data from host to device asynchronously - cuda.memcpy_htod_async(self.d_input, self.h_input, self.stream) - # Execute inference asynchronously - self.exec_context.execute_async_v2(bindings=[int(self.d_input), int(self.d_output)], stream_handle=self.stream.handle) - # Transfer output data from device to host asynchronously - cuda.memcpy_dtoh_async(self.h_output, self.d_output, self.stream) - # Synchronize the stream to ensure the transfers are completed - self.stream.synchronize() - # Return the output from host memory - output = np.copy(np.array(self.h_output)) - cuda_driver_context.pop() - toc = time.perf_counter_ns() - self.get_logger().info(f"Execution time: {(toc-tic)/1e6}") - self.postprocess(output) - - def postprocess(self, output): - output = output.reshape(-1, 7) # Reshape the output tensor to a 2D array - # self.get_logger().info(f'Detected bounding boxes: {output}') - - msg_list = [] - for detection in output: - x_min, y_min, width, height = detection[3:7] # Assume these are the bbox coordinates or [:5] - # confidence = detection[2] - # msg = ",".join([str(x_min), str(y_min), str(width), str(height)]) - # msg_list.append(msg) - bbox_msg = String() - # bbox_msg.data = ";".join(msg_list) - bbox_msg.data = "[x, y, w, h]" - self.bbox_publisher.publish(bbox_msg) - latency = (self.get_clock().now() - self.arrival_time) - self.get_logger().info(f"End to end: {latency.nanoseconds / 1e6} milliseconds") - -def main(args=None): - rclpy.init(args=args) - jetson_node = JetsonNode() - # try: - # rclpy.spin(jetson_node) - # except KeyboardInterrupt: - # print("qq...") - # jetson_node.display_metrics() - # rclpy.logging.get_logger("Quitting").info('Done') - # except SystemExit: - # 
print("qqq...") - # jetson_node.display_metrics() - # rclpy.logging.get_logger("Quitting").info('Done') - executor = MultiThreadedExecutor() - executor.add_node(jetson_node) - executor.spin() - jetson_node.destroy_node() - rclpy.shutdown() - -if __name__ == '__main__': - main() \ No newline at end of file diff --git a/C++ Workspace/.gitignore b/workspace_cpp/.gitignore similarity index 100% rename from C++ Workspace/.gitignore rename to workspace_cpp/.gitignore diff --git a/C++ Workspace/ros2_ws/README.MD b/workspace_cpp/ros2_ws/README.MD similarity index 100% rename from C++ Workspace/ros2_ws/README.MD rename to workspace_cpp/ros2_ws/README.MD diff --git a/C++ Workspace/ros2_ws/src/custom_interface/CMakeLists.txt b/workspace_cpp/ros2_ws/src/custom_interface/CMakeLists.txt similarity index 100% rename from C++ Workspace/ros2_ws/src/custom_interface/CMakeLists.txt rename to workspace_cpp/ros2_ws/src/custom_interface/CMakeLists.txt diff --git a/C++ Workspace/ros2_ws/src/custom_interface/LICENSE b/workspace_cpp/ros2_ws/src/custom_interface/LICENSE similarity index 100% rename from C++ Workspace/ros2_ws/src/custom_interface/LICENSE rename to workspace_cpp/ros2_ws/src/custom_interface/LICENSE diff --git a/C++ Workspace/ros2_ws/src/custom_interface/msg/BoundingBox.msg b/workspace_cpp/ros2_ws/src/custom_interface/msg/BoundingBox.msg similarity index 100% rename from C++ Workspace/ros2_ws/src/custom_interface/msg/BoundingBox.msg rename to workspace_cpp/ros2_ws/src/custom_interface/msg/BoundingBox.msg diff --git a/C++ Workspace/ros2_ws/src/custom_interface/package.xml b/workspace_cpp/ros2_ws/src/custom_interface/package.xml similarity index 100% rename from C++ Workspace/ros2_ws/src/custom_interface/package.xml rename to workspace_cpp/ros2_ws/src/custom_interface/package.xml diff --git a/C++ Workspace/ros2_ws/src/node_test/CMakeLists.txt b/workspace_cpp/ros2_ws/src/node_test/CMakeLists.txt similarity index 100% rename from C++ Workspace/ros2_ws/src/node_test/CMakeLists.txt rename to workspace_cpp/ros2_ws/src/node_test/CMakeLists.txt diff --git a/C++ Workspace/ros2_ws/src/node_test/README.md b/workspace_cpp/ros2_ws/src/node_test/README.md similarity index 100% rename from C++ Workspace/ros2_ws/src/node_test/README.md rename to workspace_cpp/ros2_ws/src/node_test/README.md diff --git a/C++ Workspace/ros2_ws/src/node_test/package.xml b/workspace_cpp/ros2_ws/src/node_test/package.xml similarity index 100% rename from C++ Workspace/ros2_ws/src/node_test/package.xml rename to workspace_cpp/ros2_ws/src/node_test/package.xml diff --git a/C++ Workspace/ros2_ws/src/node_test/src/camera_node.cpp b/workspace_cpp/ros2_ws/src/node_test/src/camera_node.cpp similarity index 100% rename from C++ Workspace/ros2_ws/src/node_test/src/camera_node.cpp rename to workspace_cpp/ros2_ws/src/node_test/src/camera_node.cpp diff --git a/C++ Workspace/ros2_ws/src/node_test/src/camera_subscriber.cpp b/workspace_cpp/ros2_ws/src/node_test/src/camera_subscriber.cpp similarity index 100% rename from C++ Workspace/ros2_ws/src/node_test/src/camera_subscriber.cpp rename to workspace_cpp/ros2_ws/src/node_test/src/camera_subscriber.cpp diff --git a/C++ Workspace/ros2_ws/src/node_test/src/classes.txt b/workspace_cpp/ros2_ws/src/node_test/src/classes.txt similarity index 100% rename from C++ Workspace/ros2_ws/src/node_test/src/classes.txt rename to workspace_cpp/ros2_ws/src/node_test/src/classes.txt diff --git a/C++ Workspace/ros2_ws/src/node_test/src/engine.cpp b/workspace_cpp/ros2_ws/src/node_test/src/engine.cpp similarity index 100% 
rename from C++ Workspace/ros2_ws/src/node_test/src/engine.cpp rename to workspace_cpp/ros2_ws/src/node_test/src/engine.cpp diff --git a/C++ Workspace/ros2_ws/src/node_test/src/engine.h b/workspace_cpp/ros2_ws/src/node_test/src/engine.h similarity index 100% rename from C++ Workspace/ros2_ws/src/node_test/src/engine.h rename to workspace_cpp/ros2_ws/src/node_test/src/engine.h diff --git a/C++ Workspace/ros2_ws/src/node_test/src/jetson_node.cpp b/workspace_cpp/ros2_ws/src/node_test/src/jetson_node.cpp similarity index 100% rename from C++ Workspace/ros2_ws/src/node_test/src/jetson_node.cpp rename to workspace_cpp/ros2_ws/src/node_test/src/jetson_node.cpp diff --git a/C++ Workspace/ros2_ws/src/node_test/src/new_jetson_node.cpp b/workspace_cpp/ros2_ws/src/node_test/src/new_jetson_node.cpp similarity index 100% rename from C++ Workspace/ros2_ws/src/node_test/src/new_jetson_node.cpp rename to workspace_cpp/ros2_ws/src/node_test/src/new_jetson_node.cpp diff --git a/C++ Workspace/ros2_ws/src/node_test/src/preprocessing_node.cpp b/workspace_cpp/ros2_ws/src/node_test/src/preprocessing_node.cpp similarity index 100% rename from C++ Workspace/ros2_ws/src/node_test/src/preprocessing_node.cpp rename to workspace_cpp/ros2_ws/src/node_test/src/preprocessing_node.cpp diff --git a/C++ Workspace/ros2_ws/src/onnx_cpp/CMakeLists.txt b/workspace_cpp/ros2_ws/src/onnx_cpp/CMakeLists.txt similarity index 100% rename from C++ Workspace/ros2_ws/src/onnx_cpp/CMakeLists.txt rename to workspace_cpp/ros2_ws/src/onnx_cpp/CMakeLists.txt diff --git a/C++ Workspace/ros2_ws/src/onnx_cpp/package.xml b/workspace_cpp/ros2_ws/src/onnx_cpp/package.xml similarity index 100% rename from C++ Workspace/ros2_ws/src/onnx_cpp/package.xml rename to workspace_cpp/ros2_ws/src/onnx_cpp/package.xml diff --git a/C++ Workspace/ros2_ws/src/onnx_cpp/src/listener.cpp b/workspace_cpp/ros2_ws/src/onnx_cpp/src/listener.cpp similarity index 100% rename from C++ Workspace/ros2_ws/src/onnx_cpp/src/listener.cpp rename to workspace_cpp/ros2_ws/src/onnx_cpp/src/listener.cpp diff --git a/C++ Workspace/ros2_ws/src/onnx_cpp/src/talker.cpp b/workspace_cpp/ros2_ws/src/onnx_cpp/src/talker.cpp similarity index 100% rename from C++ Workspace/ros2_ws/src/onnx_cpp/src/talker.cpp rename to workspace_cpp/ros2_ws/src/onnx_cpp/src/talker.cpp diff --git a/ros2_ws/.gitignore b/workspace_python/ros2_ws/.gitignore similarity index 100% rename from ros2_ws/.gitignore rename to workspace_python/ros2_ws/.gitignore diff --git a/ros2_ws/src/python_workspace/launch/launch.py b/workspace_python/ros2_ws/src/python_workspace/launch/launch.py similarity index 100% rename from ros2_ws/src/python_workspace/launch/launch.py rename to workspace_python/ros2_ws/src/python_workspace/launch/launch.py diff --git a/ros2_ws/src/python_workspace/package.xml b/workspace_python/ros2_ws/src/python_workspace/package.xml similarity index 100% rename from ros2_ws/src/python_workspace/package.xml rename to workspace_python/ros2_ws/src/python_workspace/package.xml diff --git a/ros2_ws/src/python_workspace/python_workspace/__init__.py b/workspace_python/ros2_ws/src/python_workspace/python_workspace/__init__.py similarity index 100% rename from ros2_ws/src/python_workspace/python_workspace/__init__.py rename to workspace_python/ros2_ws/src/python_workspace/python_workspace/__init__.py diff --git a/ros2_ws/src/python_workspace/python_workspace/camera_node.py b/workspace_python/ros2_ws/src/python_workspace/python_workspace/camera_node.py similarity index 64% rename from 
ros2_ws/src/python_workspace/python_workspace/camera_node.py rename to workspace_python/ros2_ws/src/python_workspace/python_workspace/camera_node.py index e9e1312..d02be31 100644 --- a/ros2_ws/src/python_workspace/python_workspace/camera_node.py +++ b/workspace_python/ros2_ws/src/python_workspace/python_workspace/camera_node.py @@ -8,37 +8,28 @@ from rclpy.node import Node from rclpy.executors import MultiThreadedExecutor from sensor_msgs.msg import Image -from std_msgs.msg import Header, String -from cv_bridge import CvBridge, CvBridgeError +from std_msgs.msg import Header +from cv_bridge import CvBridge class CameraNode(Node): def __init__(self): super().__init__('camera_node') - - # os.chdir(Path(__file__).parent) - - # Declare parameters for the image source type, file paths, looping, and frame rate + self.declare_parameter('source_type', 'zed') # static_image, video, zed self.declare_parameter('static_image_path', '.../assets/') self.declare_parameter('video_path', '.../assets/video.mp4') self.declare_parameter('loop', 0) # 0 = don't loop, >0 = # of loops, -1 = loop forever self.declare_parameter('frame_rate', 30) # Desired frame rate for publishing - self.declare_parameter('model_type', 'maize') + self.declare_parameter('model_dimensions', (448, 1024)) + # self.declare_parameter('camera_serial_number', 1101010101) - # Retrieve the parameters self.source_type = self.get_parameter('source_type').get_parameter_value().string_value self.static_image_path = self.get_parameter('static_image_path').get_parameter_value().string_value self.video_path = self.get_parameter('video_path').get_parameter_value().string_value self.loop = self.get_parameter('loop').get_parameter_value().integer_value self.frame_rate = self.get_parameter('frame_rate').get_parameter_value().integer_value - - # width is 448? - if self.get_parameter('model_type').get_parameter_value().string_value == 'maize': - self.dimensions = (640, 640) # 448, 1014 - elif self.get_parameter('model_type').get_parameter_value().string_value == 'weed': - self.dimensions = (1024, 448) - else: - self.dimensions = (1024, 448) + self.dimensions = tuple(self.get_parameter('model_dimensions').get_parameter_value().integer_array_value) + # self.serial_number = self.get_parameter('camera_serial_number').get_parameter_value().integer_value self.publisher = self.create_publisher(Image, 'image_data', 10) self.bridge = CvBridge() @@ -54,7 +45,6 @@ def __init__(self): self.get_logger().error(f"Invalid source_type: {self.source_type}") def publish_static_image(self): - # Load and publish the static image if not os.path.exists(self.static_image_path): self.get_logger().error(f"Static image not found at {self.static_image_path}") return @@ -62,17 +52,18 @@ def publish_static_image(self): loops = 0 while rclpy.ok() and (self.loop == -1 or loops < self.loop): for filename in os.listdir(self.static_image_path): - if filename.endswith('.jpg') or filename.endswith('.png'): + if filename.endswith('.JPG') or filename.endswith('.png'): + # print("found") !! 
log properly for vid too image = cv2.imread(os.path.join(self.static_image_path, filename), cv2.IMREAD_COLOR) if image is not None: self.index += 1 + image = cv2.resize(image, (self.dimensions)) self.publish_image(image) - + time.sleep(1.0 / self.frame_rate) # delay to control framerate if self.loop > 0: loops += 1 - def publish_video_frames(self): # replace with decord later - # Capture and publish video frames using CUDA + def publish_video_frames(self): if not os.path.exists(self.video_path): self.get_logger().error(f"Video file not found at {self.video_path}") return @@ -88,7 +79,9 @@ def publish_video_frames(self): # replace with decord later ret, frame = cap.read() if not ret: break + self.index += 1 self.publish_image(frame) + time.sleep(1.0 / self.frame_rate) # delay to control framerate if self.loop > 0: loops += 1 @@ -99,14 +92,12 @@ def publish_video_frames(self): # replace with decord later cap.release() def publish_zed_frames(self): - # Create a ZED camera object zed = sl.Camera() - # Set configuration parameters init = sl.InitParameters() - init.camera_resolution = sl.RESOLUTION.HD1080 # HD720 - init.camera_fps = 30 + init.camera_resolution = sl.RESOLUTION.HD1080 + init.camera_fps = 30 # do we need publisher delay if this param is here? + # init.set_from_serial_number(self.serial_number) # or give side and manually convert - # Open the ZED camera if not zed.is_opened(): print("Opening ZED Camera ") status = zed.open(init) @@ -114,7 +105,6 @@ def publish_zed_frames(self): self.get_logger().error(f"Failed to open ZED camera: {str(status)}") return - # Set runtime parameters after opening the camera runtime = sl.RuntimeParameters() image_zed = sl.Mat() @@ -126,8 +116,8 @@ def publish_zed_frames(self): err = zed.grab(runtime) if err == sl.ERROR_CODE.SUCCESS: self.index += 1 - tic = time.perf_counter() - zed.retrieve_image(image_zed, sl.VIEW.LEFT_UNRECTIFIED) + tic = time.perf_counter_ns() + zed.retrieve_image(image_zed, sl.VIEW.LEFT_UNRECTIFIED) # modify based on left/right zed.get_sensors_data(sensors_data, sl.TIME_REFERENCE.IMAGE) accel_data = sensors_data.get_imu_data().get_linear_acceleration() @@ -139,31 +129,20 @@ def publish_zed_frames(self): previous_velocity = velocity previous_time = current_time + # Take and transform image using CUDA operations cv_image = image_zed.get_data() - # Upload the ZED image to CUDA gpu_image = cv2.cuda_GpuMat() - gpu_image.upload(cv_image) - - # Transform to BGR8 format and resize using CUDA + gpu_image.upload(cv_image) gpu_image = cv2.cuda.cvtColor(gpu_image, cv2.COLOR_RGBA2RGB) - # crop to ROI and resize + # crop goes here... 
gpu_image = cv2.cuda.resize(gpu_image, self.dimensions) - # Convert the image to float32 - # gpu_image = gpu_image.transpose((2, 0, 1)).astype(np.float32) - # gpu_image = np.expand_dims(gpu_image, axis=0) - - # # Transpose the image - # image_gpu = cv2.cuda.transpose(image_gpu) - # # Add a new dimension to the image - # image_gpu = cv2.cuda_GpuMat((1,) + image_gpu.size(), image_gpu.type()) - # cv2.cuda.copyMakeBorder(image_gpu, 0, 1, 0, 0, cv2.BORDER_CONSTANT, image_gpu) - # Download the processed image from GPU to CPU memory rgb_image = gpu_image.download() toc = time.perf_counter_ns() - # self.preprocessing_time = (toc - tic)/1e6 + self.get_logger().info(f"Preprocessing: {(toc - tic)/1e6} ms") self.publish_image(rgb_image) + time.sleep(1.0 / self.frame_rate) # delay to control framerate else: self.get_logger().error("Failed to grab ZED camera frame: {str(err)}") @@ -174,40 +153,18 @@ def publish_image(self, image): header = Header() header.stamp = self.get_clock().now().to_msg() header.frame_id = str(self.index) - try: - image_msg = self.bridge.cv2_to_imgmsg(image, encoding="rgb8") - except CvBridgeError as e: - print(e) - + # try packing velcoity information into header + image_msg = self.bridge.cv2_to_imgmsg(image, encoding="rgb8") + image_msg.header = header image_msg.is_bigendian = 0 image_msg.step = image_msg.width * 3 self.publisher.publish(image_msg) - size = sys.getsizeof(image_msg) - self.get_logger().info(f'Published image frame: {self.index} with message size {size} bytes') - time.sleep(1.0 / self.frame_rate) # delay to control framerate - - def display_metrics(self): - toc = time.perf_counter() - bandwidth = self.total_data / (toc - self.tic) - self.get_logger().info(f'Published {len(self.frames)} images in {toc - self.tic:0.4f} seconds with average network bandwidth of {round(bandwidth)} bytes per second') - self.get_logger().info('Shutting down display node...') - raise SystemExit def main(args=None): rclpy.init(args=args) camera_node = CameraNode() - # try: - # rclpy.spin(camera_node) - # except KeyboardInterrupt: - # print("works") - # rclpy.logging.get_logger("Quitting").info('Done') - # camera_node.display_metrics() - # except SystemExit: - # print("works") - # camera_node.display_metrics() - # rclpy.logging.get_logger("Quitting").info('Done') executor = MultiThreadedExecutor() executor.add_node(camera_node) executor.spin() diff --git a/ros2_ws/src/python_workspace/python_workspace/extermination_node.py b/workspace_python/ros2_ws/src/python_workspace/python_workspace/extermination_node.py similarity index 98% rename from ros2_ws/src/python_workspace/python_workspace/extermination_node.py rename to workspace_python/ros2_ws/src/python_workspace/python_workspace/extermination_node.py index 9336f11..a19420c 100644 --- a/ros2_ws/src/python_workspace/python_workspace/extermination_node.py +++ b/workspace_python/ros2_ws/src/python_workspace/python_workspace/extermination_node.py @@ -194,15 +194,10 @@ def display(self, image, boxes, side): if key == 27: # ESC key: quit program cv2.destroyAllWindows() sys.exit() - def main(args=None): rclpy.init(args=args) extermination_node = ExterminationNode() - # try: - # rclpy.spin(display_node) - # except SystemExit: - # rclpy.logging.get_logger("Quitting").info('Done') executor = MultiThreadedExecutor() executor.add_node(extermination_node) executor.spin() diff --git a/workspace_python/ros2_ws/src/python_workspace/python_workspace/jetson_node.py b/workspace_python/ros2_ws/src/python_workspace/python_workspace/jetson_node.py new file mode 
100644 index 0000000..2813e52 --- /dev/null +++ b/workspace_python/ros2_ws/src/python_workspace/python_workspace/jetson_node.py @@ -0,0 +1,206 @@ +import time, os, sys +import numpy as np +import cv2 +import tensorrt as trt +import pycuda.driver as cuda + +import rclpy +from rclpy.time import Time +from rclpy.node import Node +from rclpy.executors import MultiThreadedExecutor +from sensor_msgs.msg import Image +from std_msgs.msg import Header, String +from cv_bridge import CvBridge + +cuda.init() +device = cuda.Device(0) +cuda_driver_context = device.make_context() +
+class JetsonNode(Node): + def __init__(self): + super().__init__('jetson_node') + + self.declare_parameter('engine_path', '/home/user/Downloads/model.engine') + self.declare_parameter('strip_weights', False) # declare as a bool so get_parameter_value().bool_value reads correctly + self.declare_parameter('model_path', '/home/user/Downloads/model.onnx') + + self.model_path = self.get_parameter('model_path').get_parameter_value().string_value + self.engine_path = self.get_parameter('engine_path').get_parameter_value().string_value + self.strip_weights = self.get_parameter('strip_weights').get_parameter_value().bool_value + + self.camera_subscriber = self.create_subscription(Image, 'image_data', self.image_callback, 10) + self.bbox_publisher = self.create_publisher(String, 'bounding_boxes', 10) + self.bridge = CvBridge() + self.arrival_time, self.image = 0, None + + if self.strip_weights: + self.engine = self.load_stripped_engine_and_refit() + else: + self.engine = self.load_normal_engine() + + self.allocate_buffers() + self.exec_context = self.engine.create_execution_context() +
+ def load_stripped_engine_and_refit(self): + if not os.path.exists(self.engine_path): + self.get_logger().error(f"Engine file not found at {self.engine_path}") + return None + + if not os.path.exists(self.model_path): + self.get_logger().error(f"Model file not found at {self.model_path}") + return None + + TRT_LOGGER = trt.Logger(trt.Logger.WARNING) + with open(self.engine_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime: + engine = runtime.deserialize_cuda_engine(f.read()) + refitter = trt.Refitter(engine, TRT_LOGGER) + parser_refitter = trt.OnnxParserRefitter(refitter, TRT_LOGGER) + assert parser_refitter.refit_from_file(self.model_path) + assert refitter.refit_cuda_engine() + return engine +
+ def load_normal_engine(self): + if not os.path.exists(self.engine_path): + self.get_logger().error(f"Engine file not found at {self.engine_path}") + return None + + TRT_LOGGER = trt.Logger(trt.Logger.WARNING) + with open(self.engine_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime: + engine = runtime.deserialize_cuda_engine(f.read()) + self.get_logger().info(f"Successfully loaded engine from {self.engine_path}") + return engine +
+ # fixed allocation: does not account for multiple bindings/batch sizes (single input -> output tensor) + def allocate_buffers(self): + engine = self.engine + + self.input_shape = engine.get_binding_shape(0) + self.output_shape = engine.get_binding_shape(1) + + # Allocate device memory for input/output + self.d_input = cuda.mem_alloc(trt.volume(self.input_shape) * np.dtype(np.float32).itemsize) + self.d_output = cuda.mem_alloc(trt.volume(self.output_shape) * np.dtype(np.float32).itemsize) + + # Allocate host pinned memory for input/output (pinned memory for input/output buffers) + self.h_input = cuda.pagelocked_empty(trt.volume(self.input_shape), dtype=np.float32) + self.h_output = cuda.pagelocked_empty(trt.volume(self.output_shape), dtype=np.float32) + + # Create a CUDA stream for async 
execution + self.stream = cuda.Stream() + + def image_callback(self, msg): + self.arrival_time = Time.from_msg(msg.header.stamp) + image = self.bridge.imgmsg_to_cv2(msg, desired_encoding="rgb8") + self.image = image + latency = self.get_clock().now() - Time.from_msg(msg.header.stamp) + self.get_logger().info(f"Latency: {latency.nanoseconds / 1e6} ms") + self.preprocess(image) + + def preprocess(self, image): + tic = time.perf_counter_ns() + # Preprocess the image (e.g. normalize) + input_data = image.astype(np.float32) + input_data = np.transpose(input_data, (2, 0, 1)) # HWC to CHW + input_data = np.expand_dims(input_data, axis=0) # add batch dimension + # Copy input data to pinned memory (host side) + np.copyto(self.h_input, input_data.ravel()) + toc = time.perf_counter_ns() + self.get_logger().info(f"Preprocessing: {(toc-tic)/1e6} ms") + self.run_inference() + + def run_inference(self): + tic = time.perf_counter_ns() + cuda_driver_context.push() + # Transfer data from host to device asynchronously + cuda.memcpy_htod_async(self.d_input, self.h_input, self.stream) + # Execute inference asynchronously + self.exec_context.execute_async_v2(bindings=[int(self.d_input), int(self.d_output)], stream_handle=self.stream.handle) + # Transfer output data from device to host asynchronously + cuda.memcpy_dtoh_async(self.h_output, self.d_output, self.stream) + # Synchronize the stream to ensure the transfers are completed + self.stream.synchronize() + # Return the output from host memory + output = np.copy(self.h_output) + cuda_driver_context.pop() + toc = time.perf_counter_ns() + self.get_logger().info(f"Inference: {(toc-tic)/1e6} ms") + self.postprocess(output) + + # output shape: (1, 5, 8400) + def postprocess(self, output): + tic = time.perf_counter_ns() + num_detections = len(output) // 5 + output = np.reshape(output, (num_detections, 5)) + + width = 640 + height = 640 + conf_threshold = 0.9 + nms_threshold = 0.1 + boxes = [] + confidences = [] + + for detection in output: + # print(detection) + obj_conf, x_center, y_center, bbox_width, bbox_height = detection[:] + + # Apply sigmoid to object confidence and class score + obj_conf = 1 / (1 + np.exp(-obj_conf)) # Sigmoid for object confidence + + # Filter out weak predictions based on confidence threshold + if obj_conf < conf_threshold: + continue + + # Convert normalized values to absolute pixel values + x_center_pixel = int(x_center) + y_center_pixel = int(y_center) + bbox_width_pixel = int(bbox_width) + bbox_height_pixel = int(bbox_height) + print(f"[{obj_conf}, {x_center_pixel}, {y_center_pixel}, {bbox_width_pixel}, {bbox_height_pixel} ]") + + # Calculate the top-left and bottom-right corners of the bounding box + # top_left_x = int(x_center_pixel - bbox_width_pixel / 2) + # top_left_y = int(y_center_pixel - bbox_height_pixel / 2) + # bottom_right_x = int(x_center_pixel + bbox_width_pixel / 2) + # bottom_right_y = int(y_center_pixel + bbox_height_pixel / 2) + + + boxes.append([x_center_pixel, y_center_pixel, bbox_width_pixel, bbox_height_pixel]) + # confidences.append(confidence) + # boxes.append([top_left_x, top_left_y, bottom_right_x, bottom_right_y]) + + # Append the box, confidence, and class score + # boxes.append([x_min, y_min, x_max, y_max]) + confidences.append(float(obj_conf)) + + # # Apply Non-Maximum Suppression (NMS) to suppress overlapping boxes + indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold) + + final_boxes = [] + for i in indices: + # Since indices is now likely a 1D list, no need for i[0] + 
final_boxes.append([*boxes[i], confidences[i]]) + toc = time.perf_counter_ns() + self.get_logger().info(f"Postprocessing: {(toc-tic)/1e6} ms") + # self.display(final_boxes) + # print(final_boxes) + self.display(final_boxes) + + def display(self, final_boxes): + image = self.image + for box in final_boxes: + x, y, w, h, confidence = box + cv2.rectangle(image, (x, y), (x + w, y + h), (255, 0, 0), 2) + cv2.imshow("Image", image) + cv2.waitKey(0) + +def main(args=None): + rclpy.init(args=args) + jetson_node = JetsonNode() + executor = MultiThreadedExecutor() + executor.add_node(jetson_node) + executor.spin() + jetson_node.destroy_node() + rclpy.shutdown() + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/ros2_ws/src/python_workspace/resource/python_workspace b/workspace_python/ros2_ws/src/python_workspace/resource/python_workspace similarity index 100% rename from ros2_ws/src/python_workspace/resource/python_workspace rename to workspace_python/ros2_ws/src/python_workspace/resource/python_workspace diff --git a/ros2_ws/src/python_workspace/setup.cfg b/workspace_python/ros2_ws/src/python_workspace/setup.cfg similarity index 100% rename from ros2_ws/src/python_workspace/setup.cfg rename to workspace_python/ros2_ws/src/python_workspace/setup.cfg diff --git a/ros2_ws/src/python_workspace/setup.py b/workspace_python/ros2_ws/src/python_workspace/setup.py similarity index 100% rename from ros2_ws/src/python_workspace/setup.py rename to workspace_python/ros2_ws/src/python_workspace/setup.py diff --git a/ros2_ws/src/python_workspace/test/test_copyright.py b/workspace_python/ros2_ws/src/python_workspace/test/test_copyright.py similarity index 100% rename from ros2_ws/src/python_workspace/test/test_copyright.py rename to workspace_python/ros2_ws/src/python_workspace/test/test_copyright.py diff --git a/ros2_ws/src/python_workspace/test/test_flake8.py b/workspace_python/ros2_ws/src/python_workspace/test/test_flake8.py similarity index 100% rename from ros2_ws/src/python_workspace/test/test_flake8.py rename to workspace_python/ros2_ws/src/python_workspace/test/test_flake8.py diff --git a/ros2_ws/src/python_workspace/test/test_pep257.py b/workspace_python/ros2_ws/src/python_workspace/test/test_pep257.py similarity index 100% rename from ros2_ws/src/python_workspace/test/test_pep257.py rename to workspace_python/ros2_ws/src/python_workspace/test/test_pep257.py
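
A minimal standalone sketch of the decode step behind the new jetson_node.py postprocess, for reference. It assumes the engine's (1, 5, 8400) output stores the five channels as (x_center, y_center, width, height, confidence) in pixels of the 640x640 network input, and that confidences are already sigmoid-activated; both assumptions match typical single-class YOLOv8 exports but should be verified against the actual Maize.onnx model. Because the layout is channel-major, the flat pagelocked buffer has to be reshaped to (5, 8400) and transposed before iterating per detection. The decode_yolo_output helper below is hypothetical and not part of the diff above.

    import numpy as np
    import cv2

    def decode_yolo_output(h_output, conf_threshold=0.9, nms_threshold=0.1):
        # Flat host buffer -> (5, 8400) channel-major -> (8400, 5), one detection per row.
        detections = np.reshape(h_output, (5, -1)).T

        boxes, confidences = [], []
        for x_c, y_c, w, h, conf in detections:
            if conf < conf_threshold:
                continue
            # cv2.dnn.NMSBoxes expects [x_top_left, y_top_left, width, height] in pixels.
            boxes.append([int(x_c - w / 2), int(y_c - h / 2), int(w), int(h)])
            confidences.append(float(conf))

        indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)
        # OpenCV returns either a flat array or an (N, 1) array depending on version.
        return [boxes[i] + [confidences[i]] for i in np.asarray(indices).flatten()]

Under these assumptions the call inside JetsonNode.postprocess would be decode_yolo_output(self.h_output), and the returned [x_top_left, y_top_left, w, h, confidence] entries plug directly into the existing display() rectangle call, which draws from (x, y) to (x + w, y + h).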