-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
75ea2fa
commit 7c691fa
Showing
13 changed files
with
1,607 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# Build script for the YOLOv8/YOLOv5 OpenCV-DNN inference example.
cmake_minimum_required(VERSION 3.5)

project(Yolov8CPPInference VERSION 0.1)

# Let sources include headers that live next to this CMakeLists.txt.
set(CMAKE_INCLUDE_CURRENT_DIR ON)

# ---- CUDA ------------------------------------------------------------------
# Used for GPU inference through OpenCV's DNN module; the accompanying README
# notes this import can be removed when OpenCV was built without CUDA/cuDNN.
set(CUDA_TOOLKIT_ROOT_DIR "/usr/local/cuda")
find_package(CUDA 11 REQUIRED)

set(CMAKE_CUDA_STANDARD 11)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)

# ---- OpenCV ----------------------------------------------------------------
find_package(OpenCV REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS})

# ---- Executable ------------------------------------------------------------
add_executable(Yolov8CPPInference
    main.cpp
    inference.h
    inference.cpp
)
target_link_libraries(Yolov8CPPInference ${OpenCV_LIBS})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
# YOLOv8/YOLOv5 Inference C++ | ||
|
||
This example demonstrates how to perform inference using YOLOv8 and YOLOv5 models in C++ with OpenCV's DNN API. | ||
|
||
## Usage | ||
|
||
```bash | ||
git clone https://github.com/ultralytics/ultralytics | ||
cd ultralytics | ||
pip install . | ||
cd examples/YOLOv8-CPP-Inference | ||
|
||
# Add a **yolov8\_.onnx** and/or **yolov5\_.onnx** model(s) to the ultralytics folder. | ||
# Edit the **main.cpp** to change the **projectBasePath** to match your user. | ||
|
||
# Note that by default the CMake file will try to import the CUDA library so that OpenCV's DNN module (cuDNN) can run inference on the GPU. | ||
# If your OpenCV build does not use CUDA/cuDNN you can remove that import call and run the example on CPU. | ||
|
||
mkdir build | ||
cd build | ||
cmake .. | ||
make | ||
./Yolov8CPPInference | ||
``` | ||
|
||
## Exporting YOLOv8 and YOLOv5 Models | ||
|
||
To export YOLOv8 models: | ||
|
||
```commandline | ||
yolo export model=yolov8s.pt imgsz=480,640 format=onnx opset=12 | ||
``` | ||
|
||
To export YOLOv5 models: | ||
|
||
```commandline | ||
python3 export.py --weights yolov5s.pt --img 480 640 --include onnx --opset 12 | ||
``` | ||
|
||
yolov8s.onnx: | ||
|
||
![image](https://user-images.githubusercontent.com/40023722/217356132-a4cecf2e-2729-4acb-b80a-6559022d7707.png) | ||
|
||
yolov5s.onnx: | ||
|
||
![image](https://user-images.githubusercontent.com/40023722/217357005-07464492-d1da-42e3-98a7-fc753f87d5e6.png) | ||
|
||
This repository utilizes OpenCV's DNN API to run ONNX exported models of YOLOv5 and YOLOv8. In theory, it should work for YOLOv6 and YOLOv7 as well, but they have not been tested. Note that the example networks are exported with rectangular (640x480) resolutions, but any exported resolution will work. You may want to use the letterbox approach for square images, depending on your use case. | ||
|
||
The **main** branch version uses Qt as a GUI wrapper. The primary focus here is the **Inference** class file, which demonstrates how to transpose YOLOv8 models to work as YOLOv5 models. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,185 @@ | ||
#include "inference.h" | ||
|
||
/**
 * @brief Stores the run configuration and loads the ONNX network.
 *
 * @param onnxModelPath    Path of the ONNX model handed to the network loader.
 * @param modelInputShape  Input size the blob is resized to before inference.
 * @param classesTxtFile   Path of a class-name file; stored but not read here.
 * @param runWithCuda      Selects the CUDA backend/target when true, CPU otherwise.
 */
Inference::Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape, const std::string &classesTxtFile, const bool &runWithCuda)
    : modelPath(onnxModelPath),
      classesPath(classesTxtFile),
      cudaEnabled(runWithCuda),
      modelShape(modelInputShape)
{
    loadOnnxNetwork();
    // loadClassesFromFile(); The classes are hard-coded for this example
}
|
||
/**
 * @brief Runs the loaded network on one image and returns the detections that survive NMS.
 *
 * The output tensor layout decides how rows are decoded:
 *  - yolov5: (batch, rows, 4 box values + objectness + class scores)
 *  - yolov8: (batch, 4 box values + class scores, rows), transposed here into the yolov5 row layout
 *
 * @param input Image to run inference on. An empty image yields an empty result.
 * @return One Detection per box kept by non-maximum suppression. Box coordinates are scaled
 *         back by the ratio between the (optionally square-padded) image and the model input size.
 */
std::vector<Detection> Inference::runInference(const cv::Mat &input)
{
    // Nothing to detect on an empty frame (e.g. a failed imread); avoid asserting inside OpenCV.
    if (input.empty())
        return {};

    cv::Mat modelInput = input;
    if (letterBoxForSquare && modelShape.width == modelShape.height)
        modelInput = formatToSquare(modelInput);

    cv::Mat blob;
    cv::dnn::blobFromImage(modelInput, blob, 1.0/255.0, modelShape, cv::Scalar(), true, false);
    net.setInput(blob);

    std::vector<cv::Mat> outputs;
    net.forward(outputs, net.getUnconnectedOutLayersNames());

    // The decoding below indexes size[1] and size[2], so a 3-D output is required.
    if (outputs.empty() || outputs[0].dims < 3)
        return {};

    int rows = outputs[0].size[1];
    int dimensions = outputs[0].size[2];

    // yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
    // yolov8 has an output of shape (batchSize, 84, 8400) (Num classes + box[x,y,w,h])
    const bool yolov8 = dimensions > rows; // shape[2] larger than shape[1] means yolov8
    if (yolov8)
    {
        rows = outputs[0].size[2];
        dimensions = outputs[0].size[1];

        // Bring the tensor into (rows, dimensions) so both model families share one decoder.
        outputs[0] = outputs[0].reshape(1, dimensions);
        cv::transpose(outputs[0], outputs[0]);
    }

    // Class scores start after the 4 box values, plus the objectness value for yolov5.
    // Their count comes from the tensor itself, so models whose class count differs from
    // the hard-coded name list are still read within bounds.
    const int scoreOffset = yolov8 ? 4 : 5;
    const int numScores = dimensions - scoreOffset;
    if (numScores <= 0)
        return {};

    float *data = reinterpret_cast<float *>(outputs[0].data);

    const float x_factor = modelInput.cols / modelShape.width;
    const float y_factor = modelInput.rows / modelShape.height;

    std::vector<int> class_ids;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;

    for (int i = 0; i < rows; ++i, data += dimensions)
    {
        // yolov5 rows carry an objectness value that must pass its own threshold first.
        if (!yolov8 && !(data[4] >= modelConfidenceThreshold))
            continue;

        cv::Mat scores(1, numScores, CV_32FC1, data + scoreOffset);
        cv::Point class_id;
        double maxClassScore = 0.0;
        cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &class_id);

        if (!(maxClassScore > modelScoreThreshold))
            continue;

        // yolov8 reports the best class score, yolov5 reports the objectness value.
        confidences.push_back(yolov8 ? static_cast<float>(maxClassScore) : data[4]);
        class_ids.push_back(class_id.x);

        // Box is stored as centre x/y plus width/height in model-input pixels.
        const float x = data[0];
        const float y = data[1];
        const float w = data[2];
        const float h = data[3];

        const int left = static_cast<int>((x - 0.5 * w) * x_factor);
        const int top = static_cast<int>((y - 0.5 * h) * y_factor);
        const int width = static_cast<int>(w * x_factor);
        const int height = static_cast<int>(h * y_factor);

        boxes.push_back(cv::Rect(left, top, width, height));
    }

    std::vector<int> nms_result;
    cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nms_result);

    // One generator for the whole call; re-seeding from random_device per detection is wasteful.
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_int_distribution<int> dis(100, 255);

    std::vector<Detection> detections;
    detections.reserve(nms_result.size());
    for (const int idx : nms_result)
    {
        Detection result;
        result.class_id = class_ids[idx];
        result.confidence = confidences[idx];

        const int c0 = dis(gen);
        const int c1 = dis(gen);
        const int c2 = dis(gen);
        result.color = cv::Scalar(c0, c1, c2);

        // Fall back to the numeric id when the model has more classes than known names.
        const bool hasName = result.class_id >= 0 &&
                             static_cast<std::size_t>(result.class_id) < classes.size();
        result.className = hasName ? classes[result.class_id] : std::to_string(result.class_id);
        result.box = boxes[idx];

        detections.push_back(result);
    }

    return detections;
}
|
||
void Inference::loadClassesFromFile() | ||
{ | ||
std::ifstream inputFile(classesPath); | ||
if (inputFile.is_open()) | ||
{ | ||
std::string classLine; | ||
while (std::getline(inputFile, classLine)) | ||
classes.push_back(classLine); | ||
inputFile.close(); | ||
} | ||
} | ||
|
||
void Inference::loadOnnxNetwork() | ||
{ | ||
net = cv::dnn::readNetFromONNX(modelPath); | ||
if (cudaEnabled) | ||
{ | ||
std::cout << "\nRunning on CUDA" << std::endl; | ||
net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA); | ||
net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA); | ||
} | ||
else | ||
{ | ||
std::cout << "\nRunning on CPU" << std::endl; | ||
net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV); | ||
net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU); | ||
} | ||
} | ||
|
||
/**
 * @brief Pads an image to a square by adding black pixels on the right/bottom.
 *
 * The source is copied into the top-left corner of a zeroed canvas whose side is
 * the larger of the source's width and height.
 *
 * @param source Image to pad.
 * @return Square image of the same type as source.
 */
cv::Mat Inference::formatToSquare(const cv::Mat &source)
{
    const int col = source.cols;
    const int row = source.rows;
    const int side = col > row ? col : row;

    // The canvas must share the source type: copyTo() into an ROI of a different type
    // reallocates the temporary ROI header and leaves the canvas itself black.
    cv::Mat result = cv::Mat::zeros(side, side, source.type());
    source.copyTo(result(cv::Rect(0, 0, col, row)));
    return result;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
#ifndef INFERENCE_H | ||
#define INFERENCE_H | ||
|
||
// Cpp native | ||
#include <fstream> | ||
#include <vector> | ||
#include <string> | ||
#include <random> | ||
|
||
// OpenCV / DNN / Inference | ||
#include <opencv2/imgproc.hpp> | ||
#include <opencv2/opencv.hpp> | ||
#include <opencv2/dnn.hpp> | ||
|
||
struct Detection | ||
{ | ||
int class_id{0}; | ||
std::string className{}; | ||
float confidence{0.0}; | ||
cv::Scalar color{}; | ||
cv::Rect box{}; | ||
}; | ||
|
||
class Inference | ||
{ | ||
public: | ||
Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape = {640, 640}, const std::string &classesTxtFile = "", const bool &runWithCuda = true); | ||
std::vector<Detection> runInference(const cv::Mat &input); | ||
|
||
private: | ||
void loadClassesFromFile(); | ||
void loadOnnxNetwork(); | ||
cv::Mat formatToSquare(const cv::Mat &source); | ||
|
||
std::string modelPath{}; | ||
std::string classesPath{}; | ||
bool cudaEnabled{}; | ||
|
||
std::vector<std::string> classes{"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"}; | ||
|
||
cv::Size2f modelShape{}; | ||
|
||
float modelConfidenceThreshold {0.25}; | ||
float modelScoreThreshold {0.45}; | ||
float modelNMSThreshold {0.50}; | ||
|
||
bool letterBoxForSquare = true; | ||
|
||
cv::dnn::Net net; | ||
}; | ||
|
||
#endif // INFERENCE_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
#include <iostream> | ||
#include <vector> | ||
#include <getopt.h> | ||
|
||
#include <opencv2/opencv.hpp> | ||
|
||
#include "inference.h" | ||
|
||
using namespace std; | ||
using namespace cv; | ||
|
||
int main(int argc, char **argv) | ||
{ | ||
std::string projectBasePath = "/home/user/ultralytics"; // Set your ultralytics base path | ||
|
||
bool runOnGPU = true; | ||
|
||
// | ||
// Pass in either: | ||
// | ||
// "yolov8s.onnx" or "yolov5s.onnx" | ||
// | ||
// To run Inference with yolov8/yolov5 (ONNX) | ||
// | ||
|
||
// Note that in this example the classes are hard-coded and 'classes.txt' is a place holder. | ||
Inference inf(projectBasePath + "/yolov8s.onnx", cv::Size(640, 640), "classes.txt", runOnGPU); | ||
|
||
std::vector<std::string> imageNames; | ||
imageNames.push_back(projectBasePath + "/ultralytics/assets/bus.jpg"); | ||
imageNames.push_back(projectBasePath + "/ultralytics/assets/zidane.jpg"); | ||
|
||
for (int i = 0; i < imageNames.size(); ++i) | ||
{ | ||
cv::Mat frame = cv::imread(imageNames[i]); | ||
|
||
// Inference starts here... | ||
std::vector<Detection> output = inf.runInference(frame); | ||
|
||
int detections = output.size(); | ||
std::cout << "Number of detections:" << detections << std::endl; | ||
|
||
for (int i = 0; i < detections; ++i) | ||
{ | ||
Detection detection = output[i]; | ||
|
||
cv::Rect box = detection.box; | ||
cv::Scalar color = detection.color; | ||
|
||
// Detection box | ||
cv::rectangle(frame, box, color, 2); | ||
|
||
// Detection box text | ||
std::string classString = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4); | ||
cv::Size textSize = cv::getTextSize(classString, cv::FONT_HERSHEY_DUPLEX, 1, 2, 0); | ||
cv::Rect textBox(box.x, box.y - 40, textSize.width + 10, textSize.height + 20); | ||
|
||
cv::rectangle(frame, textBox, color, cv::FILLED); | ||
cv::putText(frame, classString, cv::Point(box.x + 5, box.y - 10), cv::FONT_HERSHEY_DUPLEX, 1, cv::Scalar(0, 0, 0), 2, 0); | ||
} | ||
// Inference ends here... | ||
|
||
// This is only for preview purposes | ||
float scale = 0.8; | ||
cv::resize(frame, frame, cv::Size(frame.cols*scale, frame.rows*scale)); | ||
cv::imshow("Inference", frame); | ||
|
||
cv::waitKey(-1); | ||
} | ||
} |
Oops, something went wrong.