-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
0d8dc55
commit 1429233
Showing
91 changed files
with
15,304 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,99 @@ | ||
# YoloDetectAPI | ||
This is a API for yolov5 version7 detect.py | ||
# Introduction | ||
|
||
YoloV5 作为 YoloV4 之后的改进型,在算法上做出了优化,检测的性能得到了一定的提升。其特点之一就是权重文件非常的小,可以在一些配置更低的移动设备上运行,且提高速度的同时准确度更高。本次使用的是最新推出的 YoloV5 Version7 版本。 | ||
|
||
GitHub 地址:[YOLOv5 🚀 是世界上最受欢迎的视觉 AI,代表 Ultralytics 对未来视觉 AI 方法的开源研究,结合在数千小时的研究和开发中积累的经验教训和最佳实践。](https://github.com/ultralytics/yolov5/releases/tag/v7.0) | ||
|
||
|
||
|
||
--- | ||
|
||
# Section 1 起因 | ||
|
||
本人目前的一个项目需要使用到手势识别,得益于 YoloV5 的优秀的识别速度与准确率,因此识别部分的模型均使用 YoloV5 Version7 版本进行训练。训练之后需要使用这个模型,原始的 `detect.py` 程序使用 `argparse` 对参数进行封装,这为初期验证模型提供了一定的便利,我们可以通过 `Pycharm` 或者 `Terminal` 来快速地执行程序,然后在 `run/detect` 路径下快速地查看到结果。但是在实际的应用中,识别程序往往是作为整个系统的一个组件来运行的,现有的 `detect.py` 无法满足使用需求,因此需要将其封装成一个可供多个程序调用的 `API` 接口。通过这个接口可以获得 `种类、坐标、置信度` 这三个信息。通过这些信息来控制系统软件做出对应的操作。 | ||
|
||
|
||
|
||
--- | ||
|
||
# Section 2 魔改的思路 | ||
|
||
这部分的代码与思路参照了 [爆改YOLOV7的detect.py制作成API接口供其他python程序调用(超低延时)](https://blog.csdn.net/weixin_51331359/article/details/126012620) 这篇文章的思路。由于 YoloV5 和 YoloV7 的程序有些许不一样,因此做了一些修改。 | ||
|
||
大体的思路是去除掉 `argparse` 部分,通过类将参数封装进去,去除掉识别这个核心功能之外的其它功能。 | ||
|
||
未打包程序见博客 [魔改并封装 YoloV5 Version7 的 detect.py 成 API接口以供 python 程序使用](https://blog.csdn.net/qq_17790209/article/details/129061528) | ||
|
||
|
||
|
||
# Section 3 如何安装到 Python 环境 | ||
|
||
从 `whl` 文件夹或者从`Release`下载 `yolo_detectAPI-5.7-py3-none-any.whl` ,在下载目录内进入 Terminal 并切换至你要安装的 Python 环境。输入下面的命令安装 Python 库。这里需要注意,Python 环境需要 3.8 及以上版本才能使用。 | ||
|
||
```shell | ||
pip install .\yolo_detectAPI-5.7-py3-none-any.whl | ||
``` | ||
|
||
这个库使用 CPU 执行程序,如果需要使用 GPU 执行程序请 clone 源码自行打包修改程序。 | ||
|
||
自行打包需要进入到 clone 之后的项目的根目录,打开终端输入下面的命令,然后在 `dist` 文件夹内就可找到你需要的 `whl` 文件。 | ||
|
||
```python | ||
python setup.py sdist bdist_wheel | ||
``` | ||
|
||
|
||
|
||
# Section 4 如何在项目中使用 | ||
|
||
使用下面的代码就可以引用这个库。其中的 `cv2`,`torch` 在没有特定版本需求的情况下不需要单独安装,安装本API库的时候程序会自动安装这些依赖的库。 | ||
|
||
```python | ||
import cv2 | ||
import yolo_detectAPI | ||
import torch | ||
|
||
if __name__ == '__main__': | ||
cap = cv2.VideoCapture(0) | ||
a = yolo_detectAPI.DetectAPI(weights='last.pt') # 你要使用的模型的路径 | ||
with torch.no_grad(): | ||
while True: | ||
rec, img = cap.read() | ||
result, names = a.detect([img]) | ||
img = result[0][0] # 每一帧图片的处理结果图片 | ||
# 每一帧图像的识别结果(可包含多个物体) | ||
for cls, (x1, y1, x2, y2), conf in result[0][1]: | ||
print(names[cls], x1, y1, x2, y2, conf) # 识别物体种类、左上角x坐标、左上角y轴坐标、右下角x轴坐标、右下角y轴坐标,置信度 | ||
|
||
cv2.imshow("vedio", img) | ||
|
||
if cv2.waitKey(1) == ord('q'): | ||
break | ||
``` | ||
|
||
|
||
|
||
--- | ||
|
||
# Section 5 其他 | ||
|
||
其它问题欢迎进企鹅群交流:913211989 ( 小猫不要摸鱼 ) | ||
|
||
进群令牌:fGithub | ||
|
||
不可商用,开源,论文引用请标注如下内容 | ||
|
||
``` | ||
[1] Da Kuang.YoloV5 Version7 Detect API for Python3[EB/OL]. https://github.com/Ender-William/YoloDetectAPI, 2023-02-17/引用日期{YYYY-MM-DD}. | ||
``` | ||
|
||
|
||
|
||
--- | ||
|
||
# Reference | ||
本程序的修改参考了以下的资料,在此为前人做出的努力与贡献表示感谢! | ||
|
||
https://github.com/ultralytics/yolov5/releases/tag/v7.0 | ||
https://blog.csdn.net/weixin_51331359/article/details/126012620 | ||
https://blog.csdn.net/CharmsLUO/article/details/123422822 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
# YOLOv5 requirements | ||
# Usage: pip install -r requirements.txt | ||
|
||
# Base ------------------------------------------------------------------------ | ||
gitpython | ||
ipython # interactive notebook | ||
matplotlib>=3.2.2 | ||
numpy>=1.18.5 | ||
opencv-python>=4.1.1 | ||
Pillow>=7.1.2 | ||
psutil # system resources | ||
PyYAML>=5.3.1 | ||
requests>=2.23.0 | ||
scipy>=1.4.1 | ||
thop>=0.1.1 # FLOPs computation | ||
torch>=1.7.0 # see https://pytorch.org/get-started/locally (recommended) | ||
torchvision>=0.8.1 | ||
tqdm>=4.64.0 | ||
# protobuf<=3.20.1 # https://github.com/ultralytics/yolov5/issues/8012 | ||
|
||
# Logging --------------------------------------------------------------------- | ||
tensorboard>=2.4.1 | ||
# clearml>=1.2.0 | ||
# comet | ||
|
||
# Plotting -------------------------------------------------------------------- | ||
pandas>=1.1.4 | ||
seaborn>=0.11.0 | ||
|
||
# Export ---------------------------------------------------------------------- | ||
# coremltools>=6.0 # CoreML export | ||
# onnx>=1.12.0 # ONNX export | ||
# onnx-simplifier>=0.4.1 # ONNX simplifier | ||
# nvidia-pyindex # TensorRT export | ||
# nvidia-tensorrt # TensorRT export | ||
# scikit-learn<=1.1.2 # CoreML quantization | ||
# tensorflow>=2.4.1 # TF exports (-cpu, -aarch64, -macos) | ||
# tensorflowjs>=3.9.0 # TF.js export | ||
# openvino-dev # OpenVINO export | ||
|
||
# Deploy ---------------------------------------------------------------------- | ||
# tritonclient[all]~=2.24.0 | ||
|
||
# Extras ---------------------------------------------------------------------- | ||
# mss # screenshots | ||
# albumentations>=1.0.3 | ||
# pycocotools>=2.0.6 # COCO mAP | ||
# roboflow | ||
# ultralytics # HUB https://hub.ultralytics.com |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
import os | ||
from setuptools import setup, find_packages | ||
|
||
setup( | ||
name='yolo_detectAPI', | ||
version='5.7', | ||
description='Detect API', | ||
long_description='This is a API for yolov5 version7 detect.py', | ||
license='GPL Licence', | ||
author='Da Kuang', | ||
author_email='[email protected]', | ||
py_modeles = '__init__.py', | ||
packages=find_packages(), | ||
pakages=['yolo_detectAPI'], | ||
include_package_data=True, | ||
python_requires='>=3.8', | ||
url = 'http://blogs.kd-mercury.xyz/', | ||
install_requires=['matplotlib>=3.2.2', 'numpy>=1.18.5', 'opencv-python>=4.1.1', | ||
'Pillow>=7.1.2', 'PyYAML>=5.3.1', 'requests>=2.23.0', 'scipy>=1.4.1', | ||
'thop>=0.1.1', 'torch>=1.7.0', 'torchvision>=0.8.1', 'tqdm>=4.64.0', | ||
'tensorboard>=2.4.1', 'pandas>=1.1.4', 'seaborn>=0.11.0', | ||
'ipython>=8.3.0', 'psutil>=5.9.4'], | ||
data_files=['export.py'], | ||
classifiers=[ | ||
"Programming Language :: Python :: 3.8", | ||
"Programming Language :: Python :: 3.9", | ||
"License :: OSI Approved :: GNU General Public License (GPL)", | ||
"Development Status :: 4 - Beta" | ||
], | ||
scripts=[], | ||
) |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,158 @@ | ||
#-*- coding:utf-8 -*- | ||
import os | ||
import random | ||
import sys | ||
from pathlib import Path | ||
|
||
import torch | ||
|
||
FILE = Path(__file__).resolve() | ||
ROOT = FILE.parents[0] # YOLOv5 root directory | ||
if str(ROOT) not in sys.path: | ||
sys.path.append(str(ROOT)) # add ROOT to PATH | ||
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative | ||
from models.common import DetectMultiBackend | ||
from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadScreenshots, LoadStreams | ||
from utils.general import (LOGGER, Profile, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2, | ||
increment_path, non_max_suppression, print_args, scale_boxes, strip_optimizer, xyxy2xywh) | ||
from utils.plots import Annotator, colors, save_one_box | ||
from utils.torch_utils import select_device, smart_inference_mode, time_sync | ||
|
||
""" | ||
使用面向对象编程中的类来封装,需要去除掉原始 detect.py 中的结果保存方法,重写 | ||
保存方法将结果保存到一个 csv 文件中并打上视频的对应帧率 | ||
""" | ||
|
||
|
||
class YoloOpt: | ||
def __init__(self, weights='weights/last.pt', | ||
imgsz=(640, 640), conf_thres=0.25, | ||
iou_thres=0.45, device='cpu', view_img=False, | ||
classes=None, agnostic_nms=False, | ||
augment=False, update=False, exist_ok=False, | ||
project='/detect/result', name='result_exp', | ||
save_csv=True): | ||
self.weights = weights # 权重文件地址 | ||
self.source = None # 待识别的图像 | ||
if imgsz is None: | ||
self.imgsz = (640, 640) | ||
self.imgsz = imgsz # 输入图片的大小,默认 (640,640) | ||
self.conf_thres = conf_thres # object置信度阈值 默认0.25 用在nms中 | ||
self.iou_thres = iou_thres # 做nms的iou阈值 默认0.45 用在nms中 | ||
self.device = device # 执行代码的设备,由于项目只能用 CPU,这里只封装了 CPU 的方法 | ||
self.view_img = view_img # 是否展示预测之后的图片或视频 默认False | ||
self.classes = classes # 只保留一部分的类别,默认是全部保留 | ||
self.agnostic_nms = agnostic_nms # 进行NMS去除不同类别之间的框, 默认False | ||
self.augment = augment # augmented inference TTA测试时增强/多尺度预测,可以提分 | ||
self.update = update # 如果为True,则对所有模型进行strip_optimizer操作,去除pt文件中的优化器等信息,默认为False | ||
self.exist_ok = exist_ok # 如果为True,则对所有模型进行strip_optimizer操作,去除pt文件中的优化器等信息,默认为False | ||
self.project = project # 保存测试日志的参数,本程序没有用到 | ||
self.name = name # 每次实验的名称,本程序也没有用到 | ||
self.save_csv = save_csv # 是否保存成 csv 文件,本程序目前也没有用到 | ||
|
||
|
||
class DetectAPI: | ||
def __init__(self, weights, imgsz=640): | ||
self.opt = YoloOpt(weights=weights, imgsz=imgsz) | ||
weights = self.opt.weights | ||
imgsz = self.opt.imgsz | ||
|
||
# Initialize 初始化 | ||
# 获取设备 CPU/CUDA | ||
self.device = select_device(self.opt.device) | ||
# 不使用半精度 | ||
self.half = self.device.type != 'cpu' # # FP16 supported on limited backends with CUDA | ||
|
||
# Load model 加载模型 | ||
self.model = DetectMultiBackend(weights, self.device, dnn=False) | ||
self.stride = self.model.stride | ||
self.names = self.model.names | ||
self.pt = self.model.pt | ||
self.imgsz = check_img_size(imgsz, s=self.stride) | ||
|
||
# 不使用半精度 | ||
if self.half: | ||
self.model.half() # switch to FP16 | ||
|
||
# read names and colors | ||
self.names = self.model.module.names if hasattr(self.model, 'module') else self.model.names | ||
self.colors = [[random.randint(0, 255) for _ in range(3)] for _ in self.names] | ||
|
||
def detect(self, source): | ||
# 输入 detect([img]) | ||
if type(source) != list: | ||
raise TypeError('source must a list and contain picture read by cv2') | ||
|
||
# DataLoader 加载数据 | ||
# 直接从 source 加载数据 | ||
dataset = LoadImages(source) | ||
# 源程序通过路径加载数据,现在 source 就是加载好的数据,因此 LoadImages 就要重写 | ||
bs = 1 # set batch size | ||
|
||
# 保存的路径 | ||
vid_path, vid_writer = [None] * bs, [None] * bs | ||
|
||
# Run inference | ||
result = [] | ||
if self.device.type != 'cpu': | ||
self.model(torch.zeros(1, 3, self.imgsz, self.imgsz).to(self.device).type_as( | ||
next(self.model.parameters()))) # run once | ||
dt, seen = (Profile(), Profile(), Profile()), 0 | ||
|
||
for im, im0s in dataset: | ||
with dt[0]: | ||
im = torch.from_numpy(im).to(self.model.device) | ||
im = im.half() if self.model.fp16 else im.float() # uint8 to fp16/32 | ||
im /= 255 # 0 - 255 to 0.0 - 1.0 | ||
if len(im.shape) == 3: | ||
im = im[None] # expand for batch dim | ||
|
||
# Inference | ||
pred = self.model(im, augment=self.opt.augment)[0] | ||
|
||
# NMS | ||
with dt[2]: | ||
pred = non_max_suppression(pred, self.opt.conf_thres, self.opt.iou_thres, self.opt.classes, self.opt.agnostic_nms, max_det=2) | ||
|
||
# Process predictions | ||
# 处理每一张图片 | ||
det = pred[0] # API 一次只处理一张图片,因此不需要 for 循环 | ||
im0 = im0s.copy() # copy 一个原图片的副本图片 | ||
result_txt = [] # 储存检测结果,每新检测出一个物品,长度就加一。 | ||
# 每一个元素是列表形式,储存着 类别,坐标,置信度 | ||
# 设置图片上绘制框的粗细,类别名称 | ||
annotator = Annotator(im0, line_width=3, example=str(self.names)) | ||
if len(det): | ||
# Rescale boxes from img_size to im0 size | ||
# 映射预测信息到原图 | ||
det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round() | ||
|
||
# | ||
for *xyxy, conf, cls in reversed(det): | ||
line = (int(cls.item()), [int(_.item()) for _ in xyxy], conf.item()) # label format | ||
result_txt.append(line) | ||
label = f'{self.names[int(cls)]} {conf:.2f}' | ||
annotator.box_label(xyxy, label, color=self.colors[int(cls)]) | ||
result.append((im0, result_txt)) # 对于每张图片,返回画完框的图片,以及该图片的标签列表。 | ||
return result, self.names | ||
|
||
if __name__ == '__main__': | ||
cap = cv2.VideoCapture(0) | ||
a = DetectAPI(weights='weights/last.pt') | ||
with torch.no_grad(): | ||
while True: | ||
rec, img = cap.read() | ||
result, names = a.detect([img]) | ||
img = result[0][0] # 每一帧图片的处理结果图片 | ||
# 每一帧图像的识别结果(可包含多个物体) | ||
for cls, (x1, y1, x2, y2), conf in result[0][1]: | ||
print(names[cls], x1, y1, x2, y2, conf) # 识别物体种类、左上角x坐标、左上角y轴坐标、右下角x轴坐标、右下角y轴坐标,置信度 | ||
''' | ||
cv2.rectangle(img,(x1,y1),(x2,y2),(0,255,0)) | ||
cv2.putText(img,names[cls],(x1,y1-20),cv2.FONT_HERSHEY_DUPLEX,1.5,(255,0,0))''' | ||
print() # 将每一帧的结果输出分开 | ||
cv2.imshow("vedio", img) | ||
|
||
if cv2.waitKey(1) == ord('q'): | ||
break |
Oops, something went wrong.