-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
2c8a57f
commit 1d259de
Showing
6 changed files
with
578 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
import tensorrt as trt | ||
import sys | ||
import argparse | ||
|
||
""" | ||
takes in onnx model | ||
converts to tensorrt | ||
tensorrt model input size must be src pth input size | ||
""" | ||
|
||
def cli(argv=None):
    """Parse command-line arguments for the ONNX -> TensorRT compiler.

    Args:
        argv: optional list of argument strings; defaults to sys.argv[1:]
              (passing a list makes the function unit-testable).

    Returns:
        dict with keys:
            'model':  path to the input ONNX file
            'fp':     floating point precision, 16 or 32
            'output': name of the serialized TensorRT engine file
    """
    desc = 'Compile Onnx model to TensorRT'
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('-m', '--model', default='', help='onnx file location')
    parser.add_argument('-fp', '--floatingpoint', type=int, default=16,
                        help='floating point precision. 16 or 32')
    parser.add_argument('-o', '--output', default='', help='name of trt output file')
    args = parser.parse_args(argv)
    fp = args.floatingpoint
    if fp != 16 and fp != 32:
        print('floating point precision must be 16 or 32')
        sys.exit()
    # FIX: honor -m/-o when supplied; the original parsed these flags but
    # always hard-coded the defaults below, silently ignoring user input.
    model = args.model or 'coco_yolov5.onnx'
    output = args.output or 'coco_yolov5-{}.trt'.format(fp)
    return {
        'model': model,
        'fp': fp,
        'output': output
    }
|
||
if __name__ == '__main__':
    args = cli()
    # NOTE(review): batch size is fixed here regardless of CLI flags; the
    # engine's input shape below is baked to this value.
    batch_size = 4
    model = '{}'.format(args['model'])
    output = '{}'.format(args['output'])
    logger = trt.Logger(trt.Logger.WARNING)
    # TRT7 requires networks to be created with the EXPLICIT_BATCH flag set.
    explicit_batch = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)  # trt7
    with trt.Builder(logger) as builder, builder.create_network(explicit_batch) as network, trt.OnnxParser(network, logger) as parser:
        # 256 MiB of scratch workspace for the builder's tactic selection.
        # NOTE(review): max_workspace_size / max_batch_size / fp16_mode are
        # the deprecated pre-TRT8 builder attributes — confirm TRT version.
        builder.max_workspace_size = 1 << 28
        builder.max_batch_size = batch_size
        if args['fp'] == 16:
            builder.fp16_mode = True
        with open(model, 'rb') as f:
            print('Beginning ONNX file parsing')
            # parse() returns False on failure; dump every recorded error.
            # NOTE(review): build continues even if parsing failed.
            if not parser.parse(f.read()):
                for error in range(parser.num_errors):
                    print("ERROR", parser.get_error(error))
            print("num layers:", network.num_layers)
            # Force the network input to a fixed NCHW shape; with an
            # explicit-batch network the batch dim is part of the shape.
            network.get_input(0).shape = [batch_size, 3, 608, 608]  # trt7
            # last_layer = network.get_layer(network.num_layers - 1)
            # network.mark_output(last_layer.get_output(0))
            # reshape input from 32 to 1
            engine = builder.build_cuda_engine(network)
            with open(output, 'wb') as f:
                f.write(engine.serialize())
            print("Completed creating Engine")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,317 @@ | ||
import cv2 | ||
import sys | ||
import tensorrt as trt | ||
import pycuda.autoinit | ||
import pycuda.driver as cuda | ||
import numpy as np | ||
import math | ||
import time | ||
import torch | ||
INPUT_W = 608 | ||
INPUT_H = 608 | ||
|
||
class Processor():
    """YOLOv5 TensorRT inference wrapper.

    Deserializes a serialized TensorRT engine, allocates page-locked host
    buffers and device buffers for every engine binding, and provides
    image preprocessing (letterbox to 608x608), inference, and YOLO
    post-processing (grid decoding, confidence filtering, NMS).
    """

    def __init__(self, model):
        """Load the engine file at `model` and allocate all I/O buffers.

        param:
            model: path to a serialized TensorRT engine (.trt) file
        """
        # load tensorrt engine
        TRT_LOGGER = trt.Logger(trt.Logger.INFO)
        TRTbin = '{0}'.format(model)
        print('trtbin', TRTbin)
        with open(TRTbin, 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
            engine = runtime.deserialize_cuda_engine(f.read())

        self.context = engine.create_execution_context()
        stream = cuda.Stream()

        # allocate memory: one page-locked host buffer and one device
        # buffer per binding, partitioned into inputs and outputs
        inputs, outputs, bindings = [], [], []
        for binding in engine:
            size = trt.volume(engine.get_binding_shape(binding))
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            host_mem = cuda.pagelocked_empty(size, dtype)
            device_mem = cuda.mem_alloc(host_mem.nbytes)
            bindings.append(int(device_mem))
            if engine.binding_is_input(binding):
                inputs.append({'host': host_mem, 'device': device_mem})
            else:
                outputs.append({'host': host_mem, 'device': device_mem})
        # save to class
        self.inputs = inputs
        self.outputs = outputs
        self.bindings = bindings
        self.stream = stream

        # post processing config
        self.cls_nums = 80
        self.filters = (self.cls_nums + 5) * 3
        self.output_filter = self.cls_nums + 5  # x,y,w,h,obj + class scores
        # flat engine outputs are reshaped to (1, anchors, ny, nx, 85)
        self.output_shapes1 = [
            (1, 3, 80, 80, self.output_filter),
            (1, 3, 40, 40, self.output_filter),
            (1, 3, 20, 20, self.output_filter)
        ]

        self.strides = np.array([8., 16., 32.])
        anchors = np.array([
            [[10, 13], [16, 30], [33, 23]],
            [[30, 61], [62, 45], [59, 119]],
            [[116, 90], [156, 198], [373, 326]],
        ])
        self.nl = len(anchors)
        self.nc = 80  # classes
        self.no = self.nc + 5  # outputs per anchor
        self.na = len(anchors[0])
        a = anchors.copy().astype(np.float32)
        a = a.reshape(self.nl, -1, 2)
        self.anchors = a.copy()
        self.anchor_grid = a.copy().reshape(self.nl, 1, -1, 1, 1, 2)

    def detect(self, img):
        """Run inference on a raw BGR image.

        return:
            list of 3 raw head outputs reshaped to (1, 3, ny, nx, 85)
        """
        # FIX: preprocess_image returns a single ndarray of shape
        # (1, 3, H, W); the original `resized, = ...` tuple-unpack silently
        # stripped the batch dimension.
        resized = self.preprocess_image(img)
        outputs = self.inference(resized)
        # reshape from flat to (1, 3, ny, nx, 85)
        reshaped = []
        # FIX: the attribute set in __init__ is output_shapes1;
        # self.output_shapes was never defined (AttributeError).
        for output, shape in zip(outputs, self.output_shapes1):
            reshaped.append(output.reshape(shape))
        return reshaped

    def preprocess_image(self, image_raw):
        """
        description: Convert a BGR image array to RGB, letterbox-resize and
                     pad it to INPUT_W x INPUT_H, normalize to [0,1], and
                     transform to NCHW format.
        param:
            image_raw: HWC BGR uint8 ndarray (e.g. from cv2.imread)
        return:
            image: the processed image, float32 ndarray (1, 3, INPUT_H, INPUT_W)
        """
        h, w, c = image_raw.shape
        image = cv2.cvtColor(image_raw, cv2.COLOR_BGR2RGB)
        # Calculate width and height and paddings
        r_w = INPUT_W / w
        r_h = INPUT_H / h
        if r_h > r_w:
            # width is the limiting side: pad top/bottom
            tw = INPUT_W
            th = int(r_w * h)
            tx1 = tx2 = 0
            ty1 = int((INPUT_H - th) / 2)
            ty2 = INPUT_H - th - ty1
        else:
            # height is the limiting side: pad left/right
            tw = int(r_h * w)
            th = INPUT_H
            tx1 = int((INPUT_W - tw) / 2)
            tx2 = INPUT_W - tw - tx1
            ty1 = ty2 = 0
        # Resize the image with long side while maintaining ratio
        image = cv2.resize(image, (tw, th))
        # Pad the short side with (128,128,128)
        image = cv2.copyMakeBorder(
            image, ty1, ty2, tx1, tx2, cv2.BORDER_CONSTANT, (128, 128, 128)
        )
        image = image.astype(np.float32)
        # Normalize to [0,1]
        image /= 255.0
        # HWC to CHW format:
        image = np.transpose(image, [2, 0, 1])
        # CHW to NCHW format
        image = np.expand_dims(image, axis=0)
        # Convert the image to row-major order, also known as "C order":
        image = np.ascontiguousarray(image)
        return image

    def inference(self, img):
        """Copy `img` to the input buffer, execute the engine, and return
        the flat host output arrays (one per output binding)."""
        # copy img to input memory
        self.inputs[0]['host'] = np.ravel(img)
        # transfer data to the gpu
        for inp in self.inputs:
            cuda.memcpy_htod_async(inp['device'], inp['host'], self.stream)
        # run inference
        start = time.time()
        self.context.execute_async_v2(
            bindings=self.bindings,
            stream_handle=self.stream.handle)
        # fetch outputs from gpu
        for out in self.outputs:
            cuda.memcpy_dtoh_async(out['host'], out['device'], self.stream)
        # synchronize stream
        self.stream.synchronize()
        end = time.time()
        print('execution time:', end - start)
        return [out['host'] for out in self.outputs]

    def extract_object_grids(self, output):
        """
        Extract objectness grid
        (how likely a box is to contain the center of a bounding box)
        Returns:
            object_grids: list of tensors (1, 3, nx, ny, 1)
        """
        object_grids = []
        for out in output:
            probs = self.sigmoid_v(out[..., 4:5])
            object_grids.append(probs)
        return object_grids

    def extract_class_grids(self, output):
        """
        Extracts class probabilities
        (the most likely class of a given tile)
        Returns:
            class_grids: array len 3 of tensors ( 1, 3, nx, ny, 80)
        """
        class_grids = []
        for out in output:
            object_probs = self.sigmoid_v(out[..., 4:5])
            # class confidence is conditioned on objectness
            class_probs = self.sigmoid_v(out[..., 5:])
            obj_class_probs = class_probs * object_probs
            class_grids.append(obj_class_probs)
        return class_grids

    def extract_boxes(self, output, conf_thres=0.3):
        """
        Extracts boxes (xywh) -> (x1, y1, x2, y2)
        Boxes are in network (INPUT_W x INPUT_H) scale since no original
        image dimensions are supplied to xywh2xyxy here.
        """
        scaled = []
        grids = []
        for out in output:
            out = self.sigmoid_v(out)
            _, _, width, height, _ = out.shape
            grid = self.make_grid(width, height)
            grids.append(grid)
            scaled.append(out)
        z = []
        for out, grid, stride, anchor in zip(scaled, grids, self.strides, self.anchor_grid):
            _, _, width, height, _ = out.shape
            # decode YOLOv5 box regression: xy relative to grid cell,
            # wh relative to anchor
            out[..., 0:2] = (out[..., 0:2] * 2. - 0.5 + grid) * stride
            out[..., 2:4] = (out[..., 2:4] * 2) ** 2 * anchor

            out[..., 5:] = out[..., 4:5] * out[..., 5:]
            out = out.reshape((1, 3 * width * height, self.output_filter))
            z.append(out)
        pred = np.concatenate(z, 1)
        # filter by objectness confidence
        xc = pred[..., 4] > conf_thres
        pred = pred[xc]
        boxes = self.xywh2xyxy(pred[:, :4])
        return boxes

    def post_process(self, outputs, conf_thres=0.3, iou_thres=0.3, origin_w=0, origin_h=0):
        """
        Transforms raw output into boxes, confs, classes
        Applies NMS thresholding on bounding boxes and confs
        Parameters:
            outputs: raw output tensors (list of (1, 3, ny, nx, 85))
            conf_thres: objectness confidence threshold
            iou_thres: NMS IoU threshold
            origin_w, origin_h: original image size used to undo letterboxing
        Returns:
            boxes: x1,y1,x2,y2 tensor (dets, 4)
            confs: class * obj prob tensor (dets, 1)
            classes: class type tensor (dets, 1)
        """
        scaled = []
        grids = []
        for out in outputs:
            out = self.sigmoid_v(out)
            _, _, width, height, _ = out.shape
            grid = self.make_grid(width, height)
            grids.append(grid)
            scaled.append(out)
        z = []
        for out, grid, stride, anchor in zip(scaled, grids, self.strides, self.anchor_grid):
            _, _, width, height, _ = out.shape
            # decode YOLOv5 box regression: xy relative to grid cell,
            # wh relative to anchor
            out[..., 0:2] = (out[..., 0:2] * 2. - 0.5 + grid) * stride
            out[..., 2:4] = (out[..., 2:4] * 2) ** 2 * anchor

            out = out.reshape((1, 3 * width * height, self.output_filter))
            z.append(out)
        pred = np.concatenate(z, 1)
        # filter by objectness confidence
        xc = pred[..., 4] > conf_thres
        pred = pred[xc]
        return self.nms(pred, iou_thres, origin_w, origin_h)

    def make_grid(self, nx, ny):
        """
        Create scaling tensor based on box location
        Source: https://github.com/ultralytics/yolov5/blob/master/models/yolo.py
        Arguments
            nx: x-axis num boxes
            ny: y-axis num boxes
        Returns
            grid: tensor of shape (1, 1, ny, nx, 2)
        """
        nx_vec = np.arange(nx)
        ny_vec = np.arange(ny)
        yv, xv = np.meshgrid(ny_vec, nx_vec)
        grid = np.stack((yv, xv), axis=2)
        grid = grid.reshape(1, 1, ny, nx, 2)
        return grid

    def sigmoid(self, x):
        """Scalar sigmoid."""
        return 1 / (1 + math.exp(-x))

    def sigmoid_v(self, array):
        """Vectorized sigmoid over an ndarray."""
        return np.reciprocal(np.exp(-array) + 1.0)

    def exponential_v(self, array):
        """Vectorized exponential over an ndarray."""
        return np.exp(array)

    def non_max_suppression(self, boxes, confs, classes, iou_thres=0.3):
        """Greedy IoU-based NMS; keeps the highest-confidence box in each
        overlapping cluster. Returns filtered (boxes, confs, classes)."""
        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]
        y2 = boxes[:, 3]
        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        # process boxes in descending confidence order
        order = confs.flatten().argsort()[::-1]
        keep = []
        while order.size > 0:
            i = order[0]
            keep.append(i)
            xx1 = np.maximum(x1[i], x1[order[1:]])
            yy1 = np.maximum(y1[i], y1[order[1:]])
            xx2 = np.minimum(x2[i], x2[order[1:]])
            yy2 = np.minimum(y2[i], y2[order[1:]])
            w = np.maximum(0.0, xx2 - xx1 + 1)
            h = np.maximum(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (areas[i] + areas[order[1:]] - inter)
            # drop every remaining box that overlaps the kept one too much
            inds = np.where(ovr <= iou_thres)[0]
            order = order[inds + 1]
        boxes = boxes[keep]
        confs = confs[keep]
        classes = classes[keep]
        return boxes, confs, classes

    def nms(self, pred, iou_thres=0.6, origin_w=0, origin_h=0):
        """Convert filtered predictions to xyxy boxes, pick the best class
        per box, and apply non-max suppression."""
        boxes = self.xywh2xyxy(pred[..., 0:4], origin_w, origin_h)
        # best class only
        confs = np.amax(pred[:, 5:], 1, keepdims=True)
        classes = np.argmax(pred[:, 5:], axis=-1)
        return self.non_max_suppression(boxes, confs, classes, iou_thres)

    def xywh2xyxy(self, x, origin_w=0, origin_h=0):
        """
        description: Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
        param:
            origin_h: height of original image (0 = no letterbox rescale)
            origin_w: width of original image (0 = no letterbox rescale)
            x: A boxes tensor, each row is a box [center_x, center_y, w, h]
        return:
            y: A boxes tensor, each row is a box [x1, y1, x2, y2]
        """
        y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
        # FIX: the original divided by the default origin_w=0 / origin_h=0,
        # so callers that passed no image size (e.g. extract_boxes) always
        # raised ZeroDivisionError. Without a size, return plain xyxy boxes
        # in network scale.
        if not origin_w or not origin_h:
            y[:, 0] = x[:, 0] - x[:, 2] / 2
            y[:, 1] = x[:, 1] - x[:, 3] / 2
            y[:, 2] = x[:, 0] + x[:, 2] / 2
            y[:, 3] = x[:, 1] + x[:, 3] / 2
            return y
        r_w = INPUT_W / origin_w
        r_h = INPUT_H / origin_h
        if r_h > r_w:
            # letterboxed top/bottom: shift y by the padding, scale by r_w
            y[:, 0] = x[:, 0] - x[:, 2] / 2
            y[:, 2] = x[:, 0] + x[:, 2] / 2
            y[:, 1] = x[:, 1] - x[:, 3] / 2 - (INPUT_H - r_w * origin_h) / 2
            y[:, 3] = x[:, 1] + x[:, 3] / 2 - (INPUT_H - r_w * origin_h) / 2
            y /= r_w
        else:
            # letterboxed left/right: shift x by the padding, scale by r_h
            y[:, 0] = x[:, 0] - x[:, 2] / 2 - (INPUT_W - r_h * origin_w) / 2
            y[:, 2] = x[:, 0] + x[:, 2] / 2 - (INPUT_W - r_h * origin_w) / 2
            y[:, 1] = x[:, 1] - x[:, 3] / 2
            y[:, 3] = x[:, 1] + x[:, 3] / 2
            y /= r_h

        return y
Oops, something went wrong.