Skip to content

Commit

Permalink
new code for reference/testing
Browse files Browse the repository at this point in the history
  • Loading branch information
Ishaan-Datta committed Sep 26, 2024
1 parent 2c8a57f commit 1d259de
Show file tree
Hide file tree
Showing 6 changed files with 578 additions and 9 deletions.
55 changes: 55 additions & 0 deletions conversion_tools/engine_builder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import tensorrt as trt
import sys
import argparse

"""
takes in onnx model
converts to tensorrt
tensorrt model input size must be src pth input size
"""

def cli():
    """Parse command-line arguments for ONNX -> TensorRT compilation.

    Returns:
        dict: {'model': onnx file path, 'fp': 16 or 32,
               'output': trt engine output path}
    """
    desc = 'Compile Onnx model to TensorRT'
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('-m', '--model', default='', help='onnx file location')
    parser.add_argument('-fp', '--floatingpoint', type=int, default=16,
                        help='floating point precision. 16 or 32')
    parser.add_argument('-o', '--output', default='', help='name of trt output file')
    args = parser.parse_args()
    fp = args.floatingpoint
    if fp != 16 and fp != 32:
        print('floating point precision must be 16 or 32')
        sys.exit()
    # BUG FIX: the original parsed -m/-o but then ignored them and always
    # used hard-coded names; honor the flags, falling back to the previous
    # hard-coded values when a flag is omitted (backward compatible).
    model = args.model or 'coco_yolov5.onnx'
    output = args.output or 'coco_yolov5-{}.trt'.format(fp)
    return {
        'model': model,
        'fp': fp,
        'output': output
    }

if __name__ == '__main__':
    args = cli()
    batch_size = 4
    model = args['model']
    output = args['output']
    logger = trt.Logger(trt.Logger.WARNING)
    # Explicit-batch network flag, required when parsing ONNX with TensorRT 7.
    explicit_batch = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)  # trt7
    with trt.Builder(logger) as builder, \
         builder.create_network(explicit_batch) as network, \
         trt.OnnxParser(network, logger) as parser:
        builder.max_workspace_size = 1 << 28  # 256 MiB builder scratch space
        builder.max_batch_size = batch_size
        if args['fp'] == 16:
            builder.fp16_mode = True
        with open(model, 'rb') as f:
            print('Beginning ONNX file parsing')
            if not parser.parse(f.read()):
                for error in range(parser.num_errors):
                    print("ERROR", parser.get_error(error))
                # BUG FIX: the original fell through and tried to build an
                # engine from a half-parsed network; abort instead.
                sys.exit(1)
        print("num layers:", network.num_layers)
        # Pin a static NCHW input shape; must match the inference-time size.
        network.get_input(0).shape = [batch_size, 3, 608, 608]  # trt7
        engine = builder.build_cuda_engine(network)
        # BUG FIX: build_cuda_engine returns None on failure; the original
        # then crashed with AttributeError on engine.serialize().
        if engine is None:
            print('Engine build failed')
            sys.exit(1)
        with open(output, 'wb') as f:
            f.write(engine.serialize())
        print("Completed creating Engine")
317 changes: 317 additions & 0 deletions conversion_tools/processor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,317 @@
import cv2
import sys
import tensorrt as trt
import pycuda.autoinit
import pycuda.driver as cuda
import numpy as np
import math
import time
import torch
INPUT_W = 608  # network input width; must match the size the engine was built with
INPUT_H = 608  # network input height

class Processor():
    """TensorRT YOLOv5 inference wrapper.

    Loads a serialized TensorRT engine, allocates page-locked host and
    device buffers for every binding, runs inference, and post-processes
    the three raw YOLO head outputs into boxes / confidences / classes.
    """

    def __init__(self, model):
        """Deserialize the engine at path *model* and allocate I/O buffers."""
        # load tensorrt engine
        TRT_LOGGER = trt.Logger(trt.Logger.INFO)
        TRTbin = '{0}'.format(model)
        print('trtbin', TRTbin)
        with open(TRTbin, 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
            engine = runtime.deserialize_cuda_engine(f.read())

        self.context = engine.create_execution_context()
        stream = cuda.Stream()

        # allocate one page-locked host buffer + one device buffer per binding
        inputs, outputs, bindings = [], [], []
        for binding in engine:
            size = trt.volume(engine.get_binding_shape(binding))
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            host_mem = cuda.pagelocked_empty(size, dtype)
            device_mem = cuda.mem_alloc(host_mem.nbytes)
            bindings.append(int(device_mem))
            if engine.binding_is_input(binding):
                inputs.append({'host': host_mem, 'device': device_mem})
            else:
                outputs.append({'host': host_mem, 'device': device_mem})
        # save to class
        self.inputs = inputs
        self.outputs = outputs
        self.bindings = bindings
        self.stream = stream

        # post processing config
        self.cls_nums = 80
        self.filters = (self.cls_nums + 5) * 3
        self.output_filter = self.cls_nums + 5
        # BUG FIX: this was stored as `output_shapes1` while detect() read
        # `self.output_shapes`, raising AttributeError on every detect() call.
        self.output_shapes = [
            (1, 3, 80, 80, self.output_filter),
            (1, 3, 40, 40, self.output_filter),
            (1, 3, 20, 20, self.output_filter)
        ]
        self.output_shapes1 = self.output_shapes  # old name kept for compatibility

        # per-head downsample strides and YOLOv5 COCO anchor boxes (w, h pairs)
        self.strides = np.array([8., 16., 32.])
        anchors = np.array([
            [[10, 13], [16, 30], [33, 23]],
            [[30, 61], [62, 45], [59, 119]],
            [[116, 90], [156, 198], [373, 326]],
        ])
        self.nl = len(anchors)      # number of detection layers
        self.nc = 80                # classes
        self.no = self.nc + 5       # outputs per anchor (xywh + obj + classes)
        self.na = len(anchors[0])   # anchors per layer
        a = anchors.copy().astype(np.float32)
        a = a.reshape(self.nl, -1, 2)
        self.anchors = a.copy()
        self.anchor_grid = a.copy().reshape(self.nl, 1, -1, 1, 1, 2)

    def detect(self, img):
        """Run preprocessing + inference on one BGR image.

        Returns:
            list of the 3 head outputs reshaped to (1, 3, ny, nx, 85).
        """
        # BUG FIX: was `resized, = self.preprocess_image(img)`, which
        # tuple-unpacked the NCHW array and silently stripped the batch dim.
        resized = self.preprocess_image(img)
        outputs = self.inference(resized)
        # reshape from flat to (1, 3, x, y, 85)
        reshaped = []
        for output, shape in zip(outputs, self.output_shapes):
            reshaped.append(output.reshape(shape))
        return reshaped

    def preprocess_image(self, image_raw):
        """
        description: Convert a BGR image to RGB, letterbox-resize and pad it
                     to (INPUT_W, INPUT_H), normalize to [0,1], and transform
                     to contiguous NCHW float32.
        param:
            image_raw: np.ndarray, HWC BGR image
        return:
            image: the processed image, shape (1, 3, INPUT_H, INPUT_W)
        """
        h, w, c = image_raw.shape
        image = cv2.cvtColor(image_raw, cv2.COLOR_BGR2RGB)
        # Calculate width and height and paddings
        r_w = INPUT_W / w
        r_h = INPUT_H / h
        if r_h > r_w:
            tw = INPUT_W
            th = int(r_w * h)
            tx1 = tx2 = 0
            ty1 = int((INPUT_H - th) / 2)
            ty2 = INPUT_H - th - ty1
        else:
            tw = int(r_h * w)
            th = INPUT_H
            tx1 = int((INPUT_W - tw) / 2)
            tx2 = INPUT_W - tw - tx1
            ty1 = ty2 = 0
        # Resize the image with long side while maintaining ratio
        image = cv2.resize(image, (tw, th))
        # Pad the short side with (128,128,128)
        image = cv2.copyMakeBorder(
            image, ty1, ty2, tx1, tx2, cv2.BORDER_CONSTANT, (128, 128, 128)
        )
        image = image.astype(np.float32)
        # Normalize to [0,1]
        image /= 255.0
        # HWC to CHW format:
        image = np.transpose(image, [2, 0, 1])
        # CHW to NCHW format
        image = np.expand_dims(image, axis=0)
        # Convert the image to row-major order, also known as "C order":
        image = np.ascontiguousarray(image)
        return image

    def inference(self, img):
        """Copy *img* to the input binding, execute, and return host outputs."""
        # copy img to input memory
        self.inputs[0]['host'] = np.ravel(img)
        # transfer data to the gpu
        for inp in self.inputs:
            cuda.memcpy_htod_async(inp['device'], inp['host'], self.stream)
        # run inference
        start = time.time()
        self.context.execute_async_v2(
            bindings=self.bindings,
            stream_handle=self.stream.handle)
        # fetch outputs from gpu
        for out in self.outputs:
            cuda.memcpy_dtoh_async(out['host'], out['device'], self.stream)
        # synchronize stream
        self.stream.synchronize()
        end = time.time()
        print('execution time:', end - start)
        return [out['host'] for out in self.outputs]

    def extract_object_grids(self, output):
        """
        Extract objectness grid
        (how likely a box is to contain the center of a bounding box)
        Returns:
            object_grids: list of tensors (1, 3, nx, ny, 1)
        """
        object_grids = []
        for out in output:
            probs = self.sigmoid_v(out[..., 4:5])
            object_grids.append(probs)
        return object_grids

    def extract_class_grids(self, output):
        """
        Extracts class probabilities
        (the most likely class of a given tile)
        Returns:
            class_grids: array len 3 of tensors (1, 3, nx, ny, 80)
        """
        class_grids = []
        for out in output:
            object_probs = self.sigmoid_v(out[..., 4:5])
            class_probs = self.sigmoid_v(out[..., 5:])
            obj_class_probs = class_probs * object_probs
            class_grids.append(obj_class_probs)
        return class_grids

    def extract_boxes(self, output, conf_thres=0.3):
        """
        Extracts boxes (xywh) -> (x1, y1, x2, y2)
        for all predictions above *conf_thres* objectness.
        """
        scaled = []
        grids = []
        for out in output:
            out = self.sigmoid_v(out)
            _, _, width, height, _ = out.shape
            grid = self.make_grid(width, height)
            grids.append(grid)
            scaled.append(out)
        z = []
        for out, grid, stride, anchor in zip(scaled, grids, self.strides, self.anchor_grid):
            _, _, width, height, _ = out.shape
            # YOLOv5 decode: xy relative to cell grid, wh relative to anchor
            out[..., 0:2] = (out[..., 0:2] * 2. - 0.5 + grid) * stride
            out[..., 2:4] = (out[..., 2:4] * 2) ** 2 * anchor

            out[..., 5:] = out[..., 4:5] * out[..., 5:]
            out = out.reshape((1, 3 * width * height, self.output_filter))
            z.append(out)
        pred = np.concatenate(z, 1)
        xc = pred[..., 4] > conf_thres
        pred = pred[xc]
        boxes = self.xywh2xyxy(pred[:, :4])
        return boxes

    def post_process(self, outputs, conf_thres=0.3, iou_thres=0.3, origin_w=0, origin_h=0):
        """
        Transforms raw output into boxes, confs, classes
        Applies NMS thresholding on bounding boxes and confs
        Parameters:
            outputs: raw output tensors (list of 3 YOLO heads)
        Returns:
            boxes: x1,y1,x2,y2 tensor (dets, 4)
            confs: class * obj prob tensor (dets, 1)
            classes: class type tensor (dets, 1)
        """
        scaled = []
        grids = []
        for out in outputs:
            out = self.sigmoid_v(out)
            _, _, width, height, _ = out.shape
            grid = self.make_grid(width, height)
            grids.append(grid)
            scaled.append(out)
        z = []
        for out, grid, stride, anchor in zip(scaled, grids, self.strides, self.anchor_grid):
            _, _, width, height, _ = out.shape
            out[..., 0:2] = (out[..., 0:2] * 2. - 0.5 + grid) * stride
            out[..., 2:4] = (out[..., 2:4] * 2) ** 2 * anchor

            out = out.reshape((1, 3 * width * height, self.output_filter))
            z.append(out)
        pred = np.concatenate(z, 1)
        xc = pred[..., 4] > conf_thres
        pred = pred[xc]
        return self.nms(pred, iou_thres, origin_w, origin_h)

    def make_grid(self, nx, ny):
        """
        Create scaling tensor based on box location
        Source: https://github.com/ultralytics/yolov5/blob/master/models/yolo.py
        Arguments
            nx: x-axis num boxes
            ny: y-axis num boxes
        Returns
            grid: tensor of shape (1, 1, ny, nx, 2)
        """
        # NOTE: docstring previously claimed (1, 1, nx, ny, 80); the code
        # below produces (1, 1, ny, nx, 2) cell-offset coordinates.
        nx_vec = np.arange(nx)
        ny_vec = np.arange(ny)
        yv, xv = np.meshgrid(ny_vec, nx_vec)
        grid = np.stack((yv, xv), axis=2)
        grid = grid.reshape(1, 1, ny, nx, 2)
        return grid

    def sigmoid(self, x):
        """Scalar logistic sigmoid."""
        return 1 / (1 + math.exp(-x))

    def sigmoid_v(self, array):
        """Vectorized logistic sigmoid over a numpy array."""
        return np.reciprocal(np.exp(-array) + 1.0)

    def exponential_v(self, array):
        """Vectorized exponential over a numpy array."""
        return np.exp(array)

    def non_max_suppression(self, boxes, confs, classes, iou_thres=0.3):
        """Greedy IoU-based NMS; keeps the highest-conf box per cluster."""
        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]
        y2 = boxes[:, 3]
        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = confs.flatten().argsort()[::-1]  # descending by confidence
        keep = []
        while order.size > 0:
            i = order[0]
            keep.append(i)
            # intersection of the best box with all remaining boxes
            xx1 = np.maximum(x1[i], x1[order[1:]])
            yy1 = np.maximum(y1[i], y1[order[1:]])
            xx2 = np.minimum(x2[i], x2[order[1:]])
            yy2 = np.minimum(y2[i], y2[order[1:]])
            w = np.maximum(0.0, xx2 - xx1 + 1)
            h = np.maximum(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (areas[i] + areas[order[1:]] - inter)
            inds = np.where(ovr <= iou_thres)[0]
            order = order[inds + 1]
        boxes = boxes[keep]
        confs = confs[keep]
        classes = classes[keep]
        return boxes, confs, classes

    def nms(self, pred, iou_thres=0.6, origin_w=0, origin_h=0):
        """Convert predictions to xyxy, pick the best class, and run NMS."""
        boxes = self.xywh2xyxy(pred[..., 0:4], origin_w, origin_h)
        # best class only
        # NOTE(review): confs here are raw class scores, not multiplied by
        # objectness (unlike extract_boxes) — confirm this is intentional.
        confs = np.amax(pred[:, 5:], 1, keepdims=True)
        classes = np.argmax(pred[:, 5:], axis=-1)
        return self.non_max_suppression(boxes, confs, classes, iou_thres)

    def xywh2xyxy(self, x, origin_w=0, origin_h=0):
        """
        description: Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right,
                     undoing the letterbox padding/scaling applied in preprocess_image.
        param:
            origin_h: height of original image (0 -> assume network input size)
            origin_w: width of original image (0 -> assume network input size)
            x: A boxes tensor, each row is a box [center_x, center_y, w, h]
        return:
            y: A boxes tensor, each row is a box [x1, y1, x2, y2]
        """
        y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
        # BUG FIX: the defaults origin_w=origin_h=0 made r_w/r_h divide by
        # zero (extract_boxes hit this on every call). Fall back to the
        # network input size, i.e. no letterbox rescaling.
        if origin_w <= 0:
            origin_w = INPUT_W
        if origin_h <= 0:
            origin_h = INPUT_H
        r_w = INPUT_W / origin_w
        r_h = INPUT_H / origin_h
        if r_h > r_w:
            y[:, 0] = x[:, 0] - x[:, 2] / 2
            y[:, 2] = x[:, 0] + x[:, 2] / 2
            y[:, 1] = x[:, 1] - x[:, 3] / 2 - (INPUT_H - r_w * origin_h) / 2
            y[:, 3] = x[:, 1] + x[:, 3] / 2 - (INPUT_H - r_w * origin_h) / 2
            y /= r_w
        else:
            y[:, 0] = x[:, 0] - x[:, 2] / 2 - (INPUT_W - r_h * origin_w) / 2
            y[:, 2] = x[:, 0] + x[:, 2] / 2 - (INPUT_W - r_h * origin_w) / 2
            y[:, 1] = x[:, 1] - x[:, 3] / 2
            y[:, 3] = x[:, 1] + x[:, 3] / 2
            y /= r_h

        return y
Loading

0 comments on commit 1d259de

Please sign in to comment.