-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutil.py
200 lines (158 loc) · 7.93 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
from __future__ import division
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
import cv2
def predict_transform(prediction, inp_dim, anchors, num_classes, CUDA=True):
    """Decode a raw YOLO feature map into per-box predictions.

    Reshapes a 4-D map of shape (N, num_anchors * box_attrs, H, W) into a 3-D
    tensor of shape (N, H * W * num_anchors, box_attrs), where
    ``box_attrs = 5 + num_classes``, and decodes every box:

    * columns 0-1: sigmoid of the raw value plus the grid-cell offset,
      multiplied by the stride (box centre in network-input pixels);
    * columns 2-3: ``exp(raw) * anchor`` (box width/height in network-input
      pixels);
    * column 4 and columns 5..: sigmoid (objectness and class scores).

    Rows are ordered grid cell first (row-major over the map), anchor second.
    The feature map is assumed to be square (H == W).

    :param prediction: YOLO layer output of shape (N, C, H, W); not modified.
    :param inp_dim: side length of the network input.
    :param anchors: list of (width, height) anchor sizes in input pixels.
    :param num_classes: number of class scores per box.
    :param CUDA: kept for backward compatibility only. Helper tensors are
        created on ``prediction``'s own device, so the flag no longer has to
        match where the data lives.
    :return: new tensor of shape (N, H * W * num_anchors, 5 + num_classes).
    """
    batch_size = prediction.size(0)
    # Read the grid size from the tensor itself. Deriving it as
    # inp_dim // stride can disagree with the real feature-map size when
    # inp_dim is not an exact multiple of it, which makes the view() fail.
    grid_size = prediction.size(2)
    stride = inp_dim // grid_size  # down-sampling factor from input to map
    bbox_attrs = 5 + num_classes
    num_anchors = len(anchors)

    prediction = prediction.view(batch_size, bbox_attrs * num_anchors, grid_size * grid_size)
    prediction = prediction.transpose(1, 2).contiguous()
    prediction = prediction.view(batch_size, grid_size * grid_size * num_anchors, bbox_attrs)

    # Per-cell (x, y) offsets, each repeated once per anchor so that they
    # line up with the row order of `prediction`.
    grid = torch.arange(grid_size, dtype=prediction.dtype, device=prediction.device)
    x_offset = grid.repeat(grid_size)                               # 0,1,..,G-1,0,1,..
    y_offset = grid.unsqueeze(1).repeat(1, grid_size).view(-1)      # 0,0,..,1,1,..
    x_y_offset = torch.stack((x_offset, y_offset), 1)               # (G*G, 2)
    x_y_offset = x_y_offset.unsqueeze(1).repeat(1, num_anchors, 1).view(1, -1, 2)

    # Anchors expressed in feature-map units, tiled over every grid cell.
    scaled_anchors = [(a[0] / stride, a[1] / stride) for a in anchors]
    anchor_wh = torch.tensor(scaled_anchors, dtype=prediction.dtype, device=prediction.device)
    anchor_wh = anchor_wh.repeat(grid_size * grid_size, 1).unsqueeze(0)  # (1, G*G*A, 2)

    # Decode out-of-place so the caller's tensor is never written to, then
    # scale centres and sizes from feature-map units back to input pixels.
    centre_xy = (torch.sigmoid(prediction[:, :, :2]) + x_y_offset) * stride
    size_wh = torch.exp(prediction[:, :, 2:4]) * anchor_wh * stride
    scores = torch.sigmoid(prediction[:, :, 4:])  # objectness + class scores

    return torch.cat((centre_xy, size_wh, scores), 2)
def unique(tensor):
    """Return the distinct values of ``tensor`` in ascending order.

    The input is treated as flattened. The result is a new 1-D tensor with
    the same dtype and device as ``tensor``.

    :param tensor: tensor whose repeated values should be removed.
    :return: 1-D tensor of the sorted unique values.
    """
    # torch.unique stays on the tensor's device, avoiding the CPU/NumPy
    # round trip; sorted=True is passed explicitly because callers rely on
    # ascending order and older releases did not sort by default.
    return torch.unique(tensor, sorted=True)
def bbox_iou(box1, box2):
    """Compute the intersection-over-union between two sets of boxes.

    Both arguments are 2-D tensors whose first four columns are the corner
    coordinates (x1, y1, x2, y2); any further columns are ignored. The
    per-column tensors are combined element-wise, so the usual broadcasting
    rules apply (for example one box against many). Widths and heights use
    the inclusive-pixel convention, i.e. ``x2 - x1 + 1``.

    :param box1: tensor of shape (n, >=4).
    :param box2: tensor of shape (m, >=4).
    :return: 1-D tensor of IoU values.
    """
    # Split out the corner columns of each box set.
    ax1, ay1, ax2, ay2 = (box1[:, k] for k in range(4))
    bx1, by1, bx2, by2 = (box2[:, k] for k in range(4))

    # Extent of the overlap rectangle, clamped to zero for disjoint boxes.
    overlap_w = torch.clamp(torch.min(ax2, bx2) - torch.max(ax1, bx1) + 1, min=0)
    overlap_h = torch.clamp(torch.min(ay2, by2) - torch.max(ay1, by1) + 1, min=0)
    overlap = overlap_w * overlap_h

    # Union is the sum of both areas minus the part counted twice.
    area_a = (ax2 - ax1 + 1) * (ay2 - ay1 + 1)
    area_b = (bx2 - bx1 + 1) * (by2 - by1 + 1)
    union = area_a + area_b - overlap

    return overlap / union
def write_results(prediction, confidence, num_classes, nms_conf=0.4):
    """Filter decoded detections by objectness and per-class NMS.

    :param prediction: tensor of shape (B, num_boxes, 5 + num_classes) whose
        columns are centre-x, centre-y, width, height, objectness and the
        per-class scores. The caller's tensor is not modified.
    :param confidence: objectness threshold; only boxes with objectness
        strictly greater than this value are kept.
    :param num_classes: number of class-score columns after the objectness.
    :param nms_conf: IoU threshold for non-maximum suppression. Within one
        class, a box is removed when its IoU with a kept box of higher
        objectness is not below this value.
    :return: tensor of shape (D, 8), one row per kept box, with columns
        batch index, x1, y1, x2, y2, objectness, best class score and best
        class index; or the integer 0 when no box in the whole batch passes
        the objectness threshold.
    """
    # Zero out every box that fails the objectness threshold. The product is
    # a new tensor, so the writes below never touch the caller's data.
    conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
    prediction = prediction * conf_mask

    # Convert (centre_x, centre_y, width, height) to corner coordinates
    # (x1, y1, x2, y2), which is the layout bbox_iou expects.
    centre = prediction[:, :, :2].clone()
    half_size = prediction[:, :, 2:4] / 2
    prediction[:, :, :2] = centre - half_size
    prediction[:, :, 2:4] = centre + half_size

    detections = []
    for ind in range(prediction.size(0)):
        image_pred = prediction[ind]

        # Collapse the per-class scores into the best score and its index,
        # shrinking each row from 5 + num_classes columns to 7.
        class_score, class_index = torch.max(image_pred[:, 5:5 + num_classes], 1)
        image_pred = torch.cat(
            (
                image_pred[:, :5],
                class_score.float().unsqueeze(1),
                class_index.float().unsqueeze(1),
            ),
            1,
        )

        # Rows zeroed by the threshold above have objectness 0; drop them.
        image_pred_ = image_pred[image_pred[:, 4] != 0]
        if image_pred_.shape[0] == 0:
            continue

        # Run NMS independently for every class present in this image.
        for cls in unique(image_pred_[:, -1]):
            # As in the earlier masking-based selection, a row whose best
            # class score is exactly zero is not picked up for any class.
            same_class = (image_pred_[:, -1] == cls) & (image_pred_[:, -2] != 0)
            image_pred_class = image_pred_[same_class]

            # Highest objectness first, so each box only has to be compared
            # against the boxes ranked below it.
            order = torch.sort(image_pred_class[:, 4], descending=True)[1]
            image_pred_class = image_pred_class[order]

            i = 0
            # The candidate list shrinks as boxes are suppressed, so the
            # bound is re-evaluated on every pass. The last box has nothing
            # after it to suppress and is therefore skipped.
            while i + 1 < image_pred_class.size(0):
                ious = bbox_iou(image_pred_class[i].unsqueeze(0), image_pred_class[i + 1:])
                survivors = image_pred_class[i + 1:][ious < nms_conf]
                image_pred_class = torch.cat((image_pred_class[:i + 1], survivors))
                i += 1

            # Prefix every kept box with the index of its image in the batch.
            batch_ind = image_pred_class.new_full((image_pred_class.size(0), 1), ind)
            detections.append(torch.cat((batch_ind, image_pred_class), 1))

    if not detections:
        # Callers distinguish "nothing detected" by this integer sentinel.
        return 0
    return torch.cat(detections)