from __future__ import division

import logging
import logging.config
import time

import cv2
import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

from utils import cv_utils
from utils import operations as ops
from utils import tf_utils

logging.config.fileConfig('logging.ini')
VIDEO_PATH = 'testdata/sample_video.mp4'
FROZEN_GRAPH_PATH = 'models/ssd_mobilenet_v1/frozen_inference_graph.pb'

OUTPUT_WINDOW_WIDTH = 640  # Use None to keep the original image size
DETECT_EVERY_N_SECONDS = None  # Use None to run detection on every frame

# TUNE ME
CROP_WIDTH = CROP_HEIGHT = 600
# Adjacent crops overlap by 20 px: assuming no cone is larger than 20 px,
# every cone appears whole in at least one crop.
CROP_STEP_HORIZONTAL = CROP_STEP_VERTICAL = 600 - 20
SCORE_THRESHOLD = 0.5
NON_MAX_SUPPRESSION_THRESHOLD = 0.5
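
# Illustration only (not used below): a minimal sketch of the sliding-window
# grid that ops.extract_crops is expected to build. The real implementation
# lives in utils/operations.py and may pad or clip crops at the frame edges
# differently.
def _example_crop_origins(frame_height, frame_width):
    """Yield the (ymin, xmin) origin of each crop in the sliding window."""
    for ymin in range(0, max(frame_height - CROP_HEIGHT, 0) + 1,
                      CROP_STEP_VERTICAL):
        for xmin in range(0, max(frame_width - CROP_WIDTH, 0) + 1,
                          CROP_STEP_HORIZONTAL):
            yield ymin, xmin
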
def main():
    # Read TensorFlow graph
    detection_graph = tf_utils.load_model(FROZEN_GRAPH_PATH)

    # Open the video and get its frame rate
    cap = cv2.VideoCapture(VIDEO_PATH)
    fps = cap.get(cv2.CAP_PROP_FPS)
    # CROP_WIDTH = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    # CROP_HEIGHT = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    with tf.Session(graph=detection_graph) as sess:
        processed_images = 0
        while cap.isOpened():
            if DETECT_EVERY_N_SECONDS:
                cap.set(cv2.CAP_PROP_POS_FRAMES,
                        processed_images * fps * DETECT_EVERY_N_SECONDS)
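            # The seek above samples one frame every DETECT_EVERY_N_SECONDS
            # seconds of video: after n processed frames the next frame
            # index is n * fps * DETECT_EVERY_N_SECONDS.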
            ret, frame = cap.read()
            if ret:
                tic = time.time()

                # crops are ndarrays of shape
                # (number_crops, CROP_HEIGHT, CROP_WIDTH, 3);
                # crops_coordinates are the (ymin, xmin, ymax, xmax)
                # coordinates of each crop in the original image
                crops, crops_coordinates = ops.extract_crops(
                    frame, CROP_HEIGHT, CROP_WIDTH,
                    CROP_STEP_VERTICAL, CROP_STEP_HORIZONTAL)

                # Uncomment this if you also uncommented the two lines
                # before creating the TF session.
                # crops = np.array([crops[0]])
                # crops_coordinates = [crops_coordinates[0]]

                detection_dict = tf_utils.run_inference_for_batch(crops, sess)
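                # Assumed output layout (TF Object Detection API style):
                # detection_dict['detection_boxes'] has shape
                # (num_crops, max_detections, 4) holding per-crop
                # (ymin, xmin, ymax, xmax) boxes, zero-padded for unused
                # slots; detection_dict['detection_scores'] has shape
                # (num_crops, max_detections).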
                # The detection boxes obtained are relative to each crop. Get
                # boxes relative to the original image.
                # IMPORTANT! The box coordinates are in the following order:
                # (ymin, xmin, ymax, xmax)
                boxes = []
                for box_absolute, boxes_relative in zip(
                        crops_coordinates, detection_dict['detection_boxes']):
                    boxes.extend(ops.get_absolute_boxes(
                        box_absolute,
                        boxes_relative[np.any(boxes_relative, axis=1)]))
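                # Worked example (illustrative numbers, assuming
                # get_absolute_boxes scales a normalized box by the crop
                # size and offsets it by the crop origin): the relative box
                # (0.1, 0.2, 0.5, 0.6) inside the 600x600 crop anchored at
                # (ymin=0, xmin=580) is (60, 120, 300, 360) in crop pixels
                # and therefore (60, 700, 300, 940) in the full frame.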
                if boxes:
                    boxes = np.vstack(boxes)

                # Remove overlapping boxes
                boxes = ops.non_max_suppression_fast(
                    boxes, NON_MAX_SUPPRESSION_THRESHOLD)
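                # non_max_suppression_fast is assumed to implement the usual
                # greedy scheme: keep a box, drop every remaining box whose
                # overlap with it exceeds NON_MAX_SUPPRESSION_THRESHOLD, and
                # repeat until no boxes remain.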
                # Get scores to display them on top of each detection
                boxes_scores = detection_dict['detection_scores']
                boxes_scores = boxes_scores[np.nonzero(boxes_scores)]
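                # Note: np.nonzero flattens the (num_crops, max_detections)
                # score matrix into a 1-D array in detection order; the zip
                # below pairs those scores with the NMS survivors purely by
                # position.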
                for box, score in zip(boxes, boxes_scores):
                    if score > SCORE_THRESHOLD:
                        ymin, xmin, ymax, xmax = box
                        color_detected_rgb = cv_utils.predominant_rgb_color(
                            frame, ymin, xmin, ymax, xmax)
                        text = '{:.2f}'.format(score)
                        cv_utils.add_rectangle_with_text(
                            frame, ymin, xmin, ymax, xmax,
                            color_detected_rgb, text)

                if OUTPUT_WINDOW_WIDTH:
                    frame = cv_utils.resize_width_keeping_aspect_ratio(
                        frame, OUTPUT_WINDOW_WIDTH)

                cv2.imshow('Detection result', frame)
                cv2.waitKey(1)

                processed_images += 1

                toc = time.time()
                # time.time() returns seconds, so scale by 1000 for ms
                processing_time_ms = (toc - tic) * 1000
                logging.debug(
                    'Detected {} objects in {} images in {:.2f} ms'.format(
                        len(boxes), len(crops), processing_time_ms))
            else:
                # No more frames. Break the loop
                break

    cap.release()
    cv2.destroyAllWindows()


if __name__ == '__main__':
    main()