forked from behretj/3D-Scene-Understanding
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
178 lines (137 loc) · 6.3 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
import open3d as o3d
import glob, os, cv2
import numpy as np
from projectaria_tools.core import data_provider, calibration
import imageio
from moviepy.editor import VideoFileClip, clips_array
import json
def get_all_images(scan_dir, name):
vrs_files = glob.glob(os.path.join(scan_dir + name, '*.vrs'))
assert vrs_files is not None, "No vrs files found in directory"
for vrs_file in vrs_files:
provider = data_provider.create_vrs_data_provider(vrs_file)
assert provider is not None, "Cannot open file"
deliver_option = provider.get_default_deliver_queued_options()
deliver_option.deactivate_stream_all()
camera_label = "camera-rgb"
stream_id = provider.get_stream_id_from_label(camera_label)
calib = provider.get_device_calibration().get_camera_calib(camera_label)
w, h = calib.get_image_size()
pinhole = calibration.get_linear_camera_calibration(w, h, calib.get_focal_lengths()[0])
image_dir = os.path.join(scan_dir, os.path.basename(vrs_file)[:-4] + "_images")
os.makedirs(image_dir, exist_ok=True)
for i in range(provider.get_num_data(stream_id)):
image_data = provider.get_image_data_by_index(stream_id, i)
img = image_data[0].to_numpy_array()
undistorted_image = calibration.distort_by_calibration(img, pinhole, calib)
aruco_image = cv2.cvtColor(undistorted_image, cv2.COLOR_RGB2BGR)
aruco_image = cv2.rotate(aruco_image, cv2.ROTATE_90_CLOCKWISE)
cv2.imwrite(os.path.join(image_dir, f"frame_{i:05}.jpg"), aruco_image)
def mask3d_labels(label_path):
with open(label_path, 'r') as file:
lines = file.readlines()
file_paths = []
values = []
confidences = []
for line in lines:
parts = line.split()
file_paths.append(parts[0])
values.append(int(parts[1]))
confidences.append(float(parts[2]))
base_dir = os.path.dirname(os.path.abspath(label_path))
first_pred_mask_path = os.path.join(base_dir, file_paths[0])
with open(first_pred_mask_path, 'r') as file:
num_lines = len(file.readlines())
mask3d_labels = np.zeros(num_lines, dtype=int)
for i, relative_path in enumerate(file_paths):
value = values[i]
file_path = os.path.join(base_dir, relative_path)
with open(file_path, 'r') as file:
for j, line in enumerate(file):
if line.strip() == '1':
mask3d_labels[j] = value
output_path = os.path.join(base_dir, 'mask3d_labels.txt')
with open(output_path, 'w') as file:
for label in mask3d_labels:
file.write(f"{label}\n")
def vis_detections(coords, color=[0,0,1]):
coords = np.asarray(coords)
spheres = []
if len(coords) == 0:
return spheres
if len(coords.shape) == 1:
coords = np.array([coords])
for coord in coords:
sphere = o3d.geometry.TriangleMesh.create_sphere(radius=0.02)
sphere.paint_uniform_color(color)
sphere.compute_vertex_normals()
sphere.translate(coord)
spheres += [sphere]
return spheres
def calculate_center(bb):
return [(bb[0] + bb[2])/2, (bb[1] + bb[3])/2]
def filter_object(obj_dets, hand_dets):
object_cc_list = [] # object center list
for j in range(obj_dets.shape[0]):
object_cc_list.append(calculate_center(obj_dets[j,:4]))
object_cc_list = np.array(object_cc_list)
img_obj_id = [] # matching list
for i in range(hand_dets.shape[0]):
if hand_dets[i, 5] <= 0: # if hand is non-contact
img_obj_id.append(-1)
continue
else: # hand is in-contact
hand_cc = np.array(calculate_center(hand_dets[i,:4])) # hand center points
# caculates, using the hand offset vector, which object is the closest to this object
point_cc = np.array([(hand_cc[0]+hand_dets[i,6]*10000*hand_dets[i,7]), (hand_cc[1]+hand_dets[i,6]*10000*hand_dets[i,8])]) # extended points (hand center + offset)
dist = np.sum((object_cc_list - point_cc)**2,axis=1)
dist_min = np.argmin(dist) # find the nearest
img_obj_id.append(dist_min)
return img_obj_id
def crop_image(image):
height, width = image.shape[:2]
margin_x = int(width * (8.0 / 100) / 2)
margin_y = int(height * (8.0 / 100) / 2)
# Define the cropping rectangle
x_start = margin_x
x_end = width - margin_x
y_start = margin_y
y_end = height - margin_y
# Crop the image
cropped_image = image[y_start:y_end, x_start:x_end]
return cropped_image
def create_video(image_dir, output_path="output.mp4", fps=30):
images = []
for filename in sorted(glob.glob(os.path.join(image_dir, '*.jpg'))):
images.append(filename)
with imageio.get_writer(output_path, fps=fps) as writer:
for i, image_file in enumerate(images):
# Read each image
image = imageio.imread(image_file)
image = crop_image(image)
# Append the image to the video
writer.append_data(image)
def save_camera_params(scene, path):
# Save the current camera parameters to a file
camera_params = scene.get_view_control().convert_to_pinhole_camera_parameters()
with open(path, "w") as f:
json.dump(camera_params.__dict__, f, default=lambda o: o.__dict__)
def load_camera_params(scene, path):
# Load the camera parameters from a file
with open(path, "r") as f:
camera_params_dict = json.load(f)
camera_params = o3d.camera.PinholeCameraParameters()
camera_params.__dict__.update(camera_params_dict)
scene.get_view_control().convert_from_pinhole_camera_parameters(camera_params)
def capture_image(scene_widget, file_path):
image = scene_widget.scene.renderer.render_to_image()
o3d.io.write_image(file_path, image)
def stitch_videos(video_path_1, video_path_2, new_file="combined_video.mp4"):
video1 = VideoFileClip(video_path_1)
video2 = VideoFileClip(video_path_2)
# Ensure both videos have the same duration, fps, and dimensions
# This should be already true according to your information
# Combine the videos side by side
final_video = clips_array([[video1, video2]])
# Save the final video
final_video.write_videofile(new_file)