mit-acl · bbrito · Apr 2, 2020
diff --git a/gym_collision_avoidance/envs/agent.py b/gym_collision_avoidance/envs/agent.py
@@ -64,7 +64,7 @@ def __init__(self, start_x, start_y, goal_x, goal_y, radius,
         self.min_y = -20.0
         self.max_y = 20.0
 
-        self.num_states_in_history = 1000
+        self.num_states_in_history = 1000000
         self.global_state_dim = 11
         self.global_state_history = np.empty((self.num_states_in_history, self.global_state_dim))
         self.ego_state_dim = 3
@@ -131,6 +131,8 @@ def take_action(self, action, dt):
         if self.is_at_goal or self.ran_out_of_time or self.in_collision:
             if self.is_at_goal:
                 self.was_at_goal_already = True
+                self._update_state_history()
+                self.step_num += 1
             if self.in_collision:
                 self.was_in_collision_already = True
             self.vel_global_frame = np.array([0.0, 0.0])

diff --git a/gym_collision_avoidance/envs/collision_avoidance_env.py b/gym_collision_avoidance/envs/collision_avoidance_env.py
@@ -196,7 +196,7 @@ def _take_action(self, actions, dt):
             if agent.policy.is_external:
                 all_actions[agent_index, :] = agent.policy.convert_to_action(actions[agent_index])
             elif agent.policy.is_still_learning:
-                all_actions[agent_index, :] = agent.policy.network_output_to_action(agent, actions[agent_index])
+                all_actions[agent_index, :] = agent.policy.network_output_to_action(agent, actions)
             else:
                 dict_obs = self.observation[agent_index]
                 all_actions[agent_index, :] = agent.policy.find_next_action(dict_obs, self.agents, agent_index)

diff --git a/gym_collision_avoidance/envs/visualize.py b/gym_collision_avoidance/envs/visualize.py
@@ -155,29 +155,28 @@ def plot_episode(agents, in_evaluate_mode,
 
 def draw_agents(agents, circles_along_traj, ax, last_index=-1):
 
-    max_time = max([agent.global_state_history[agent.step_num+last_index, 0] for agent in agents] + [1e-4])
+    max_time = max([agent.global_state_history[agent.global_state_history.shape[0]+last_index, 0] for agent in agents] + [1e-4])
     max_time_alpha_scalar = 1.2
     for i, agent in enumerate(agents):
 
         # Plot line through agent trajectory
         color_ind = i % len(plt_colors)
         plt_color = plt_colors[color_ind]
-
+        t_final = agent.global_state_history[agent.step_num - 1, 0]
         if circles_along_traj:
-            plt.plot(agent.global_state_history[:agent.step_num+last_index+1, 1],
-                     agent.global_state_history[:agent.step_num+last_index+1, 2],
+            plt.plot(agent.global_state_history[:agent.step_num - 1, 1],
+                     agent.global_state_history[:agent.step_num - 1, 2],
                      color=plt_color, ls='-', linewidth=2)
             plt.plot(agent.global_state_history[0, 3],
                      agent.global_state_history[0, 4],
                      color=plt_color, marker='*', markersize=20)
 
             # Display circle at agent pos every circle_spacing (nom 1.5 sec)
             circle_spacing = 0.4
-            circle_times = np.arange(0.0, agent.global_state_history[agent.step_num+last_index, 0],
-                                     circle_spacing)
-            _, circle_inds = find_nearest(agent.global_state_history[:agent.step_num, 0],
+            circle_times = np.arange(0.0, t_final,circle_spacing)
+            _, circle_inds = find_nearest(agent.global_state_history[:agent.step_num-1,0],
                                           circle_times)
-            for ind in circle_inds:
+            for ind in circle_inds[1:]:
                 alpha = 1 - \
                         agent.global_state_history[ind, 0] / \
                         (max_time_alpha_scalar*max_time)
@@ -188,11 +187,9 @@ def draw_agents(agents, circles_along_traj, ax, last_index=-1):
 
             # Display text of current timestamp every text_spacing (nom 1.5 sec)
             text_spacing = 1.5
-            text_times = np.arange(0.0, agent.global_state_history[agent.step_num+last_index, 0],
-                                   text_spacing)
-            _, text_inds = find_nearest(agent.global_state_history[:agent.step_num, 0],
-                                        text_times)
-            for ind in text_inds:
+            text_times = np.arange(0.0, t_final,text_spacing)
+            _, text_inds = find_nearest(agent.global_state_history[:agent.step_num-1,0],text_times)
+            for ind in text_inds[1:]:
                 y_text_offset = 0.1
                 alpha = agent.global_state_history[ind, 0] / \
                     (max_time_alpha_scalar*max_time)
@@ -205,7 +202,7 @@ def draw_agents(agents, circles_along_traj, ax, last_index=-1):
                         agent.global_state_history[ind, 2]+y_text_offset,
                         '%.1f' % agent.global_state_history[ind, 0], color=c)
             # Also display circle at agent position at end of trajectory
-            ind = agent.step_num + last_index
+            ind = agent.step_num-1
             alpha = 1 - \
                 agent.global_state_history[ind, 0] / \
                 (max_time_alpha_scalar*max_time)
@@ -225,17 +222,17 @@ def draw_agents(agents, circles_along_traj, ax, last_index=-1):
             #     ax.add_patch(ptch.FancyArrowPatch(arrow_start, arrow_end, arrowstyle=style, color='black'))
 
         else:
-            colors = np.zeros((agent.step_num, 4))
+            colors = np.zeros((agent.global_state_history.shape[0], 4))
             colors[:,:3] = plt_color
-            colors[:, 3] = np.linspace(0.2, 1., agent.step_num)
+            colors[:, 3] = np.linspace(0.2, 1., agent.global_state_history.shape[0])
             colors = rgba2rgb(colors)
 
-            plt.scatter(agent.global_state_history[:agent.step_num, 1],
-                     agent.global_state_history[:agent.step_num, 2],
+            plt.scatter(agent.global_state_history[:agent.global_state_history.shape[0], 1],
+                        agent.global_state_history[:agent.global_state_history.shape[0], 2],
                      color=colors)
 
             # Also display circle at agent position at end of trajectory
-            ind = agent.step_num + last_index
+            ind = agent.global_state_history.shape[0] + last_index
             alpha = 0.7
             c = rgba2rgb(plt_color+[float(alpha)])
             ax.add_patch(plt.Circle(agent.global_state_history[ind, 1:3],

diff --git a/gym_collision_avoidance/experiments/src/run_trajectory_dataset_creator.py b/gym_collision_avoidance/experiments/src/run_trajectory_dataset_creator.py
@@ -45,15 +45,14 @@ def add_traj(agents, trajs, dt, traj_i, max_ts):
     other_agent_i = (agent_i + 1) % 2
     agent = agents[agent_i]
     other_agent = agents[other_agent_i]
-    max_t = int(max_ts[agent_i])
     future_plan_horizon_secs = 3.0
     future_plan_horizon_steps = int(future_plan_horizon_secs / dt)
 
-    for t in range(max_t):
+    for t in range(max_ts):
         robot_linear_speed = agent.global_state_history[t, 9]
         robot_angular_speed = agent.global_state_history[t, 10] / dt
 
-        t_horizon = min(max_t, t+future_plan_horizon_steps)
+        t_horizon = min(max_ts, t+future_plan_horizon_steps)
         future_linear_speeds = agent.global_state_history[t:t_horizon, 9]
         future_angular_speeds = agent.global_state_history[t:t_horizon, 10] / dt
         predicted_cmd = np.dstack([future_linear_speeds, future_angular_speeds])
@@ -139,7 +138,11 @@ def main():
                 times_to_goal, extra_times_to_goal, collision, all_at_goal, any_stuck, agents = run_episode(env, one_env)
 
                 max_ts = [t / dt for t in times_to_goal]
-                trajs = add_traj(agents, trajs, dt, test_case, max_ts)
+                # Trim each agent's global state history to the number of steps recorded during the episode
+                if not collision:
+                    for agent in agents:
+                        agent.global_state_history = agent.global_state_history[:agent.step_num]
+                    trajs = add_traj(agents, trajs, dt, test_case, agent.step_num)
 
         # print(trajs)
 
@@ -148,7 +151,8 @@ def main():
         pkl_dir = file_dir + '/trajs/'
         os.makedirs(pkl_dir, exist_ok=True)
         fname = pkl_dir+policy+'.pkl'
-        pickle.dump(trajs, open(fname,'wb'))
+        # Protocol 2 keeps the pickle file loadable from both Python 2 and Python 3
+        pickle.dump(trajs, open(fname, 'wb'), protocol=2)
         print('dumped {}'.format(fname))
 
     print("Experiment over.")