-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathautopilot.py
50 lines (41 loc) · 1.49 KB
/
autopilot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import torch
import gym
import sys
import numpy as np
from deepq_network import LinearMapNet
from utils import epsilon_greedy
def _reset_env(env):
    """Reset *env* and return only the initial observation.

    Some Gym releases return ``(observation, info)`` from ``reset()`` while
    others return the bare observation; both shapes of result are accepted.
    """
    result = env.reset()
    is_obs_info_pair = (
        isinstance(result, tuple)
        and len(result) == 2
        and isinstance(result[1], dict)
    )
    return result[0] if is_obs_info_pair else result


def _step_env(env, action):
    """Advance *env* one step and return ``(observation, reward, done)``.

    Accepts both the 4-tuple ``(obs, reward, done, info)`` and the 5-tuple
    ``(obs, reward, terminated, truncated, info)`` step results.
    """
    result = env.step(action)
    if len(result) == 5:
        observation, reward, terminated, truncated, _ = result
        return observation, reward, bool(terminated or truncated)
    observation, reward, done, _ = result
    return observation, reward, bool(done)


def _to_state_tensor(observation):
    """Convert one observation to a float32 tensor with a leading batch axis."""
    batch = np.expand_dims(np.asarray(observation, dtype=np.float32), 0)
    return torch.from_numpy(batch)


def main():
    """Replay a saved Q-network on LunarLander-v2 and print episode rewards.

    Command line: ``autopilot.py <episodes> <model_path>`` where ``episodes``
    is the number of episodes to play and ``model_path`` is a state-dict
    checkpoint compatible with ``LinearMapNet(8, 4)``.

    Raises:
        SystemExit: if the argument count is wrong or ``episodes`` is not an
            integer (a message is written to stderr).
    """
    # Validate the command line before creating any heavy resources.
    if len(sys.argv) != 3:
        sys.exit("usage: autopilot.py <episodes> <model_path>")
    _, episodes, model_path = sys.argv
    try:
        num_episodes = int(episodes)
    except ValueError:
        sys.exit(f"episodes must be an integer, got {episodes!r}")

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    qnet = LinearMapNet(8, 4).to(device)
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    qnet.load_state_dict(torch.load(model_path, map_location=device))
    qnet.eval()

    env = gym.make('LunarLander-v2')
    try:
        # Pure inference: no gradients are needed while replaying the policy.
        with torch.no_grad():
            for _ in range(num_episodes):
                episode_reward = 0
                # The initial state is batched exactly like every later state,
                # so the network always sees the same input shape.
                curr_state = _to_state_tensor(_reset_env(env))
                done = False
                while not done:
                    env.render()
                    # 0.0001 is presumably the exploration rate and 4 the
                    # number of actions; confirm against utils.epsilon_greedy.
                    action = epsilon_greedy(
                        qnet, curr_state.to(device), 0.0001, 4
                    )
                    observation, reward, done = _step_env(env, action)
                    episode_reward += reward
                    curr_state = _to_state_tensor(observation)
                print(f"Episode reward: {episode_reward}")
    finally:
        # Release the simulator/render window even if an episode fails.
        env.close()
# Script entry point: run the replay only when executed directly, not on import.
if __name__ == "__main__":
    main()