-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcollect_rew_test_set.py
138 lines (105 loc) · 3.13 KB
/
collect_rew_test_set.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import torch
import gymnasium as gym
import numpy as np
import time
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
import time
import src.environment
import src.environment_agent_rew
import config.config_small_train as cfg_small_train
import config.config_small_test as cfg_small_test
import config.config_large_train as cfg_large_train
import config.config_large_test as cfg_large_test
import warnings
warnings.filterwarnings("ignore")
# Number of evaluation episodes that run_n plays for each model.
N = 100
def run_one(model, env):
    """Play a single episode of ``env`` with ``model`` and return its total reward.

    The environment is reset, then stepped with the actions returned by
    ``model.predict(obs)`` until the episode is over.

    Args:
        model: Object exposing ``predict(obs)`` that returns a tuple whose
            first element is the action to take.
        env: Gymnasium-style environment; ``reset()`` returns ``(obs, info)``
            and ``step(action)`` returns
            ``(obs, reward, terminated, truncated, info)``.

    Returns:
        The sum of the rewards received during the episode.
    """
    obs, _ = env.reset()
    total_reward = 0
    done = False
    while not done:
        action, _ = model.predict(obs)
        obs, reward, terminated, truncated, _ = env.step(action)
        total_reward += reward
        # An episode ends on either signal. Ignoring ``truncated`` (e.g. a
        # time limit) would keep stepping an already-finished environment.
        done = terminated or truncated
    return total_reward
def run_n(model, env):
    """Run ``N`` episodes of ``model`` on ``env`` and summarise the returns.

    Each episode is played with ``run_one``; the episode count comes from the
    module-level constant ``N``.

    Returns:
        A ``(mean, stdv)`` tuple: the mean episode return, and ``np.std`` of
        the returns (NumPy's default ``ddof``) divided by the square root of
        the number of episodes.
    """
    episode_returns = [run_one(model, env) for _ in range(N)]
    episode_count = len(episode_returns)
    spread = np.std(episode_returns) / np.sqrt(episode_count)
    average = np.mean(episode_returns)
    return average, spread
# Evaluation plan: one (label, saved-model path, test config) row per policy,
# listed in the order the results are reported.
# NOTE(review): every checkpoint, including the two under models/a2c/, is
# loaded with PPO.load -- confirm those files were saved by PPO.
EVAL_RUNS = [
    ("TRANSFORMER SMALL", "models/soccer-joint-transformer", cfg_small_test),
    ("LINEAR SMALL", "models/soccer-joint-linear", cfg_small_test),
    ("JOINT A2C", "models/a2c/soccer-joint-marl", cfg_small_test),
    ("INDIVIDUAL A2C", "models/a2c/soccer-individual-marl", cfg_small_test),
    ("JOINT LARGE", "models/soccer-joint-marl-large", cfg_large_test),
    ("INDIV LARGE", "models/soccer-individual-marl-large", cfg_large_test),
    ("LINEAR LARGE", "models/soccer-joint-linear-large-v2", cfg_large_test),
    ("INDIVIDUAL SMALL", "models/soccer-individual-marl", cfg_small_test),
    ("JOINT SMALL", "models/soccer-joint-marl", cfg_small_test),
    ("INDIVIDUAL SMALL AREW", "models/agrew/soccer-individual-marl", cfg_small_test),
    ("JOINT SMALL AREW", "models/agrew/soccer-joint-marl", cfg_small_test),
]

# Parallel result lists: entry i of each list belongs to the same policy.
categories = []
stdvs = []
means = []

for label, model_path, test_cfg in EVAL_RUNS:
    categories.append(label)
    model = PPO.load(model_path)
    # A fresh environment per policy, exactly as in the hand-written version.
    env = gym.make('SoccerEnv-v0', cfg=test_cfg)
    try:
        x = run_n(model, env)
    finally:
        # Release the environment's resources before building the next one.
        env.close()
    means.append(x[0])
    stdvs.append(x[1])

print(categories)
print(means)
print(stdvs)