Commit 7c87f5a

Add files via upload
1 parent 521fc39 commit 7c87f5a

26 files changed, +6274 -0 lines changed

Network/Weight_net.py

+39
@@ -0,0 +1,39 @@
import pdb
import torch
from torch import nn
from torch.nn import functional as F
import numpy as np


class Discriminator(nn.Module):
    def __init__(self, num_input, num_hidden, num_output=2, device="cuda", dropout=False):
        super().__init__()
        self.device = device
        self.fc1 = nn.Linear(num_input, num_hidden)
        self.fc2 = nn.Linear(num_hidden, num_hidden)
        self.fc3 = nn.Linear(num_hidden, num_output)
        self.dropout = dropout
        self.dropout_layer = nn.Dropout(p=0.2)

    def forward(self, x):
        if isinstance(x, np.ndarray):
            x = torch.tensor(x, dtype=torch.float).to(self.device)
        if self.dropout:
            x = F.relu(self.dropout_layer(self.fc1(x)))
            x = F.relu(self.dropout_layer(self.fc2(x)))
        else:
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc2(x))
        output = 2 * torch.tanh(self.fc3(x))
        return output


class ConcatDiscriminator(Discriminator):
    """
    Concatenate inputs along dimension and then pass through MLP.
    """
    def __init__(self, *args, dim=1, **kwargs):
        super().__init__(*args, **kwargs)
        self.dim = dim

    def forward(self, *inputs, **kwargs):
        flat_inputs = torch.cat(inputs, dim=self.dim)
        return super().forward(flat_inputs, **kwargs)
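
For orientation, a minimal usage sketch of the classes above (not part of the commit; the input dimensions, batch size, and the choice to feed state-action pairs are illustrative assumptions):

# Hypothetical usage sketch: score a batch of concatenated (state, action) inputs.
import torch

state_dim, action_dim, hidden = 17, 6, 256
d_sa = ConcatDiscriminator(state_dim + action_dim, hidden, num_output=2, device="cpu")

states = torch.randn(32, state_dim)
actions = torch.randn(32, action_dim)
logits = d_sa(states, actions)   # concatenated along dim=1 inside forward
print(logits.shape)              # torch.Size([32, 2]); values lie in (-2, 2) due to 2 * tanh
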

SimpleSAC/__init__.py

Whitespace-only changes.

SimpleSAC/envs.py

+48
@@ -0,0 +1,48 @@
'''
Generate different types of dynamics mismatch.
@python version : 3.6.4
'''

import gym
from gym.spaces import Box, Discrete, Tuple
from utils_h2o import update_target_env_gravity, update_target_env_density, update_target_env_friction, update_source_env


def get_new_gravity_env(variety, env_name):
    update_target_env_gravity(variety, env_name)
    env = gym.make(env_name)

    return env


def get_source_env(env_name="Walker2d-v2"):
    update_source_env(env_name)
    env = gym.make(env_name)

    return env


def get_new_density_env(variety, env_name):
    update_target_env_density(variety, env_name)
    env = gym.make(env_name)

    return env


def get_new_friction_env(variety, env_name):
    update_target_env_friction(variety, env_name)
    env = gym.make(env_name)

    return env


def get_dim(space):
    if isinstance(space, Box):
        return space.low.size
    elif isinstance(space, Discrete):
        return space.n
    elif isinstance(space, Tuple):
        return sum(get_dim(subspace) for subspace in space.spaces)
    elif hasattr(space, 'flat_dim'):
        return space.flat_dim
    else:
        raise TypeError("Unknown space: {}".format(space))
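
As a rough illustration of how these factory functions would be called (assuming the utils_h2o helpers are importable and the MuJoCo environments are installed; the variety value 2.0 is only an example):

# Hypothetical usage sketch: build a target env with gravity scaled by the given variety,
# then query flattened space dimensions with get_dim.
env = get_new_gravity_env(2.0, "Walker2d-v2")
obs_dim = get_dim(env.observation_space)   # Box -> flattened size
act_dim = get_dim(env.action_space)
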

SimpleSAC/mixed_replay_buffer.py

+113
@@ -0,0 +1,113 @@
import os
import h5py
import torch
import random
import numpy as np
from gym.spaces import Box, Discrete, Tuple

from envs import get_dim
from replay_buffer import ReplayBuffer


class MixedReplayBuffer(ReplayBuffer):
    def __init__(self, reward_scale, reward_bias, clip_action, state_dim, action_dim, task="halfcheetah", data_source="medium_replay", device="cuda", scale_rewards=True, scale_state=False, buffer_ratio=1, residual_ratio=0.1):
        super().__init__(state_dim, action_dim, device=device)

        self.scale_rewards = scale_rewards
        self.scale_state = scale_state
        self.buffer_ratio = buffer_ratio
        self.residual_ratio = residual_ratio

        # load expert dataset into the replay buffer
        path = os.path.join("../../d4rl_mujoco_dataset", "{}_{}-v2.hdf5".format(task, data_source))
        with h5py.File(path, "r") as dataset:
            total_num = dataset['observations'].shape[0]
            # idx = random.sample(range(total_num), int(total_num * self.residual_ratio))
            idx = np.random.choice(range(total_num), int(total_num * self.residual_ratio), replace=False)
            s = np.vstack(np.array(dataset['observations'])).astype(np.float32)[idx, :]        # An (N, dim_observation)-dimensional numpy array of observations
            a = np.vstack(np.array(dataset['actions'])).astype(np.float32)[idx, :]             # An (N, dim_action)-dimensional numpy array of actions
            r = np.vstack(np.array(dataset['rewards'])).astype(np.float32)[idx, :]             # An (N,)-dimensional numpy array of rewards
            s_ = np.vstack(np.array(dataset['next_observations'])).astype(np.float32)[idx, :]  # An (N, dim_observation)-dimensional numpy array of next observations
            done = np.vstack(np.array(dataset['terminals']))[idx, :]                           # An (N,)-dimensional numpy array of terminal flags

        # whether to bias the reward
        r = r * reward_scale + reward_bias
        # whether to clip actions
        a = np.clip(a, -clip_action, clip_action)

        fixed_dataset_size = r.shape[0]
        self.fixed_dataset_size = fixed_dataset_size
        self.ptr = fixed_dataset_size
        self.size = fixed_dataset_size
        self.max_size = (self.buffer_ratio + 1) * fixed_dataset_size

        self.state = np.vstack((s, np.zeros((self.max_size - self.fixed_dataset_size, state_dim))))
        self.action = np.vstack((a, np.zeros((self.max_size - self.fixed_dataset_size, action_dim))))
        self.next_state = np.vstack((s_, np.zeros((self.max_size - self.fixed_dataset_size, state_dim))))
        self.reward = np.vstack((r, np.zeros((self.max_size - self.fixed_dataset_size, 1))))
        self.done = np.vstack((done, np.zeros((self.max_size - self.fixed_dataset_size, 1))))
        self.device = torch.device(device)

        # State normalization
        self.normalize_states()

    def normalize_states(self, eps=1e-3):
        # STATE: standard normalization
        self.state_mean = self.state.mean(0, keepdims=True)
        self.state_std = self.state.std(0, keepdims=True) + eps
        if self.scale_state:
            self.state = (self.state - self.state_mean) / self.state_std
            self.next_state = (self.next_state - self.state_mean) / self.state_std

    def append(self, s, a, r, s_, done):
        self.state[self.ptr] = s
        self.action[self.ptr] = a
        self.next_state[self.ptr] = s_
        self.reward[self.ptr] = r
        self.done[self.ptr] = done

        # fix the offline dataset and shuffle the simulated part
        self.ptr = (self.ptr + 1 - self.fixed_dataset_size) % (self.max_size - self.fixed_dataset_size) + self.fixed_dataset_size
        self.size = min(self.size + 1, self.max_size)

    def append_traj(self, observations, actions, rewards, next_observations, dones):
        for o, a, r, no, d in zip(observations, actions, rewards, next_observations, dones):
            self.append(o, a, r, no, d)

    def sample(self, batch_size, scope=None, type=None):
        if scope is None:
            ind = np.random.randint(0, self.size, size=batch_size)
        elif scope == "real":
            ind = np.random.randint(0, self.fixed_dataset_size, size=batch_size)
        elif scope == "sim":
            ind = np.random.randint(self.fixed_dataset_size, self.size, size=batch_size)
        else:
            raise RuntimeError("Misspecified range for replay buffer sampling")

        if type is None:
            return {
                'observations': torch.FloatTensor(self.state[ind]).to(self.device),
                'actions': torch.FloatTensor(self.action[ind]).to(self.device),
                'rewards': torch.FloatTensor(self.reward[ind]).to(self.device),
                'next_observations': torch.FloatTensor(self.next_state[ind]).to(self.device),
                'dones': torch.FloatTensor(self.done[ind]).to(self.device)
            }
        elif type == "sas":
            return {
                'observations': torch.FloatTensor(self.state[ind]).to(self.device),
                'actions': torch.FloatTensor(self.action[ind]).to(self.device),
                'next_observations': torch.FloatTensor(self.next_state[ind]).to(self.device)
            }
        elif type == "sa":
            return {
                'observations': torch.FloatTensor(self.state[ind]).to(self.device),
                'actions': torch.FloatTensor(self.action[ind]).to(self.device)
            }
        else:
            raise RuntimeError("Misspecified return data types for replay buffer sampling")

    def get_mean_std(self):
        return torch.FloatTensor(self.state_mean).to(self.device), torch.FloatTensor(self.state_std).to(self.device)
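
A minimal sketch of how this buffer might be driven (not part of the commit; it assumes the ReplayBuffer base class from replay_buffer.py is importable, the d4rl hdf5 file exists at the hard-coded path, and the dimensions below are illustrative):

# Hypothetical usage sketch: mix a fixed offline slice with freshly simulated transitions,
# then draw batches from each region of the buffer.
import numpy as np

buffer = MixedReplayBuffer(reward_scale=1.0, reward_bias=0.0, clip_action=1.0,
                           state_dim=17, action_dim=6, task="walker2d",
                           data_source="medium_replay", device="cpu")

# dummy simulated transition; in practice these come from stepping the simulator env
s, a, r, s_, done = np.zeros(17), np.zeros(6), 0.0, np.zeros(17), 0.0
buffer.append(s, a, r, s_, done)   # written into the non-fixed (simulated) region

real_batch = buffer.sample(256, scope="real")           # offline transitions only
sim_batch = buffer.sample(256, scope="sim")             # simulated transitions only
sa_batch = buffer.sample(256, scope=None, type="sa")    # observations and actions only
mean, std = buffer.get_mean_std()
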
