|
42 | 42 | # !pip3 install torchrl
|
43 | 43 | # !pip3 install gym[mujoco]
|
44 | 44 | # !pip3 install tqdm
|
| 45 | +# On Google Colab, use instead: !pip install torchrl gymnasium[mujoco] mujoco==3.1.1 |
45 | 46 | #
|
46 | 47 | # Proximal Policy Optimization (PPO) is a policy-gradient algorithm where a
|
47 | 48 | # batch of data is being collected and directly consumed to train the policy to maximise
|
|
211 | 212 | # to a large panel of RL simulators, allowing you to easily swap one environment
|
212 | 213 | # with another. For example, creating a wrapped gym environment can be achieved with a few characters:
|
213 | 214 | #
|
# -----------------------------------------------------------------------------
# Google Colab and gymnasium compatibility for MuJoCo-based environments
# -----------------------------------------------------------------------------
import os
import sys

# Headless Google Colab runtimes have no display, so MuJoCo is pointed at the
# OSMesa software renderer. Colab is detected through ``sys.modules`` rather
# than ``get_ipython()``: that helper only exists inside IPython and raises
# ``NameError`` when this file is executed as a plain Python script.
# ``setdefault`` keeps any rendering backend the user has already selected,
# and the variable is set before gymnasium/gym is imported just below.
if "google.colab" in sys.modules:
    os.environ.setdefault("MUJOCO_GL", "osmesa")

# Prefer gymnasium and fall back to the legacy gym package when it is missing.
try:
    import gymnasium as gym
except ImportError:
    import gym

    USING_GYMNASIUM = False
else:
    USING_GYMNASIUM = True

# Pick the v5 task only when the installed gymnasium actually registers it
# (older gymnasium releases and legacy gym only provide v4); otherwise use v4
# so that environment creation does not fail on an unknown id.
env_version = (
    "v5"
    if USING_GYMNASIUM
    and "InvertedDoublePendulum-v5" in getattr(gym, "registry", ())
    else "v4"
)
env_id = f"InvertedDoublePendulum-{env_version}"

# Wrap the selected gym environment so it can be used through TorchRL.
base_env = GymEnv(env_id, device=device)
214 | 242 |
|
215 |
| -base_env = GymEnv("InvertedDoublePendulum-v4", device=device) |
216 | 243 |
|
217 | 244 | ######################################################################
|
218 | 245 | # There are a few things to notice in this code: first, we created
|
|
0 commit comments