class RandomizedEnv(Env, Serializable):
    """
    Wrapper around a MuJoCo environment that re-samples its dynamics
    parameters on every reset.

    On each call to ``reset`` a freshly randomized XML model is requested
    from the ``variations`` object, the model and simulation of the wrapped
    environment are rebuilt from it, and the wrapped environment is reset.
    Every other method and property simply forwards to the wrapped
    environment.
    """

    def __init__(self, mujoco_env, variations):
        """
        Stores the wrapped environment and prepares the variations against
        the XML model file of that environment.

        Parameters
        ----------
        mujoco_env : object
            environment to wrap; its ``FILE`` attribute is joined with
            ``MODEL_DIR`` to locate the XML model
            (presumably a MujocoEnv instance -- confirm against callers)
        variations : Variations
            the dynamic parameters to randomize; its
            ``initialize_variations`` method is called with the model path
        """
        Serializable.quick_init(self, locals())
        self._wrapped_env = mujoco_env
        self._variations = variations
        self._file_path = osp.join(MODEL_DIR, mujoco_env.FILE)
        self._variations.initialize_variations(self._file_path)

    def reset(self):
        """
        Rebuilds the model and simulation of the wrapped environment from a
        newly randomized XML model, then resets the wrapped environment.

        Returns
        -------
        object
            whatever the ``reset`` method of the wrapped environment returns
        """
        env = self._wrapped_env
        env.model = load_model_from_xml(
            self._variations.get_randomized_xml_model())
        # Drop any action_space value stored on the instance so it is
        # recomputed for the new model. pop() with a default is used instead
        # of hasattr() + del: hasattr() is also true for attributes that are
        # not stored in __dict__, in which case del would raise KeyError.
        # (presumably action_space is cached per instance by the wrapped
        # environment -- confirm against MujocoEnv)
        env.__dict__.pop('action_space', None)
        env.sim = MjSim(env.model)
        env.data = env.sim.data
        # NOTE(review): these are assigned without copying. If sim.data
        # exposes live simulation buffers, the init_* values will follow the
        # simulation state -- confirm whether copies are intended.
        env.init_qpos = env.sim.data.qpos
        env.init_qvel = env.sim.data.qvel
        env.init_qacc = env.sim.data.qacc
        env.init_ctrl = env.sim.data.ctrl
        return env.reset()

    def step(self, action):
        """Forwards ``action`` to the wrapped environment."""
        return self._wrapped_env.step(action)

    def render(self, *args, **kwargs):
        """Forwards the render call to the wrapped environment."""
        return self._wrapped_env.render(*args, **kwargs)

    def log_diagnostics(self, paths, *args, **kwargs):
        """Forwards the diagnostics call to the wrapped environment."""
        self._wrapped_env.log_diagnostics(paths, *args, **kwargs)

    def get_param_values(self):
        """Returns the parameter values of the wrapped environment."""
        return self._wrapped_env.get_param_values()

    def set_param_values(self, params):
        """Sets the parameter values of the wrapped environment."""
        self._wrapped_env.set_param_values(params)

    def terminate(self):
        """
        Terminates the wrapped environment. No additional clean-up is
        performed by this wrapper.
        """
        self._wrapped_env.terminate()

    @property
    def wrapped_env(self):
        return self._wrapped_env

    @property
    def action_space(self):
        return self._wrapped_env.action_space

    @property
    def observation_space(self):
        return self._wrapped_env.observation_space

    @property
    def horizon(self):
        return self._wrapped_env.horizon


# Short alias so callers can write ``randomize(env, variations)``.
randomize = RandomizedEnv
class Method(Enum):
    """
    The random coefficient is applied according to these methods.
    """
    # The randomized value is the product of the coefficient and the
    # default value of the dynamic parameter.
    COEFFICIENT = 1
    # The randomized value is equal to the coefficient.
    ABSOLUTE = 2


class Distribution(Enum):
    """
    The different ways to produce the random coefficient.
    """
    # Gaussian distribution
    GAUSSIAN = 1
    # Uniform distribution
    UNIFORM = 2


class Variation:
    """
    Each dynamic parameter to be randomized is represented by a Variation.
    This class works as a read-only record of the data fields required to
    find the corresponding dynamic parameter and apply the randomization
    to it.
    """

    def __init__(self,
                 xpath,
                 attrib,
                 method,
                 distribution,
                 var_range,
                 mean_std,
                 elem=None,
                 default=None):
        """
        Parameters
        ----------
        xpath : string
            path expression that identifies the node in the XML model
        attrib : string
            name of the attribute of that node to randomize
        method : Method
            how the random coefficient is applied
        distribution : Distribution
            distribution the random coefficient is sampled from
        var_range : tuple
            (low, high) pair, required for Distribution.UNIFORM
        mean_std : tuple
            (mean, standard deviation) pair, required for
            Distribution.GAUSSIAN
        elem : object
            stored as given and exposed through the ``elem`` property
        default : object
            stored as given and exposed through the ``default`` property

        Raises
        ------
        ValueError
            if xpath or attrib is missing, or if the parameters required by
            the chosen distribution are missing
        """
        # Fail here rather than later during the XML lookup, where a missing
        # xpath or attribute produces a much less helpful error.
        if xpath is None:
            raise ValueError(
                "Need to call at_xpath to select the node to randomize")

        if attrib is None:
            raise ValueError(
                "Need to call attribute to select the parameter to randomize")

        if distribution is Distribution.GAUSSIAN and mean_std is None:
            raise ValueError(
                "Need to call with_mean_std when sampled from Gaussian")

        if distribution is Distribution.UNIFORM and var_range is None:
            raise ValueError(
                "Need to call with_range when sampled from Uniform")

        self._xpath = xpath
        self._attrib = attrib
        self._method = method
        self._distribution = distribution
        self._var_range = var_range
        self._mean_std = mean_std
        self._elem = elem
        self._default = default

    def __repr__(self):
        return ("%s(xpath=%r, attrib=%r, method=%r, distribution=%r, "
                "var_range=%r, mean_std=%r)" %
                (type(self).__name__, self._xpath, self._attrib,
                 self._method, self._distribution, self._var_range,
                 self._mean_std))

    @property
    def xpath(self):
        return self._xpath

    @property
    def elem(self):
        return self._elem

    @property
    def attrib(self):
        return self._attrib

    @property
    def default(self):
        return self._default

    @property
    def method(self):
        return self._method

    @property
    def distribution(self):
        return self._distribution

    @property
    def var_range(self):
        return self._var_range

    @property
    def mean_std(self):
        return self._mean_std
class Variations:
    """
    The purpose of this class is to keep a list of all the variations
    that have to be applied to the randomized environment, as well as
    the methods to put the variations in the corresponding XML file.
    """

    def __init__(self):
        self._list = []
        self._elem_cache = {}
        self._default_cache = {}
        self._parsed_model = None

    @staticmethod
    def _parse_value(text):
        """Converts an XML attribute string into a float or a float array."""
        # split() without an argument tolerates repeated, leading and
        # trailing whitespace, which split(' ') turns into empty tokens.
        tokens = text.split()
        if len(tokens) <= 1:
            # float() raises ValueError for an empty attribute value.
            return float(text)
        return np.array([float(token) for token in tokens])

    @staticmethod
    def _format_value(value):
        """Converts a scalar or an array into an XML attribute string."""
        array = np.asarray(value)
        if array.ndim == 0:
            return str(value)
        # str() of an array adds brackets; the attribute needs the plain
        # numbers separated by spaces.
        return ' '.join(str(float(item)) for item in array.ravel())

    def randomize(self):
        """
        Creates a VariationSpec instance to store values of dynamic
        parameters.

        Returns
        -------
        VariationSpec
        """
        return VariationSpec(self)

    def initialize_variations(self, xml_file_path):
        """
        Once all the variations have been added to the list member of this
        class, this method finds each variation as a node within the model
        defined in the XML file.
        For each variation, a reference to the node in the parsed model is
        cached, as well as the default value of the parameter found in the
        model.

        Parameters
        ----------
        xml_file_path : string
            absolute path to the location of the XML file that contains the
            model

        Raises
        ------
        ValueError
            if a node or an attribute cannot be found in the model, or if an
            attribute value cannot be converted to numbers
        """
        self._parsed_model = etree.parse(xml_file_path)
        self._elem_cache = {}
        self._default_cache = {}
        for v in self._list:
            e = self._parsed_model.find(v.xpath)
            if e is None:
                raise ValueError(
                    "Could not find node in the XML model: %s" % v.xpath)
            self._elem_cache[v] = e

            if v.attrib not in e.attrib:
                raise ValueError("Attribute %s doesn't exist in node %s" %
                                 (v.attrib, v.xpath))
            self._default_cache[v] = self._parse_value(e.attrib[v.attrib])

    def get_randomized_xml_model(self):
        """
        After all the variations have been initialized, this method will
        generate a XML string with randomized dynamic parameters.

        Returns
        -------
        string
            XML string of the model with the randomized dynamic parameters

        Raises
        ------
        ValueError
            if a variation has an unknown distribution or method, or if the
            sampled value does not have the shape of the default value
        """
        for v in self._list:
            e = self._elem_cache[v]
            default = self._default_cache[v]
            if v.distribution == Distribution.GAUSSIAN:
                c = np.random.normal(loc=v.mean_std[0], scale=v.mean_std[1])
            elif v.distribution == Distribution.UNIFORM:
                c = np.random.uniform(low=v.var_range[0], high=v.var_range[1])
            else:
                raise ValueError("Unknown distribution")

            # Check if the sampled value has the same shape with default value
            if np.shape(c) != np.shape(default):
                raise ValueError(
                    "Sampled value you input %s don't match with default value %s in the xml node %s"
                    % (c, default, v.xpath))

            # The default value read from the file is always the base of the
            # coefficient method, so repeated calls do not compound.
            if v.method == Method.COEFFICIENT:
                value = c * default
            elif v.method == Method.ABSOLUTE:
                value = c
            else:
                raise ValueError("Unknown method")
            e.attrib[v.attrib] = self._format_value(value)

        return etree.tostring(self._parsed_model.getroot()).decode("ascii")

    def get_list(self):
        """
        Returns a list with all the variations

        Returns
        -------
        [Variation]
            A list of all the dynamic parameters to find in the model XML
            and the configuration to randomize each of them
        """
        return self._list

    def add(self, variation):
        """
        Appends a variation to the list of variations.

        Parameters
        ----------
        variation : Variation
            the variation to append
        """
        self._list.append(variation)
class VariationSpec:
    """
    The purpose of this class is to set the values of each dynamic
    parameter.
    The class implements the fluent interface pattern, so each call
    to set an attribute will return the instance of this class.
    """

    def __init__(self, variations):
        """
        Parameters
        ----------
        variations : Variations
            container that receives the variation when ``add`` is called
        """
        self._variations = variations
        self._xpath = None
        self._attrib = None
        self._method = Method.ABSOLUTE
        self._distribution = Distribution.UNIFORM
        self._mean_std = (0.0, 1.0)
        self._var_range = (0.0, 1.0)
        self._elem = None
        self._default = None

    def at_xpath(self, xpath):
        """
        Sets the xpath of the variation being specified.

        Parameters
        ----------
        xpath : string
            path expression to identify a node within the XML file
            of the MuJoCo environment.
        """
        self._xpath = xpath
        return self

    def attribute(self, attrib):
        """
        Sets the attribute of the variation being specified.

        Parameters
        ----------
        attrib : string
            name of the dynamic parameter to randomize within the
            node defined in xpath.
        """
        self._attrib = attrib
        return self

    def with_method(self, method):
        """
        Sets the method to apply the random coefficient for the variation
        being specified.

        Parameters
        ----------
        method : Method
            if equal to Method.ABSOLUTE, it sets the dynamic parameter
            equal to the random coefficient obtained from the distribution,
            or if equal to Method.COEFFICIENT, it multiplies the default
            value provided in the XML file by the random coefficient.
        """
        self._method = method
        return self

    def sampled_from(self, distribution):
        """
        Sets the distribution where the random coefficient is sampled from
        for the variation being specified.

        Parameters
        ----------
        distribution : Distribution
            it specifies the probability distribution used to obtain the
            random coefficient.
        """
        self._distribution = distribution
        return self

    def with_mean_std(self, mean, std_deviation):
        """
        Sets the mean and the standard deviation of the random coefficient
        for the variation being specified. Only to be used for
        Distribution.GAUSSIAN

        Parameters
        ----------
        mean : float or array of floats
            mean of the distribution
        std_deviation : float or array of floats
            standard deviation of the distribution, must not be negative

        Raises
        ------
        ValueError
            if the standard deviation is negative
        """
        # Reject here what the sampler would only reject at sampling time.
        if np.any(np.asarray(std_deviation) < 0):
            raise ValueError("The standard deviation cannot be negative")
        self._mean_std = (mean, std_deviation)
        return self

    def with_range(self, low, high):
        """
        Sets the range for the random coefficient for the variation being
        specified. Only to be used for Distribution.UNIFORM

        Parameters
        ----------
        low : float or array of floats
            inclusive low value of the range
        high : float or array of floats
            exclusive high value of the range, must not be below low

        Raises
        ------
        ValueError
            if low is greater than high
        """
        # Uniform sampling with high < low has no defined result, so the
        # mistake is reported instead of silently producing values.
        if np.any(np.asarray(low) > np.asarray(high)):
            raise ValueError(
                "The low value of the range cannot be greater than the "
                "high value")
        self._var_range = (low, high)
        return self

    def add(self):
        """
        Adds the variation defined by the fluent interface up to this call
        to the list of variations to be randomized.

        Returns
        -------
        Variations
            the container the variation was added to, so that another
            ``randomize()`` call can be chained
        """
        self._variations.add(
            Variation(
                xpath=self._xpath,
                attrib=self._attrib,
                method=self._method,
                distribution=self._distribution,
                var_range=self._var_range,
                mean_std=self._mean_std,
                elem=self._elem,
                default=self._default))
        return self._variations