facebookresearch · RedLeader962 · Jul 2, 2024 · Jul 2, 2024 · Jul 2, 2024 · Jul 11, 2024
diff --git a/README.md b/README.md
@@ -78,8 +78,8 @@ passing `experiment=your-name`. The logger will also save a file called
 
 Beyond the override defaults, You can also change other configuration options, 
 such as the type of dynamics model 
-(e.g., `dynamics_model=basic_ensemble`), or the number of models in the ensemble 
-(e.g., `dynamics_model.model.ensemble_size=some-number`). To learn more about
+(e.g., `dynamics_model=basic_ensemble`), or the number of models in the ensemble via a configuration override
+(e.g., `++dynamics_model.model.ensemble_size=some-number`). To learn more about
 all the available options, take a look at the provided 
 [configuration files](https://github.com/facebookresearch/mbrl-lib/tree/main/mbrl/examples/conf). 
 

diff --git a/mbrl/algorithms/mbpo.py b/mbrl/algorithms/mbpo.py
@@ -129,7 +129,10 @@ def train(
 
     mbrl.planning.complete_agent_cfg(env, cfg.algorithm.agent)
     agent = SACAgent(
-        cast(pytorch_sac_pranz24.SAC, hydra.utils.instantiate(cfg.algorithm.agent))
+        cast(
+            pytorch_sac_pranz24.SAC,
+            hydra.utils.instantiate(cfg.algorithm.agent, _recursive_=False),
+        )
     )
 
     work_dir = work_dir or os.getcwd()

diff --git a/mbrl/algorithms/planet.py b/mbrl/algorithms/planet.py
@@ -86,7 +86,7 @@ def train(
 
     # Create PlaNet model
     cfg.dynamics_model.action_size = env.action_space.shape[0]
-    planet = hydra.utils.instantiate(cfg.dynamics_model)
+    planet = hydra.utils.instantiate(cfg.dynamics_model, _recursive_=False)
     assert isinstance(planet, mbrl.models.PlaNetModel)
     model_env = ModelEnv(env, planet, no_termination, generator=rng)
     trainer = ModelTrainer(planet, logger=logger, optim_lr=1e-3, optim_eps=1e-4)

diff --git a/mbrl/env/pets_reacher.py b/mbrl/env/pets_reacher.py
@@ -98,9 +98,9 @@ def get_EE_pos(self, states):
             z = -np.sin(hinge) * np.cos(roll) * perp_all_axis
             new_rot_axis = x + y + z
             new_rot_perp_axis = np.cross(new_rot_axis, rot_axis)
-            new_rot_perp_axis[
-                np.linalg.norm(new_rot_perp_axis, axis=1) < 1e-30
-            ] = rot_perp_axis[np.linalg.norm(new_rot_perp_axis, axis=1) < 1e-30]
+            new_rot_perp_axis[np.linalg.norm(new_rot_perp_axis, axis=1) < 1e-30] = (
+                rot_perp_axis[np.linalg.norm(new_rot_perp_axis, axis=1) < 1e-30]
+            )
             new_rot_perp_axis /= np.linalg.norm(
                 new_rot_perp_axis, axis=1, keepdims=True
             )

diff --git a/mbrl/examples/README.md b/mbrl/examples/README.md
@@ -24,18 +24,19 @@ The config files are generally structured in 4 groups:
 * `action_optimizer`: describes possible optimizers to use for action selections. Some algorithms,
 like MBPO, ignore this.
 
-For example, to run MBPO on `gym`'s Hopper environment using the standard ensemble version of
+For example, to run MBPO on the continuous cartpole environment (`cartpole_continuous`) using the standard ensemble version of
 [GaussianMLP](https://github.com/facebookresearch/mbrl-lib/blob/main/mbrl/models/gaussian_mlp.py),
 you can type
 
 ```bash
 python -m mbrl.examples.main \
   algorithm=mbpo \
-  overrides=mbpo_hopper \
+  overrides=mbpo_cartpole \
   dynamics_model=gaussian_mlp_ensemble \
-  algorithm.agent.batch_size=256 \
-  overrides.validation_ratio=0.2 \
-  dynamics_model.activation_fn_cfg._target_=torch.nn.ReLU
+  ++device=cpu \
+  ++overrides.sac_batch_size=256 \
+  ++overrides.validation_ratio=0.2 \
+  ++dynamics_model.activation_fn_cfg._target_=torch.nn.ReLU
 ```
 where we have re-written some defaults, just to show how `hydra` command line syntax
 works. The number of possible options is extensive, and the best way to explore would be to 
@@ -52,4 +53,4 @@ inside a folder whose path looks like
 you can change the root directory (`./exp`) by passing 
 `root_dir=path-to-your-dir`, and the experiment sub-folder (`default`) by
 passing `experiment=your-name`. The logger will also save a file called 
-`model_train.csv` with training information for the dynamics model.
+`model_train.csv` with training information for the dynamics model.
diff --git a/mbrl/examples/conf/action_optimizer/cem.yaml b/mbrl/examples/conf/action_optimizer/cem.yaml
@@ -1,4 +1,4 @@
-# @package _group_
+
 
 _target_: mbrl.planning.CEMOptimizer
 num_iterations: ${overrides.cem_num_iters}

diff --git a/mbrl/examples/conf/action_optimizer/icem.yaml b/mbrl/examples/conf/action_optimizer/icem.yaml
@@ -1,4 +1,4 @@
-# @package _group_
+
 
 _target_: mbrl.planning.ICEMOptimizer
 num_iterations: ${overrides.cem_num_iters}

diff --git a/mbrl/examples/conf/action_optimizer/mppi.yaml b/mbrl/examples/conf/action_optimizer/mppi.yaml
@@ -1,4 +1,4 @@
-# @package _group_
+
 
 _target_: mbrl.planning.MPPIOptimizer
 num_iterations: ${overrides.mppi_num_iters}
@@ -8,4 +8,4 @@ sigma: ${overrides.mppi_sigma}
 beta: ${overrides.mppi_beta}
 lower_bound: ???
 upper_bound: ???
-device: ${device}
+device: ${device}
diff --git a/mbrl/examples/conf/algorithm/mbpo.yaml b/mbrl/examples/conf/algorithm/mbpo.yaml
@@ -1,4 +1,4 @@
-# @package _group_
+
 name: "mbpo"
 
 normalize: true
@@ -34,4 +34,4 @@ agent:
     target_entropy: ${overrides.sac_target_entropy}
     hidden_size: ${overrides.sac_hidden_size}
     device: ${device}
-    lr: ${overrides.sac_lr}
+    lr: ${overrides.sac_lr}
diff --git a/mbrl/examples/conf/algorithm/pets.yaml b/mbrl/examples/conf/algorithm/pets.yaml
@@ -1,4 +1,4 @@
-# @package _group_
+
 name: "pets"
 
 agent:

diff --git a/mbrl/examples/conf/algorithm/planet.yaml b/mbrl/examples/conf/algorithm/planet.yaml
@@ -1,4 +1,4 @@
-# @package _group_
+
 name: "planet"
 
 agent:
@@ -15,4 +15,4 @@ num_initial_trajectories: 5
 action_noise_std: 0.3
 test_frequency: 25
 num_episodes: 1000
-dataset_size: 1000000
+dataset_size: 1000000
diff --git a/mbrl/examples/conf/dynamics_model/basic_ensemble.yaml b/mbrl/examples/conf/dynamics_model/basic_ensemble.yaml
@@ -1,4 +1,4 @@
-# @package _group_
+
 _target_: mbrl.models.BasicEnsemble
 ensemble_size: 5
 device: ${device}

diff --git a/mbrl/examples/conf/dynamics_model/gaussian_mlp.yaml b/mbrl/examples/conf/dynamics_model/gaussian_mlp.yaml
@@ -1,4 +1,4 @@
-# @package _group_
+
 _target_: mbrl.models.GaussianMLP
 device: ${device}
 num_layers: 4

diff --git a/mbrl/examples/conf/dynamics_model/gaussian_mlp_ensemble.yaml b/mbrl/examples/conf/dynamics_model/gaussian_mlp_ensemble.yaml
@@ -1,4 +1,4 @@
-# @package _group_
+
 _target_: mbrl.models.GaussianMLP
 device: ${device}
 num_layers: 4

diff --git a/mbrl/examples/conf/dynamics_model/planet.yaml b/mbrl/examples/conf/dynamics_model/planet.yaml
@@ -1,4 +1,4 @@
-# @package _group_
+
 _target_: mbrl.models.PlaNetModel
 obs_shape: [3, 64, 64]
 obs_encoding_size: 1024

diff --git a/mbrl/examples/conf/main.yaml b/mbrl/examples/conf/main.yaml
@@ -1,4 +1,5 @@
 defaults:
+  - _self_
   - algorithm: pets
   - dynamics_model: gaussian_mlp_ensemble
   - overrides: pets_cartpole

diff --git a/mbrl/examples/conf/overrides/mbpo_ant.yaml b/mbrl/examples/conf/overrides/mbpo_ant.yaml
@@ -1,4 +1,4 @@
-# @package _group_
+
 env: "ant_truncated_obs"
 # term_fn is set automatically by mbrl.util.env.EnvHandler.make_env
 

diff --git a/mbrl/examples/conf/overrides/mbpo_cartpole.yaml b/mbrl/examples/conf/overrides/mbpo_cartpole.yaml
@@ -1,4 +1,4 @@
-# @package _group_
+
 env: "cartpole_continuous"
 trial_length: 200
 
@@ -26,4 +26,4 @@ sac_automatic_entropy_tuning: true
 sac_target_entropy: -0.05
 sac_hidden_size: 256
 sac_lr: 0.0003
-sac_batch_size: 256
+sac_batch_size: 256
diff --git a/mbrl/examples/conf/overrides/mbpo_halfcheetah.yaml b/mbrl/examples/conf/overrides/mbpo_halfcheetah.yaml
@@ -1,4 +1,4 @@
-# @package _group_
+
 env: "gym___HalfCheetah-v2"
 term_fn: "no_termination"
 

diff --git a/mbrl/examples/conf/overrides/mbpo_hopper.yaml b/mbrl/examples/conf/overrides/mbpo_hopper.yaml
@@ -1,4 +1,4 @@
-# @package _group_
+
 env: "gym___Hopper-v4"
 term_fn: "hopper"
 
@@ -26,4 +26,4 @@ sac_automatic_entropy_tuning: false
 sac_target_entropy: 1 # ignored, since entropy tuning is false
 sac_hidden_size: 512
 sac_lr: 0.0003
-sac_batch_size: 256
+sac_batch_size: 256
diff --git a/mbrl/examples/conf/overrides/mbpo_humanoid.yaml b/mbrl/examples/conf/overrides/mbpo_humanoid.yaml
@@ -1,4 +1,4 @@
-# @package _group_
+
 env: "humanoid_truncated_obs"
 # term_fn is set automatically by mbrl.util.env.EnvHandler.make_env
 

diff --git a/mbrl/examples/conf/overrides/mbpo_inv_pendulum.yaml b/mbrl/examples/conf/overrides/mbpo_inv_pendulum.yaml
@@ -1,4 +1,4 @@
-# @package _group_
+
 env: "gym___InvertedPendulum-v4"
 term_fn: "inverted_pendulum"
 
@@ -26,4 +26,4 @@ sac_automatic_entropy_tuning: true
 sac_hidden_size: 256
 sac_lr: 0.0003
 sac_batch_size: 256
-sac_target_entropy: -1
+sac_target_entropy: -1
diff --git a/mbrl/examples/conf/overrides/mbpo_pusher.yaml b/mbrl/examples/conf/overrides/mbpo_pusher.yaml
@@ -1,4 +1,4 @@
-# @package _group_
+
 env: "pets_pusher"
 term_fn: "no_termination"
 trial_length: 150

diff --git a/mbrl/examples/conf/overrides/mbpo_walker.yaml b/mbrl/examples/conf/overrides/mbpo_walker.yaml
@@ -1,4 +1,4 @@
-# @package _group_
+
 env: "gym___Walker2d-v4"
 term_fn: "walker2d"
 

diff --git a/mbrl/examples/conf/overrides/pb_mbpo_inv_pendulum.yaml b/mbrl/examples/conf/overrides/pb_mbpo_inv_pendulum.yaml
@@ -1,4 +1,4 @@
-# @package _group_
+
 env: "pybulletgym___InvertedPendulumMuJoCoEnv-v0"
 
 term_fn: "inverted_pendulum"

diff --git a/mbrl/examples/conf/overrides/pets_cartpole.yaml b/mbrl/examples/conf/overrides/pets_cartpole.yaml
@@ -1,4 +1,4 @@
-# @package _group_
+
 env: "cartpole_continuous"
 learned_rewards: false
 trial_length: 200
@@ -18,4 +18,4 @@ cem_num_iters: 5
 cem_elite_ratio: 0.1
 cem_population_size: 350
 cem_alpha: 0.1
-cem_clipped_normal: false
+cem_clipped_normal: false
diff --git a/mbrl/examples/conf/overrides/pets_cartpole_paper_version.yaml b/mbrl/examples/conf/overrides/pets_cartpole_paper_version.yaml
@@ -1,4 +1,4 @@
-# @package _group_
+
 env: "cartpole_pets_version"
 
 # Note: This pre-process function requires setting model input manually
@@ -23,4 +23,4 @@ cem_num_iters: 5
 cem_elite_ratio: 0.1
 cem_population_size: 500
 cem_alpha: 0.1
-cem_clipped_normal: false
+cem_clipped_normal: false
diff --git a/mbrl/examples/conf/overrides/pets_halfcheetah.yaml b/mbrl/examples/conf/overrides/pets_halfcheetah.yaml
@@ -1,4 +1,4 @@
-# @package _group_
+
 env: "pets_halfcheetah"
 term_fn: "no_termination"
 obs_process_fn: mbrl.env.pets_halfcheetah.HalfCheetahEnv.preprocess_fn
@@ -21,4 +21,4 @@ cem_num_iters: 5
 cem_elite_ratio: 0.16
 cem_population_size: 400
 cem_alpha: 0.12
-cem_clipped_normal: false
+cem_clipped_normal: false
diff --git a/mbrl/examples/conf/overrides/pets_hopper.yaml b/mbrl/examples/conf/overrides/pets_hopper.yaml
@@ -1,4 +1,4 @@
-# @package _group_
+
 env: "gym___Hopper-v4"
 term_fn: "hopper"
 learned_rewards: true
@@ -19,4 +19,4 @@ cem_num_iters: 5
 cem_elite_ratio: 0.1
 cem_population_size: 350
 cem_alpha: 0.1
-cem_clipped_normal: false
+cem_clipped_normal: false
diff --git a/mbrl/examples/conf/overrides/pets_icem_cartpole.yaml b/mbrl/examples/conf/overrides/pets_icem_cartpole.yaml
@@ -1,4 +1,4 @@
-# @package _group_
+
 env: "cartpole_continuous"
 learned_rewards: false
 trial_length: 200

diff --git a/mbrl/examples/conf/overrides/pets_inv_pendulum.yaml b/mbrl/examples/conf/overrides/pets_inv_pendulum.yaml
@@ -1,4 +1,4 @@
-# @package _group_
+
 env: "gym___InvertedPendulum-v4"
 term_fn: "inverted_pendulum"
 learned_rewards: true

diff --git a/mbrl/examples/conf/overrides/pets_mppi_halfcheetah.yaml b/mbrl/examples/conf/overrides/pets_mppi_halfcheetah.yaml
@@ -1,4 +1,4 @@
-# @package _group_
+
 env: "pets_halfcheetah"
 term_fn: "no_termination"
 obs_process_fn: mbrl.env.pets_halfcheetah.HalfCheetahEnv.preprocess_fn
@@ -21,4 +21,4 @@ mppi_num_iters: 5
 mppi_population_size: 350
 mppi_gamma: 0.9
 mppi_sigma: 1.0
-mppi_beta: 0.9
+mppi_beta: 0.9
diff --git a/mbrl/examples/conf/overrides/pets_pusher.yaml b/mbrl/examples/conf/overrides/pets_pusher.yaml
@@ -1,4 +1,4 @@
-# @package _group_
+
 env: "pets_pusher"
 term_fn: "no_termination"
 learned_rewards: true

diff --git a/mbrl/examples/conf/overrides/pets_reacher.yaml b/mbrl/examples/conf/overrides/pets_reacher.yaml
@@ -1,4 +1,4 @@
-# @package _group_
+
 env: "pets_reacher"
 learned_rewards: true
 num_steps: 15000
@@ -19,4 +19,4 @@ cem_num_iters: 5
 cem_elite_ratio: 0.1
 cem_population_size: 350
 cem_alpha: 0.1
-cem_clipped_normal: false
+cem_clipped_normal: false
diff --git a/mbrl/examples/conf/overrides/planet_cartpole_balance.yaml b/mbrl/examples/conf/overrides/planet_cartpole_balance.yaml
@@ -1,4 +1,4 @@
-# @package _group_
+
 env: "dmcontrol_cartpole_balance"  # used to set the hydra dir, ignored otherwise
 
 env_cfg:

diff --git a/mbrl/examples/conf/overrides/planet_cartpole_swingup.yaml b/mbrl/examples/conf/overrides/planet_cartpole_swingup.yaml
@@ -1,4 +1,4 @@
-# @package _group_
+
 env: "dmcontrol_cartpole_swingup"  # used to set the hydra dir, ignored otherwise
 
 env_cfg:

diff --git a/mbrl/examples/conf/overrides/planet_cheetah_run.yaml b/mbrl/examples/conf/overrides/planet_cheetah_run.yaml
@@ -1,4 +1,4 @@
-# @package _group_
+
 env: "dmcontrol_cheetah_run"  # used to set the hydra dir, ignored otherwise
 
 env_cfg:

diff --git a/mbrl/examples/conf/overrides/planet_cup_catch.yaml b/mbrl/examples/conf/overrides/planet_cup_catch.yaml
@@ -1,4 +1,4 @@
-# @package _group_
+
 env: "dmcontrol_ball_in_cup_catch"  # used to set the hydra dir, ignored otherwise
 
 env_cfg:

diff --git a/mbrl/examples/conf/overrides/planet_finger_spin.yaml b/mbrl/examples/conf/overrides/planet_finger_spin.yaml
@@ -1,4 +1,4 @@
-# @package _group_
+
 env: "dmcontrol_finger_spin"  # used to set the hydra dir, ignored otherwise
 
 env_cfg:

diff --git a/mbrl/examples/conf/overrides/planet_walker_walk.yaml b/mbrl/examples/conf/overrides/planet_walker_walk.yaml
@@ -1,4 +1,4 @@
-# @package _group_
+
 env: "dmcontrol_walker_walk"  # used to set the hydra dir, ignored otherwise
 
 env_cfg:

diff --git a/mbrl/models/basic_ensemble.py b/mbrl/models/basic_ensemble.py
@@ -51,7 +51,7 @@ class BasicEnsemble(Ensemble):
         device (str or torch.device): the device to use for the model.
         member_cfg (omegaconf.DictConfig): the configuration needed to instantiate the models
                                            in the ensemble. They will be instantiated using
-                                           `hydra.utils.instantiate(member_cfg)`.
+                                           `hydra.utils.instantiate(member_cfg, _recursive_=False)`.
         propagation_method (str, optional): the uncertainty propagation method to use (see
             above). Defaults to ``None``.
     """
@@ -71,7 +71,7 @@ def __init__(
         )
         self.members = []
         for i in range(ensemble_size):
-            model = hydra.utils.instantiate(member_cfg)
+            model = hydra.utils.instantiate(member_cfg, _recursive_=False)
             self.members.append(model)
         self.deterministic = self.members[0].deterministic
         self.in_size = getattr(self.members[0], "in_size", None)

diff --git a/mbrl/models/gaussian_mlp.py b/mbrl/models/gaussian_mlp.py
@@ -92,7 +92,7 @@ def create_activation():
             else:
                 # Handle the case where activation_fn_cfg is a dict
                 cfg = omegaconf.OmegaConf.create(activation_fn_cfg)
-                activation_func = hydra.utils.instantiate(cfg)
+                activation_func = hydra.utils.instantiate(cfg, _recursive_=False)
             return activation_func
 
         def create_linear_layer(l_in, l_out):