LAMDA-RL
diff --git a/‎.pre-commit-config.yaml
+28 b/‎.pre-commit-config.yaml
+28
diff --git a/‎README.md
+39 b/‎README.md
+39
diff --git a/‎examples/train.py
+76 b/‎examples/train.py
+76
diff --git a/‎examples/train_mg.sh
+4 b/‎examples/train_mg.sh
+4
diff --git a/‎examples/train_movebox.sh
+4 b/‎examples/train_movebox.sh
+4
diff --git a/‎examples/train_overcooked.sh
+4 b/‎examples/train_overcooked.sh
+4
diff --git a/‎harl/__init__.py b/‎harl/__init__.py
diff --git a/‎harl/algorithms/__init__.py b/‎harl/algorithms/__init__.py
diff --git a/‎harl/algorithms/actors/__init__.py
+11 b/‎harl/algorithms/actors/__init__.py
+11
@@ -0,0 +1,28 @@
+# See https://pre-commit.com for more information
+# See https://pre-commit.com/hooks.html for more hooks
+repos:
+  # Generic file hygiene checks.
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.4.0
+    hooks:
+      - id: trailing-whitespace
+      - id: check-added-large-files
+      - id: check-symlinks
+      - id: end-of-file-fixer
+      - id: check-yaml
+      - id: check-toml
+      - id: check-ast
+      - id: check-merge-conflict
+      - id: detect-private-key
+      # - id: debug-statements
+      # - id: double-quote-string-fixer
+  # Code formatting for Python sources and notebooks.
+  - repo: https://github.com/psf/black
+    rev: 23.7.0
+    hooks:
+      - id: black
+      - id: black-jupyter
+  # Import sorting, configured to agree with black's formatting.
+  - repo: https://github.com/PyCQA/isort
+    rev: 5.12.0
+    hooks:
+      - id: isort
+        args: ["--profile", "black", "--filter-files"]
@@ -0,0 +1,39 @@
+# Multi-Expert Distillation for Few-Shot Coordination (Student Abstract)
+
+This repository contains the implementation of Multi-Expert Distillation (MED), based on PyTorch. 
+
+## 1. Getting started
+
+Use the install script to install the python environment:
+
+```shell
+bash install.sh
+conda activate med
+```
+
+## 2. Run an experiment
+All experiments are launched through a single entry point, `examples/train.py`, which accepts custom command-line arguments.
+
+### LIPO
+The repository includes a re-implementation of [LIPO](https://sites.google.com/view/iclr-lipo-2023).
+To generate a population in Gridworld MoveBox or Overcooked, enter the `examples` folder and run one of the following commands:
+```bash
+python train.py --algo lipo --env gridworld --task MoveBox --map multi_exits --exp_name test --use_wandb True --pop_size 8 --horizon 50 --n_iter 500 --n_sp_ts 5000 --n_xp_ts 5000 --eval_interval 10
+```
+```bash
+python train.py --algo lipo --env overcooked --map_name full_divider_salad_multi_ingred --exp_name test --use_wandb True --pop_size 8 --horizon 100 --n_iter 1000 --n_sp_ts 5000 --n_xp_ts 5000 --eval_interval 10
+```
+The results and models can be found in the `examples/results` folder. 
+### MED
+To run MED, the population model files should be placed in the `harl/runners/generalist_runners/models` folder. Users should make sure the file is named properly. 
+For running MED, enter the `examples` folder and run the following commands:
+```bash
+python train.py --algo med --env matrix_game --exp_name performance --t_max 30000 --n_episodes 3 --use_wandb True
+```
+```bash
+python train.py --algo med --env gridworld --task MoveBox --map multi_exits --exp_name performance --t_max 2000000 --horizon 50 --n_episodes 2 --use_wandb True
+```
+```bash
+python train.py --algo med --env overcooked --map_name full_divider_salad_multi_ingred --exp_name performance --t_max 7500000 --horizon 100 --n_episodes 2 --use_wandb True
+```
+Training scripts are also provided in the `examples` folder.
@@ -0,0 +1,76 @@
+"""Train an algorithm."""
+import argparse
+import json
+
+from harl.utils.configs_tools import get_defaults_yaml_args, update_args
+
+
+def main():
+    """Main function."""
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter
+    )
+    parser.add_argument(
+        "--algo",
+        type=str,
+        default="med",
+        choices=[
+            "med",
+            "lipo",
+        ],
+        help="Algorithm name. Choose from: med, lipo.",
+    )
+    parser.add_argument(
+        "--env",
+        type=str,
+        default="matrix_game",
+        choices=[
+            "matrix_game",
+            "gridworld",
+            "overcooked",
+        ],
+        help="Environment name. Choose from: matrix_game, gridworld, overcooked.",
+    )
+    parser.add_argument(
+        "--exp_name", type=str, default="installtest", help="Experiment name."
+    )
+    parser.add_argument(
+        "--load_config",
+        type=str,
+        default="",
+        help="If set, load existing experiment config file instead of reading from yaml config file.",
+    )
+    args, unparsed_args = parser.parse_known_args()
+
+    def process(arg):
+        try:
+            return eval(arg)
+        except:
+            return arg
+
+    keys = [k[2:] for k in unparsed_args[0::2]]  # remove -- from argument
+    values = [process(v) for v in unparsed_args[1::2]]
+    unparsed_dict = {k: v for k, v in zip(keys, values)}
+    args = vars(args)  # convert to dict
+    if args["load_config"] != "":  # load config from existing config file
+        with open(args["load_config"], encoding="utf-8") as file:
+            all_config = json.load(file)
+        args["algo"] = all_config["main_args"]["algo"]
+        args["env"] = all_config["main_args"]["env"]
+        args["exp_name"] = all_config["main_args"]["exp_name"]
+        algo_args = all_config["algo_args"]
+        env_args = all_config["env_args"]
+    else:  # load config from corresponding yaml file
+        algo_args, env_args = get_defaults_yaml_args(args["algo"], args["env"])
+    update_args(unparsed_dict, algo_args, env_args)  # update args from command line
+
+    # start training
+    from harl.runners import RUNNER_REGISTRY
+
+    runner = RUNNER_REGISTRY[args["algo"]](args, algo_args, env_args)
+    runner.run()
+    runner.close()
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,4 @@
+for seed in 111 222 333 444 555
+do
+    python train.py --algo med --env matrix_game --exp_name performance --use_wandb True --seed $seed --t_max 30000
+done
@@ -0,0 +1,4 @@
+for seed in 111 222 333 444 555
+do
+    python train.py --algo med --env gridworld --task MoveBox --map multi_exits --exp_name performance --t_max 2000000 --horizon 50 --n_episodes 2 --use_wandb True --seed $seed
+done
@@ -0,0 +1,4 @@
+for seed in 111 222 333 444 555
+do
+    python train.py --algo med --env overcooked --map_name full_divider_salad_multi_ingred --exp_name performance --t_max 7500000 --horizon 100 --n_episodes 2 --use_wandb True --seed $seed
+done
@@ -0,0 +1,11 @@
+"""Algorithm registry."""
+# Actor implementations exposed through the registry below.
+from harl.algorithms.actors.incompact_mappo_z import IncompatMAPPOZ
+from harl.algorithms.actors.med_gpt import GPTAgent
+
+# Maps an algorithm name to the actor class registered for it.
+# NOTE(review): the keys match the ``--algo`` choices of examples/train.py
+# ("med", "lipo"); confirm against the code that performs the lookup.
+ALGO_REGISTRY = {
+    # population
+    "lipo": IncompatMAPPOZ,
+    # generalist
+    "med": GPTAgent,
+}