Policy config refactoring (#125)

* Separate test and train domains * Add progress bars in ontology embedder * Update custom_util.py * Fix custom_util things I broke * Github master * Save dialogue ids in prediction file * Fix bug in ontology extraction * Return dialogue ids in predictions file and fix bugs * Add setsumbt starting config loader * Add script to extract golden labels from dataset to match model predictions * Add more setsumbt configs * Add option to use local files only in transformers package * Update starting configurations for setsumbt * Github master * Update README.md * Update README.md * Update convlab/dialog_agent/agent.py * Revert custom_util.py * Update custom_util.py * Commit unverified changes :(:(:(:( * Fix SetSUMBT bug resulting from new torch feature * Setsumbt bug fixes * Policy config refactor * Policy config refactor * small bug fix in memory with new config path Co-authored-by: Carel van Niekerk <carel.niekerk@hhu.de> Co-authored-by: Michael Heck <michael.heck@hhu.de> Co-authored-by: Christian Geishauser <christian.geishauser@hhu.de>

Policy config refactoring (#125)
182d847b · Carel van Niekerk · GitHub · 44746e90 · 182d847b · 182d847b
Unverified Commit 182d847b authored Jan 25, 2023 by Carel van Niekerk Committed by GitHub Jan 25, 2023
--- a/convlab/policy/ppo/README.md
+++ b/convlab/policy/ppo/README.md
@@ -11,10 +11,10 @@ If you want to obtain a supervised model for pre-training, please have a look in
 Starting a RL training is as easy as executing

 ```sh
-$ python train.py --path=your_environment_config --seed=SEED
+$ python train.py --config_name=your_config_name --seed=SEED
 ```

-One example for the environment-config is **semantic_level_config.json**, where parameters for the training are specified, for instance
+One example for the environment-config is **RuleUser-Semantic-RuleDST**, where parameters for the training are specified, for instance

 - load_path: provide a path to initialise the model with a pre-trained model, skip the ending .pol.mdl
 - process_num: the number of processes to use during evaluation to speed it up

--- a/convlab/policy/ppo/semanticGenTUS-RuleDST-PPOPolicy.json
+++ b/convlab/policy/ppo/semanticGenTUS-RuleDST-PPOPolicy.json
--- a/convlab/policy/ppo/semantic_level_config.json
+++ b/convlab/policy/ppo/semantic_level_config.json
--- a/convlab/policy/ppo/setsumbt_unc_config.json
+++ b/convlab/policy/ppo/setsumbt_unc_config.json
--- a/convlab/policy/ppo/setsumbt_config.json
+++ b/convlab/policy/ppo/setsumbt_config.json
--- a/convlab/policy/ppo/trippy_config.json
+++ b/convlab/policy/ppo/trippy_config.json
--- a/convlab/policy/ppo/tus_semantic_level_config.json
+++ b/convlab/policy/ppo/tus_semantic_level_config.json
--- a/convlab/policy/ppo/config.json
+++ b/convlab/policy/ppo/config.json
--- a/convlab/policy/ppo/ppo.py
+++ b/convlab/policy/ppo/ppo.py
@@ -22,7 +22,7 @@ class PPO(Policy):

    def __init__(self, is_train=False, dataset='Multiwoz', seed=0, vectorizer=None):

-        with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.json'), 'r') as f:
+        with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'configs' ,'ppo_config.json'), 'r') as f:
            cfg = json.load(f)
        self.save_dir = os.path.join(os.path.dirname(
            os.path.abspath(__file__)), cfg['save_dir'])

--- a/convlab/policy/ppo/train.py
+++ b/convlab/policy/ppo/train.py
@@ -182,8 +182,8 @@ if __name__ == '__main__':

    begin_time = datetime.now()
    parser = ArgumentParser()
-    parser.add_argument("--path", type=str, default='convlab/policy/ppo/semantic_level_config.json',
-                        help="Load path for config file")
+    parser.add_argument("--config_name", type=str, default='RuleUser-Semantic-RuleDST',
+                        help="Name of the configuration")
    parser.add_argument("--seed", type=int, default=None,
                        help="Seed for the policy parameter initialization")
    parser.add_argument("--mode", type=str, default='info',
@@ -191,7 +191,8 @@ if __name__ == '__main__':
    parser.add_argument("--save_eval_dials", type=bool, default=False,
                        help="Flag for saving dialogue_info during evaluation")

-    path = parser.parse_args().path
+    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'configs',
+                        f'{parser.parse_args().config_name}.json')
    seed = parser.parse_args().seed
    mode = parser.parse_args().mode
    save_eval = parser.parse_args().save_eval_dials

--- a/convlab/policy/vtrace_DPT/README.md
+++ b/convlab/policy/vtrace_DPT/README.md
@@ -31,10 +31,10 @@ We provide several supervised trained models on hugging-face to reproduce the re
 Starting a RL training is as easy as executing

 ```sh
-$ python train.py --path=your_environment_config --seed=SEED
+$ python train.py --config_name=your_config_name --seed=SEED
 ```

-One example for the environment-config is **semantic_level_config.json**, where parameters for the training are specified, for instance
+One example for the environment-config is **RuleUser-Semantic-RuleDST**, where parameters for the training are specified, for instance

 - load_path: provide a path to initialise the model with a pre-trained model, skip the ending .pol.mdl
 - process_num: the number of processes to use during evaluation to speed it up

--- a/convlab/policy/vtrace_DPT/semantic_level_config.json
+++ b/convlab/policy/vtrace_DPT/semantic_level_config.json
--- a/convlab/policy/vtrace_DPT/config.json
+++ b/convlab/policy/vtrace_DPT/config.json
--- a/convlab/policy/vtrace_DPT/memory.py
+++ b/convlab/policy/vtrace_DPT/memory.py
@@ -17,7 +17,9 @@ class Memory:

    def __init__(self, seed=0):

-        with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.json'), 'r') as f:
+        dir_name = os.path.dirname(os.path.abspath(__file__))
+        self.config_path = os.path.join(dir_name, 'configs', 'multiwoz21_dpt.json')
+        with open(self.config_path, 'r') as f:
            cfg = json.load(f)

        self.batch_size = cfg.get('batchsz', 32)

--- a/convlab/policy/vtrace_DPT/train.py
+++ b/convlab/policy/vtrace_DPT/train.py
@@ -101,8 +101,8 @@ if __name__ == '__main__':

    begin_time = datetime.now()
    parser = ArgumentParser()
-    parser.add_argument("--path", type=str, default='convlab/policy/vtrace_DPT/semantic_level_config.json',
-                        help="Load path for config file")
+    parser.add_argument("--config_name", type=str, default='RuleUser-Semantic-RuleDST',
+                        help="Name of the configuration")
    parser.add_argument("--seed", type=int, default=None,
                        help="Seed for the policy parameter initialization")
    parser.add_argument("--mode", type=str, default='info',
@@ -110,7 +110,8 @@ if __name__ == '__main__':
    parser.add_argument("--save_eval_dials", type=bool, default=False,
                        help="Flag for saving dialogue_info during evaluation")

-    path = parser.parse_args().path
+    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'configs',
+                        f"{parser.parse_args().config_name}.json")
    seed = parser.parse_args().seed
    mode = parser.parse_args().mode
    save_eval = parser.parse_args().save_eval_dials

--- a/convlab/policy/vtrace_DPT/vtrace.py
+++ b/convlab/policy/vtrace_DPT/vtrace.py
@@ -26,7 +26,7 @@ class VTRACE(nn.Module, Policy):
        super(VTRACE, self).__init__()

        dir_name = os.path.dirname(os.path.abspath(__file__))
-        self.config_path = os.path.join(dir_name, 'config.json')
+        self.config_path = os.path.join(dir_name, 'configs', 'multiwoz21_dpt.json')

        with open(self.config_path, 'r') as f:
            cfg = json.load(f)