diff --git a/.gitignore b/.gitignore
index 832e526dc1f997facec8833e85cd357b06d466b2..5665c62346b2f85cdea57cf2686d879e6d9cde1e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -101,6 +101,7 @@ deploy/bert_multiwoz_all.zip
 deploy/templates/dialog_eg.html
 test.py
 
+*convlab2/policy/vector/action_dicts
 *.egg-info
 pre-trained-models/
-venv
\ No newline at end of file
+venv
diff --git a/convlab2/dialog_agent/env.py b/convlab2/dialog_agent/env.py
index dc345786ab154d028e1c569a347fabcc9c5a5def..bee3e943db9d7363a672a4627b35ec23dde5c37d 100755
--- a/convlab2/dialog_agent/env.py
+++ b/convlab2/dialog_agent/env.py
@@ -6,6 +6,7 @@ Created on Wed Jul 17 14:27:34 2019
 """
 
 import pdb
+from copy import deepcopy
 
 
 class Environment():
@@ -47,6 +48,7 @@ class Environment():
             observation) if self.sys_nlu else observation
         self.sys_dst.state['user_action'] = dialog_act
         state = self.sys_dst.update(dialog_act)
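+        # return a copy so that later modules cannot mutate the tracker's internal state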
+        state = deepcopy(state)
         dialog_act = self.sys_dst.state['user_action']
 
         state['history'].append(["sys", model_response])
diff --git a/convlab2/dst/rule/multiwoz/dst.py b/convlab2/dst/rule/multiwoz/dst.py
index a8602a7c249eac1b627fcbe851136d486850bed9..9ff59fbd3d5ae3818f42eea26adf377850b4a6ae 100755
--- a/convlab2/dst/rule/multiwoz/dst.py
+++ b/convlab2/dst/rule/multiwoz/dst.py
@@ -43,7 +43,8 @@ class RuleDST(DST):
                     continue
                 domain_dic = self.state['belief_state'][domain]
                 if slot in domain_dic:
-                    nvalue = normalize_value(self.value_dict, domain, slot, value)
+                    nvalue = normalize_value(
+                        self.value_dict, domain, slot, value)
                     self.state['belief_state'][domain][slot] = nvalue
                 elif slot != 'none' or slot != '':
                     # raise Exception('unknown slot name <{}> of domain <{}>'.format(k, domain))
@@ -63,6 +64,7 @@ class RuleDST(DST):
         self.state = default_state()
         self.state['belief_state'] = deepcopy(self.default_belief_state)
 
+
 if __name__ == '__main__':
     # from convlab2.dst.rule.multiwoz import RuleDST
 
diff --git a/convlab2/dst/rule/multiwoz/usr_dst.py b/convlab2/dst/rule/multiwoz/usr_dst.py
index 26ce84d25d2960ea48793f7d86079bc8e2f187ea..13fc39e0471bd07bfe52beffcd42c762c985f05d 100755
--- a/convlab2/dst/rule/multiwoz/usr_dst.py
+++ b/convlab2/dst/rule/multiwoz/usr_dst.py
@@ -1,12 +1,16 @@
 import json
 import os
 
-from convlab2.util.multiwoz.state import default_state
+from convlab2.util.multiwoz.state import default_state_old as default_state
 from convlab2.dst.rule.multiwoz.dst_util import normalize_value
 from convlab2.dst.rule.multiwoz import RuleDST
 from convlab2.util.multiwoz.multiwoz_slot_trans import REF_SYS_DA
 from convlab2.policy.tus.multiwoz.Da2Goal import SysDa2Goal, UsrDa2Goal
+from data.unified_datasets.multiwoz21.preprocess import normalize_domain_slot_value, reverse_da
+from convlab2.policy.rule.multiwoz.policy_agenda_multiwoz import unified_format, act_dict_to_flat_tuple
 from pprint import pprint
+from copy import deepcopy
+from convlab2.util import load_ontology
 
 SLOT2SEMI = {
     "arriveby": "arriveBy",
@@ -25,8 +29,14 @@ class UserRuleDST(RuleDST):
             It helps check whether ``user_act`` has correct content.
     """
 
-    def __init__(self):
+    def __init__(self, dataset_name='multiwoz21'):
         super().__init__()
+
+        self.state = default_state()
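+        # walk up from this file to the repository root to locate the MultiWOZ value dictionary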
+        path = os.path.dirname(
+            os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
+        path = os.path.join(path, 'data/multiwoz/value_dict.json')
+        self.value_dict = json.load(open(path))
         self.mentioned_domain = []
 
     def update(self, sys_act=None):
@@ -35,6 +45,9 @@ class UserRuleDST(RuleDST):
         :param sys_act:
         :return:
         """
+        sys_act = unified_format(sys_act)
+        sys_act = reverse_da(sys_act)
+        sys_act = act_dict_to_flat_tuple(sys_act)
         # print("dst", user_act)
         self.update_mentioned_domain(sys_act)
         for intent, domain, slot, value in sys_act:
@@ -75,7 +88,7 @@ class UserRuleDST(RuleDST):
             assert domain in self.state['belief_state']
         except:
             raise Exception(
-                'Error: domain <{}> not in new belief state'.format(domain))
+                f'Error: domain <{domain}> not in new belief state')
         domain_dic = self.state['belief_state'][domain]
         assert 'semi' in domain_dic
         assert 'book' in domain_dic
diff --git a/convlab2/policy/README.md b/convlab2/policy/README.md
index cc0b29edaaa66dde5febe62182ee709e800dab67..1990cdd6b03a38fe6f5a4f8b4eb8a9708761c590 100755
--- a/convlab2/policy/README.md
+++ b/convlab2/policy/README.md
@@ -16,3 +16,21 @@ The interfaces for dialog policy are defined in policy.Policy:
 and outputs the next system action.
 
 - **init_session** reset the model variables for a new dialog session.
+
+## Rule-based user simulator results
+
+| Model | Complete rate | Success rate | Average return | Turns | Average actions |
+|-------|---------------|--------------|----------------|-------|-----------------|
+| MLE   |               |              |                |       |                 |
+| PG    |               |              |                |       |                 |
+| GDPL  |               |              |                |       |                 |
+| PPO   |               |              |                |       |                 |
+
+## Transformer-based user simulator (TUS) results
+
+| Model | Complete rate | Success rate | Average return | Turns | Average actions |
+|-------|---------------|--------------|----------------|-------|-----------------|
+| MLE   |               |              |                |       |                 |
+| PG    |               |              |                |       |                 |
+| GDPL  |               |              |                |       |                 |
+| PPO   |               |              |                |       |                 |
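+
+These numbers can be produced with the updated evaluation script, for example for PPO with the semantic-level config shipped in this repository:
+
+```bash
+python convlab2/policy/evaluate.py --model_name PPO --config_path convlab2/policy/ppo/semantic_level_config.json --verbose
+```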
diff --git a/convlab2/policy/evaluate.py b/convlab2/policy/evaluate.py
index a473d49afb59c4ab56c212763c029e8e50bef858..00b8347315f81b0d5955b77738bbe9060203cbf7 100755
--- a/convlab2/policy/evaluate.py
+++ b/convlab2/policy/evaluate.py
@@ -2,7 +2,6 @@
 
 import argparse
 import datetime
-import json
 import logging
 import os
 
@@ -10,12 +9,10 @@ import numpy as np
 import torch
 from convlab2.dialog_agent.agent import PipelineAgent
 from convlab2.dialog_agent.session import BiSession
-from convlab2.dst.rule.multiwoz import RuleDST
-from convlab2.dst.rule.multiwoz.usr_dst import UserRuleDST
 from convlab2.evaluator.multiwoz_eval import MultiWozEvaluator
-from convlab2.policy.tus.multiwoz.TUS import UserPolicy
 from convlab2.policy.rule.multiwoz import RulePolicy
-from convlab2.util.custom_util import set_seed
+from convlab2.task.multiwoz.goal_generator import GoalGenerator
+from convlab2.util.custom_util import set_seed, get_config, env_config, create_goals
 
 
 def init_logging(log_dir_path, path_suffix=None):
@@ -39,68 +36,45 @@ def init_logging(log_dir_path, path_suffix=None):
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 
-def evaluate(args, model_name, load_path, verbose=False):
+def evaluate(config_path, model_name, verbose=False):
     seed = 0
     set_seed(seed)
 
-    dst_sys = RuleDST()
+    conf = get_config(config_path, [])
 
     if model_name == "PPO":
         from convlab2.policy.ppo import PPO
-        if load_path:
-            policy_sys = PPO(False)
-            policy_sys.load(load_path)
-        else:
-            policy_sys = PPO.from_pretrained()
+        policy_sys = PPO(vectorizer=conf['vectorizer_sys_activated'])
     elif model_name == "RULE":
         policy_sys = RulePolicy()
     elif model_name == "PG":
         from convlab2.policy.pg import PG
-        if load_path:
-            policy_sys = PG(False)
-            policy_sys.load(load_path)
-        else:
-            policy_sys = PG.from_pretrained()
+        policy_sys = PG(vectorizer=conf['vectorizer_sys_activated'])
     elif model_name == "MLE":
         from convlab2.policy.mle import MLE
-        if load_path:
-            policy_sys = MLE()
-            policy_sys.load(load_path)
-        else:
-            policy_sys = MLE.from_pretrained()
+        policy_sys = MLE()
     elif model_name == "GDPL":
         from convlab2.policy.gdpl import GDPL
-        if load_path:
-            policy_sys = GDPL(False)
-            policy_sys.load(load_path)
-        else:
-            policy_sys = GDPL.from_pretrained()
-    user_type = args.user.lower()
-    if user_type == "rule":
-        dst_usr = None
-        policy_usr = RulePolicy(character='usr')
-    elif user_type == "tus":
-        dst_usr = UserRuleDST()
-        user_config = json.load(open(args.user_config))
-        policy_usr = UserPolicy(user_config)
-    elif user_type == "vhus":
-        from convlab2.policy.vhus.multiwoz import UserPolicyVHUS
-        dst_usr = None
-        policy_usr = UserPolicyVHUS(
-            load_from_zip=True, model_file="/home/linh/convlab-2/vhus_simulator_multiwoz.zip")
-
-    simulator = PipelineAgent(None, dst_usr, policy_usr, None, 'user')
-    agent_sys = PipelineAgent(None, dst_sys, policy_sys, None, 'sys')
-
-    evaluator = MultiWozEvaluator()
-    sess = BiSession(agent_sys, simulator, None, evaluator)
+        policy_sys = GDPL(vectorizer=conf['vectorizer_sys_activated'])
 
+    try:
+        policy_sys.load(conf['model']['load_path'])
+    except Exception as e:
+        logging.info(f"Could not load a policy: {e}")
+
+    env, sess = env_config(conf, policy_sys)
     action_dict = {}
 
-    task_success = {'Complete': [], 'Success': [], 'Success strict': [], 'total_return': [], 'turns': []}
-    for seed in range(1000, 1400):
+    task_success = {'Complete': [], 'Success': [],
+                    'Success strict': [], 'total_return': [], 'turns': []}
+
+    dialogues = 500
+    goal_generator = GoalGenerator()
+    goals = create_goals(goal_generator, num_goals=dialogues, single_domains=False, allowed_domains=None)
+
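+    # evaluation seeds start at 1000, so seed - 1000 indexes the matching pre-generated goal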
+    for seed in range(1000, 1000 + dialogues):
         set_seed(seed)
-        sess.init_session()
+        sess.init_session(goal=goals[seed-1000])
         sys_response = []
         actions = 0.0
         total_return = 0.0
@@ -133,7 +107,7 @@ def evaluate(args, model_name, load_path, verbose=False):
 
             # logging.info(f"Actions in turn: {len(sys_response)}")
             turns += 1
-            total_return += evaluator.get_reward(session_over)
+            total_return += sess.evaluator.get_reward(session_over)
 
             if session_over:
                 task_succ = sess.evaluator.task_success()
@@ -158,31 +132,24 @@ def evaluate(args, model_name, load_path, verbose=False):
     for key in task_success:
         logging.info(
             f'{key} {len(task_success[key])} {np.average(task_success[key]) if len(task_success[key]) > 0 else 0}')
+    logging.info(f"Average actions: {actions / turns}")
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--model_name", type=str,
                         default="PPO", help="name of model")
-    parser.add_argument("--load_path", type=str,
+    parser.add_argument("--config_path", type=str,
                         default='', help="path of model")
+    parser.add_argument("--verbose", action='store_true',
+                        help="whether to output utterances")
     parser.add_argument("--log_path_suffix", type=str,
                         default="", help="suffix of path of log file")
     parser.add_argument("--log_dir_path", type=str,
                         default="log", help="path of log directory")
-    parser.add_argument("--user_config", type=str,
-                        default="convlab2/policy/tus/multiwoz/exp/default.json")
-    parser.add_argument("--user_mode", type=str, default="")
-    parser.add_argument("--user", type=str, default="rule")
-    parser.add_argument("--verbose", action='store_true', help="whether to output utterances")
 
     args = parser.parse_args()
 
     init_logging(log_dir_path=args.log_dir_path,
                  path_suffix=args.log_path_suffix)
-    evaluate(
-        args=args,
-        model_name=args.model_name,
-        load_path=args.load_path,
-        verbose=args.verbose
-    )
+    evaluate(config_path=args.config_path, model_name=args.model_name, verbose=args.verbose)
diff --git a/convlab2/policy/gdpl/semantic_level_config.json b/convlab2/policy/gdpl/semantic_level_config.json
index d46ad81753614f00ab46e78038b225aa3b723332..8e7178f013cc4981f97e14d5dd5b5920456a0777 100644
--- a/convlab2/policy/gdpl/semantic_level_config.json
+++ b/convlab2/policy/gdpl/semantic_level_config.json
@@ -5,14 +5,9 @@
 		"pretrained_load_path": "",
 		"batchsz": 1000,
 		"seed": 0,
-		"epoch": 200,
+		"epoch": 50,
 		"eval_frequency": 5,
 		"process_num": 4,
-		"use_masking": false,
-		"use_state_entropy": false,
-		"manually_add_entity_names": false,
-		"use_state_mutual_info": false,
-		"use_confidence_scores": false,
 		"sys_semantic_to_usr": false,
 		"num_eval_dialogues": 500
 	},
diff --git a/convlab2/policy/gdpl/train.py b/convlab2/policy/gdpl/train.py
index bb3d1e1eb71002d57603d490f9b6849101263719..a58a54cd9bf679cfc8bb2567a2bc933a3f070164 100755
--- a/convlab2/policy/gdpl/train.py
+++ b/convlab2/policy/gdpl/train.py
@@ -10,15 +10,15 @@ import logging
 import time
 import numpy as np
 import torch
+import random
 
 from convlab2.policy.gdpl import GDPL
 from convlab2.policy.gdpl import RewardEstimator
 from convlab2.policy.rlmodule import Memory
 from torch import multiprocessing as mp
 from argparse import ArgumentParser
-from convlab2.policy.ppo.config import get_config
 from convlab2.util.custom_util import set_seed, init_logging, save_config, move_finished_training, env_config, \
-    eval_policy, log_start_args, save_best, load_config_file
+    eval_policy, log_start_args, save_best, load_config_file, get_config
 from datetime import datetime
 
 sys.path.append(os.path.dirname(os.path.dirname(
@@ -48,7 +48,7 @@ def sampler(pid, queue, evt, env, policy, batchsz, train_seed=0):
     :return:
     """
 
-    buff = Memory(seed=train_seed)
+    buff = Memory()
     # we need to sample batchsz of (state, action, next_state, reward, mask)
     # each trajectory contains `trajectory_len` num of items, so we only need to sample
     # `batchsz//trajectory_len` num of trajectory totally
@@ -59,6 +59,8 @@ def sampler(pid, queue, evt, env, policy, batchsz, train_seed=0):
     traj_len = 50
     real_traj_len = 0
 
+    set_seed(train_seed)
+
     while sampled_num < batchsz:
         # for each trajectory, we reset the env and get initial state
         s = env.reset()
@@ -122,6 +124,7 @@ def sample(env, policy, batchsz, process_num, seed):
     # batchsz will be splitted into each process,
     # final batchsz maybe larger than batchsz parameters
     process_batchsz = np.ceil(batchsz / process_num).astype(np.int32)
+    train_seeds = random.sample(range(0, 1000), process_num)
     # buffer to save all data
     queue = mp.Queue()
 
@@ -135,7 +138,7 @@ def sample(env, policy, batchsz, process_num, seed):
     evt = mp.Event()
     processes = []
     for i in range(process_num):
-        process_args = (i, queue, evt, env, policy, process_batchsz, seed)
+        process_args = (i, queue, evt, env, policy, process_batchsz, train_seeds[i])
         processes.append(mp.Process(target=sampler, args=process_args))
     for p in processes:
         # set the process as daemon, and it will be killed once the main process is stoped.
@@ -250,6 +253,7 @@ if __name__ == '__main__':
         tb_writer.add_scalar(key, eval_dict[key], 0)
     best_complete_rate = eval_dict['complete_rate']
     best_success_rate = eval_dict['success_rate_strict']
+    best_return = eval_dict['avg_return']
 
     logging.info("Start of Training: " +
                  time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()))
@@ -265,9 +269,11 @@ if __name__ == '__main__':
 
             eval_dict = eval_policy(conf, policy_sys, env, sess, save_eval, log_save_path)
 
-            best_complete_rate, best_success_rate = \
-                save_best(policy_sys, best_complete_rate, best_success_rate,
-                          eval_dict["complete_rate"], eval_dict["success_rate_strict"], save_path)
+            best_complete_rate, best_success_rate, best_return = \
+                save_best(policy_sys, best_complete_rate, best_success_rate, best_return,
+                          eval_dict["complete_rate"], eval_dict["success_rate_strict"],
+                          eval_dict["avg_return"], save_path)
+            policy_sys.save(save_path, "last")
             for key in eval_dict:
                 tb_writer.add_scalar(key, eval_dict[key], idx * conf['model']['batchsz'])
 
diff --git a/convlab2/policy/pg/config.json b/convlab2/policy/pg/config.json
index 480325a980a981ad91f749e3223bdf4bf4ca8ae4..8079b6b7c6ceb5fed5012430ccf8cde1c4b48ee4 100755
--- a/convlab2/policy/pg/config.json
+++ b/convlab2/policy/pg/config.json
@@ -1,7 +1,7 @@
 {
 	"batchsz": 32,
 	"gamma": 0.99,
-	"lr": 0.00001,
+	"lr": 0.0000001,
 	"save_dir": "save",
 	"log_dir": "log",
 	"save_per_epoch": 5,
diff --git a/convlab2/policy/pg/semantic_level_config.json b/convlab2/policy/pg/semantic_level_config.json
index d46ad81753614f00ab46e78038b225aa3b723332..8e7178f013cc4981f97e14d5dd5b5920456a0777 100644
--- a/convlab2/policy/pg/semantic_level_config.json
+++ b/convlab2/policy/pg/semantic_level_config.json
@@ -5,14 +5,9 @@
 		"pretrained_load_path": "",
 		"batchsz": 1000,
 		"seed": 0,
-		"epoch": 200,
+		"epoch": 50,
 		"eval_frequency": 5,
 		"process_num": 4,
-		"use_masking": false,
-		"use_state_entropy": false,
-		"manually_add_entity_names": false,
-		"use_state_mutual_info": false,
-		"use_confidence_scores": false,
 		"sys_semantic_to_usr": false,
 		"num_eval_dialogues": 500
 	},
diff --git a/convlab2/policy/pg/train.py b/convlab2/policy/pg/train.py
index 3abcd74b99fbaf46529ff07378bb052e1d8c4e97..0b5c385c28649dd7f5702ca7e6cedfe2c2e9ff7a 100755
--- a/convlab2/policy/pg/train.py
+++ b/convlab2/policy/pg/train.py
@@ -10,14 +10,14 @@ import logging
 import time
 import numpy as np
 import torch
+import random
 
 from convlab2.policy.pg import PG
 from convlab2.policy.rlmodule import Memory
 from torch import multiprocessing as mp
 from argparse import ArgumentParser
-from convlab2.policy.ppo.config import get_config
 from convlab2.util.custom_util import set_seed, init_logging, save_config, move_finished_training, env_config, \
-    eval_policy, log_start_args, save_best, load_config_file
+    eval_policy, log_start_args, save_best, load_config_file, get_config
 from datetime import datetime
 
 sys.path.append(os.path.dirname(os.path.dirname(
@@ -47,7 +47,7 @@ def sampler(pid, queue, evt, env, policy, batchsz, train_seed=0):
     :return:
     """
 
-    buff = Memory(seed=train_seed)
+    buff = Memory()
     # we need to sample batchsz of (state, action, next_state, reward, mask)
     # each trajectory contains `trajectory_len` num of items, so we only need to sample
     # `batchsz//trajectory_len` num of trajectory totally
@@ -58,6 +58,8 @@ def sampler(pid, queue, evt, env, policy, batchsz, train_seed=0):
     traj_len = 50
     real_traj_len = 0
 
+    set_seed(train_seed)
+
     while sampled_num < batchsz:
         # for each trajectory, we reset the env and get initial state
         s = env.reset()
@@ -121,6 +123,7 @@ def sample(env, policy, batchsz, process_num, seed):
     # batchsz will be splitted into each process,
     # final batchsz maybe larger than batchsz parameters
     process_batchsz = np.ceil(batchsz / process_num).astype(np.int32)
+    train_seeds = random.sample(range(0, 1000), process_num)
     # buffer to save all data
     queue = mp.Queue()
 
@@ -134,7 +137,7 @@ def sample(env, policy, batchsz, process_num, seed):
     evt = mp.Event()
     processes = []
     for i in range(process_num):
-        process_args = (i, queue, evt, env, policy, process_batchsz, seed)
+        process_args = (i, queue, evt, env, policy, process_batchsz, train_seeds[i])
         processes.append(mp.Process(target=sampler, args=process_args))
     for p in processes:
         # set the process as daemon, and it will be killed once the main process is stoped.
@@ -246,6 +249,7 @@ if __name__ == '__main__':
         tb_writer.add_scalar(key, eval_dict[key], 0)
     best_complete_rate = eval_dict['complete_rate']
     best_success_rate = eval_dict['success_rate_strict']
+    best_return = eval_dict['avg_return']
 
     logging.info("Start of Training: " +
                  time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()))
@@ -261,9 +265,11 @@ if __name__ == '__main__':
 
             eval_dict = eval_policy(conf, policy_sys, env, sess, save_eval, log_save_path)
 
-            best_complete_rate, best_success_rate = \
-                save_best(policy_sys, best_complete_rate, best_success_rate,
-                          eval_dict["complete_rate"], eval_dict["success_rate_strict"], save_path)
+            best_complete_rate, best_success_rate, best_return = \
+                save_best(policy_sys, best_complete_rate, best_success_rate, best_return,
+                          eval_dict["complete_rate"], eval_dict["success_rate_strict"],
+                          eval_dict["avg_return"], save_path)
+            policy_sys.save(save_path, "last")
             for key in eval_dict:
                 tb_writer.add_scalar(key, eval_dict[key], idx * conf['model']['batchsz'])
 
diff --git a/convlab2/policy/ppo/config.py b/convlab2/policy/ppo/config.py
deleted file mode 100644
index cb119d776644986ac6af827dd24a95398bb4b906..0000000000000000000000000000000000000000
--- a/convlab2/policy/ppo/config.py
+++ /dev/null
@@ -1,102 +0,0 @@
-import os
-import sys
-import json
-
-from convlab2.util.custom_util import load_config_file
-
-
-def map_class(cls_path: str):
-    """
-    Map to class via package text path
-    :param cls_path: str, path with `convlab2` project directory as relative path, separator with `,`
-                            E.g  `convlab2.nlu.svm.camrest.nlu.SVMNLU`
-    :return: class
-    """
-    pkgs = cls_path.split('.')
-    cls = __import__('.'.join(pkgs[:-1]))
-    for pkg in pkgs[1:]:
-        cls = getattr(cls, pkg)
-    return cls
-
-
-def get_config(filepath, args) -> dict:
-    """
-    The configuration file is used to create all the information needed for the deployment,
-    and the necessary security monitoring has been performed, including the mapping of the class.
-    :param filepath: str, dest config file path
-    :return: dict
-    """
-
-    conf = load_config_file(filepath)
-
-    # add project root dir
-    sys.path.append(os.path.abspath(os.path.join(
-        os.path.dirname(__file__), os.path.pardir)))
-
-    for arg in args:
-        if len(arg) == 3:
-            conf[arg[0]][arg[1]] = arg[2]
-        if len(arg) == 4:
-            conf[arg[0]][arg[1]][arg[2]] = arg[3]
-        if len(arg) == 5:
-            conf[arg[0]][arg[1]][arg[2]][arg[3]] = arg[4]
-
-    # Autoload uncertainty settings from policy based on the tracker used
-    dst_name = [model for model in conf['dst_sys']]
-    dst_name = dst_name[0] if dst_name else None
-    vec_name = [model for model in conf['vectorizer_sys']]
-    vec_name = vec_name[0] if vec_name else None
-    if dst_name and 'setsumbt' in dst_name.lower():
-        if 'get_confidence_scores' in conf['dst_sys'][dst_name]['ini_params']:
-            conf['vectorizer_sys'][vec_name]['ini_params']['use_confidence_scores'] = conf['dst_sys'][dst_name]['ini_params']['get_confidence_scores']
-        else:
-            conf['vectorizer_sys'][vec_name]['ini_params']['use_confidence_scores'] = False
-        if 'return_mutual_info' in conf['dst_sys'][dst_name]['ini_params']:
-            conf['vectorizer_sys'][vec_name]['ini_params']['use_mutual_info'] = conf['dst_sys'][dst_name]['ini_params']['return_mutual_info']
-        else:
-            conf['vectorizer_sys'][vec_name]['ini_params']['use_mutual_info'] = False
-        if 'return_entropy' in conf['dst_sys'][dst_name]['ini_params']:
-            conf['vectorizer_sys'][vec_name]['ini_params']['use_entropy'] = conf['dst_sys'][dst_name]['ini_params']['return_entropy']
-        else:
-            conf['vectorizer_sys'][vec_name]['ini_params']['use_entropy'] = False
-
-    from convlab2.nlu import NLU
-    from convlab2.dst import DST
-    from convlab2.policy import Policy
-    from convlab2.nlg import NLG
-
-    modules = ['vectorizer_sys', 'nlu_sys', 'dst_sys', 'sys_nlg',
-               'nlu_usr', 'dst_usr', 'policy_usr', 'usr_nlg']
-
-    # Syncronise all seeds
-    if 'seed' in conf['model']:
-        for module in modules:
-            module_name = [model for model in conf[module]]
-            module_name = module_name[0] if module_name else None
-            if conf[module] and module_name:
-                if 'ini_params' in conf[module][module_name]:
-                    if 'seed' in conf[module][module_name]['ini_params']:
-                        conf[module][module_name]['ini_params']['seed'] = conf['model']['seed']
-
-    # for each unit in modules above, create model save into conf
-    for unit in modules:
-        if conf[unit] == {}:
-            conf[unit + '_activated'] = None
-        else:
-            for (model, infos) in conf[unit].items():
-                cls_path = infos.get('class_path', '')
-                cls = map_class(cls_path)
-                conf[unit + '_class'] = cls
-                conf[unit + '_activated'] = conf[unit +
-                                                 '_class'](**conf[unit][model]['ini_params'])
-                print("Loaded " + model + " for " + unit)
-    return conf
-
-
-if __name__ == '__main__':
-    # test
-    args = [('model', 'seed', 'ThisIsATestSeed'),
-            ('dst_sys', "setsumbt-mul", "ini_params", "get_confidence_scores", True)]
-    path = "/Users/carel17/Projects/Convlab/convlab2/policy/ppo/setsumbt_config.json"
-    conf = get_config(path, args)
-    print(conf)
diff --git a/convlab2/policy/ppo/semantic_level_config.json b/convlab2/policy/ppo/semantic_level_config.json
index d46ad81753614f00ab46e78038b225aa3b723332..b5fa40bd32d2200db6f4be274d6e314a694cf0cf 100644
--- a/convlab2/policy/ppo/semantic_level_config.json
+++ b/convlab2/policy/ppo/semantic_level_config.json
@@ -1,18 +1,13 @@
 {
 	"model": {
-		"load_path": "convlab2/policy/mle/experiments/experiment_2022-03-10-11-44-08/save/supervised",
+		"load_path": "convlab2/policy/mle/experiments/experiment_2022-05-23-14-08-43/save/supervised",
 		"use_pretrained_initialisation": false,
 		"pretrained_load_path": "",
 		"batchsz": 1000,
 		"seed": 0,
-		"epoch": 200,
+		"epoch": 50,
 		"eval_frequency": 5,
 		"process_num": 4,
-		"use_masking": false,
-		"use_state_entropy": false,
-		"manually_add_entity_names": false,
-		"use_state_mutual_info": false,
-		"use_confidence_scores": false,
 		"sys_semantic_to_usr": false,
 		"num_eval_dialogues": 500
 	},
@@ -20,7 +15,7 @@
 		"uncertainty_vector_mul": {
 			"class_path": "convlab2.policy.vector.vector_binary.VectorBinary",
 			"ini_params": {
-				"use_masking": false,
+				"use_masking": true,
 				"manually_add_entity_names": false,
 				"seed": 0
 			}
diff --git a/convlab2/policy/ppo/train.py b/convlab2/policy/ppo/train.py
index 67a2b6c5282db18b624eb25852ad12ba2ca982b8..aa1e4e77824906676c8660c82a2185b0eaf8ec80 100755
--- a/convlab2/policy/ppo/train.py
+++ b/convlab2/policy/ppo/train.py
@@ -10,14 +10,14 @@ import logging
 import time
 import numpy as np
 import torch
+import random
 
 from convlab2.policy.ppo import PPO
 from convlab2.policy.rlmodule import Memory
 from torch import multiprocessing as mp
 from argparse import ArgumentParser
-from convlab2.policy.ppo.config import get_config
 from convlab2.util.custom_util import set_seed, init_logging, save_config, move_finished_training, env_config, \
-    eval_policy, log_start_args, save_best, load_config_file
+    eval_policy, log_start_args, save_best, load_config_file, get_config
 from datetime import datetime
 
 sys.path.append(os.path.dirname(os.path.dirname(
@@ -47,7 +47,7 @@ def sampler(pid, queue, evt, env, policy, batchsz, train_seed=0):
     :return:
     """
 
-    buff = Memory(seed=train_seed)
+    buff = Memory()
     # we need to sample batchsz of (state, action, next_state, reward, mask)
     # each trajectory contains `trajectory_len` num of items, so we only need to sample
     # `batchsz//trajectory_len` num of trajectory totally
@@ -58,6 +58,8 @@ def sampler(pid, queue, evt, env, policy, batchsz, train_seed=0):
     traj_len = 50
     real_traj_len = 0
 
+    set_seed(train_seed)
+
     while sampled_num < batchsz:
         # for each trajectory, we reset the env and get initial state
         s = env.reset()
@@ -121,6 +123,7 @@ def sample(env, policy, batchsz, process_num, seed):
     # batchsz will be splitted into each process,
     # final batchsz maybe larger than batchsz parameters
     process_batchsz = np.ceil(batchsz / process_num).astype(np.int32)
+    train_seeds = random.sample(range(0, 1000), process_num)
     # buffer to save all data
     queue = mp.Queue()
 
@@ -134,7 +137,7 @@ def sample(env, policy, batchsz, process_num, seed):
     evt = mp.Event()
     processes = []
     for i in range(process_num):
-        process_args = (i, queue, evt, env, policy, process_batchsz, seed)
+        process_args = (i, queue, evt, env, policy, process_batchsz, train_seeds[i])
         processes.append(mp.Process(target=sampler, args=process_args))
     for p in processes:
         # set the process as daemon, and it will be killed once the main process is stoped.
@@ -246,6 +249,7 @@ if __name__ == '__main__':
         tb_writer.add_scalar(key, eval_dict[key], 0)
     best_complete_rate = eval_dict['complete_rate']
     best_success_rate = eval_dict['success_rate_strict']
+    best_return = eval_dict['avg_return']
 
     logging.info("Start of Training: " +
                  time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()))
@@ -261,9 +265,11 @@ if __name__ == '__main__':
 
             eval_dict = eval_policy(conf, policy_sys, env, sess, save_eval, log_save_path)
 
-            best_complete_rate, best_success_rate = \
-                save_best(policy_sys, best_complete_rate, best_success_rate,
-                          eval_dict["complete_rate"], eval_dict["success_rate_strict"], save_path)
+            best_complete_rate, best_success_rate, best_return = \
+                save_best(policy_sys, best_complete_rate, best_success_rate, best_return,
+                          eval_dict["complete_rate"], eval_dict["success_rate_strict"],
+                          eval_dict["avg_return"], save_path)
+            policy_sys.save(save_path, "last")
             for key in eval_dict:
                 tb_writer.add_scalar(key, eval_dict[key], idx * conf['model']['batchsz'])
 
diff --git a/convlab2/policy/rlmodule.py b/convlab2/policy/rlmodule.py
index b67cca2c03163ad65933df2d5ff2de7d22bca31f..db46026656d908b2453a9143b3f482ce7378e382 100755
--- a/convlab2/policy/rlmodule.py
+++ b/convlab2/policy/rlmodule.py
@@ -319,17 +319,8 @@ Transition = namedtuple('Transition', ('state', 'action',
 
 class Memory(object):
 
-    def __init__(self, seed=0):
+    def __init__(self):
         self.memory = []
-        self.set_seed(seed)
-    
-    def set_seed(self, seed):
-        np.random.seed(seed)
-        torch.random.manual_seed(seed)
-        random.seed(seed)
-        torch.manual_seed(seed)
-        if torch.cuda.is_available():
-            torch.cuda.manual_seed_all(seed)
 
     def push(self, *args):
         """Saves a transition."""
diff --git a/convlab2/policy/rule/multiwoz/policy_agenda_multiwoz.py b/convlab2/policy/rule/multiwoz/policy_agenda_multiwoz.py
index 30e800d9a42b53a9d6b869130fe0343b814a0031..315227944c9bea49cb45c10d0715488b8469b14d 100755
--- a/convlab2/policy/rule/multiwoz/policy_agenda_multiwoz.py
+++ b/convlab2/policy/rule/multiwoz/policy_agenda_multiwoz.py
@@ -23,7 +23,8 @@ def unified_format(acts):
     new_acts = {'categorical': []}
     for act in acts:
         intent, domain, slot, value = act
-        new_acts['categorical'].append({"intent": intent, "domain": domain, "slot": slot, "value": value})
+        new_acts['categorical'].append(
+            {"intent": intent, "domain": domain, "slot": slot, "value": value})
     return new_acts
 
 
@@ -134,8 +135,8 @@ class UserPolicyAgendaMultiWoz(Policy):
         action = {}
         while len(action) == 0:
             # A -> A' + user_action
-            # action = self.agenda.get_action(random.randint(2, self.max_initiative))
-            action = self.agenda.get_action(self.max_initiative)
+            action = self.agenda.get_action(random.randint(1, self.max_initiative))
+            #action = self.agenda.get_action(self.max_initiative)
 
             # transform to DA
             action = self._transform_usract_out(action)
@@ -146,7 +147,8 @@ class UserPolicyAgendaMultiWoz(Policy):
             domain, intent = domain_intent.lower().split('-')
             for slot, value in svs:
                 try:
-                    domain, slot, value = normalize_domain_slot_value(domain, slot, value)
+                    domain, slot, value = normalize_domain_slot_value(
+                        domain, slot, value)
                 except:
                     pass
                 tuples.append([intent, domain, slot, value])
diff --git a/convlab2/policy/tus/multiwoz/TUS.py b/convlab2/policy/tus/multiwoz/TUS.py
index 1655d44a6552661e3b438c664b2e1392dd905641..3c0f85c031261f4da852bac13d5f8971390e74f8 100644
--- a/convlab2/policy/tus/multiwoz/TUS.py
+++ b/convlab2/policy/tus/multiwoz/TUS.py
@@ -15,6 +15,8 @@ from convlab2.policy.policy import Policy
 from convlab2.task.multiwoz.goal_generator import GoalGenerator
 from convlab2.util.multiwoz.multiwoz_slot_trans import REF_USR_DA
 from convlab2.util.custom_util import model_downloader
+from data.unified_datasets.multiwoz21.preprocess import normalize_domain_slot_value, reverse_da
+from convlab2.policy.rule.multiwoz.policy_agenda_multiwoz import unified_format, act_dict_to_flat_tuple
 
 
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -65,7 +67,7 @@ class UserActionPolicy(Policy):
 
     def _no_offer(self, system_in):
         for intent, domain, slot, value in system_in:
-            if intent == "NoOffer":
+            if intent.lower() == "nooffer":
                 self.terminated = True
                 return True
             else:
@@ -73,21 +75,26 @@ class UserActionPolicy(Policy):
 
     def predict(self, state, mode="max"):
         # update goal
-        self.goal.update_user_goal(action=state["system_action"],
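+        # system acts arrive in the unified data format; convert them back to the
+        # legacy (intent, domain, slot, value) tuples used throughout this policy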
+        sys_dialog_act = state["system_action"]
+        sys_dialog_act = unified_format(sys_dialog_act)
+        sys_dialog_act = reverse_da(sys_dialog_act)
+        sys_dialog_act = act_dict_to_flat_tuple(sys_dialog_act)
+
+        self.goal.update_user_goal(action=sys_dialog_act,
                                    state=state['belief_state'])
         # self.goal.update_info_record(sys_act=state["system_action"])
-        self.goal.add_sys_da(state["system_action"])
-        self.sys_acts.append(state["system_action"])
+        self.goal.add_sys_da(sys_dialog_act)
+        self.sys_acts.append(sys_dialog_act)
 
         # need better way to handle this
-        if self._no_offer(state["system_action"]):
+        if self._no_offer(sys_dialog_act):
             return [["bye", "general", "None", "None"]]
 
         # update constraint
         self.time_step += 2
 
         self.predict_action_list = self.goal.action_list(
-            sys_act=state["system_action"],
+            sys_act=sys_dialog_act,
             all_values=self.all_values)
 
         feature, mask = self.feat_handler.get_feature(
@@ -95,7 +102,7 @@ class UserActionPolicy(Policy):
             self.goal,
             state['belief_state'],
             self.sys_history_state,
-            state["system_action"],
+            sys_dialog_act,
             self.pre_usr_act)
         feature = torch.tensor([feature], dtype=torch.float).to(DEVICE)
         mask = torch.tensor([mask], dtype=torch.bool).to(DEVICE)
@@ -117,7 +124,17 @@ class UserActionPolicy(Policy):
         # self.goal.update_info_record(usr_act=usr_action)
         self.goal.add_usr_da(usr_action)
 
-        return usr_action
+        # convert the user action to the unified data format
+        norm_usr_action = []
+        for intent, domain, slot, value in usr_action:
+            intent = intent.lower()
+            domain, slot, value = normalize_domain_slot_value(
+                domain, slot, value)
+            norm_usr_action.append([intent, domain, slot, value])
+
+        return norm_usr_action
+
+        # return usr_action
 
     def init_session(self, goal=None):
         self.mentioned_domain = []
@@ -385,10 +402,13 @@ class UserActionPolicy(Policy):
 
 class UserPolicy(Policy):
     def __init__(self, config):
-        self.config = config
+        if isinstance(config, str):
+            self.config = json.load(open(config))
+        else:
+            self.config = config
         if not os.path.exists(self.config["model_dir"]):
-            os.mkdir(self.config["model_dir"])
-            model_downloader(self.config["model_dir"],
+            # os.mkdir(self.config["model_dir"])
+            model_downloader(os.path.dirname(self.config["model_dir"]),
                              "https://zenodo.org/record/5779832/files/default.zip")
 
         self.policy = UserActionPolicy(self.config)
diff --git a/convlab2/policy/vector/vector_base.py b/convlab2/policy/vector/vector_base.py
index ad0f6345819afa4d65523c3b88e3c341f3a8b2ed..497386a632615c8e775cf1a15c29af87f3348b4a 100644
--- a/convlab2/policy/vector/vector_base.py
+++ b/convlab2/policy/vector/vector_base.py
@@ -152,11 +152,13 @@ class VectorBase(Vector):
                 system = act['system']
                 user = act['user']
                 if system:
-                    system_acts_with_value = self.add_values_to_act(act['domain'], act['intent'], act['slot'], True)
+                    system_acts_with_value = self.add_values_to_act(
+                        act['domain'], act['intent'], act['slot'], True)
                     self.da_voc.extend(system_acts_with_value)
 
                 if user:
-                    user_acts_with_value = self.add_values_to_act(act['domain'], act['intent'], act['slot'], False)
+                    user_acts_with_value = self.add_values_to_act(
+                        act['domain'], act['intent'], act['slot'], False)
                     self.da_voc_opp.extend(user_acts_with_value)
 
         self.da_voc.sort()
@@ -311,7 +313,8 @@ class VectorBase(Vector):
 
         # Leave slots out of constraints to find which slot constraint results in no entities being found
         for constraint_slot in constraints:
-            state = [[slot, value] for slot, value in constraints.items() if slot != constraint_slot]
+            state = [[slot, value] for slot,
+                     value in constraints.items() if slot != constraint_slot]
             entities = self.db.query(domain, state, topk=1)
             if entities:
                 return constraint_slot
@@ -378,7 +381,8 @@ class VectorBase(Vector):
         for domint in nooffer:
             domain, intent = domint.split('-')
             slot = self.find_nooffer_slot(domain)
-            action[domint] = [[slot, '1']] if slot != 'none' else [[slot, 'none']]
+            action[domint] = [[slot, '1']
+                              ] if slot != 'none' else [[slot, 'none']]
 
         # Randomly select booking constraint "causing" no_book
         nobook = [domint for domint in action if 'nobook' in domint]
@@ -386,12 +390,14 @@ class VectorBase(Vector):
             domain, intent = domint.split('-')
             if domain in self.state:
                 slots = self.state[domain]
-                slots = [slot for slot, i in slots.items() if i and 'book' in slot]
+                slots = [slot for slot, i in slots.items()
+                         if i and 'book' in slot]
                 slots.append('none')
                 slot = np.random.choice(slots)
             else:
                 slot = 'none'
-            action[domint] = [[slot, '1']] if slot != 'none' else [[slot, 'none']]
+            action[domint] = [[slot, '1']
+                              ] if slot != 'none' else [[slot, 'none']]
 
         if self.always_inform_booking_reference:
             action = self.add_booking_reference(action)
@@ -405,9 +411,9 @@ class VectorBase(Vector):
             for [item, idx] in action[key]:
                 if index != -1 and index != idx and idx != '?':
                     pass
-                    #logging.debug(
+                    # logging.debug(
                     #    "System is likely refering multiple entities within this turn")
-                    #logging.debug(action[key])
+                    # logging.debug(action[key])
                 index = idx
         action = lexicalize_da(action, entities, self.state, self.requestable)
 
diff --git a/convlab2/util/custom_util.py b/convlab2/util/custom_util.py
index 8ca73ff60930ba8e5f8d39052fbbbf472bcce193..845316d3839cdb89b8dff84903d35e0d7e2788a6 100644
--- a/convlab2/util/custom_util.py
+++ b/convlab2/util/custom_util.py
@@ -7,7 +7,6 @@ import json
 import zipfile
 import numpy as np
 import torch
-from datasets import load_dataset
 from tensorboardX import SummaryWriter
 from convlab2.util.file_util import cached_path
 from convlab2.policy.evaluate_distributed import evaluate_distributed
@@ -19,6 +18,8 @@ from convlab2.dst.rule.multiwoz import RuleDST
 from convlab2.policy.rule.multiwoz import RulePolicy
 from convlab2.evaluator.multiwoz_eval import MultiWozEvaluator
 from convlab2.util import load_dataset
+from convlab2.policy.rule.multiwoz.policy_agenda_multiwoz import Goal
+
 import shutil
 
 
@@ -119,18 +120,22 @@ def log_start_args(conf):
         f"We use {conf['model']['num_eval_dialogues']} dialogues for evaluation.")
 
 
-def save_best(policy_sys, best_complete_rate, best_success_rate, complete_rate, success_rate, save_path):
+def save_best(policy_sys, best_complete_rate, best_success_rate, best_return, complete_rate, success_rate, avg_return,
+              save_path):
     # policy_sys.save(save_path, "best")
-    if success_rate > best_success_rate:
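+    # checkpoint the policy whenever the average return improves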
+    if avg_return > best_return:
         logging.info("Saving best policy.")
         policy_sys.save(save_path, "best")
+        best_return = avg_return
+    if success_rate > best_success_rate:
         best_success_rate = success_rate
     if complete_rate > best_complete_rate:
         best_complete_rate = complete_rate
         # policy_sys.save(save_path, "best")
     logging.info(
-        f"Best Complete Rate: {best_complete_rate}, Best Success Rate: {best_success_rate}")
-    return best_complete_rate, best_success_rate
+        f"Best Complete Rate: {best_complete_rate}, Best Success Rate: {best_success_rate}, "
+        f"Best Average Return: {best_return}")
+    return best_complete_rate, best_success_rate, best_return
 
 
 def eval_policy(conf, policy_sys, env, sess, save_eval, log_save_path):
@@ -432,6 +437,107 @@ def act_dict_to_flat_tuple(acts):
             tuples.append([intent, domain, slot, value])
 
 
+def create_goals(goal_generator, num_goals, single_domains=False, allowed_domains=None):
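+    """
+    Sample goals from the goal generator until ``num_goals`` goals satisfy the
+    optional single-domain and allowed-domain filters.
+    """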
+
+    collected_goals = []
+    while len(collected_goals) != num_goals:
+        goal = Goal(goal_generator)
+        if single_domains and len(goal.domain_goals) > 1:
+            continue
+        if allowed_domains is not None and not set(goal.domain_goals).issubset(set(allowed_domains)):
+            continue
+        collected_goals.append(goal)
+    return collected_goals
+
+
+def map_class(cls_path: str):
+    """
+    Map to class via package text path
+    :param cls_path: str, dotted class path relative to the `convlab2` project root,
+                            e.g. `convlab2.nlu.svm.camrest.nlu.SVMNLU`
+    :return: class
+    """
+    pkgs = cls_path.split('.')
+    cls = __import__('.'.join(pkgs[:-1]))
+    for pkg in pkgs[1:]:
+        cls = getattr(cls, pkg)
+    return cls
+
+
+def get_config(filepath, args) -> dict:
+    """
+    Load the configuration file and instantiate every configured module,
+    including mapping the listed class paths to their classes.
+    :param filepath: str, path to the config file
+    :return: dict
+    """
+
+    conf = load_config_file(filepath)
+
+    # add project root dir
+    sys.path.append(os.path.abspath(os.path.join(
+        os.path.dirname(__file__), os.path.pardir)))
+
+    for arg in args:
+        if len(arg) == 3:
+            conf[arg[0]][arg[1]] = arg[2]
+        if len(arg) == 4:
+            conf[arg[0]][arg[1]][arg[2]] = arg[3]
+        if len(arg) == 5:
+            conf[arg[0]][arg[1]][arg[2]][arg[3]] = arg[4]
+
+    # Autoload uncertainty settings from policy based on the tracker used
+    dst_name = [model for model in conf['dst_sys']]
+    dst_name = dst_name[0] if dst_name else None
+    vec_name = [model for model in conf['vectorizer_sys']]
+    vec_name = vec_name[0] if vec_name else None
+    if dst_name and 'setsumbt' in dst_name.lower():
+        if 'get_confidence_scores' in conf['dst_sys'][dst_name]['ini_params']:
+            conf['vectorizer_sys'][vec_name]['ini_params']['use_confidence_scores'] = conf['dst_sys'][dst_name]['ini_params']['get_confidence_scores']
+        else:
+            conf['vectorizer_sys'][vec_name]['ini_params']['use_confidence_scores'] = False
+        if 'return_mutual_info' in conf['dst_sys'][dst_name]['ini_params']:
+            conf['vectorizer_sys'][vec_name]['ini_params']['use_mutual_info'] = conf['dst_sys'][dst_name]['ini_params']['return_mutual_info']
+        else:
+            conf['vectorizer_sys'][vec_name]['ini_params']['use_mutual_info'] = False
+        if 'return_entropy' in conf['dst_sys'][dst_name]['ini_params']:
+            conf['vectorizer_sys'][vec_name]['ini_params']['use_entropy'] = conf['dst_sys'][dst_name]['ini_params']['return_entropy']
+        else:
+            conf['vectorizer_sys'][vec_name]['ini_params']['use_entropy'] = False
+
+    from convlab2.nlu import NLU
+    from convlab2.dst import DST
+    from convlab2.policy import Policy
+    from convlab2.nlg import NLG
+
+    modules = ['vectorizer_sys', 'nlu_sys', 'dst_sys', 'sys_nlg',
+               'nlu_usr', 'dst_usr', 'policy_usr', 'usr_nlg']
+
+    # Synchronise all seeds
+    if 'seed' in conf['model']:
+        for module in modules:
+            module_name = [model for model in conf[module]]
+            module_name = module_name[0] if module_name else None
+            if conf[module] and module_name:
+                if 'ini_params' in conf[module][module_name]:
+                    if 'seed' in conf[module][module_name]['ini_params']:
+                        conf[module][module_name]['ini_params']['seed'] = conf['model']['seed']
+
+    # for each unit in the modules above, instantiate the configured model and store it in conf
+    for unit in modules:
+        if conf[unit] == {}:
+            conf[unit + '_activated'] = None
+        else:
+            for (model, infos) in conf[unit].items():
+                cls_path = infos.get('class_path', '')
+                cls = map_class(cls_path)
+                conf[unit + '_class'] = cls
+                conf[unit + '_activated'] = conf[unit +
+                                                 '_class'](**conf[unit][model]['ini_params'])
+                print("Loaded " + model + " for " + unit)
+    return conf
+
+
 if __name__ == '__main__':
     get_goal_distribution()
 
diff --git a/convlab2/util/multiwoz/state.py b/convlab2/util/multiwoz/state.py
index 8f9aad11a074a8461c583810af4a388edda19d11..5b65ba066b33cf9cd1e2fb49515406d962f03588 100755
--- a/convlab2/util/multiwoz/state.py
+++ b/convlab2/util/multiwoz/state.py
@@ -7,3 +7,94 @@ def default_state():
                  terminated=False,
                  history=[])
     return state
+
+
+def default_state_old():
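+    """Return the legacy MultiWOZ state dict with the full belief-state schema pre-filled."""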
+    state = dict(user_action=[],
+                 system_action=[],
+                 belief_state={},
+                 request_state={},
+                 terminated=False,
+                 history=[])
+    state['belief_state'] = {
+        "police": {
+            "book": {
+                "booked": []
+            },
+            "semi": {}
+        },
+        "hotel": {
+            "book": {
+                "booked": [],
+                "people": "",
+                "day": "",
+                "stay": ""
+            },
+            "semi": {
+                "name": "",
+                "area": "",
+                "parking": "",
+                "pricerange": "",
+                "stars": "",
+                "internet": "",
+                "type": ""
+            }
+        },
+        "attraction": {
+            "book": {
+                "booked": []
+            },
+            "semi": {
+                "type": "",
+                "name": "",
+                "area": ""
+            }
+        },
+        "restaurant": {
+            "book": {
+                "booked": [],
+                "people": "",
+                "day": "",
+                "time": ""
+            },
+            "semi": {
+                "food": "",
+                "pricerange": "",
+                "name": "",
+                "area": "",
+            }
+        },
+        "hospital": {
+            "book": {
+                "booked": []
+            },
+            "semi": {
+                "department": ""
+            }
+        },
+        "taxi": {
+            "book": {
+                "booked": []
+            },
+            "semi": {
+                "leaveAt": "",
+                "destination": "",
+                "departure": "",
+                "arriveBy": ""
+            }
+        },
+        "train": {
+            "book": {
+                "booked": [],
+                "people": ""
+            },
+            "semi": {
+                "leaveAt": "",
+                "destination": "",
+                "day": "",
+                "arriveBy": "",
+                "departure": ""
+            }
+        }
+    }
+    return state