diff --git a/convlab/dialog_agent/agent.py b/convlab/dialog_agent/agent.py
index 79f61e2b18f04e702c690a33cdf313656b2615c6..9a9dbb31e0599ca7e73b384bb6028d479b245c25 100755
--- a/convlab/dialog_agent/agent.py
+++ b/convlab/dialog_agent/agent.py
@@ -64,7 +64,7 @@ class PipelineAgent(Agent):
            =====   =====    ======  ===     ==      ===
     """
 
-    def __init__(self, nlu: NLU, dst: DST, policy: Policy, nlg: NLG, name: str):
+    def __init__(self, nlu: NLU, dst: DST, policy: Policy, nlg: NLG, name: str, return_semantic_acts: bool = False):
         """The constructor of PipelineAgent class.
 
         Here are some special combination cases:
@@ -95,6 +95,7 @@ class PipelineAgent(Agent):
         self.dst = dst
         self.policy = policy
         self.nlg = nlg
+        self.return_semantic_acts = return_semantic_acts
 
         self.init_session()
         self.agent_saves = []
@@ -199,6 +200,8 @@ class PipelineAgent(Agent):
 
         self.turn += 1
         self.agent_saves.append(self.save_info())
+        if self.return_semantic_acts:
+            return self.output_action
         return model_response
 
     def save_info(self):
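
The new flag lets a pipeline agent skip surface realization and return its semantic dialogue acts directly, which is what a semantic-level simulator or evaluator consumes. A minimal sketch of the changed behavior, assuming placeholder NLU/DST/policy/NLG instances (none of the concrete module names below come from this patch):

    # Hypothetical wiring: my_nlu, my_dst, my_policy, my_nlg stand in for
    # whatever concrete modules the pipeline is assembled from.
    from convlab.dialog_agent.agent import PipelineAgent

    sys_agent = PipelineAgent(my_nlu, my_dst, my_policy, my_nlg, name='sys',
                              return_semantic_acts=True)

    out = sys_agent.response("I need a cheap hotel in the north.")
    # With return_semantic_acts=True, `out` is self.output_action (the
    # policy's dialogue acts); with the default False it is model_response.
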
diff --git a/convlab/policy/ppo/semantic_level_config.json b/convlab/policy/ppo/semantic_level_config.json
index b9908c9cb7717515775221227f3fba19636d20dc..04b0626a10bc8d48add16732df26a7cc00a35088 100644
--- a/convlab/policy/ppo/semantic_level_config.json
+++ b/convlab/policy/ppo/semantic_level_config.json
@@ -6,7 +6,7 @@
 		"batchsz": 1000,
 		"seed": 0,
 		"epoch": 10,
-		"eval_frequency": 1,
+		"eval_frequency": 5,
 		"process_num": 4,
 		"sys_semantic_to_usr": false,
 		"num_eval_dialogues": 500
diff --git a/convlab/util/custom_util.py b/convlab/util/custom_util.py
index 38d8b92a36efd67bdf9166c1c5f9f20734d1ecb5..cc49d733d25566d502b10bac71189e05cfbc3fef 100644
--- a/convlab/util/custom_util.py
+++ b/convlab/util/custom_util.py
@@ -25,8 +25,7 @@ import signal
 
 
 slot_mapping = {"pricerange": "price range", "post": "postcode", "arriveBy": "arrive by", "leaveAt": "leave at",
-                "Id": "trainid", "ref": "reference"}
-
+                "Id": "train id", "ref": "reference", "trainID": "train id"}
 
 sys.path.append(os.path.dirname(os.path.dirname(
     os.path.dirname(os.path.abspath(__file__)))))
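
slot_mapping now also catches the camel-case trainID key, and Id maps to "train id" (with a space) instead of "trainid", so both spellings collapse to the same surface form. A small sketch of the lookup pattern such a table supports (normalize_slot is an illustrative helper, not a function from custom_util):

    slot_mapping = {"pricerange": "price range", "post": "postcode",
                    "arriveBy": "arrive by", "leaveAt": "leave at",
                    "Id": "train id", "ref": "reference", "trainID": "train id"}

    def normalize_slot(slot: str) -> str:
        # Fall back to the raw slot name when no mapping exists.
        return slot_mapping.get(slot, slot)

    assert normalize_slot("trainID") == normalize_slot("Id") == "train id"
    assert normalize_slot("area") == "area"  # unmapped slots pass through
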
@@ -103,7 +102,8 @@ def load_config_file(filepath: str = None) -> dict:
 
 def save_config(terminal_args, config_file_args, config_save_path, policy_config=None):
     config_save_path = os.path.join(config_save_path, f'config_saved.json')
-    args_dict = {"args": terminal_args, "config": config_file_args, "policy_config": policy_config}
+    args_dict = {"args": terminal_args,
+                 "config": config_file_args, "policy_config": policy_config}
     json.dump(args_dict, open(config_save_path, 'w'))
 
 
@@ -165,26 +165,29 @@ def eval_policy(conf, policy_sys, env, sess, save_eval, log_save_path, single_do
     goals = []
     for seed in range(1000, 1000 + conf['model']['num_eval_dialogues']):
         set_seed(seed)
-        goal = create_goals(goal_generator, 1, single_domain_goals, allowed_domains)
+        goal = create_goals(goal_generator, 1,
+                            single_domain_goals, allowed_domains)
         goals.append(goal[0])
 
     if conf['model']['process_num'] == 1:
         complete_rate, success_rate, success_rate_strict, avg_return, turns, \
             avg_actions, task_success, book_acts, inform_acts, request_acts, \
-                select_acts, offer_acts, recommend_acts = evaluate(sess,
-                                                num_dialogues=conf['model']['num_eval_dialogues'],
-                                                sys_semantic_to_usr=conf['model'][
-                                                    'sys_semantic_to_usr'],
-                                                save_flag=save_eval, save_path=log_save_path, goals=goals)
-
-        total_acts = book_acts + inform_acts + request_acts + select_acts + offer_acts + recommend_acts
+            select_acts, offer_acts, recommend_acts = evaluate(
+                sess,
+                num_dialogues=conf['model']['num_eval_dialogues'],
+                sys_semantic_to_usr=conf['model']['sys_semantic_to_usr'],
+                save_flag=save_eval, save_path=log_save_path, goals=goals)
+
+        total_acts = book_acts + inform_acts + request_acts + \
+            select_acts + offer_acts + recommend_acts
     else:
         complete_rate, success_rate, success_rate_strict, avg_return, turns, \
             avg_actions, task_success, book_acts, inform_acts, request_acts, \
             select_acts, offer_acts, recommend_acts = \
             evaluate_distributed(sess, list(range(1000, 1000 + conf['model']['num_eval_dialogues'])),
                                  conf['model']['process_num'], goals)
-        total_acts = book_acts + inform_acts + request_acts + select_acts + offer_acts + recommend_acts
+        total_acts = book_acts + inform_acts + request_acts + \
+            select_acts + offer_acts + recommend_acts
 
         task_success_gathered = {}
         for task_dict in task_success:
@@ -196,12 +199,18 @@ def eval_policy(conf, policy_sys, env, sess, save_eval, log_save_path, single_do
 
     policy_sys.is_train = True
 
-    mean_complete, err_complete = np.average(complete_rate), np.std(complete_rate) / np.sqrt(len(complete_rate))
-    mean_success, err_success = np.average(success_rate), np.std(success_rate) / np.sqrt(len(success_rate))
-    mean_success_strict, err_success_strict = np.average(success_rate_strict), np.std(success_rate_strict) / np.sqrt(len(success_rate_strict))
-    mean_return, err_return = np.average(avg_return), np.std(avg_return) / np.sqrt(len(avg_return))
-    mean_turns, err_turns = np.average(turns), np.std(turns) / np.sqrt(len(turns))
-    mean_actions, err_actions = np.average(avg_actions), np.std(avg_actions) / np.sqrt(len(avg_actions))
+    mean_complete, err_complete = np.average(complete_rate), \
+        np.std(complete_rate) / np.sqrt(len(complete_rate))
+    mean_success, err_success = np.average(success_rate), \
+        np.std(success_rate) / np.sqrt(len(success_rate))
+    mean_success_strict, err_success_strict = np.average(success_rate_strict), \
+        np.std(success_rate_strict) / np.sqrt(len(success_rate_strict))
+    mean_return, err_return = np.average(avg_return), \
+        np.std(avg_return) / np.sqrt(len(avg_return))
+    mean_turns, err_turns = np.average(turns), \
+        np.std(turns) / np.sqrt(len(turns))
+    mean_actions, err_actions = np.average(avg_actions), \
+        np.std(avg_actions) / np.sqrt(len(avg_actions))
 
     logging.info(f"Complete: {mean_complete}+-{round(err_complete, 2)}, "
                  f"Success: {mean_success}+-{round(err_success, 2)}, "
@@ -380,7 +389,6 @@ def evaluate(sess, num_dialogues=400, sys_semantic_to_usr=False, save_flag=False
                 complete = sess.evaluator.complete
                 task_succ = sess.evaluator.success
                 task_succ_strict = sess.evaluator.success_strict
-                break
         else:
             complete = 0
             task_succ = 0
@@ -423,12 +431,12 @@ def evaluate(sess, num_dialogues=400, sys_semantic_to_usr=False, save_flag=False
         save_file.close()
     # save dialogue_info and clear mem
 
-    return np.average(task_success['All_user_sim']), np.average(task_success['All_evaluator']), \
-        np.average(task_success['All_evaluator_strict']), np.average(task_success['total_return']), \
-        np.average(task_success['turns']), np.average(task_success['avg_actions']), task_success, \
+    return task_success['All_user_sim'], task_success['All_evaluator'], task_success['All_evaluator_strict'], \
+        task_success['total_return'], task_success['turns'], task_success['avg_actions'], task_success, \
         np.average(task_success['total_booking_acts']), np.average(task_success['total_inform_acts']), \
         np.average(task_success['total_request_acts']), np.average(task_success['total_select_acts']), \
-        np.average(task_success['total_offer_acts']), np.average(task_success['total_recommend_acts'])
+        np.average(task_success['total_offer_acts']), \
+        np.average(task_success['total_recommend_acts'])
 
 
 def model_downloader(download_dir, model_path):
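
The averaging for the first six statistics (completion, success, strict success, return, turns, actions) moved out of evaluate and into eval_policy: the function now returns the raw per-dialogue lists, which is what makes the error bars above possible, while the dialogue-act counters remain pre-averaged. A toy illustration of the changed contract (numbers invented for the example):

    import numpy as np

    task_success = {"All_user_sim": [1, 0, 1, 1]}  # per-dialogue outcomes

    old_style = np.average(task_success['All_user_sim'])  # scalar 0.75, as before
    new_style = task_success['All_user_sim']              # the list itself, as now
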
@@ -622,7 +630,8 @@ def get_config(filepath, args) -> dict:
                 cls_path = infos.get('class_path', '')
                 cls = map_class(cls_path)
                 conf[unit + '_class'] = cls
-                conf[unit + '_activated'] = conf[unit + '_class'](**conf[unit][model]['ini_params'])
+                conf[unit + '_activated'] = \
+                    conf[unit + '_class'](**conf[unit][model]['ini_params'])
                 print("Loaded " + model + " for " + unit)
     return conf
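
For context on the rewrapped line: map_class resolves the dotted class_path from the config to a class object, which is then instantiated with the ini_params keyword arguments. A plausible stand-in for that resolution step (the real map_class in custom_util may differ in detail):

    import importlib

    def map_class(cls_path: str):
        # Resolve "package.module.ClassName" to the class object itself.
        module_path, cls_name = cls_path.rsplit('.', 1)
        return getattr(importlib.import_module(module_path), cls_name)

    cls = map_class("collections.OrderedDict")  # any importable class works
    activated = cls(**{"a": 1})                 # mirrors cls(**ini_params)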