diff --git a/convlab/dialog_agent/agent.py b/convlab/dialog_agent/agent.py
index 79f61e2b18f04e702c690a33cdf313656b2615c6..9a9dbb31e0599ca7e73b384bb6028d479b245c25 100755
--- a/convlab/dialog_agent/agent.py
+++ b/convlab/dialog_agent/agent.py
@@ -64,7 +64,7 @@ class PipelineAgent(Agent):
     ===== ===== ====== === == ===
     """
 
-    def __init__(self, nlu: NLU, dst: DST, policy: Policy, nlg: NLG, name: str):
+    def __init__(self, nlu: NLU, dst: DST, policy: Policy, nlg: NLG, name: str, return_semantic_acts: bool = False):
         """The constructor of PipelineAgent class.
 
         Here are some special combination cases:
@@ -95,6 +95,7 @@ class PipelineAgent(Agent):
         self.dst = dst
         self.policy = policy
         self.nlg = nlg
+        self.return_semantic_acts = return_semantic_acts
         self.init_session()
         self.agent_saves = []
 
@@ -199,6 +200,8 @@ class PipelineAgent(Agent):
         self.turn += 1
         self.agent_saves.append(self.save_info())
 
+        if self.return_semantic_acts:
+            return self.output_action
         return model_response
 
     def save_info(self):
diff --git a/convlab/policy/ppo/semantic_level_config.json b/convlab/policy/ppo/semantic_level_config.json
index b9908c9cb7717515775221227f3fba19636d20dc..04b0626a10bc8d48add16732df26a7cc00a35088 100644
--- a/convlab/policy/ppo/semantic_level_config.json
+++ b/convlab/policy/ppo/semantic_level_config.json
@@ -6,7 +6,7 @@
         "batchsz": 1000,
         "seed": 0,
         "epoch": 10,
-        "eval_frequency": 1,
+        "eval_frequency": 5,
         "process_num": 4,
         "sys_semantic_to_usr": false,
         "num_eval_dialogues": 500
diff --git a/convlab/util/custom_util.py b/convlab/util/custom_util.py
index 38d8b92a36efd67bdf9166c1c5f9f20734d1ecb5..cc49d733d25566d502b10bac71189e05cfbc3fef 100644
--- a/convlab/util/custom_util.py
+++ b/convlab/util/custom_util.py
@@ -25,8 +25,7 @@ import signal
 
 slot_mapping = {"pricerange": "price range", "post": "postcode", "arriveBy": "arrive by", "leaveAt": "leave at",
-                "Id": "trainid", "ref": "reference"}
-
+                "Id": "train id", "ref": "reference", "trainID": "train id"}
 
 
 sys.path.append(os.path.dirname(os.path.dirname(
     os.path.dirname(os.path.abspath(__file__)))))
@@ -103,7 +102,8 @@ def load_config_file(filepath: str = None) -> dict:
 
 
 def save_config(terminal_args, config_file_args, config_save_path, policy_config=None):
    config_save_path = os.path.join(config_save_path, f'config_saved.json')
-    args_dict = {"args": terminal_args, "config": config_file_args, "policy_config": policy_config}
+    args_dict = {"args": terminal_args,
+                 "config": config_file_args, "policy_config": policy_config}
     json.dump(args_dict, open(config_save_path, 'w'))
 
@@ -165,26 +165,29 @@ def eval_policy(conf, policy_sys, env, sess, save_eval, log_save_path, single_do
     goals = []
     for seed in range(1000, 1000 + conf['model']['num_eval_dialogues']):
         set_seed(seed)
-        goal = create_goals(goal_generator, 1, single_domain_goals, allowed_domains)
+        goal = create_goals(goal_generator, 1,
+                            single_domain_goals, allowed_domains)
         goals.append(goal[0])
 
     if conf['model']['process_num'] == 1:
         complete_rate, success_rate, success_rate_strict, avg_return, turns, \
             avg_actions, task_success, book_acts, inform_acts, request_acts, \
-            select_acts, offer_acts, recommend_acts = evaluate(sess,
-                                                               num_dialogues=conf['model']['num_eval_dialogues'],
-                                                               sys_semantic_to_usr=conf['model'][
-                                                                   'sys_semantic_to_usr'],
-                                                               save_flag=save_eval, save_path=log_save_path, goals=goals)
-
-        total_acts = book_acts + inform_acts + request_acts + select_acts + offer_acts + recommend_acts
+            select_acts, offer_acts, recommend_acts = evaluate(sess,
+                                                               num_dialogues=conf['model']['num_eval_dialogues'],
+                                                               sys_semantic_to_usr=conf['model'][
+                                                                   'sys_semantic_to_usr'],
+                                                               save_flag=save_eval, save_path=log_save_path, goals=goals)
+
+        total_acts = book_acts + inform_acts + request_acts + \
+            select_acts + offer_acts + recommend_acts
     else:
         complete_rate, success_rate, success_rate_strict, avg_return, turns, \
             avg_actions, task_success, book_acts, inform_acts, request_acts, \
             select_acts, offer_acts, recommend_acts = \
             evaluate_distributed(sess, list(range(1000, 1000 + conf['model']['num_eval_dialogues'])),
                                  conf['model']['process_num'], goals)
-        total_acts = book_acts + inform_acts + request_acts + select_acts + offer_acts + recommend_acts
+        total_acts = book_acts + inform_acts + request_acts + \
+            select_acts + offer_acts + recommend_acts
 
     task_success_gathered = {}
     for task_dict in task_success:
@@ -196,12 +199,18 @@ def eval_policy(conf, policy_sys, env, sess, save_eval, log_save_path, single_do
 
     policy_sys.is_train = True
 
-    mean_complete, err_complete = np.average(complete_rate), np.std(complete_rate) / np.sqrt(len(complete_rate))
-    mean_success, err_success = np.average(success_rate), np.std(success_rate) / np.sqrt(len(success_rate))
-    mean_success_strict, err_success_strict = np.average(success_rate_strict), np.std(success_rate_strict) / np.sqrt(len(success_rate_strict))
-    mean_return, err_return = np.average(avg_return), np.std(avg_return) / np.sqrt(len(avg_return))
-    mean_turns, err_turns = np.average(turns), np.std(turns) / np.sqrt(len(turns))
-    mean_actions, err_actions = np.average(avg_actions), np.std(avg_actions) / np.sqrt(len(avg_actions))
+    mean_complete, err_complete = np.average(complete_rate), np.std(
+        complete_rate) / np.sqrt(len(complete_rate))
+    mean_success, err_success = np.average(success_rate), np.std(
+        success_rate) / np.sqrt(len(success_rate))
+    mean_success_strict, err_success_strict = np.average(success_rate_strict), np.std(
+        success_rate_strict) / np.sqrt(len(success_rate_strict))
+    mean_return, err_return = np.average(avg_return), np.std(
+        avg_return) / np.sqrt(len(avg_return))
+    mean_turns, err_turns = np.average(
+        turns), np.std(turns) / np.sqrt(len(turns))
+    mean_actions, err_actions = np.average(avg_actions), np.std(
+        avg_actions) / np.sqrt(len(avg_actions))
 
     logging.info(f"Complete: {mean_complete}+-{round(err_complete, 2)}, "
                  f"Success: {mean_success}+-{round(err_success, 2)}, "
@@ -380,7 +389,6 @@ def evaluate(sess, num_dialogues=400, sys_semantic_to_usr=False, save_flag=False
                 complete = sess.evaluator.complete
                 task_succ = sess.evaluator.success
                 task_succ_strict = sess.evaluator.success_strict
-                break
             else:
                 complete = 0
                 task_succ = 0
@@ -423,12 +431,12 @@ def evaluate(sess, num_dialogues=400, sys_semantic_to_usr=False, save_flag=False
         save_file.close()
     # save dialogue_info and clear mem
 
-    return np.average(task_success['All_user_sim']), np.average(task_success['All_evaluator']), \
-        np.average(task_success['All_evaluator_strict']), np.average(task_success['total_return']), \
-        np.average(task_success['turns']), np.average(task_success['avg_actions']), task_success, \
+    return task_success['All_user_sim'], task_success['All_evaluator'], task_success['All_evaluator_strict'], \
+        task_success['total_return'], task_success['turns'], task_success['avg_actions'], task_success, \
         np.average(task_success['total_booking_acts']), np.average(task_success['total_inform_acts']), \
         np.average(task_success['total_request_acts']), np.average(task_success['total_select_acts']), \
-        np.average(task_success['total_offer_acts']), np.average(task_success['total_recommend_acts'])
+        np.average(task_success['total_offer_acts']), np.average(
+            task_success['total_recommend_acts'])
 
 
 def model_downloader(download_dir, model_path):
@@ -622,7 +630,8 @@ def get_config(filepath, args) -> dict:
             cls_path = infos.get('class_path', '')
             cls = map_class(cls_path)
             conf[unit + '_class'] = cls
-            conf[unit + '_activated'] = conf[unit + '_class'](**conf[unit][model]['ini_params'])
+            conf[unit + '_activated'] = conf[unit +
+                                             '_class'](**conf[unit][model]['ini_params'])
             print("Loaded " + model + " for " + unit)
 
     return conf
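
Usage note: after this change, evaluate() returns the per-dialogue lists
(task_success['All_user_sim'] and so on) instead of their averages, which is
what lets eval_policy() compute both the means and the standard errors it
logs. On the agent side, a PipelineAgent built with return_semantic_acts=True
returns its semantic dialogue acts (self.output_action) from response()
instead of the NLG surface text. A minimal sketch of the new flag (my_nlu,
my_dst, my_policy and my_nlg are placeholders for whichever modules the
pipeline is actually configured with):

    from convlab.dialog_agent.agent import PipelineAgent

    # With return_semantic_acts=True, response() yields the policy's
    # semantic acts rather than the generated natural-language utterance.
    sys_agent = PipelineAgent(nlu=my_nlu, dst=my_dst, policy=my_policy,
                              nlg=my_nlg, name='sys',
                              return_semantic_acts=True)
    acts = sys_agent.response("I need a cheap hotel in the north.")
    # acts is now the list of semantic dialogue acts, not an NLG string.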