diff --git a/.gitignore b/.gitignore index 832e526dc1f997facec8833e85cd357b06d466b2..5665c62346b2f85cdea57cf2686d879e6d9cde1e 100644 --- a/.gitignore +++ b/.gitignore @@ -101,6 +101,7 @@ deploy/bert_multiwoz_all.zip deploy/templates/dialog_eg.html test.py +*convlab2/policy/vector/action_dicts *.egg-info pre-trained-models/ -venv \ No newline at end of file +venv diff --git a/convlab2/dialog_agent/env.py b/convlab2/dialog_agent/env.py index dc345786ab154d028e1c569a347fabcc9c5a5def..bee3e943db9d7363a672a4627b35ec23dde5c37d 100755 --- a/convlab2/dialog_agent/env.py +++ b/convlab2/dialog_agent/env.py @@ -6,6 +6,7 @@ Created on Wed Jul 17 14:27:34 2019 """ import pdb +from copy import deepcopy class Environment(): @@ -47,6 +48,7 @@ class Environment(): observation) if self.sys_nlu else observation self.sys_dst.state['user_action'] = dialog_act state = self.sys_dst.update(dialog_act) + state = deepcopy(state) dialog_act = self.sys_dst.state['user_action'] state['history'].append(["sys", model_response]) diff --git a/convlab2/dst/rule/multiwoz/dst.py b/convlab2/dst/rule/multiwoz/dst.py index a8602a7c249eac1b627fcbe851136d486850bed9..9ff59fbd3d5ae3818f42eea26adf377850b4a6ae 100755 --- a/convlab2/dst/rule/multiwoz/dst.py +++ b/convlab2/dst/rule/multiwoz/dst.py @@ -43,7 +43,8 @@ class RuleDST(DST): continue domain_dic = self.state['belief_state'][domain] if slot in domain_dic: - nvalue = normalize_value(self.value_dict, domain, slot, value) + nvalue = normalize_value( + self.value_dict, domain, slot, value) self.state['belief_state'][domain][slot] = nvalue elif slot != 'none' or slot != '': # raise Exception('unknown slot name <{}> of domain <{}>'.format(k, domain)) @@ -63,6 +64,7 @@ class RuleDST(DST): self.state = default_state() self.state['belief_state'] = deepcopy(self.default_belief_state) + if __name__ == '__main__': # from convlab2.dst.rule.multiwoz import RuleDST diff --git a/convlab2/dst/rule/multiwoz/usr_dst.py b/convlab2/dst/rule/multiwoz/usr_dst.py index 26ce84d25d2960ea48793f7d86079bc8e2f187ea..13fc39e0471bd07bfe52beffcd42c762c985f05d 100755 --- a/convlab2/dst/rule/multiwoz/usr_dst.py +++ b/convlab2/dst/rule/multiwoz/usr_dst.py @@ -1,12 +1,16 @@ import json import os -from convlab2.util.multiwoz.state import default_state +from convlab2.util.multiwoz.state import default_state_old as default_state from convlab2.dst.rule.multiwoz.dst_util import normalize_value from convlab2.dst.rule.multiwoz import RuleDST from convlab2.util.multiwoz.multiwoz_slot_trans import REF_SYS_DA from convlab2.policy.tus.multiwoz.Da2Goal import SysDa2Goal, UsrDa2Goal +from data.unified_datasets.multiwoz21.preprocess import normalize_domain_slot_value, reverse_da +from convlab2.policy.rule.multiwoz.policy_agenda_multiwoz import unified_format, act_dict_to_flat_tuple from pprint import pprint +from copy import deepcopy +from convlab2.util import load_ontology SLOT2SEMI = { "arriveby": "arriveBy", @@ -25,8 +29,14 @@ class UserRuleDST(RuleDST): It helps check whether ``user_act`` has correct content. 
""" - def __init__(self): + def __init__(self, dataset_name='multiwoz21'): super().__init__() + + self.state = default_state() + path = os.path.dirname( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))) + path = os.path.join(path, 'data/multiwoz/value_dict.json') + self.value_dict = json.load(open(path)) self.mentioned_domain = [] def update(self, sys_act=None): @@ -35,6 +45,9 @@ class UserRuleDST(RuleDST): :param sys_act: :return: """ + sys_act = unified_format(sys_act) + sys_act = reverse_da(sys_act) + sys_act = act_dict_to_flat_tuple(sys_act) # print("dst", user_act) self.update_mentioned_domain(sys_act) for intent, domain, slot, value in sys_act: @@ -75,7 +88,7 @@ class UserRuleDST(RuleDST): assert domain in self.state['belief_state'] except: raise Exception( - 'Error: domain <{}> not in new belief state'.format(domain)) + f'Error: domain <{domain}> not in new belief state') domain_dic = self.state['belief_state'][domain] assert 'semi' in domain_dic assert 'book' in domain_dic diff --git a/convlab2/policy/README.md b/convlab2/policy/README.md index cc0b29edaaa66dde5febe62182ee709e800dab67..1990cdd6b03a38fe6f5a4f8b4eb8a9708761c590 100755 --- a/convlab2/policy/README.md +++ b/convlab2/policy/README.md @@ -16,3 +16,21 @@ The interfaces for dialog policy are defined in policy.Policy: and outputs the next system action. - **init_session** reset the model variables for a new dialog session. + +## Rule based simulator results + +| Model | Complete rate | Success rate | Average return | Turns | Average actions | +|-------|---------------|--------------|----------------|-------|-----------------| +| MLE | | | | | | +| PG | | | | | | +| GDPL | | | | | | +| PPO | | | | | | + +## Transformer based user simulator (TUS) results + +| Model | Complete rate | Success rate | Average return | Turns | Average actions | +|-------|---------------|--------------|----------------|-------|-----------------| +| MLE | | | | | | +| PG | | | | | | +| GDPL | | | | | | +| PPO | | | | | | diff --git a/convlab2/policy/evaluate.py b/convlab2/policy/evaluate.py index a473d49afb59c4ab56c212763c029e8e50bef858..00b8347315f81b0d5955b77738bbe9060203cbf7 100755 --- a/convlab2/policy/evaluate.py +++ b/convlab2/policy/evaluate.py @@ -2,7 +2,6 @@ import argparse import datetime -import json import logging import os @@ -10,12 +9,10 @@ import numpy as np import torch from convlab2.dialog_agent.agent import PipelineAgent from convlab2.dialog_agent.session import BiSession -from convlab2.dst.rule.multiwoz import RuleDST -from convlab2.dst.rule.multiwoz.usr_dst import UserRuleDST from convlab2.evaluator.multiwoz_eval import MultiWozEvaluator -from convlab2.policy.tus.multiwoz.TUS import UserPolicy from convlab2.policy.rule.multiwoz import RulePolicy -from convlab2.util.custom_util import set_seed +from convlab2.task.multiwoz.goal_generator import GoalGenerator +from convlab2.util.custom_util import set_seed, get_config, env_config, create_goals def init_logging(log_dir_path, path_suffix=None): @@ -39,68 +36,45 @@ def init_logging(log_dir_path, path_suffix=None): DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") -def evaluate(args, model_name, load_path, verbose=False): +def evaluate(config_path, model_name, verbose=False): seed = 0 set_seed(seed) - dst_sys = RuleDST() + conf = get_config(config_path, []) if model_name == "PPO": from convlab2.policy.ppo import PPO - if load_path: - policy_sys = PPO(False) - policy_sys.load(load_path) - else: - policy_sys = 
PPO.from_pretrained() + policy_sys = PPO(vectorizer=conf['vectorizer_sys_activated']) elif model_name == "RULE": policy_sys = RulePolicy() elif model_name == "PG": from convlab2.policy.pg import PG - if load_path: - policy_sys = PG(False) - policy_sys.load(load_path) - else: - policy_sys = PG.from_pretrained() + policy_sys = PG(vectorizer=conf['vectorizer_sys_activated']) elif model_name == "MLE": from convlab2.policy.mle import MLE - if load_path: - policy_sys = MLE() - policy_sys.load(load_path) - else: - policy_sys = MLE.from_pretrained() + policy_sys = MLE() elif model_name == "GDPL": from convlab2.policy.gdpl import GDPL - if load_path: - policy_sys = GDPL(False) - policy_sys.load(load_path) - else: - policy_sys = GDPL.from_pretrained() - user_type = args.user.lower() - if user_type == "rule": - dst_usr = None - policy_usr = RulePolicy(character='usr') - elif user_type == "tus": - dst_usr = UserRuleDST() - user_config = json.load(open(args.user_config)) - policy_usr = UserPolicy(user_config) - elif user_type == "vhus": - from convlab2.policy.vhus.multiwoz import UserPolicyVHUS - dst_usr = None - policy_usr = UserPolicyVHUS( - load_from_zip=True, model_file="/home/linh/convlab-2/vhus_simulator_multiwoz.zip") - - simulator = PipelineAgent(None, dst_usr, policy_usr, None, 'user') - agent_sys = PipelineAgent(None, dst_sys, policy_sys, None, 'sys') - - evaluator = MultiWozEvaluator() - sess = BiSession(agent_sys, simulator, None, evaluator) + policy_sys = GDPL(vectorizer=conf['vectorizer_sys_activated']) + try: + policy_sys.load(conf['model']['load_path']) + except Exception as e: + logging.info(f"Could not load a policy: {e}") + + env, sess = env_config(conf, policy_sys) action_dict = {} - task_success = {'Complete': [], 'Success': [], 'Success strict': [], 'total_return': [], 'turns': []} - for seed in range(1000, 1400): + task_success = {'Complete': [], 'Success': [], + 'Success strict': [], 'total_return': [], 'turns': []} + + dialogues = 500 + goal_generator = GoalGenerator() + goals = create_goals(goal_generator, num_goals=dialogues, single_domains=False, allowed_domains=None) + + for seed in range(1000, 1000 + dialogues): set_seed(seed) - sess.init_session() + sess.init_session(goal=goals[seed-1000]) sys_response = [] actions = 0.0 total_return = 0.0 @@ -133,7 +107,7 @@ def evaluate(args, model_name, load_path, verbose=False): # logging.info(f"Actions in turn: {len(sys_response)}") turns += 1 - total_return += evaluator.get_reward(session_over) + total_return += sess.evaluator.get_reward(session_over) if session_over: task_succ = sess.evaluator.task_success() @@ -158,31 +132,24 @@ def evaluate(args, model_name, load_path, verbose=False): for key in task_success: logging.info( f'{key} {len(task_success[key])} {np.average(task_success[key]) if len(task_success[key]) > 0 else 0}') + logging.info(f"Average actions: {actions / turns}") if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--model_name", type=str, default="PPO", help="name of model") - parser.add_argument("--load_path", type=str, + parser.add_argument("--config_path", type=str, default='', help="path of model") + parser.add_argument("--verbose", action='store_true', + help="whether to output utterances") parser.add_argument("--log_path_suffix", type=str, default="", help="suffix of path of log file") parser.add_argument("--log_dir_path", type=str, default="log", help="path of log directory") - parser.add_argument("--user_config", type=str, - 
default="convlab2/policy/tus/multiwoz/exp/default.json") - parser.add_argument("--user_mode", type=str, default="") - parser.add_argument("--user", type=str, default="rule") - parser.add_argument("--verbose", action='store_true', help="whether to output utterances") args = parser.parse_args() init_logging(log_dir_path=args.log_dir_path, path_suffix=args.log_path_suffix) - evaluate( - args=args, - model_name=args.model_name, - load_path=args.load_path, - verbose=args.verbose - ) + evaluate(config_path=args.config_path, model_name=args.model_name, verbose=args.verbose) diff --git a/convlab2/policy/evaluate_distributed.py b/convlab2/policy/evaluate_distributed.py index e6e16e4af41e5d45a4f74cdd3da1c636d61cb2bb..4be89cc726c35f7a31d41483a3d6f35f3fb3de14 100644 --- a/convlab2/policy/evaluate_distributed.py +++ b/convlab2/policy/evaluate_distributed.py @@ -3,6 +3,7 @@ import random import torch import sys +import torch from pprint import pprint import matplotlib.pyplot as plt diff --git a/convlab2/policy/gdpl/semantic_level_config.json b/convlab2/policy/gdpl/semantic_level_config.json index d46ad81753614f00ab46e78038b225aa3b723332..8e7178f013cc4981f97e14d5dd5b5920456a0777 100644 --- a/convlab2/policy/gdpl/semantic_level_config.json +++ b/convlab2/policy/gdpl/semantic_level_config.json @@ -5,14 +5,9 @@ "pretrained_load_path": "", "batchsz": 1000, "seed": 0, - "epoch": 200, + "epoch": 50, "eval_frequency": 5, "process_num": 4, - "use_masking": false, - "use_state_entropy": false, - "manually_add_entity_names": false, - "use_state_mutual_info": false, - "use_confidence_scores": false, "sys_semantic_to_usr": false, "num_eval_dialogues": 500 }, diff --git a/convlab2/policy/gdpl/train.py b/convlab2/policy/gdpl/train.py index bb3d1e1eb71002d57603d490f9b6849101263719..a58a54cd9bf679cfc8bb2567a2bc933a3f070164 100755 --- a/convlab2/policy/gdpl/train.py +++ b/convlab2/policy/gdpl/train.py @@ -10,15 +10,15 @@ import logging import time import numpy as np import torch +import random from convlab2.policy.gdpl import GDPL from convlab2.policy.gdpl import RewardEstimator from convlab2.policy.rlmodule import Memory from torch import multiprocessing as mp from argparse import ArgumentParser -from convlab2.policy.ppo.config import get_config from convlab2.util.custom_util import set_seed, init_logging, save_config, move_finished_training, env_config, \ - eval_policy, log_start_args, save_best, load_config_file + eval_policy, log_start_args, save_best, load_config_file, get_config from datetime import datetime sys.path.append(os.path.dirname(os.path.dirname( @@ -48,7 +48,7 @@ def sampler(pid, queue, evt, env, policy, batchsz, train_seed=0): :return: """ - buff = Memory(seed=train_seed) + buff = Memory() # we need to sample batchsz of (state, action, next_state, reward, mask) # each trajectory contains `trajectory_len` num of items, so we only need to sample # `batchsz//trajectory_len` num of trajectory totally @@ -59,6 +59,8 @@ def sampler(pid, queue, evt, env, policy, batchsz, train_seed=0): traj_len = 50 real_traj_len = 0 + set_seed(train_seed) + while sampled_num < batchsz: # for each trajectory, we reset the env and get initial state s = env.reset() @@ -122,6 +124,7 @@ def sample(env, policy, batchsz, process_num, seed): # batchsz will be splitted into each process, # final batchsz maybe larger than batchsz parameters process_batchsz = np.ceil(batchsz / process_num).astype(np.int32) + train_seeds = random.sample(range(0, 1000), process_num) # buffer to save all data queue = mp.Queue() @@ -135,7 +138,7 @@ def 
sample(env, policy, batchsz, process_num, seed): evt = mp.Event() processes = [] for i in range(process_num): - process_args = (i, queue, evt, env, policy, process_batchsz, seed) + process_args = (i, queue, evt, env, policy, process_batchsz, train_seeds[i]) processes.append(mp.Process(target=sampler, args=process_args)) for p in processes: # set the process as daemon, and it will be killed once the main process is stoped. @@ -250,6 +253,7 @@ if __name__ == '__main__': tb_writer.add_scalar(key, eval_dict[key], 0) best_complete_rate = eval_dict['complete_rate'] best_success_rate = eval_dict['success_rate_strict'] + best_return = eval_dict['avg_return'] logging.info("Start of Training: " + time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())) @@ -265,9 +269,11 @@ if __name__ == '__main__': eval_dict = eval_policy(conf, policy_sys, env, sess, save_eval, log_save_path) - best_complete_rate, best_success_rate = \ - save_best(policy_sys, best_complete_rate, best_success_rate, - eval_dict["complete_rate"], eval_dict["success_rate_strict"], save_path) + best_complete_rate, best_success_rate, best_return = \ + save_best(policy_sys, best_complete_rate, best_success_rate, best_return, + eval_dict["complete_rate"], eval_dict["success_rate_strict"], + eval_dict["avg_return"], save_path) + policy_sys.save(save_path, "last") for key in eval_dict: tb_writer.add_scalar(key, eval_dict[key], idx * conf['model']['batchsz']) diff --git a/convlab2/policy/pg/config.json b/convlab2/policy/pg/config.json index 480325a980a981ad91f749e3223bdf4bf4ca8ae4..8079b6b7c6ceb5fed5012430ccf8cde1c4b48ee4 100755 --- a/convlab2/policy/pg/config.json +++ b/convlab2/policy/pg/config.json @@ -1,7 +1,7 @@ { "batchsz": 32, "gamma": 0.99, - "lr": 0.00001, + "lr": 0.0000001, "save_dir": "save", "log_dir": "log", "save_per_epoch": 5, diff --git a/convlab2/policy/pg/semantic_level_config.json b/convlab2/policy/pg/semantic_level_config.json index d46ad81753614f00ab46e78038b225aa3b723332..8e7178f013cc4981f97e14d5dd5b5920456a0777 100644 --- a/convlab2/policy/pg/semantic_level_config.json +++ b/convlab2/policy/pg/semantic_level_config.json @@ -5,14 +5,9 @@ "pretrained_load_path": "", "batchsz": 1000, "seed": 0, - "epoch": 200, + "epoch": 50, "eval_frequency": 5, "process_num": 4, - "use_masking": false, - "use_state_entropy": false, - "manually_add_entity_names": false, - "use_state_mutual_info": false, - "use_confidence_scores": false, "sys_semantic_to_usr": false, "num_eval_dialogues": 500 }, diff --git a/convlab2/policy/pg/train.py b/convlab2/policy/pg/train.py index 3abcd74b99fbaf46529ff07378bb052e1d8c4e97..0b5c385c28649dd7f5702ca7e6cedfe2c2e9ff7a 100755 --- a/convlab2/policy/pg/train.py +++ b/convlab2/policy/pg/train.py @@ -10,14 +10,14 @@ import logging import time import numpy as np import torch +import random from convlab2.policy.pg import PG from convlab2.policy.rlmodule import Memory from torch import multiprocessing as mp from argparse import ArgumentParser -from convlab2.policy.ppo.config import get_config from convlab2.util.custom_util import set_seed, init_logging, save_config, move_finished_training, env_config, \ - eval_policy, log_start_args, save_best, load_config_file + eval_policy, log_start_args, save_best, load_config_file, get_config from datetime import datetime sys.path.append(os.path.dirname(os.path.dirname( @@ -47,7 +47,7 @@ def sampler(pid, queue, evt, env, policy, batchsz, train_seed=0): :return: """ - buff = Memory(seed=train_seed) + buff = Memory() # we need to sample batchsz of (state, action, next_state, 
reward, mask) # each trajectory contains `trajectory_len` num of items, so we only need to sample # `batchsz//trajectory_len` num of trajectory totally @@ -58,6 +58,8 @@ def sampler(pid, queue, evt, env, policy, batchsz, train_seed=0): traj_len = 50 real_traj_len = 0 + set_seed(train_seed) + while sampled_num < batchsz: # for each trajectory, we reset the env and get initial state s = env.reset() @@ -121,6 +123,7 @@ def sample(env, policy, batchsz, process_num, seed): # batchsz will be splitted into each process, # final batchsz maybe larger than batchsz parameters process_batchsz = np.ceil(batchsz / process_num).astype(np.int32) + train_seeds = random.sample(range(0, 1000), process_num) # buffer to save all data queue = mp.Queue() @@ -134,7 +137,7 @@ def sample(env, policy, batchsz, process_num, seed): evt = mp.Event() processes = [] for i in range(process_num): - process_args = (i, queue, evt, env, policy, process_batchsz, seed) + process_args = (i, queue, evt, env, policy, process_batchsz, train_seeds[i]) processes.append(mp.Process(target=sampler, args=process_args)) for p in processes: # set the process as daemon, and it will be killed once the main process is stoped. @@ -246,6 +249,7 @@ if __name__ == '__main__': tb_writer.add_scalar(key, eval_dict[key], 0) best_complete_rate = eval_dict['complete_rate'] best_success_rate = eval_dict['success_rate_strict'] + best_return = eval_dict['avg_return'] logging.info("Start of Training: " + time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())) @@ -261,9 +265,11 @@ if __name__ == '__main__': eval_dict = eval_policy(conf, policy_sys, env, sess, save_eval, log_save_path) - best_complete_rate, best_success_rate = \ - save_best(policy_sys, best_complete_rate, best_success_rate, - eval_dict["complete_rate"], eval_dict["success_rate_strict"], save_path) + best_complete_rate, best_success_rate, best_return = \ + save_best(policy_sys, best_complete_rate, best_success_rate, best_return, + eval_dict["complete_rate"], eval_dict["success_rate_strict"], + eval_dict["avg_return"], save_path) + policy_sys.save(save_path, "last") for key in eval_dict: tb_writer.add_scalar(key, eval_dict[key], idx * conf['model']['batchsz']) diff --git a/convlab2/policy/ppo/config.py b/convlab2/policy/ppo/config.py deleted file mode 100644 index cb119d776644986ac6af827dd24a95398bb4b906..0000000000000000000000000000000000000000 --- a/convlab2/policy/ppo/config.py +++ /dev/null @@ -1,102 +0,0 @@ -import os -import sys -import json - -from convlab2.util.custom_util import load_config_file - - -def map_class(cls_path: str): - """ - Map to class via package text path - :param cls_path: str, path with `convlab2` project directory as relative path, separator with `,` - E.g `convlab2.nlu.svm.camrest.nlu.SVMNLU` - :return: class - """ - pkgs = cls_path.split('.') - cls = __import__('.'.join(pkgs[:-1])) - for pkg in pkgs[1:]: - cls = getattr(cls, pkg) - return cls - - -def get_config(filepath, args) -> dict: - """ - The configuration file is used to create all the information needed for the deployment, - and the necessary security monitoring has been performed, including the mapping of the class. 
- :param filepath: str, dest config file path - :return: dict - """ - - conf = load_config_file(filepath) - - # add project root dir - sys.path.append(os.path.abspath(os.path.join( - os.path.dirname(__file__), os.path.pardir))) - - for arg in args: - if len(arg) == 3: - conf[arg[0]][arg[1]] = arg[2] - if len(arg) == 4: - conf[arg[0]][arg[1]][arg[2]] = arg[3] - if len(arg) == 5: - conf[arg[0]][arg[1]][arg[2]][arg[3]] = arg[4] - - # Autoload uncertainty settings from policy based on the tracker used - dst_name = [model for model in conf['dst_sys']] - dst_name = dst_name[0] if dst_name else None - vec_name = [model for model in conf['vectorizer_sys']] - vec_name = vec_name[0] if vec_name else None - if dst_name and 'setsumbt' in dst_name.lower(): - if 'get_confidence_scores' in conf['dst_sys'][dst_name]['ini_params']: - conf['vectorizer_sys'][vec_name]['ini_params']['use_confidence_scores'] = conf['dst_sys'][dst_name]['ini_params']['get_confidence_scores'] - else: - conf['vectorizer_sys'][vec_name]['ini_params']['use_confidence_scores'] = False - if 'return_mutual_info' in conf['dst_sys'][dst_name]['ini_params']: - conf['vectorizer_sys'][vec_name]['ini_params']['use_mutual_info'] = conf['dst_sys'][dst_name]['ini_params']['return_mutual_info'] - else: - conf['vectorizer_sys'][vec_name]['ini_params']['use_mutual_info'] = False - if 'return_entropy' in conf['dst_sys'][dst_name]['ini_params']: - conf['vectorizer_sys'][vec_name]['ini_params']['use_entropy'] = conf['dst_sys'][dst_name]['ini_params']['return_entropy'] - else: - conf['vectorizer_sys'][vec_name]['ini_params']['use_entropy'] = False - - from convlab2.nlu import NLU - from convlab2.dst import DST - from convlab2.policy import Policy - from convlab2.nlg import NLG - - modules = ['vectorizer_sys', 'nlu_sys', 'dst_sys', 'sys_nlg', - 'nlu_usr', 'dst_usr', 'policy_usr', 'usr_nlg'] - - # Syncronise all seeds - if 'seed' in conf['model']: - for module in modules: - module_name = [model for model in conf[module]] - module_name = module_name[0] if module_name else None - if conf[module] and module_name: - if 'ini_params' in conf[module][module_name]: - if 'seed' in conf[module][module_name]['ini_params']: - conf[module][module_name]['ini_params']['seed'] = conf['model']['seed'] - - # for each unit in modules above, create model save into conf - for unit in modules: - if conf[unit] == {}: - conf[unit + '_activated'] = None - else: - for (model, infos) in conf[unit].items(): - cls_path = infos.get('class_path', '') - cls = map_class(cls_path) - conf[unit + '_class'] = cls - conf[unit + '_activated'] = conf[unit + - '_class'](**conf[unit][model]['ini_params']) - print("Loaded " + model + " for " + unit) - return conf - - -if __name__ == '__main__': - # test - args = [('model', 'seed', 'ThisIsATestSeed'), - ('dst_sys', "setsumbt-mul", "ini_params", "get_confidence_scores", True)] - path = "/Users/carel17/Projects/Convlab/convlab2/policy/ppo/setsumbt_config.json" - conf = get_config(path, args) - print(conf) diff --git a/convlab2/policy/ppo/semantic_level_config.json b/convlab2/policy/ppo/semantic_level_config.json index d46ad81753614f00ab46e78038b225aa3b723332..b5fa40bd32d2200db6f4be274d6e314a694cf0cf 100644 --- a/convlab2/policy/ppo/semantic_level_config.json +++ b/convlab2/policy/ppo/semantic_level_config.json @@ -1,18 +1,13 @@ { "model": { - "load_path": "convlab2/policy/mle/experiments/experiment_2022-03-10-11-44-08/save/supervised", + "load_path": "convlab2/policy/mle/experiments/experiment_2022-05-23-14-08-43/save/supervised", 
"use_pretrained_initialisation": false, "pretrained_load_path": "", "batchsz": 1000, "seed": 0, - "epoch": 200, + "epoch": 50, "eval_frequency": 5, "process_num": 4, - "use_masking": false, - "use_state_entropy": false, - "manually_add_entity_names": false, - "use_state_mutual_info": false, - "use_confidence_scores": false, "sys_semantic_to_usr": false, "num_eval_dialogues": 500 }, @@ -20,7 +15,7 @@ "uncertainty_vector_mul": { "class_path": "convlab2.policy.vector.vector_binary.VectorBinary", "ini_params": { - "use_masking": false, + "use_masking": true, "manually_add_entity_names": false, "seed": 0 } diff --git a/convlab2/policy/ppo/train.py b/convlab2/policy/ppo/train.py index 67a2b6c5282db18b624eb25852ad12ba2ca982b8..aa1e4e77824906676c8660c82a2185b0eaf8ec80 100755 --- a/convlab2/policy/ppo/train.py +++ b/convlab2/policy/ppo/train.py @@ -10,14 +10,14 @@ import logging import time import numpy as np import torch +import random from convlab2.policy.ppo import PPO from convlab2.policy.rlmodule import Memory from torch import multiprocessing as mp from argparse import ArgumentParser -from convlab2.policy.ppo.config import get_config from convlab2.util.custom_util import set_seed, init_logging, save_config, move_finished_training, env_config, \ - eval_policy, log_start_args, save_best, load_config_file + eval_policy, log_start_args, save_best, load_config_file, get_config from datetime import datetime sys.path.append(os.path.dirname(os.path.dirname( @@ -47,7 +47,7 @@ def sampler(pid, queue, evt, env, policy, batchsz, train_seed=0): :return: """ - buff = Memory(seed=train_seed) + buff = Memory() # we need to sample batchsz of (state, action, next_state, reward, mask) # each trajectory contains `trajectory_len` num of items, so we only need to sample # `batchsz//trajectory_len` num of trajectory totally @@ -58,6 +58,8 @@ def sampler(pid, queue, evt, env, policy, batchsz, train_seed=0): traj_len = 50 real_traj_len = 0 + set_seed(train_seed) + while sampled_num < batchsz: # for each trajectory, we reset the env and get initial state s = env.reset() @@ -121,6 +123,7 @@ def sample(env, policy, batchsz, process_num, seed): # batchsz will be splitted into each process, # final batchsz maybe larger than batchsz parameters process_batchsz = np.ceil(batchsz / process_num).astype(np.int32) + train_seeds = random.sample(range(0, 1000), process_num) # buffer to save all data queue = mp.Queue() @@ -134,7 +137,7 @@ def sample(env, policy, batchsz, process_num, seed): evt = mp.Event() processes = [] for i in range(process_num): - process_args = (i, queue, evt, env, policy, process_batchsz, seed) + process_args = (i, queue, evt, env, policy, process_batchsz, train_seeds[i]) processes.append(mp.Process(target=sampler, args=process_args)) for p in processes: # set the process as daemon, and it will be killed once the main process is stoped. 
@@ -246,6 +249,7 @@ if __name__ == '__main__': tb_writer.add_scalar(key, eval_dict[key], 0) best_complete_rate = eval_dict['complete_rate'] best_success_rate = eval_dict['success_rate_strict'] + best_return = eval_dict['avg_return'] logging.info("Start of Training: " + time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())) @@ -261,9 +265,11 @@ if __name__ == '__main__': eval_dict = eval_policy(conf, policy_sys, env, sess, save_eval, log_save_path) - best_complete_rate, best_success_rate = \ - save_best(policy_sys, best_complete_rate, best_success_rate, - eval_dict["complete_rate"], eval_dict["success_rate_strict"], save_path) + best_complete_rate, best_success_rate, best_return = \ + save_best(policy_sys, best_complete_rate, best_success_rate, best_return, + eval_dict["complete_rate"], eval_dict["success_rate_strict"], + eval_dict["avg_return"], save_path) + policy_sys.save(save_path, "last") for key in eval_dict: tb_writer.add_scalar(key, eval_dict[key], idx * conf['model']['batchsz']) diff --git a/convlab2/policy/rlmodule.py b/convlab2/policy/rlmodule.py index b67cca2c03163ad65933df2d5ff2de7d22bca31f..db46026656d908b2453a9143b3f482ce7378e382 100755 --- a/convlab2/policy/rlmodule.py +++ b/convlab2/policy/rlmodule.py @@ -319,17 +319,8 @@ Transition = namedtuple('Transition', ('state', 'action', class Memory(object): - def __init__(self, seed=0): + def __init__(self): self.memory = [] - self.set_seed(seed) - - def set_seed(self, seed): - np.random.seed(seed) - torch.random.manual_seed(seed) - random.seed(seed) - torch.manual_seed(seed) - if torch.cuda.is_available(): - torch.cuda.manual_seed_all(seed) def push(self, *args): """Saves a transition.""" diff --git a/convlab2/policy/rule/multiwoz/policy_agenda_multiwoz.py b/convlab2/policy/rule/multiwoz/policy_agenda_multiwoz.py index 30e800d9a42b53a9d6b869130fe0343b814a0031..315227944c9bea49cb45c10d0715488b8469b14d 100755 --- a/convlab2/policy/rule/multiwoz/policy_agenda_multiwoz.py +++ b/convlab2/policy/rule/multiwoz/policy_agenda_multiwoz.py @@ -23,7 +23,8 @@ def unified_format(acts): new_acts = {'categorical': []} for act in acts: intent, domain, slot, value = act - new_acts['categorical'].append({"intent": intent, "domain": domain, "slot": slot, "value": value}) + new_acts['categorical'].append( + {"intent": intent, "domain": domain, "slot": slot, "value": value}) return new_acts @@ -134,8 +135,8 @@ class UserPolicyAgendaMultiWoz(Policy): action = {} while len(action) == 0: # A -> A' + user_action - # action = self.agenda.get_action(random.randint(2, self.max_initiative)) - action = self.agenda.get_action(self.max_initiative) + action = self.agenda.get_action(random.randint(1, self.max_initiative)) + #action = self.agenda.get_action(self.max_initiative) # transform to DA action = self._transform_usract_out(action) @@ -146,7 +147,8 @@ class UserPolicyAgendaMultiWoz(Policy): domain, intent = domain_intent.lower().split('-') for slot, value in svs: try: - domain, slot, value = normalize_domain_slot_value(domain, slot, value) + domain, slot, value = normalize_domain_slot_value( + domain, slot, value) except: pass tuples.append([intent, domain, slot, value]) diff --git a/convlab2/policy/tus/multiwoz/TUS.py b/convlab2/policy/tus/multiwoz/TUS.py index 1655d44a6552661e3b438c664b2e1392dd905641..3c0f85c031261f4da852bac13d5f8971390e74f8 100644 --- a/convlab2/policy/tus/multiwoz/TUS.py +++ b/convlab2/policy/tus/multiwoz/TUS.py @@ -15,6 +15,8 @@ from convlab2.policy.policy import Policy from convlab2.task.multiwoz.goal_generator import GoalGenerator from 
convlab2.util.multiwoz.multiwoz_slot_trans import REF_USR_DA from convlab2.util.custom_util import model_downloader +from data.unified_datasets.multiwoz21.preprocess import normalize_domain_slot_value, reverse_da +from convlab2.policy.rule.multiwoz.policy_agenda_multiwoz import unified_format, act_dict_to_flat_tuple DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") @@ -65,7 +67,7 @@ class UserActionPolicy(Policy): def _no_offer(self, system_in): for intent, domain, slot, value in system_in: - if intent == "NoOffer": + if intent.lower() == "nooffer": self.terminated = True return True else: @@ -73,21 +75,26 @@ class UserActionPolicy(Policy): def predict(self, state, mode="max"): # update goal - self.goal.update_user_goal(action=state["system_action"], + sys_dialog_act = state["system_action"] + sys_dialog_act = unified_format(sys_dialog_act) + sys_dialog_act = reverse_da(sys_dialog_act) + sys_dialog_act = act_dict_to_flat_tuple(sys_dialog_act) + + self.goal.update_user_goal(action=sys_dialog_act, state=state['belief_state']) # self.goal.update_info_record(sys_act=state["system_action"]) - self.goal.add_sys_da(state["system_action"]) - self.sys_acts.append(state["system_action"]) + self.goal.add_sys_da(sys_dialog_act) + self.sys_acts.append(sys_dialog_act) # need better way to handle this - if self._no_offer(state["system_action"]): + if self._no_offer(sys_dialog_act): return [["bye", "general", "None", "None"]] # update constraint self.time_step += 2 self.predict_action_list = self.goal.action_list( - sys_act=state["system_action"], + sys_act=sys_dialog_act, all_values=self.all_values) feature, mask = self.feat_handler.get_feature( @@ -95,7 +102,7 @@ class UserActionPolicy(Policy): self.goal, state['belief_state'], self.sys_history_state, - state["system_action"], + sys_dialog_act, self.pre_usr_act) feature = torch.tensor([feature], dtype=torch.float).to(DEVICE) mask = torch.tensor([mask], dtype=torch.bool).to(DEVICE) @@ -117,7 +124,17 @@ class UserActionPolicy(Policy): # self.goal.update_info_record(usr_act=usr_action) self.goal.add_usr_da(usr_action) - return usr_action + # convert user action to unify data format + norm_usr_action = [] + for intent, domain, slot, value in usr_action: + intent = intent.lower() + domain, slot, value = normalize_domain_slot_value( + domain, slot, value) + norm_usr_action.append([intent, domain, slot, value]) + + return norm_usr_action + + # return usr_action def init_session(self, goal=None): self.mentioned_domain = [] @@ -385,10 +402,13 @@ class UserActionPolicy(Policy): class UserPolicy(Policy): def __init__(self, config): - self.config = config + if isinstance(config, str): + self.config = json.load(open(config)) + else: + self.config = config if not os.path.exists(self.config["model_dir"]): - os.mkdir(self.config["model_dir"]) - model_downloader(self.config["model_dir"], + # os.mkdir(self.config["model_dir"]) + model_downloader(os.path.dirname(self.config["model_dir"]), "https://zenodo.org/record/5779832/files/default.zip") self.policy = UserActionPolicy(self.config) diff --git a/convlab2/policy/vector/vector_base.py b/convlab2/policy/vector/vector_base.py index ad0f6345819afa4d65523c3b88e3c341f3a8b2ed..497386a632615c8e775cf1a15c29af87f3348b4a 100644 --- a/convlab2/policy/vector/vector_base.py +++ b/convlab2/policy/vector/vector_base.py @@ -152,11 +152,13 @@ class VectorBase(Vector): system = act['system'] user = act['user'] if system: - system_acts_with_value = self.add_values_to_act(act['domain'], act['intent'], act['slot'], True) 
+ system_acts_with_value = self.add_values_to_act( + act['domain'], act['intent'], act['slot'], True) self.da_voc.extend(system_acts_with_value) if user: - user_acts_with_value = self.add_values_to_act(act['domain'], act['intent'], act['slot'], False) + user_acts_with_value = self.add_values_to_act( + act['domain'], act['intent'], act['slot'], False) self.da_voc_opp.extend(user_acts_with_value) self.da_voc.sort() @@ -311,7 +313,8 @@ class VectorBase(Vector): # Leave slots out of constraints to find which slot constraint results in no entities being found for constraint_slot in constraints: - state = [[slot, value] for slot, value in constraints.items() if slot != constraint_slot] + state = [[slot, value] for slot, + value in constraints.items() if slot != constraint_slot] entities = self.db.query(domain, state, topk=1) if entities: return constraint_slot @@ -378,7 +381,8 @@ class VectorBase(Vector): for domint in nooffer: domain, intent = domint.split('-') slot = self.find_nooffer_slot(domain) - action[domint] = [[slot, '1']] if slot != 'none' else [[slot, 'none']] + action[domint] = [[slot, '1'] + ] if slot != 'none' else [[slot, 'none']] # Randomly select booking constraint "causing" no_book nobook = [domint for domint in action if 'nobook' in domint] @@ -386,12 +390,14 @@ class VectorBase(Vector): domain, intent = domint.split('-') if domain in self.state: slots = self.state[domain] - slots = [slot for slot, i in slots.items() if i and 'book' in slot] + slots = [slot for slot, i in slots.items() + if i and 'book' in slot] slots.append('none') slot = np.random.choice(slots) else: slot = 'none' - action[domint] = [[slot, '1']] if slot != 'none' else [[slot, 'none']] + action[domint] = [[slot, '1'] + ] if slot != 'none' else [[slot, 'none']] if self.always_inform_booking_reference: action = self.add_booking_reference(action) @@ -405,9 +411,9 @@ class VectorBase(Vector): for [item, idx] in action[key]: if index != -1 and index != idx and idx != '?': pass - #logging.debug( + # logging.debug( # "System is likely refering multiple entities within this turn") - #logging.debug(action[key]) + # logging.debug(action[key]) index = idx action = lexicalize_da(action, entities, self.state, self.requestable) diff --git a/convlab2/util/custom_util.py b/convlab2/util/custom_util.py index 8ca73ff60930ba8e5f8d39052fbbbf472bcce193..845316d3839cdb89b8dff84903d35e0d7e2788a6 100644 --- a/convlab2/util/custom_util.py +++ b/convlab2/util/custom_util.py @@ -7,7 +7,6 @@ import json import zipfile import numpy as np import torch -from datasets import load_dataset from tensorboardX import SummaryWriter from convlab2.util.file_util import cached_path from convlab2.policy.evaluate_distributed import evaluate_distributed @@ -19,6 +18,8 @@ from convlab2.dst.rule.multiwoz import RuleDST from convlab2.policy.rule.multiwoz import RulePolicy from convlab2.evaluator.multiwoz_eval import MultiWozEvaluator from convlab2.util import load_dataset +from convlab2.policy.rule.multiwoz.policy_agenda_multiwoz import Goal + import shutil @@ -119,18 +120,22 @@ def log_start_args(conf): f"We use {conf['model']['num_eval_dialogues']} dialogues for evaluation.") -def save_best(policy_sys, best_complete_rate, best_success_rate, complete_rate, success_rate, save_path): +def save_best(policy_sys, best_complete_rate, best_success_rate, best_return, complete_rate, success_rate, avg_return, + save_path): # policy_sys.save(save_path, "best") - if success_rate > best_success_rate: + if avg_return > best_return: logging.info("Saving best policy.") 
policy_sys.save(save_path, "best") + best_return = avg_return + if success_rate > best_success_rate: best_success_rate = success_rate if complete_rate > best_complete_rate: best_complete_rate = complete_rate # policy_sys.save(save_path, "best") logging.info( - f"Best Complete Rate: {best_complete_rate}, Best Success Rate: {best_success_rate}") - return best_complete_rate, best_success_rate + f"Best Complete Rate: {best_complete_rate}, Best Success Rate: {best_success_rate}, " + f"Best Average Return: {best_return}") + return best_complete_rate, best_success_rate, best_return def eval_policy(conf, policy_sys, env, sess, save_eval, log_save_path): @@ -432,6 +437,107 @@ def act_dict_to_flat_tuple(acts): tuples.append([intent, domain, slot, value]) +def create_goals(goal_generator, num_goals, single_domains=False, allowed_domains=None): + + collected_goals = [] + while len(collected_goals) != num_goals: + goal = Goal(goal_generator) + if single_domains and len(goal.domain_goals) > 1: + continue + if allowed_domains is not None and not set(goal.domain_goals).issubset(set(allowed_domains)): + continue + collected_goals.append(goal) + return collected_goals + + +def map_class(cls_path: str): + """ + Map to class via package text path + :param cls_path: str, path with `convlab2` project directory as relative path, separator with `,` + E.g `convlab2.nlu.svm.camrest.nlu.SVMNLU` + :return: class + """ + pkgs = cls_path.split('.') + cls = __import__('.'.join(pkgs[:-1])) + for pkg in pkgs[1:]: + cls = getattr(cls, pkg) + return cls + + +def get_config(filepath, args) -> dict: + """ + The configuration file is used to create all the information needed for the deployment, + and the necessary security monitoring has been performed, including the mapping of the class. 
+ :param filepath: str, dest config file path + :return: dict + """ + + conf = load_config_file(filepath) + + # add project root dir + sys.path.append(os.path.abspath(os.path.join( + os.path.dirname(__file__), os.path.pardir))) + + for arg in args: + if len(arg) == 3: + conf[arg[0]][arg[1]] = arg[2] + if len(arg) == 4: + conf[arg[0]][arg[1]][arg[2]] = arg[3] + if len(arg) == 5: + conf[arg[0]][arg[1]][arg[2]][arg[3]] = arg[4] + + # Autoload uncertainty settings from policy based on the tracker used + dst_name = [model for model in conf['dst_sys']] + dst_name = dst_name[0] if dst_name else None + vec_name = [model for model in conf['vectorizer_sys']] + vec_name = vec_name[0] if vec_name else None + if dst_name and 'setsumbt' in dst_name.lower(): + if 'get_confidence_scores' in conf['dst_sys'][dst_name]['ini_params']: + conf['vectorizer_sys'][vec_name]['ini_params']['use_confidence_scores'] = conf['dst_sys'][dst_name]['ini_params']['get_confidence_scores'] + else: + conf['vectorizer_sys'][vec_name]['ini_params']['use_confidence_scores'] = False + if 'return_mutual_info' in conf['dst_sys'][dst_name]['ini_params']: + conf['vectorizer_sys'][vec_name]['ini_params']['use_mutual_info'] = conf['dst_sys'][dst_name]['ini_params']['return_mutual_info'] + else: + conf['vectorizer_sys'][vec_name]['ini_params']['use_mutual_info'] = False + if 'return_entropy' in conf['dst_sys'][dst_name]['ini_params']: + conf['vectorizer_sys'][vec_name]['ini_params']['use_entropy'] = conf['dst_sys'][dst_name]['ini_params']['return_entropy'] + else: + conf['vectorizer_sys'][vec_name]['ini_params']['use_entropy'] = False + + from convlab2.nlu import NLU + from convlab2.dst import DST + from convlab2.policy import Policy + from convlab2.nlg import NLG + + modules = ['vectorizer_sys', 'nlu_sys', 'dst_sys', 'sys_nlg', + 'nlu_usr', 'dst_usr', 'policy_usr', 'usr_nlg'] + + # Syncronise all seeds + if 'seed' in conf['model']: + for module in modules: + module_name = [model for model in conf[module]] + module_name = module_name[0] if module_name else None + if conf[module] and module_name: + if 'ini_params' in conf[module][module_name]: + if 'seed' in conf[module][module_name]['ini_params']: + conf[module][module_name]['ini_params']['seed'] = conf['model']['seed'] + + # for each unit in modules above, create model save into conf + for unit in modules: + if conf[unit] == {}: + conf[unit + '_activated'] = None + else: + for (model, infos) in conf[unit].items(): + cls_path = infos.get('class_path', '') + cls = map_class(cls_path) + conf[unit + '_class'] = cls + conf[unit + '_activated'] = conf[unit + + '_class'](**conf[unit][model]['ini_params']) + print("Loaded " + model + " for " + unit) + return conf + + if __name__ == '__main__': get_goal_distribution() diff --git a/convlab2/util/multiwoz/state.py b/convlab2/util/multiwoz/state.py index 8f9aad11a074a8461c583810af4a388edda19d11..5b65ba066b33cf9cd1e2fb49515406d962f03588 100755 --- a/convlab2/util/multiwoz/state.py +++ b/convlab2/util/multiwoz/state.py @@ -7,3 +7,94 @@ def default_state(): terminated=False, history=[]) return state + + +def default_state_old(): + state = dict(user_action=[], + system_action=[], + belief_state={}, + request_state={}, + terminated=False, + history=[]) + state['belief_state'] = { + "police": { + "book": { + "booked": [] + }, + "semi": {} + }, + "hotel": { + "book": { + "booked": [], + "people": "", + "day": "", + "stay": "" + }, + "semi": { + "name": "", + "area": "", + "parking": "", + "pricerange": "", + "stars": "", + "internet": "", + "type": "" + } 
+ }, + "attraction": { + "book": { + "booked": [] + }, + "semi": { + "type": "", + "name": "", + "area": "" + } + }, + "restaurant": { + "book": { + "booked": [], + "people": "", + "day": "", + "time": "" + }, + "semi": { + "food": "", + "pricerange": "", + "name": "", + "area": "", + } + }, + "hospital": { + "book": { + "booked": [] + }, + "semi": { + "department": "" + } + }, + "taxi": { + "book": { + "booked": [] + }, + "semi": { + "leaveAt": "", + "destination": "", + "departure": "", + "arriveBy": "" + } + }, + "train": { + "book": { + "booked": [], + "people": "" + }, + "semi": { + "leaveAt": "", + "destination": "", + "day": "", + "arriveBy": "", + "departure": "" + } + } + } + return state
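
Usage sketch (illustrative, not part of the patch): with the refactored evaluate.py interface shown above, the policy checkpoint is read from model.load_path inside the config JSON rather than a --load_path flag, so a typical evaluation run would look like

    python convlab2/policy/evaluate.py --model_name PPO --config_path convlab2/policy/ppo/semantic_level_config.json --verbose

or, programmatically (same assumptions):

    from convlab2.policy.evaluate import evaluate
    evaluate(config_path="convlab2/policy/ppo/semantic_level_config.json", model_name="PPO", verbose=True)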