diff --git a/.gitignore b/.gitignore index 4e467920e8bcde078b70436a7297f84669d54c9a..b8590175ec627a7a5ac34b117b814962e0037997 100644 --- a/.gitignore +++ b/.gitignore @@ -86,3 +86,4 @@ test.py *.egg-info pre-trained-models/ +venv \ No newline at end of file diff --git a/convlab2/policy/dqn/dqn.py b/convlab2/policy/dqn/dqn.py index 3a029290ca9cb2591c2ee5ae44e832568c5eb623..f26ee735525b2d6cbbfb32cf1d7b9ce02d979613 100644 --- a/convlab2/policy/dqn/dqn.py +++ b/convlab2/policy/dqn/dqn.py @@ -10,7 +10,7 @@ import copy from convlab2.policy.policy import Policy from convlab2.policy.rlmodule import EpsilonGreedyPolicy, MemoryReplay from convlab2.util.train_util import init_logging_handler -from convlab2.policy.vector.vector_multiwoz import MultiWozVector +from convlab2.policy.vector.vector_binary import VectorBinary from convlab2.policy.rule.multiwoz.rule_based_multiwoz_bot import RuleBasedMultiwozBot from convlab2.util.file_util import cached_path import zipfile @@ -42,9 +42,7 @@ class DQN(Policy): # construct multiwoz vector if dataset == 'Multiwoz': - voc_file = os.path.join(root_dir, 'data/multiwoz/sys_da_voc.txt') - voc_opp_file = os.path.join(root_dir, 'data/multiwoz/usr_da_voc.txt') - self.vector = MultiWozVector(voc_file, voc_opp_file, composite_actions=True, vocab_size=cfg['vocab_size']) + self.vector = VectorBinary() #replay memory self.memory = MemoryReplay(cfg['memory_size']) diff --git a/convlab2/policy/evaluate.py b/convlab2/policy/evaluate.py index da5d184f044cf5e7dcf11b15d3ebd61b078fb557..40f2b9dce237c0acae9a491de0c2b1f9c44e53bc 100755 --- a/convlab2/policy/evaluate.py +++ b/convlab2/policy/evaluate.py @@ -9,7 +9,7 @@ import json import logging import os import random -from convlab2.policy.vector.vector_multiwoz import MultiWozVector +from convlab2.policy.vector.vector_binary import VectorBinary import numpy as np import torch @@ -168,7 +168,7 @@ def evaluate(args, dataset_name, model_name, load_path, calculate_reward=True, v if model_name == "PPO": from convlab2.policy.ppo import PPO if load_path: - policy_sys = PPO(False, vectorizer=MultiWozVector()) + policy_sys = PPO(False, vectorizer=VectorBinary()) policy_sys.load(load_path) else: policy_sys = PPO.from_pretrained() @@ -183,7 +183,7 @@ def evaluate(args, dataset_name, model_name, load_path, calculate_reward=True, v else: policy_sys = PG.from_pretrained() elif model_name == "MLE": - from convlab2.policy.mle.multiwoz import MLE + from convlab2.policy.mle import MLE if load_path: policy_sys = MLE() policy_sys.load(load_path) diff --git a/convlab2/policy/gdpl/gdpl.py b/convlab2/policy/gdpl/gdpl.py index 7d213be5fc071c0632950e095194edccf489c255..fcb25a15d6ffab524b769644f720a05db045b32b 100755 --- a/convlab2/policy/gdpl/gdpl.py +++ b/convlab2/policy/gdpl/gdpl.py @@ -8,7 +8,7 @@ import json from convlab2.policy.policy import Policy from convlab2.policy.rlmodule import MultiDiscretePolicy, Value from convlab2.util.train_util import init_logging_handler -from convlab2.policy.vector.vector_multiwoz import MultiWozVector +from convlab2.policy.vector.vector_binary import VectorBinary from convlab2.util.file_util import cached_path import zipfile import sys @@ -38,9 +38,7 @@ class GDPL(Policy): # construct policy and value network if dataset == 'Multiwoz': - voc_file = os.path.join(root_dir, 'data/multiwoz/sys_da_voc.txt') - voc_opp_file = os.path.join(root_dir, 'data/multiwoz/usr_da_voc.txt') - self.vector = MultiWozVector(voc_file, voc_opp_file) + self.vector = VectorBinary() self.policy = MultiDiscretePolicy(self.vector.state_dim, 
cfg['h_dim'], self.vector.da_dim).to(device=DEVICE) self.value = Value(self.vector.state_dim, cfg['hv_dim']).to(device=DEVICE) diff --git a/convlab2/policy/mle/camrest/README.md b/convlab2/policy/mle/camrest/README.md deleted file mode 100755 index 17213a4fff108d1227a875666bc94441d3ad9f98..0000000000000000000000000000000000000000 --- a/convlab2/policy/mle/camrest/README.md +++ /dev/null @@ -1,22 +0,0 @@ -# Imitation on camrest - -Vanilla MLE Policy employs a multi-class classification via Imitation Learning with a set of compositional actions where a compositional action consists of a set of dialog act items. - -## Train - -``` -python train.py -``` - -You can modify *config.json* to change the setting. - -## Data - -data/camrest/[train/val/test].json - -## Performance - -|Dialog act accuracy| -|-| -|0.7459| - diff --git a/convlab2/policy/mle/camrest/__init__.py b/convlab2/policy/mle/camrest/__init__.py deleted file mode 100755 index 270f08f589bed3d7d65de569c5d86169dd344d94..0000000000000000000000000000000000000000 --- a/convlab2/policy/mle/camrest/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from convlab2.policy.mle.camrest.mle import MLE \ No newline at end of file diff --git a/convlab2/policy/mle/camrest/config.json b/convlab2/policy/mle/camrest/config.json deleted file mode 100755 index b5971b0df5d7e19066363619133c7d373cd51642..0000000000000000000000000000000000000000 --- a/convlab2/policy/mle/camrest/config.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "batchsz": 32, - "epoch": 16, - "lr": 0.01, - "save_dir": "save", - "log_dir": "log", - "print_per_batch": 10, - "save_per_epoch": 5, - "h_dim": 10, - "load": "save/best" -} \ No newline at end of file diff --git a/convlab2/policy/mle/camrest/loader.py b/convlab2/policy/mle/camrest/loader.py deleted file mode 100755 index 1fadc13f2cb68f5243a271940b82aa1cda277a83..0000000000000000000000000000000000000000 --- a/convlab2/policy/mle/camrest/loader.py +++ /dev/null @@ -1,55 +0,0 @@ -import os -import json -import pickle -import zipfile -from convlab2.util.camrest.state import default_state -from convlab2.util.dataloader.module_dataloader import ActPolicyDataloader -from convlab2.policy.vector.vector_camrest import CamrestVector - -class ActPolicyDataLoaderCamrest(ActPolicyDataloader): - - def __init__(self): - root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))) - voc_file = os.path.join(root_dir, 'data/camrest/sys_da_voc.txt') - voc_opp_file = os.path.join(root_dir, 'data/camrest/usr_da_voc.txt') - self.vector = CamrestVector(voc_file, voc_opp_file) - - processed_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'processed_data') - if os.path.exists(processed_dir): - print('Load processed data file') - self._load_data(processed_dir) - else: - print('Start preprocessing the dataset') - self._build_data(root_dir, processed_dir) - - def _build_data(self, root_dir, processed_dir): # TODO - raw_data = {} - for part in ['train', 'val', 'test']: - archive = zipfile.ZipFile(os.path.join(root_dir, 'data/camrest/{}.json.zip'.format(part)), 'r') - with archive.open('{}.json'.format(part), 'r') as f: - raw_data[part] = json.load(f) - - self.data = {} - for part in ['train', 'val', 'test']: - self.data[part] = [] - - for key in raw_data[part]: - sess = key['dial'] - state = default_state() - action = {} - for i, turn in enumerate(sess): - state['user_action'] = turn['usr']['dialog_act'] - if i + 1 == len(sess): - state['terminated'] = True - for da in turn['usr']['slu']: - if 
da['slots'][0][0] != 'slot': - state['belief_state'][da['slots'][0][0]] = da['slots'][0][1] - action = turn['sys']['dialog_act'] - self.data[part].append([self.vector.state_vectorize(state), - self.vector.action_vectorize(action)]) - state['system_action'] = turn['sys']['dialog_act'] - - os.makedirs(processed_dir) - for part in ['train', 'val', 'test']: - with open(os.path.join(processed_dir, '{}.pkl'.format(part)), 'wb') as f: - pickle.dump(self.data[part], f) diff --git a/convlab2/policy/mle/camrest/mle.py b/convlab2/policy/mle/camrest/mle.py deleted file mode 100755 index 933f0caff953a53b58ba056f955ffcd83a8c99ca..0000000000000000000000000000000000000000 --- a/convlab2/policy/mle/camrest/mle.py +++ /dev/null @@ -1,30 +0,0 @@ -# -*- coding: utf-8 -*- -import torch -import os -import json -from convlab2.policy.mle.mle import MLEAbstract -from convlab2.policy.rlmodule import MultiDiscretePolicy -from convlab2.policy.vector.vector_camrest import CamrestVector - -DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") - -DEFAULT_DIRECTORY = os.path.join(os.path.dirname(os.path.abspath(__file__)), "models") -DEFAULT_ARCHIVE_FILE = os.path.join(DEFAULT_DIRECTORY, "mle_policy_camrest.zip") - -class MLE(MLEAbstract): - - def __init__(self, - archive_file=DEFAULT_ARCHIVE_FILE, - model_file='https://convlab.blob.core.windows.net/convlab-2/mle_policy_camrest.zip'): - root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))) - - with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.json'), 'r') as f: - cfg = json.load(f) - - voc_file = os.path.join(root_dir, 'data/camrest/sys_da_voc.txt') - voc_opp_file = os.path.join(root_dir, 'data/camrest/usr_da_voc.txt') - self.vector = CamrestVector(voc_file, voc_opp_file) - - self.policy = MultiDiscretePolicy(self.vector.state_dim, cfg['h_dim'], self.vector.da_dim).to(device=DEVICE) - - self.load(archive_file, model_file, cfg['load']) diff --git a/convlab2/policy/mle/camrest/train.py b/convlab2/policy/mle/camrest/train.py deleted file mode 100755 index b6692732cbcc2fca6e1e01143407d3ccb6c68e78..0000000000000000000000000000000000000000 --- a/convlab2/policy/mle/camrest/train.py +++ /dev/null @@ -1,39 +0,0 @@ -import os -import torch -import logging -import json -import sys -root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))) -sys.path.append(root_dir) - -from convlab2.policy.rlmodule import MultiDiscretePolicy -from convlab2.policy.vector.vector_camrest import CamrestVector -from convlab2.policy.mle.train import MLE_Trainer_Abstract -from convlab2.policy.mle.multiwoz.loader import ActPolicyDataLoaderCamrest -from convlab2.util.train_util import init_logging_handler - -DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") - -class MLE_Trainer(MLE_Trainer_Abstract): - def __init__(self, manager, cfg): - self._init_data(manager, cfg) - voc_file = os.path.join(root_dir, 'data/camrest/sys_da_voc.txt') - voc_opp_file = os.path.join(root_dir, 'data/camrest/usr_da_voc.txt') - vector = CamrestVector(voc_file, voc_opp_file) - self.policy = MultiDiscretePolicy(vector.state_dim, cfg['h_dim'], vector.da_dim).to(device=DEVICE) - self.policy.eval() - self.policy_optim = torch.optim.Adam(self.policy.parameters(), lr=cfg['lr']) - -if __name__ == '__main__': - manager = ActPolicyDataLoaderCamrest() - with open('config.json', 'r') as f: - cfg = json.load(f) - init_logging_handler(cfg['log_dir']) - 
agent = MLE_Trainer(manager, cfg) - - logging.debug('start training') - - best = float('inf') - for e in range(cfg['epoch']): - agent.imitating(e) - best = agent.imit_test(e, best) diff --git a/convlab2/policy/mle/multiwoz/config.json b/convlab2/policy/mle/config.json similarity index 100% rename from convlab2/policy/mle/multiwoz/config.json rename to convlab2/policy/mle/config.json diff --git a/convlab2/policy/mle/crosswoz/README.md b/convlab2/policy/mle/crosswoz/README.md deleted file mode 100755 index a0ebf6a8d8018a5beb630ea7e23791714039b39d..0000000000000000000000000000000000000000 --- a/convlab2/policy/mle/crosswoz/README.md +++ /dev/null @@ -1,15 +0,0 @@ -# Imitation on CrossWOZ - -Vanilla MLE Policy employs a multi-class classification via Imitation Learning with a set of compositional actions where a compositional action consists of a set of dialog act items. - -## Train - -``` -python train.py -``` - -You can modify *config.json* to change the setting. - -## Data - -data/crosswoz/[train/val/test].json diff --git a/convlab2/policy/mle/crosswoz/__init__.py b/convlab2/policy/mle/crosswoz/__init__.py deleted file mode 100755 index 63353185bd8c7604156a5a519442f630a2486275..0000000000000000000000000000000000000000 --- a/convlab2/policy/mle/crosswoz/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from convlab2.policy.mle.crosswoz.mle import MLE \ No newline at end of file diff --git a/convlab2/policy/mle/crosswoz/config.json b/convlab2/policy/mle/crosswoz/config.json deleted file mode 100755 index e98b3adcfa890ef276066e03c6bbfe3c25f7489f..0000000000000000000000000000000000000000 --- a/convlab2/policy/mle/crosswoz/config.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "batchsz": 32, - "epoch": 20, - "lr": 0.001, - "save_dir": "save", - "log_dir": "log", - "print_per_batch": 400, - "save_per_epoch": 5, - "h_dim": 100, - "load": "save/best" -} \ No newline at end of file diff --git a/convlab2/policy/mle/crosswoz/evaluate.py b/convlab2/policy/mle/crosswoz/evaluate.py deleted file mode 100755 index b7ce836f41011a43ffec72ef3ceb7c07b9bed984..0000000000000000000000000000000000000000 --- a/convlab2/policy/mle/crosswoz/evaluate.py +++ /dev/null @@ -1,237 +0,0 @@ -from convlab2.policy.mle.crosswoz.mle import MLE -from convlab2.dst.rule.crosswoz.dst import RuleDST -from convlab2.util.crosswoz.state import default_state -from convlab2.policy.rule.crosswoz.rule_simulator import Simulator -from convlab2.dialog_agent import PipelineAgent, BiSession -from convlab2.util.crosswoz.lexicalize import delexicalize_da -from convlab2.nlu.jointBERT.crosswoz.nlu import BERTNLU -from convlab2.nlg.template.crosswoz.nlg import TemplateNLG -from convlab2.nlg.sclstm.crosswoz.sc_lstm import SCLSTM -import os -import zipfile -import json -from copy import deepcopy -import random -import numpy as np -from pprint import pprint -import torch - - -def read_zipped_json(filepath, filename): - archive = zipfile.ZipFile(filepath, 'r') - return json.load(archive.open(filename)) - - -def calculateF1(predict_golden): - TP, FP, FN = 0, 0, 0 - for item in predict_golden: - predicts = item['predict'] - labels = item['golden'] - for quad in predicts: - if quad in labels: - TP += 1 - else: - FP += 1 - for quad in labels: - if quad not in predicts: - FN += 1 - print(TP, FP, FN) - precision = 1.0 * TP / (TP + FP) if (TP + FP) else 0. - recall = 1.0 * TP / (TP + FN) if (TP + FN) else 0. - F1 = 2.0 * precision * recall / (precision + recall) if (precision + recall) else 0. 
- return precision, recall, F1 - - -def evaluate_corpus_f1(policy, data, goal_type=None): - dst = RuleDST() - da_predict_golden = [] - delex_da_predict_golden = [] - for task_id, sess in data.items(): - if goal_type and sess['type']!=goal_type: - continue - dst.init_session() - for i, turn in enumerate(sess['messages']): - if turn['role'] == 'usr': - dst.update(usr_da=turn['dialog_act']) - if i + 2 == len(sess): - dst.state['terminated'] = True - else: - for domain, svs in turn['sys_state'].items(): - for slot, value in svs.items(): - if slot != 'selectedResults': - dst.state['belief_state'][domain][slot] = value - golden_da = turn['dialog_act'] - - predict_da = policy.predict(deepcopy(dst.state)) - # print(golden_da) - # print(predict_da) - # print() - # if 'Select' in [x[0] for x in sess['messages'][i - 1]['dialog_act']]: - da_predict_golden.append({ - 'predict': predict_da, - 'golden': golden_da - }) - delex_da_predict_golden.append({ - 'predict': delexicalize_da(predict_da), - 'golden': delexicalize_da(golden_da) - }) - # print(delex_da_predict_golden[-1]) - dst.state['system_action'] = golden_da - # break - print('origin precision/recall/f1:', calculateF1(da_predict_golden)) - print('delex precision/recall/f1:', calculateF1(delex_da_predict_golden)) - - -def end2end_evaluate_simulation(policy): - nlu = BERTNLU() - nlg_usr = TemplateNLG(is_user=True, mode='auto_manual') - nlg_sys = TemplateNLG(is_user=False, mode='auto_manual') - # nlg_usr = SCLSTM(is_user=True, use_cuda=False) - # nlg_sys = SCLSTM(is_user=False, use_cuda=False) - usr_policy = Simulator() - usr_agent = PipelineAgent(nlu, None, usr_policy, nlg_usr, name='user') - sys_policy = policy - sys_dst = RuleDST() - sys_agent = PipelineAgent(nlu, sys_dst, sys_policy, nlg_sys, name='sys') - sess = BiSession(sys_agent=sys_agent, user_agent=usr_agent) - - task_finish = {'All': list(), '单领域': list(), '独立多领域': list(), '独立多领域+交通': list(), '不独立多领域': list(), - '不独立多领域+交通': list()} - simulate_sess_num = 100 - repeat = 10 - random_seed = 2019 - random.seed(random_seed) - np.random.seed(random_seed) - torch.manual_seed(random_seed) - random_seeds = [random.randint(1, 2**32-1) for _ in range(simulate_sess_num * repeat * 10000)] - while True: - sys_response = '' - random_seed = random_seeds[0] - random.seed(random_seed) - np.random.seed(random_seed) - torch.manual_seed(random_seed) - random_seeds.pop(0) - sess.init_session() - # print(usr_policy.goal_type) - if len(task_finish[usr_policy.goal_type]) == simulate_sess_num*repeat: - continue - for i in range(15): - sys_response, user_response, session_over, reward = sess.next_turn(sys_response) - # print('user:', user_response) - # print('sys:', sys_response) - # print(session_over, reward) - # print() - if session_over is True: - task_finish['All'].append(1) - task_finish[usr_policy.goal_type].append(1) - break - else: - task_finish['All'].append(0) - task_finish[usr_policy.goal_type].append(0) - print([len(x) for x in task_finish.values()]) - # print(min([len(x) for x in task_finish.values()])) - if len(task_finish['All']) % 100 == 0: - for k, v in task_finish.items(): - print(k) - all_samples = [] - for i in range(repeat): - samples = v[i * simulate_sess_num:(i + 1) * simulate_sess_num] - all_samples += samples - print(sum(samples), len(samples), (sum(samples) / len(samples)) if len(samples) else 0) - print('avg', (sum(all_samples) / len(all_samples)) if len(all_samples) else 0) - if min([len(x) for x in task_finish.values()]) == simulate_sess_num*repeat: - break - # 
pprint(usr_policy.original_goal) - # pprint(task_finish) - print('task_finish') - for k, v in task_finish.items(): - print(k) - all_samples = [] - for i in range(repeat): - samples = v[i * simulate_sess_num:(i + 1) * simulate_sess_num] - all_samples += samples - print(sum(samples), len(samples), (sum(samples) / len(samples)) if len(samples) else 0) - print('avg', (sum(all_samples) / len(all_samples)) if len(all_samples) else 0) - - -def da_evaluate_simulation(policy): - usr_policy = Simulator() - usr_agent = PipelineAgent(None, None, usr_policy, None, name='user') - sys_policy = policy - sys_dst = RuleDST() - sys_agent = PipelineAgent(None, sys_dst, sys_policy, None, name='sys') - sess = BiSession(sys_agent=sys_agent, user_agent=usr_agent) - - task_finish = {'All': list(), '单领域': list(), '独立多领域': list(), '独立多领域+交通': list(), '不独立多领域': list(), - '不独立多领域+交通': list()} - simulate_sess_num = 100 - repeat = 10 - random_seed = 2019 - random.seed(random_seed) - np.random.seed(random_seed) - torch.manual_seed(random_seed) - random_seeds = [random.randint(1, 2**32-1) for _ in range(simulate_sess_num * repeat * 10000)] - while True: - sys_response = [] - random_seed = random_seeds[0] - random.seed(random_seed) - np.random.seed(random_seed) - torch.manual_seed(random_seed) - random_seeds.pop(0) - sess.init_session() - # print(usr_policy.goal_type) - if len(task_finish[usr_policy.goal_type]) == simulate_sess_num*repeat: - continue - for i in range(15): - sys_response, user_response, session_over, reward = sess.next_turn(sys_response) - # print('user:', user_response) - # print('sys:', sys_response) - # print(session_over, reward) - # print() - if session_over is True: - # pprint(sys_agent.tracker.state) - task_finish['All'].append(1) - task_finish[usr_policy.goal_type].append(1) - break - else: - task_finish['All'].append(0) - task_finish[usr_policy.goal_type].append(0) - print([len(x) for x in task_finish.values()]) - # print(min([len(x) for x in task_finish.values()])) - if len(task_finish['All']) % 100 == 0: - for k, v in task_finish.items(): - print(k) - all_samples = [] - for i in range(repeat): - samples = v[i * simulate_sess_num:(i + 1) * simulate_sess_num] - all_samples += samples - print(sum(samples), len(samples), (sum(samples) / len(samples)) if len(samples) else 0) - print('avg', (sum(all_samples) / len(all_samples)) if len(all_samples) else 0) - if min([len(x) for x in task_finish.values()]) == simulate_sess_num*repeat: - break - # pprint(usr_policy.original_goal) - # pprint(task_finish) - print('task_finish') - for k, v in task_finish.items(): - print(k) - all_samples = [] - for i in range(repeat): - samples = v[i * simulate_sess_num:(i + 1) * simulate_sess_num] - all_samples += samples - print(sum(samples), len(samples), (sum(samples) / len(samples)) if len(samples) else 0) - print('avg', (sum(all_samples) / len(all_samples)) if len(all_samples) else 0) - - -if __name__ == '__main__': - random_seed = 2019 - random.seed(random_seed) - np.random.seed(random_seed) - torch.manual_seed(random_seed) - test_data = os.path.abspath(os.path.join(os.path.abspath(__file__),'../../../../../data/crosswoz/test.json.zip')) - test_data = read_zipped_json(test_data, 'test.json') - policy = MLE() - for goal_type in ['单领域','独立多领域','独立多领域+交通','不独立多领域','不独立多领域+交通',None]: - print(goal_type) - evaluate_corpus_f1(policy, test_data, goal_type=goal_type) - da_evaluate_simulation(policy) - end2end_evaluate_simulation(policy) diff --git a/convlab2/policy/mle/crosswoz/loader.py b/convlab2/policy/mle/crosswoz/loader.py 
deleted file mode 100755 index c29c11cabdfc95de0ab220114e3d08165962d695..0000000000000000000000000000000000000000 --- a/convlab2/policy/mle/crosswoz/loader.py +++ /dev/null @@ -1,101 +0,0 @@ -import os -import json -import pickle -import zipfile -import torch -import torch.utils.data as data -from convlab2.util.crosswoz.state import default_state -from convlab2.dst.rule.crosswoz.dst import RuleDST -from convlab2.policy.vector.vector_crosswoz import CrossWozVector -from copy import deepcopy - - -class PolicyDataLoaderCrossWoz(): - - def __init__(self): - root_dir = os.path.dirname( - os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))) - voc_file = os.path.join(root_dir, 'data/crosswoz/sys_da_voc.json') - voc_opp_file = os.path.join(root_dir, 'data/crosswoz/usr_da_voc.json') - self.vector = CrossWozVector(sys_da_voc_json=voc_file, usr_da_voc_json=voc_opp_file) - - processed_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'processed_data') - if os.path.exists(processed_dir): - print('Load processed data file') - self._load_data(processed_dir) - else: - print('Start preprocessing the dataset') - self._build_data(root_dir, processed_dir) - - def _build_data(self, root_dir, processed_dir): - raw_data = {} - for part in ['train', 'val', 'test']: - archive = zipfile.ZipFile(os.path.join(root_dir, 'data/crosswoz/{}.json.zip'.format(part)), 'r') - with archive.open('{}.json'.format(part), 'r') as f: - raw_data[part] = json.load(f) - - self.data = {} - # for cur domain update - dst = RuleDST() - for part in ['train', 'val', 'test']: - self.data[part] = [] - - for key in raw_data[part]: - sess = raw_data[part][key]['messages'] - dst.init_session() - for i, turn in enumerate(sess): - if turn['role'] == 'usr': - dst.state['user_action'] = turn['dialog_act'] - dst.update(usr_da=turn['dialog_act']) - if i + 2 == len(sess): - dst.state['terminated'] = True - else: - for domain, svs in turn['sys_state'].items(): - for slot, value in svs.items(): - if slot != 'selectedResults': - dst.state['belief_state'][domain][slot] = value - action = turn['dialog_act'] - self.data[part].append([self.vector.state_vectorize(deepcopy(dst.state)), - self.vector.action_vectorize(action)]) - dst.state['system_action'] = turn['dialog_act'] - - os.makedirs(processed_dir) - for part in ['train', 'val', 'test']: - with open(os.path.join(processed_dir, '{}.pkl'.format(part)), 'wb') as f: - pickle.dump(self.data[part], f) - - def _load_data(self, processed_dir): - self.data = {} - for part in ['train', 'val', 'test']: - with open(os.path.join(processed_dir, '{}.pkl'.format(part)), 'rb') as f: - self.data[part] = pickle.load(f) - - def create_dataset(self, part, batchsz): - print('Start creating {} dataset'.format(part)) - s = [] - a = [] - for item in self.data[part]: - s.append(torch.Tensor(item[0])) - a.append(torch.Tensor(item[1])) - s = torch.stack(s) - a = torch.stack(a) - dataset = Dataset(s, a) - dataloader = data.DataLoader(dataset, batchsz, True) - print('Finish creating {} dataset'.format(part)) - return dataloader - - -class Dataset(data.Dataset): - def __init__(self, s_s, a_s): - self.s_s = s_s - self.a_s = a_s - self.num_total = len(s_s) - - def __getitem__(self, index): - s = self.s_s[index] - a = self.a_s[index] - return s, a - - def __len__(self): - return self.num_total - diff --git a/convlab2/policy/mle/crosswoz/mle.py b/convlab2/policy/mle/crosswoz/mle.py deleted file mode 100755 index 
ef65fd72918719c2d14345329c0c6dc69c21a0b0..0000000000000000000000000000000000000000 --- a/convlab2/policy/mle/crosswoz/mle.py +++ /dev/null @@ -1,44 +0,0 @@ -# -*- coding: utf-8 -*- -import torch -import os -import json -import zipfile -from convlab2.util.file_util import cached_path -from convlab2.policy.mle.mle import MLEAbstract -from convlab2.policy.rlmodule import MultiDiscretePolicy -from convlab2.policy.vector.vector_crosswoz import CrossWozVector - -DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") - -DEFAULT_DIRECTORY = os.path.join(os.path.dirname(os.path.abspath(__file__)), "models") -DEFAULT_ARCHIVE_FILE = os.path.join(DEFAULT_DIRECTORY, "mle_policy_crosswoz.zip") - - -class MLE(MLEAbstract): - - def __init__(self, - archive_file=DEFAULT_ARCHIVE_FILE, - model_file='https://convlab.blob.core.windows.net/convlab-2/mle_policy_crosswoz.zip'): - root_dir = os.path.dirname( - os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))) - - with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.json'), 'r') as f: - cfg = json.load(f) - - voc_file = os.path.join(root_dir, 'data/crosswoz/sys_da_voc.json') - voc_opp_file = os.path.join(root_dir, 'data/crosswoz/usr_da_voc.json') - self.vector = CrossWozVector(sys_da_voc_json=voc_file, usr_da_voc_json=voc_opp_file) - - self.policy = MultiDiscretePolicy(self.vector.state_dim, cfg['h_dim'], self.vector.sys_da_dim).to(device=DEVICE) - - if not os.path.isfile(archive_file): - if not model_file: - raise Exception("No model for MLE Policy is specified!") - archive_file = cached_path(model_file) - model_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'save') - if not os.path.exists(model_dir): - os.mkdir(model_dir) - if not os.path.exists(os.path.join(model_dir, 'best_mle.pol.mdl')): - archive = zipfile.ZipFile(archive_file, 'r') - archive.extractall(model_dir) - self.load_from_pretrained(archive_file, model_file, cfg['load']) diff --git a/convlab2/policy/mle/crosswoz/train.py b/convlab2/policy/mle/crosswoz/train.py deleted file mode 100755 index f8877c9cfa78d4baeac0e230a7d6c8e16f9c2c35..0000000000000000000000000000000000000000 --- a/convlab2/policy/mle/crosswoz/train.py +++ /dev/null @@ -1,160 +0,0 @@ -import os -import torch -import logging -import torch.nn as nn -import json -import pickle -import sys -import random -import numpy as np - -root_dir = os.path.dirname( - os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))) -sys.path.append(root_dir) - -from convlab2.policy.rlmodule import MultiDiscretePolicy -from convlab2.policy.vector.vector_crosswoz import CrossWozVector -from convlab2.policy.mle.crosswoz.loader import PolicyDataLoaderCrossWoz -from convlab2.util.train_util import to_device, init_logging_handler - -DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") - - -class MLE_Trainer(): - def __init__(self, manager, cfg): - self.data_train = manager.create_dataset('train', cfg['batchsz']) - self.data_valid = manager.create_dataset('val', cfg['batchsz']) - self.data_test = manager.create_dataset('test', cfg['batchsz']) - self.save_dir = cfg['save_dir'] - self.print_per_batch = cfg['print_per_batch'] - self.save_per_epoch = cfg['save_per_epoch'] - - voc_file = os.path.join(root_dir, 'data/crosswoz/sys_da_voc.json') - voc_opp_file = os.path.join(root_dir, 'data/crosswoz/usr_da_voc.json') - vector = CrossWozVector(voc_file, voc_opp_file) - self.policy = MultiDiscretePolicy(vector.state_dim, 
cfg['h_dim'], vector.sys_da_dim).to(device=DEVICE) - self.policy.eval() - self.policy_optim = torch.optim.Adam(self.policy.parameters(), lr=cfg['lr']) - self.multi_entropy_loss = nn.MultiLabelSoftMarginLoss() - - def policy_loop(self, data): - s, target_a = to_device(data) - a_weights = self.policy(s) - - loss_a = self.multi_entropy_loss(a_weights, target_a) - return loss_a - - def imitating(self, epoch): - """ - pretrain the policy by simple imitation learning (behavioral cloning) - """ - self.policy.train() - a_loss = 0. - for i, data in enumerate(self.data_train): - self.policy_optim.zero_grad() - loss_a = self.policy_loop(data) - a_loss += loss_a.item() - loss_a.backward() - self.policy_optim.step() - - if (i + 1) % self.print_per_batch == 0: - a_loss /= self.print_per_batch - logging.debug('<<dialog policy>> epoch {}, iter {}, loss_a:{}'.format(epoch, i, a_loss)) - a_loss = 0. - - if (epoch + 1) % self.save_per_epoch == 0: - self.save(self.save_dir, epoch) - self.policy.eval() - - def imit_test(self, epoch, best): - """ - provide an unbiased evaluation of the policy fit on the training dataset - """ - a_loss = 0. - for i, data in enumerate(self.data_valid): - loss_a = self.policy_loop(data) - a_loss += loss_a.item() - - a_loss /= len(self.data_valid) - logging.debug('<<dialog policy>> validation, epoch {}, loss_a:{}'.format(epoch, a_loss)) - if a_loss < best: - logging.info('<<dialog policy>> best model saved') - best = a_loss - self.save(self.save_dir, 'best') - - a_loss = 0. - for i, data in enumerate(self.data_test): - loss_a = self.policy_loop(data) - a_loss += loss_a.item() - - a_loss /= len(self.data_test) - logging.debug('<<dialog policy>> test, epoch {}, loss_a:{}'.format(epoch, a_loss)) - return best - - def test(self): - def f1(a, target): - TP, FP, FN = 0, 0, 0 - real = target.nonzero().tolist() - predict = a.nonzero().tolist() - # print(real) - # print(predict) - # print() - for item in real: - if item in predict: - TP += 1 - else: - FN += 1 - for item in predict: - if item not in real: - FP += 1 - return TP, FP, FN - - a_TP, a_FP, a_FN = 0, 0, 0 - for i, data in enumerate(self.data_test): - s, target_a = to_device(data) - a_weights = self.policy(s) - a = a_weights.ge(0) - # TODO: fix batch F1 - TP, FP, FN = f1(a, target_a) - a_TP += TP - a_FP += FP - a_FN += FN - - prec = a_TP / (a_TP + a_FP) - rec = a_TP / (a_TP + a_FN) - F1 = 2 * prec * rec / (prec + rec) - print(a_TP, a_FP, a_FN, F1) - - def save(self, directory, epoch): - if not os.path.exists(directory): - os.makedirs(directory) - - torch.save(self.policy.state_dict(), directory + '/' + str(epoch) + '_mle.pol.mdl') - - logging.info('<<dialog policy>> epoch {}: saved network to mdl'.format(epoch)) - - def load(self, filename='save/best'): - policy_mdl = os.path.join(os.path.dirname(os.path.abspath(__file__)), filename + '_mle.pol.mdl') - if os.path.exists(policy_mdl): - self.policy.load_state_dict(torch.load(policy_mdl)) - - -if __name__ == '__main__': - random_seed = 2019 - random.seed(random_seed) - np.random.seed(random_seed) - torch.manual_seed(random_seed) - manager = PolicyDataLoaderCrossWoz() - with open('config.json', 'r') as f: - cfg = json.load(f) - init_logging_handler(cfg['log_dir']) - agent = MLE_Trainer(manager, cfg) - agent.load() - - logging.debug('start training') - - best = float('inf') - for e in range(cfg['epoch']): - agent.imitating(e) - best = agent.imit_test(e, best) - # agent.test() # 5731 1483 1880 0.7731534569983137 diff --git a/convlab2/policy/mle/loader.py b/convlab2/policy/mle/loader.py 
index 349783d8262d7dc137b3541023f069bba2a6772a..6be4faf15cf280b4a531736db71f0a487d20db42 100755 --- a/convlab2/policy/mle/loader.py +++ b/convlab2/policy/mle/loader.py @@ -3,63 +3,87 @@ import pickle import torch import torch.utils.data as data +from convlab2.policy.vector.vector_binary import VectorBinary +from convlab2.util import load_policy_data, load_dataset +from convlab2.util.custom_util import flatten_acts from convlab2.util.multiwoz.state import default_state from convlab2.policy.vector.dataset import ActDataset -from convlab2.util.dataloader.dataset_dataloader import MultiWOZDataloader -from convlab2.util.dataloader.module_dataloader import ActPolicyDataloader -class ActMLEPolicyDataLoader: +class PolicyDataVectorizer: - def __init__(self): - self.vector = None + def __init__(self, dataset_name='multiwoz21', vector=None): + self.dataset_name = dataset_name + if vector is None: + self.vector = VectorBinary(dataset_name) + else: + self.vector = vector + self.process_data() + + def process_data(self): + + processed_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), + f'processed_data/{self.dataset_name}_{type(self.vector).__name__}') + if os.path.exists(processed_dir): + print('Load processed data file') + self._load_data(processed_dir) + else: + print('Start preprocessing the dataset, this can take a while..') + self._build_data(processed_dir) - def _build_data(self, root_dir, processed_dir): + def _build_data(self, processed_dir): self.data = {} - print("Initialise DataLoader") - data_loader = ActPolicyDataloader(dataset_dataloader=MultiWOZDataloader()) - raw_data_all = data_loader.load_data(data_key='all', role='sys') - for part in ['train', 'val', 'test']: - self.data[part] = [] - raw_data = raw_data_all[part] - - for belief_state, context_dialog_act, terminated, dialog_act, goal in \ - zip(raw_data['belief_state'], raw_data['context_dialog_act'], raw_data['terminated'], - raw_data['dialog_act'], raw_data['goal']): + os.makedirs(processed_dir, exist_ok=True) + dataset = load_dataset(self.dataset_name) + data_split = load_policy_data(dataset, context_window_size=2) + + for split in data_split: + self.data[split] = [] + raw_data = data_split[split] + + for data_point in raw_data: state = default_state() - state['belief_state'] = belief_state - state['user_action'] = context_dialog_act[-1] - state['system_action'] = context_dialog_act[-2] if len(context_dialog_act) > 1 else {} - state['terminated'] = terminated - action = dialog_act - self.data[part].append([self.vector.state_vectorize(state), - self.vector.action_vectorize(action)]) - os.makedirs(processed_dir) - for part in ['train', 'val', 'test']: - with open(os.path.join(processed_dir, '{}.pkl'.format(part)), 'wb') as f: - pickle.dump(self.data[part], f) + state['belief_state'] = data_point['context'][-1]['state'] + state['user_action'] = flatten_acts(data_point['context'][-1]['dialogue_acts']) + last_system_act = data_point['context'][-2]['dialogue_acts'] \ + if len(data_point['context']) > 1 else {} + state['system_action'] = flatten_acts(last_system_act) + state['terminated'] = data_point['terminated'] + state['booked'] = data_point['booked'] + dialogue_act = flatten_acts(data_point['dialogue_acts']) + + vectorized_state, mask = self.vector.state_vectorize(state) + vectorized_action = self.vector.action_vectorize(dialogue_act) + self.data[split].append({"state": vectorized_state, "action": vectorized_action, "mask": mask}) + + with open(os.path.join(processed_dir, '{}.pkl'.format(split)), 'wb') as f: + 
pickle.dump(self.data[split], f) + + print("Data processing done.") def _load_data(self, processed_dir): self.data = {} - for part in ['train', 'val', 'test']: + for part in ['train', 'validation', 'test']: with open(os.path.join(processed_dir, '{}.pkl'.format(part)), 'rb') as f: self.data[part] = pickle.load(f) def create_dataset(self, part, batchsz): - print('Start creating {} dataset'.format(part)) - s = [] - a = [] - m = [] + states = [] + actions = [] + masks = [] for item in self.data[part]: - s.append(torch.Tensor(item[0][0])) - a.append(torch.Tensor(item[1])) - m.append(torch.zeros(len(item[1]))) - s = torch.stack(s) - a = torch.stack(a) - m = torch.stack(m) + states.append(torch.Tensor(item['state'])) + actions.append(torch.Tensor(item['action'])) + masks.append(torch.Tensor(item['mask'])) + s = torch.stack(states) + a = torch.stack(actions) + m = torch.stack(masks) dataset = ActDataset(s, a, m) dataloader = data.DataLoader(dataset, batchsz, True) - print('Finish creating {} dataset'.format(part)) return dataloader + + +if __name__ == '__main__': + data_loader = PolicyDataVectorizer() diff --git a/convlab2/policy/mle/mle.py b/convlab2/policy/mle/mle.py index 5a725f3659474e13b33a0472b602f86ab6974d14..08cbd3b995983796fcbb9e4b92d45cb8be9d849d 100755 --- a/convlab2/policy/mle/mle.py +++ b/convlab2/policy/mle/mle.py @@ -1,19 +1,26 @@ # -*- coding: utf-8 -*- +import zipfile +import logging import torch import os -import zipfile +import json + from convlab2.policy.policy import Policy from convlab2.util.file_util import cached_path -import logging +from convlab2.policy.rlmodule import MultiDiscretePolicy +from convlab2.policy.vector.vector_binary import VectorBinary DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") +DEFAULT_DIRECTORY = os.path.join(os.path.dirname(os.path.abspath(__file__)), "models") +DEFAULT_ARCHIVE_FILE = os.path.join(DEFAULT_DIRECTORY, "mle_policy_multiwoz.zip") + class MLEAbstract(Policy): - def __init__(self, archive_file, model_file): - self.vector = None - self.policy = None + def __init__(self, vector, policy): + self.vector = vector + self.policy = policy def predict(self, state): """ @@ -67,3 +74,34 @@ class MLEAbstract(Policy): self.policy.load_state_dict(torch.load(policy_mdl, map_location=DEVICE)) logging.info('<<dialog policy>> loaded checkpoint from file: {}'.format(policy_mdl)) break + + +class MLE(MLEAbstract): + + def __init__(self): + with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.json'), 'r') as f: + cfg = json.load(f) + + self.vector = VectorBinary() + self.policy = MultiDiscretePolicy(self.vector.state_dim, cfg['h_dim'], self.vector.da_dim).to(device=DEVICE) + + @classmethod + def from_pretrained(cls, + archive_file=DEFAULT_ARCHIVE_FILE, + model_file='https://convlab.blob.core.windows.net/convlab-2/mle_policy_multiwoz.zip'): + with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.json'), 'r') as f: + cfg = json.load(f) + model = cls() + model.load_from_pretrained(archive_file, model_file, cfg['load']) + return model + + +class MLEPolicy(MLE): + def __init__(self, + archive_file=DEFAULT_ARCHIVE_FILE, + model_file='https://convlab.blob.core.windows.net/convlab-2/mle_policy_multiwoz.zip'): + super().__init__() + if model_file: + with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.json'), 'r') as f: + cfg = json.load(f) + self.load_from_pretrained(archive_file, model_file, cfg['load']) diff --git a/convlab2/policy/mle/multiwoz/README.md 
b/convlab2/policy/mle/multiwoz/README.md deleted file mode 100755 index 43e38dfcab865084be71e82971ccb1f7c46a2d5f..0000000000000000000000000000000000000000 --- a/convlab2/policy/mle/multiwoz/README.md +++ /dev/null @@ -1,27 +0,0 @@ -# Imitation on multiwoz - -Vanilla MLE Policy employs a multi-class classification via Imitation Learning with a set of compositional actions where a compositional action consists of a set of dialog act items. - -## Train - -``` -python train.py -``` - -You can modify *config.json* to change the setting. - -## Data - -data/multiwoz/[train/val/test].json - -## Trained Model - -Performance: - -| Task Success Rate | -| ------------ | -| 0.56 | - -The model can be downloaded from: - -https://convlab.blob.core.windows.net/convlab-2/mle_policy_multiwoz.zip diff --git a/convlab2/policy/mle/multiwoz/__init__.py b/convlab2/policy/mle/multiwoz/__init__.py deleted file mode 100755 index bfa5bab8f352f411fd46a4cc9878482526fc7b21..0000000000000000000000000000000000000000 --- a/convlab2/policy/mle/multiwoz/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from convlab2.policy.mle.multiwoz.mle import MLE -from convlab2.policy.mle.multiwoz.mle import MLEPolicy \ No newline at end of file diff --git a/convlab2/policy/mle/multiwoz/loader.py b/convlab2/policy/mle/multiwoz/loader.py deleted file mode 100755 index 13ea8f35dcd10eb2b970487c00eee22bb2408a01..0000000000000000000000000000000000000000 --- a/convlab2/policy/mle/multiwoz/loader.py +++ /dev/null @@ -1,25 +0,0 @@ -import os -from convlab2.policy.vector.vector_multiwoz import MultiWozVector -from convlab2.policy.mle.loader import ActMLEPolicyDataLoader - - -class ActMLEPolicyDataLoaderMultiWoz(ActMLEPolicyDataLoader): - - def __init__(self, vectoriser=None): - root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))) - if vectoriser: - self.vector = vectoriser - else: - print("We use vanilla Vectoriser") - self.vector = MultiWozVector() - - processed_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'processed_data') - - if os.path.exists(processed_dir): - print('Load processed data file') - self._load_data(processed_dir) - else: - print('Start preprocessing the dataset') - self._build_data(root_dir, processed_dir) - - diff --git a/convlab2/policy/mle/multiwoz/mle.py b/convlab2/policy/mle/multiwoz/mle.py deleted file mode 100755 index b614b55e733c6b74565a2cdf1079bf3857869dd5..0000000000000000000000000000000000000000 --- a/convlab2/policy/mle/multiwoz/mle.py +++ /dev/null @@ -1,47 +0,0 @@ -# -*- coding: utf-8 -*- -import torch -import os -import json -from convlab2.policy.mle.mle import MLEAbstract -from convlab2.policy.rlmodule import MultiDiscretePolicy -from convlab2.policy.vector.vector_multiwoz import MultiWozVector - -DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") - -DEFAULT_DIRECTORY = os.path.join(os.path.dirname(os.path.abspath(__file__)), "models") -DEFAULT_ARCHIVE_FILE = os.path.join(DEFAULT_DIRECTORY, "mle_policy_multiwoz.zip") - - -class MLE(MLEAbstract): - - def __init__(self): - root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))) - - with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.json'), 'r') as f: - cfg = json.load(f) - - voc_file = os.path.join(root_dir, 'data/multiwoz/sys_da_voc.txt') - voc_opp_file = os.path.join(root_dir, 'data/multiwoz/usr_da_voc.txt') - self.vector = MultiWozVector(voc_file, voc_opp_file) - - 
self.policy = MultiDiscretePolicy(self.vector.state_dim, cfg['h_dim'], self.vector.da_dim).to(device=DEVICE) - - @classmethod - def from_pretrained(cls, - archive_file=DEFAULT_ARCHIVE_FILE, - model_file='https://convlab.blob.core.windows.net/convlab-2/mle_policy_multiwoz.zip'): - with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.json'), 'r') as f: - cfg = json.load(f) - model = cls() - model.load_from_pretrained(archive_file, model_file, cfg['load']) - return model - -class MLEPolicy(MLE): - def __init__(self, - archive_file=DEFAULT_ARCHIVE_FILE, - model_file='https://convlab.blob.core.windows.net/convlab-2/mle_policy_multiwoz.zip'): - super().__init__() - if model_file: - with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.json'), 'r') as f: - cfg = json.load(f) - self.load_from_pretrained(archive_file, model_file, cfg['load']) \ No newline at end of file diff --git a/convlab2/policy/mle/multiwoz/train.py b/convlab2/policy/mle/multiwoz/train.py deleted file mode 100755 index a9e848cabb3c58c5627065cbef3aea740fdcc1f9..0000000000000000000000000000000000000000 --- a/convlab2/policy/mle/multiwoz/train.py +++ /dev/null @@ -1,235 +0,0 @@ -import argparse -import os -import torch -import logging -import json -import sys -from torch import nn - -from convlab2.util.custom_util import set_seed, init_logging, save_config -from convlab2.util.train_util import to_device - -root_dir = os.path.dirname( - os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) -sys.path.append(root_dir) - -from convlab2.policy.rlmodule import MultiDiscretePolicy -from convlab2.policy.vector.vector_multiwoz import MultiWozVector -from convlab2.policy.mle.multiwoz.loader import ActMLEPolicyDataLoaderMultiWoz - -DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") - - -class MLE_Trainer_Abstract(): - def __init__(self, manager, cfg): - self._init_data(manager, cfg) - self.policy = None - self.policy_optim = None - - def _init_data(self, manager, cfg): - self.data_train = manager.create_dataset('train', cfg['batchsz']) - self.data_valid = manager.create_dataset('val', cfg['batchsz']) - self.data_test = manager.create_dataset('test', cfg['batchsz']) - self.save_dir = cfg['save_dir'] - self.multi_entropy_loss = nn.MultiLabelSoftMarginLoss() - - def policy_loop(self, data): - s, target_a, mask = to_device(data) - a_weights = self.policy(s) - - loss_a = self.multi_entropy_loss(a_weights + mask, target_a) - return loss_a - - def imitating(self, epoch): - """ - pretrain the policy by simple imitation learning (behavioral cloning) - """ - self.policy.train() - a_loss = 0. - for i, data in enumerate(self.data_train): - self.policy_optim.zero_grad() - loss_a = self.policy_loop(data) - a_loss += loss_a.item() - loss_a.backward() - self.policy_optim.step() - - self.policy.eval() - - def imit_test(self, epoch, best): - """ - provide an unbiased evaluation of the policy fit on the training dataset - """ - a_loss = 0. - for i, data in enumerate(self.data_valid): - loss_a = self.policy_loop(data) - a_loss += loss_a.item() - - a_loss /= len(self.data_valid) - logging.debug('<<dialog policy>> validation, epoch {}, loss_a:{}'.format(epoch, a_loss)) - if a_loss < best: - logging.info('<<dialog policy>> best model saved') - best = a_loss - self.save(self.save_dir, 'best') - - a_loss = 0. 
- for i, data in enumerate(self.data_test): - loss_a = self.policy_loop(data) - a_loss += loss_a.item() - - a_loss /= len(self.data_test) - logging.debug('<<dialog policy>> test, epoch {}, loss_a:{}'.format(epoch, a_loss)) - return best - - def validate(self): - def f1(a, target): - TP, FP, FN = 0, 0, 0 - real = target.nonzero().tolist() - predict = a.nonzero().tolist() - for item in real: - if item in predict: - TP += 1 - else: - FN += 1 - for item in predict: - if item not in real: - FP += 1 - return TP, FP, FN - - a_TP, a_FP, a_FN = 0, 0, 0 - for i, data in enumerate(self.data_valid): - s, target_a, m = to_device(data) - a_weights = self.policy(s) - a_weights += m - a = a_weights.ge(0) - TP, FP, FN = f1(a, target_a) - a_TP += TP - a_FP += FP - a_FN += FN - - prec = a_TP / (a_TP + a_FP) - rec = a_TP / (a_TP + a_FN) - F1 = 2 * prec * rec / (prec + rec) - return prec, rec, F1 - - def test(self): - def f1(a, target): - TP, FP, FN = 0, 0, 0 - real = target.nonzero().tolist() - predict = a.nonzero().tolist() - for item in real: - if item in predict: - TP += 1 - else: - FN += 1 - for item in predict: - if item not in real: - FP += 1 - return TP, FP, FN - - a_TP, a_FP, a_FN = 0, 0, 0 - for i, data in enumerate(self.data_test): - s, target_a = to_device(data) - a_weights = self.policy(s) - a = a_weights.ge(0) - TP, FP, FN = f1(a, target_a) - a_TP += TP - a_FP += FP - a_FN += FN - - prec = a_TP / (a_TP + a_FP) - rec = a_TP / (a_TP + a_FN) - F1 = 2 * prec * rec / (prec + rec) - print(a_TP, a_FP, a_FN, F1) - - def save(self, directory, epoch): - if not os.path.exists(directory): - os.makedirs(directory) - - torch.save(self.policy.state_dict(), directory + '/supervised.pol.mdl') - - logging.info('<<dialog policy>> epoch {}: saved network to mdl'.format(epoch)) - - -class MLE_Trainer(MLE_Trainer_Abstract): - def __init__(self, manager, cfg): - self._init_data(manager, cfg) - - try: - self.use_entropy = manager.use_entropy - self.use_mutual_info = manager.use_mutual_info - self.use_confidence_scores = manager.use_confidence_scores - except: - self.use_entropy = False - self.use_mutual_info = False - self.use_confidence_scores = False - - vector = MultiWozVector() - # override the loss defined in the MLE_Trainer_Abstract to support pos_weight - pos_weight = cfg['pos_weight'] * torch.ones(vector.da_dim).to(device=DEVICE) - self.multi_entropy_loss = nn.BCEWithLogitsLoss(pos_weight=pos_weight) - logging.info(f"State dimension of policy: {vector.state_dim}") - logging.info(f"Action dimension of policy: {vector.da_dim}") - self.policy = MultiDiscretePolicy(vector.state_dim, cfg['h_dim'], vector.da_dim).to(device=DEVICE) - self.policy.eval() - self.policy_optim = torch.optim.RMSprop(self.policy.parameters(), lr=cfg['lr_supervised'], - weight_decay=cfg['weight_decay']) - - -def arg_parser(): - parser = argparse.ArgumentParser() - - parser.add_argument("--seed", type=int, default=0) - parser.add_argument("--eval_freq", type=int, default=1) - - args = parser.parse_args() - return args - - -if __name__ == '__main__': - - args = arg_parser() - - directory = os.path.dirname(os.path.abspath(__file__)) - with open(os.path.join(directory, 'config.json'), 'r') as f: - cfg = json.load(f) - - logger, tb_writer, current_time, save_path, config_save_path, dir_path, log_save_path = \ - init_logging(os.path.dirname(os.path.abspath(__file__)), "info") - save_config(vars(args), cfg, config_save_path) - - set_seed(args.seed) - logging.info(f"Seed used: {args.seed}") - - manager = ActMLEPolicyDataLoaderMultiWoz() - agent = 
MLE_Trainer(manager, cfg) - - logging.info('Start training') - - best_recall = 0.0 - best_precision = 0.0 - best_f1 = 0.0 - precision = 0 - recall = 0 - f1 = 0 - - for e in range(cfg['epoch']): - agent.imitating(e) - logging.info(f"Epoch: {e}") - - if e % args.eval_freq == 0 and e != 0: - precision, recall, f1 = agent.validate() - - logging.info(f"Precision: {precision}") - logging.info(f"Recall: {recall}") - logging.info(f"F1: {f1}") - - if precision > best_precision: - best_precision = precision - if recall > best_recall: - best_recall = recall - if f1 > best_f1: - best_f1 = f1 - agent.save(save_path, e) - logging.info(f"Best Precision: {best_precision}") - logging.info(f"Best Recall: {best_recall}") - logging.info(f"Best F1: {best_f1}") diff --git a/convlab2/policy/mle/train.py b/convlab2/policy/mle/train.py index 3661ac0504cfda6fe36a901b9dfcf24aa0c53ea5..a958d6603948e988e61fe51df50215eb9d21cab1 100755 --- a/convlab2/policy/mle/train.py +++ b/convlab2/policy/mle/train.py @@ -1,35 +1,46 @@ +import argparse import os import torch import logging -import torch.nn as nn +import json +import sys +from torch import nn + +from convlab2.policy.mle.loader import PolicyDataVectorizer +from convlab2.util.custom_util import set_seed, init_logging, save_config from convlab2.util.train_util import to_device +from convlab2.policy.rlmodule import MultiDiscretePolicy +from convlab2.policy.vector.vector_binary import VectorBinary + +root_dir = os.path.dirname( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) +sys.path.append(root_dir) DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") + class MLE_Trainer_Abstract(): def __init__(self, manager, cfg): self._init_data(manager, cfg) self.policy = None self.policy_optim = None - + def _init_data(self, manager, cfg): self.data_train = manager.create_dataset('train', cfg['batchsz']) - self.data_valid = manager.create_dataset('val', cfg['batchsz']) + self.data_valid = manager.create_dataset('validation', cfg['batchsz']) self.data_test = manager.create_dataset('test', cfg['batchsz']) self.save_dir = cfg['save_dir'] - self.print_per_batch = cfg['print_per_batch'] - self.save_per_epoch = cfg['save_per_epoch'] self.multi_entropy_loss = nn.MultiLabelSoftMarginLoss() - + def policy_loop(self, data): - s, target_a, m = to_device(data) + s, target_a, mask = to_device(data) a_weights = self.policy(s) - - loss_a = self.multi_entropy_loss(a_weights, target_a) + + loss_a = self.multi_entropy_loss(a_weights + mask, target_a) return loss_a - - def imitating(self, epoch): + + def imitating(self): """ pretrain the policy by simple imitation learning (behavioral cloning) """ @@ -41,42 +52,10 @@ class MLE_Trainer_Abstract(): a_loss += loss_a.item() loss_a.backward() self.policy_optim.step() - - if (i+1) % self.print_per_batch == 0: - a_loss /= self.print_per_batch - logging.debug('<<dialog policy>> epoch {}, iter {}, loss_a:{}'.format(epoch, i, a_loss)) - a_loss = 0. - - if (epoch+1) % self.save_per_epoch == 0: - self.save(self.save_dir, epoch) + self.policy.eval() - - def imit_test(self, epoch, best): - """ - provide an unbiased evaluation of the policy fit on the training dataset - """ - a_loss = 0. 
- for i, data in enumerate(self.data_valid): - loss_a = self.policy_loop(data) - a_loss += loss_a.item() - - a_loss /= len(self.data_valid) - logging.debug('<<dialog policy>> validation, epoch {}, loss_a:{}'.format(epoch, a_loss)) - if a_loss < best: - logging.info('<<dialog policy>> best model saved') - best = a_loss - self.save(self.save_dir, 'best') - - a_loss = 0. - for i, data in enumerate(self.data_test): - loss_a = self.policy_loop(data) - a_loss += loss_a.item() - - a_loss /= len(self.data_test) - logging.debug('<<dialog policy>> test, epoch {}, loss_a:{}'.format(epoch, a_loss)) - return best - def test(self): + def validate(self): def f1(a, target): TP, FP, FN = 0, 0, 0 real = target.nonzero().tolist() @@ -90,27 +69,141 @@ class MLE_Trainer_Abstract(): if item not in real: FP += 1 return TP, FP, FN - + a_TP, a_FP, a_FN = 0, 0, 0 for i, data in enumerate(self.data_valid): s, target_a, m = to_device(data) a_weights = self.policy(s) + a_weights += m a = a_weights.ge(0) TP, FP, FN = f1(a, target_a) a_TP += TP a_FP += FP a_FN += FN - + prec = a_TP / (a_TP + a_FP) rec = a_TP / (a_TP + a_FN) F1 = 2 * prec * rec / (prec + rec) - print(prec, rec, F1) + return prec, rec, F1 + + def test(self): + def f1(a, target): + TP, FP, FN = 0, 0, 0 + real = target.nonzero().tolist() + predict = a.nonzero().tolist() + for item in real: + if item in predict: + TP += 1 + else: + FN += 1 + for item in predict: + if item not in real: + FP += 1 + return TP, FP, FN + + a_TP, a_FP, a_FN = 0, 0, 0 + for i, data in enumerate(self.data_test): + s, target_a = to_device(data) + a_weights = self.policy(s) + a = a_weights.ge(0) + TP, FP, FN = f1(a, target_a) + a_TP += TP + a_FP += FP + a_FN += FN + + prec = a_TP / (a_TP + a_FP) + rec = a_TP / (a_TP + a_FN) + F1 = 2 * prec * rec / (prec + rec) + print(a_TP, a_FP, a_FN, F1) def save(self, directory, epoch): if not os.path.exists(directory): os.makedirs(directory) - torch.save(self.policy.state_dict(), directory + '/' + str(epoch) + '_mle.pol.mdl') + torch.save(self.policy.state_dict(), directory + '/supervised.pol.mdl') logging.info('<<dialog policy>> epoch {}: saved network to mdl'.format(epoch)) + +class MLE_Trainer(MLE_Trainer_Abstract): + def __init__(self, manager, vector, cfg): + self._init_data(manager, cfg) + + try: + self.use_entropy = manager.use_entropy + self.use_mutual_info = manager.use_mutual_info + self.use_confidence_scores = manager.use_confidence_scores + except: + self.use_entropy = False + self.use_mutual_info = False + self.use_confidence_scores = False + + # override the loss defined in the MLE_Trainer_Abstract to support pos_weight + pos_weight = cfg['pos_weight'] * torch.ones(vector.da_dim).to(device=DEVICE) + self.multi_entropy_loss = nn.BCEWithLogitsLoss(pos_weight=pos_weight) + self.policy = MultiDiscretePolicy(vector.state_dim, cfg['h_dim'], vector.da_dim).to(device=DEVICE) + self.policy.eval() + self.policy_optim = torch.optim.RMSprop(self.policy.parameters(), lr=cfg['lr_supervised'], + weight_decay=cfg['weight_decay']) + + +def arg_parser(): + parser = argparse.ArgumentParser() + + parser.add_argument("--seed", type=int, default=0) + parser.add_argument("--eval_freq", type=int, default=1) + parser.add_argument("--dataset_name", type=str, default="multiwoz21") + + args = parser.parse_args() + return args + + +if __name__ == '__main__': + + args = arg_parser() + + directory = os.path.dirname(os.path.abspath(__file__)) + with open(os.path.join(directory, 'config.json'), 'r') as f: + cfg = json.load(f) + + logger, tb_writer, current_time, 
save_path, config_save_path, dir_path, log_save_path = \ + init_logging(os.path.dirname(os.path.abspath(__file__)), "info") + save_config(vars(args), cfg, config_save_path) + + set_seed(args.seed) + logging.info(f"Seed used: {args.seed}") + + vector = VectorBinary(dataset_name=args.dataset_name, use_masking=False) + manager = PolicyDataVectorizer(dataset_name=args.dataset_name, vector=vector) + agent = MLE_Trainer(manager, vector, cfg) + + logging.info('Start training') + + best_recall = 0.0 + best_precision = 0.0 + best_f1 = 0.0 + precision = 0 + recall = 0 + f1 = 0 + + for e in range(cfg['epoch']): + agent.imitating() + logging.info(f"Epoch: {e}") + + if e % args.eval_freq == 0 and e != 0: + precision, recall, f1 = agent.validate() + + logging.info(f"Precision: {precision}") + logging.info(f"Recall: {recall}") + logging.info(f"F1: {f1}") + + if precision > best_precision: + best_precision = precision + if recall > best_recall: + best_recall = recall + if f1 > best_f1: + best_f1 = f1 + agent.save(save_path, e) + logging.info(f"Best Precision: {best_precision}") + logging.info(f"Best Recall: {best_recall}") + logging.info(f"Best F1: {best_f1}") diff --git a/convlab2/policy/pg/pg.py b/convlab2/policy/pg/pg.py index fd8f8884aceac5085e082fe3075b34bd7f58cc6b..ddd13e7dfedd921f2faf3468e85f6cd5522bf1e5 100755 --- a/convlab2/policy/pg/pg.py +++ b/convlab2/policy/pg/pg.py @@ -8,7 +8,7 @@ import json from convlab2.policy.policy import Policy from convlab2.policy.rlmodule import MultiDiscretePolicy from convlab2.util.train_util import init_logging_handler -from convlab2.policy.vector.vector_multiwoz import MultiWozVector +from convlab2.policy.vector.vector_binary import VectorBinary from convlab2.util.file_util import cached_path import zipfile import sys @@ -34,9 +34,7 @@ class PG(Policy): init_logging_handler(cfg['log_dir'], logging_mode=logging.INFO) if dataset == 'Multiwoz': - voc_file = os.path.join(root_dir, 'data/multiwoz/sys_da_voc.txt') - voc_opp_file = os.path.join(root_dir, 'data/multiwoz/usr_da_voc.txt') - self.vector = MultiWozVector(voc_file, voc_opp_file) + self.vector = VectorBinary() self.policy = MultiDiscretePolicy(self.vector.state_dim, cfg['h_dim'], self.vector.da_dim).to(device=DEVICE) # self.policy = MultiDiscretePolicy(self.vector.state_dim, cfg['h_dim'], self.vector.da_dim).to(device=DEVICE) diff --git a/convlab2/policy/ppo/ppo.py b/convlab2/policy/ppo/ppo.py index 0ea98a19e0a70c650d8318e3d7b03178196c56f4..b24674fc61edcb945596d9f4d1a7fb64bd78c5b9 100755 --- a/convlab2/policy/ppo/ppo.py +++ b/convlab2/policy/ppo/ppo.py @@ -299,7 +299,7 @@ class PPO(Policy): @staticmethod def load_vectoriser(name): if name == 'base': - from convlab2.policy.vector.vector_multiwoz import MultiWozVector - return MultiWozVector() + from convlab2.policy.vector.vector_binary import VectorBinary + return VectorBinary() @classmethod diff --git a/convlab2/policy/ppo/train_supervised.py b/convlab2/policy/ppo/train_supervised.py deleted file mode 100644 index 5a3976b948e2eeddd01ca765c45cbf19c01ca496..0000000000000000000000000000000000000000 --- a/convlab2/policy/ppo/train_supervised.py +++ /dev/null @@ -1,234 +0,0 @@ -import argparse -import os -import torch -import logging -import json -import sys -from torch import nn -from convlab2.util.train_util import to_device -from data_loaders.utterance_loader import UtteranceDataLoaderVRNN - -root_dir = os.path.dirname( - os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) -sys.path.append(root_dir) - -from convlab2.policy.rlmodule import MultiDiscretePolicy
-from convlab2.policy.vector.vector_multiwoz import MultiWozVector -from convlab2.policy.mle.multiwoz.loader import ActMLEPolicyDataLoaderMultiWoz -from convlab2.util.train_util import init_logging_handler - -DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") - - -class MLE_Trainer_Abstract(): - def __init__(self, manager, cfg): - self._init_data(manager, cfg) - self.policy = None - self.policy_optim = None - - def _init_data(self, manager, cfg): - self.data_train = manager.create_dataset('train', cfg['batchsz']) - self.data_valid = manager.create_dataset('val', cfg['batchsz']) - self.data_test = manager.create_dataset('test', cfg['batchsz']) - self.save_dir = cfg['save_dir'] - self.multi_entropy_loss = nn.MultiLabelSoftMarginLoss() - - def policy_loop(self, data): - s, mask, target_a = to_device(data) - a_weights = self.policy(s) - - loss_a = self.multi_entropy_loss(a_weights + mask, target_a) - return loss_a - - def imitating(self, epoch): - """ - pretrain the policy by simple imitation learning (behavioral cloning) - """ - self.policy.train() - a_loss = 0. - for i, data in enumerate(self.data_train): - self.policy_optim.zero_grad() - loss_a = self.policy_loop(data) - a_loss += loss_a.item() - loss_a.backward() - self.policy_optim.step() - - self.policy.eval() - - def imit_test(self, epoch, best): - """ - provide an unbiased evaluation of the policy fit on the training dataset - """ - a_loss = 0. - for i, data in enumerate(self.data_valid): - loss_a = self.policy_loop(data) - a_loss += loss_a.item() - - a_loss /= len(self.data_valid) - logging.debug('<<dialog policy>> validation, epoch {}, loss_a:{}'.format(epoch, a_loss)) - if a_loss < best: - logging.info('<<dialog policy>> best model saved') - best = a_loss - self.save(self.save_dir, 'best') - - a_loss = 0. 
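
The imitation objective here is multi-label classification over the delexicalised action vocabulary: the abstract trainer defines `MultiLabelSoftMarginLoss`, and the `MLE_Trainer` subclasses override it with `BCEWithLogitsLoss(pos_weight=...)` so that the rare positive action labels are up-weighted. A minimal sketch of that set-up, with an assumed action dimension and `pos_weight` value (in the patch they come from `vector.da_dim` and `cfg['pos_weight']`):

```
# Minimal sketch of the behavioural-cloning loss used by MLE_Trainer.
# da_dim and the pos_weight value are assumed; the patch takes them from the
# vectoriser (vector.da_dim) and cfg['pos_weight'].
import torch
from torch import nn

da_dim = 208                                          # assumed size of the action set
pos_weight = 5.0 * torch.ones(da_dim)                 # up-weight positive labels
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

logits = torch.randn(32, da_dim, requires_grad=True)  # stand-in for policy(s)
target = torch.zeros(32, da_dim)                      # multi-hot ground-truth actions
target[:, 0] = 1.0
loss = criterion(logits, target)
loss.backward()                                       # gradients flow back to the policy
```
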
- for i, data in enumerate(self.data_test): - loss_a = self.policy_loop(data) - a_loss += loss_a.item() - - a_loss /= len(self.data_test) - logging.debug('<<dialog policy>> test, epoch {}, loss_a:{}'.format(epoch, a_loss)) - return best - - def validate(self): - def f1(a, target): - TP, FP, FN = 0, 0, 0 - real = target.nonzero().tolist() - predict = a.nonzero().tolist() - for item in real: - if item in predict: - TP += 1 - else: - FN += 1 - for item in predict: - if item not in real: - FP += 1 - return TP, FP, FN - - a_TP, a_FP, a_FN = 0, 0, 0 - for i, data in enumerate(self.data_valid): - s, m, target_a = to_device(data) - a_weights = self.policy(s) - a_weights += m - a = a_weights.ge(0) - TP, FP, FN = f1(a, target_a) - a_TP += TP - a_FP += FP - a_FN += FN - - prec = a_TP / (a_TP + a_FP) - rec = a_TP / (a_TP + a_FN) - F1 = 2 * prec * rec / (prec + rec) - return prec, rec, F1 - - def test(self): - def f1(a, target): - TP, FP, FN = 0, 0, 0 - real = target.nonzero().tolist() - predict = a.nonzero().tolist() - for item in real: - if item in predict: - TP += 1 - else: - FN += 1 - for item in predict: - if item not in real: - FP += 1 - return TP, FP, FN - - a_TP, a_FP, a_FN = 0, 0, 0 - for i, data in enumerate(self.data_test): - s, target_a = to_device(data) - a_weights = self.policy(s) - a = a_weights.ge(0) - TP, FP, FN = f1(a, target_a) - a_TP += TP - a_FP += FP - a_FN += FN - - prec = a_TP / (a_TP + a_FP) - rec = a_TP / (a_TP + a_FN) - F1 = 2 * prec * rec / (prec + rec) - print(a_TP, a_FP, a_FN, F1) - - def save(self, directory, epoch): - if not os.path.exists(directory): - os.makedirs(directory) - - torch.save(self.policy.state_dict(), directory + '/supervised.pol.mdl') - - logging.info('<<dialog policy>> epoch {}: saved network to mdl'.format(epoch)) - - -class MLE_Trainer(MLE_Trainer_Abstract): - def __init__(self, manager, cfg): - self._init_data(manager, cfg) - - try: - self.use_entropy = manager.use_entropy - self.use_mutual_info = manager.use_mutual_info - self.use_confidence_scores = manager.use_confidence_scores - except: - self.use_entropy = False - self.use_mutual_info = False - self.use_confidence_scores = False - - vector = MultiWozVector() - # override the loss defined in the MLE_Trainer_Abstract to support pos_weight - pos_weight = cfg['pos_weight'] * torch.ones(vector.da_dim).to(device=DEVICE) - self.multi_entropy_loss = nn.BCEWithLogitsLoss(pos_weight=pos_weight) - print("ACTION DIM OF MLE: ", vector.da_dim) - self.policy = MultiDiscretePolicy(vector.state_dim, cfg['h_dim'], vector.da_dim).to(device=DEVICE) - self.policy.eval() - self.policy_optim = torch.optim.RMSprop(self.policy.parameters(), lr=cfg['lr_supervised'], - weight_decay=cfg['weight_decay']) - - -def arg_parser(): - parser = argparse.ArgumentParser() - parser.add_argument('--use_confidence_scores', action='store_true', help="set to 1 if want to train with uncertainty") - parser.add_argument('--use_entropy', action='store_true', help="set to 1 if want to train with entropy") - parser.add_argument('--use_mutual_info', action='store_true', help="set to 1 if want to train with entropy") - parser.add_argument('--utterance_level', action='store_true', help="set to 1 to train on utterance level") - parser.add_argument('--use_masking', action='store_true', help="set to 1 to train using masking") - parser.add_argument("--setsumbt_path", type=str) - - args = parser.parse_args() - return args - - -if __name__ == '__main__': - - args = arg_parser() - - if args.utterance_level: - print("We use Utterance level data for 
pretraining") - print(f"Uncertainty used: {args.use_confidence_scores}") - print(f"Entropy used: {args.use_entropy}") - print(f"Mutual Information used: {args.use_entropy}") - manager = UtteranceDataLoaderVRNN(use_confidence_scores=args.use_confidence_scores, use_entropy=args.use_entropy, - use_action_masking=args.use_masking, tracker_path=args.setsumbt_path, - use_mutual_info=args.use_mutual_info) - else: - print("We train on ground truth level") - manager = ActMLEPolicyDataLoaderMultiWoz() - - directory = os.path.dirname(os.path.abspath(__file__)) - with open(os.path.join(directory, 'config.json'), 'r') as f: - cfg = json.load(f) - - init_logging_handler(cfg['log_dir']) - agent = MLE_Trainer(manager, cfg) - - logging.debug('Start training') - - best_recall = 0.0 - best_precision = 0.0 - best_f1 = 0.0 - - for e in range(cfg['epoch']): - agent.imitating(e) - #best = agent.imit_test(e, best) - print("Epoch: ", e) - precision, recall, f1 = agent.validate() - - if precision > best_precision: - best_precision = precision - if recall > best_recall: - best_recall = recall - if f1 > best_f1: - best_f1 = f1 - agent.save(directory, e) - print("Best Precision: ", best_precision) - print("Best Recall: ", best_recall) - print("Best F1: ", best_f1) diff --git a/convlab2/policy/ppo/train_with_vrnn.py b/convlab2/policy/ppo/train_with_vrnn.py deleted file mode 100644 index f939e664d3af08ad99bab7068836743b3688c725..0000000000000000000000000000000000000000 --- a/convlab2/policy/ppo/train_with_vrnn.py +++ /dev/null @@ -1,447 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Sun Jul 14 16:14:07 2019 -@author: truthless -""" - -import sys, os, logging, random, time -sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) - -import numpy as np -import torch -from tensorboardX import SummaryWriter -from convlab2.dialog_agent.session import BiSession -from convlab2.vrnn_semantic.model import VRNN - - -from torch import multiprocessing as mp -from convlab2.dialog_agent.agent import PipelineAgent -from convlab2.dialog_agent.env import Environment -from convlab2.nlu.svm.multiwoz import SVMNLU -from convlab2.dst.rule.multiwoz import RuleDST -from convlab2.policy.rule.multiwoz import RulePolicy -from convlab2.policy.ppo import PPO -from convlab2.policy.rlmodule import Memory_vrnn -from convlab2.nlg.template.multiwoz import TemplateNLG -from convlab2.evaluator.multiwoz_eval import MultiWozEvaluator -from argparse import ArgumentParser -from convlab2.dialog_agent.session import BiSession -#from convlab2.util.train_util import save_to_bucket, init_logging_handler -from convlab2.policy.vector.vector_multiwoz import MultiWozVector - - -DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") - -root_dir = os.path.dirname( - os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) -voc_file_shrinked = os.path.join(root_dir, 'data/multiwoz/sys_da_voc_shrinked.txt') -voc_opp_file = os.path.join(root_dir, 'data/multiwoz/usr_da_voc.txt') -VECTOR = MultiWozVector(voc_file_shrinked, voc_opp_file, shrink=True) - -try: - mp = mp.get_context('spawn') -except RuntimeError: - pass - - -def save_log_to_bucket(policy_sys, bucket_dir='logs_PPO_masking_no_pretraining'): - try: - save_to_bucket('geishauser', f'convlab_experiments/{bucket_dir}/{policy_sys.save_name}/log.txt', - os.path.join(policy_sys.log_dir, f'log_{policy_sys.current_time}.txt')) - for file in os.listdir(policy_sys.log_dir): - if 'events.out' in file: - save_to_bucket('geishauser', 
f'convlab_experiments/{bucket_dir}/{policy_sys.save_name}/{file}', - os.path.join(policy_sys.log_dir, file)) - except: - logging.info('Could not save to bucket.') - - -def save_best_policy(policy, rate, best_rate, success=True, bucket_dir='logs_PPO_masking_no_pretraining'): - #try block is only for local testing, when we can not access the gcloud storage - try: - if rate > best_rate: - if success: - policy.save(policy.save_dir, best_success_rate=True) - save_to_bucket('geishauser', f'convlab_experiments/{bucket_dir}/' - f'{policy_sys.save_name}/best_success_rate_ppo.val.mdl', - os.path.join(policy.save_dir, policy.current_time + '_best_success_rate_ppo.val.mdl')) - save_to_bucket('geishauser', f'convlab_experiments/{bucket_dir}/' - f'{policy_sys.save_name}/best_success_rate_ppo.pol.mdl', - os.path.join(policy.save_dir, policy.current_time + '_best_success_rate_ppo.pol.mdl')) - else: - policy.save(policy.save_dir, best_complete_rate=True) - save_to_bucket('geishauser', f'convlab_experiments/{bucket_dir}/' - f'{policy_sys.save_name}/best_complete_rate_ppo.val.mdl', - os.path.join(policy.save_dir, policy.current_time + '_best_complete_rate_ppo.val.mdl')) - save_to_bucket('geishauser', f'convlab_experiments/{bucket_dir}/' - f'{policy_sys.save_name}/best_complete_rate_ppo.pol.mdl', - os.path.join(policy.save_dir, policy.current_time + '_best_complete_rate_ppo.pol.mdl')) - return rate - else: - return best_rate - except: - return best_rate - - -def evaluate(dataset_name, load_path=None, calculate_reward=False, policy_sys=None, counter=0, writer=None, evaluator_reward=False): - seed = 20190827 - random.seed(seed) - np.random.seed(seed) - - if dataset_name == 'MultiWOZ': - dst_sys = RuleDST() - - from convlab2.policy.ppo import PPO - if policy_sys is None: - if load_path: - policy_sys = PPO(False) - policy_sys.load(load_path) - else: - policy_sys = PPO.from_pretrained - - dst_usr = None - - policy_usr = RulePolicy(character='usr') - simulator = PipelineAgent(None, None, policy_usr, None, 'user') - - env = Environment(None, simulator, None, dst_sys) - - agent_sys = PipelineAgent(None, dst_sys, policy_sys, None, 'sys') - - evaluator = MultiWozEvaluator() - - sess = BiSession(agent_sys, simulator, None, evaluator) - - seeds = random.randrange(400, 1000000) - - actions = 0.0 - turn_counter = 0.0 - - task_success = {'All_user_sim': [], 'All_evaluator': [], 'total_return': [], 'turns': []} - for seed in range(seeds - 400, seeds): - random.seed(seed) - np.random.seed(seed) - sess.init_session() - sys_response = [] - - total_return = 0.0 - turns = 0 - for i in range(40): - sys_response, user_response, session_over, reward = sess.next_turn(sys_response) - actions += len(sys_response) - turn_counter += 1 - turns = i - if evaluator_reward: - total_return += reward - else: - total_return += simulator.policy.policy.get_reward() - - if session_over is True: - task_succ = sess.evaluator.task_success() - - break - else: - task_succ = 0 - - for key in sess.evaluator.goal: - if key not in task_success: - task_success[key] = [] - else: - task_success[key].append(task_succ) - - task_success['All_user_sim'].append(int(simulator.policy.policy.goal.task_complete())) - task_success['All_evaluator'].append(task_succ) - task_success['total_return'].append(total_return) - task_success['turns'].append(turns) - - with open(os.path.join(policy_sys.log_dir, f'log_{policy_sys.current_time}.txt'), 'a') as log_file: - - for key in task_success: - logging.info( - f'{key} {len(task_success[key])} {np.average(task_success[key]) if 
len(task_success[key]) > 0 else 0}') - - log_file.write(f'{key} {len(task_success[key])} {np.average(task_success[key]) if len(task_success[key]) > 0 else 0}\n') - - logging.info(f"Average number of actions per turn: {actions/turn_counter}") - #log_file.write(f"Average number of actions per turn: {actions/turn_counter}\n") - - if writer is not None: - writer.write_summary(np.average(task_success['All_user_sim']), np.average(task_success['All_evaluator']), - np.average(task_success['turns']), np.average(task_success['total_return']), counter) - - return np.average(task_success['All_user_sim']), np.average(task_success['All_evaluator']) - else: - raise Exception("currently supported dataset: MultiWOZ") - - -def sampler(pid, queue, evt, env, policy, batchsz, vrnn_model=None, mc_samples=1.0): - """ - This is a sampler function, and it will be called by multiprocess.Process to sample data from environment by multiple - processes. - :param pid: process id - :param queue: multiprocessing.Queue, to collect sampled data - :param evt: multiprocessing.Event, to keep the process alive - :param env: environment instance - :param policy: policy network, to generate action from current policy - :param batchsz: total sampled items - :return: - """ - buff = Memory_vrnn() - - # we need to sample batchsz of (state, action, next_state, reward, mask) - # each trajectory contains `trajectory_len` num of items, so we only need to sample - # `batchsz//trajectory_len` num of trajectory totally - # the final sampled number may be larger than batchsz. - - sampled_num = 0 - sampled_traj_num = 0 - traj_len = 50 - real_traj_len = 0 - - while sampled_num < batchsz: - # for each trajectory, we reset the env and get initial state - s = env.reset() - - user_act_list, sys_act_list, s_vec_list, next_s_vec_list, action_list, reward_list, trajectory_list, \ - mask_list, action_mask_list = [], [], [], [], [], [], [], [], [] - - for t in range(traj_len): - - # [s_dim] => [a_dim] - s_vec, action_mask = policy.vector.state_vectorize(s, output_mask=True) - s_vec = torch.Tensor(s_vec) - action_mask = torch.Tensor(action_mask) - - a = policy.predict(s) - - user_act_list.append(policy.vector.retrieve_user_action(s)) - sys_act_list.append(VECTOR.action_vectorize(a)) - - # interact with env - next_s, r, done = env.step(a) - - # a flag indicates ending or not - mask = 0 if done else 1 - - # get reward compared to demostrations - next_s_vec, next_action_mask = policy.vector.state_vectorize(next_s, output_mask=True) - next_s_vec = torch.Tensor(next_s_vec) - - s_vec_list.append(s_vec.numpy()) - action_list.append(policy.vector.action_vectorize(a)) - reward_list.append(r) - next_s_vec_list.append(next_s_vec.numpy()) - mask_list.append(mask) - action_mask_list.append(action_mask.numpy()) - - # save to queue - #buff.push(s_vec.numpy(), policy.vector.action_vectorize(a), r, next_s_vec.numpy(), mask, action_mask.numpy()) - - # update per step - s = next_s - real_traj_len = t - - if done: - break - - if vrnn_model: - crossentropy_loss = compute_vrnn_reward(mc_samples, sys_act_list, user_act_list, vrnn_model) - - for i in range(len(user_act_list)): - buff.push(s_vec_list[i], action_list[i], reward_list[i], next_s_vec_list[i], mask_list[i], - action_mask_list[i], crossentropy_loss[i]) - - # this is end of one trajectory - sampled_num += real_traj_len - sampled_traj_num += 1 - # t indicates the valid trajectory length - - # this is end of sampling all batchsz of items. 
- # when sampling is over, push all buff data into queue - queue.put([pid, buff]) - evt.wait() - - -def compute_vrnn_reward(mc_samples, sys_act_list, user_act_list, vrnn_model): - _, _, kld_loss, nll_loss = \ - vrnn_model(torch.Tensor([user_act_list]).repeat(mc_samples, 1, 1).to(DEVICE), - torch.Tensor([sys_act_list]).repeat(mc_samples, 1, 1).to(DEVICE), None, - torch.Tensor([len(user_act_list)]).repeat(mc_samples).to(DEVICE)) - - crossentropy_loss = -torch.stack(kld_loss) - torch.stack(nll_loss) - - # normalize reward, should maybe do it across episodes - #crossentropy_loss = (crossentropy_loss - crossentropy_loss.mean()) / (crossentropy_loss.std() + 0.00001) - crossentropy_loss = crossentropy_loss.detach() - return crossentropy_loss - - -def sample(env, policy, batchsz, process_num, vrnn_model=None, mc_samples=1): - """ - Given batchsz number of task, the batchsz will be splited equally to each processes - and when processes return, it merge all data and return - :param env: - :param policy: - :param batchsz: - :param process_num: - :return: batch - """ - - # batchsz will be splitted into each process, - # final batchsz maybe larger than batchsz parameters - process_batchsz = np.ceil(batchsz / process_num).astype(np.int32) - # buffer to save all data - queue = mp.Queue() - - # start processes for pid in range(1, processnum) - # if processnum = 1, this part will be ignored. - # when save tensor in Queue, the process should keep alive till Queue.get(), - # please refer to : https://discuss.pytorch.org/t/using-torch-tensor-over-multiprocessing-queue-process-fails/2847 - # however still some problem on CUDA tensors on multiprocessing queue, - # please refer to : https://discuss.pytorch.org/t/cuda-tensors-on-multiprocessing-queue/28626 - # so just transform tensors into numpy, then put them into queue. - evt = mp.Event() - processes = [] - for i in range(process_num): - process_args = (i, queue, evt, env, policy, process_batchsz, vrnn_model, mc_samples) - processes.append(mp.Process(target=sampler, args=process_args)) - for p in processes: - # set the process as daemon, and it will be killed once the main process is stoped. - p.daemon = True - p.start() - - # we need to get the first Memory object and then merge others Memory use its append function. - pid0, buff0 = queue.get() - for _ in range(1, process_num): - pid, buff_ = queue.get() - buff0.append(buff_) # merge current Memory into buff0 - evt.set() - - # now buff saves all the sampled data - buff = buff0 - - return buff.get_batch() - - -def update(env, policy, batchsz, epoch, process_num, only_critic=False, vrnn_model=None, mc_samples=1, ce_weighting=1.0): - - # sample data asynchronously - if vrnn_model is not None: - batch = sample(env, policy, batchsz, process_num, vrnn_model, mc_samples) - else: - batch = sample(env, policy, batchsz, process_num) - - # data in batch is : batch.state: ([1, s_dim], [1, s_dim]...) - # batch.action: ([1, a_dim], [1, a_dim]...) - # batch.reward/ batch.mask: ([1], [1]...) 
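
In `update()`, the per-step VRNN cross-entropy scores returned by `compute_vrnn_reward` are standardised over the sampled batch and added to the environment reward with the `--ce_weight` factor. A standalone sketch of that shaping step (the reward values are toy numbers; `ce_weighting` matches the script's default):

```
# Sketch of the VRNN reward shaping performed in update(); the reward values
# below are toy numbers, ce_weighting matches the script's --ce_weight default.
import torch

def shape_reward(r, vrnn_reward, ce_weighting=0.025):
    """Add the batch-standardised VRNN score to the environment reward."""
    return r + ce_weighting * (vrnn_reward - vrnn_reward.mean()) / (vrnn_reward.std() + 0.00001)

r = torch.tensor([-1.0, -1.0, 40.0])            # env rewards along a toy trajectory
vrnn_reward = torch.tensor([-3.2, -1.1, -0.4])  # per-step VRNN cross-entropy scores
print(shape_reward(r, vrnn_reward))
```
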
- s = torch.from_numpy(np.stack(batch.state)).to(device=DEVICE) - a = torch.from_numpy(np.stack(batch.action)).to(device=DEVICE) - r = torch.from_numpy(np.stack(batch.reward)).to(device=DEVICE) - mask = torch.Tensor(np.stack(batch.mask)).to(device=DEVICE) - action_mask = torch.Tensor(np.stack(batch.action_mask)).to(device=DEVICE) - vrnn_reward = torch.Tensor(np.stack(batch.vrnn_reward)).to(device=DEVICE) - batchsz_real = s.size(0) - - r_new = r + ce_weighting * (vrnn_reward - vrnn_reward.mean()) / (vrnn_reward.std() + 0.00001) - - policy.update(epoch, batchsz_real, s, a, r_new, mask, action_mask, only_critic=only_critic) - - -if __name__ == '__main__': - parser = ArgumentParser() - parser.add_argument("--load_path", type=str, default="", help="path of model to load") - parser.add_argument("--batchsz", type=int, default=1000, help="batch size of trajactory sampling") - parser.add_argument("--epoch", type=int, default=200, help="number of epochs to train") - parser.add_argument("--process_num", type=int, default=8, help="number of processes of trajactory sampling") - parser.add_argument("--seed", type=int, default=0, help="Seed for the policy parameter initialization") - parser.add_argument("--action_mask", type=bool, default=False, help="Use action masking for PPO") - parser.add_argument("--vrnn_path", type=str, default="") - parser.add_argument("--mc_num", type=int, default=1, help="How many MonteCarlo samples for VRNN") - parser.add_argument("--ce_weight", type=float, default=0.025, help="Weight of cross-entropy reward") - - args = parser.parse_args() - - random.seed(0) - np.random.seed(0) - - if args.action_mask: - args.action_mask = True - else: - args.action_mask = False - - print("ACTION MASK: ", args.action_mask) - - bucket_dir = 'logs_PPO_masking_no_pretraining' - if args.load_path and args.action_mask: - bucket_dir = 'logs_PPO_masking_pretraining' - elif args.load_path and not args.action_mask: - bucket_dir = 'logs_PPO_pretraining' - elif not args.load_path and not args.action_mask: - bucket_dir = 'logs_PPO_no_pretraining' - - bucket_dir += "_discrete" - - # simple rule DST - dst_sys = RuleDST() - - policy_sys = PPO(True, seed=args.seed, use_action_mask=args.action_mask, shrink=False) - policy_sys.load(args.load_path) - - if args.vrnn_path: - tb_writer = SummaryWriter(os.path.join(os.path.dirname(os.path.abspath(__file__)), - f'TB_summary/{policy_sys.current_time}_vrnn_normalized')) - else: - tb_writer = SummaryWriter(os.path.join(os.path.dirname(os.path.abspath(__file__)), - f'TB_summary/{policy_sys.current_time}')) - - if args.vrnn_path: - logging.info("We use VRNN reward signal") - with torch.no_grad(): - vrnn = VRNN(300, 300, 300, 1, 30).to(DEVICE) - vrnn.load_state_dict(torch.load(args.vrnn_path, map_location=DEVICE)) - vrnn.eval() - - # not use dst - dst_usr = None - # rule policy - policy_usr = RulePolicy(character='usr') - - simulator = PipelineAgent(None, None, policy_usr, None, 'user') - - evaluator = MultiWozEvaluator() - env = Environment(None, simulator, None, dst_sys, evaluator=evaluator) - - logging.info("Start of Training: " + time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())) - - best_complete_rate = 0.0 - best_success_rate = 0.0 - - #if args.load_path: - # for i in range(20): - # print("Updating only critic") - # update(env, policy_sys, args.batchsz, i, args.process_num, only_critic=True) - - for i in range(args.epoch): - - if args.vrnn_path: - update(env, policy_sys, args.batchsz, i, args.process_num, vrnn_model=vrnn, mc_samples=args.mc_num, - 
ce_weighting=args.ce_weight) - else: - update(env, policy_sys, args.batchsz, i, args.process_num) - - logging.info(f"Evaluating at Epoch: {i} " + '-' * 80) - with open(os.path.join(policy_sys.log_dir, f'log_{policy_sys.current_time}.txt'), 'a') as log_file: - #log_file.write(f"Evaluating at Epoch: {i} " + '-' * 80 + "\n") - pass - - policy_sys.is_train = False - complete_rate, success_rate = evaluate('MultiWOZ', policy_sys=policy_sys, counter=i * 1000, writer=None) - tb_writer.add_scalar('complete_rate', complete_rate, i * args.batchsz) - tb_writer.add_scalar('success_rate', success_rate, i * args.batchsz) - policy_sys.is_train = True - save_log_to_bucket(policy_sys, bucket_dir=bucket_dir) - - best_complete_rate = save_best_policy(policy_sys, complete_rate, best_complete_rate, success=False, bucket_dir=bucket_dir) - best_success_rate = save_best_policy(policy_sys, success_rate, best_success_rate, success=True, bucket_dir=bucket_dir) - - logging.info("End of Training: " + time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())) diff --git a/convlab2/policy/vector/vector_base.py b/convlab2/policy/vector/vector_base.py index d040e6e5db3be49e9d4b54500d73b403f23d0082..5880da6fd94f8cc39c57d169549406e0596d86c5 100644 --- a/convlab2/policy/vector/vector_base.py +++ b/convlab2/policy/vector/vector_base.py @@ -1,16 +1,17 @@ # -*- coding: utf-8 -*- import os import sys -import json import numpy as np import copy import logging +from copy import deepcopy from convlab2.policy.vec import Vector +from convlab2.util.custom_util import flatten_acts from convlab2.util.multiwoz.lexicalize import delexicalize_da, flat_da, deflat_da, lexicalize_da -from convlab2.util.multiwoz.state import default_state -from convlab2.util.multiwoz.dbquery import Database from convlab2.util.multiwoz.multiwoz_slot_trans import REF_SYS_DA, REF_USR_DA +from convlab2.util import load_ontology, load_database, load_dataset + DEFAULT_INTENT_FILEPATH = os.path.join( os.path.dirname(os.path.dirname(os.path.dirname( os.path.dirname(os.path.abspath(__file__))))), @@ -24,52 +25,141 @@ sys.path.append(root_dir) SLOT_MAP = {'taxi_types': 'car type'} +#TODO: The masks depend on multiwoz, deal with that somehow, shall we build a Mask class? +#TODO: Check the masks with new action strings +#TODO: Where should i save the action dicts? 
+#TODO: Load actions from ontology properly +#TODO: method AddName is properly not working right anymore + -class MultiWozVectorBase(Vector): +class VectorBase(Vector): - def __init__(self, voc_file=None, voc_opp_file=None, - intent_file=DEFAULT_INTENT_FILEPATH, character='sys', - use_masking=False, - manually_add_entity_names=True, + def __init__(self, dataset_name='multiwoz21', character='sys', use_masking=False, manually_add_entity_names=True, seed=0): super().__init__() self.set_seed(seed) - self.belief_domains = ['Attraction', 'Restaurant', - 'Train', 'Hotel', 'Taxi', 'Hospital', 'Police'] - self.db_domains = ['Attraction', 'Restaurant', 'Train', 'Hotel'] + self.ontology = load_ontology(dataset_name) + try: + self.db = load_database(dataset_name) + self.db_domains = self.db.domains + except: + self.db = None + self.db_domains = None + print("VectorBase: Can not load a database, path is probably not existing.") + + self.dataset_name = dataset_name self.max_actionval = {} - - with open(intent_file) as f: - intents = json.load(f) - self.informable = intents['informable'] - self.requestable = intents['requestable'] - self.db = Database() - self.use_mask = use_masking self.use_add_name = manually_add_entity_names self.reqinfo_filler_action = None self.character = character - self.name_history_flag = True self.name_action_prev = [] + self.cur_domain = None + self.requestable = ['request'] + self.informable = ['inform', 'recommend'] - if not voc_file or not voc_opp_file: - voc_file = os.path.join( - root_dir, 'data/multiwoz/sys_da_voc_remapped.txt') - voc_opp_file = os.path.join( - root_dir, 'data/multiwoz/usr_da_voc.txt') + self.load_attributes() + self.get_state_dim() + print(f"State dimension: {self.state_dim}") - with open(voc_file) as f: - self.da_voc = f.read().splitlines() - with open(voc_opp_file) as f: - self.da_voc_opp = f.read().splitlines() + def load_attributes(self): + self.domains = list(self.ontology['domains'].keys()) + self.domains.sort() + + self.state = self.ontology['state'] + self.belief_domains = list(self.state.keys()) + self.belief_domains.sort() + + self.load_action_dicts() + + def load_action_dicts(self): + + self.load_actions_from_data() self.generate_dict() - self.cur_domain = None - self.get_state_dim() - self.state = default_state() + + def load_actions_from_data(self, frequency_threshold=50): + + data_split = load_dataset(self.dataset_name) + system_dict = {} + user_dict = {} + for key in data_split: + data = data_split[key] + for dialogue in data: + for turn in dialogue['turns']: + dialogue_acts = turn['dialogue_acts'] + act_list = flatten_acts(dialogue_acts) + delex_acts = delexicalize_da(act_list, self.requestable) + + if turn['speaker'] == 'system': + for act in delex_acts: + act = "-".join(act) + if act not in system_dict: + system_dict[act] = 1 + else: + system_dict[act] += 1 + else: + for act in delex_acts: + act = "-".join(act) + if act not in user_dict: + user_dict[act] = 1 + else: + user_dict[act] += 1 + + for key in deepcopy(system_dict): + if system_dict[key] < frequency_threshold: + del system_dict[key] + + for key in deepcopy(user_dict): + if user_dict[key] < frequency_threshold: + del user_dict[key] + + with open("sys_da_voc.txt", "w") as f: + system_acts = list(system_dict.keys()) + system_acts.sort() + for act in system_acts: + f.write(act + "\n") + with open("user_da_voc.txt", "w") as f: + user_acts = list(user_dict.keys()) + user_acts.sort() + for act in user_acts: + f.write(act + "\n") + print("Saved new action dict.") + + self.da_voc = 
system_acts + self.da_voc_opp = user_acts + + def load_actions_from_ontology(self): + + self.da_voc = [] + self.da_voc_opp = [] + for act_type in self.ontology['dialogue_acts']: + for act in self.ontology['dialogue_acts'][act_type]: + system = act['system'] + user = act['user'] + if system: + system_acts_with_value = self.add_values_to_act(act['domain'], act['intent'], act['slot'], True) + self.da_voc.extend(system_acts_with_value) + + if user: + user_acts_with_value = self.add_values_to_act(act['domain'], act['intent'], act['slot'], False) + self.da_voc_opp.extend(user_acts_with_value) + + def generate_dict(self): + """ + init the dict for mapping state/action into vector + """ + self.act2vec = dict((a, i) for i, a in enumerate(self.da_voc)) + self.vec2act = dict((v, k) for k, v in self.act2vec.items()) + self.da_dim = len(self.da_voc) + self.opp2vec = dict((a, i) for i, a in enumerate(self.da_voc_opp)) + self.da_opp_dim = len(self.da_voc_opp) + + print(f"Dimension of system actions: {self.da_dim}") + print(f"Dimension of user actions: {self.da_opp_dim}") def get_state_dim(self): ''' @@ -90,29 +180,36 @@ class MultiWozVectorBase(Vector): """ raise NotImplementedError - def set_seed(self, seed): - np.random.seed(seed) + def add_values_to_act(self, domain, intent, slot, system): + ''' + The ontology does not contain information about the value of an act. This method will add the value and + is based on how it is created in MultiWOZ. This might need to be changed for other datasets such as SGD. + ''' - def generate_dict(self): - """ - init the dict for mapping state/action into vector - """ - self.act2vec = dict((a, i) for i, a in enumerate(self.da_voc)) - self.vec2act = dict((v, k) for k, v in self.act2vec.items()) - self.da_dim = len(self.da_voc) - self.opp2vec = dict((a, i) for i, a in enumerate(self.da_voc_opp)) - self.da_opp_dim = len(self.da_voc_opp) + if intent == 'request': + return [f"{domain}-{intent}-{slot}-?"] - def retrieve_user_action(self, state): + if slot == '': + return [f"{domain}-{intent}-none-none"] - action = state['user_action'] - opp_action = delexicalize_da(action, self.requestable) - opp_action = flat_da(opp_action) - opp_act_vec = np.zeros(self.da_opp_dim) - for da in opp_action: - if da in self.opp2vec: - opp_act_vec[self.opp2vec[da]] = 1. 
- return opp_act_vec + if system: + if intent in ['recommend', 'select', 'inform']: + return [f"{domain}-{intent}-{slot}-{i}" for i in range(1, 4)] + else: + return [f"{domain}-{intent}-{slot}-1"] + else: + return [f"{domain}-{intent}-{slot}-1"] + + def init_domain_active_dict(self): + domain_active_dict = {} + for domain in self.domains: + if domain == 'general': + continue + domain_active_dict[domain] = False + return domain_active_dict + + def set_seed(self, seed): + np.random.seed(seed) def compute_domain_mask(self, domain_active_dict): @@ -192,16 +289,14 @@ class MultiWozVectorBase(Vector): entities list: list of entities of the specified domain """ - constraint = self.state[domain.lower()]['semi'] - constraint = {k: i for k, i in constraint.items() if i and i not in [ - 'dontcare', "do n't care", "do not care"]} - - return self.db.query(domain.lower(), constraint.items()) - - # Function used to find which user constraint results in no entities being found + constraints = [[slot, value] for slot, value in self.state[domain].items() if value] \ + if domain in self.state else [] + return self.db.query(domain.lower(), constraints, topk=10) def find_nooffer_slot(self, domain): """ + Function used to find which user constraint results in no entities being found + query entities of specified domain Args: domain string: @@ -371,3 +466,52 @@ class MultiWozVectorBase(Vector): self.name_action_prev = copy.deepcopy(name_inform) return action + + def pointer(self): + pointer_vector = np.zeros(6 * len(self.db_domains)) + number_entities_dict = {} + for domain in self.db_domains: + entities = self.dbquery_domain(domain.lower()) + number_entities_dict[domain] = len(entities) + pointer_vector = self.one_hot_vector( + len(entities), domain, pointer_vector) + + return pointer_vector, number_entities_dict + + def one_hot_vector(self, num, domain, vector): + """Return number of available entities for particular domain.""" + if domain != 'train': + idx = self.db_domains.index(domain) + if num == 0: + vector[idx * 6: idx * 6 + 6] = np.array([1, 0, 0, 0, 0, 0]) + elif num == 1: + vector[idx * 6: idx * 6 + 6] = np.array([0, 1, 0, 0, 0, 0]) + elif num == 2: + vector[idx * 6: idx * 6 + 6] = np.array([0, 0, 1, 0, 0, 0]) + elif num == 3: + vector[idx * 6: idx * 6 + 6] = np.array([0, 0, 0, 1, 0, 0]) + elif num == 4: + vector[idx * 6: idx * 6 + 6] = np.array([0, 0, 0, 0, 1, 0]) + elif num >= 5: + vector[idx * 6: idx * 6 + 6] = np.array([0, 0, 0, 0, 0, 1]) + else: + idx = self.db_domains.index(domain) + if num == 0: + vector[idx * 6: idx * 6 + 6] = np.array([1, 0, 0, 0, 0, 0]) + elif num <= 2: + vector[idx * 6: idx * 6 + 6] = np.array([0, 1, 0, 0, 0, 0]) + elif num <= 5: + vector[idx * 6: idx * 6 + 6] = np.array([0, 0, 1, 0, 0, 0]) + elif num <= 10: + vector[idx * 6: idx * 6 + 6] = np.array([0, 0, 0, 1, 0, 0]) + elif num <= 40: + vector[idx * 6: idx * 6 + 6] = np.array([0, 0, 0, 0, 1, 0]) + elif num > 40: + vector[idx * 6: idx * 6 + 6] = np.array([0, 0, 0, 0, 0, 1]) + + return vector + + +if __name__ == '__main__': + vector = VectorBase() + diff --git a/convlab2/policy/vector/vector_binary.py b/convlab2/policy/vector/vector_binary.py new file mode 100755 index 0000000000000000000000000000000000000000..8fde29144b98d1fc1b6b21002c1cc646e5047bcf --- /dev/null +++ b/convlab2/policy/vector/vector_binary.py @@ -0,0 +1,125 @@ +# -*- coding: utf-8 -*- +import sys +import os +import numpy as np +from convlab2.util.multiwoz.lexicalize import delexicalize_da, flat_da +from .vector_base import VectorBase + 
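
Because the unified ontology stores no value information, `VectorBase.add_values_to_act` (introduced above) expands each ontology act into the delexicalised `domain-intent-slot-value` strings that make up the action vocabulary. A standalone sketch mirroring that logic, with made-up example calls:

```
# Standalone sketch of VectorBase.add_values_to_act from the patch above;
# the example calls are illustrative.
def add_values_to_act(domain, intent, slot, system):
    if intent == 'request':
        return [f"{domain}-{intent}-{slot}-?"]
    if slot == '':
        return [f"{domain}-{intent}-none-none"]
    if system and intent in ['recommend', 'select', 'inform']:
        # system inform-like acts may mention up to three distinct values
        return [f"{domain}-{intent}-{slot}-{i}" for i in range(1, 4)]
    return [f"{domain}-{intent}-{slot}-1"]

print(add_values_to_act('hotel', 'request', 'area', system=True))
# ['hotel-request-area-?']
print(add_values_to_act('hotel', 'inform', 'price range', system=True))
# ['hotel-inform-price range-1', 'hotel-inform-price range-2', 'hotel-inform-price range-3']
print(add_values_to_act('general', 'bye', '', system=True))
# ['general-bye-none-none']
```
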
+DEFAULT_INTENT_FILEPATH = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname( + os.path.dirname(os.path.abspath(__file__))))), + 'data/multiwoz/trackable_intent.json' +) + + +SLOT_MAP = {'taxi_types': 'car type'} + + +class VectorBinary(VectorBase): + + def __init__(self, dataset_name='multiwoz21', character='sys', use_masking=False, manually_add_entity_names=True, + seed=0): + + super().__init__(dataset_name, character, use_masking, manually_add_entity_names, seed) + + def get_state_dim(self): + self.belief_state_dim = 0 + + for domain in self.ontology['state']: + for slot in self.ontology['state'][domain]: + self.belief_state_dim += 1 + + self.state_dim = self.da_opp_dim + self.da_dim + self.belief_state_dim + \ + len(self.db_domains) + 6 * len(self.db_domains) + 1 + + def state_vectorize(self, state): + """vectorize a state + + Args: + state (dict): + Dialog state + action (tuple): + Dialog act + Returns: + state_vec (np.array): + Dialog state vector + """ + self.state = state['belief_state'] + domain_active_dict = self.init_domain_active_dict() + + # when character is sys, to help query database when da is booking-book + # update current domain according to user action + if self.character == 'sys': + action = state['user_action'] + for intent, domain, slot, value in action: + domain_active_dict[domain] = True + if domain in self.db_domains: + self.cur_domain = domain + + opp_act_vec = self.vectorize_user_act(state) + last_act_vec = self.vectorize_system_act(state) + belief_state, domain_active_dict = self.vectorize_belief_state(state, domain_active_dict) + book = self.vectorize_booked(state) + degree, number_entities_dict = self.pointer() + final = 1. if state['terminated'] else 0. + + state_vec = np.r_[opp_act_vec, last_act_vec, + belief_state, book, degree, final] + assert len(state_vec) == self.state_dim + + if self.use_mask: + mask = self.get_mask(domain_active_dict, number_entities_dict) + for i in range(self.da_dim): + mask[i] = -int(bool(mask[i])) * sys.maxsize + else: + mask = np.zeros(self.da_dim) + + return state_vec, mask + + def get_mask(self, domain_active_dict, number_entities_dict): + domain_mask = self.compute_domain_mask(domain_active_dict) + entity_mask = self.compute_entity_mask(number_entities_dict) + general_mask = self.compute_general_mask() + mask = domain_mask + entity_mask + general_mask + return mask + + def vectorize_booked(self, state): + book = np.zeros(len(self.db_domains)) + for i, domain in enumerate(self.db_domains): + if domain in state['booked'] and state['booked'][domain]: + book[i] = 1. + return book + + def vectorize_belief_state(self, state, domain_active_dict): + belief_state = np.zeros(self.belief_state_dim) + i = 0 + for domain in self.belief_domains: + for slot, value in state['belief_state'][domain].items(): + if value: + belief_state[i] = 1. + i += 1 + + if [slot for slot, value in state['belief_state'][domain].items() if value]: + domain_active_dict[domain] = True + return belief_state, domain_active_dict + + def vectorize_system_act(self, state): + action = state['system_action'] if self.character == 'sys' else state['user_action'] + action = delexicalize_da(action, self.requestable) + action = flat_da(action) + last_act_vec = np.zeros(self.da_dim) + for da in action: + if da in self.act2vec: + last_act_vec[self.act2vec[da]] = 1. 
+ return last_act_vec + + def vectorize_user_act(self, state): + action = state['user_action'] if self.character == 'sys' else state['system_action'] + opp_action = delexicalize_da(action, self.requestable) + opp_action = flat_da(opp_action) + opp_act_vec = np.zeros(self.da_opp_dim) + for da in opp_action: + if da in self.opp2vec: + prob = 1.0 + opp_act_vec[self.opp2vec[da]] = prob + return opp_act_vec diff --git a/convlab2/policy/vector/vector_multiwoz.py b/convlab2/policy/vector/vector_multiwoz.py deleted file mode 100755 index 83c804545503a37db79f9abc5f7b28304862daca..0000000000000000000000000000000000000000 --- a/convlab2/policy/vector/vector_multiwoz.py +++ /dev/null @@ -1,179 +0,0 @@ -# -*- coding: utf-8 -*- -import sys -import os -import numpy as np -from convlab2.util.multiwoz.lexicalize import delexicalize_da, flat_da -from convlab2.util.multiwoz.state import default_state -from .vector_base import MultiWozVectorBase - -DEFAULT_INTENT_FILEPATH = os.path.join( - os.path.dirname(os.path.dirname(os.path.dirname( - os.path.dirname(os.path.abspath(__file__))))), - 'data/multiwoz/trackable_intent.json' -) - - -SLOT_MAP = {'taxi_types': 'car type'} - - -class MultiWozVector(MultiWozVectorBase): - - def __init__(self, voc_file=None, voc_opp_file=None, character='sys', - intent_file=DEFAULT_INTENT_FILEPATH, - use_masking=False, - manually_add_entity_names=True, - seed=0): - - super().__init__(voc_file, voc_opp_file, intent_file, character, use_masking, manually_add_entity_names, seed) - - def get_state_dim(self): - self.belief_state_dim = 0 - for domain in self.belief_domains: - for slot, value in default_state()['belief_state'][domain.lower()]['semi'].items(): - self.belief_state_dim += 1 - - self.belief_state_dim += len(default_state()['belief_state'][domain.lower()]['book']) - 1 - - self.state_dim = self.da_opp_dim + self.da_dim + self.belief_state_dim + \ - len(self.db_domains) + 6 * len(self.db_domains) + 1 - - def pointer(self): - pointer_vector = np.zeros(6 * len(self.db_domains)) - number_entities_dict = {} - for domain in self.db_domains: - entities = self.dbquery_domain(domain.lower()) - number_entities_dict[domain] = len(entities) - pointer_vector = self.one_hot_vector( - len(entities), domain, pointer_vector) - - return pointer_vector, number_entities_dict - - def one_hot_vector(self, num, domain, vector): - """Return number of available entities for particular domain.""" - if domain != 'train': - idx = self.db_domains.index(domain) - if num == 0: - vector[idx * 6: idx * 6 + 6] = np.array([1, 0, 0, 0, 0, 0]) - elif num == 1: - vector[idx * 6: idx * 6 + 6] = np.array([0, 1, 0, 0, 0, 0]) - elif num == 2: - vector[idx * 6: idx * 6 + 6] = np.array([0, 0, 1, 0, 0, 0]) - elif num == 3: - vector[idx * 6: idx * 6 + 6] = np.array([0, 0, 0, 1, 0, 0]) - elif num == 4: - vector[idx * 6: idx * 6 + 6] = np.array([0, 0, 0, 0, 1, 0]) - elif num >= 5: - vector[idx * 6: idx * 6 + 6] = np.array([0, 0, 0, 0, 0, 1]) - else: - idx = self.db_domains.index(domain) - if num == 0: - vector[idx * 6: idx * 6 + 6] = np.array([1, 0, 0, 0, 0, 0]) - elif num <= 2: - vector[idx * 6: idx * 6 + 6] = np.array([0, 1, 0, 0, 0, 0]) - elif num <= 5: - vector[idx * 6: idx * 6 + 6] = np.array([0, 0, 1, 0, 0, 0]) - elif num <= 10: - vector[idx * 6: idx * 6 + 6] = np.array([0, 0, 0, 1, 0, 0]) - elif num <= 40: - vector[idx * 6: idx * 6 + 6] = np.array([0, 0, 0, 0, 1, 0]) - elif num > 40: - vector[idx * 6: idx * 6 + 6] = np.array([0, 0, 0, 0, 0, 1]) - - return vector - - def state_vectorize(self, state): - """vectorize a 
state - - Args: - state (dict): - Dialog state - action (tuple): - Dialog act - Returns: - state_vec (np.array): - Dialog state vector - """ - self.state = state['belief_state'] - self.confidence_scores = state['belief_state_probs'] if 'belief_state_probs' in state else None - domain_active_dict = {} - for domain in self.belief_domains: - domain_active_dict[domain] = False - - # when character is sys, to help query database when da is booking-book - # update current domain according to user action - if self.character == 'sys': - action = state['user_action'] - for intent, domain, slot, value in action: - domain_active_dict[domain] = True - if domain in self.db_domains: - self.cur_domain = domain - - action = state['user_action'] if self.character == 'sys' else state['system_action'] - opp_action = delexicalize_da(action, self.requestable) - opp_action = flat_da(opp_action) - - opp_act_vec = np.zeros(self.da_opp_dim) - for da in opp_action: - if da in self.opp2vec: - prob = 1.0 - opp_act_vec[self.opp2vec[da]] = prob - - action = state['system_action'] if self.character == 'sys' else state['user_action'] - action = delexicalize_da(action, self.requestable) - action = flat_da(action) - last_act_vec = np.zeros(self.da_dim) - for da in action: - if da in self.act2vec: - last_act_vec[self.act2vec[da]] = 1. - - belief_state = np.zeros(self.belief_state_dim) - i = 0 - for domain in self.belief_domains: - - for slot, value in state['belief_state'][domain.lower()]['semi'].items(): - if value and value != 'not mentioned': - belief_state[i] = 1. - i += 1 - for slot, value in state['belief_state'][domain.lower()]['book'].items(): - if slot == 'booked': - continue - if value and value != "not mentioned": - belief_state[i] = 1. - - if 'active_domains' in state: - domain_active = state['active_domains'][domain.lower()] - domain_active_dict[domain] = domain_active - if domain in self.db_domains and domain_active: - self.cur_domain = domain - else: - if [slot for slot, value in state['belief_state'][domain.lower()]['semi'].items() if value]: - domain_active_dict[domain] = True - - book = np.zeros(len(self.db_domains)) - for i, domain in enumerate(self.db_domains): - if state['belief_state'][domain.lower()]['book']['booked']: - book[i] = 1. - - degree, number_entities_dict = self.pointer() - - final = 1. if state['terminated'] else 0. 
- - state_vec = np.r_[opp_act_vec, last_act_vec, - belief_state, book, degree, final] - assert len(state_vec) == self.state_dim - - if self.use_mask is not None: - # None covers the case for policies that don't use masking at all, so do not expect an output "state_vec, mask" - if self.use_mask: - domain_mask = self.compute_domain_mask(domain_active_dict) - entity_mask = self.compute_entity_mask(number_entities_dict) - general_mask = self.compute_general_mask() - mask = domain_mask + entity_mask + general_mask - for i in range(self.da_dim): - mask[i] = -int(bool(mask[i])) * sys.maxsize - else: - mask = np.zeros(self.da_dim) - - return state_vec, mask - else: - return state_vec diff --git a/convlab2/policy/vector/vector_multiwoz_uncertainty.py b/convlab2/policy/vector/vector_multiwoz_uncertainty.py index 1fc4a3d137178f6bbf4633c7c98905855d18e854..b0da48834e1be2b846ac5380d4b4e79547837daf 100644 --- a/convlab2/policy/vector/vector_multiwoz_uncertainty.py +++ b/convlab2/policy/vector/vector_multiwoz_uncertainty.py @@ -6,7 +6,7 @@ import logging from convlab2.util.multiwoz.lexicalize import delexicalize_da, flat_da from convlab2.util.multiwoz.state import default_state from convlab2.util.multiwoz.multiwoz_slot_trans import REF_SYS_DA -from .vector_multiwoz import MultiWozVector as MultiWozVectorBase +from .vector_binary import VectorBinary as VectorBase DEFAULT_INTENT_FILEPATH = os.path.join( os.path.dirname(os.path.dirname(os.path.dirname( @@ -18,7 +18,7 @@ DEFAULT_INTENT_FILEPATH = os.path.join( SLOT_MAP = {'taxi_types': 'car type'} -class MultiWozVector(MultiWozVectorBase): +class MultiWozVector(VectorBase): def __init__(self, voc_file=None, voc_opp_file=None, character='sys', intent_file=DEFAULT_INTENT_FILEPATH, diff --git a/convlab2/util/custom_util.py b/convlab2/util/custom_util.py index 21083d9964fb329eae692a6d8638b59d5a5408ae..7fe9179c5e730278428af21529ad02f85bdafaac 100644 --- a/convlab2/util/custom_util.py +++ b/convlab2/util/custom_util.py @@ -54,6 +54,14 @@ def move_finished_training(dir_in, dir_to): logging.info("Moved results to finished experiments folder.") +def flatten_acts(dialogue_acts): + act_list = [] + for act_type in dialogue_acts: + for act in dialogue_acts[act_type]: + act_list.append([act['intent'], act['domain'], act['slot'], act.get('value', "")]) + return act_list + + def load_config_file(filepath: str = None) -> dict: """ load config setting from json file @@ -91,7 +99,7 @@ def set_seed(seed): def init_logging(root_dir, mode): current_time = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) - dir_path = os.path.join(root_dir, f'experiment_{current_time}') + dir_path = os.path.join(root_dir, f'experiments/experiment_{current_time}') # init_logging_nunu(dir_path) _, log_save_path = init_logging_nunu(dir_path, mode) save_path = os.path.join(dir_path, 'save') diff --git a/convlab2/util/multiwoz/lexicalize.py b/convlab2/util/multiwoz/lexicalize.py index 3f798e46a8e70a38469047cde07ba4a1525b0074..a365d0b16ec45c18e4254f4c54f27a33961a1a28 100755 --- a/convlab2/util/multiwoz/lexicalize.py +++ b/convlab2/util/multiwoz/lexicalize.py @@ -6,6 +6,8 @@ def delexicalize_da(da, requestable): delexicalized_da = [] counter = {} for intent, domain, slot, value in da: + if slot == "": + slot = 'none' if intent in requestable: v = '?' 
else: diff --git a/convlab2/util/multiwoz/state.py b/convlab2/util/multiwoz/state.py index e951d96f90cd8b4e290a2a435b8e442d8318aa5b..8f9aad11a074a8461c583810af4a388edda19d11 100755 --- a/convlab2/util/multiwoz/state.py +++ b/convlab2/util/multiwoz/state.py @@ -2,88 +2,8 @@ def default_state(): state = dict(user_action=[], system_action=[], belief_state={}, + booked={}, request_state={}, terminated=False, history=[]) - state['belief_state'] = { - "police": { - "book": { - "booked": [] - }, - "semi": {} - }, - "hotel": { - "book": { - "booked": [], - "people": "", - "day": "", - "stay": "" - }, - "semi": { - "name": "", - "area": "", - "parking": "", - "pricerange": "", - "stars": "", - "internet": "", - "type": "" - } - }, - "attraction": { - "book": { - "booked": [] - }, - "semi": { - "type": "", - "name": "", - "area": "" - } - }, - "restaurant": { - "book": { - "booked": [], - "people": "", - "day": "", - "time": "" - }, - "semi": { - "food": "", - "pricerange": "", - "name": "", - "area": "", - } - }, - "hospital": { - "book": { - "booked": [] - }, - "semi": { - "department": "" - } - }, - "taxi": { - "book": { - "booked": [] - }, - "semi": { - "leaveAt": "", - "destination": "", - "departure": "", - "arriveBy": "" - } - }, - "train": { - "book": { - "booked": [], - "people": "" - }, - "semi": { - "leaveAt": "", - "destination": "", - "day": "", - "arriveBy": "", - "departure": "" - } - } - } return state diff --git a/convlab2/util/unified_datasets_util.py b/convlab2/util/unified_datasets_util.py index e4344bd838785dda7a3736c37d7577a2887fd9d7..91cea687b9dc00cc308f671d1555a5f57861bc6c 100644 --- a/convlab2/util/unified_datasets_util.py +++ b/convlab2/util/unified_datasets_util.py @@ -117,6 +117,8 @@ def load_unified_data( sample['domains'] = dialogue['domains'] if terminated: sample['terminated'] = turn['utt_idx'] == len(dialogue['turns']) - 1 + if speaker == 'system': + sample['booked'] = turn['booked'] data_by_split[data_split].append(sample) if not split_to_turn: dialogue['turns'] = context @@ -150,6 +152,7 @@ def load_policy_data(dataset, data_split='all', speaker='system', context_window kwargs.setdefault('state', True) kwargs.setdefault('db_results', True) kwargs.setdefault('dialogue_acts', True) + kwargs.setdefault('terminated', True) return load_unified_data(dataset, **kwargs) def load_nlg_data(dataset, data_split='all', speaker='system', use_context=False, context_window_size=0, **kwargs): diff --git a/data/multiwoz/sys_da_voc_shrinked.txt b/data/multiwoz/sys_da_voc_shrinked.txt deleted file mode 100644 index 7a5311cf66b7453bfb14a868d8643c0f45feb0ed..0000000000000000000000000000000000000000 --- a/data/multiwoz/sys_da_voc_shrinked.txt +++ /dev/null @@ -1,162 +0,0 @@ -Attraction-Inform-Addr-1 -Attraction-Inform-Area-1 -Attraction-Inform-Choice-1 -Attraction-Inform-Fee-1 -Attraction-Inform-Name-1 -Attraction-Inform-Phone-1 -Attraction-Inform-Post-1 -Attraction-Inform-Type-1 -Attraction-NoOffer-Area-1 -Attraction-NoOffer-Type-1 -Attraction-NoOffer-none-none -Attraction-Recommend-Addr-1 -Attraction-Recommend-Area-1 -Attraction-Recommend-Fee-1 -Attraction-Recommend-Name-1 -Attraction-Recommend-Phone-1 -Attraction-Recommend-Post-1 -Attraction-Recommend-Type-1 -Attraction-Request-Area-? -Attraction-Request-Name-? -Attraction-Request-Price-? -Attraction-Request-Type-? 
-Attraction-Select-Type-1 -Attraction-Select-none-none -Booking-Book-Day-1 -Booking-Book-Name-1 -Booking-Book-People-1 -Booking-Book-Ref-1 -Booking-Book-Stay-1 -Booking-Book-Time-1 -Booking-Book-none-none -Booking-Inform-Day-1 -Booking-Inform-Name-1 -Booking-Inform-People-1 -Booking-Inform-none-none -Booking-NoBook-Day-1 -Booking-NoBook-Name-1 -Booking-NoBook-Time-1 -Booking-NoBook-none-none -Booking-Request-Day-? -Booking-Request-People-? -Booking-Request-Stay-? -Booking-Request-Time-? -Hospital-Inform-Addr-1 -Hospital-Inform-Department-1 -Hospital-Inform-Phone-1 -Hospital-Inform-Post-1 -Hospital-Request-Department-? -Hotel-Inform-Addr-1 -Hotel-Inform-Area-1 -Hotel-Inform-Choice-1 -Hotel-Inform-Internet-1 -Hotel-Inform-Name-1 -Hotel-Inform-Parking-1 -Hotel-Inform-Phone-1 -Hotel-Inform-Post-1 -Hotel-Inform-Price-1 -Hotel-Inform-Stars-1 -Hotel-Inform-Type-1 -Hotel-NoOffer-Area-1 -Hotel-NoOffer-Price-1 -Hotel-NoOffer-Stars-1 -Hotel-NoOffer-Type-1 -Hotel-NoOffer-none-none -Hotel-Recommend-Addr-1 -Hotel-Recommend-Area-1 -Hotel-Recommend-Internet-1 -Hotel-Recommend-Name-1 -Hotel-Recommend-Parking-1 -Hotel-Recommend-Price-1 -Hotel-Recommend-Stars-1 -Hotel-Recommend-Type-1 -Hotel-Request-Area-? -Hotel-Request-Internet-? -Hotel-Request-Name-? -Hotel-Request-Parking-? -Hotel-Request-Price-? -Hotel-Request-Stars-? -Hotel-Request-Type-? -Hotel-Select-Area-1 -Hotel-Select-Name-1 -Hotel-Select-Price-1 -Hotel-Select-Type-1 -Hotel-Select-none-none -Police-Inform-Addr-1 -Police-Inform-Name-1 -Police-Inform-Phone-1 -Police-Inform-Post-1 -Restaurant-Inform-Addr-1 -Restaurant-Inform-Area-1 -Restaurant-Inform-Choice-1 -Restaurant-Inform-Food-1 -Restaurant-Inform-Name-1 -Restaurant-Inform-Phone-1 -Restaurant-Inform-Post-1 -Restaurant-Inform-Price-1 -Restaurant-Inform-Ref-1 -Restaurant-NoOffer-Area-1 -Restaurant-NoOffer-Food-1 -Restaurant-NoOffer-Price-1 -Restaurant-NoOffer-none-none -Restaurant-Recommend-Addr-1 -Restaurant-Recommend-Area-1 -Restaurant-Recommend-Food-1 -Restaurant-Recommend-Name-1 -Restaurant-Recommend-Phone-1 -Restaurant-Recommend-Post-1 -Restaurant-Recommend-Price-1 -Restaurant-Request-Area-? -Restaurant-Request-Food-? -Restaurant-Request-Name-? -Restaurant-Request-Price-? -Restaurant-Select-Area-1 -Restaurant-Select-Food-1 -Restaurant-Select-Name-1 -Restaurant-Select-Price-1 -Restaurant-Select-none-none -Taxi-Inform-Arrive-1 -Taxi-Inform-Car-1 -Taxi-Inform-Depart-1 -Taxi-Inform-Dest-1 -Taxi-Inform-Leave-1 -Taxi-Inform-Phone-1 -Taxi-Request-Arrive-? -Taxi-Request-Depart-? -Taxi-Request-Dest-? -Taxi-Request-Leave-? -Train-Inform-Arrive-1 -Train-Inform-Choice-1 -Train-Inform-Day-1 -Train-Inform-Depart-1 -Train-Inform-Dest-1 -Train-Inform-Id-1 -Train-Inform-Leave-1 -Train-Inform-Ref-1 -Train-Inform-Ticket-1 -Train-Inform-Time-1 -Train-OfferBook-Arrive-1 -Train-OfferBook-Id-1 -Train-OfferBook-Leave-1 -Train-OfferBook-none-none -Train-OfferBooked-Arrive-1 -Train-OfferBooked-Day-1 -Train-OfferBooked-Depart-1 -Train-OfferBooked-Dest-1 -Train-OfferBooked-Id-1 -Train-OfferBooked-Leave-1 -Train-OfferBooked-People-1 -Train-OfferBooked-Ref-1 -Train-OfferBooked-Ticket-1 -Train-Request-Arrive-? -Train-Request-Day-? -Train-Request-Depart-? -Train-Request-Dest-? -Train-Request-Leave-? -Train-Request-People-? 
-Train-Select-Leave-1 -Train-Select-none-none -general-bye-none-none -general-greet-none-none -general-reqmore-none-none -general-welcome-none-none diff --git a/data/unified_datasets/multiwoz21/database.py b/data/unified_datasets/multiwoz21/database.py index 43ea5896285ebf9e8e38f99c89823bf6538a2bad..8d81a61d5426eae37409970d154a1ef84dce5e63 100644 --- a/data/unified_datasets/multiwoz21/database.py +++ b/data/unified_datasets/multiwoz21/database.py @@ -12,9 +12,9 @@ class Database(BaseDatabase): def __init__(self): """extract data.zip and load the database.""" archive = ZipFile(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data.zip')) - domains = ['restaurant', 'hotel', 'attraction', 'train', 'hospital', 'police'] + self.domains = ['restaurant', 'hotel', 'attraction', 'train', 'hospital', 'police'] self.dbs = {} - for domain in domains: + for domain in self.domains: with archive.open('data/{}_db.json'.format(domain)) as f: self.dbs[domain] = json.loads(f.read()) # add some missing information diff --git a/data_loaders/utterance_loader.py b/data_loaders/utterance_loader.py index 5a43578436f3fbee459a726a175e8bf568cc1ee3..d3f1f0867f75dff46f79ab1fc3b5c378ebf6f6ec 100644 --- a/data_loaders/utterance_loader.py +++ b/data_loaders/utterance_loader.py @@ -10,7 +10,7 @@ import numpy as np from tensorboardX import SummaryWriter from tqdm import tqdm -from convlab2.policy.vector.vector_multiwoz import MultiWozVector +from convlab2.policy.vector.vector_binary import VectorBinary from convlab2.dst.setsumbt.multiwoz.Tracker import SetSUMBTTracker PAD = '<pad>'
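
The new `flatten_acts` helper in `convlab2/util/custom_util.py` is what turns a unified-format `dialogue_acts` dict into the flat `[intent, domain, slot, value]` lists that `delexicalize_da` consumes when `load_actions_from_data` builds the action vocabulary. A short usage sketch follows; the toy turn is an assumed example, not taken from the data.

```
# Usage sketch of flatten_acts (body mirrored from the patch); the toy turn is made up.
def flatten_acts(dialogue_acts):
    act_list = []
    for act_type in dialogue_acts:
        for act in dialogue_acts[act_type]:
            act_list.append([act['intent'], act['domain'], act['slot'], act.get('value', "")])
    return act_list

turn_acts = {
    'categorical': [{'intent': 'inform', 'domain': 'hotel', 'slot': 'area', 'value': 'centre'}],
    'binary': [{'intent': 'request', 'domain': 'hotel', 'slot': 'price range'}],  # no 'value' key
}
print(flatten_acts(turn_acts))
# [['inform', 'hotel', 'area', 'centre'], ['request', 'hotel', 'price range', '']]
```
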