Skip to content
Snippets Groups Projects
Commit a6c20981 authored by Carel van Niekerk's avatar Carel van Niekerk :computer:
Browse files

Setsumbt merge

parent 8bbcad71
No related branches found
No related tags found
No related merge requests found
...@@ -22,6 +22,7 @@ from copy import deepcopy ...@@ -22,6 +22,7 @@ from copy import deepcopy
import torch import torch
import numpy as np import numpy as np
from tqdm import tqdm
def set_seed(args): def set_seed(args):
...@@ -94,8 +95,8 @@ def get_slot_candidate_embeddings(ontology: dict, set_type: str, args, tokenizer ...@@ -94,8 +95,8 @@ def get_slot_candidate_embeddings(ontology: dict, set_type: str, args, tokenizer
embedding_model.eval() embedding_model.eval()
slots = dict() slots = dict()
for domain, subset in ontology.items(): for domain, subset in tqdm(ontology.items(), desc='Domains'):
for slot, slot_info in subset.items(): for slot, slot_info in tqdm(subset.items(), desc='Slots'):
# Get description or use "domain-slot" # Get description or use "domain-slot"
if args.use_descriptions: if args.use_descriptions:
desc = slot_info['description'] desc = slot_info['description']
......
...@@ -258,8 +258,8 @@ class UnifiedFormatDataset(Dataset): ...@@ -258,8 +258,8 @@ class UnifiedFormatDataset(Dataset):
dataset_args = [{"dataset_name": dataset_name}] dataset_args = [{"dataset_name": dataset_name}]
self.dataset_dicts = [load_dataset(**dataset_args_) for dataset_args_ in dataset_args] self.dataset_dicts = [load_dataset(**dataset_args_) for dataset_args_ in dataset_args]
self.ontology = get_ontology_slots(dataset_name) self.ontology = get_ontology_slots(dataset_name)
values = [get_values_from_data(dataset) for dataset in self.dataset_dicts] values = [get_values_from_data(dataset, set_type) for dataset in self.dataset_dicts]
self.ontology = ontology_add_values(self.ontology, combine_value_sets(values)) self.ontology = ontology_add_values(self.ontology, combine_value_sets(values), set_type)
self.ontology = ontology_add_requestable_slots(self.ontology, get_requestable_slots(self.dataset_dicts)) self.ontology = ontology_add_requestable_slots(self.ontology, get_requestable_slots(self.dataset_dicts))
if train_ratio != 1.0: if train_ratio != 1.0:
......
...@@ -52,17 +52,23 @@ def get_ontology_slots(dataset_name: str) -> dict: ...@@ -52,17 +52,23 @@ def get_ontology_slots(dataset_name: str) -> dict:
return ontology_slots return ontology_slots
def get_values_from_data(dataset: dict) -> dict: def get_values_from_data(dataset: dict, data_split: str = "train") -> dict:
""" """
Function to extract slots, slot descriptions and categorical slot values from the dataset ontology. Function to extract slots, slot descriptions and categorical slot values from the dataset ontology.
Args: Args:
dataset (dict): Dataset dictionary obtained using the load_dataset function dataset (dict): Dataset dictionary obtained using the load_dataset function
data_split (str): Dataset split: train/validation/test
Returns: Returns:
value_sets (dict): Dictionary containing possible values obtained from dataset value_sets (dict): Dictionary containing possible values obtained from dataset
""" """
data = load_dst_data(dataset, data_split='all', speaker='user') data = load_dst_data(dataset, data_split='all', speaker='user')
# Remove test data from the data when building training/validation ontology
if data_split in ['train', 'validation']:
data = {key: itm for key, itm in data.items() if key in ['train', 'validation']}
value_sets = {} value_sets = {}
for set_type, dataset in data.items(): for set_type, dataset in data.items():
for turn in dataset: for turn in dataset:
...@@ -141,18 +147,22 @@ def clean_values(value_sets: dict, value_map: dict = VALUE_MAP) -> dict: ...@@ -141,18 +147,22 @@ def clean_values(value_sets: dict, value_map: dict = VALUE_MAP) -> dict:
return clean_vals return clean_vals
def ontology_add_values(ontology_slots: dict, value_sets: dict) -> dict: def ontology_add_values(ontology_slots: dict, value_sets: dict, data_split: str = "train") -> dict:
""" """
Add value sets obtained from the dataset to the ontology Add value sets obtained from the dataset to the ontology
Args: Args:
ontology_slots (dict): Ontology dictionary containing slots, descriptions and categorical slot values ontology_slots (dict): Ontology dictionary containing slots, descriptions and categorical slot values
value_sets (dict): Cleaned Dictionary containing possible values obtained from dataset value_sets (dict): Cleaned Dictionary containing possible values obtained from dataset
data_split (str): Dataset split: train/validation/test
Returns: Returns:
ontology_slots (dict): Ontology dictionary containing slots, slot descriptions and possible value sets ontology_slots (dict): Ontology dictionary containing slots, slot descriptions and possible value sets
""" """
ontology = {} ontology = {}
for domain in sorted(ontology_slots): for domain in sorted(ontology_slots):
if data_split in ['train', 'validation']:
if domain not in value_sets:
continue
ontology[domain] = {} ontology[domain] = {}
for slot in sorted(ontology_slots[domain]): for slot in sorted(ontology_slots[domain]):
if not ontology_slots[domain][slot]['possible_values']: if not ontology_slots[domain][slot]['possible_values']:
...@@ -172,7 +182,7 @@ def get_requestable_slots(datasets: list) -> dict: ...@@ -172,7 +182,7 @@ def get_requestable_slots(datasets: list) -> dict:
""" """
Function to get set of requestable slots from the dataset action labels. Function to get set of requestable slots from the dataset action labels.
Args: Args:
dataset (dict): Dataset dictionary obtained using the load_dataset function datasets (dict): Dataset dictionary obtained using the load_dataset function
Returns: Returns:
slots (dict): Dictionary containing requestable domain-slot pairs slots (dict): Dictionary containing requestable domain-slot pairs
......
...@@ -253,7 +253,7 @@ if __name__ == '__main__': ...@@ -253,7 +253,7 @@ if __name__ == '__main__':
if idx % conf['model']['eval_frequency'] == 0 and idx != 0: if idx % conf['model']['eval_frequency'] == 0 and idx != 0:
time_now = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) time_now = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
logging.info(f"Evaluating at Epoch: {idx} - {time_now}" + '-'*60) logging.info(f"Evaluating after Dialogues: {idx * conf['model']['batchsz']} - {time_now}" + '-' * 60)
eval_dict = eval_policy(conf, policy_sys, env, sess, save_eval, log_save_path) eval_dict = eval_policy(conf, policy_sys, env, sess, save_eval, log_save_path)
......
...@@ -21,6 +21,7 @@ from convlab.evaluator.multiwoz_eval import MultiWozEvaluator ...@@ -21,6 +21,7 @@ from convlab.evaluator.multiwoz_eval import MultiWozEvaluator
from convlab.util import load_dataset from convlab.util import load_dataset
import shutil import shutil
import signal
slot_mapping = {"pricerange": "price range", "post": "postcode", "arriveBy": "arrive by", "leaveAt": "leave at", slot_mapping = {"pricerange": "price range", "post": "postcode", "arriveBy": "arrive by", "leaveAt": "leave at",
...@@ -34,6 +35,22 @@ DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") ...@@ -34,6 +35,22 @@ DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = DEVICE device = DEVICE
class timeout:
    """Context manager that raises TimeoutError if its body runs too long.

    Implemented with SIGALRM, so it works only on Unix and only when
    entered from the main thread (signal handlers cannot be installed
    elsewhere).

    Args:
        seconds (int): Whole seconds before the alarm fires
            (``signal.alarm`` only has integer resolution).
        error_message (str): Message attached to the raised TimeoutError.
    """

    def __init__(self, seconds=10, error_message='Timeout'):
        self.seconds = seconds
        self.error_message = error_message

    def handle_timeout(self, signum, frame):
        # SIGALRM handler: abort the guarded block by raising in the main thread.
        raise TimeoutError(self.error_message)

    def __enter__(self):
        # Remember the previous SIGALRM handler so an outer alarm user is
        # restored (not clobbered) when this block exits.
        self._old_handler = signal.signal(signal.SIGALRM, self.handle_timeout)
        signal.alarm(self.seconds)
        # Return self so `with timeout(...) as t:` binds the manager.
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        # Cancel any pending alarm, then put the previous handler back.
        signal.alarm(0)
        signal.signal(signal.SIGALRM, self._old_handler)
class NumpyEncoder(json.JSONEncoder): class NumpyEncoder(json.JSONEncoder):
""" Special json encoder for numpy types """ """ Special json encoder for numpy types """
...@@ -154,20 +171,20 @@ def eval_policy(conf, policy_sys, env, sess, save_eval, log_save_path, single_do ...@@ -154,20 +171,20 @@ def eval_policy(conf, policy_sys, env, sess, save_eval, log_save_path, single_do
if conf['model']['process_num'] == 1: if conf['model']['process_num'] == 1:
complete_rate, success_rate, success_rate_strict, avg_return, turns, \ complete_rate, success_rate, success_rate_strict, avg_return, turns, \
avg_actions, task_success, book_acts, inform_acts, request_acts, \ avg_actions, task_success, book_acts, inform_acts, request_acts, \
select_acts, offer_acts = evaluate(sess, select_acts, offer_acts, recommend_acts = evaluate(sess,
num_dialogues=conf['model']['num_eval_dialogues'], num_dialogues=conf['model']['num_eval_dialogues'],
sys_semantic_to_usr=conf['model'][ sys_semantic_to_usr=conf['model'][
'sys_semantic_to_usr'], 'sys_semantic_to_usr'],
save_flag=save_eval, save_path=log_save_path, goals=goals) save_flag=save_eval, save_path=log_save_path, goals=goals)
total_acts = book_acts + inform_acts + request_acts + select_acts + offer_acts total_acts = book_acts + inform_acts + request_acts + select_acts + offer_acts + recommend_acts
else: else:
complete_rate, success_rate, success_rate_strict, avg_return, turns, \ complete_rate, success_rate, success_rate_strict, avg_return, turns, \
avg_actions, task_success, book_acts, inform_acts, request_acts, \ avg_actions, task_success, book_acts, inform_acts, request_acts, \
select_acts, offer_acts = \ select_acts, offer_acts, recommend_acts = \
evaluate_distributed(sess, list(range(1000, 1000 + conf['model']['num_eval_dialogues'])), evaluate_distributed(sess, list(range(1000, 1000 + conf['model']['num_eval_dialogues'])),
conf['model']['process_num'], goals) conf['model']['process_num'], goals)
total_acts = book_acts + inform_acts + request_acts + select_acts + offer_acts total_acts = book_acts + inform_acts + request_acts + select_acts + offer_acts + recommend_acts
task_success_gathered = {} task_success_gathered = {}
for task_dict in task_success: for task_dict in task_success:
...@@ -178,22 +195,40 @@ def eval_policy(conf, policy_sys, env, sess, save_eval, log_save_path, single_do ...@@ -178,22 +195,40 @@ def eval_policy(conf, policy_sys, env, sess, save_eval, log_save_path, single_do
task_success = task_success_gathered task_success = task_success_gathered
policy_sys.is_train = True policy_sys.is_train = True
logging.info(f"Complete: {complete_rate}, Success: {success_rate}, Success strict: {success_rate_strict}, "
f"Average Return: {avg_return}, Turns: {turns}, Average Actions: {avg_actions}, " mean_complete, err_complete = np.average(complete_rate), np.std(complete_rate) / np.sqrt(len(complete_rate))
mean_success, err_success = np.average(success_rate), np.std(success_rate) / np.sqrt(len(success_rate))
mean_success_strict, err_success_strict = np.average(success_rate_strict), np.std(success_rate_strict) / np.sqrt(len(success_rate_strict))
mean_return, err_return = np.average(avg_return), np.std(avg_return) / np.sqrt(len(avg_return))
mean_turns, err_turns = np.average(turns), np.std(turns) / np.sqrt(len(turns))
mean_actions, err_actions = np.average(avg_actions), np.std(avg_actions) / np.sqrt(len(avg_actions))
logging.info(f"Complete: {mean_complete}+-{round(err_complete, 2)}, "
f"Success: {mean_success}+-{round(err_success, 2)}, "
f"Success strict: {mean_success_strict}+-{round(err_success_strict, 2)}, "
f"Average Return: {mean_return}+-{round(err_return, 2)}, "
f"Turns: {mean_turns}+-{round(err_turns, 2)}, "
f"Average Actions: {mean_actions}+-{round(err_actions, 2)}, "
f"Book Actions: {book_acts/total_acts}, Inform Actions: {inform_acts/total_acts}, " f"Book Actions: {book_acts/total_acts}, Inform Actions: {inform_acts/total_acts}, "
f"Request Actions: {request_acts/total_acts}, Select Actions: {select_acts/total_acts}, " f"Request Actions: {request_acts/total_acts}, Select Actions: {select_acts/total_acts}, "
f"Offer Actions: {offer_acts/total_acts}") f"Offer Actions: {offer_acts/total_acts}, Recommend Actions: {recommend_acts/total_acts}")
for key in task_success: for key in task_success:
logging.info( logging.info(
f"{key}: Num: {len(task_success[key])} Success: {np.average(task_success[key]) if len(task_success[key]) > 0 else 0}") f"{key}: Num: {len(task_success[key])} Success: {np.average(task_success[key]) if len(task_success[key]) > 0 else 0}")
return {"complete_rate": complete_rate, return {"complete_rate": mean_complete,
"success_rate": success_rate, "success_rate": mean_success,
"success_rate_strict": success_rate_strict, "success_rate_strict": mean_success_strict,
"avg_return": avg_return, "avg_return": mean_return,
"turns": turns, "turns": mean_turns,
"avg_actions": avg_actions} "avg_actions": mean_actions,
"book_acts": book_acts/total_acts,
"inform_acts": inform_acts/total_acts,
"request_acts": request_acts/total_acts,
"select_acts": select_acts/total_acts,
"offer_acts": offer_acts/total_acts,
"recommend_acts": recommend_acts/total_acts}
def env_config(conf, policy_sys, check_book_constraints=True): def env_config(conf, policy_sys, check_book_constraints=True):
...@@ -294,7 +329,7 @@ def evaluate(sess, num_dialogues=400, sys_semantic_to_usr=False, save_flag=False ...@@ -294,7 +329,7 @@ def evaluate(sess, num_dialogues=400, sys_semantic_to_usr=False, save_flag=False
task_success = {'All_user_sim': [], 'All_evaluator': [], "All_evaluator_strict": [], task_success = {'All_user_sim': [], 'All_evaluator': [], "All_evaluator_strict": [],
'total_return': [], 'turns': [], 'avg_actions': [], 'total_return': [], 'turns': [], 'avg_actions': [],
'total_booking_acts': [], 'total_inform_acts': [], 'total_request_acts': [], 'total_booking_acts': [], 'total_inform_acts': [], 'total_request_acts': [],
'total_select_acts': [], 'total_offer_acts': []} 'total_select_acts': [], 'total_offer_acts': [], 'total_recommend_acts': []}
dial_count = 0 dial_count = 0
for seed in range(1000, 1000 + num_dialogues): for seed in range(1000, 1000 + num_dialogues):
set_seed(seed) set_seed(seed)
...@@ -310,6 +345,7 @@ def evaluate(sess, num_dialogues=400, sys_semantic_to_usr=False, save_flag=False ...@@ -310,6 +345,7 @@ def evaluate(sess, num_dialogues=400, sys_semantic_to_usr=False, save_flag=False
request = 0 request = 0
select = 0 select = 0
offer = 0 offer = 0
recommend = 0
# this 40 represents the max turn of dialogue # this 40 represents the max turn of dialogue
for i in range(40): for i in range(40):
sys_response, user_response, session_over, reward = sess.next_turn( sys_response, user_response, session_over, reward = sess.next_turn(
...@@ -332,6 +368,8 @@ def evaluate(sess, num_dialogues=400, sys_semantic_to_usr=False, save_flag=False ...@@ -332,6 +368,8 @@ def evaluate(sess, num_dialogues=400, sys_semantic_to_usr=False, save_flag=False
select += 1 select += 1
if intent.lower() == 'offerbook': if intent.lower() == 'offerbook':
offer += 1 offer += 1
if intent.lower() == 'recommend':
recommend += 1
avg_actions += len(acts) avg_actions += len(acts)
turn_counter += 1 turn_counter += 1
turns += 1 turns += 1
...@@ -368,6 +406,8 @@ def evaluate(sess, num_dialogues=400, sys_semantic_to_usr=False, save_flag=False ...@@ -368,6 +406,8 @@ def evaluate(sess, num_dialogues=400, sys_semantic_to_usr=False, save_flag=False
task_success['total_request_acts'].append(request) task_success['total_request_acts'].append(request)
task_success['total_select_acts'].append(select) task_success['total_select_acts'].append(select)
task_success['total_offer_acts'].append(offer) task_success['total_offer_acts'].append(offer)
task_success['total_offer_acts'].append(offer)
task_success['total_recommend_acts'].append(recommend)
# print(agent_sys.agent_saves) # print(agent_sys.agent_saves)
eval_save['Conversation {}'.format(str(dial_count))] = [ eval_save['Conversation {}'.format(str(dial_count))] = [
...@@ -388,7 +428,7 @@ def evaluate(sess, num_dialogues=400, sys_semantic_to_usr=False, save_flag=False ...@@ -388,7 +428,7 @@ def evaluate(sess, num_dialogues=400, sys_semantic_to_usr=False, save_flag=False
np.average(task_success['turns']), np.average(task_success['avg_actions']), task_success, \ np.average(task_success['turns']), np.average(task_success['avg_actions']), task_success, \
np.average(task_success['total_booking_acts']), np.average(task_success['total_inform_acts']), \ np.average(task_success['total_booking_acts']), np.average(task_success['total_inform_acts']), \
np.average(task_success['total_request_acts']), np.average(task_success['total_select_acts']), \ np.average(task_success['total_request_acts']), np.average(task_success['total_select_acts']), \
np.average(task_success['total_offer_acts']) np.average(task_success['total_offer_acts']), np.average(task_success['total_recommend_acts'])
def model_downloader(download_dir, model_path): def model_downloader(download_dir, model_path):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment