diff --git a/convlab2/base_models/bert/create_data.py b/convlab2/base_models/bert/create_data.py
new file mode 100644
index 0000000000000000000000000000000000000000..825c736d2259552017166c93db168069fe0a6976
--- /dev/null
+++ b/convlab2/base_models/bert/create_data.py
@@ -0,0 +1,134 @@
+import os
+import json
+from tqdm import tqdm
+from convlab2.util import load_dataset, load_nlu_data, load_dst_data, load_policy_data, load_nlg_data, load_e2e_data, load_rg_data
+from nltk.tokenize import TreebankWordTokenizer, PunktSentenceTokenizer
+from collections import Counter
+
+
+def _bio_labels(num_tokens, token_spans, dialogue_acts):
+    """Return one BIO label per token for character-level value spans.
+
+    Args:
+        num_tokens: number of tokens in the utterance.
+        token_spans: (char_start, char_end) pair of every token, end exclusive.
+        dialogue_acts: dicts holding the 'start' and 'end' character offsets of a value.
+
+    Returns:
+        A list of num_tokens labels: 'B' opens a value, 'I' continues it, 'O' is outside.
+    """
+    labels = ['O'] * num_tokens
+    # Map a character offset to the token that starts / ends exactly there.
+    word_start_at = {span[0]: i for i, span in enumerate(token_spans)}
+    word_end_at = {span[1]: i + 1 for i, span in enumerate(token_spans)}
+    for da in dialogue_acts:
+        word_start = word_start_at.get(da['start'])
+        word_end = word_end_at.get(da['end'])
+        if word_start is None or word_end is None:
+            # char span does not match word boundaries on both sides, skip. Tagging a
+            # half-aligned span would mislabel unrelated tokens (index -1 is the last token).
+            continue
+        labels[word_start] = 'B'
+        for i in range(word_start + 1, word_end):
+            labels[i] = 'I'
+    return labels
+
+
+def _tag_utterance(sample, sent_tokenizer, word_tokenizer, span_counter):
+    """Tokenize one turn and BIO-tag its non-categorical dialogue act values.
+
+    Only dialogue acts that carry a 'start' offset are tagged; how many a turn has is
+    tallied in span_counter.
+
+    Returns:
+        (tokens, labels), two lists of equal length.
+    """
+    utterance = sample['utterance']
+    dialogue_acts = [da for da in sample['dialogue_acts']['non-categorical'] if 'start' in da]
+    span_counter[len(dialogue_acts)] += 1
+
+    sentences = sent_tokenizer.tokenize(utterance)
+    sent_spans = sent_tokenizer.span_tokenize(utterance)
+    tokens = [token for sent in sentences for token in word_tokenizer.tokenize(sent)]
+    # Word spans are relative to their sentence; shift them to utterance offsets.
+    token_spans = [
+        (sent_span[0] + token_span[0], sent_span[0] + token_span[1])
+        for sent, sent_span in zip(sentences, sent_spans)
+        for token_span in word_tokenizer.span_tokenize(sent)
+    ]
+    return tokens, _bio_labels(len(tokens), token_spans, dialogue_acts)
+
+
+def _write_json_lines(data_dir, data_split, lines):
+    """Write the serialized samples of one split to <data_dir>/<data_split>.json."""
+    file_name = os.path.join(data_dir, f"{data_split}.json")
+    with open(file_name, "w", encoding='utf-8') as f:
+        f.writelines(lines)
+
+
+def create_bio_data(dataset, data_dir):
+    """Create utterance-level BIO data: one JSON line {'tokens', 'labels'} per turn.
+
+    Writes <data_dir>/<split>.json for every split returned by load_nlu_data and prints
+    the distribution of the number of spans per utterance.
+    """
+    data_by_split = load_nlu_data(dataset, speaker='all')
+    os.makedirs(data_dir, exist_ok=True)
+
+    sent_tokenizer = PunktSentenceTokenizer()
+    word_tokenizer = TreebankWordTokenizer()
+
+    cnt = Counter()
+    for data_split, samples in data_by_split.items():
+        data = []
+        for sample in tqdm(samples, desc=f'{data_split} sample', leave=False):
+            tokens, labels = _tag_utterance(sample, sent_tokenizer, word_tokenizer, cnt)
+            data.append(json.dumps({'tokens': tokens, 'labels': labels}, ensure_ascii=False)+'\n')
+        _write_json_lines(data_dir, data_split, data)
+    print('num of spans in utterances', cnt)
+
+
+def create_dialogBIO_data(dataset, data_dir):
+    """Create dialogue-level BIO data: one JSON line {'tokens', 'labels'} per dialogue.
+
+    The turns of a dialogue are concatenated; each turn is prefixed with its speaker and
+    a ':' token, both labelled 'O'.
+    """
+    data_by_split = load_nlu_data(dataset, split_to_turn=False)
+    os.makedirs(data_dir, exist_ok=True)
+
+    sent_tokenizer = PunktSentenceTokenizer()
+    word_tokenizer = TreebankWordTokenizer()
+
+    cnt = Counter()
+    for data_split, dialogs in data_by_split.items():
+        data = []
+        for dialog in tqdm(dialogs, desc=f'{data_split} sample', leave=False):
+            all_tokens, all_labels = [], []
+            for sample in dialog['turns']:
+                tokens, labels = _tag_utterance(sample, sent_tokenizer, word_tokenizer, cnt)
+                all_tokens.extend([sample['speaker'], ':']+tokens)
+                all_labels.extend(['O', 'O']+labels)
+            data.append(json.dumps({'tokens': all_tokens, 'labels': all_labels}, ensure_ascii=False)+'\n')
+        _write_json_lines(data_dir, data_split, data)
+    print('num of spans in utterances', cnt)
+
+
+# Maps a --tasks choice to the function that builds its data.
+TASK_BUILDERS = {'bio': create_bio_data, 'dialogBIO': create_dialogBIO_data}
+
+
+if __name__ == '__main__':
+    from argparse import ArgumentParser
+    parser = ArgumentParser(description="create BIO tagging data for token classification training")
+    parser.add_argument('--tasks', metavar='task_name', nargs='*', required=True, choices=list(TASK_BUILDERS), help='names of tasks')
+    parser.add_argument('--datasets', metavar='dataset_name', nargs='*', required=True, help='names of unified datasets')
+    parser.add_argument('--save_dir', metavar='save_directory', type=str, default='data', help='directory to save the data, default: data/$task_name/$dataset_name')
+    args = parser.parse_args()
+    print(args)
+    for dataset_name in tqdm(args.datasets, desc='datasets'):
+        dataset = load_dataset(dataset_name)
+        for task_name in tqdm(args.tasks, desc='tasks', leave=False):
+            data_dir = os.path.join(args.save_dir, task_name, dataset_name)
+            # Explicit dispatch instead of eval() on a string built from the command line.
+            TASK_BUILDERS[task_name](dataset, data_dir)
diff --git a/convlab2/base_models/bert/infer_bio.sh b/convlab2/base_models/bert/infer_bio.sh
new file mode 100644
index 0000000000000000000000000000000000000000..ed784c515c6703088313da0809b7c0442bcec333
--- /dev/null
+++ b/convlab2/base_models/bert/infer_bio.sh
@@ -0,0 +1,38 @@
+set -e
+n_gpus=3
+task_name="dialogBIO"
+dataset_name="multiwoz21"
+data_dir="data/${task_name}/${dataset_name}"
+output_dir="output/${task_name}/${dataset_name}"
+cache_dir="cache"
+logging_dir="${output_dir}/runs"
+source_column="tokens"
+target_column="labels"
+model_name_or_path="output/dialogBIO/sgd"
+per_device_eval_batch_size=16
+
+python create_data.py --tasks ${task_name} --datasets ${dataset_name} --save_dir "data"
+
+for split in test validation train
+do
+    python -m torch.distributed.launch \
+        --nproc_per_node ${n_gpus} run_token_classification.py \
+        --task_name ${task_name} \
+        --train_file ${data_dir}/${split}.json \
+        --validation_file ${data_dir}/${split}.json \
+        --test_file ${data_dir}/${split}.json \
+        --source_column ${source_column} \
+        --target_column ${target_column} \
+        --model_name_or_path ${model_name_or_path} \
+        --do_predict \
+        --cache_dir ${cache_dir} \
+        --output_dir ${output_dir} \
+        --logging_dir ${logging_dir} \
+        --overwrite_output_dir \
+        --preprocessing_num_workers 4 \
+        --per_device_eval_batch_size ${per_device_eval_batch_size} \
+        --debug underflow_overflow
+
+    mv ${output_dir}/predictions.json ${output_dir}/${split}.json
+done
+
diff --git a/convlab2/base_models/bert/run_token_classification.py b/convlab2/base_models/bert/run_token_classification.py
new file mode 100644
index 0000000000000000000000000000000000000000..c97fc60aa49a50d42a8470522d2dfaa09227b2ce
--- /dev/null
+++ b/convlab2/base_models/bert/run_token_classification.py
@@ -0,0 +1,598 @@
+#!/usr/bin/env python
+# coding=utf-8
+# Copyright 2020 The HuggingFace Team All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Fine-tuning the library models for token classification.
+Modified from https://github.com/huggingface/transformers/blob/master/examples/pytorch/token-classification/run_ner.py
+"""
+# You can also adapt this script on your own token classification task and datasets. Pointers for this are left as comments.
+
+import logging
+import os
+import sys
+import json
+from dataclasses import dataclass, field
+from typing import Optional
+
+import datasets
+import numpy as np
+from datasets import ClassLabel, load_dataset, load_metric
+
+import transformers
+from transformers import (
+    AutoConfig,
+    AutoModelForTokenClassification,
+    AutoTokenizer,
+    DataCollatorForTokenClassification,
+    HfArgumentParser,
+    PreTrainedTokenizerFast,
+    Trainer,
+    TrainingArguments,
+    set_seed,
+)
+from transformers.trainer_utils import EvalPrediction, get_last_checkpoint
+from transformers.utils import check_min_version
+from transformers.utils.versions import require_version
+
+
+# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
+check_min_version("4.12.5")
+
+require_version("datasets>=1.16.1")
+
+logger = logging.getLogger(__name__)
+os.environ["WANDB_DISABLED"] = "true"
+
+
+@dataclass
+class ModelArguments:
+    """
+    Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.
+    """
+
+    model_name_or_path: str = field(
+        metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"}
+    )
+    config_name: Optional[str] = field(
+        default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"}
+    )
+    tokenizer_name: Optional[str] = field(
+        default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}
+    )
+    cache_dir: Optional[str] = field(
+        default=None,
+        metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"},
+    )
+    model_revision: str = field(
+        default="main",
+        metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
+    )
+    use_auth_token: bool = field(
+        default=False,
+        metadata={
+            "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
+            "with private models)."
+        },
+    )
+
+
+@dataclass
+class DataTrainingArguments:
+    """
+    Arguments pertaining to what data we are going to input our model for training and eval.
+    """
+
+
+    task_name: Optional[str] = field(
+        default=None, metadata={"help": "The name of the task (ner, pos...)."}
+    )
+    dataset_name: Optional[str] = field(
+        default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."}
+    )
+    dataset_config_name: Optional[str] = field(
+        default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
+    )
+    source_column: Optional[str] = field(
+        default=None,
+        metadata={"help": "The name of the column in the datasets containing the source texts."},
+    )
+    target_column: Optional[str] = field(
+        default=None,
+        metadata={"help": "The name of the column in the datasets containing the target labels."},
+    )
+    train_file: Optional[str] = field(
+        default=None, metadata={"help": "The input training data file (a jsonlines or csv file)."}
+    )
+    validation_file: Optional[str] = field(
+        default=None,
+        metadata={
+            "help": "An optional input evaluation data file to evaluate the metrics on (a jsonlines or csv file)."
+        },
+    )
+    test_file: Optional[str] = field(
+        default=None,
+        metadata={
+            "help": "An optional input test data file to evaluate the metrics on (a jsonlines or csv file)."
+        },
+    )
+    overwrite_cache: bool = field(
+        default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
+    )
+    preprocessing_num_workers: Optional[int] = field(
+        default=None,
+        metadata={"help": "The number of processes to use for the preprocessing."},
+    )
+    max_seq_length: Optional[int] = field(
+        default=None,
+        metadata={
+            "help": "The maximum total input sequence length after tokenization. If set, sequences longer "
+            "than this will be truncated, sequences shorter will be padded."
+        },
+    )
+    pad_to_max_length: bool = field(
+        default=False,
+        metadata={
+            "help": "Whether to pad all samples to model maximum sentence length. "
+            "If False, will pad the samples dynamically when batching to the maximum length in the batch. More "
+            "efficient on GPU but very bad for TPU."
+        },
+    )
+    max_train_samples: Optional[int] = field(
+        default=None,
+        metadata={
+            "help": "For debugging purposes or quicker training, truncate the number of training examples to this "
+            "value if set."
+        },
+    )
+    max_eval_samples: Optional[int] = field(
+        default=None,
+        metadata={
+            "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
+            "value if set."
+        },
+    )
+    max_predict_samples: Optional[int] = field(
+        default=None,
+        metadata={
+            "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this "
+            "value if set."
+        },
+    )
+    label_all_tokens: bool = field(
+        default=False,
+        metadata={
+            "help": "Whether to put the label for one word on all tokens of generated by that word or just on the "
+            "one (in which case the other tokens will have a padding index)."
+        },
+    )
+    return_entity_level_metrics: bool = field(
+        default=False,
+        metadata={"help": "Whether to return all the entity levels during evaluation or just the overall ones."},
+    )
+
+    def __post_init__(self):  # require a data source and check that given files are csv or json
+        if (
+            self.dataset_name is None
+            and self.train_file is None
+            and self.validation_file is None
+            and self.test_file is None
+        ):
+            raise ValueError("Need either a dataset name or a training/validation/testing file.")
+        else:
+            if self.train_file is not None:
+                extension = self.train_file.split(".")[-1]
+                assert extension in ["csv", "json"], "`train_file` should be a csv or a json file."
+            if self.validation_file is not None:
+                extension = self.validation_file.split(".")[-1]
+                assert extension in ["csv", "json"], "`validation_file` should be a csv or a json file."
+            if self.test_file is not None:
+                extension = self.test_file.split(".")[-1]
+                assert extension in ["csv", "json"], "`test_file` should be a csv or a json file."
+
+
+def main():
+    """Parse the arguments, then fine-tune, evaluate and/or predict with a token classifier.
+    All options are listed in src/transformers/training_args.py or by passing --help; model,
+    data and training arguments are parsed into separate dataclasses."""
+
+    parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
+    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
+        # If we pass only one argument to the script and it's the path to a json file,
+        # let's parse it to get our arguments.
+        model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
+    else:
+        model_args, data_args, training_args = parser.parse_args_into_dataclasses()
+
+    # Setup logging
+    logging.basicConfig(
+        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
+        datefmt="%m/%d/%Y %H:%M:%S",
+        handlers=[logging.StreamHandler(sys.stdout)],
+    )
+    log_level = training_args.get_process_log_level()
+    logger.setLevel(log_level)
+    datasets.utils.logging.set_verbosity(log_level)
+    transformers.utils.logging.set_verbosity(log_level)
+    transformers.utils.logging.enable_default_handler()
+    transformers.utils.logging.enable_explicit_format()
+
+    # Log on each process the small summary (the trailing ", " keeps the two halves from running together):
+    logger.warning(
+        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
+        + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
+    )
+    logger.info(f"Training/evaluation parameters {training_args}")
+
+    # Detecting last checkpoint.
+    last_checkpoint = None
+    if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
+        last_checkpoint = get_last_checkpoint(training_args.output_dir)
+        if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
+            raise ValueError(
+                f"Output directory ({training_args.output_dir}) already exists and is not empty. "
+                "Use --overwrite_output_dir to overcome."
+            )
+        elif last_checkpoint is not None and training_args.resume_from_checkpoint is None:
+            logger.info(
+                f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
+                "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
+            )
+
+    # Set seed before initializing model.
+    set_seed(training_args.seed)
+
+    # Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
+    # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
+    # (the dataset will be downloaded automatically from the datasets Hub).
+    #
+    # For CSV/JSON files this script will use the first column for the source texts and the second column for the
+    # target labels (unless you specify column names for this with the `source_column` and `target_column` arguments).
+    #
+    # In distributed training, the load_dataset function guarantee that only one local process can concurrently
+    # download the dataset.
+    if data_args.dataset_name is not None:
+        # Downloading and loading a dataset from the hub.
+        raw_datasets = load_dataset(
+            data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir
+        )
+    else:
+        data_files = {}
+        if data_args.train_file is not None:
+            data_files["train"] = data_args.train_file
+            extension = data_args.train_file.split(".")[-1]
+        if data_args.validation_file is not None:
+            data_files["validation"] = data_args.validation_file
+            extension = data_args.validation_file.split(".")[-1]
+        if data_args.test_file is not None:
+            data_files["test"] = data_args.test_file
+            extension = data_args.test_file.split(".")[-1]
+        raw_datasets = load_dataset(extension, data_files=data_files, cache_dir=model_args.cache_dir)
+    # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
+    # https://huggingface.co/docs/datasets/loading_datasets.html.
+
+    # must provide train dataset for label space
+    column_names = raw_datasets["train"].column_names
+    features = raw_datasets["train"].features
+
+    if data_args.source_column is None:
+        source_column = column_names[0]
+    else:
+        source_column = data_args.source_column
+        if source_column not in column_names:
+            raise ValueError(
+                f"--source_column' value '{data_args.source_column}' needs to be one of: {', '.join(column_names)}"
+            )
+
+    if data_args.target_column is None:
+        target_column = column_names[1]
+    else:
+        target_column = data_args.target_column
+        if target_column not in column_names:
+            raise ValueError(
+                f"--target_column' value '{data_args.target_column}' needs to be one of: {', '.join(column_names)}"
+            )
+
+    # In the event the labels are not a `Sequence[ClassLabel]`, we will need to go through the dataset to get the
+    # unique labels.
+    def get_label_list(labels):
+        unique_labels = set()
+        for label in labels:
+            unique_labels = unique_labels | set(label)
+        label_list = list(unique_labels)
+        label_list.sort()
+        return label_list
+
+    if isinstance(features[target_column].feature, ClassLabel):
+        label_list = features[target_column].feature.names
+        # No need to convert the labels since they are already ints.
+        label_to_id = {i: i for i in range(len(label_list))}
+    else:
+        label_list = get_label_list(raw_datasets["train"][target_column])
+        label_to_id = {l: i for i, l in enumerate(label_list)}
+    num_labels = len(label_list)
+
+    # Map that sends B-Xxx label to its I-Xxx counterpart for label_all_tokens
+    b_to_i_label = []
+    for idx, label in enumerate(label_list):
+        if label.startswith("B-") and label.replace("B-", "I-") in label_list:
+            b_to_i_label.append(label_list.index(label.replace("B-", "I-")))
+        else:
+            b_to_i_label.append(idx)
+
+    # Load pretrained model and tokenizer
+    #
+    # Distributed training:
+    # The .from_pretrained methods guarantee that only one local process can concurrently
+    # download model & vocab.
+    config = AutoConfig.from_pretrained(
+        model_args.config_name if model_args.config_name else model_args.model_name_or_path,
+        num_labels=num_labels,
+        label2id=label_to_id,
+        id2label={i: l for l, i in label_to_id.items()},
+        finetuning_task=data_args.task_name,
+        cache_dir=model_args.cache_dir,
+        revision=model_args.model_revision,
+        use_auth_token=True if model_args.use_auth_token else None,
+    )
+
+    tokenizer_name_or_path = model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path
+    if config.model_type in {"gpt2", "roberta"}:
+        tokenizer = AutoTokenizer.from_pretrained(
+            tokenizer_name_or_path,
+            cache_dir=model_args.cache_dir,
+            use_fast=True,
+            revision=model_args.model_revision,
+            use_auth_token=True if model_args.use_auth_token else None,
+            add_prefix_space=True,
+        )
+    else:
+        tokenizer = AutoTokenizer.from_pretrained(
+            tokenizer_name_or_path,
+            cache_dir=model_args.cache_dir,
+            use_fast=True,
+            revision=model_args.model_revision,
+            use_auth_token=True if model_args.use_auth_token else None,
+        )
+
+    model = AutoModelForTokenClassification.from_pretrained(
+        model_args.model_name_or_path,
+        from_tf=bool(".ckpt" in model_args.model_name_or_path),
+        config=config,
+        cache_dir=model_args.cache_dir,
+        revision=model_args.model_revision,
+        use_auth_token=True if model_args.use_auth_token else None,
+    )
+
+    # Tokenizer check: this script requires a fast tokenizer.
+    if not isinstance(tokenizer, PreTrainedTokenizerFast):
+        raise ValueError(
+            "This example script only works for models that have a fast tokenizer. Checkout the big table of models "
+            "at https://huggingface.co/transformers/index.html#supported-frameworks to find the model types that meet this "
+            "requirement"
+        )
+
+    # Preprocessing the dataset
+    # Padding strategy
+    padding = "max_length" if data_args.pad_to_max_length else False
+
+    # Tokenize all texts and align the labels with them.
+    def tokenize_and_align_labels(examples):
+        tokenized_inputs = tokenizer(
+            examples[source_column],
+            padding=padding,
+            truncation=True,
+            max_length=data_args.max_seq_length,
+            # We use this argument because the texts in our dataset are lists of words (with a label for each word).
+            is_split_into_words=True,
+        )
+        labels = []
+        for i, label in enumerate(examples[target_column]):
+            word_ids = tokenized_inputs.word_ids(batch_index=i)
+            previous_word_idx = None
+            label_ids = []
+            for word_idx in word_ids:
+                # Special tokens have a word id that is None. We set the label to -100 so they are automatically
+                # ignored in the loss function.
+                if word_idx is None:
+                    label_ids.append(-100)
+                # We set the label for the first token of each word.
+                elif word_idx != previous_word_idx:
+                    label_ids.append(label_to_id[label[word_idx]])
+                # For the other tokens in a word, we set the label to either the current label or -100, depending on
+                # the label_all_tokens flag.
+                else:
+                    if data_args.label_all_tokens:
+                        label_ids.append(b_to_i_label[label_to_id[label[word_idx]]])
+                    else:
+                        label_ids.append(-100)
+                previous_word_idx = word_idx
+
+            labels.append(label_ids)
+        tokenized_inputs["labels"] = labels
+        return tokenized_inputs
+
+    if training_args.do_train:
+        if "train" not in raw_datasets:
+            raise ValueError("--do_train requires a train dataset")
+        train_dataset = raw_datasets["train"]
+        if data_args.max_train_samples is not None:
+            train_dataset = train_dataset.select(range(data_args.max_train_samples))
+        with training_args.main_process_first(desc="train dataset map pre-processing"):
+            train_dataset = train_dataset.map(
+                tokenize_and_align_labels,
+                batched=True,
+                num_proc=data_args.preprocessing_num_workers,
+                load_from_cache_file=not data_args.overwrite_cache,
+                desc="Running tokenizer on train dataset",
+            )
+
+    if training_args.do_eval:
+        if "validation" not in raw_datasets:
+            raise ValueError("--do_eval requires a validation dataset")
+        eval_dataset = raw_datasets["validation"]
+        if data_args.max_eval_samples is not None:
+            eval_dataset = eval_dataset.select(range(data_args.max_eval_samples))
+        with training_args.main_process_first(desc="validation dataset map pre-processing"):
+            eval_dataset = eval_dataset.map(
+                tokenize_and_align_labels,
+                batched=True,
+                num_proc=data_args.preprocessing_num_workers,
+                load_from_cache_file=not data_args.overwrite_cache,
+                desc="Running tokenizer on validation dataset",
+            )
+
+    if training_args.do_predict:
+        if "test" not in raw_datasets:
+            raise ValueError("--do_predict requires a test dataset")
+        predict_dataset = raw_datasets["test"]
+        if data_args.max_predict_samples is not None:
+            predict_dataset = predict_dataset.select(range(data_args.max_predict_samples))
+        with training_args.main_process_first(desc="prediction dataset map pre-processing"):
+            predict_dataset = predict_dataset.map(
+                tokenize_and_align_labels,
+                batched=True,
+                num_proc=data_args.preprocessing_num_workers,
+                load_from_cache_file=not data_args.overwrite_cache,
+                desc="Running tokenizer on prediction dataset",
+            )
+
+    # Data collator
+    data_collator = DataCollatorForTokenClassification(tokenizer, pad_to_multiple_of=8 if training_args.fp16 else None)
+
+    # Metrics
+    metric = load_metric(path="seqeval_metric.py")
+
+    def compute_metrics(p: EvalPrediction):
+        predictions, labels = p
+        predictions = np.argmax(predictions, axis=2)
+
+        # Remove ignored index (special tokens)
+        true_predictions = [
+            [label_list[p] for (p, l) in zip(prediction, label) if l != -100]
+            for prediction, label in zip(predictions, labels)
+        ]
+        true_labels = [
+            [label_list[l] for (p, l) in zip(prediction, label) if l != -100]
+            for prediction, label in zip(predictions, labels)
+        ]
+
+        results = metric.compute(predictions=true_predictions, references=true_labels)
+        if data_args.return_entity_level_metrics:
+            # Unpack nested dictionaries
+            final_results = {}
+            for key, value in results.items():
+                if isinstance(value, dict):
+                    for n, v in value.items():
+                        final_results[f"{key}_{n}"] = v
+                else:
+                    final_results[key] = value
+            return final_results
+        else:
+            return {
+                "precision": results["overall_precision"],
+                "recall": results["overall_recall"],
+                "f1": results["overall_f1"],
+                "accuracy": results["overall_accuracy"],
+            }
+
+    # Initialize our Trainer
+    trainer = Trainer(
+        model=model,
+        args=training_args,
+        train_dataset=train_dataset if training_args.do_train else None,
+        eval_dataset=eval_dataset if training_args.do_eval else None,
+        tokenizer=tokenizer,
+        data_collator=data_collator,
+        compute_metrics=compute_metrics,
+    )
+
+    # Training
+    if training_args.do_train:
+        checkpoint = None
+        if training_args.resume_from_checkpoint is not None:
+            checkpoint = training_args.resume_from_checkpoint
+        elif last_checkpoint is not None:
+            checkpoint = last_checkpoint
+        train_result = trainer.train(resume_from_checkpoint=checkpoint)
+        trainer.save_model()  # Saves the tokenizer too for easy upload
+
+        metrics = train_result.metrics
+        max_train_samples = (
+            data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset)
+        )
+        metrics["train_samples"] = min(max_train_samples, len(train_dataset))
+
+        trainer.log_metrics("train", metrics)
+        trainer.save_metrics("train", metrics)
+        trainer.save_state()
+
+    # Evaluation
+    if training_args.do_eval:
+        logger.info("*** Evaluate ***")
+
+        metrics = trainer.evaluate()
+
+        max_eval_samples = data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset)
+        metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset))
+
+        trainer.log_metrics("eval", metrics)
+        trainer.save_metrics("eval", metrics)
+
+    # Predict
+    if training_args.do_predict:
+        logger.info("*** Predict ***")
+
+        predictions, labels, metrics = trainer.predict(predict_dataset, metric_key_prefix="predict")
+        predictions = np.argmax(predictions, axis=2)
+
+        # Remove ignored index (special tokens)
+        true_predictions = [
+            [label_list[p] for (p, l) in zip(prediction, label) if l != -100]
+            for prediction, label in zip(predictions, labels)
+        ]
+
+        trainer.log_metrics("predict", metrics)
+        trainer.save_metrics("predict", metrics)
+
+        # Save predictions
+        output_predictions_file = os.path.join(training_args.output_dir, "predictions.json")
+        if trainer.is_world_process_zero():
+            with open(output_predictions_file, "w") as writer:
+                for sample, prediction in zip(raw_datasets["test"], true_predictions):
+                    sample["predictions"] = prediction
+                    writer.write(json.dumps(sample)+'\n')
+
+    kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": data_args.task_name}
+    if data_args.dataset_name is not None:
+        kwargs["dataset_tags"] = data_args.dataset_name
+        if data_args.dataset_config_name is not None:
+            kwargs["dataset_args"] = data_args.dataset_config_name
+            kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}"
+        else:
+            kwargs["dataset"] = data_args.dataset_name
+
+    if training_args.push_to_hub:
+        trainer.push_to_hub(**kwargs)
+    else:
+        trainer.create_model_card(**kwargs)
+
+
+def _mp_fn(index):
+    # For xla_spawn (TPUs)
+    main()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/convlab2/base_models/bert/seqeval_metric.py b/convlab2/base_models/bert/seqeval_metric.py
new file mode 100644
index 0000000000000000000000000000000000000000..58a6d07a2675f51a3d0ca8e56c015c4a856b3eda
--- /dev/null
+++ b/convlab2/base_models/bert/seqeval_metric.py
@@ -0,0 +1,158 @@
+# coding=utf-8
+# Copyright 2020 The HuggingFace Datasets Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+""" seqeval metric. """
+
+import importlib
+from typing import List, Optional, Union
+
+from seqeval.metrics import accuracy_score, classification_report
+
+import datasets
+
+
+_CITATION = """\
+@inproceedings{ramshaw-marcus-1995-text,
+    title = "Text Chunking using Transformation-Based Learning",
+    author = "Ramshaw, Lance and
+      Marcus, Mitch",
+    booktitle = "Third Workshop on Very Large Corpora",
+    year = "1995",
+    url = "https://www.aclweb.org/anthology/W95-0107",
+}
+@misc{seqeval,
+  title={{seqeval}: A Python framework for sequence labeling evaluation},
+  url={https://github.com/chakki-works/seqeval},
+  note={Software available from https://github.com/chakki-works/seqeval},
+  author={Hiroki Nakayama},
+  year={2018},
+}
+"""
+
+_DESCRIPTION = """\
+seqeval is a Python framework for sequence labeling evaluation.
+seqeval can evaluate the performance of chunking tasks such as named-entity recognition, part-of-speech tagging, semantic role labeling and so on.
+This is well-tested by using the Perl script conlleval, which can be used for
+measuring the performance of a system that has processed the CoNLL-2000 shared task data.
+seqeval supports following formats:
+IOB1
+IOB2
+IOE1
+IOE2
+IOBES
+See the [README.md] file at https://github.com/chakki-works/seqeval for more information.
+"""
+
+_KWARGS_DESCRIPTION = """
+Produces labelling scores along with its sufficient statistics
+from a source against one or more references.
+Args:
+    predictions: List of List of predicted labels (Estimated targets as returned by a tagger)
+    references: List of List of reference labels (Ground truth (correct) target values)
+    suffix: True if the IOB prefix is after type, False otherwise. default: False
+    scheme: Specify target tagging scheme. Should be one of ["IOB1", "IOB2", "IOE1", "IOE2", "IOBES", "BILOU"].
+        default: None
+    mode: Whether to count correct entity labels with incorrect I/B tags as true positives or not.
+        If you want to only count exact matches, pass mode="strict". default: None.
+    sample_weight: Array-like of shape (n_samples,), weights for individual samples. default: None
+    zero_division: Which value to substitute as a metric value when encountering zero division. Should be on of 0, 1,
+        "warn". "warn" acts as 0, but the warning is raised.
+Returns:
+    'scores': dict. Summary of the scores for overall and per type
+        Overall:
+            'accuracy': accuracy,
+            'precision': precision,
+            'recall': recall,
+            'f1': F1 score, also known as balanced F-score or F-measure,
+        Per type:
+            'precision': precision,
+            'recall': recall,
+            'f1': F1 score, also known as balanced F-score or F-measure
+Examples:
+    >>> predictions = [['O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'O'], ['B-PER', 'I-PER', 'O']]
+    >>> references = [['O', 'O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'O'], ['B-PER', 'I-PER', 'O']]
+    >>> seqeval = datasets.load_metric("seqeval")
+    >>> results = seqeval.compute(predictions=predictions, references=references)
+    >>> print(list(results.keys()))
+    ['MISC', 'PER', 'overall_precision', 'overall_recall', 'overall_f1', 'overall_accuracy']
+    >>> print(results["overall_f1"])
+    0.5
+    >>> print(results["PER"]["f1"])
+    1.0
+"""
+
+
+@datasets.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
+class Seqeval(datasets.Metric):
+    def _info(self):  # metric metadata: predictions and references are sequences of string labels
+        return datasets.MetricInfo(
+            description=_DESCRIPTION,
+            citation=_CITATION,
+            homepage="https://github.com/chakki-works/seqeval",
+            inputs_description=_KWARGS_DESCRIPTION,
+            features=datasets.Features(
+                {
+                    "predictions": datasets.Sequence(datasets.Value("string", id="label"), id="sequence"),
+                    "references": datasets.Sequence(datasets.Value("string", id="label"), id="sequence"),
+                }
+            ),
+            codebase_urls=["https://github.com/chakki-works/seqeval"],
+            reference_urls=["https://github.com/chakki-works/seqeval"],
+        )
+
+    def _compute(  # returns per-type scores plus overall_precision/recall/f1/accuracy
+        self,
+        predictions,
+        references,
+        suffix: bool = False,
+        scheme: Optional[str] = None,
+        mode: Optional[str] = None,
+        sample_weight: Optional[List[int]] = None,
+        zero_division: Union[str, int] = "warn",
+    ):
+        if scheme is not None:
+            try:
+                scheme_module = importlib.import_module("seqeval.scheme")
+                scheme = getattr(scheme_module, scheme)  # resolve the scheme name to the seqeval.scheme attribute
+            except AttributeError as err:
+                raise ValueError(f"Scheme should be one of [IOB1, IOB2, IOE1, IOE2, IOBES, BILOU], got {scheme}") from err
+        report = classification_report(
+            y_true=references,
+            y_pred=predictions,
+            suffix=suffix,
+            output_dict=True,
+            scheme=scheme,
+            mode=mode,
+            sample_weight=sample_weight,
+            zero_division=zero_division,
+        )
+        report.pop("macro avg")  # only the micro average is reported as the overall score
+        report.pop("weighted avg")
+        overall_score = report.pop("micro avg")
+
+        scores = {  # what is left in the report are the per-type entries
+            type_name: {
+                "precision": score["precision"],
+                "recall": score["recall"],
+                "f1": score["f1-score"],
+                "number": score["support"],
+            }
+            for type_name, score in report.items()
+        }
+        scores["overall_precision"] = overall_score["precision"]
+        scores["overall_recall"] = overall_score["recall"]
+        scores["overall_f1"] = overall_score["f1-score"]
+        scores["overall_accuracy"] = accuracy_score(y_true=references, y_pred=predictions)
+
+        return scores
\ No newline at end of file
diff --git a/convlab2/base_models/bert/train_bio.sh b/convlab2/base_models/bert/train_bio.sh
new file mode 100644
index 0000000000000000000000000000000000000000..db2ee860d2464c57dfb20d57a54ea5b34cda85b1
--- /dev/null
+++ b/convlab2/base_models/bert/train_bio.sh
@@ -0,0 +1,50 @@
+set -e
+n_gpus=3
+task_name="dialogBIO"
+dataset_name="sgd"
+data_dir="data/${task_name}/${dataset_name}"
+output_dir="output/${task_name}/${dataset_name}"
+cache_dir="cache"
+logging_dir="${output_dir}/runs"
+train_file="${data_dir}/train.json"
+validation_file="${data_dir}/validation.json"
+test_file="${data_dir}/test.json"
+source_column="tokens"
def create_rg_data(dataset, data_dir):
    """Write response-generation (rg) seq2seq files for one dataset into `data_dir`.

    Produces:
      * `source_prefix.txt` -- the task prefix to prepend to every source text
        (note the deliberate trailing space after the colon);
      * `<split>.json` -- one JSON object per line with a `context` string
        (all context turns rendered as "speaker: utterance" and joined by
        spaces) and a `response` string ("speaker: utterance" of the sample).

    Args:
        dataset: dataset object accepted by `load_rg_data`.
        data_dir: output directory; created if it does not exist.
    """
    data_by_split = load_rg_data(dataset)
    os.makedirs(data_dir, exist_ok=True)

    # Written explicitly as UTF-8, like the per-split files below, instead of
    # depending on the platform's default encoding.
    prefix_file = os.path.join(data_dir, "source_prefix.txt")
    with open(prefix_file, "w", encoding='utf-8') as f:
        f.write("generate a system response according to the context: ")

    for data_split, samples in data_by_split.items():
        data = []
        for sample in tqdm(samples, desc=f'{data_split} sample', leave=False):
            context = ' '.join([f"{turn['speaker']}: {turn['utterance']}" for turn in sample['context']])
            response = f"{sample['speaker']}: {sample['utterance']}"
            # ensure_ascii=False keeps non-ASCII text readable in the output file.
            data.append(json.dumps({'context': context, 'response': response}, ensure_ascii=False)+'\n')

        file_name = os.path.join(data_dir, f"{data_split}.json")
        with open(file_name, "w", encoding='utf-8') as f:
            f.writelines(data)
+test_file="${data_dir}/test.json" +source_prefix="${data_dir}/source_prefix.txt" +source_column="context" +target_column="response" +model_name_or_path="t5-small" +per_device_train_batch_size=32 +per_device_eval_batch_size=128 +gradient_accumulation_steps=1 +lr=1e-3 +num_train_epochs=5 + +python create_data.py --tasks ${task_name} --datasets ${dataset_name} --save_dir "data" + +python -m torch.distributed.launch \ + --nproc_per_node ${n_gpus} run_seq2seq.py \ + --task_name ${task_name} \ + --train_file ${train_file} \ + --validation_file ${validation_file} \ + --test_file ${test_file} \ + --source_column ${source_column} \ + --target_column ${target_column} \ + --source_prefix ${source_prefix} \ + --model_name_or_path ${model_name_or_path} \ + --do_train \ + --do_eval \ + --do_predict \ + --save_strategy epoch \ + --evaluation_strategy epoch \ + --load_best_model_at_end \ + --predict_with_generate \ + --cache_dir ${cache_dir} \ + --output_dir ${output_dir} \ + --logging_dir ${logging_dir} \ + --overwrite_output_dir \ + --preprocessing_num_workers 4 \ + --per_device_train_batch_size ${per_device_train_batch_size} \ + --per_device_eval_batch_size ${per_device_eval_batch_size} \ + --gradient_accumulation_steps ${gradient_accumulation_steps} \ + --learning_rate ${lr} \ + --num_train_epochs ${num_train_epochs} \ + --debug underflow_overflow \ + --adafactor \ + --gradient_checkpointing diff --git a/convlab2/base_models/t5/run_seq2seq.py b/convlab2/base_models/t5/run_seq2seq.py new file mode 100644 index 0000000000000000000000000000000000000000..aaef4470845bb400fed28ecab1ef164ffa37d4b2 --- /dev/null +++ b/convlab2/base_models/t5/run_seq2seq.py @@ -0,0 +1,635 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2021 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
@dataclass
class ModelArguments:
    """
    Command-line options selecting the pretrained model, config and tokenizer to fine-tune.
    """

    # Required: the only field without a default.
    model_name_or_path: str = field(
        metadata=dict(help="Path to pretrained model or model identifier from huggingface.co/models"),
    )
    config_name: Optional[str] = field(
        metadata=dict(help="Pretrained config name or path if not the same as model_name"),
        default=None,
    )
    tokenizer_name: Optional[str] = field(
        metadata=dict(help="Pretrained tokenizer name or path if not the same as model_name"),
        default=None,
    )
    cache_dir: Optional[str] = field(
        metadata=dict(help="Where to store the pretrained models downloaded from huggingface.co"),
        default=None,
    )
    use_fast_tokenizer: bool = field(
        metadata=dict(
            help="Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."
        ),
        default=True,
    )
    model_revision: str = field(
        metadata=dict(
            help="The specific model version to use (can be a branch name, tag name or commit id)."
        ),
        default="main",
    )
    use_auth_token: bool = field(
        metadata=dict(
            help=(
                "Will use the token generated when running `transformers-cli login` "
                "(necessary to use this script with private models)."
            )
        ),
        default=False,
    )
    # None means "not specified on the command line".
    resize_position_embeddings: Optional[bool] = field(
        metadata=dict(
            help=(
                "Whether to automatically resize the position embeddings if "
                "`max_source_length` exceeds the model's position embeddings."
            )
        ),
        default=None,
    )
+ """ + + + task_name: Optional[str] = field( + default=None, metadata={"help": "The name of the task, e.g., rg (for rgresponse generation)."} + ) + dataset_name: Optional[str] = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + source_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the source texts."}, + ) + target_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the target texts."}, + ) + train_file: Optional[str] = field( + default=None, metadata={"help": "The input training data file (a jsonlines or csv file)."} + ) + validation_file: Optional[str] = field( + default=None, + metadata={ + "help": "An optional input evaluation data file to evaluate the metrics on (a jsonlines or csv file)." + }, + ) + test_file: Optional[str] = field( + default=None, + metadata={ + "help": "An optional input test data file to evaluate the metrics on (a jsonlines or csv file)." + }, + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_source_length: Optional[int] = field( + default=1024, + metadata={ + "help": "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + }, + ) + max_target_length: Optional[int] = field( + default=128, + metadata={ + "help": "The maximum total sequence length for target text after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." 
+ }, + ) + val_max_target_length: Optional[int] = field( + default=None, + metadata={ + "help": "The maximum total sequence length for validation target text after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`." + "This argument is also used to override the ``max_length`` param of ``model.generate``, which is used " + "during ``evaluate`` and ``predict``." + }, + ) + pad_to_max_length: bool = field( + default=False, + metadata={ + "help": "Whether to pad all samples to model maximum sentence length. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " + "efficient on GPU but very bad for TPU." + }, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_predict_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this " + "value if set." + }, + ) + num_beams: Optional[int] = field( + default=None, + metadata={ + "help": "Number of beams to use for evaluation. This argument will be passed to ``model.generate``, " + "which is used during ``evaluate`` and ``predict``." + }, + ) + ignore_pad_token_for_loss: bool = field( + default=True, + metadata={ + "help": "Whether to ignore the tokens corresponding to padded labels in the loss computation or not." 
def main():
    """Fine-tune, evaluate and/or run prediction with a seq2seq model.

    Steps: parse arguments (command line or a single json file), configure
    logging, load the datasets (hub dataset or local csv/json files), load the
    config/tokenizer/model, tokenize the requested splits, then run the
    training / evaluation / prediction stages selected by `--do_train`,
    `--do_eval` and `--do_predict`. With `--predict_with_generate`, decoded test
    predictions are written to `generated_predictions.json` in the output dir.

    Returns:
        The `results` dict, which is never populated and is therefore empty;
        None when no `--do_*` stage was requested.

    Raises:
        ValueError: if the output dir is non-empty without a checkpoint and
            `--overwrite_output_dir` is not set, if the model has no
            `decoder_start_token_id`, if position embeddings are too small and
            resizing was explicitly disabled, if a source/target column is
            unknown, or if a requested stage has no matching dataset split.
    """
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.

    parser = HfArgumentParser((ModelArguments, DataTrainingArguments, Seq2SeqTrainingArguments))
    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
    else:
        model_args, data_args, training_args = parser.parse_args_into_dataclasses()

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        handlers=[logging.StreamHandler(sys.stdout)],
    )
    log_level = training_args.get_process_log_level()
    logger.setLevel(log_level)
    datasets.utils.logging.set_verbosity(log_level)
    transformers.utils.logging.set_verbosity(log_level)
    transformers.utils.logging.enable_default_handler()
    transformers.utils.logging.enable_explicit_format()

    # Log on each process the small summary:
    # (the two halves are joined with ", " so the fields do not run together)
    logger.warning(
        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
        + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
    )
    logger.info(f"Training/evaluation parameters {training_args}")

    if data_args.source_prefix_filepath is None and model_args.model_name_or_path in [
        "t5-small",
        "t5-base",
        "t5-large",
        "t5-3b",
        "t5-11b",
    ]:
        logger.warning(
            "You're running a t5 model but didn't provide a source prefix, which is the expected, e.g. with "
            "`--source_prefix_filepath 'path_to_prefix_file' ` whose first line is the source prefix"
        )

    # Detecting last checkpoint.
    last_checkpoint = None
    if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
        last_checkpoint = get_last_checkpoint(training_args.output_dir)
        if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
            raise ValueError(
                f"Output directory ({training_args.output_dir}) already exists and is not empty. "
                "Use --overwrite_output_dir to overcome."
            )
        elif last_checkpoint is not None and training_args.resume_from_checkpoint is None:
            logger.info(
                f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
                "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
            )

    # Set seed before initializing model.
    set_seed(training_args.seed)

    # Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
    # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
    # (the dataset will be downloaded automatically from the datasets Hub).
    #
    # For CSV/JSON files this script will use the first column for the source texts and the second column for the
    # target texts (unless you specify column names for this with the `source_column` and `target_column` arguments).
    #
    # In distributed training, the load_dataset function guarantee that only one local process can concurrently
    # download the dataset.
    if data_args.dataset_name is not None:
        # Downloading and loading a dataset from the hub.
        raw_datasets = load_dataset(
            data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir
        )
    else:
        data_files = {}
        if data_args.train_file is not None:
            data_files["train"] = data_args.train_file
            extension = data_args.train_file.split(".")[-1]
        if data_args.validation_file is not None:
            data_files["validation"] = data_args.validation_file
            extension = data_args.validation_file.split(".")[-1]
        if data_args.test_file is not None:
            data_files["test"] = data_args.test_file
            extension = data_args.test_file.split(".")[-1]
        raw_datasets = load_dataset(extension, data_files=data_files, cache_dir=model_args.cache_dir)
    # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
    # https://huggingface.co/docs/datasets/loading_datasets.html.

    # Load pretrained model and tokenizer
    #
    # Distributed training:
    # The .from_pretrained methods guarantee that only one local process can concurrently
    # download model & vocab.
    config = AutoConfig.from_pretrained(
        model_args.config_name if model_args.config_name else model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )
    tokenizer = AutoTokenizer.from_pretrained(
        model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        use_fast=model_args.use_fast_tokenizer,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )
    model = AutoModelForSeq2SeqLM.from_pretrained(
        model_args.model_name_or_path,
        from_tf=bool(".ckpt" in model_args.model_name_or_path),
        config=config,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )

    model.resize_token_embeddings(len(tokenizer))

    if training_args.gradient_checkpointing:
        # use_cache=True is incompatible with gradient checkpointing.
        # Set it on the model's own config: the model already exists at this point,
        # and the local `config` object is not guaranteed to be the one it holds.
        model.config.use_cache = False

    if model.config.decoder_start_token_id is None:
        raise ValueError("Make sure that `config.decoder_start_token_id` is correctly defined")

    if (
        hasattr(model.config, "max_position_embeddings")
        and model.config.max_position_embeddings < data_args.max_source_length
    ):
        if model_args.resize_position_embeddings is None:
            logger.warning(
                f"Increasing the model's number of position embedding vectors from {model.config.max_position_embeddings} "
                f"to {data_args.max_source_length}."
            )
            model.resize_position_embeddings(data_args.max_source_length)
        elif model_args.resize_position_embeddings:
            model.resize_position_embeddings(data_args.max_source_length)
        else:
            raise ValueError(
                f"`--max_source_length` is set to {data_args.max_source_length}, but the model only has {model.config.max_position_embeddings}"
                f" position encodings. Consider either reducing `--max_source_length` to {model.config.max_position_embeddings} or to automatically "
                "resize the model's position encodings by passing `--resize_position_embeddings`."
            )

    if data_args.source_prefix_filepath is not None:
        # Only the line terminator is removed: the prefix is concatenated directly
        # with the source text below, so any trailing space in the file is significant.
        with open(data_args.source_prefix_filepath, 'r', encoding='utf-8') as prefix_file:
            prefix = prefix_file.readline().rstrip("\n")
    else:
        prefix = ""

    # Preprocessing the datasets.
    # We need to tokenize inputs and targets.
    if training_args.do_train:
        column_names = raw_datasets["train"].column_names
    elif training_args.do_eval:
        column_names = raw_datasets["validation"].column_names
    elif training_args.do_predict:
        column_names = raw_datasets["test"].column_names
    else:
        logger.info("There is nothing to do. Please pass `do_train`, `do_eval` and/or `do_predict`.")
        return

    # Get the column names for input/target.
    if data_args.source_column is None:
        source_column = column_names[0]
    else:
        source_column = data_args.source_column
        if source_column not in column_names:
            raise ValueError(
                f"--source_column' value '{data_args.source_column}' needs to be one of: {', '.join(column_names)}"
            )
    if data_args.target_column is None:
        target_column = column_names[1]
    else:
        target_column = data_args.target_column
        if target_column not in column_names:
            raise ValueError(
                f"--target_column' value '{data_args.target_column}' needs to be one of: {', '.join(column_names)}"
            )

    # Temporarily set max_target_length for training.
    # (`preprocess_function` reads this variable when it is called, so the
    # reassignments before the eval/predict maps below take effect.)
    max_target_length = data_args.max_target_length
    padding = "max_length" if data_args.pad_to_max_length else False

    if training_args.label_smoothing_factor > 0 and not hasattr(model, "prepare_decoder_input_ids_from_labels"):
        logger.warning(
            "label_smoothing is enabled but the `prepare_decoder_input_ids_from_labels` method is not defined for"
            f"`{model.__class__.__name__}`. This will lead to loss being calculated twice and will take up more memory"
        )

    def preprocess_function(examples):
        """Tokenize one batch: prefixed sources as inputs, targets as `labels`."""

        # remove pairs where at least one record is None
        inputs, targets = [], []
        for i in range(len(examples[source_column])):
            if examples[source_column][i] is not None and examples[target_column][i] is not None:
                inputs.append(examples[source_column][i])
                targets.append(examples[target_column][i])

        inputs = [prefix + inp for inp in inputs]
        model_inputs = tokenizer(inputs, max_length=data_args.max_source_length, padding=padding, truncation=True)

        # Setup the tokenizer for targets
        with tokenizer.as_target_tokenizer():
            labels = tokenizer(targets, max_length=max_target_length, padding=padding, truncation=True)

        # If we are padding here, replace all tokenizer.pad_token_id in the labels by -100 when we want to ignore
        # padding in the loss.
        if padding == "max_length" and data_args.ignore_pad_token_for_loss:
            labels["input_ids"] = [
                [(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels["input_ids"]
            ]

        model_inputs["labels"] = labels["input_ids"]
        return model_inputs

    if training_args.do_train:
        if "train" not in raw_datasets:
            raise ValueError("--do_train requires a train dataset")
        train_dataset = raw_datasets["train"]
        if data_args.max_train_samples is not None:
            train_dataset = train_dataset.select(range(data_args.max_train_samples))
        with training_args.main_process_first(desc="train dataset map pre-processing"):
            train_dataset = train_dataset.map(
                preprocess_function,
                batched=True,
                num_proc=data_args.preprocessing_num_workers,
                remove_columns=column_names,
                load_from_cache_file=not data_args.overwrite_cache,
                desc="Running tokenizer on train dataset",
            )

    if training_args.do_eval:
        max_target_length = data_args.val_max_target_length
        if "validation" not in raw_datasets:
            raise ValueError("--do_eval requires a validation dataset")
        eval_dataset = raw_datasets["validation"]
        if data_args.max_eval_samples is not None:
            eval_dataset = eval_dataset.select(range(data_args.max_eval_samples))
        with training_args.main_process_first(desc="validation dataset map pre-processing"):
            eval_dataset = eval_dataset.map(
                preprocess_function,
                batched=True,
                num_proc=data_args.preprocessing_num_workers,
                remove_columns=column_names,
                load_from_cache_file=not data_args.overwrite_cache,
                desc="Running tokenizer on validation dataset",
            )

    if training_args.do_predict:
        max_target_length = data_args.val_max_target_length
        if "test" not in raw_datasets:
            raise ValueError("--do_predict requires a test dataset")
        predict_dataset = raw_datasets["test"]
        if data_args.max_predict_samples is not None:
            predict_dataset = predict_dataset.select(range(data_args.max_predict_samples))
        with training_args.main_process_first(desc="prediction dataset map pre-processing"):
            predict_dataset = predict_dataset.map(
                preprocess_function,
                batched=True,
                num_proc=data_args.preprocessing_num_workers,
                remove_columns=column_names,
                load_from_cache_file=not data_args.overwrite_cache,
                desc="Running tokenizer on prediction dataset",
            )

    # Data collator
    label_pad_token_id = -100 if data_args.ignore_pad_token_for_loss else tokenizer.pad_token_id
    data_collator = DataCollatorForSeq2Seq(
        tokenizer,
        model=model,
        label_pad_token_id=label_pad_token_id,
        pad_to_multiple_of=8 if training_args.fp16 else None,
    )

    # TODO: compute custom metric at evaluation.
    # Must take a EvalPrediction and return a dictionary string to metric values.
    def compute_metrics(p: EvalPrediction):
        """Return evaluation metrics; currently only the mean generated length."""
        preds, labels = p.predictions, p.label_ids
        if isinstance(preds, tuple):
            preds = preds[0]
        decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
        if data_args.ignore_pad_token_for_loss:
            # Replace -100 in the labels as we can't decode them.
            labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
        decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

        # TODO: compute metric using decoded_preds & decoded_labels
        result = {}

        prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in preds]
        result["gen_len"] = np.mean(prediction_lens)
        result = {k: round(v, 4) for k, v in result.items()}
        return result

    # Initialize our Trainer
    trainer = Seq2SeqTrainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset if training_args.do_train else None,
        eval_dataset=eval_dataset if training_args.do_eval else None,
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics if training_args.predict_with_generate else None,
    )

    # Training
    if training_args.do_train:
        checkpoint = None
        if training_args.resume_from_checkpoint is not None:
            checkpoint = training_args.resume_from_checkpoint
        elif last_checkpoint is not None:
            checkpoint = last_checkpoint
        train_result = trainer.train(resume_from_checkpoint=checkpoint)
        trainer.save_model()  # Saves the tokenizer too for easy upload

        metrics = train_result.metrics
        max_train_samples = (
            data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset)
        )
        metrics["train_samples"] = min(max_train_samples, len(train_dataset))

        trainer.log_metrics("train", metrics)
        trainer.save_metrics("train", metrics)
        trainer.save_state()

    # Evaluation
    results = {}
    max_length = (
        training_args.generation_max_length
        if training_args.generation_max_length is not None
        else data_args.val_max_target_length
    )
    num_beams = data_args.num_beams if data_args.num_beams is not None else training_args.generation_num_beams
    if training_args.do_eval:
        logger.info("*** Evaluate ***")
        metrics = trainer.evaluate(max_length=max_length, num_beams=num_beams, metric_key_prefix="eval")
        max_eval_samples = data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset)
        metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset))

        trainer.log_metrics("eval", metrics)
        trainer.save_metrics("eval", metrics)

    # Predict
    if training_args.do_predict:
        logger.info("*** Predict ***")

        predict_results = trainer.predict(
            predict_dataset, metric_key_prefix="predict", max_length=max_length, num_beams=num_beams
        )
        metrics = predict_results.metrics
        max_predict_samples = (
            data_args.max_predict_samples if data_args.max_predict_samples is not None else len(predict_dataset)
        )
        metrics["predict_samples"] = min(max_predict_samples, len(predict_dataset))

        trainer.log_metrics("predict", metrics)
        trainer.save_metrics("predict", metrics)

        if trainer.is_world_process_zero():
            if training_args.predict_with_generate:
                predictions = tokenizer.batch_decode(
                    predict_results.predictions, skip_special_tokens=True, clean_up_tokenization_spaces=True
                )
                predictions = [pred.strip() for pred in predictions]
                output_prediction_file = os.path.join(training_args.output_dir, "generated_predictions.json")
                # One JSON object per line: the raw test sample plus its decoded prediction.
                with open(output_prediction_file, "w", encoding='utf-8') as writer:
                    for sample, pred in zip(raw_datasets["test"], predictions):
                        sample["predictions"] = pred
                        writer.write(json.dumps(sample, ensure_ascii=False)+'\n')

    kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": data_args.task_name}
    if data_args.dataset_name is not None:
        kwargs["dataset_tags"] = data_args.dataset_name
        if data_args.dataset_config_name is not None:
            kwargs["dataset_args"] = data_args.dataset_config_name
            kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}"
        else:
            kwargs["dataset"] = data_args.dataset_name

    if training_args.push_to_hub:
        trainer.push_to_hub(**kwargs)
    else:
        trainer.create_model_card(**kwargs)

    return results
__name__ == "__main__": + main() diff --git a/convlab2/util/__init__.py b/convlab2/util/__init__.py index 66c7233217086bea51d8fe8b5952fe70dca0b4bc..8c48bc11d3892be2c8a0511a6b833e9e9c8b24ff 100755 --- a/convlab2/util/__init__.py +++ b/convlab2/util/__init__.py @@ -1,2 +1,2 @@ from convlab2.util.unified_datasets_util import load_dataset, load_ontology, load_database, \ - load_unified_data, load_nlu_data, load_dst_data, load_policy_data, load_nlg_data, load_e2e_data \ No newline at end of file + load_unified_data, load_nlu_data, load_dst_data, load_policy_data, load_nlg_data, load_e2e_data, load_rg_data \ No newline at end of file diff --git a/convlab2/util/unified_datasets_util.py b/convlab2/util/unified_datasets_util.py index b81fd17792ad212bc3ff8832106e5b3707c7de0c..e4344bd838785dda7a3736c37d7577a2887fd9d7 100644 --- a/convlab2/util/unified_datasets_util.py +++ b/convlab2/util/unified_datasets_util.py @@ -85,12 +85,14 @@ def load_unified_data( context_window_size=0, terminated=False, goal=False, - active_domains=False + active_domains=False, + split_to_turn=True ): data_splits = dataset.keys() if data_split == 'all' else [data_split] assert speaker in ['user', 'system', 'all'] assert not use_context or context_window_size > 0 info_list = list(filter(eval, ['utterance', 'dialogue_acts', 'state', 'db_results'])) + info_list += ['utt_idx'] data_by_split = {} for data_split in data_splits: data_by_split[data_split] = [] @@ -102,11 +104,11 @@ def load_unified_data( if ele in turn: sample[ele] = turn[ele] - if use_context: + if use_context or not split_to_turn: sample_copy = deepcopy(sample) context.append(sample_copy) - if speaker == turn['speaker'] or speaker == 'all': + if split_to_turn and speaker in [turn['speaker'], 'all']: if use_context: sample['context'] = context[-context_window_size-1:-1] if goal: @@ -116,6 +118,9 @@ def load_unified_data( if terminated: sample['terminated'] = turn['utt_idx'] == len(dialogue['turns']) - 1 
data_by_split[data_split].append(sample) + if not split_to_turn: + dialogue['turns'] = context + data_by_split[data_split].append(dialogue) return data_by_split def load_nlu_data(dataset, data_split='all', speaker='user', use_context=False, context_window_size=0, **kwargs): @@ -167,6 +172,13 @@ def load_e2e_data(dataset, data_split='all', speaker='system', context_window_si kwargs.setdefault('dialogue_acts', True) return load_unified_data(dataset, **kwargs) +def load_rg_data(dataset, data_split='all', speaker='system', context_window_size=100, **kwargs): + kwargs.setdefault('data_split', data_split) + kwargs.setdefault('speaker', speaker) + kwargs.setdefault('use_context', True) + kwargs.setdefault('context_window_size', context_window_size) + kwargs.setdefault('utterance', True) + return load_unified_data(dataset, **kwargs) if __name__ == "__main__": dataset = load_dataset('multiwoz21') diff --git a/data/unified_datasets/multiwoz21/README.md b/data/unified_datasets/multiwoz21/README.md index 7bf47f2c79ba03faa21fe9d03b4bc8a6e0c89ed8..d8fbcf9b5b5fed37728cbe63f97c0c28b222fc1d 100644 --- a/data/unified_datasets/multiwoz21/README.md +++ b/data/unified_datasets/multiwoz21/README.md @@ -18,6 +18,7 @@ MultiWOZ 2.1 fixed the noise in state annotations and dialogue utterances. It al - Normalize slot name and value. See `normalize_domain_slot_value` function in `preprocess.py`. - Correct some non-categorical slots' values and provide character level span annotation. - Concatenate multiple values in user goal & state using `|`. + - Add `booked` information in system turns from original belief states. - **Annotations:** - user goal, dialogue acts, state. 
diff --git a/data/unified_datasets/multiwoz21/data.zip b/data/unified_datasets/multiwoz21/data.zip index 9bb5c40b6c58e821c306f7096a07d594f2e59f47..6f8fc9f7d43f8a10ae37d8de54b34bd95ca4554b 100644 Binary files a/data/unified_datasets/multiwoz21/data.zip and b/data/unified_datasets/multiwoz21/data.zip differ diff --git a/data/unified_datasets/multiwoz21/dummy_data.json b/data/unified_datasets/multiwoz21/dummy_data.json index 9945b92915b0f9739d8b31b3fd112f8ab86f2cb3..9009018f77d201baf284e4f80459a4f99814fa7a 100644 --- a/data/unified_datasets/multiwoz21/dummy_data.json +++ b/data/unified_datasets/multiwoz21/dummy_data.json @@ -111,7 +111,14 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -210,7 +217,14 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -317,7 +331,14 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -405,7 +426,19 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [ + { + "name": "the cambridge belfry", + "reference": "7GAWK763" + } + ], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -482,7 +515,19 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [ + { + "name": "the cambridge belfry", + "reference": "7GAWK763" + } + ], + "attraction": [], + "train": [] + } } ] }, @@ -602,7 +647,14 @@ ], "binary": [] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -689,7 +741,14 @@ ], "binary": [] }, - "db_results": {} + "db_results": {}, + 
"booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -759,7 +818,14 @@ "non-categorical": [], "binary": [] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -836,7 +902,14 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -912,7 +985,14 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } } ] }, @@ -1053,7 +1133,14 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -1132,7 +1219,14 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -1234,7 +1328,14 @@ ], "binary": [] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -1315,7 +1416,19 @@ ], "binary": [] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [ + { + "trainID": "TR4977", + "reference": "A9NHSO9Y" + } + ] + } }, { "speaker": "user", @@ -1400,7 +1513,19 @@ ], "binary": [] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [ + { + "trainID": "TR4977", + "reference": "A9NHSO9Y" + } + ] + } }, { "speaker": "user", @@ -1470,7 +1595,19 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + 
"train": [ + { + "trainID": "TR4977", + "reference": "A9NHSO9Y" + } + ] + } }, { "speaker": "user", @@ -1566,7 +1703,24 @@ ], "binary": [] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [ + { + "name": "the cambridge belfry", + "reference": "5NAWGJDC" + } + ], + "attraction": [], + "train": [ + { + "trainID": "TR4977", + "reference": "A9NHSO9Y" + } + ] + } }, { "speaker": "user", @@ -1643,7 +1797,24 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [ + { + "name": "the cambridge belfry", + "reference": "5NAWGJDC" + } + ], + "attraction": [], + "train": [ + { + "trainID": "TR4977", + "reference": "A9NHSO9Y" + } + ] + } } ] }, @@ -1740,7 +1911,14 @@ ], "binary": [] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -1847,7 +2025,14 @@ ], "binary": [] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -1924,7 +2109,14 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -1994,7 +2186,14 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } } ] }, @@ -2124,7 +2323,14 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -2222,7 +2428,14 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -2326,7 +2539,14 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { 
+ "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -2419,7 +2639,14 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -2536,7 +2763,14 @@ ], "binary": [] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -2633,7 +2867,14 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -2729,7 +2970,14 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -2833,7 +3081,19 @@ ], "binary": [] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [ + { + "trainID": "TR5626", + "reference": "7ZCVR4Q3" + } + ] + } }, { "speaker": "user", @@ -2910,7 +3170,19 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [ + { + "trainID": "TR5626", + "reference": "7ZCVR4Q3" + } + ] + } } ] }, @@ -3039,7 +3311,14 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -3135,7 +3414,14 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -3229,7 +3515,14 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -3377,7 
+3670,14 @@ ], "binary": [] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -3469,7 +3769,14 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -3577,7 +3884,14 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -3647,7 +3961,14 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -3724,7 +4045,14 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -3811,7 +4139,14 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -3905,7 +4240,19 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [ + { + "name": "acorn guest house", + "reference": "04CSEO7Q" + } + ], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -3982,7 +4329,19 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [ + { + "name": "acorn guest house", + "reference": "04CSEO7Q" + } + ], + "attraction": [], + "train": [] + } } ] }, @@ -4112,7 +4471,14 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -4206,7 +4572,14 @@ ], "binary": [] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + 
"hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -4292,7 +4665,14 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -4393,7 +4773,14 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -4531,7 +4918,14 @@ ], "binary": [] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -4637,7 +5031,14 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -4720,7 +5121,14 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } } ] }, @@ -4821,7 +5229,14 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -4900,7 +5315,14 @@ ], "binary": [] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -4977,7 +5399,14 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } } ] }, @@ -5127,7 +5556,14 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -5231,7 +5667,14 @@ ], "binary": [] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { 
"speaker": "user", @@ -5339,7 +5782,14 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -5474,7 +5924,19 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [ + { + "trainID": "TR1879", + "reference": "XU1QLHVW" + } + ] + } }, { "speaker": "user", @@ -5609,7 +6071,19 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [ + { + "trainID": "TR1879", + "reference": "XU1QLHVW" + } + ] + } }, { "speaker": "user", @@ -5692,7 +6166,19 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [ + { + "trainID": "TR1879", + "reference": "XU1QLHVW" + } + ] + } } ] }, @@ -5800,7 +6286,14 @@ ], "binary": [] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } }, { "speaker": "user", @@ -5889,7 +6382,14 @@ } ] }, - "db_results": {} + "db_results": {}, + "booked": { + "taxi": [], + "restaurant": [], + "hotel": [], + "attraction": [], + "train": [] + } } ] } diff --git a/data/unified_datasets/multiwoz21/preprocess.py b/data/unified_datasets/multiwoz21/preprocess.py index 19a1634a434bd7118a035d4474114d704ce1acd0..140320813bc7a74a183e36115a251fedf32ecd56 100644 --- a/data/unified_datasets/multiwoz21/preprocess.py +++ b/data/unified_datasets/multiwoz21/preprocess.py @@ -860,17 +860,23 @@ def preprocess(): # add empty db_results turn_state = turn['metadata'] cur_state = copy.deepcopy(init_ontology['state']) + booked = {} for domain in turn_state: if domain not in cur_state: continue for subdomain in ['semi', 'book']: for slot, value in turn_state[domain][subdomain].items(): - if slot in ['booked', 'ticket']: 
+ if slot == 'ticket': + continue + elif slot == 'booked': + assert domain in init_ontology['domains'] + booked[domain] = value continue _, slot, value = normalize_domain_slot_value(domain, slot, value) cur_state[domain][slot] = value dialogue['turns'][-2]['state'] = cur_state dialogue['turns'][-1]['db_results'] = {} + dialogue['turns'][-1]['booked'] = booked dialogues_by_split[split].append(dialogue) # pprint(cnt_domain_slot.most_common()) dialogues = [] diff --git a/data/unified_datasets/sgd/preprocess.py b/data/unified_datasets/sgd/preprocess.py index 8b7f26a86d26d062353289f16154e39d01200e90..7122e4e5736150d97a0cd5fa3d3669a800dacc68 100644 --- a/data/unified_datasets/sgd/preprocess.py +++ b/data/unified_datasets/sgd/preprocess.py @@ -135,7 +135,6 @@ def preprocess(): 'binary_dialogue_acts': set(), 'state': {}} splits = ['train', 'validation', 'test'] - dialogues = [] dataset_name = 'sgd' for data_split in splits: data_dir = os.path.join(original_data_dir, data_split if data_split != 'validation' else 'dev') diff --git a/data/unified_datasets/tm1/README.md b/data/unified_datasets/tm1/README.md new file mode 100644 index 0000000000000000000000000000000000000000..35e7024f74d505f55dc5cee6af0ecc46fa5dcf53 --- /dev/null +++ b/data/unified_datasets/tm1/README.md @@ -0,0 +1,61 @@ +# Dataset Card for Taskmaster-1 + +- **Repository:** https://github.com/google-research-datasets/Taskmaster/tree/master/TM-1-2019 +- **Paper:** https://arxiv.org/pdf/1909.05358.pdf +- **Leaderboard:** None +- **Who transforms the dataset:** Qi Zhu(zhuq96 at gmail dot com) + +### Dataset Summary + +The original dataset consists of 13,215 task-based dialogs, including 5,507 spoken and 7,708 written dialogs created with two distinct procedures. Each conversation falls into one of six domains: ordering pizza, creating auto repair appointments, setting up ride service, ordering movie tickets, ordering coffee drinks and making restaurant reservations. 
+ +- **How to get the transformed data from original data:** + - Download [master.zip](https://github.com/google-research-datasets/Taskmaster/archive/refs/heads/master.zip). + - Run `python preprocess.py` in the current directory. +- **Main changes of the transformation:** + - Remove dialogs that are empty or only contain one speaker. + - Split woz-dialogs into train/validation/test randomly (8:1:1). The split of self-dialogs follows the original dataset. + - Merge consecutive turns by the same speaker (ignore repeated turns). + - Annotate `dialogue acts` according to the original segment annotations. Add `intent` annotation (inform/accept/reject). The type of `dialogue act` is set to `non-categorical` if the original segment annotation includes a specified `slot`. Otherwise, the type is set to `binary` (and the `slot` and `value` are empty) since it means a general reference to a transaction, e.g. "OK your pizza has been ordered". If multiple spans overlap, we only keep the shortest one, since we found that this simple strategy can reduce the noise in annotation. + - Add `domain`, `intent`, and `slot` descriptions. + - Add `state` by accumulating `non-categorical dialogue acts` in the order that they appear, except those whose intents are **reject**. + - Keep only the first annotation, since each conversation was annotated by two workers. +- **Annotations:** + - dialogue acts, state. 
+ +### Supported Tasks and Leaderboards + +NLU, DST, Policy, NLG + +### Languages + +English + +### Data Splits + +| split | dialogues | utterances | avg_utt | avg_tokens | avg_domains | cat slot match(state) | cat slot match(goal) | cat slot match(dialogue act) | non-cat slot span(dialogue act) | +|------------|-------------|--------------|-----------|--------------|---------------|-------------------------|------------------------|--------------------------------|-----------------------------------| +| train | 10535 | 223322 | 21.2 | 8.75 | 1 | - | - | - | 100 | +| validation | 1318 | 27903 | 21.17 | 8.75 | 1 | - | - | - | 100 | +| test | 1322 | 27660 | 20.92 | 8.87 | 1 | - | - | - | 100 | +| all | 13175 | 278885 | 21.17 | 8.76 | 1 | - | - | - | 100 | + +6 domains: ['uber_lyft', 'movie_ticket', 'restaurant_reservation', 'coffee_ordering', 'pizza_ordering', 'auto_repair'] +- **cat slot match**: the percentage of categorical slot values that appear among the possible values listed in the ontology. +- **non-cat slot span**: the percentage of non-categorical slot values that have a span annotation. 
+ +### Citation + +``` +@inproceedings{byrne-etal-2019-taskmaster, + title = {Taskmaster-1:Toward a Realistic and Diverse Dialog Dataset}, + author = {Bill Byrne and Karthik Krishnamoorthi and Chinnadhurai Sankar and Arvind Neelakantan and Daniel Duckworth and Semih Yavuz and Ben Goodrich and Amit Dubey and Kyu-Young Kim and Andy Cedilnik}, + booktitle = {2019 Conference on Empirical Methods in Natural Language Processing and 9th International Joint Conference on Natural Language Processing}, + address = {Hong Kong}, + year = {2019} +} +``` + +### Licensing Information + +[**CC BY 4.0**](https://creativecommons.org/licenses/by/4.0/) \ No newline at end of file diff --git a/data/unified_datasets/tm1/data.zip b/data/unified_datasets/tm1/data.zip new file mode 100644 index 0000000000000000000000000000000000000000..ba8262228f13571f91889e01f977ce10bf830f7e Binary files /dev/null and b/data/unified_datasets/tm1/data.zip differ diff --git a/data/unified_datasets/tm1/dummy_data.json b/data/unified_datasets/tm1/dummy_data.json new file mode 100644 index 0000000000000000000000000000000000000000..982375d753b698cd6b427cf6c9e3e8bbfb2ab926 --- /dev/null +++ b/data/unified_datasets/tm1/dummy_data.json @@ -0,0 +1,5204 @@ +[ + { + "dataset": "tm1", + "data_split": "train", + "dialogue_id": "tm1-train-0", + "original_id": "dlg-3369f6e3-6c81-4902-8259-138ffd830952", + "domains": [ + "movie_ticket" + ], + "goal": { + "description": "", + "inform": {}, + "request": {} + }, + "turns": [ + { + "speaker": "user", + "utterance": "Hi. A friend and I would like to see the movie Us tonight. 
Can you help us get the tickets?", + "utt_idx": 0, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie_ticket", + "slot": "name.movie", + "value": "Us", + "start": 47, + "end": 49 + } + ] + }, + "state": { + "movie_ticket": { + "name.movie": "Us", + "name.theater": "", + "num.tickets": "", + "time.start": "", + "location.theater": "", + "price.ticket": "", + "type.screening": "", + "time.end": "", + "time.duration": "" + } + } + }, + { + "speaker": "system", + "utterance": "Oh, I heard that's good. Of course I can help. Where would you like to see the movie?", + "utt_idx": 1, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I'm in Studio City in LA, so somewhere close to there would be good.", + "utt_idx": 2, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie_ticket", + "slot": "location.theater", + "value": "Studio City in LA", + "start": 7, + "end": 24 + } + ] + }, + "state": { + "movie_ticket": { + "name.movie": "Us", + "name.theater": "", + "num.tickets": "", + "time.start": "", + "location.theater": "Studio City in LA", + "price.ticket": "", + "type.screening": "", + "time.end": "", + "time.duration": "" + } + } + }, + { + "speaker": "system", + "utterance": "OK, there are a number of theaters nearby. 
Did you have a time in mind?", + "utt_idx": 3, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "How about 7 o'clock this evening?", + "utt_idx": 4, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie_ticket", + "slot": "time.start", + "value": "7 o'clock", + "start": 10, + "end": 19 + } + ] + }, + "state": { + "movie_ticket": { + "name.movie": "Us", + "name.theater": "", + "num.tickets": "", + "time.start": "7 o'clock", + "location.theater": "Studio City in LA", + "price.ticket": "", + "type.screening": "", + "time.end": "", + "time.duration": "" + } + } + }, + { + "speaker": "system", + "utterance": "It looks like the movie is showing on both standard and IMAX theaters. Do you have a preference?", + "utt_idx": 5, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie_ticket", + "slot": "type.screening", + "value": "standard", + "start": 43, + "end": 51 + }, + { + "intent": "inform", + "domain": "movie_ticket", + "slot": "type.screening", + "value": "IMAX", + "start": 56, + "end": 60 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "IMAX would be cool.", + "utt_idx": 6, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie_ticket", + "slot": "type.screening", + "value": "IMAX", + "start": 0, + "end": 4 + } + ] + }, + "state": { + "movie_ticket": { + "name.movie": "Us", + "name.theater": "", + "num.tickets": "", + "time.start": "7 o'clock", + "location.theater": "Studio City in LA", + "price.ticket": "", + "type.screening": "IMAX", + "time.end": "", + "time.duration": "" + } + } + }, + { + "speaker": "system", + "utterance": "Great. 
And you said you wanted two tickets, right?", + "utt_idx": 7, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie_ticket", + "slot": "num.tickets", + "value": "two", + "start": 31, + "end": 34 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yep.", + "utt_idx": 8, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie_ticket": { + "name.movie": "Us", + "name.theater": "", + "num.tickets": "two", + "time.start": "7 o'clock", + "location.theater": "Studio City in LA", + "price.ticket": "", + "type.screening": "IMAX", + "time.end": "", + "time.duration": "" + } + } + }, + { + "speaker": "system", + "utterance": "The AMC Cinema at the Universal CityWalk is playing Us in IMAX, but the next showing time is at 8 PM. Is that ok?", + "utt_idx": 9, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "reject", + "domain": "movie_ticket", + "slot": "name.theater", + "value": "AMC Cinema", + "start": 4, + "end": 14 + }, + { + "intent": "inform", + "domain": "movie_ticket", + "slot": "location.theater", + "value": "Universal CityWalk", + "start": 22, + "end": 40 + }, + { + "intent": "inform", + "domain": "movie_ticket", + "slot": "name.movie", + "value": "Us", + "start": 52, + "end": 54 + }, + { + "intent": "inform", + "domain": "movie_ticket", + "slot": "type.screening", + "value": "IMAX", + "start": 58, + "end": 62 + }, + { + "intent": "reject", + "domain": "movie_ticket", + "slot": "num.tickets", + "value": "8 PM", + "start": 96, + "end": 100 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Hm, I'd rather go earlier. 
What about the ArcLight theater in Sherman Oaks?", + "utt_idx": 10, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie_ticket", + "slot": "name.theater", + "value": "ArcLight", + "start": 42, + "end": 50 + }, + { + "intent": "inform", + "domain": "movie_ticket", + "slot": "location.theater", + "value": "Sherman Oaks", + "start": 62, + "end": 74 + } + ] + }, + "state": { + "movie_ticket": { + "name.movie": "Us", + "name.theater": "ArcLight", + "num.tickets": "two", + "time.start": "7 o'clock", + "location.theater": "Sherman Oaks", + "price.ticket": "", + "type.screening": "IMAX", + "time.end": "", + "time.duration": "" + } + } + }, + { + "speaker": "system", + "utterance": "The ArcLight is showing the movie at 7:15 PM, but it's on a standard screen, not IMAX. Is that ok?", + "utt_idx": 11, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie_ticket", + "slot": "name.theater", + "value": "ArcLight", + "start": 4, + "end": 12 + }, + { + "intent": "inform", + "domain": "movie_ticket", + "slot": "time.start", + "value": "7:15 PM", + "start": 37, + "end": 44 + }, + { + "intent": "accept", + "domain": "movie_ticket", + "slot": "type.screening", + "value": "standard", + "start": 60, + "end": 68 + }, + { + "intent": "reject", + "domain": "movie_ticket", + "slot": "type.screening", + "value": "IMAX", + "start": 81, + "end": 85 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yeah, that will be fine, I guess. 
How much is it for two tickets?", + "utt_idx": 12, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie_ticket", + "slot": "num.tickets", + "value": "two", + "start": 53, + "end": 56 + } + ] + }, + "state": { + "movie_ticket": { + "name.movie": "Us", + "name.theater": "ArcLight", + "num.tickets": "two", + "time.start": "7:15 PM", + "location.theater": "Sherman Oaks", + "price.ticket": "", + "type.screening": "standard", + "time.end": "", + "time.duration": "" + } + } + }, + { + "speaker": "system", + "utterance": "Tickets are $17 each, so it would be about $34 plus tax.", + "utt_idx": 13, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie_ticket", + "slot": "price.ticket", + "value": "$17", + "start": 12, + "end": 15 + }, + { + "intent": "inform", + "domain": "movie_ticket", + "slot": "price.ticket", + "value": "$34", + "start": 43, + "end": 46 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Hm, that's a little pricey. 
Any other theaters?", + "utt_idx": 14, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie_ticket": { + "name.movie": "Us", + "name.theater": "ArcLight", + "num.tickets": "two", + "time.start": "7:15 PM", + "location.theater": "Sherman Oaks", + "price.ticket": "$34", + "type.screening": "standard", + "time.end": "", + "time.duration": "" + } + } + }, + { + "speaker": "system", + "utterance": "The AMC 6 in Burbank is also showing the movie at that time, and tickets are $15 each.", + "utt_idx": 15, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "reject", + "domain": "movie_ticket", + "slot": "name.theater", + "value": "AMC 6", + "start": 4, + "end": 9 + }, + { + "intent": "inform", + "domain": "movie_ticket", + "slot": "location.theater", + "value": "Burbank", + "start": 13, + "end": 20 + }, + { + "intent": "inform", + "domain": "movie_ticket", + "slot": "price.ticket", + "value": "$15", + "start": 77, + "end": 80 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I guess the price isn't much different. Let's go with the ArcLight at 7:15 pm.", + "utt_idx": 16, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie_ticket", + "slot": "name.theater", + "value": "ArcLight", + "start": 58, + "end": 66 + }, + { + "intent": "inform", + "domain": "movie_ticket", + "slot": "time.start", + "value": "7:15 pm", + "start": 70, + "end": 77 + } + ] + }, + "state": { + "movie_ticket": { + "name.movie": "Us", + "name.theater": "ArcLight", + "num.tickets": "two", + "time.start": "7:15 pm", + "location.theater": "Burbank", + "price.ticket": "$15", + "type.screening": "standard", + "time.end": "", + "time.duration": "" + } + } + }, + { + "speaker": "system", + "utterance": "OK, to confirm: two tickets to see Us at the ArcLight Theater in Sherman Oaks at 7:15 pm. 
The price will be $34 plus tax.", + "utt_idx": 17, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "accept", + "domain": "movie_ticket", + "slot": "num.tickets", + "value": "two", + "start": 16, + "end": 19 + }, + { + "intent": "accept", + "domain": "movie_ticket", + "slot": "name.movie", + "value": "Us", + "start": 35, + "end": 37 + }, + { + "intent": "accept", + "domain": "movie_ticket", + "slot": "name.theater", + "value": "ArcLight Theater", + "start": 45, + "end": 61 + }, + { + "intent": "accept", + "domain": "movie_ticket", + "slot": "location.theater", + "value": "Sherman Oaks", + "start": 65, + "end": 77 + }, + { + "intent": "accept", + "domain": "movie_ticket", + "slot": "time.start", + "value": "7:15 pm", + "start": 81, + "end": 88 + }, + { + "intent": "accept", + "domain": "movie_ticket", + "slot": "price.ticket", + "value": "$34", + "start": 108, + "end": 111 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "That all sounds right.", + "utt_idx": 18, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie_ticket": { + "name.movie": "Us", + "name.theater": "ArcLight Theater", + "num.tickets": "two", + "time.start": "7:15 pm", + "location.theater": "Sherman Oaks", + "price.ticket": "$34", + "type.screening": "standard", + "time.end": "", + "time.duration": "" + } + } + }, + { + "speaker": "system", + "utterance": "Great! 
I'll text the tickets to your phone so you'll be ready to go this evening.", + "utt_idx": 19, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Thank you very much.", + "utt_idx": 20, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie_ticket": { + "name.movie": "Us", + "name.theater": "ArcLight Theater", + "num.tickets": "two", + "time.start": "7:15 pm", + "location.theater": "Sherman Oaks", + "price.ticket": "$34", + "type.screening": "standard", + "time.end": "", + "time.duration": "" + } + } + } + ] + }, + { + "dataset": "tm1", + "data_split": "train", + "dialogue_id": "tm1-train-1", + "original_id": "dlg-336c8165-068e-4b4b-803d-18ef0676f668", + "domains": [ + "restaurant_reservation" + ], + "goal": { + "description": "", + "inform": {}, + "request": {} + }, + "turns": [ + { + "speaker": "user", + "utterance": "Hi, I'm looking for a place that sells spicy wet hotdogs, can you think of any?", + "utt_idx": 0, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "restaurant_reservation": { + "name.restaurant": "", + "name.reservation": "", + "num.guests": "", + "time.reservation": "", + "type.seating": "", + "location.restaurant": "" + } + } + }, + { + "speaker": "system", + "utterance": "You might enjoy Spicy Wet Hotdogs LLC.", + "utt_idx": 1, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "reject", + "domain": "restaurant_reservation", + "slot": "name.restaurant", + "value": "Spicy Wet Hotdogs LLC", + "start": 16, + "end": 37 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "That sounds really good, can you make me a reservation?", + "utt_idx": 2, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "restaurant_reservation": { + 
"name.restaurant": "", + "name.reservation": "", + "num.guests": "", + "time.reservation": "", + "type.seating": "", + "location.restaurant": "" + } + } + }, + { + "speaker": "system", + "utterance": "Certainly, when would you like a reservation?", + "utt_idx": 3, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I have a party of 50 who want a really sloppy dog on Saturday at noon.", + "utt_idx": 4, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "restaurant_reservation", + "slot": "num.guests", + "value": "50", + "start": 18, + "end": 20 + } + ] + }, + "state": { + "restaurant_reservation": { + "name.restaurant": "", + "name.reservation": "", + "num.guests": "50", + "time.reservation": "", + "type.seating": "", + "location.restaurant": "" + } + } + }, + { + "speaker": "system", + "utterance": "Saturday at noon is no good, I'm sorry.", + "utt_idx": 5, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "restaurant_reservation", + "slot": "time.reservation", + "value": "Saturday at noon", + "start": 0, + "end": 16 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Oh okay, is there another restauraunt nearby?", + "utt_idx": 6, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "restaurant_reservation": { + "name.restaurant": "", + "name.reservation": "", + "num.guests": "50", + "time.reservation": "Saturday at noon", + "type.seating": "", + "location.restaurant": "" + } + } + }, + { + "speaker": "system", + "utterance": "Yeah, you might enjoy Bob's Blender Burgers - they blend up hamburgers and sell them as milkshakes!", + "utt_idx": 7, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": 
"restaurant_reservation", + "slot": "name.restaurant", + "value": "Bob's Blender Burgers", + "start": 22, + "end": 43 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Wow that sounds delicious! ", + "utt_idx": 8, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "restaurant_reservation": { + "name.restaurant": "Bob's Blender Burgers", + "name.reservation": "", + "num.guests": "50", + "time.reservation": "Saturday at noon", + "type.seating": "", + "location.restaurant": "" + } + } + }, + { + "speaker": "system", + "utterance": "I know, right?", + "utt_idx": 9, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I'll make a reservation for 50 at the blended burger joint on Saturday.", + "utt_idx": 10, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "accept", + "domain": "restaurant_reservation", + "slot": "num.guests", + "value": "50", + "start": 28, + "end": 30 + }, + { + "intent": "accept", + "domain": "restaurant_reservation", + "slot": "name.restaurant", + "value": "blended burger", + "start": 38, + "end": 52 + } + ] + }, + "state": { + "restaurant_reservation": { + "name.restaurant": "blended burger", + "name.reservation": "", + "num.guests": "50", + "time.reservation": "Saturday at noon", + "type.seating": "", + "location.restaurant": "" + } + } + }, + { + "speaker": "system", + "utterance": "Great, your reservation is made.", + "utt_idx": 11, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Thank you so much!", + "utt_idx": 12, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "restaurant_reservation": { + "name.restaurant": "blended burger", + "name.reservation": "", + 
"num.guests": "50", + "time.reservation": "Saturday at noon", + "type.seating": "", + "location.restaurant": "" + } + } + }, + { + "speaker": "system", + "utterance": "No problem, anything else I can do for you?", + "utt_idx": 13, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yeah - can you actually make sure that our table is by the play-place?", + "utt_idx": 14, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "restaurant_reservation", + "slot": "type.seating", + "value": "our table is by the play-place", + "start": 39, + "end": 69 + } + ] + }, + "state": { + "restaurant_reservation": { + "name.restaurant": "blended burger", + "name.reservation": "", + "num.guests": "50", + "time.reservation": "Saturday at noon", + "type.seating": "our table is by the play-place", + "location.restaurant": "" + } + } + }, + { + "speaker": "system", + "utterance": "Yeah, will there be children joining you?", + "utt_idx": 15, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes! 
Fourty of them.", + "utt_idx": 16, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "restaurant_reservation", + "slot": "num.guests", + "value": "Fourty of them", + "start": 5, + "end": 19 + } + ] + }, + "state": { + "restaurant_reservation": { + "name.restaurant": "blended burger", + "name.reservation": "", + "num.guests": "Fourty of them", + "time.reservation": "Saturday at noon", + "type.seating": "our table is by the play-place", + "location.restaurant": "" + } + } + }, + { + "speaker": "system", + "utterance": "Alright, we've got you the table with the play-place.", + "utt_idx": 17, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "accept", + "domain": "restaurant_reservation", + "slot": "type.seating", + "value": "table with the play-place", + "start": 27, + "end": 52 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Thank you so much, have a good one!", + "utt_idx": 18, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "restaurant_reservation": { + "name.restaurant": "blended burger", + "name.reservation": "", + "num.guests": "Fourty of them", + "time.reservation": "Saturday at noon", + "type.seating": "table with the play-place", + "location.restaurant": "" + } + } + }, + { + "speaker": "system", + "utterance": "You too!", + "utt_idx": 19, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + } + ] + }, + { + "dataset": "tm1", + "data_split": "train", + "dialogue_id": "tm1-train-2", + "original_id": "dlg-3370fcc4-8914-434d-994d-9e741c0707b2", + "domains": [ + "auto_repair" + ], + "goal": { + "description": "", + "inform": {}, + "request": {} + }, + "turns": [ + { + "speaker": "user", + "utterance": "Hello Pam, I need you to make an appointment for me to get some repairs done on my vehicle.", + "utt_idx": 0, + 
"dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "auto_repair": { + "name.store": "", + "name.customer": "", + "date.appt": "", + "time.appt": "", + "reason.appt": "", + "name.vehicle": "", + "year.vehicle": "", + "location.store": "" + } + } + }, + { + "speaker": "system", + "utterance": "Okay. Where would you like me to schedule the appointment?", + "utt_idx": 1, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "There's a shop called Intelligent Auto Solutions", + "utt_idx": 2, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "accept", + "domain": "auto_repair", + "slot": "name.store", + "value": "Intelligent Auto Solutions", + "start": 22, + "end": 48 + } + ] + }, + "state": { + "auto_repair": { + "name.store": "Intelligent Auto Solutions", + "name.customer": "", + "date.appt": "", + "time.appt": "", + "reason.appt": "", + "name.vehicle": "", + "year.vehicle": "", + "location.store": "" + } + } + }, + { + "speaker": "system", + "utterance": "Where's that located?", + "utt_idx": 3, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "It' located in O'Fallon, IL. on Hwy 50.", + "utt_idx": 4, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "accept", + "domain": "auto_repair", + "slot": "location.store", + "value": "O'Fallon, IL. on Hwy 50", + "start": 15, + "end": 38 + } + ] + }, + "state": { + "auto_repair": { + "name.store": "Intelligent Auto Solutions", + "name.customer": "", + "date.appt": "", + "time.appt": "", + "reason.appt": "", + "name.vehicle": "", + "year.vehicle": "", + "location.store": "O'Fallon, IL. 
on Hwy 50" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, so give me some information about what's going on with your vehicle?", + "utt_idx": 5, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Well, it's my 2015 Toyota Corolla that's giving me problems.", + "utt_idx": 6, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "auto_repair", + "slot": "year.vehicle", + "value": "2015", + "start": 14, + "end": 18 + }, + { + "intent": "inform", + "domain": "auto_repair", + "slot": "name.vehicle", + "value": "Toyota Corolla", + "start": 19, + "end": 33 + } + ] + }, + "state": { + "auto_repair": { + "name.store": "Intelligent Auto Solutions", + "name.customer": "", + "date.appt": "", + "time.appt": "", + "reason.appt": "", + "name.vehicle": "Toyota Corolla", + "year.vehicle": "2015", + "location.store": "O'Fallon, IL. on Hwy 50" + } + } + }, + { + "speaker": "system", + "utterance": "What type of problems?", + "utt_idx": 7, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Well, I would like a tune up and have them check my brakes. 
I believe I hear some grinding noise when I press the brakes.", + "utt_idx": 8, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "auto_repair", + "slot": "reason.appt", + "value": "tune up and have them check my brakes", + "start": 21, + "end": 58 + }, + { + "intent": "inform", + "domain": "auto_repair", + "slot": "reason.appt", + "value": "grinding noise when I press the brakes", + "start": 82, + "end": 120 + } + ] + }, + "state": { + "auto_repair": { + "name.store": "Intelligent Auto Solutions", + "name.customer": "", + "date.appt": "", + "time.appt": "", + "reason.appt": "grinding noise when I press the brakes", + "name.vehicle": "Toyota Corolla", + "year.vehicle": "2015", + "location.store": "O'Fallon, IL. on Hwy 50" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, I will call them and let them know.", + "utt_idx": 9, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Thanks.", + "utt_idx": 10, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "auto_repair": { + "name.store": "Intelligent Auto Solutions", + "name.customer": "", + "date.appt": "", + "time.appt": "", + "reason.appt": "grinding noise when I press the brakes", + "name.vehicle": "Toyota Corolla", + "year.vehicle": "2015", + "location.store": "O'Fallon, IL. on Hwy 50" + } + } + }, + { + "speaker": "system", + "utterance": "Could you tell me when would you like to take it in.", + "utt_idx": 11, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I really would like the fist available appointment. 
", + "utt_idx": 12, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "auto_repair": { + "name.store": "Intelligent Auto Solutions", + "name.customer": "", + "date.appt": "", + "time.appt": "", + "reason.appt": "grinding noise when I press the brakes", + "name.vehicle": "Toyota Corolla", + "year.vehicle": "2015", + "location.store": "O'Fallon, IL. on Hwy 50" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, I will call and let you know the outcome", + "utt_idx": 13, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Thank you.", + "utt_idx": 14, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "auto_repair": { + "name.store": "Intelligent Auto Solutions", + "name.customer": "", + "date.appt": "", + "time.appt": "", + "reason.appt": "grinding noise when I press the brakes", + "name.vehicle": "Toyota Corolla", + "year.vehicle": "2015", + "location.store": "O'Fallon, IL. on Hwy 50" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, I think that will be asking me your personal infomation.", + "utt_idx": 15, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Okay, my name is Jim Wright, address is 222 W. Main Belleville", + "utt_idx": 16, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "auto_repair", + "slot": "name.store", + "value": "Jim Wright", + "start": 17, + "end": 27 + } + ] + }, + "state": { + "auto_repair": { + "name.store": "Jim Wright", + "name.customer": "", + "date.appt": "", + "time.appt": "", + "reason.appt": "grinding noise when I press the brakes", + "name.vehicle": "Toyota Corolla", + "year.vehicle": "2015", + "location.store": "O'Fallon, IL. 
on Hwy 50" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, what about your telephone number.", + "utt_idx": 17, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Give them my cell phone number, which is 222-333-4576", + "utt_idx": 18, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "auto_repair": { + "name.store": "Jim Wright", + "name.customer": "", + "date.appt": "", + "time.appt": "", + "reason.appt": "grinding noise when I press the brakes", + "name.vehicle": "Toyota Corolla", + "year.vehicle": "2015", + "location.store": "O'Fallon, IL. on Hwy 50" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, I think I have all of the information needed.", + "utt_idx": 19, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Thanks Pam.", + "utt_idx": 20, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "auto_repair": { + "name.store": "Jim Wright", + "name.customer": "", + "date.appt": "", + "time.appt": "", + "reason.appt": "grinding noise when I press the brakes", + "name.vehicle": "Toyota Corolla", + "year.vehicle": "2015", + "location.store": "O'Fallon, IL. on Hwy 50" + } + } + }, + { + "speaker": "system", + "utterance": "You're welcome. 
I am calling them now to make an appointment for your 2015 Toyota Corolla to have your brakes checked and to obtain a tune up.", + "utt_idx": 21, + "dialogue_acts": { + "binary": [ + { + "intent": "accept", + "domain": "auto_repair", + "slot": "", + "value": "" + } + ], + "categorical": [], + "non-categorical": [ + { + "intent": "accept", + "domain": "auto_repair", + "slot": "year.vehicle", + "value": "2015", + "start": 70, + "end": 74 + }, + { + "intent": "accept", + "domain": "auto_repair", + "slot": "name.vehicle", + "value": "Toyota Corolla", + "start": 75, + "end": 89 + }, + { + "intent": "accept", + "domain": "auto_repair", + "slot": "reason.appt", + "value": "brakes checked and to obtain a tune up", + "start": 103, + "end": 141 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes and make sure they know that it's urgent.", + "utt_idx": 22, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "auto_repair": { + "name.store": "Jim Wright", + "name.customer": "", + "date.appt": "", + "time.appt": "", + "reason.appt": "brakes checked and to obtain a tune up", + "name.vehicle": "Toyota Corolla", + "year.vehicle": "2015", + "location.store": "O'Fallon, IL. on Hwy 50" + } + } + }, + { + "speaker": "system", + "utterance": "Will do. 
Have a great day.", + "utt_idx": 23, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + } + ] + }, + { + "dataset": "tm1", + "data_split": "train", + "dialogue_id": "tm1-train-3", + "original_id": "dlg-33769877-7168-4b1d-b056-9f2df7b7ede3", + "domains": [ + "coffee_ordering" + ], + "goal": { + "description": "", + "inform": {}, + "request": {} + }, + "turns": [ + { + "speaker": "user", + "utterance": "Hi there, I'd like you to order me a coffee from starbucks please.", + "utt_idx": 0, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "coffee_ordering": { + "location.store": "", + "name.drink": "", + "size.drink": "", + "num.drink": "", + "type.milk": "", + "preference": "" + } + } + }, + { + "speaker": "system", + "utterance": "Ok which store would you like me to order from?", + "utt_idx": 1, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "The one on Ashland ave..", + "utt_idx": 2, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "accept", + "domain": "coffee_ordering", + "slot": "location.store", + "value": "Ashland ave.", + "start": 11, + "end": 23 + } + ] + }, + "state": { + "coffee_ordering": { + "location.store": "Ashland ave.", + "name.drink": "", + "size.drink": "", + "num.drink": "", + "type.milk": "", + "preference": "" + } + } + }, + { + "speaker": "system", + "utterance": "Ok and what kind of coffee drink do you want?", + "utt_idx": 3, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I'll take the Caffè Latte please.", + "utt_idx": 4, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "accept", + "domain": "coffee_ordering", + "slot": "name.drink", + 
"value": "Caffè Latte", + "start": 14, + "end": 25 + } + ] + }, + "state": { + "coffee_ordering": { + "location.store": "Ashland ave.", + "name.drink": "Caffè Latte", + "size.drink": "", + "num.drink": "", + "type.milk": "", + "preference": "" + } + } + }, + { + "speaker": "system", + "utterance": "Ok and what size?", + "utt_idx": 5, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "The largest one, venti think its called.", + "utt_idx": 6, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "coffee_ordering", + "slot": "size.drink", + "value": "largest one", + "start": 4, + "end": 15 + }, + { + "intent": "accept", + "domain": "coffee_ordering", + "slot": "size.drink", + "value": "venti", + "start": 17, + "end": 22 + } + ] + }, + "state": { + "coffee_ordering": { + "location.store": "Ashland ave.", + "name.drink": "Caffè Latte", + "size.drink": "venti", + "num.drink": "", + "type.milk": "", + "preference": "" + } + } + }, + { + "speaker": "system", + "utterance": "Great what milk do you want with it?", + "utt_idx": 7, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I would like whole milk.", + "utt_idx": 8, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "coffee_ordering", + "slot": "type.milk", + "value": "whole milk.", + "start": 13, + "end": 24 + } + ] + }, + "state": { + "coffee_ordering": { + "location.store": "Ashland ave.", + "name.drink": "Caffè Latte", + "size.drink": "venti", + "num.drink": "", + "type.milk": "whole milk.", + "preference": "" + } + } + }, + { + "speaker": "system", + "utterance": "Ok well it looks like the whole milk actually is not available.", + "utt_idx": 9, + "dialogue_acts": { + "binary": [], + 
"categorical": [], + "non-categorical": [ + { + "intent": "reject", + "domain": "coffee_ordering", + "slot": "type.milk", + "value": "whole milk", + "start": 26, + "end": 36 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Well that sucks what is?", + "utt_idx": 10, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "coffee_ordering": { + "location.store": "Ashland ave.", + "name.drink": "Caffè Latte", + "size.drink": "venti", + "num.drink": "", + "type.milk": "whole milk.", + "preference": "" + } + } + }, + { + "speaker": "system", + "utterance": "Almond, cocunut, soy and 2 percent milk", + "utt_idx": 11, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "coffee_ordering", + "slot": "type.milk", + "value": "Almond", + "start": 0, + "end": 6 + }, + { + "intent": "inform", + "domain": "coffee_ordering", + "slot": "type.milk", + "value": "cocunut", + "start": 8, + "end": 15 + }, + { + "intent": "inform", + "domain": "coffee_ordering", + "slot": "type.milk", + "value": "soy", + "start": 17, + "end": 20 + }, + { + "intent": "inform", + "domain": "coffee_ordering", + "slot": "preference", + "value": "2 percent milk", + "start": 25, + "end": 39 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I guess I'll take the almond milk then.", + "utt_idx": 12, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "accept", + "domain": "coffee_ordering", + "slot": "type.milk", + "value": "almond milk", + "start": 22, + "end": 33 + } + ] + }, + "state": { + "coffee_ordering": { + "location.store": "Ashland ave.", + "name.drink": "Caffè Latte", + "size.drink": "venti", + "num.drink": "", + "type.milk": "almond milk", + "preference": "2 percent milk" + } + } + }, + { + "speaker": "system", + "utterance": "Great well I'll order it right now if that's all.", + 
"utt_idx": 13, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yep thats it thanks.", + "utt_idx": 14, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "coffee_ordering": { + "location.store": "Ashland ave.", + "name.drink": "Caffè Latte", + "size.drink": "venti", + "num.drink": "", + "type.milk": "almond milk", + "preference": "2 percent milk" + } + } + }, + { + "speaker": "system", + "utterance": "Ok its ordered, and will be ready at the starbucks shortly.", + "utt_idx": 15, + "dialogue_acts": { + "binary": [ + { + "intent": "accept", + "domain": "coffee_ordering", + "slot": "", + "value": "" + } + ], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yeah just got it thanks again.", + "utt_idx": 16, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "coffee_ordering": { + "location.store": "Ashland ave.", + "name.drink": "Caffè Latte", + "size.drink": "venti", + "num.drink": "", + "type.milk": "almond milk", + "preference": "2 percent milk" + } + } + }, + { + "speaker": "system", + "utterance": "Oh and the receipt will be sent to you're phone via text message.", + "utt_idx": 17, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Ah ok well thanks, bye.", + "utt_idx": 18, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "coffee_ordering": { + "location.store": "Ashland ave.", + "name.drink": "Caffè Latte", + "size.drink": "venti", + "num.drink": "", + "type.milk": "almond milk", + "preference": "2 percent milk" + } + } + }, + { + "speaker": "system", + "utterance": "No problem , bye.", + "utt_idx": 19, + "dialogue_acts": { + "binary": [], + 
"categorical": [], + "non-categorical": [] + }, + "db_results": {} + } + ] + }, + { + "dataset": "tm1", + "data_split": "train", + "dialogue_id": "tm1-train-4", + "original_id": "dlg-33796d43-da7a-41df-98e1-6d47c5f8f20e", + "domains": [ + "pizza_ordering" + ], + "goal": { + "description": "", + "inform": {}, + "request": {} + }, + "turns": [ + { + "speaker": "user", + "utterance": "Hey, can I get a pizza from Domino's?", + "utt_idx": 0, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "pizza_ordering", + "slot": "name.store", + "value": "Domino's", + "start": 28, + "end": 36 + } + ] + }, + "state": { + "pizza_ordering": { + "name.store": "Domino's", + "name.pizza": "", + "size.pizza": "", + "type.topping": "", + "type.crust": "", + "preference": "", + "location.store": "" + } + } + }, + { + "speaker": "system", + "utterance": "Sure, what kind would you like?", + "utt_idx": 1, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "What are the specials they have right now?", + "utt_idx": 2, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "pizza_ordering": { + "name.store": "Domino's", + "name.pizza": "", + "size.pizza": "", + "type.topping": "", + "type.crust": "", + "preference": "", + "location.store": "" + } + } + }, + { + "speaker": "system", + "utterance": "There are family and party combos currently on offer", + "utt_idx": 3, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "No, I just want a large pizza", + "utt_idx": 4, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "pizza_ordering", + "slot": "size.pizza", + "value": "large", + "start": 18, + "end": 23 + } + ] + }, + 
"state": { + "pizza_ordering": { + "name.store": "Domino's", + "name.pizza": "", + "size.pizza": "large", + "type.topping": "", + "type.crust": "", + "preference": "", + "location.store": "" + } + } + }, + { + "speaker": "system", + "utterance": "They have any large specialty pizza for 10.99", + "utt_idx": 5, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "pizza_ordering", + "slot": "size.pizza", + "value": "large", + "start": 14, + "end": 19 + }, + { + "intent": "inform", + "domain": "pizza_ordering", + "slot": "name.pizza", + "value": "specialty pizza", + "start": 20, + "end": 35 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "What are their specialty pizzas?", + "utt_idx": 6, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "pizza_ordering": { + "name.store": "Domino's", + "name.pizza": "specialty pizza", + "size.pizza": "large", + "type.topping": "", + "type.crust": "", + "preference": "", + "location.store": "" + } + } + }, + { + "speaker": "system", + "utterance": "Well, there is the Extravagazza, Meatzza, Philly Cheesesteak, Hawaiian, Buffalo Chicken Ranch, and more. 
Would you like to hear more?", + "utt_idx": 7, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "pizza_ordering", + "slot": "name.pizza", + "value": "Extravagazza", + "start": 19, + "end": 31 + }, + { + "intent": "inform", + "domain": "pizza_ordering", + "slot": "name.pizza", + "value": "Meatzza", + "start": 33, + "end": 40 + }, + { + "intent": "inform", + "domain": "pizza_ordering", + "slot": "name.pizza", + "value": "Philly Cheesesteak", + "start": 42, + "end": 60 + }, + { + "intent": "inform", + "domain": "pizza_ordering", + "slot": "name.pizza", + "value": "Hawaiian", + "start": 62, + "end": 70 + }, + { + "intent": "inform", + "domain": "pizza_ordering", + "slot": "name.pizza", + "value": "Buffalo Chicken Ranch", + "start": 72, + "end": 93 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "What is on the Philly Cheesesteak?", + "utt_idx": 8, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "pizza_ordering", + "slot": "name.pizza", + "value": "Philly Cheesesteak", + "start": 15, + "end": 33 + } + ] + }, + "state": { + "pizza_ordering": { + "name.store": "Domino's", + "name.pizza": "Philly Cheesesteak", + "size.pizza": "large", + "type.topping": "", + "type.crust": "", + "preference": "", + "location.store": "" + } + } + }, + { + "speaker": "system", + "utterance": "Domino's describes it as thinly sliced beef, melted cheese whiz, onions, green pepper, and mozzarella.", + "utt_idx": 9, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "pizza_ordering", + "slot": "name.store", + "value": "Domino's", + "start": 0, + "end": 8 + }, + { + "intent": "accept", + "domain": "pizza_ordering", + "slot": "type.topping", + "value": "thinly sliced beef,", + "start": 25, + "end": 44 + }, + { + "intent": "accept", + "domain": "pizza_ordering", + 
"slot": "preference", + "value": "melted cheese whiz", + "start": 45, + "end": 63 + }, + { + "intent": "accept", + "domain": "pizza_ordering", + "slot": "type.topping", + "value": "onions", + "start": 65, + "end": 71 + }, + { + "intent": "accept", + "domain": "pizza_ordering", + "slot": "type.topping", + "value": "green pepper", + "start": 73, + "end": 85 + }, + { + "intent": "accept", + "domain": "pizza_ordering", + "slot": "type.topping", + "value": "mozzarella", + "start": 91, + "end": 101 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "That sounds great, I will get that.", + "utt_idx": 10, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "pizza_ordering": { + "name.store": "Domino's", + "name.pizza": "Philly Cheesesteak", + "size.pizza": "large", + "type.topping": "mozzarella", + "type.crust": "", + "preference": "melted cheese whiz", + "location.store": "" + } + } + }, + { + "speaker": "system", + "utterance": "Would you like me to place the order now or later?", + "utt_idx": 11, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "A little later, at 7oclock tonight?", + "utt_idx": 12, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "pizza_ordering": { + "name.store": "Domino's", + "name.pizza": "Philly Cheesesteak", + "size.pizza": "large", + "type.topping": "mozzarella", + "type.crust": "", + "preference": "melted cheese whiz", + "location.store": "" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, will this be for pick up or delivery?", + "utt_idx": 13, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I'll pick it up.", + "utt_idx": 14, + "dialogue_acts": { + "binary": [], + "categorical": [], + 
"non-categorical": [] + }, + "state": { + "pizza_ordering": { + "name.store": "Domino's", + "name.pizza": "Philly Cheesesteak", + "size.pizza": "large", + "type.topping": "mozzarella", + "type.crust": "", + "preference": "melted cheese whiz", + "location.store": "" + } + } + }, + { + "speaker": "system", + "utterance": "Are you okay with using the credit card linked to you domino's rewards?", + "utt_idx": 15, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "accept", + "domain": "pizza_ordering", + "slot": "name.store", + "value": "domino's", + "start": 54, + "end": 62 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes that will be fine?", + "utt_idx": 16, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "pizza_ordering": { + "name.store": "domino's", + "name.pizza": "Philly Cheesesteak", + "size.pizza": "large", + "type.topping": "mozzarella", + "type.crust": "", + "preference": "melted cheese whiz", + "location.store": "" + } + } + }, + { + "speaker": "system", + "utterance": "Anything you want beyond the pizza?", + "utt_idx": 17, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Nope, just that", + "utt_idx": 18, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "pizza_ordering": { + "name.store": "domino's", + "name.pizza": "Philly Cheesesteak", + "size.pizza": "large", + "type.topping": "mozzarella", + "type.crust": "", + "preference": "melted cheese whiz", + "location.store": "" + } + } + }, + { + "speaker": "system", + "utterance": "Great, I will order a Philly Cheesteak pizza large at 7ocklock tonight for pickup", + "utt_idx": 19, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "accept", + "domain": "pizza_ordering", + "slot": 
"name.pizza", + "value": "Philly Cheesteak pizza", + "start": 22, + "end": 44 + }, + { + "intent": "accept", + "domain": "pizza_ordering", + "slot": "size.pizza", + "value": "large", + "start": 45, + "end": 50 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Thanks", + "utt_idx": 20, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "pizza_ordering": { + "name.store": "domino's", + "name.pizza": "Philly Cheesteak pizza", + "size.pizza": "large", + "type.topping": "mozzarella", + "type.crust": "", + "preference": "melted cheese whiz", + "location.store": "" + } + } + } + ] + }, + { + "dataset": "tm1", + "data_split": "train", + "dialogue_id": "tm1-train-5", + "original_id": "dlg-3388f38a-7ebd-4d73-9700-a34cea212f5a", + "domains": [ + "pizza_ordering" + ], + "goal": { + "description": "", + "inform": {}, + "request": {} + }, + "turns": [ + { + "speaker": "user", + "utterance": "I would like a large pizza", + "utt_idx": 0, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "pizza_ordering", + "slot": "size.pizza", + "value": "large", + "start": 15, + "end": 20 + } + ] + }, + "state": { + "pizza_ordering": { + "name.store": "", + "name.pizza": "", + "size.pizza": "large", + "type.topping": "", + "type.crust": "", + "preference": "", + "location.store": "" + } + } + }, + { + "speaker": "system", + "utterance": "ok, one large pizza, which toppings would you like on your pizza?", + "utt_idx": 1, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "pizza_ordering", + "slot": "type.topping", + "value": "large", + "start": 8, + "end": 13 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Sausage and onion please", + "utt_idx": 2, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + 
"domain": "pizza_ordering", + "slot": "type.topping", + "value": "Sausage", + "start": 0, + "end": 7 + }, + { + "intent": "inform", + "domain": "pizza_ordering", + "slot": "type.topping", + "value": "onion", + "start": 12, + "end": 17 + } + ] + }, + "state": { + "pizza_ordering": { + "name.store": "", + "name.pizza": "", + "size.pizza": "large", + "type.topping": "onion", + "type.crust": "", + "preference": "", + "location.store": "" + } + } + }, + { + "speaker": "system", + "utterance": "Im sorry but we are currently out of onions, would you like to choose another topping, such as black olives or mushrooms?", + "utt_idx": 3, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "reject", + "domain": "pizza_ordering", + "slot": "type.topping", + "value": "onions", + "start": 37, + "end": 43 + }, + { + "intent": "inform", + "domain": "pizza_ordering", + "slot": "type.topping", + "value": "black olives", + "start": 95, + "end": 107 + }, + { + "intent": "inform", + "domain": "pizza_ordering", + "slot": "type.topping", + "value": "mushrooms", + "start": 111, + "end": 120 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Mushrooms please", + "utt_idx": 4, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "pizza_ordering", + "slot": "type.topping", + "value": "Mushrooms", + "start": 0, + "end": 9 + } + ] + }, + "state": { + "pizza_ordering": { + "name.store": "", + "name.pizza": "", + "size.pizza": "large", + "type.topping": "Mushrooms", + "type.crust": "", + "preference": "", + "location.store": "" + } + } + }, + { + "speaker": "system", + "utterance": "one large pizza with sausage and mushrooms, would you like that with hand tossed crust or deep dish?", + "utt_idx": 5, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "pizza_ordering", + "slot": "size.pizza", + 
"value": "large", + "start": 4, + "end": 9 + }, + { + "intent": "inform", + "domain": "pizza_ordering", + "slot": "type.topping", + "value": "sausage", + "start": 21, + "end": 28 + }, + { + "intent": "inform", + "domain": "pizza_ordering", + "slot": "type.topping", + "value": "mushrooms", + "start": 33, + "end": 42 + }, + { + "intent": "inform", + "domain": "pizza_ordering", + "slot": "type.crust", + "value": "hand tossed crust", + "start": 69, + "end": 86 + }, + { + "intent": "inform", + "domain": "pizza_ordering", + "slot": "type.crust", + "value": "deep dish", + "start": 90, + "end": 99 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Deep dish", + "utt_idx": 6, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "pizza_ordering", + "slot": "type.crust", + "value": "Deep dish", + "start": 0, + "end": 9 + } + ] + }, + "state": { + "pizza_ordering": { + "name.store": "", + "name.pizza": "", + "size.pizza": "large", + "type.topping": "mushrooms", + "type.crust": "Deep dish", + "preference": "", + "location.store": "" + } + } + }, + { + "speaker": "system", + "utterance": "ok, and would you like to add extra cheese?", + "utt_idx": 7, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "pizza_ordering", + "slot": "preference", + "value": "extra cheese", + "start": 30, + "end": 42 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "yes please", + "utt_idx": 8, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "pizza_ordering": { + "name.store": "", + "name.pizza": "", + "size.pizza": "large", + "type.topping": "mushrooms", + "type.crust": "Deep dish", + "preference": "extra cheese", + "location.store": "" + } + } + }, + { + "speaker": "system", + "utterance": "would you like to add a side of hot wings to compliment the order?", + 
"utt_idx": 9, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "No thank you", + "utt_idx": 10, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "pizza_ordering": { + "name.store": "", + "name.pizza": "", + "size.pizza": "large", + "type.topping": "mushrooms", + "type.crust": "Deep dish", + "preference": "extra cheese", + "location.store": "" + } + } + }, + { + "speaker": "system", + "utterance": "will that complete your order?", + "utt_idx": 11, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes", + "utt_idx": 12, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "pizza_ordering": { + "name.store": "", + "name.pizza": "", + "size.pizza": "large", + "type.topping": "mushrooms", + "type.crust": "Deep dish", + "preference": "extra cheese", + "location.store": "" + } + } + }, + { + "speaker": "system", + "utterance": "so one large deep dish pizza with extra cheese, sausage and mushrooms, is that correct?", + "utt_idx": 13, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "accept", + "domain": "pizza_ordering", + "slot": "size.pizza", + "value": "large", + "start": 7, + "end": 12 + }, + { + "intent": "accept", + "domain": "pizza_ordering", + "slot": "type.crust", + "value": "deep dish", + "start": 13, + "end": 22 + }, + { + "intent": "accept", + "domain": "pizza_ordering", + "slot": "preference", + "value": "extra cheese", + "start": 34, + "end": 46 + }, + { + "intent": "accept", + "domain": "pizza_ordering", + "slot": "type.topping", + "value": "sausage", + "start": 48, + "end": 55 + }, + { + "intent": "accept", + "domain": "pizza_ordering", + "slot": "type.topping", + "value": "mushrooms", + "start": 60, + "end": 
69 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes", + "utt_idx": 14, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "pizza_ordering": { + "name.store": "", + "name.pizza": "", + "size.pizza": "large", + "type.topping": "mushrooms", + "type.crust": "deep dish", + "preference": "extra cheese", + "location.store": "" + } + } + }, + { + "speaker": "system", + "utterance": "Is your account information accurate?", + "utt_idx": 15, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes", + "utt_idx": 16, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "pizza_ordering": { + "name.store": "", + "name.pizza": "", + "size.pizza": "large", + "type.topping": "mushrooms", + "type.crust": "deep dish", + "preference": "extra cheese", + "location.store": "" + } + } + }, + { + "speaker": "system", + "utterance": "Thank you, your pizza order is complete and the pizzas will be ready for pickup in about 25 minutes.", + "utt_idx": 17, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Perfect, can I get a receipt?", + "utt_idx": 18, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "pizza_ordering": { + "name.store": "", + "name.pizza": "", + "size.pizza": "large", + "type.topping": "mushrooms", + "type.crust": "deep dish", + "preference": "extra cheese", + "location.store": "" + } + } + }, + { + "speaker": "system", + "utterance": "your receipt will be sent to your mobile device via text message.", + "utt_idx": 19, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Thank you, goodbye", + 
"utt_idx": 20, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "pizza_ordering": { + "name.store": "", + "name.pizza": "", + "size.pizza": "large", + "type.topping": "mushrooms", + "type.crust": "deep dish", + "preference": "extra cheese", + "location.store": "" + } + } + }, + { + "speaker": "system", + "utterance": "your welcome, have a wonderful night", + "utt_idx": 21, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + } + ] + }, + { + "dataset": "tm1", + "data_split": "train", + "dialogue_id": "tm1-train-6", + "original_id": "dlg-338edd6c-5fbe-4498-bce1-b7360bac2160", + "domains": [ + "restaurant_reservation" + ], + "goal": { + "description": "", + "inform": {}, + "request": {} + }, + "turns": [ + { + "speaker": "user", + "utterance": "Google, I want to eat out tonight.", + "utt_idx": 0, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "restaurant_reservation": { + "name.restaurant": "", + "name.reservation": "", + "num.guests": "", + "time.reservation": "", + "type.seating": "", + "location.restaurant": "" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, what are you in the mood for?", + "utt_idx": 1, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Ethiopian food.", + "utt_idx": 2, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "restaurant_reservation": { + "name.restaurant": "", + "name.reservation": "", + "num.guests": "", + "time.reservation": "", + "type.seating": "", + "location.restaurant": "" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, how much would you prefer to spend per person?", + "utt_idx": 3, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + 
}, + { + "speaker": "user", + "utterance": "Um, maybe fifteen dollars?", + "utt_idx": 4, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "restaurant_reservation": { + "name.restaurant": "", + "name.reservation": "", + "num.guests": "", + "time.reservation": "", + "type.seating": "", + "location.restaurant": "" + } + } + }, + { + "speaker": "system", + "utterance": "There are 2 highly rated Ethiopian restaurants within ten miles in that price range. Would you like to hear them?", + "utt_idx": 5, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Sure.", + "utt_idx": 6, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "restaurant_reservation": { + "name.restaurant": "", + "name.reservation": "", + "num.guests": "", + "time.reservation": "", + "type.seating": "", + "location.restaurant": "" + } + } + }, + { + "speaker": "system", + "utterance": "Jackie's Bistro on 59th St. in Williamstown is open until 10pm and has an average rating of three and a half stars on Yelp . Would you like to book a table?", + "utt_idx": 7, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "restaurant_reservation", + "slot": "name.restaurant", + "value": "Jackie's Bistro", + "start": 0, + "end": 15 + }, + { + "intent": "inform", + "domain": "restaurant_reservation", + "slot": "location.restaurant", + "value": "59th St. 
in Williamstown", + "start": 19, + "end": 43 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "No.", + "utt_idx": 8, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "restaurant_reservation": { + "name.restaurant": "Jackie's Bistro", + "name.reservation": "", + "num.guests": "", + "time.reservation": "", + "type.seating": "", + "location.restaurant": "59th St. in Williamstown" + } + } + }, + { + "speaker": "system", + "utterance": "Would you like to hear another?", + "utt_idx": 9, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes.", + "utt_idx": 10, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "restaurant_reservation": { + "name.restaurant": "Jackie's Bistro", + "name.reservation": "", + "num.guests": "", + "time.reservation": "", + "type.seating": "", + "location.restaurant": "59th St. in Williamstown" + } + } + }, + { + "speaker": "system", + "utterance": "Tom's Cuisine on Main St in Williamstown is open until 9pm and has an average rating of 3.2 stars on Yelp. 
Would you like to book a table?", + "utt_idx": 11, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "restaurant_reservation", + "slot": "name.restaurant", + "value": "Tom's Cuisine", + "start": 0, + "end": 13 + }, + { + "intent": "inform", + "domain": "restaurant_reservation", + "slot": "location.restaurant", + "value": "Main St in Williamstown", + "start": 17, + "end": 40 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "No.", + "utt_idx": 12, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "restaurant_reservation": { + "name.restaurant": "Tom's Cuisine", + "name.reservation": "", + "num.guests": "", + "time.reservation": "", + "type.seating": "", + "location.restaurant": "Main St in Williamstown" + } + } + }, + { + "speaker": "system", + "utterance": "Would you like to continue searching?", + "utt_idx": 13, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes.", + "utt_idx": 14, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "restaurant_reservation": { + "name.restaurant": "Tom's Cuisine", + "name.reservation": "", + "num.guests": "", + "time.reservation": "", + "type.seating": "", + "location.restaurant": "Main St in Williamstown" + } + } + }, + { + "speaker": "system", + "utterance": "There is one other Ethiopian restaurant in that price range within 25 miles. 
Would you like to hear it?", + "utt_idx": 15, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "restaurant_reservation", + "slot": "location.restaurant", + "value": "within 25 miles", + "start": 60, + "end": 75 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes.", + "utt_idx": 16, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "restaurant_reservation": { + "name.restaurant": "Tom's Cuisine", + "name.reservation": "", + "num.guests": "", + "time.reservation": "", + "type.seating": "", + "location.restaurant": "within 25 miles" + } + } + }, + { + "speaker": "system", + "utterance": "Jake's Place is open until 10pm today and has an average rating of 4 stars on Yelp. Would you like to book a table?", + "utt_idx": 17, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "restaurant_reservation", + "slot": "name.restaurant", + "value": "Jake's Place", + "start": 0, + "end": 12 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes.", + "utt_idx": 18, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "restaurant_reservation": { + "name.restaurant": "Jake's Place", + "name.reservation": "", + "num.guests": "", + "time.reservation": "", + "type.seating": "", + "location.restaurant": "within 25 miles" + } + } + }, + { + "speaker": "system", + "utterance": "What time?", + "utt_idx": 19, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "8pm tonight.", + "utt_idx": 20, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "restaurant_reservation", + "slot": "time.reservation", + "value": "8pm", + "start": 0, + "end": 3 
+ } + ] + }, + "state": { + "restaurant_reservation": { + "name.restaurant": "Jake's Place", + "name.reservation": "", + "num.guests": "", + "time.reservation": "8pm", + "type.seating": "", + "location.restaurant": "within 25 miles" + } + } + }, + { + "speaker": "system", + "utterance": "For how many people?", + "utt_idx": 21, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Ten.", + "utt_idx": 22, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "restaurant_reservation", + "slot": "num.guests", + "value": "Ten", + "start": 0, + "end": 3 + } + ] + }, + "state": { + "restaurant_reservation": { + "name.restaurant": "Jake's Place", + "name.reservation": "", + "num.guests": "Ten", + "time.reservation": "8pm", + "type.seating": "", + "location.restaurant": "within 25 miles" + } + } + }, + { + "speaker": "system", + "utterance": "I'm sorry, Jake's Place does not have a table for ten at 8pm tonight. 
Would you like to hear available times for a party of ten?", + "utt_idx": 23, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "restaurant_reservation", + "slot": "name.restaurant", + "value": "Jake's Place", + "start": 11, + "end": 23 + }, + { + "intent": "inform", + "domain": "restaurant_reservation", + "slot": "num.guests", + "value": "ten", + "start": 50, + "end": 53 + }, + { + "intent": "reject", + "domain": "restaurant_reservation", + "slot": "time.reservation", + "value": "8pm", + "start": 57, + "end": 60 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes.", + "utt_idx": 24, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "restaurant_reservation": { + "name.restaurant": "Jake's Place", + "name.reservation": "", + "num.guests": "ten", + "time.reservation": "8pm", + "type.seating": "", + "location.restaurant": "within 25 miles" + } + } + }, + { + "speaker": "system", + "utterance": "Jake's Place has tables for ten available at the following times tonight: 5, 5:30, 6, 6:30, and 7 pm. 
What time would you like?", + "utt_idx": 25, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "restaurant_reservation", + "slot": "name.restaurant", + "value": "Jake's Place", + "start": 0, + "end": 12 + }, + { + "intent": "inform", + "domain": "restaurant_reservation", + "slot": "num.guests", + "value": "ten", + "start": 28, + "end": 31 + }, + { + "intent": "inform", + "domain": "restaurant_reservation", + "slot": "time.reservation", + "value": "5", + "start": 74, + "end": 75 + }, + { + "intent": "inform", + "domain": "restaurant_reservation", + "slot": "time.reservation", + "value": "5:30", + "start": 77, + "end": 81 + }, + { + "intent": "inform", + "domain": "restaurant_reservation", + "slot": "time.reservation", + "value": "6", + "start": 83, + "end": 84 + }, + { + "intent": "inform", + "domain": "restaurant_reservation", + "slot": "time.reservation", + "value": "6:30", + "start": 86, + "end": 90 + }, + { + "intent": "inform", + "domain": "restaurant_reservation", + "slot": "time.reservation", + "value": "7 pm", + "start": 96, + "end": 100 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "7:30 pm, please.", + "utt_idx": 26, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "restaurant_reservation", + "slot": "time.reservation", + "value": "7:30 pm", + "start": 0, + "end": 7 + } + ] + }, + "state": { + "restaurant_reservation": { + "name.restaurant": "Jake's Place", + "name.reservation": "", + "num.guests": "ten", + "time.reservation": "7:30 pm", + "type.seating": "", + "location.restaurant": "within 25 miles" + } + } + }, + { + "speaker": "system", + "utterance": "Okay. I've booked a 7:30 pm reservation for a party of ten at Jake's Place tonight. 
Would you like to book another table?", + "utt_idx": 27, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "accept", + "domain": "restaurant_reservation", + "slot": "time.reservation", + "value": "7:30 pm", + "start": 20, + "end": 27 + }, + { + "intent": "accept", + "domain": "restaurant_reservation", + "slot": "num.guests", + "value": "ten", + "start": 55, + "end": 58 + }, + { + "intent": "accept", + "domain": "restaurant_reservation", + "slot": "name.restaurant", + "value": "Jake's Place", + "start": 62, + "end": 74 + }, + { + "intent": "inform", + "domain": "restaurant_reservation", + "slot": "type.seating", + "value": "table", + "start": 115, + "end": 120 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "No.", + "utt_idx": 28, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "restaurant_reservation": { + "name.restaurant": "Jake's Place", + "name.reservation": "", + "num.guests": "ten", + "time.reservation": "7:30 pm", + "type.seating": "table", + "location.restaurant": "within 25 miles" + } + } + }, + { + "speaker": "system", + "utterance": "Okay.", + "utt_idx": 29, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + } + ] + }, + { + "dataset": "tm1", + "data_split": "train", + "dialogue_id": "tm1-train-7", + "original_id": "dlg-3392e3ff-40b6-4004-a2ec-63ec0d557dfc", + "domains": [ + "uber_lyft" + ], + "goal": { + "description": "", + "inform": {}, + "request": {} + }, + "turns": [ + { + "speaker": "user", + "utterance": "I need a lyft ride.", + "utt_idx": 0, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "uber_lyft": { + "location.from": "", + "location.to": "", + "type.ride": "", + "num.people": "", + "price.estimate": "", + "duration.estimate": "", + "time.pickup": "", + "time.dropoff": "" + } + } + }, + { + "speaker": 
"system", + "utterance": "Where do you want to go to and from?", + "utt_idx": 1, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I need to go from south boston to the north end.", + "utt_idx": 2, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "uber_lyft", + "slot": "location.from", + "value": "south boston", + "start": 18, + "end": 30 + }, + { + "intent": "inform", + "domain": "uber_lyft", + "slot": "location.to", + "value": "north end", + "start": 38, + "end": 47 + } + ] + }, + "state": { + "uber_lyft": { + "location.from": "south boston", + "location.to": "north end", + "type.ride": "", + "num.people": "", + "price.estimate": "", + "duration.estimate": "", + "time.pickup": "", + "time.dropoff": "" + } + } + }, + { + "speaker": "system", + "utterance": "where exactly in the south end are you at.", + "utt_idx": 3, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "uber_lyft", + "slot": "location.from", + "value": "south end", + "start": 21, + "end": 30 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "boston chops.", + "utt_idx": 4, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "accept", + "domain": "uber_lyft", + "slot": "location.from", + "value": "boston chops.", + "start": 0, + "end": 13 + } + ] + }, + "state": { + "uber_lyft": { + "location.from": "boston chops.", + "location.to": "north end", + "type.ride": "", + "num.people": "", + "price.estimate": "", + "duration.estimate": "", + "time.pickup": "", + "time.dropoff": "" + } + } + }, + { + "speaker": "system", + "utterance": "where in the north end do you want to go to?", + "utt_idx": 5, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": 
"inform", + "domain": "uber_lyft", + "slot": "location.to", + "value": "north end", + "start": 13, + "end": 22 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "north end garage.", + "utt_idx": 6, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "accept", + "domain": "uber_lyft", + "slot": "location.to", + "value": "north end garage", + "start": 0, + "end": 16 + } + ] + }, + "state": { + "uber_lyft": { + "location.from": "boston chops.", + "location.to": "north end garage", + "type.ride": "", + "num.people": "", + "price.estimate": "", + "duration.estimate": "", + "time.pickup": "", + "time.dropoff": "" + } + } + }, + { + "speaker": "system", + "utterance": "Do you want a personal ride or shared?", + "utt_idx": 7, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "uber_lyft", + "slot": "type.ride", + "value": "personal", + "start": 14, + "end": 22 + }, + { + "intent": "inform", + "domain": "uber_lyft", + "slot": "type.ride", + "value": "shared", + "start": 31, + "end": 37 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "personal please.", + "utt_idx": 8, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "uber_lyft", + "slot": "type.ride", + "value": "personal", + "start": 0, + "end": 8 + } + ] + }, + "state": { + "uber_lyft": { + "location.from": "boston chops.", + "location.to": "north end garage", + "type.ride": "personal", + "num.people": "", + "price.estimate": "", + "duration.estimate": "", + "time.pickup": "", + "time.dropoff": "" + } + } + }, + { + "speaker": "system", + "utterance": "What type of features? 
XL, high-end, black?", + "utt_idx": 9, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "uber_lyft", + "slot": "type.ride", + "value": "XL", + "start": 23, + "end": 25 + }, + { + "intent": "inform", + "domain": "uber_lyft", + "slot": "type.ride", + "value": "high-end", + "start": 27, + "end": 35 + }, + { + "intent": "inform", + "domain": "uber_lyft", + "slot": "type.ride", + "value": "black", + "start": 37, + "end": 42 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Just a regular lyft personal ride.", + "utt_idx": 10, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "accept", + "domain": "uber_lyft", + "slot": "type.ride", + "value": "regular lyft personal ride", + "start": 7, + "end": 33 + } + ] + }, + "state": { + "uber_lyft": { + "location.from": "boston chops.", + "location.to": "north end garage", + "type.ride": "regular lyft personal ride", + "num.people": "", + "price.estimate": "", + "duration.estimate": "", + "time.pickup": "", + "time.dropoff": "" + } + } + }, + { + "speaker": "system", + "utterance": "Got it. 
Let me input that data for you.", + "utt_idx": 11, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "How much will it cost.", + "utt_idx": 12, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "uber_lyft": { + "location.from": "boston chops.", + "location.to": "north end garage", + "type.ride": "regular lyft personal ride", + "num.people": "", + "price.estimate": "", + "duration.estimate": "", + "time.pickup": "", + "time.dropoff": "" + } + } + }, + { + "speaker": "system", + "utterance": "It is showing $8-10.", + "utt_idx": 13, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "accept", + "domain": "uber_lyft", + "slot": "price.estimate", + "value": "$8-10", + "start": 14, + "end": 19 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "How long will it take.", + "utt_idx": 14, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "uber_lyft": { + "location.from": "boston chops.", + "location.to": "north end garage", + "type.ride": "regular lyft personal ride", + "num.people": "", + "price.estimate": "$8-10", + "duration.estimate": "", + "time.pickup": "", + "time.dropoff": "" + } + } + }, + { + "speaker": "system", + "utterance": "It will take 10 minutes to get to you.", + "utt_idx": 15, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "accept", + "domain": "uber_lyft", + "slot": "time.pickup", + "value": "10 minutes", + "start": 13, + "end": 23 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Seems awfully long.", + "utt_idx": 16, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "uber_lyft": { + "location.from": "boston chops.", + "location.to": "north end garage", 
+ "type.ride": "regular lyft personal ride", + "num.people": "", + "price.estimate": "$8-10", + "duration.estimate": "", + "time.pickup": "10 minutes", + "time.dropoff": "" + } + } + }, + { + "speaker": "system", + "utterance": "Yah. Looks like things are slow tonight. Do you want me to book it. ", + "utt_idx": 17, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes, book it.", + "utt_idx": 18, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "uber_lyft": { + "location.from": "boston chops.", + "location.to": "north end garage", + "type.ride": "regular lyft personal ride", + "num.people": "", + "price.estimate": "$8-10", + "duration.estimate": "", + "time.pickup": "10 minutes", + "time.dropoff": "" + } + } + }, + { + "speaker": "system", + "utterance": "Alright it is booked. It will show up on your phone. Ride is on its way. You can status it on your phone.", + "utt_idx": 19, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Do i need to tip them.", + "utt_idx": 20, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "uber_lyft": { + "location.from": "boston chops.", + "location.to": "north end garage", + "type.ride": "regular lyft personal ride", + "num.people": "", + "price.estimate": "$8-10", + "duration.estimate": "", + "time.pickup": "10 minutes", + "time.dropoff": "" + } + } + }, + { + "speaker": "system", + "utterance": "Well that is up to you but it is already paid for.", + "utt_idx": 21, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Thanks.", + "utt_idx": 22, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + 
"state": { + "uber_lyft": { + "location.from": "boston chops.", + "location.to": "north end garage", + "type.ride": "regular lyft personal ride", + "num.people": "", + "price.estimate": "$8-10", + "duration.estimate": "", + "time.pickup": "10 minutes", + "time.dropoff": "" + } + } + }, + { + "speaker": "system", + "utterance": "Your welcome.", + "utt_idx": 23, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + } + ] + }, + { + "dataset": "tm1", + "data_split": "train", + "dialogue_id": "tm1-train-8", + "original_id": "dlg-3393df32-6c63-4569-b0f6-3e2f8e19852e", + "domains": [ + "restaurant_reservation" + ], + "goal": { + "description": "", + "inform": {}, + "request": {} + }, + "turns": [ + { + "speaker": "user", + "utterance": "I want to book a restaurant for dinner.", + "utt_idx": 0, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "restaurant_reservation": { + "name.restaurant": "", + "name.reservation": "", + "num.guests": "", + "time.reservation": "", + "type.seating": "", + "location.restaurant": "" + } + } + }, + { + "speaker": "system", + "utterance": "Sure, what kind of restaurant are you thinking of.", + "utt_idx": 1, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Maybe Thai or Indonesian food. 
", + "utt_idx": 2, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "restaurant_reservation": { + "name.restaurant": "", + "name.reservation": "", + "num.guests": "", + "time.reservation": "", + "type.seating": "", + "location.restaurant": "" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, any place in mind?", + "utt_idx": 3, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I'm thinking about going to Thai Market or Awang Kitchen in New York City.", + "utt_idx": 4, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "restaurant_reservation", + "slot": "name.restaurant", + "value": "Thai Market", + "start": 28, + "end": 39 + }, + { + "intent": "inform", + "domain": "restaurant_reservation", + "slot": "name.restaurant", + "value": "Awang Kitchen", + "start": 43, + "end": 56 + }, + { + "intent": "accept", + "domain": "restaurant_reservation", + "slot": "location.restaurant", + "value": "New York City", + "start": 60, + "end": 73 + } + ] + }, + "state": { + "restaurant_reservation": { + "name.restaurant": "Awang Kitchen", + "name.reservation": "", + "num.guests": "", + "time.reservation": "", + "type.seating": "", + "location.restaurant": "New York City" + } + } + }, + { + "speaker": "system", + "utterance": "Which one do you prefer?", + "utt_idx": 5, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Well, I was wondering, which place has better reviews and more space. 
I also want the restaurant that's closest to my office which is on 40th Street and 5th Avenue in New York.", + "utt_idx": 6, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "restaurant_reservation", + "slot": "location.restaurant", + "value": "closest to my office", + "start": 104, + "end": 124 + }, + { + "intent": "accept", + "domain": "restaurant_reservation", + "slot": "location.restaurant", + "value": "40th Street and 5th Avenue in New York", + "start": 137, + "end": 175 + } + ] + }, + "state": { + "restaurant_reservation": { + "name.restaurant": "Awang Kitchen", + "name.reservation": "", + "num.guests": "", + "time.reservation": "", + "type.seating": "", + "location.restaurant": "40th Street and 5th Avenue in New York" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, Thai Market is closer to your office. It is also larger than Awang Kitchen and it has a 4.6 rating on Google.", + "utt_idx": 7, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "restaurant_reservation", + "slot": "name.restaurant", + "value": "Thai Market", + "start": 6, + "end": 17 + }, + { + "intent": "inform", + "domain": "restaurant_reservation", + "slot": "location.restaurant", + "value": "closer to your office", + "start": 21, + "end": 42 + }, + { + "intent": "inform", + "domain": "restaurant_reservation", + "slot": "name.restaurant", + "value": "Awang Kitchen", + "start": 67, + "end": 80 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Does it have spicy food?", + "utt_idx": 8, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "restaurant_reservation": { + "name.restaurant": "Awang Kitchen", + "name.reservation": "", + "num.guests": "", + "time.reservation": "", + "type.seating": "", + "location.restaurant": "closer to your office" + } + } + }, + { + 
"speaker": "system", + "utterance": "Yes, it has many spicy options on the menu, such as the Drunken Noodles. ", + "utt_idx": 9, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Okay, great, I think I will go to Thai Market. Can you make a reservation?", + "utt_idx": 10, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "accept", + "domain": "restaurant_reservation", + "slot": "name.restaurant", + "value": "Thai Market", + "start": 34, + "end": 45 + } + ] + }, + "state": { + "restaurant_reservation": { + "name.restaurant": "Thai Market", + "name.reservation": "", + "num.guests": "", + "time.reservation": "", + "type.seating": "", + "location.restaurant": "closer to your office" + } + } + }, + { + "speaker": "system", + "utterance": "For what time and date?", + "utt_idx": 11, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "For 7 pm on March 1, 2019, for 2 people.", + "utt_idx": 12, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "reject", + "domain": "restaurant_reservation", + "slot": "time.reservation", + "value": "7 pm on March 1, 2019", + "start": 4, + "end": 25 + }, + { + "intent": "inform", + "domain": "restaurant_reservation", + "slot": "num.guests", + "value": "2 people", + "start": 31, + "end": 39 + } + ] + }, + "state": { + "restaurant_reservation": { + "name.restaurant": "Thai Market", + "name.reservation": "", + "num.guests": "2 people", + "time.reservation": "", + "type.seating": "", + "location.restaurant": "closer to your office" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, I just checked for you, unfortunately they are booked for this time.", + "utt_idx": 13, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": 
[] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Okay, then for 8 pm on March 1, 2019 for 2 people.", + "utt_idx": 14, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "accept", + "domain": "restaurant_reservation", + "slot": "time.reservation", + "value": "8 pm on March 1, 2019", + "start": 15, + "end": 36 + }, + { + "intent": "inform", + "domain": "restaurant_reservation", + "slot": "num.guests", + "value": "2 people", + "start": 41, + "end": 49 + } + ] + }, + "state": { + "restaurant_reservation": { + "name.restaurant": "Thai Market", + "name.reservation": "", + "num.guests": "2 people", + "time.reservation": "8 pm on March 1, 2019", + "type.seating": "", + "location.restaurant": "closer to your office" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, there is an opening. I made a reservation for a table for 2 on that date. Since the weather is chilly, I assume you want to eat indoors.", + "utt_idx": 15, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "accept", + "domain": "restaurant_reservation", + "slot": "num.guests", + "value": "2", + "start": 64, + "end": 65 + }, + { + "intent": "accept", + "domain": "restaurant_reservation", + "slot": "type.seating", + "value": "indoors", + "start": 134, + "end": 141 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes, that is correct, thank you.", + "utt_idx": 16, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "restaurant_reservation": { + "name.restaurant": "Thai Market", + "name.reservation": "", + "num.guests": "2", + "time.reservation": "8 pm on March 1, 2019", + "type.seating": "indoors", + "location.restaurant": "closer to your office" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, the reservation is confirmed. 
", + "utt_idx": 17, + "dialogue_acts": { + "binary": [ + { + "intent": "accept", + "domain": "restaurant_reservation", + "slot": "", + "value": "" + } + ], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Okay, great, thanks so much, have a nice day.", + "utt_idx": 18, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "restaurant_reservation": { + "name.restaurant": "Thai Market", + "name.reservation": "", + "num.guests": "2", + "time.reservation": "8 pm on March 1, 2019", + "type.seating": "indoors", + "location.restaurant": "closer to your office" + } + } + }, + { + "speaker": "system", + "utterance": "You too, have a nice day. ", + "utt_idx": 19, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + } + ] + }, + { + "dataset": "tm1", + "data_split": "train", + "dialogue_id": "tm1-train-9", + "original_id": "dlg-339dfcb2-714f-4b53-95ff-8aa1bf43d12a", + "domains": [ + "movie_ticket" + ], + "goal": { + "description": "", + "inform": {}, + "request": {} + }, + "turns": [ + { + "speaker": "user", + "utterance": "I'd like to get tickets for Alita: Battle Angel", + "utt_idx": 0, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie_ticket", + "slot": "name.movie", + "value": "Alita: Battle Angel", + "start": 28, + "end": 47 + } + ] + }, + "state": { + "movie_ticket": { + "name.movie": "Alita: Battle Angel", + "name.theater": "", + "num.tickets": "", + "time.start": "", + "location.theater": "", + "price.ticket": "", + "type.screening": "", + "time.end": "", + "time.duration": "" + } + } + }, + { + "speaker": "system", + "utterance": "What is your location?", + "utt_idx": 1, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I'm in 
Portland, Oregon", + "utt_idx": 2, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "accept", + "domain": "movie_ticket", + "slot": "location.theater", + "value": "Portland, Oregon", + "start": 7, + "end": 23 + } + ] + }, + "state": { + "movie_ticket": { + "name.movie": "Alita: Battle Angel", + "name.theater": "", + "num.tickets": "", + "time.start": "", + "location.theater": "Portland, Oregon", + "price.ticket": "", + "type.screening": "", + "time.end": "", + "time.duration": "" + } + } + }, + { + "speaker": "system", + "utterance": "How many tickets would you like to buy?", + "utt_idx": 3, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Two tickets should work", + "utt_idx": 4, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "accept", + "domain": "movie_ticket", + "slot": "num.tickets", + "value": "Two", + "start": 0, + "end": 3 + } + ] + }, + "state": { + "movie_ticket": { + "name.movie": "Alita: Battle Angel", + "name.theater": "", + "num.tickets": "Two", + "time.start": "", + "location.theater": "Portland, Oregon", + "price.ticket": "", + "type.screening": "", + "time.end": "", + "time.duration": "" + } + } + }, + { + "speaker": "system", + "utterance": "What time would you like to see the movie?", + "utt_idx": 5, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I'd like to see it around 6:30 pm", + "utt_idx": 6, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie_ticket", + "slot": "time.start", + "value": "6:30 pm", + "start": 26, + "end": 33 + } + ] + }, + "state": { + "movie_ticket": { + "name.movie": "Alita: Battle Angel", + "name.theater": "", + "num.tickets": "Two", + "time.start": 
"6:30 pm", + "location.theater": "Portland, Oregon", + "price.ticket": "", + "type.screening": "", + "time.end": "", + "time.duration": "" + } + } + }, + { + "speaker": "system", + "utterance": "Let me check that for you", + "utt_idx": 7, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Ok", + "utt_idx": 8, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie_ticket": { + "name.movie": "Alita: Battle Angel", + "name.theater": "", + "num.tickets": "Two", + "time.start": "6:30 pm", + "location.theater": "Portland, Oregon", + "price.ticket": "", + "type.screening": "", + "time.end": "", + "time.duration": "" + } + } + }, + { + "speaker": "system", + "utterance": "It seems like there aren't any showtimes then, but there's one at 8:00 pm at Regal Lloyd Center 10 & Imax", + "utt_idx": 9, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "reject", + "domain": "movie_ticket", + "slot": "time.start", + "value": "It seems like there aren't any showtimes then", + "start": 0, + "end": 45 + }, + { + "intent": "accept", + "domain": "movie_ticket", + "slot": "time.start", + "value": "8:00 pm", + "start": 66, + "end": 73 + }, + { + "intent": "inform", + "domain": "movie_ticket", + "slot": "name.theater", + "value": "Regal Lloyd Center 10 & Imax", + "start": 77, + "end": 105 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Is 3D available at that theater?", + "utt_idx": 10, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie_ticket", + "slot": "type.screening", + "value": "3D", + "start": 3, + "end": 5 + } + ] + }, + "state": { + "movie_ticket": { + "name.movie": "Alita: Battle Angel", + "name.theater": "Regal Lloyd Center 10 & Imax", + "num.tickets": "Two", + "time.start": 
"8:00 pm", + "location.theater": "Portland, Oregon", + "price.ticket": "", + "type.screening": "3D", + "time.end": "", + "time.duration": "" + } + } + }, + { + "speaker": "system", + "utterance": "No, it isn't, but there's a 3D showing of Alita: Battle Angel at Regal Cinemas Fox Tower 10", + "utt_idx": 11, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "reject", + "domain": "movie_ticket", + "slot": "type.screening", + "value": "No, it isn't", + "start": 0, + "end": 12 + }, + { + "intent": "inform", + "domain": "movie_ticket", + "slot": "type.screening", + "value": "3D", + "start": 28, + "end": 30 + }, + { + "intent": "accept", + "domain": "movie_ticket", + "slot": "name.movie", + "value": "Alita: Battle Angel", + "start": 42, + "end": 61 + }, + { + "intent": "accept", + "domain": "movie_ticket", + "slot": "name.theater", + "value": "Regal Cinemas Fox Tower 10", + "start": 65, + "end": 91 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Let's see it there", + "utt_idx": 12, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "accept", + "domain": "movie_ticket", + "slot": "location.theater", + "value": "Let's see it there", + "start": 0, + "end": 18 + } + ] + }, + "state": { + "movie_ticket": { + "name.movie": "Alita: Battle Angel", + "name.theater": "Regal Cinemas Fox Tower 10", + "num.tickets": "Two", + "time.start": "8:00 pm", + "location.theater": "Let's see it there", + "price.ticket": "", + "type.screening": "3D", + "time.end": "", + "time.duration": "" + } + } + }, + { + "speaker": "system", + "utterance": "So you would like to order tickets?", + "utt_idx": 13, + "dialogue_acts": { + "binary": [ + { + "intent": "inform", + "domain": "movie_ticket", + "slot": "", + "value": "" + } + ], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes", + "utt_idx": 14, + 
"dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie_ticket": { + "name.movie": "Alita: Battle Angel", + "name.theater": "Regal Cinemas Fox Tower 10", + "num.tickets": "Two", + "time.start": "8:00 pm", + "location.theater": "Let's see it there", + "price.ticket": "", + "type.screening": "3D", + "time.end": "", + "time.duration": "" + } + } + }, + { + "speaker": "system", + "utterance": "Ok, placing your order", + "utt_idx": 15, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Thanks", + "utt_idx": 16, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie_ticket": { + "name.movie": "Alita: Battle Angel", + "name.theater": "Regal Cinemas Fox Tower 10", + "num.tickets": "Two", + "time.start": "8:00 pm", + "location.theater": "Let's see it there", + "price.ticket": "", + "type.screening": "3D", + "time.end": "", + "time.duration": "" + } + } + }, + { + "speaker": "system", + "utterance": "Your ticket order has been placed and your receipt has been sent to your mobile device", + "utt_idx": 17, + "dialogue_acts": { + "binary": [ + { + "intent": "accept", + "domain": "movie_ticket", + "slot": "", + "value": "" + } + ], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Thanks", + "utt_idx": 18, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie_ticket": { + "name.movie": "Alita: Battle Angel", + "name.theater": "Regal Cinemas Fox Tower 10", + "num.tickets": "Two", + "time.start": "8:00 pm", + "location.theater": "Let's see it there", + "price.ticket": "", + "type.screening": "3D", + "time.end": "", + "time.duration": "" + } + } + }, + { + "speaker": "system", + "utterance": "Have a good day!", + "utt_idx": 19, + "dialogue_acts": { + "binary": 
"""Convert the Taskmaster-1 (TM-1-2019) corpus into the unified dialogue format.

Running the module as a script calls :func:`preprocess`, which reads the
original corpus from ``Taskmaster-master/`` (extracting ``master.zip`` first
when that directory is missing), writes ``dummy_data.json`` and ``data.zip``
into the current working directory and finally removes both the extracted
corpus and the intermediate ``data/`` directory.
"""
from zipfile import ZipFile, ZIP_DEFLATED
import json
import os
import copy
import zipfile
from tqdm import tqdm
import re
from collections import Counter
from shutil import rmtree
from convlab2.util.file_util import read_zipped_json, write_zipped_json
from pprint import pprint
import random


# Natural-language descriptions copied into the generated ontology.
# For every domain, the entry keyed by the domain name itself describes the
# domain; all other entries describe one slot of that domain.
descriptions = {
    "uber_lyft": {
        "uber_lyft": "order a car for a ride inside a city",
        "location.from": "pickup location",
        "location.to": "destination of the ride",
        "type.ride": "type of ride",
        "num.people": "number of people",
        "price.estimate": "estimated cost of the ride",
        "duration.estimate": "estimated duration of the ride",
        "time.pickup": "time of pickup",
        "time.dropoff": "time of dropoff",
    },
    "movie_ticket": {
        "movie_ticket": "book movie tickets for a film",
        "name.movie": "name of the movie",
        "name.theater": "name of the theater",
        "num.tickets": "number of tickets",
        "time.start": "start time of the movie",
        "location.theater": "location of the theater",
        "price.ticket": "price of the ticket",
        "type.screening": "type of the screening",
        "time.end": "end time of the movie",
        "time.duration": "duration of the movie",
    },
    "restaurant_reservation": {
        "restaurant_reservation": "searching for a restaurant and make reservation",
        "name.restaurant": "name of the restaurant",
        "name.reservation": "name of the person who make the reservation",
        "num.guests": "number of guests",
        "time.reservation": "time of the reservation",
        "type.seating": "type of the seating",
        "location.restaurant": "location of the restaurant",
    },
    "coffee_ordering": {
        "coffee_ordering": "order a coffee drink from either Starbucks or Peets for pick up",
        "location.store": "location of the coffee store",
        "name.drink": "name of the drink",
        "size.drink": "size of the drink",
        "num.drink": "number of drinks",
        "type.milk": "type of the milk",
        "preference": "user preference of the drink",
    },
    "pizza_ordering": {
        "pizza_ordering": "order a pizza",
        "name.store": "name of the pizza store",
        "name.pizza": "name of the pizza",
        "size.pizza": "size of the pizza",
        "type.topping": "type of the topping",
        "type.crust": "type of the crust",
        "preference": "user preference of the pizza",
        "location.store": "location of the pizza store",
    },
    "auto_repair": {
        "auto_repair": "set up an auto repair appointment with a repair shop",
        "name.store": "name of the repair store",
        "name.customer": "name of the customer",
        "date.appt": "date of the appointment",
        "time.appt": "time of the appointment",
        "reason.appt": "reason of the appointment",
        "name.vehicle": "name of the vehicle",
        "year.vehicle": "year of the vehicle",
        "location.store": "location of the repair store",
    }
}

# Short domain prefix (as passed to normalize_domain_name) -> ontology domain.
_DOMAIN_NAMES = {
    'auto': 'auto_repair',
    'pizza': 'pizza_ordering',
    'coffee': 'coffee_ordering',
    'uber': 'uber_lyft',
    'restaurant': 'restaurant_reservation',
    'movie': 'movie_ticket',
}


def normalize_domain_name(domain):
    """Map a short domain prefix to the domain name used in the ontology.

    Args:
        domain: one of ``'auto'``, ``'pizza'``, ``'coffee'``, ``'uber'``,
            ``'restaurant'`` or ``'movie'``.

    Returns:
        The corresponding key of ``descriptions``.

    Raises:
        ValueError: if ``domain`` is not one of the known prefixes.  (An
            explicit exception is used instead of ``assert 0`` so the check
            is not stripped when Python runs with ``-O``.)
    """
    try:
        return _DOMAIN_NAMES[domain]
    except KeyError:
        raise ValueError(f'unknown domain prefix: {domain!r}') from None


def format_turns(ori_turns):
    """Drop invalid turns and merge consecutive turns of the same speaker.

    The input is left untouched: every returned turn (and every segment that
    is moved into a merged turn) is a copy.

    Args:
        ori_turns: list of turn dicts.  Each dict is read for ``'speaker'``
            (``'ASSISTANT'`` maps to ``'system'``, anything else to
            ``'user'``), ``'text'`` and, optionally, ``'segments'`` (dicts
            with ``'start_index'`` / ``'end_index'`` offsets into ``'text'``).

    Returns:
        A new list of turn dicts in which
        * turns whose text is exactly ``'(deleted)'`` are removed,
        * a turn by the same speaker as the previously kept turn is skipped
          when its text is a substring of the immediately preceding original
          turn's text, and otherwise appended (after a single space) to the
          previously kept turn, with its segment offsets shifted accordingly,
        * ``'speaker'`` is normalised to ``'system'`` / ``'user'``.
    """
    new_turns = []
    previous_speaker = None
    for i, turn in enumerate(ori_turns):
        speaker = 'system' if turn['speaker'] == 'ASSISTANT' else 'user'
        if turn['text'] == '(deleted)':
            continue

        if speaker != previous_speaker:
            # first kept turn, or the speaker switched: start a new turn
            previous_speaker = speaker
            new_turn = copy.deepcopy(turn)
            new_turn['speaker'] = speaker
            new_turns.append(new_turn)
            continue

        # the same speaker keeps talking
        if turn['text'] in ori_turns[i-1]['text']:
            # repeated turn, nothing new to merge
            continue

        last_turn = new_turns[-1]
        # +1 accounts for the space inserted between the two texts
        index_shift = len(last_turn['text']) + 1
        last_turn['text'] += ' '+turn['text']
        if 'segments' in turn:
            merged_segments = last_turn.setdefault('segments', [])
            for segment in turn['segments']:
                # shift a copy so the caller's segment keeps its offsets
                shifted = copy.deepcopy(segment)
                shifted['start_index'] += index_shift
                shifted['end_index'] += index_shift
                merged_segments.append(shifted)
    return new_turns


def _read_json(path):
    """Load a UTF-8 encoded JSON file and close it afterwards."""
    with open(path, encoding='utf-8') as f:
        return json.load(f)


def _write_json(obj, path):
    """Dump ``obj`` as indented UTF-8 JSON to ``path`` and close the file."""
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(obj, f, indent=2, ensure_ascii=False)


def preprocess():
    """Build the unified-format dialogues and ontology for Taskmaster-1.

    Reads ``TM-1-2019/ontology.json``, ``self-dialogs.json`` and
    ``woz-dialogs.json`` below ``Taskmaster-master/``.  Self dialogs keep the
    split listed in ``TM-1-2019/train-dev-test/*.csv`` (``dev`` is renamed
    ``validation``); woz dialogs are shuffled with seed 42 and split 8:1:1.

    Side effects (all relative to the current working directory): extracts
    ``master.zip`` when ``Taskmaster-master/`` is missing, writes
    ``dummy_data.json`` (first 10 dialogues) and ``data.zip`` (containing
    ``data/dialogues.json`` and ``data/ontology.json``), then deletes
    ``Taskmaster-master/`` and ``data/``.

    Returns:
        ``(dialogues, ontology)``: the list of converted dialogues
        (train + validation + test) and the ontology dict.

    Raises:
        FileNotFoundError: if neither ``Taskmaster-master/`` nor
            ``master.zip`` exists.
    """
    original_data_dir = 'Taskmaster-master'
    new_data_dir = 'data'

    if not os.path.exists(original_data_dir):
        original_data_zip = 'master.zip'
        if not os.path.exists(original_data_zip):
            raise FileNotFoundError(f'cannot find original data {original_data_zip} in tm1/, should manually download master.zip from https://github.com/google-research-datasets/Taskmaster/archive/refs/heads/master.zip')
        with ZipFile(original_data_zip) as archive:
            archive.extractall()

    os.makedirs(new_data_dir, exist_ok=True)

    ontology = {'domains': {},
                'intents': {
                    'inform': {'description': 'inform the value of a slot or general information.'},
                    'accept': {'description': 'accept the value of a slot or a transaction'},
                    'reject': {'description': 'reject the value of a slot or a transaction'}
                },
                # collected as a set of tuples, converted to a list of dicts at the end
                'binary_dialogue_acts': set(),
                'state': {}}
    ori_ontology = {}
    for item in _read_json(os.path.join(original_data_dir, "TM-1-2019/ontology.json")).values():
        ori_ontology[item["id"]] = item

    for domain, item in ori_ontology.items():
        ontology['domains'][domain] = {'description': descriptions[domain][domain], 'slots': {}}
        ontology['state'][domain] = {}
        for slot in item['required']+item['optional']:
            ontology['domains'][domain]['slots'][slot] = {
                'description': descriptions[domain][slot],
                'is_categorical': False,
                'possible_values': [],
            }
            ontology['state'][domain][slot] = ''

    dataset = 'tm1'
    splits = ['train', 'validation', 'test']
    dialogues_by_split = {split: [] for split in splits}
    dialog_files = ["TM-1-2019/self-dialogs.json", "TM-1-2019/woz-dialogs.json"]
    for file_idx, filename in enumerate(dialog_files):
        data = _read_json(os.path.join(original_data_dir, filename))
        if file_idx == 0:
            # original split for self dialogs
            dial_id2split = {}
            for data_split in ['train', 'dev', 'test']:
                with open(os.path.join(original_data_dir, f"TM-1-2019/train-dev-test/{data_split}.csv")) as f:
                    for line in f:
                        dial_id = line.split(',')[0]
                        dial_id2split[dial_id] = data_split if data_split != 'dev' else 'validation'
        else:
            # random split for woz dialogs 8:1:1
            random.seed(42)
            dial_ids = [d['conversation_id'] for d in data]
            random.shuffle(dial_ids)
            train_end = int(0.8*len(dial_ids))
            validation_end = int(0.9*len(dial_ids))
            dial_id2split = {}
            for dial_id in dial_ids[:train_end]:
                dial_id2split[dial_id] = 'train'
            for dial_id in dial_ids[train_end:validation_end]:
                dial_id2split[dial_id] = 'validation'
            for dial_id in dial_ids[validation_end:]:
                dial_id2split[dial_id] = 'test'

        for d in tqdm(data, desc='processing taskmaster-{}'.format(filename)):
            # delete empty dialogs and invalid dialogs
            if len(d['utterances']) == 0:
                continue
            if len(set([t['speaker'] for t in d['utterances']])) == 1:
                continue
            data_split = dial_id2split[d["conversation_id"]]
            dialogue_id = f'{dataset}-{data_split}-{len(dialogues_by_split[data_split])}'
            cur_domains = [normalize_domain_name(d["instruction_id"].split('-', 1)[0])]
            assert len(cur_domains) == 1 and cur_domains[0] in ontology['domains']
            domain = cur_domains[0]
            goal = {
                'description': '',
                'inform': {},
                'request': {}
            }
            dialogue = {
                'dataset': dataset,
                'data_split': data_split,
                'dialogue_id': dialogue_id,
                'original_id': d["conversation_id"],
                'domains': cur_domains,
                'goal': goal,
                'turns': []
            }
            turns = format_turns(d['utterances'])
            # running belief state, carried from turn to turn
            prev_state = {}
            prev_state.setdefault(domain, copy.deepcopy(ontology['state'][domain]))

            for utt_idx, uttr in enumerate(turns):
                speaker = uttr['speaker']
                turn = {
                    'speaker': speaker,
                    'utterance': uttr['text'],
                    'utt_idx': utt_idx,
                    'dialogue_acts': {
                        'binary': [],
                        'categorical': [],
                        'non-categorical': [],
                    },
                }
                # per-character flag: 1 where an already accepted span covers the character
                in_span = [0] * len(turn['utterance'])

                if 'segments' in uttr:
                    # sort the span according to the length
                    segments = sorted(uttr['segments'], key=lambda x: len(x['text']))
                    for segment in segments:
                        # Each conversation was annotated by two workers.
                        # only keep the first annotation for the span
                        item = segment['annotations'][0]
                        intent = 'inform'  # default intent
                        # keep what follows the first '.' of the annotation name
                        slot = item['name'].split('.', 1)[-1]
                        if slot.endswith('.accept') or slot.endswith('.reject'):
                            # intent=accept/reject: last 6 chars are the intent,
                            # 7 chars ('.' + intent) are cut from the slot name
                            intent = slot[-6:]
                            slot = slot[:-7]
                        if slot not in ontology['domains'][domain]['slots']:
                            # no slot, only general reference to a transaction, binary dialog act
                            turn['dialogue_acts']['binary'].append({
                                'intent': intent,
                                'domain': domain,
                                'slot': '',
                                'value': ''
                            })
                        else:
                            assert turn['utterance'][segment['start_index']:segment['end_index']] == segment['text']
                            # skip overlapped spans, keep the shortest one
                            if sum(in_span[segment['start_index']: segment['end_index']]) > 0:
                                continue
                            else:
                                in_span[segment['start_index']: segment['end_index']] = [1]*(segment['end_index']-segment['start_index'])
                                turn['dialogue_acts']['non-categorical'].append({
                                    'intent': intent,
                                    'domain': domain,
                                    'slot': slot,
                                    'value': segment['text'],
                                    'start': segment['start_index'],
                                    'end': segment['end_index']
                                })

                turn['dialogue_acts']['non-categorical'] = sorted(turn['dialogue_acts']['non-categorical'], key=lambda x: x['start'])

                # de-duplicate the binary acts of this turn and register them in the ontology
                bdas = set()
                for da in turn['dialogue_acts']['binary']:
                    da_tuple = (da['intent'], da['domain'], da['slot'], da['value'],)
                    bdas.add(da_tuple)
                    ontology['binary_dialogue_acts'].add(da_tuple)
                turn['dialogue_acts']['binary'] = [{'intent': bda[0], 'domain': bda[1], 'slot': bda[2], 'value': bda[3]} for bda in sorted(bdas)]

                for da in turn['dialogue_acts']['non-categorical']:
                    slot, value = da['slot'], da['value']
                    assert slot in prev_state[domain]
                    # not add reject slot-value into state
                    if da['intent'] != 'reject':
                        prev_state[domain][slot] = value

                if speaker == 'user':
                    turn['state'] = copy.deepcopy(prev_state)
                else:
                    turn['db_results'] = {}

                dialogue['turns'].append(turn)
            dialogues_by_split[data_split].append(dialogue)

    ontology['binary_dialogue_acts'] = [{'intent': bda[0], 'domain': bda[1], 'slot': bda[2], 'value': bda[3]} for bda in sorted(ontology['binary_dialogue_acts'])]
    dialogues = dialogues_by_split['train']+dialogues_by_split['validation']+dialogues_by_split['test']
    # The writers close (and therefore flush) each file before it is zipped below.
    _write_json(dialogues[:10], 'dummy_data.json')
    _write_json(dialogues, f'{new_data_dir}/dialogues.json')
    _write_json(ontology, f'{new_data_dir}/ontology.json')
    with ZipFile('data.zip', 'w', ZIP_DEFLATED) as zf:
        for filename in os.listdir(new_data_dir):
            zf.write(f'{new_data_dir}/{filename}')
    rmtree(original_data_dir)
    rmtree(new_data_dir)
    return dialogues, ontology


if __name__ == '__main__':
    preprocess()
dataset:** Qi Zhu(zhuq96 at gmail dot com) + + ### Dataset Summary + + The Taskmaster-2 dataset consists of 17,289 dialogs in seven domains. Unlike Taskmaster-1, which includes both written "self-dialogs" and spoken two-person dialogs, Taskmaster-2 consists entirely of spoken two-person dialogs. In addition, while Taskmaster-1 is almost exclusively task-based, Taskmaster-2 contains a good number of search- and recommendation-oriented dialogs, as seen for example in the restaurants, flights, hotels, and movies verticals. The music browsing and sports conversations are almost exclusively search- and recommendation-based. All dialogs in this release were created using a Wizard of Oz (WOz) methodology in which crowdsourced workers played the role of a 'user' and trained call center operators played the role of the 'assistant'. In this way, users were led to believe they were interacting with an automated system that “spoke” using text-to-speech (TTS) even though it was in fact a human behind the scenes. As a result, users could express themselves however they chose in the context of an automated interface. + + - **How to get the transformed data from original data:** + - Download [master.zip](https://github.com/google-research-datasets/Taskmaster/archive/refs/heads/master.zip). + - Run `python preprocess.py` in the current directory. + - **Main changes of the transformation:** + - Remove dialogs that are empty or only contain one speaker. + - Split the dialogs of each domain into train/validation/test randomly (8:1:1). + - Merge consecutive turns by the same speaker (ignore repeated turns). + - Annotate `dialogue acts` according to the original segment annotations. Add `intent` annotation (`==inform`). The type of `dialogue act` is set to `non-categorical` if the `slot` is not in `anno2slot` in `preprocess.py`. Otherwise, the type is set to `binary` (and the `value` is empty). 
If multiple spans overlap, we only keep the shortest one, since we found that this simple strategy can reduce the noise in annotation. + - Add `domain`, `intent`, and `slot` descriptions. + - Add `state` by accumulating `non-categorical dialogue acts` in the order that they appear. + - Keep the first annotation since each conversation was annotated by two workers. + - **Annotations:** + - dialogue acts, state. + + ### Supported Tasks and Leaderboards + + NLU, DST, Policy, NLG + + ### Languages + + English + + ### Data Splits + + | split | dialogues | utterances | avg_utt | avg_tokens | avg_domains | cat slot match(state) | cat slot match(goal) | cat slot match(dialogue act) | non-cat slot span(dialogue act) | +|------------|-------------|--------------|-----------|--------------|---------------|-------------------------|------------------------|--------------------------------|-----------------------------------| +| train | 13838 | 234321 | 16.93 | 9.1 | 1 | - | - | - | 100 | +| validation | 1731 | 29349 | 16.95 | 9.15 | 1 | - | - | - | 100 | +| test | 1734 | 29447 | 16.98 | 9.07 | 1 | - | - | - | 100 | +| all | 17303 | 293117 | 16.94 | 9.1 | 1 | - | - | - | 100 | + +7 domains: ['flights', 'food-ordering', 'hotels', 'movies', 'music', 'restaurant-search', 'sports'] +- **cat slot match**: how many values of categorical slots are in the possible values of ontology in percentage. +- **non-cat slot span**: how many values of non-categorical slots have span annotation in percentage. 
+ +### Citation + +``` +@inproceedings{byrne-etal-2019-taskmaster, + title = {Taskmaster-1:Toward a Realistic and Diverse Dialog Dataset}, + author = {Bill Byrne and Karthik Krishnamoorthi and Chinnadhurai Sankar and Arvind Neelakantan and Daniel Duckworth and Semih Yavuz and Ben Goodrich and Amit Dubey and Kyu-Young Kim and Andy Cedilnik}, + booktitle = {2019 Conference on Empirical Methods in Natural Language Processing and 9th International Joint Conference on Natural Language Processing}, + address = {Hong Kong}, + year = {2019} +} +``` + +### Licensing Information + +[**CC BY 4.0**](https://creativecommons.org/licenses/by/4.0/) \ No newline at end of file diff --git a/data/unified_datasets/tm2/data.zip b/data/unified_datasets/tm2/data.zip new file mode 100644 index 0000000000000000000000000000000000000000..306d209368279ea67bcc07789327063ef3a2c57a Binary files /dev/null and b/data/unified_datasets/tm2/data.zip differ diff --git a/data/unified_datasets/tm2/dummy_data.json b/data/unified_datasets/tm2/dummy_data.json new file mode 100644 index 0000000000000000000000000000000000000000..6cb30f4be2581202be683487db67cdd37917ca3c --- /dev/null +++ b/data/unified_datasets/tm2/dummy_data.json @@ -0,0 +1,6797 @@ +[ + { + "dataset": "tm2", + "data_split": "train", + "dialogue_id": "tm2-train-0", + "original_id": "dlg-00100680-00e0-40fe-8321-6d81b21bfc4f", + "domains": [ + "flights" + ], + "goal": { + "description": "", + "inform": {}, + "request": {} + }, + "turns": [ + { + "speaker": "user", + "utterance": "Hello. 
I'd like to find a round trip commercial airline flight from San Francisco to Denver.", + "utt_idx": 0, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "type", + "value": "round trip", + "start": 26, + "end": 36 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "origin", + "value": "San Francisco", + "start": 68, + "end": 81 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "destination1", + "value": "Denver", + "start": 85, + "end": 91 + } + ] + }, + "state": { + "flights": { + "type": "round trip", + "destination1": "Denver", + "destination2": "", + "origin": "San Francisco", + "date.depart_origin": "", + "date.depart_intermediate": "", + "date.return": "", + "time_of_day": "", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Hello, how can I help you? San Francisco to Denver, got it.", + "utt_idx": 1, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "origin", + "value": "San Francisco", + "start": 27, + "end": 40 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "destination1", + "value": "Denver", + "start": 44, + "end": 50 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "You're really on top of things. 
I like that.", + "utt_idx": 2, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "round trip", + "destination1": "Denver", + "destination2": "", + "origin": "San Francisco", + "date.depart_origin": "", + "date.depart_intermediate": "", + "date.return": "", + "time_of_day": "", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "So what days are you looking to fly? Hey, what else can you say?", + "utt_idx": 3, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I'm looking to fly out sometime today, the earliest time today, and I'll be returning in 4 days. So, I would like to fly out sometime tonight and fly back in the evening in 4 days. From I'm looking to go to Denver. 
I'm flying out of San Francisco.", + "utt_idx": 4, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "date.depart_origin", + "value": "today", + "start": 57, + "end": 62 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "date.return", + "value": "4 days", + "start": 89, + "end": 95 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "time_of_day", + "value": "in the evening", + "start": 155, + "end": 169 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "date.return", + "value": "4 days", + "start": 173, + "end": 179 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "destination1", + "value": "Denver", + "start": 207, + "end": 213 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "origin", + "value": "San Francisco", + "start": 233, + "end": 246 + } + ] + }, + "state": { + "flights": { + "type": "round trip", + "destination1": "Denver", + "destination2": "", + "origin": "San Francisco", + "date.depart_origin": "today", + "date.depart_intermediate": "", + "date.return": "4 days", + "time_of_day": "in the evening", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "That sounds good, where you looking to go? That's right okay we have prices starting at $337.", + "utt_idx": 5, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "price_range", + "value": "$337", + "start": 88, + "end": 92 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "That sounds very good. 
I just have two preferences. I want a nonstop flight. And I'd like to get an aisle seat.", + "utt_idx": 6, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "stops", + "value": "nonstop", + "start": 61, + "end": 68 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "seat_location", + "value": "aisle seat", + "start": 100, + "end": 110 + } + ] + }, + "state": { + "flights": { + "type": "round trip", + "destination1": "Denver", + "destination2": "", + "origin": "San Francisco", + "date.depart_origin": "today", + "date.depart_intermediate": "", + "date.return": "4 days", + "time_of_day": "in the evening", + "seating_class": "", + "seat_location": "aisle seat", + "stops": "nonstop", + "price_range": "$337", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, Non-Stop and if I heard you correctly did you say you wanted to leave as early as possible and also Nile C.", + "utt_idx": 7, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "stops", + "value": "Non-Stop", + "start": 6, + "end": 14 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes.", + "utt_idx": 8, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "round trip", + "destination1": "Denver", + "destination2": "", + "origin": "San Francisco", + "date.depart_origin": "today", + "date.depart_intermediate": "", + "date.return": "4 days", + "time_of_day": "in the evening", + "seating_class": "", + "seat_location": "aisle seat", + "stops": "Non-Stop", + "price_range": "$337", + 
"num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, you got it so it looks like United Airlines leaves at 9:20 p.m. that is nonstop the flight duration is 2 hours and 28 minutes and is priced at $337.", + "utt_idx": 9, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "airline", + "value": "United", + "start": 34, + "end": 40 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "from.time", + "value": "9:20 p.m.", + "start": 60, + "end": 69 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "stops", + "value": "nonstop", + "start": 78, + "end": 85 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "other_description", + "value": "duration is 2 hours and 28 minutes", + "start": 97, + "end": 131 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "fare", + "value": "$337", + "start": 149, + "end": 153 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "That sounds very good.", + "utt_idx": 10, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "round trip", + "destination1": "Denver", + "destination2": "", + "origin": "San Francisco", + "date.depart_origin": "today", + "date.depart_intermediate": "", + "date.return": "4 days", + "time_of_day": "in the evening", + "seating_class": "", + "seat_location": "aisle seat", + "stops": "nonstop", + "price_range": "$337", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "duration is 2 hours and 28 minutes", + "from": "", + "to": "", + "airline": "United", + "flight_number": "", + "date": "", + "from.time": "9:20 p.m.", + 
"to.time": "", + "stops.location": "", + "fare": "$337" + } + } + }, + { + "speaker": "system", + "utterance": "Perfect.", + "utt_idx": 11, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "And what time will I be I would like an evening flight on the return flight in 4 days. Does that include the return flight?", + "utt_idx": 12, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "time_of_day", + "value": "evening", + "start": 40, + "end": 47 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "date.return", + "value": "4 days", + "start": 79, + "end": 85 + } + ] + }, + "state": { + "flights": { + "type": "round trip", + "destination1": "Denver", + "destination2": "", + "origin": "San Francisco", + "date.depart_origin": "today", + "date.depart_intermediate": "", + "date.return": "4 days", + "time_of_day": "evening", + "seating_class": "", + "seat_location": "aisle seat", + "stops": "nonstop", + "price_range": "$337", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "duration is 2 hours and 28 minutes", + "from": "", + "to": "", + "airline": "United", + "flight_number": "", + "date": "", + "from.time": "9:20 p.m.", + "to.time": "", + "stops.location": "", + "fare": "$337" + } + } + }, + { + "speaker": "system", + "utterance": "How does 5:30 a.m. 
work?", + "utt_idx": 13, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "from.time", + "value": "5:30 a.m.", + "start": 9, + "end": 18 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Well, the evening works better.", + "utt_idx": 14, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "time_of_day", + "value": "evening", + "start": 10, + "end": 17 + } + ] + }, + "state": { + "flights": { + "type": "round trip", + "destination1": "Denver", + "destination2": "", + "origin": "San Francisco", + "date.depart_origin": "today", + "date.depart_intermediate": "", + "date.return": "4 days", + "time_of_day": "evening", + "seating_class": "", + "seat_location": "aisle seat", + "stops": "nonstop", + "price_range": "$337", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "duration is 2 hours and 28 minutes", + "from": "", + "to": "", + "airline": "United", + "flight_number": "", + "date": "", + "from.time": "5:30 a.m.", + "to.time": "", + "stops.location": "", + "fare": "$337" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, got it. 6:55 p.m.", + "utt_idx": 15, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "That works. 
That's very good.", + "utt_idx": 16, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "round trip", + "destination1": "Denver", + "destination2": "", + "origin": "San Francisco", + "date.depart_origin": "today", + "date.depart_intermediate": "", + "date.return": "4 days", + "time_of_day": "evening", + "seating_class": "", + "seat_location": "aisle seat", + "stops": "nonstop", + "price_range": "$337", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "duration is 2 hours and 28 minutes", + "from": "", + "to": "", + "airline": "United", + "flight_number": "", + "date": "", + "from.time": "5:30 a.m.", + "to.time": "", + "stops.location": "", + "fare": "$337" + } + } + }, + { + "speaker": "system", + "utterance": "Alright, perfect so that is United Airlines as well.", + "utt_idx": 17, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "airline", + "value": "United", + "start": 28, + "end": 34 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Excellent. 
Thank you so much, assistant.", + "utt_idx": 18, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "round trip", + "destination1": "Denver", + "destination2": "", + "origin": "San Francisco", + "date.depart_origin": "today", + "date.depart_intermediate": "", + "date.return": "4 days", + "time_of_day": "evening", + "seating_class": "", + "seat_location": "aisle seat", + "stops": "nonstop", + "price_range": "$337", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "duration is 2 hours and 28 minutes", + "from": "", + "to": "", + "airline": "United", + "flight_number": "", + "date": "", + "from.time": "5:30 a.m.", + "to.time": "", + "stops.location": "", + "fare": "$337" + } + } + }, + { + "speaker": "system", + "utterance": "Pleasure all mine. Enjoy your trip to Denver, have a great night. Your welcome.", + "utt_idx": 19, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I think I'm all set. 
Thank you so much.", + "utt_idx": 20, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "round trip", + "destination1": "Denver", + "destination2": "", + "origin": "San Francisco", + "date.depart_origin": "today", + "date.depart_intermediate": "", + "date.return": "4 days", + "time_of_day": "evening", + "seating_class": "", + "seat_location": "aisle seat", + "stops": "nonstop", + "price_range": "$337", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "duration is 2 hours and 28 minutes", + "from": "", + "to": "", + "airline": "United", + "flight_number": "", + "date": "", + "from.time": "5:30 a.m.", + "to.time": "", + "stops.location": "", + "fare": "$337" + } + } + } + ] + }, + { + "dataset": "tm2", + "data_split": "train", + "dialogue_id": "tm2-train-1", + "original_id": "dlg-005d7a68-35ec-4ed0-a0ab-715a499b48b7", + "domains": [ + "flights" + ], + "goal": { + "description": "", + "inform": {}, + "request": {} + }, + "turns": [ + { + "speaker": "user", + "utterance": "Hi assistant, need help finding a flight from Houston to London.", + "utt_idx": 0, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "origin", + "value": "Houston", + "start": 46, + "end": 53 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "destination1", + "value": "London", + "start": 57, + "end": 63 + } + ] + }, + "state": { + "flights": { + "type": "", + "destination1": "London", + "destination2": "", + "origin": "Houston", + "date.depart_origin": "", + "date.depart_intermediate": "", + "date.return": "", + "time_of_day": "", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + 
"to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, you got it.", + "utt_idx": 1, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Need to fly out on July 2nd in the afternoon.", + "utt_idx": 2, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "date.depart_origin", + "value": "July 2nd", + "start": 19, + "end": 27 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "time_of_day", + "value": "in the afternoon", + "start": 28, + "end": 44 + } + ] + }, + "state": { + "flights": { + "type": "", + "destination1": "London", + "destination2": "", + "origin": "Houston", + "date.depart_origin": "July 2nd", + "date.depart_intermediate": "", + "date.return": "", + "time_of_day": "in the afternoon", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "And when would you like to come back?", + "utt_idx": 3, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I need to fly back July 7th. 
I need something cheap.", + "utt_idx": 4, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "date.return", + "value": "July 7th", + "start": 19, + "end": 27 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "price_range", + "value": "cheap", + "start": 46, + "end": 51 + } + ] + }, + "state": { + "flights": { + "type": "", + "destination1": "London", + "destination2": "", + "origin": "Houston", + "date.depart_origin": "July 2nd", + "date.depart_intermediate": "", + "date.return": "July 7th", + "time_of_day": "in the afternoon", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "cheap", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, the cheapest flight currently is $3,200.", + "utt_idx": 5, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "price_range", + "value": "the cheapest flight", + "start": 6, + "end": 25 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "total_fare", + "value": "$3,200", + "start": 39, + "end": 45 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Let's go ahead and book that.", + "utt_idx": 6, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "", + "destination1": "London", + "destination2": "", + "origin": "Houston", + "date.depart_origin": "July 2nd", + "date.depart_intermediate": "", + "date.return": "July 7th", + "time_of_day": "in the afternoon", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "the cheapest flight", + "num.pax": 
"", + "luggage": "", + "total_fare": "$3,200", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "All right you're all set, is that it?", + "utt_idx": 7, + "dialogue_acts": { + "binary": [ + { + "intent": "inform", + "domain": "flights", + "slot": "flight_booked", + "value": "" + } + ], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "That's it. Thank you.", + "utt_idx": 8, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "", + "destination1": "London", + "destination2": "", + "origin": "Houston", + "date.depart_origin": "July 2nd", + "date.depart_intermediate": "", + "date.return": "July 7th", + "time_of_day": "in the afternoon", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "the cheapest flight", + "num.pax": "", + "luggage": "", + "total_fare": "$3,200", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "My pleasure, take care, enjoy your flight. 
Have a great night.", + "utt_idx": 9, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Bye.", + "utt_idx": 10, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "", + "destination1": "London", + "destination2": "", + "origin": "Houston", + "date.depart_origin": "July 2nd", + "date.depart_intermediate": "", + "date.return": "July 7th", + "time_of_day": "in the afternoon", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "the cheapest flight", + "num.pax": "", + "luggage": "", + "total_fare": "$3,200", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + } + ] + }, + { + "dataset": "tm2", + "data_split": "train", + "dialogue_id": "tm2-train-2", + "original_id": "dlg-006d8337-fc53-4aac-8895-b2f0caa14baa", + "domains": [ + "flights" + ], + "goal": { + "description": "", + "inform": {}, + "request": {} + }, + "turns": [ + { + "speaker": "system", + "utterance": "Hi. How can I help you?", + "utt_idx": 0, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Hello. 
I need to take a trip to Albany, New York.", + "utt_idx": 1, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "destination1", + "value": "Albany, New York", + "start": 32, + "end": 48 + } + ] + }, + "state": { + "flights": { + "type": "", + "destination1": "Albany, New York", + "destination2": "", + "origin": "", + "date.depart_origin": "", + "date.depart_intermediate": "", + "date.return": "", + "time_of_day": "", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Okay. I see that you're in Seattle right now. Is that where you want to fly out from to Albany?", + "utt_idx": 2, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "origin", + "value": "Seattle", + "start": 27, + "end": 34 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "destination1", + "value": "Albany", + "start": 88, + "end": 94 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Correct.", + "utt_idx": 3, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "", + "destination1": "Albany", + "destination2": "", + "origin": "Seattle", + "date.depart_origin": "", + "date.depart_intermediate": "", + "date.return": "", + "time_of_day": "", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + 
"to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "What dates do you want to travel?", + "utt_idx": 4, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I'd like to leave this Friday and return exactly in one week.", + "utt_idx": 5, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "date.depart_origin", + "value": "this Friday", + "start": 18, + "end": 29 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "date.return", + "value": "exactly in one week", + "start": 41, + "end": 60 + } + ] + }, + "state": { + "flights": { + "type": "", + "destination1": "Albany", + "destination2": "", + "origin": "Seattle", + "date.depart_origin": "this Friday", + "date.depart_intermediate": "", + "date.return": "exactly in one week", + "time_of_day": "", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Do you want to return on next Thursday or Friday?", + "utt_idx": 6, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "date.return", + "value": "next Thursday", + "start": 25, + "end": 38 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "date.return", + "value": "Friday", + "start": 42, + "end": 48 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Friday morning.", + "utt_idx": 7, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": 
"inform", + "domain": "flights", + "slot": "date.return", + "value": "Friday", + "start": 0, + "end": 6 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "time_of_day", + "value": "morning", + "start": 7, + "end": 14 + } + ] + }, + "state": { + "flights": { + "type": "", + "destination1": "Albany", + "destination2": "", + "origin": "Seattle", + "date.depart_origin": "this Friday", + "date.depart_intermediate": "", + "date.return": "Friday", + "time_of_day": "morning", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, I will find a return flight on 19th morning.", + "utt_idx": 8, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "date.return", + "value": "19th", + "start": 37, + "end": 41 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "time_of_day", + "value": "morning", + "start": 42, + "end": 49 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Thank you. 
United Airlines, preferable.", + "utt_idx": 9, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "airline", + "value": "United Airlines", + "start": 11, + "end": 26 + } + ] + }, + "state": { + "flights": { + "type": "", + "destination1": "Albany", + "destination2": "", + "origin": "Seattle", + "date.depart_origin": "this Friday", + "date.depart_intermediate": "", + "date.return": "19th", + "time_of_day": "morning", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "United Airlines", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "How many tickets do you need?", + "utt_idx": 10, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Just one, direct flight, no layovers, please.", + "utt_idx": 11, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "num.pax", + "value": "one", + "start": 5, + "end": 8 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "stops", + "value": "direct flight", + "start": 10, + "end": 23 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "stops", + "value": "no layovers", + "start": 25, + "end": 36 + } + ] + }, + "state": { + "flights": { + "type": "", + "destination1": "Albany", + "destination2": "", + "origin": "Seattle", + "date.depart_origin": "this Friday", + "date.depart_intermediate": "", + "date.return": "19th", + "time_of_day": "morning", + "seating_class": "", + "seat_location": "", + "stops": "no layovers", + "price_range": "", + "num.pax": "one", + "luggage": "", + 
"total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "United Airlines", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Do you have a preference for class?", + "utt_idx": 12, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "First.", + "utt_idx": 13, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "seating_class", + "value": "First", + "start": 0, + "end": 5 + } + ] + }, + "state": { + "flights": { + "type": "", + "destination1": "Albany", + "destination2": "", + "origin": "Seattle", + "date.depart_origin": "this Friday", + "date.depart_intermediate": "", + "date.return": "19th", + "time_of_day": "morning", + "seating_class": "First", + "seat_location": "", + "stops": "no layovers", + "price_range": "", + "num.pax": "one", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "United Airlines", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "There are no direct flights. Do you want to check in business?", + "utt_idx": 14, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "stops", + "value": "direct flights", + "start": 13, + "end": 27 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "seating_class", + "value": "business", + "start": 53, + "end": 61 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes, please. 
It'll be great.", + "utt_idx": 15, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "", + "destination1": "Albany", + "destination2": "", + "origin": "Seattle", + "date.depart_origin": "this Friday", + "date.depart_intermediate": "", + "date.return": "19th", + "time_of_day": "morning", + "seating_class": "business", + "seat_location": "", + "stops": "direct flights", + "price_range": "", + "num.pax": "one", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "United Airlines", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "There are no direct flights in any class. I will look for the shortest layover for you.", + "utt_idx": 16, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "stops", + "value": "direct flights", + "start": 13, + "end": 27 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "stops", + "value": "layover", + "start": 71, + "end": 78 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Thank you.", + "utt_idx": 17, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "", + "destination1": "Albany", + "destination2": "", + "origin": "Seattle", + "date.depart_origin": "this Friday", + "date.depart_intermediate": "", + "date.return": "19th", + "time_of_day": "morning", + "seating_class": "business", + "seat_location": "", + "stops": "layover", + "price_range": "", + "num.pax": "one", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "United Airlines", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + 
} + }, + { + "speaker": "system", + "utterance": "Is there anything else you want me to look for?", + "utt_idx": 18, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I'll definitely need Wi-Fi on the flight, please. I'd like morning flights.", + "utt_idx": 19, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "other_description", + "value": "Wi-Fi", + "start": 21, + "end": 26 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "time_of_day", + "value": "morning", + "start": 59, + "end": 66 + } + ] + }, + "state": { + "flights": { + "type": "", + "destination1": "Albany", + "destination2": "", + "origin": "Seattle", + "date.depart_origin": "this Friday", + "date.depart_intermediate": "", + "date.return": "19th", + "time_of_day": "morning", + "seating_class": "business", + "seat_location": "", + "stops": "layover", + "price_range": "", + "num.pax": "one", + "luggage": "", + "total_fare": "", + "other_description": "Wi-Fi", + "from": "", + "to": "", + "airline": "United Airlines", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, is that it?", + "utt_idx": 20, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes.", + "utt_idx": 21, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "", + "destination1": "Albany", + "destination2": "", + "origin": "Seattle", + "date.depart_origin": "this Friday", + "date.depart_intermediate": "", + "date.return": "19th", + "time_of_day": "morning", + "seating_class": "business", + "seat_location": "", + "stops": "layover", + "price_range": 
"", + "num.pax": "one", + "luggage": "", + "total_fare": "", + "other_description": "Wi-Fi", + "from": "", + "to": "", + "airline": "United Airlines", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "I found you a 6 am flight with United Airlines. The cost of the ticket is $1,430. It has Wi-Fi, first class and has one stop for an hour and a half. The total flight time is 7 hours and 24 minutes. How does that sound?", + "utt_idx": 22, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "from.time", + "value": "6 am", + "start": 14, + "end": 18 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "airline", + "value": "United Airlines", + "start": 31, + "end": 46 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "fare", + "value": "$1,430", + "start": 74, + "end": 80 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "other_description", + "value": "Wi-Fi", + "start": 89, + "end": 94 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "seating_class", + "value": "first class", + "start": 96, + "end": 107 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "stops", + "value": "one stop", + "start": 116, + "end": 124 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "other_description", + "value": "hour and a half", + "start": 132, + "end": 147 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "other_description", + "value": "7 hours and 24 minutes", + "start": 174, + "end": 196 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "That is beautiful. I think I'm going to go ahead and take that. 
And you'll book that for me?", + "utt_idx": 23, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "", + "destination1": "Albany", + "destination2": "", + "origin": "Seattle", + "date.depart_origin": "this Friday", + "date.depart_intermediate": "", + "date.return": "19th", + "time_of_day": "morning", + "seating_class": "first class", + "seat_location": "", + "stops": "one stop", + "price_range": "", + "num.pax": "one", + "luggage": "", + "total_fare": "", + "other_description": "7 hours and 24 minutes", + "from": "", + "to": "", + "airline": "United Airlines", + "flight_number": "", + "date": "", + "from.time": "6 am", + "to.time": "", + "stops.location": "", + "fare": "$1,430" + } + } + }, + { + "speaker": "system", + "utterance": "I can. Do you want to know the return flight details first?", + "utt_idx": 24, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes, please.", + "utt_idx": 25, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "", + "destination1": "Albany", + "destination2": "", + "origin": "Seattle", + "date.depart_origin": "this Friday", + "date.depart_intermediate": "", + "date.return": "19th", + "time_of_day": "morning", + "seating_class": "first class", + "seat_location": "", + "stops": "one stop", + "price_range": "", + "num.pax": "one", + "luggage": "", + "total_fare": "", + "other_description": "7 hours and 24 minutes", + "from": "", + "to": "", + "airline": "United Airlines", + "flight_number": "", + "date": "", + "from.time": "6 am", + "to.time": "", + "stops.location": "", + "fare": "$1,430" + } + } + }, + { + "speaker": "system", + "utterance": "The return flight leaves at 10:20 am has one stop for 15 minutes and arrives back in Seattle at 3:36 pm. 
How does that work for you?", + "utt_idx": 26, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "from.time", + "value": "10:20 am", + "start": 28, + "end": 36 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "stops", + "value": "one stop", + "start": 41, + "end": 49 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "other_description", + "value": "15 minutes", + "start": 54, + "end": 64 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "to", + "value": "Seattle", + "start": 85, + "end": 92 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "to.time", + "value": "3:36 pm", + "start": 96, + "end": 103 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "That's perfect. Thank you.", + "utt_idx": 27, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "", + "destination1": "Albany", + "destination2": "", + "origin": "Seattle", + "date.depart_origin": "this Friday", + "date.depart_intermediate": "", + "date.return": "19th", + "time_of_day": "morning", + "seating_class": "first class", + "seat_location": "", + "stops": "one stop", + "price_range": "", + "num.pax": "one", + "luggage": "", + "total_fare": "", + "other_description": "15 minutes", + "from": "", + "to": "Seattle", + "airline": "United Airlines", + "flight_number": "", + "date": "", + "from.time": "10:20 am", + "to.time": "3:36 pm", + "stops.location": "", + "fare": "$1,430" + } + } + }, + { + "speaker": "system", + "utterance": "Great. You're welcome. 
It's booked.", + "utt_idx": 28, + "dialogue_acts": { + "binary": [ + { + "intent": "inform", + "domain": "flights", + "slot": "flight_booked", + "value": "" + } + ], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "And you'll send me the details to my email?", + "utt_idx": 29, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "", + "destination1": "Albany", + "destination2": "", + "origin": "Seattle", + "date.depart_origin": "this Friday", + "date.depart_intermediate": "", + "date.return": "19th", + "time_of_day": "morning", + "seating_class": "first class", + "seat_location": "", + "stops": "one stop", + "price_range": "", + "num.pax": "one", + "luggage": "", + "total_fare": "", + "other_description": "15 minutes", + "from": "", + "to": "Seattle", + "airline": "United Airlines", + "flight_number": "", + "date": "", + "from.time": "10:20 am", + "to.time": "3:36 pm", + "stops.location": "", + "fare": "$1,430" + } + } + }, + { + "speaker": "system", + "utterance": "I will do that. It is done. Thank you.", + "utt_idx": 30, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "You're welcome. 
Thank you so much.", + "utt_idx": 31, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "", + "destination1": "Albany", + "destination2": "", + "origin": "Seattle", + "date.depart_origin": "this Friday", + "date.depart_intermediate": "", + "date.return": "19th", + "time_of_day": "morning", + "seating_class": "first class", + "seat_location": "", + "stops": "one stop", + "price_range": "", + "num.pax": "one", + "luggage": "", + "total_fare": "", + "other_description": "15 minutes", + "from": "", + "to": "Seattle", + "airline": "United Airlines", + "flight_number": "", + "date": "", + "from.time": "10:20 am", + "to.time": "3:36 pm", + "stops.location": "", + "fare": "$1,430" + } + } + }, + { + "speaker": "system", + "utterance": "Have a good flight.", + "utt_idx": 32, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + } + ] + }, + { + "dataset": "tm2", + "data_split": "train", + "dialogue_id": "tm2-train-3", + "original_id": "dlg-00754a9a-1b01-465d-adb9-5215a32d174d", + "domains": [ + "flights" + ], + "goal": { + "description": "", + "inform": {}, + "request": {} + }, + "turns": [ + { + "speaker": "system", + "utterance": "Hi, how can I help you?", + "utt_idx": 0, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I like to make a round-trip from Miami to Madrid.", + "utt_idx": 1, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "type", + "value": "round-trip", + "start": 17, + "end": 27 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "origin", + "value": "Miami", + "start": 33, + "end": 38 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "destination1", + "value": "Madrid", + "start": 42, + "end": 48 + } + ] + }, + 
"state": { + "flights": { + "type": "round-trip", + "destination1": "Madrid", + "destination2": "", + "origin": "Miami", + "date.depart_origin": "", + "date.depart_intermediate": "", + "date.return": "", + "time_of_day": "", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Miami to where?", + "utt_idx": 2, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "origin", + "value": "Miami", + "start": 0, + "end": 5 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "M a d r i d.", + "utt_idx": 3, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "destination1", + "value": "M a d r i d", + "start": 0, + "end": 11 + } + ] + }, + "state": { + "flights": { + "type": "round-trip", + "destination1": "M a d r i d", + "destination2": "", + "origin": "Miami", + "date.depart_origin": "", + "date.depart_intermediate": "", + "date.return": "", + "time_of_day": "", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Oh, Madrid. 
When?", + "utt_idx": 4, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "destination1", + "value": "Madrid", + "start": 4, + "end": 10 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I'd like to leave today evening and return in 5 days.", + "utt_idx": 5, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "date.depart_origin", + "value": "today", + "start": 18, + "end": 23 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "time_of_day", + "value": "evening", + "start": 24, + "end": 31 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "date.return", + "value": "in 5 days", + "start": 43, + "end": 52 + } + ] + }, + "state": { + "flights": { + "type": "round-trip", + "destination1": "Madrid", + "destination2": "", + "origin": "Miami", + "date.depart_origin": "today", + "date.depart_intermediate": "", + "date.return": "in 5 days", + "time_of_day": "evening", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "I have a 7:10 p.m, 7:45 p.m, 9 p.m.", + "utt_idx": 6, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "from.time", + "value": "7:10 p.m", + "start": 9, + "end": 17 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "from.time", + "value": "7:45 p.m", + "start": 19, + "end": 27 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "from.time", + "value": "9 p.m", + "start": 29, + "end": 34 + } + ] + }, + 
"db_results": {} + }, + { + "speaker": "user", + "utterance": "How about the 7:00 p.m?", + "utt_idx": 7, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "from.time", + "value": "7:00 p.m", + "start": 14, + "end": 22 + } + ] + }, + "state": { + "flights": { + "type": "round-trip", + "destination1": "Madrid", + "destination2": "", + "origin": "Miami", + "date.depart_origin": "today", + "date.depart_intermediate": "", + "date.return": "in 5 days", + "time_of_day": "evening", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "7:00 p.m", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "The return flights are at 12 p.m, 4:05 p.m, 11:35 p.m.", + "utt_idx": 8, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "from.time", + "value": "12 p.m", + "start": 26, + "end": 32 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "from.time", + "value": "4:05 p.m", + "start": 34, + "end": 42 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "from.time", + "value": "11:35 p.m", + "start": 44, + "end": 53 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Let's do the 4:05 p.m return on the flight. 
That would be nice.", + "utt_idx": 9, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "from.time", + "value": "4:05 p.m", + "start": 13, + "end": 21 + } + ] + }, + "state": { + "flights": { + "type": "round-trip", + "destination1": "Madrid", + "destination2": "", + "origin": "Miami", + "date.depart_origin": "today", + "date.depart_intermediate": "", + "date.return": "in 5 days", + "time_of_day": "evening", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "4:05 p.m", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Got it, the flight has been booked. The itinerary has been sent to your email.", + "utt_idx": 10, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Is that a Non-Stop?", + "utt_idx": 11, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "stops", + "value": "Non-Stop", + "start": 10, + "end": 18 + } + ] + }, + "state": { + "flights": { + "type": "round-trip", + "destination1": "Madrid", + "destination2": "", + "origin": "Miami", + "date.depart_origin": "today", + "date.depart_intermediate": "", + "date.return": "in 5 days", + "time_of_day": "evening", + "seating_class": "", + "seat_location": "", + "stops": "Non-Stop", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "4:05 p.m", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": 
"system", + "utterance": "Yes.", + "utt_idx": 12, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "And also, I'd like that to have a window seat with that, please.", + "utt_idx": 13, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "seat_location", + "value": "window seat", + "start": 34, + "end": 45 + } + ] + }, + "state": { + "flights": { + "type": "round-trip", + "destination1": "Madrid", + "destination2": "", + "origin": "Miami", + "date.depart_origin": "today", + "date.depart_intermediate": "", + "date.return": "in 5 days", + "time_of_day": "evening", + "seating_class": "", + "seat_location": "window seat", + "stops": "Non-Stop", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "4:05 p.m", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, Flight has been booked.", + "utt_idx": 14, + "dialogue_acts": { + "binary": [ + { + "intent": "inform", + "domain": "flights", + "slot": "flight_booked", + "value": "" + } + ], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Okay, can you send me the information to my email?", + "utt_idx": 15, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "round-trip", + "destination1": "Madrid", + "destination2": "", + "origin": "Miami", + "date.depart_origin": "today", + "date.depart_intermediate": "", + "date.return": "in 5 days", + "time_of_day": "evening", + "seating_class": "", + "seat_location": "window seat", + "stops": "Non-Stop", + "price_range": "", + "num.pax": "", + "luggage": "", + 
"total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "4:05 p.m", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Yes.", + "utt_idx": 16, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Okay, thank you.", + "utt_idx": 17, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "round-trip", + "destination1": "Madrid", + "destination2": "", + "origin": "Miami", + "date.depart_origin": "today", + "date.depart_intermediate": "", + "date.return": "in 5 days", + "time_of_day": "evening", + "seating_class": "", + "seat_location": "window seat", + "stops": "Non-Stop", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "4:05 p.m", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + } + ] + }, + { + "dataset": "tm2", + "data_split": "train", + "dialogue_id": "tm2-train-4", + "original_id": "dlg-009c3fa1-6f6e-48dd-84c8-c52dbde6a4ae", + "domains": [ + "flights" + ], + "goal": { + "description": "", + "inform": {}, + "request": {} + }, + "turns": [ + { + "speaker": "system", + "utterance": "Hello user.", + "utt_idx": 0, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Hi, I would like to book a round trip commercial airline flight from Los Angeles to Boston.", + "utt_idx": 1, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "type", + "value": "round trip", + "start": 27, + "end": 37 + }, + { + "intent": "inform", + 
"domain": "flights", + "slot": "origin", + "value": "Los Angeles", + "start": 69, + "end": 80 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "destination1", + "value": "Boston", + "start": 84, + "end": 90 + } + ] + }, + "state": { + "flights": { + "type": "round trip", + "destination1": "Boston", + "destination2": "", + "origin": "Los Angeles", + "date.depart_origin": "", + "date.depart_intermediate": "", + "date.return": "", + "time_of_day": "", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Ok, can you give me some specifications?", + "utt_idx": 2, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes, I would like to be today for the departure, and return in four days from now?", + "utt_idx": 3, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "date.depart_origin", + "value": "today", + "start": 24, + "end": 29 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "date.return", + "value": "four days from now", + "start": 63, + "end": 81 + } + ] + }, + "state": { + "flights": { + "type": "round trip", + "destination1": "Boston", + "destination2": "", + "origin": "Los Angeles", + "date.depart_origin": "today", + "date.depart_intermediate": "", + "date.return": "four days from now", + "time_of_day": "", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + 
"date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "What seating class do you prefer?", + "utt_idx": 4, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "The cheapest economy class.", + "utt_idx": 5, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "price_range", + "value": "cheapest", + "start": 4, + "end": 12 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "seating_class", + "value": "economy", + "start": 13, + "end": 20 + } + ] + }, + "state": { + "flights": { + "type": "round trip", + "destination1": "Boston", + "destination2": "", + "origin": "Los Angeles", + "date.depart_origin": "today", + "date.depart_intermediate": "", + "date.return": "four days from now", + "time_of_day": "", + "seating_class": "economy", + "seat_location": "", + "stops": "", + "price_range": "cheapest", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Did you want to leave in the morning or afternoon flight?", + "utt_idx": 6, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "time_of_day", + "value": "morning", + "start": 29, + "end": 36 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "time_of_day", + "value": "afternoon", + "start": 40, + "end": 49 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Earliest possible in the morning.", + "utt_idx": 7, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": 
[ + { + "intent": "inform", + "domain": "flights", + "slot": "time_of_day", + "value": "Earliest possible in the morning", + "start": 0, + "end": 32 + } + ] + }, + "state": { + "flights": { + "type": "round trip", + "destination1": "Boston", + "destination2": "", + "origin": "Los Angeles", + "date.depart_origin": "today", + "date.depart_intermediate": "", + "date.return": "four days from now", + "time_of_day": "Earliest possible in the morning", + "seating_class": "economy", + "seat_location": "", + "stops": "", + "price_range": "cheapest", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Do you prefer non-stop or layover?", + "utt_idx": 8, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "stops", + "value": "non-stop", + "start": 14, + "end": 22 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "stops", + "value": "layover", + "start": 26, + "end": 33 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Non-stop.", + "utt_idx": 9, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "stops", + "value": "Non-stop", + "start": 0, + "end": 8 + } + ] + }, + "state": { + "flights": { + "type": "round trip", + "destination1": "Boston", + "destination2": "", + "origin": "Los Angeles", + "date.depart_origin": "today", + "date.depart_intermediate": "", + "date.return": "four days from now", + "time_of_day": "Earliest possible in the morning", + "seating_class": "economy", + "seat_location": "", + "stops": "Non-stop", + "price_range": "cheapest", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": 
"", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Ok, one moment, hold. I found a flight for you is United Airlines that departs at 10:47 p.m and arrives at 7:14 a.m.", + "utt_idx": 10, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "airline", + "value": "United Airlines", + "start": 50, + "end": 65 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "from.time", + "value": "10:47 p.m", + "start": 82, + "end": 91 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "to.time", + "value": "7:14 a.m", + "start": 107, + "end": 115 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Okay, perfect.", + "utt_idx": 11, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "round trip", + "destination1": "Boston", + "destination2": "", + "origin": "Los Angeles", + "date.depart_origin": "today", + "date.depart_intermediate": "", + "date.return": "four days from now", + "time_of_day": "Earliest possible in the morning", + "seating_class": "economy", + "seat_location": "", + "stops": "Non-stop", + "price_range": "cheapest", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "United Airlines", + "flight_number": "", + "date": "", + "from.time": "10:47 p.m", + "to.time": "7:14 a.m", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "The total price is $555. 
You are all set.", + "utt_idx": 12, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "fare", + "value": "$555", + "start": 19, + "end": 23 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Thank you very much.", + "utt_idx": 13, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "round trip", + "destination1": "Boston", + "destination2": "", + "origin": "Los Angeles", + "date.depart_origin": "today", + "date.depart_intermediate": "", + "date.return": "four days from now", + "time_of_day": "Earliest possible in the morning", + "seating_class": "economy", + "seat_location": "", + "stops": "Non-stop", + "price_range": "cheapest", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "United Airlines", + "flight_number": "", + "date": "", + "from.time": "10:47 p.m", + "to.time": "7:14 a.m", + "stops.location": "", + "fare": "$555" + } + } + } + ] + }, + { + "dataset": "tm2", + "data_split": "train", + "dialogue_id": "tm2-train-5", + "original_id": "dlg-00e32998-0b0f-47f1-a4f0-2ce90f1718d0", + "domains": [ + "flights" + ], + "goal": { + "description": "", + "inform": {}, + "request": {} + }, + "turns": [ + { + "speaker": "user", + "utterance": "Hi. I'm looking for a round-trip commercial airline flight. I got to get to a funeral. 
I'm going from San Francisco to Chicago.", + "utt_idx": 0, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "type", + "value": "round-trip", + "start": 22, + "end": 32 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "origin", + "value": "San Francisco", + "start": 102, + "end": 115 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "destination1", + "value": "Chicago", + "start": 119, + "end": 126 + } + ] + }, + "state": { + "flights": { + "type": "round-trip", + "destination1": "Chicago", + "destination2": "", + "origin": "San Francisco", + "date.depart_origin": "", + "date.depart_intermediate": "", + "date.return": "", + "time_of_day": "", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, sorry to hear about the loss.", + "utt_idx": 1, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Well, thank you. It's very kind. I need to leave today, and I want to get back in 3 days. 
And optimally, I would leave in the morning for both the departure and the return.", + "utt_idx": 2, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "date.depart_origin", + "value": "today", + "start": 49, + "end": 54 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "date.return", + "value": "in 3 days", + "start": 79, + "end": 88 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "time_of_day", + "value": "in the morning", + "start": 119, + "end": 133 + } + ] + }, + "state": { + "flights": { + "type": "round-trip", + "destination1": "Chicago", + "destination2": "", + "origin": "San Francisco", + "date.depart_origin": "today", + "date.depart_intermediate": "", + "date.return": "in 3 days", + "time_of_day": "in the morning", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, so let me see what we get.", + "utt_idx": 3, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Thank you.", + "utt_idx": 4, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "round-trip", + "destination1": "Chicago", + "destination2": "", + "origin": "San Francisco", + "date.depart_origin": "today", + "date.depart_intermediate": "", + "date.return": "in 3 days", + "time_of_day": "in the morning", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + 
"airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "I'm assuming you want to leave as early as possible Right.", + "utt_idx": 5, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yeah. Right.", + "utt_idx": 6, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "round-trip", + "destination1": "Chicago", + "destination2": "", + "origin": "San Francisco", + "date.depart_origin": "today", + "date.depart_intermediate": "", + "date.return": "in 3 days", + "time_of_day": "in the morning", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, for leaving as early as possible you could leave in two hours at 7:52 p.m. 
Is that too early?", + "utt_idx": 7, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "from.time", + "value": "two hours at 7:52 p.m", + "start": 58, + "end": 79 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Is that the cheapest?", + "utt_idx": 8, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "price_range", + "value": "cheapest", + "start": 12, + "end": 20 + } + ] + }, + "state": { + "flights": { + "type": "round-trip", + "destination1": "Chicago", + "destination2": "", + "origin": "San Francisco", + "date.depart_origin": "today", + "date.depart_intermediate": "", + "date.return": "in 3 days", + "time_of_day": "in the morning", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "cheapest", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "two hours at 7:52 p.m", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "That is the cheapest at $1,474.", + "utt_idx": 9, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "price_range", + "value": "cheapest", + "start": 12, + "end": 20 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "fare", + "value": "$1,474", + "start": 24, + "end": 30 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "And does that have Wi-Fi?", + "utt_idx": 10, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "other_description", + "value": "Wi-Fi", + "start": 19, + "end": 24 + } + ] + }, + "state": { + "flights": 
{ + "type": "round-trip", + "destination1": "Chicago", + "destination2": "", + "origin": "San Francisco", + "date.depart_origin": "today", + "date.depart_intermediate": "", + "date.return": "in 3 days", + "time_of_day": "in the morning", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "cheapest", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "Wi-Fi", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "two hours at 7:52 p.m", + "to.time": "", + "stops.location": "", + "fare": "$1,474" + } + } + }, + { + "speaker": "system", + "utterance": "Yes. That does have Wi-Fi.", + "utt_idx": 11, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "other_description", + "value": "Wi-Fi", + "start": 20, + "end": 25 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Alright, that's perfect. 
What time does that leave, and what time does the return flight leave?", + "utt_idx": 12, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "round-trip", + "destination1": "Chicago", + "destination2": "", + "origin": "San Francisco", + "date.depart_origin": "today", + "date.depart_intermediate": "", + "date.return": "in 3 days", + "time_of_day": "in the morning", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "cheapest", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "Wi-Fi", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "two hours at 7:52 p.m", + "to.time": "", + "stops.location": "", + "fare": "$1,474" + } + } + }, + { + "speaker": "system", + "utterance": "It come with extended seating and full meals as well.", + "utt_idx": 13, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "other_description", + "value": "extended seating and full meals", + "start": 13, + "end": 44 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Okay, sounds good.", + "utt_idx": 14, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "round-trip", + "destination1": "Chicago", + "destination2": "", + "origin": "San Francisco", + "date.depart_origin": "today", + "date.depart_intermediate": "", + "date.return": "in 3 days", + "time_of_day": "in the morning", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "cheapest", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "extended seating and full meals", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "two hours at 7:52 p.m", + "to.time": "", + "stops.location": "", + "fare": 
"$1,474" + } + } + }, + { + "speaker": "system", + "utterance": "The return flight will come back at 6 p.m.", + "utt_idx": 15, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "from.time", + "value": "6 p.m", + "start": 36, + "end": 41 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "What time does it leave from Chicago?", + "utt_idx": 16, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "from", + "value": "Chicago", + "start": 29, + "end": 36 + } + ] + }, + "state": { + "flights": { + "type": "round-trip", + "destination1": "Chicago", + "destination2": "", + "origin": "San Francisco", + "date.depart_origin": "today", + "date.depart_intermediate": "", + "date.return": "in 3 days", + "time_of_day": "in the morning", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "cheapest", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "extended seating and full meals", + "from": "Chicago", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "6 p.m", + "to.time": "", + "stops.location": "", + "fare": "$1,474" + } + } + }, + { + "speaker": "system", + "utterance": "The plane leaves in Chicago at 7 a.m. for San Francisco.", + "utt_idx": 17, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "from", + "value": "Chicago", + "start": 20, + "end": 27 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "from.time", + "value": "7 a.m", + "start": 31, + "end": 36 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "to", + "value": "San Francisco", + "start": 42, + "end": 55 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Okay. 
No, that's perfect.", + "utt_idx": 18, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "round-trip", + "destination1": "Chicago", + "destination2": "", + "origin": "San Francisco", + "date.depart_origin": "today", + "date.depart_intermediate": "", + "date.return": "in 3 days", + "time_of_day": "in the morning", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "cheapest", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "extended seating and full meals", + "from": "Chicago", + "to": "San Francisco", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "7 a.m", + "to.time": "", + "stops.location": "", + "fare": "$1,474" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, great then you are all set.", + "utt_idx": 19, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Okay.", + "utt_idx": 20, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "round-trip", + "destination1": "Chicago", + "destination2": "", + "origin": "San Francisco", + "date.depart_origin": "today", + "date.depart_intermediate": "", + "date.return": "in 3 days", + "time_of_day": "in the morning", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "cheapest", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "extended seating and full meals", + "from": "Chicago", + "to": "San Francisco", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "7 a.m", + "to.time": "", + "stops.location": "", + "fare": "$1,474" + } + } + }, + { + "speaker": "system", + "utterance": "Have a great night.", + "utt_idx": 21, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} 
+ } + ] + }, + { + "dataset": "tm2", + "data_split": "train", + "dialogue_id": "tm2-train-6", + "original_id": "dlg-011f951c-2231-4dca-a55b-4ef97e599e7e", + "domains": [ + "flights" + ], + "goal": { + "description": "", + "inform": {}, + "request": {} + }, + "turns": [ + { + "speaker": "system", + "utterance": "Hello. How can I help you?", + "utt_idx": 0, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I'm looking for a flight to JFK.", + "utt_idx": 1, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "destination1", + "value": "JFK", + "start": 28, + "end": 31 + } + ] + }, + "state": { + "flights": { + "type": "", + "destination1": "JFK", + "destination2": "", + "origin": "", + "date.depart_origin": "", + "date.depart_intermediate": "", + "date.return": "", + "time_of_day": "", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Okay and where are you traveling from?", + "utt_idx": 2, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Seattle, Washington.", + "utt_idx": 3, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "origin", + "value": "Seattle, Washington", + "start": 0, + "end": 19 + } + ] + }, + "state": { + "flights": { + "type": "", + "destination1": "JFK", + "destination2": "", + "origin": "Seattle, Washington", + "date.depart_origin": "", + 
"date.depart_intermediate": "", + "date.return": "", + "time_of_day": "", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Okay and then what are the dates you are leaving?", + "utt_idx": 4, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I want to fly out on March 1st and return on March 7th.", + "utt_idx": 5, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "date.depart_origin", + "value": "March 1st", + "start": 21, + "end": 30 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "date.return", + "value": "March 7th", + "start": 45, + "end": 54 + } + ] + }, + "state": { + "flights": { + "type": "", + "destination1": "JFK", + "destination2": "", + "origin": "Seattle, Washington", + "date.depart_origin": "March 1st", + "date.depart_intermediate": "", + "date.return": "March 7th", + "time_of_day": "", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Okay and what time do you prefer to leave?", + "utt_idx": 6, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "If there's a morning flight, that would be best.", + "utt_idx": 7, + 
"dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "time_of_day", + "value": "morning", + "start": 13, + "end": 20 + } + ] + }, + "state": { + "flights": { + "type": "", + "destination1": "JFK", + "destination2": "", + "origin": "Seattle, Washington", + "date.depart_origin": "March 1st", + "date.depart_intermediate": "", + "date.return": "March 7th", + "time_of_day": "morning", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Okay great and any seat class?", + "utt_idx": 8, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Coach is fine.", + "utt_idx": 9, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "seating_class", + "value": "Coach", + "start": 0, + "end": 5 + } + ] + }, + "state": { + "flights": { + "type": "", + "destination1": "JFK", + "destination2": "", + "origin": "Seattle, Washington", + "date.depart_origin": "March 1st", + "date.depart_intermediate": "", + "date.return": "March 7th", + "time_of_day": "morning", + "seating_class": "Coach", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Okay perfect! 
let me look for a flight for you.", + "utt_idx": 10, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Sure. Thank you.", + "utt_idx": 11, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "", + "destination1": "JFK", + "destination2": "", + "origin": "Seattle, Washington", + "date.depart_origin": "March 1st", + "date.depart_intermediate": "", + "date.return": "March 7th", + "time_of_day": "morning", + "seating_class": "Coach", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "This will just be a second.", + "utt_idx": 12, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Sure.", + "utt_idx": 13, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "", + "destination1": "JFK", + "destination2": "", + "origin": "Seattle, Washington", + "date.depart_origin": "March 1st", + "date.depart_intermediate": "", + "date.return": "March 7th", + "time_of_day": "morning", + "seating_class": "Coach", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Hello, the flight for $355 round trip and it leaves at 7:45 a.m. and you arrive at 4:07 p.m. 
nonstop.", + "utt_idx": 14, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "total_fare", + "value": "$355", + "start": 22, + "end": 26 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "type", + "value": "round trip", + "start": 27, + "end": 37 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "from.time", + "value": "7:45 a.m.", + "start": 55, + "end": 64 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "to.time", + "value": "4:07 p.m.", + "start": 83, + "end": 92 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "stops", + "value": "nonstop", + "start": 93, + "end": 100 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Alright. That sounds good.", + "utt_idx": 15, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "round trip", + "destination1": "JFK", + "destination2": "", + "origin": "Seattle, Washington", + "date.depart_origin": "March 1st", + "date.depart_intermediate": "", + "date.return": "March 7th", + "time_of_day": "morning", + "seating_class": "Coach", + "seat_location": "", + "stops": "nonstop", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "$355", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "7:45 a.m.", + "to.time": "4:07 p.m.", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Perfect! 
did you want me to book this flight?", + "utt_idx": 16, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Sure.", + "utt_idx": 17, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "round trip", + "destination1": "JFK", + "destination2": "", + "origin": "Seattle, Washington", + "date.depart_origin": "March 1st", + "date.depart_intermediate": "", + "date.return": "March 7th", + "time_of_day": "morning", + "seating_class": "Coach", + "seat_location": "", + "stops": "nonstop", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "$355", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "7:45 a.m.", + "to.time": "4:07 p.m.", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, great booked!", + "utt_idx": 18, + "dialogue_acts": { + "binary": [ + { + "intent": "inform", + "domain": "flights", + "slot": "flight_booked", + "value": "" + } + ], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Thank you. 
I appreciate it.", + "utt_idx": 19, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "round trip", + "destination1": "JFK", + "destination2": "", + "origin": "Seattle, Washington", + "date.depart_origin": "March 1st", + "date.depart_intermediate": "", + "date.return": "March 7th", + "time_of_day": "morning", + "seating_class": "Coach", + "seat_location": "", + "stops": "nonstop", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "$355", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "7:45 a.m.", + "to.time": "4:07 p.m.", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "You're welcome. Enjoy your flight. Goodbye. Goodbye.", + "utt_idx": 20, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + } + ] + }, + { + "dataset": "tm2", + "data_split": "train", + "dialogue_id": "tm2-train-7", + "original_id": "dlg-019cbf4f-e4f4-40e5-b37d-e0d25be5d76a", + "domains": [ + "flights" + ], + "goal": { + "description": "", + "inform": {}, + "request": {} + }, + "turns": [ + { + "speaker": "user", + "utterance": "Hi, I need to book a flight ticket from Los Angels to Las Vegas on April 1st and I will be coming back on April 3rd.", + "utt_idx": 0, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "origin", + "value": "Los Angels", + "start": 40, + "end": 50 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "destination1", + "value": "Las Vegas", + "start": 54, + "end": 63 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "date.depart_origin", + "value": "April 1st", + "start": 67, + "end": 76 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "date.return", + "value": "April 3rd", + "start": 
106, + "end": 115 + } + ] + }, + "state": { + "flights": { + "type": "", + "destination1": "Las Vegas", + "destination2": "", + "origin": "Los Angels", + "date.depart_origin": "April 1st", + "date.depart_intermediate": "", + "date.return": "April 3rd", + "time_of_day": "", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Hello. I can help you book a flight.", + "utt_idx": 1, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Great. I need to go to Las Vegas on April 1st.", + "utt_idx": 2, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "destination1", + "value": "Las Vegas", + "start": 23, + "end": 32 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "date.depart_origin", + "value": "April 1st", + "start": 36, + "end": 45 + } + ] + }, + "state": { + "flights": { + "type": "", + "destination1": "Las Vegas", + "destination2": "", + "origin": "Los Angels", + "date.depart_origin": "April 1st", + "date.depart_intermediate": "", + "date.return": "April 3rd", + "time_of_day": "", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, So you want to go from Los Angeles to Las Vegas on April 1st and return on April 3rd. 
Is that correct?", + "utt_idx": 3, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "origin", + "value": "Los Angeles", + "start": 29, + "end": 40 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "destination1", + "value": "Las Vegas", + "start": 44, + "end": 53 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "date.depart_origin", + "value": "April 1st", + "start": 57, + "end": 66 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "date.return", + "value": "return on April 3rd", + "start": 71, + "end": 90 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "That is correct.", + "utt_idx": 4, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "", + "destination1": "Las Vegas", + "destination2": "", + "origin": "Los Angeles", + "date.depart_origin": "April 1st", + "date.depart_intermediate": "", + "date.return": "return on April 3rd", + "time_of_day": "", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "How many tickets do you need?", + "utt_idx": 5, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Two tickets. 
And I'd like to have a nonstop flight.", + "utt_idx": 6, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "num.pax", + "value": "Two", + "start": 0, + "end": 3 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "stops", + "value": "nonstop", + "start": 36, + "end": 43 + } + ] + }, + "state": { + "flights": { + "type": "", + "destination1": "Las Vegas", + "destination2": "", + "origin": "Los Angeles", + "date.depart_origin": "April 1st", + "date.depart_intermediate": "", + "date.return": "return on April 3rd", + "time_of_day": "", + "seating_class": "", + "seat_location": "", + "stops": "nonstop", + "price_range": "", + "num.pax": "Two", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "What are your other preferences for the flight?", + "utt_idx": 7, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I would like to travel in the evening and I prefer coach class.", + "utt_idx": 8, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "seating_class", + "value": "coach", + "start": 51, + "end": 56 + } + ] + }, + "state": { + "flights": { + "type": "", + "destination1": "Las Vegas", + "destination2": "", + "origin": "Los Angeles", + "date.depart_origin": "April 1st", + "date.depart_intermediate": "", + "date.return": "return on April 3rd", + "time_of_day": "", + "seating_class": "coach", + "seat_location": "", + "stops": "nonstop", + "price_range": "", + "num.pax": "Two", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + 
"airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, Got it. Anything else?", + "utt_idx": 9, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "That should do it.", + "utt_idx": 10, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "", + "destination1": "Las Vegas", + "destination2": "", + "origin": "Los Angeles", + "date.depart_origin": "April 1st", + "date.depart_intermediate": "", + "date.return": "return on April 3rd", + "time_of_day": "", + "seating_class": "coach", + "seat_location": "", + "stops": "nonstop", + "price_range": "", + "num.pax": "Two", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Hold on just a second. Hold on just a second. 
I found tickets that cost $79 and will be leaving at 7:50 PM and arrive at 8:53 PM This is Spirit Airlines and it is a nonstop flight.", + "utt_idx": 11, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "fare", + "value": "$79", + "start": 72, + "end": 75 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "from.time", + "value": "7:50 PM", + "start": 99, + "end": 106 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "to.time", + "value": "8:53 PM", + "start": 121, + "end": 128 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "airline", + "value": "Spirit Airlines", + "start": 137, + "end": 152 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "stops", + "value": "nonstop", + "start": 165, + "end": 172 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "That sounds perfect.", + "utt_idx": 12, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "", + "destination1": "Las Vegas", + "destination2": "", + "origin": "Los Angeles", + "date.depart_origin": "April 1st", + "date.depart_intermediate": "", + "date.return": "return on April 3rd", + "time_of_day": "", + "seating_class": "coach", + "seat_location": "", + "stops": "nonstop", + "price_range": "", + "num.pax": "Two", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "Spirit Airlines", + "flight_number": "", + "date": "", + "from.time": "7:50 PM", + "to.time": "8:53 PM", + "stops.location": "", + "fare": "$79" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, Is it for two adults?", + "utt_idx": 13, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "num.pax", + "value": "two", + "start": 16, + "end": 19 + } + ] + }, + 
"db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes.", + "utt_idx": 14, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "", + "destination1": "Las Vegas", + "destination2": "", + "origin": "Los Angeles", + "date.depart_origin": "April 1st", + "date.depart_intermediate": "", + "date.return": "return on April 3rd", + "time_of_day": "", + "seating_class": "coach", + "seat_location": "", + "stops": "nonstop", + "price_range": "", + "num.pax": "two", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "Spirit Airlines", + "flight_number": "", + "date": "", + "from.time": "7:50 PM", + "to.time": "8:53 PM", + "stops.location": "", + "fare": "$79" + } + } + }, + { + "speaker": "system", + "utterance": "The total ticket price is $157. Shall I book this for you now?", + "utt_idx": 15, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "fare", + "value": "$157", + "start": 26, + "end": 30 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes, please.", + "utt_idx": 16, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "", + "destination1": "Las Vegas", + "destination2": "", + "origin": "Los Angeles", + "date.depart_origin": "April 1st", + "date.depart_intermediate": "", + "date.return": "return on April 3rd", + "time_of_day": "", + "seating_class": "coach", + "seat_location": "", + "stops": "nonstop", + "price_range": "", + "num.pax": "two", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "Spirit Airlines", + "flight_number": "", + "date": "", + "from.time": "7:50 PM", + "to.time": "8:53 PM", + "stops.location": "", + "fare": "$157" + } + } + }, + { + "speaker": "system", + "utterance": 
"Aright. The return trip will be leaving at 7:58 PM. Enjoy your flight.", + "utt_idx": 17, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "from.time", + "value": "7:58 PM", + "start": 43, + "end": 50 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "All right, Thank you. Goodbye.", + "utt_idx": 18, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "", + "destination1": "Las Vegas", + "destination2": "", + "origin": "Los Angeles", + "date.depart_origin": "April 1st", + "date.depart_intermediate": "", + "date.return": "return on April 3rd", + "time_of_day": "", + "seating_class": "coach", + "seat_location": "", + "stops": "nonstop", + "price_range": "", + "num.pax": "two", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "Spirit Airlines", + "flight_number": "", + "date": "", + "from.time": "7:58 PM", + "to.time": "8:53 PM", + "stops.location": "", + "fare": "$157" + } + } + }, + { + "speaker": "system", + "utterance": "Bye, See you later.", + "utt_idx": 19, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + } + ] + }, + { + "dataset": "tm2", + "data_split": "train", + "dialogue_id": "tm2-train-8", + "original_id": "dlg-01c15d77-d5ee-45f7-b149-386d4e04d26a", + "domains": [ + "flights" + ], + "goal": { + "description": "", + "inform": {}, + "request": {} + }, + "turns": [ + { + "speaker": "system", + "utterance": "Hello.", + "utt_idx": 0, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Hi. How can I help you? I'm looking to find a round trip commercial airline flight from Seattle to Chicago. 
I want to leave on September 13th and depart and return in 3 days, afternoon would be a good time of the day.", + "utt_idx": 1, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "type", + "value": "round trip", + "start": 46, + "end": 56 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "origin", + "value": "Seattle", + "start": 88, + "end": 95 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "destination1", + "value": "Chicago", + "start": 99, + "end": 106 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "date.depart_origin", + "value": "September 13th", + "start": 127, + "end": 141 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "date.return", + "value": "return in 3 days", + "start": 157, + "end": 173 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "time_of_day", + "value": "afternoon", + "start": 175, + "end": 184 + } + ] + }, + "state": { + "flights": { + "type": "round trip", + "destination1": "Chicago", + "destination2": "", + "origin": "Seattle", + "date.depart_origin": "September 13th", + "date.depart_intermediate": "", + "date.return": "return in 3 days", + "time_of_day": "afternoon", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, what seating class do you prefer?", + "utt_idx": 2, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I'd like to get cheapest seat. 
And I'd like to get be able to watch a movie during the flight.", + "utt_idx": 3, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "price_range", + "value": "cheapest seat", + "start": 16, + "end": 29 + } + ] + }, + "state": { + "flights": { + "type": "round trip", + "destination1": "Chicago", + "destination2": "", + "origin": "Seattle", + "date.depart_origin": "September 13th", + "date.depart_intermediate": "", + "date.return": "return in 3 days", + "time_of_day": "afternoon", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "cheapest seat", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Do you prefer non stop or layover?", + "utt_idx": 4, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "stops", + "value": "non stop", + "start": 14, + "end": 22 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "stops", + "value": "layover", + "start": 26, + "end": 33 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Non-stop.", + "utt_idx": 5, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "stops", + "value": "Non-stop", + "start": 0, + "end": 8 + } + ] + }, + "state": { + "flights": { + "type": "round trip", + "destination1": "Chicago", + "destination2": "", + "origin": "Seattle", + "date.depart_origin": "September 13th", + "date.depart_intermediate": "", + "date.return": "return in 3 days", + "time_of_day": "afternoon", + "seating_class": "", + "seat_location": "", + "stops": "Non-stop", + 
"price_range": "cheapest seat", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Hold. One moment. I found a flight at Delta Airline departs at 5:12 PM, arrives at 11:11 PM. Your return flight departs at 2:10 PM, arrives at 4:30 PM.", + "utt_idx": 6, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "airline", + "value": "Delta Airline", + "start": 38, + "end": 51 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "from.time", + "value": "5:12 PM", + "start": 63, + "end": 70 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "from.time", + "value": "11:11 PM", + "start": 83, + "end": 91 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "from.time", + "value": "2:10 PM", + "start": 123, + "end": 130 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "to.time", + "value": "4:30 PM", + "start": 143, + "end": 150 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I'll choose that flight. That sounds good. 
Thank you.", + "utt_idx": 7, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "round trip", + "destination1": "Chicago", + "destination2": "", + "origin": "Seattle", + "date.depart_origin": "September 13th", + "date.depart_intermediate": "", + "date.return": "return in 3 days", + "time_of_day": "afternoon", + "seating_class": "", + "seat_location": "", + "stops": "Non-stop", + "price_range": "cheapest seat", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "Delta Airline", + "flight_number": "", + "date": "", + "from.time": "2:10 PM", + "to.time": "4:30 PM", + "stops.location": "", + "fare": "" + } + } + } + ] + }, + { + "dataset": "tm2", + "data_split": "train", + "dialogue_id": "tm2-train-9", + "original_id": "dlg-01d9b972-93b3-4e89-9eee-a460fa64d241", + "domains": [ + "flights" + ], + "goal": { + "description": "", + "inform": {}, + "request": {} + }, + "turns": [ + { + "speaker": "system", + "utterance": "Hi, how can I help you?", + "utt_idx": 0, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Hey, can you help me find a flight?", + "utt_idx": 1, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "", + "destination1": "", + "destination2": "", + "origin": "", + "date.depart_origin": "", + "date.depart_intermediate": "", + "date.return": "", + "time_of_day": "", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "I can do that 
for you.", + "utt_idx": 2, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Help me find a flight to San Francisco.", + "utt_idx": 3, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "destination1", + "value": "San Francisco", + "start": 25, + "end": 38 + } + ] + }, + "state": { + "flights": { + "type": "", + "destination1": "San Francisco", + "destination2": "", + "origin": "", + "date.depart_origin": "", + "date.depart_intermediate": "", + "date.return": "", + "time_of_day": "", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, from which airport in New York?", + "utt_idx": 4, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "origin", + "value": "New York", + "start": 28, + "end": 36 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "JFK.", + "utt_idx": 5, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "origin", + "value": "JFK", + "start": 0, + "end": 3 + } + ] + }, + "state": { + "flights": { + "type": "", + "destination1": "San Francisco", + "destination2": "", + "origin": "JFK", + "date.depart_origin": "", + "date.depart_intermediate": "", + "date.return": "", + "time_of_day": "", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", 
+ "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "What else can you tell me?", + "utt_idx": 6, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I'll be leaving April the 14th.", + "utt_idx": 7, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "date.depart_origin", + "value": "April the 14th", + "start": 16, + "end": 30 + } + ] + }, + "state": { + "flights": { + "type": "", + "destination1": "San Francisco", + "destination2": "", + "origin": "JFK", + "date.depart_origin": "April the 14th", + "date.depart_intermediate": "", + "date.return": "", + "time_of_day": "", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Okay.", + "utt_idx": 8, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I'd like to leave in the morning before 10:00 A.M.", + "utt_idx": 9, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "time_of_day", + "value": "in the morning", + "start": 18, + "end": 32 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "time_of_day", + "value": "before 10:00 A.M", + "start": 33, + "end": 49 + } + ] + }, + "state": { + "flights": { + "type": "", + "destination1": "San Francisco", + "destination2": "", 
+ "origin": "JFK", + "date.depart_origin": "April the 14th", + "date.depart_intermediate": "", + "date.return": "", + "time_of_day": "before 10:00 A.M", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Before what time?", + "utt_idx": 10, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I'm looking for a dine for tonight.", + "utt_idx": 11, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "", + "destination1": "San Francisco", + "destination2": "", + "origin": "JFK", + "date.depart_origin": "April the 14th", + "date.depart_intermediate": "", + "date.return": "", + "time_of_day": "before 10:00 A.M", + "seating_class": "", + "seat_location": "", + "stops": "", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Sure, what else?", + "utt_idx": 12, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I'd like a non-stop flight.", + "utt_idx": 13, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "stops", + "value": "non-stop", + "start": 11, + "end": 19 + } + ] + }, + "state": { + "flights": { + "type": "", + "destination1": "San 
Francisco", + "destination2": "", + "origin": "JFK", + "date.depart_origin": "April the 14th", + "date.depart_intermediate": "", + "date.return": "", + "time_of_day": "before 10:00 A.M", + "seating_class": "", + "seat_location": "", + "stops": "non-stop", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Sure thing.", + "utt_idx": 14, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "And I'd like to sit in coach.", + "utt_idx": 15, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "seating_class", + "value": "coach", + "start": 23, + "end": 28 + } + ] + }, + "state": { + "flights": { + "type": "", + "destination1": "San Francisco", + "destination2": "", + "origin": "JFK", + "date.depart_origin": "April the 14th", + "date.depart_intermediate": "", + "date.return": "", + "time_of_day": "before 10:00 A.M", + "seating_class": "coach", + "seat_location": "", + "stops": "non-stop", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Okay.", + "utt_idx": 16, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "And a flight with WiFi would be good.", + "utt_idx": 17, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + 
"domain": "flights", + "slot": "other_description", + "value": "WiFi", + "start": 18, + "end": 22 + } + ] + }, + "state": { + "flights": { + "type": "", + "destination1": "San Francisco", + "destination2": "", + "origin": "JFK", + "date.depart_origin": "April the 14th", + "date.depart_intermediate": "", + "date.return": "", + "time_of_day": "before 10:00 A.M", + "seating_class": "coach", + "seat_location": "", + "stops": "non-stop", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "WiFi", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Okay.", + "utt_idx": 18, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "That's all.", + "utt_idx": 19, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "", + "destination1": "San Francisco", + "destination2": "", + "origin": "JFK", + "date.depart_origin": "April the 14th", + "date.depart_intermediate": "", + "date.return": "", + "time_of_day": "before 10:00 A.M", + "seating_class": "coach", + "seat_location": "", + "stops": "non-stop", + "price_range": "", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "WiFi", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Alright, up to how much for a non-stop flight?", + "utt_idx": 20, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "stops", + "value": "non-stop", + "start": 30, + "end": 38 + } + ] + }, + "db_results": {} + }, + { + 
"speaker": "user", + "utterance": "$900.", + "utt_idx": 21, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "price_range", + "value": "$900", + "start": 0, + "end": 4 + } + ] + }, + "state": { + "flights": { + "type": "", + "destination1": "San Francisco", + "destination2": "", + "origin": "JFK", + "date.depart_origin": "April the 14th", + "date.depart_intermediate": "", + "date.return": "", + "time_of_day": "before 10:00 A.M", + "seating_class": "coach", + "seat_location": "", + "stops": "non-stop", + "price_range": "$900", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "WiFi", + "from": "", + "to": "", + "airline": "", + "flight_number": "", + "date": "", + "from.time": "", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "There are flights with Delta, Virgin America, Jet blue and America. There are 3 times to fly with Delta 7 A.M, 8:10 A.M or 9:30 A.M.", + "utt_idx": 22, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "airline", + "value": "Delta", + "start": 23, + "end": 28 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "airline", + "value": "Virgin America", + "start": 30, + "end": 44 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "airline", + "value": "Jet blue", + "start": 46, + "end": 54 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "airline", + "value": "America", + "start": 59, + "end": 66 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "airline", + "value": "Delta", + "start": 98, + "end": 103 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "from.time", + "value": "7 A.M", + "start": 104, + "end": 109 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "from.time", + "value": "8:10 A.M", + 
"start": 111, + "end": 119 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "from.time", + "value": "9:30 A.M", + "start": 123, + "end": 131 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "How about 7:00 A.M?", + "utt_idx": 23, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "from.time", + "value": "7:00 A.M", + "start": 10, + "end": 18 + } + ] + }, + "state": { + "flights": { + "type": "", + "destination1": "San Francisco", + "destination2": "", + "origin": "JFK", + "date.depart_origin": "April the 14th", + "date.depart_intermediate": "", + "date.return": "", + "time_of_day": "before 10:00 A.M", + "seating_class": "coach", + "seat_location": "", + "stops": "non-stop", + "price_range": "$900", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "WiFi", + "from": "", + "to": "", + "airline": "Delta", + "flight_number": "", + "date": "", + "from.time": "7:00 A.M", + "to.time": "", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "You will arrive in at 10:40 A.M.", + "utt_idx": 24, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "to.time", + "value": "10:40 A.M", + "start": 22, + "end": 31 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Okay, that works for me.", + "utt_idx": 25, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "", + "destination1": "San Francisco", + "destination2": "", + "origin": "JFK", + "date.depart_origin": "April the 14th", + "date.depart_intermediate": "", + "date.return": "", + "time_of_day": "before 10:00 A.M", + "seating_class": "coach", + "seat_location": "", + "stops": "non-stop", + "price_range": "$900", + "num.pax": "", + "luggage": "", + 
"total_fare": "", + "other_description": "WiFi", + "from": "", + "to": "", + "airline": "Delta", + "flight_number": "", + "date": "", + "from.time": "7:00 A.M", + "to.time": "10:40 A.M", + "stops.location": "", + "fare": "" + } + } + }, + { + "speaker": "system", + "utterance": "Total for this one way non-stop flight with delta is $209.", + "utt_idx": 26, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "flights", + "slot": "type", + "value": "one way", + "start": 15, + "end": 22 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "stops", + "value": "non-stop", + "start": 23, + "end": 31 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "airline", + "value": "delta", + "start": 44, + "end": 49 + }, + { + "intent": "inform", + "domain": "flights", + "slot": "fare", + "value": "$209", + "start": 53, + "end": 57 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Okay, perfect.", + "utt_idx": 27, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "flights": { + "type": "one way", + "destination1": "San Francisco", + "destination2": "", + "origin": "JFK", + "date.depart_origin": "April the 14th", + "date.depart_intermediate": "", + "date.return": "", + "time_of_day": "before 10:00 A.M", + "seating_class": "coach", + "seat_location": "", + "stops": "non-stop", + "price_range": "$900", + "num.pax": "", + "luggage": "", + "total_fare": "", + "other_description": "WiFi", + "from": "", + "to": "", + "airline": "delta", + "flight_number": "", + "date": "", + "from.time": "7:00 A.M", + "to.time": "10:40 A.M", + "stops.location": "", + "fare": "$209" + } + } + }, + { + "speaker": "system", + "utterance": "I'll send you the details.", + "utt_idx": 28, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Thank 
# Human-readable descriptions used to build the unified ontology.
# For every domain, the entry keyed by the domain name itself describes the
# domain; every other key is a slot name mapped to that slot's description.
descriptions = {
    "flights": {
        "flights": "find a round trip or multi-city flights",
        "type": "type of the flight",
        "destination1": "the first destination city of the trip",
        "destination2": "the second destination city of the trip",
        "origin": "the origin city of the trip",
        "date.depart_origin": "date of departure from origin",
        "date.depart_intermediate": "date of departure from intermediate",
        "date.return": "date of return",
        "time_of_day": "time of the flight",
        "seating_class": "seat type (first class, business class, economy class, etc.",
        "seat_location": "location of the seat",
        "stops": "non-stop, layovers, etc.",
        "price_range": "price range of the flight",
        "num.pax": "number of people",
        "luggage": "luggage information",
        "total_fare": "total cost of the trip",
        "other_description": "other description of the flight",
        "from": "departure of the flight",
        "to": "destination of the flight",
        "airline": "airline of the flight",
        "flight_number": "the number of the flight",
        "date": "date of the flight",
        "from.time": "departure time of the flight",
        "to.time": "arrival time of the flight",
        "stops.location": "location of the stop",
        "fare": "cost of the flight",
    },
    "food-ordering": {
        "food-ordering": "order take-out for a particular cuisine choice",
        "name.item": "name of the item",
        "other_description.item": "other description of the item",
        "type.retrieval": "type of the retrieval method",
        "total_price": "total price",
        "time.pickup": "pick up time",
        "num.people": "number of people",
        "name.restaurant": "name of the restaurant",
        "type.food": "type of food",
        "type.meal": "type of meal",
        "location.restaurant": "location of the restaurant",
        "rating.restaurant": "rating of the restaurant",
        "price_range": "price range of the food",
    },
    "hotels": {
        "hotels": "find a hotel using typical preferences",
        "name.hotel": "name of the hotel",
        "location.hotel": "location of the hotel",
        "sub_location.hotel": "rough location of the hotel",
        "star_rating": "star rating of the hotel",
        "customer_rating": "customer rating of the hotel",
        "customer_review": "customer review of the hotel",
        "price_range": "price range of the hotel",
        "amenity": "amenity of the hotel",
        "num.beds": "number of beds to book",
        "type.bed": "type of the bed",
        "num.rooms": "number of rooms to book",
        "check-in_date": "check-in date",
        "check-out_date": "check-out date",
        "date_range": "date range of the reservation",
        "num.guests": "number of guests",
        "type.room": "type of the room",
        "price_per_night": "price per night",
        "total_fare": "total fare",
        "location": "location of the hotel",
        "other_request": "other request",
        "other_detail": "other detail",
    },
    "movies": {
        "movies": "find a movie to watch in theaters or using a streaming service at home",
        "name.movie": "name of the movie",
        "genre": "genre of the movie",
        "name.theater": "name of the theater",
        "location.theater": "location of the theater",
        "time.start": "start time of the movie",
        "time.end": "end time of the movie",
        "price.ticket": "price of the ticket",
        "price.streaming": "price of the streaming",
        "type.screening": "type of the screening",
        "audience_rating": "audience rating",
        "critic_rating": "critic rating",
        "movie_rating": "film rating",
        "release_date": "release date of the movie",
        "runtime": "running time of the movie",
        "real_person": "name of actors, directors, etc.",
        "character": "name of character in the movie",
        "streaming_service": "streaming service that provide the movie",
        "num.tickets": "number of tickets",
        "seating": "type of seating",
        "other_description": "other description about the movie",
        "synopsis": "synopsis of the movie",
    },
    "music": {
        "music": "find several tracks to play and then comment on each one",
        "name.track": "name of the track",
        "name.artist": "name of the artist",
        "name.album": "name of the album",
        "name.genre": "music genre",
        "type.music": "rough type of the music",
        "describes_track": "description of a track to find",
        "describes_artist": "description of a artist to find",
        "describes_album": "description of an album to find",
        "describes_genre": "description of a genre to find",
        "describes_type.music": "description of the music type",
        "technical_difficulty": "there is a technical difficulty",
    },
    "restaurant-search": {
        "restaurant-search": "ask for recommendations for a particular type of cuisine",
        "name.restaurant": "name of the restaurant",
        "location": "location of the restaurant",
        "sub-location": "rough location of the restaurant",
        "type.food": "the cuisine of the restaurant",
        "menu_item": "item in the menu",
        "type.meal": "type of meal",
        "rating": "rating of the restaurant",
        "price_range": "price range of the restaurant",
        "business_hours": "business hours of the restaurant",
        "name.reservation": "name of the person who make the reservation",
        "num.guests": "number of guests",
        "time.reservation": "time of the reservation",
        "date.reservation": "date of the reservation",
        "type.seating": "type of the seating",
        "other_description": "other description of the restaurant",
        "phone": "phone number of the restaurant",
    },
    "sports": {
        "sports": "discuss facts and stats about players, teams, games, etc. in EPL, MLB, MLS, NBA, NFL",
        "name.team": "name of the team",
        "record.team": "record of the team (number of wins and losses)",
        "record.games_ahead": "number of games ahead",
        "record.games_back": "number of games behind",
        "place.team": "ranking of the team",
        "result.match": "result of the match",
        "score.match": "score of the match",
        "date.match": "date of the match",
        "day.match": "day of the match",
        "time.match": "time of the match",
        "name.player": "name of the player",
        "position.player": "position of the player",
        "record.player": "record of the player",
        "name.non_player": "name of non-palyer such as the manager, coach",
        "venue": "venue of the match take place",
        "other_description.person": "other description of the person",
        "other_description.team": "other description of the team",
        "other_description.match": "other description of the match",
    }
}

# Per-domain remapping of original annotation names.
#   str   -> rename the annotation to this slot name
#   None  -> drop the annotation entirely
#   False -> keep it as a binary dialogue act (no span / value)
anno2slot = {
    "flights": {
        "date.depart": "date.depart_origin",  # rename
        "date.intermediate": "date.depart_intermediate",  # rename
        "flight_booked": False,  # transform to binary dialog act
    },
    "food-ordering": {
        "name.person": None,  # no sample, ignore
        "phone.restaurant": None,  # no sample, ignore
        "business_hours.restaurant": None,  # no sample, ignore
        "official_description.restaurant": None,  # 1 sample, ignore
    },
    "hotels": {
        "hotel_booked": False,  # transform to binary dialog act
    },
    "movies": {
        "time.end.": "time.end",  # rename
        "seating ticket_booking": "seating",  # mixed in the original ontology
        "ticket_booking": False,  # transform to binary dialog act
        "synopsis": False,  # too long, 54 words in avg. transform to binary dialog act
    },
    "music": {},
    "restaurant-search": {
        "offical_description": False,  # too long, 15 words in avg. transform to binary dialog act
    },
    "sports": {}
}


def format_turns(ori_turns):
    """Drop deleted turns and merge consecutive turns of the same speaker.

    The input list and the dicts inside it are left untouched, so the
    function can safely be called more than once on the same data.

    Args:
        ori_turns: list of utterance dicts with at least ``speaker`` and
            ``text`` keys and an optional ``segments`` list whose items carry
            ``start_index`` / ``end_index`` character offsets.

    Returns:
        A new list of turn dicts whose ``speaker`` is ``'system'`` (original
        value ``'ASSISTANT'``) or ``'user'`` (anything else). Merged turns
        have their texts joined by a single space and the segment offsets of
        the appended text shifted accordingly.
    """
    new_turns = []
    previous_speaker = None
    for i, turn in enumerate(ori_turns):
        speaker = 'system' if turn['speaker'] == 'ASSISTANT' else 'user'
        if turn['text'] == '(deleted)':
            continue
        if speaker != previous_speaker:
            # switch speaker: start a new turn from an independent copy
            previous_speaker = speaker
            new_turn = copy.deepcopy(turn)
            new_turn['speaker'] = speaker
            new_turns.append(new_turn)
            continue

        # continuous speaking of the same speaker
        last_turn = new_turns[-1]
        # skip repeated turn (text already contained in the preceding raw turn)
        if turn['text'] in ori_turns[i-1]['text']:
            continue
        # merge continuous turns; +1 accounts for the joining space
        index_shift = len(last_turn['text']) + 1
        last_turn['text'] += ' '+turn['text']
        if 'segments' in turn:
            # shift offsets on copies so the caller's segments stay intact
            shifted_segments = copy.deepcopy(turn['segments'])
            for segment in shifted_segments:
                segment['start_index'] += index_shift
                segment['end_index'] += index_shift
            last_turn.setdefault('segments', []).extend(shifted_segments)
    return new_turns


def _read_json(path):
    """Load a UTF-8 encoded JSON file and close the handle deterministically."""
    with open(path, encoding='utf-8') as f:
        return json.load(f)


def _write_json(obj, path):
    """Dump *obj* as indented UTF-8 JSON; the file is closed (flushed) on return."""
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(obj, f, indent=2, ensure_ascii=False)


def preprocess():
    """Convert Taskmaster-2 into the unified format.

    Reads ``Taskmaster-master/TM-2-2020`` (extracting ``master.zip`` from the
    current directory when the folder is missing), builds the ontology and
    the dialogues, and writes ``dummy_data.json`` (first 10 dialogues) and
    ``data.zip`` (``data/dialogues.json`` + ``data/ontology.json``).
    The extracted original data and the temporary ``data`` folder are
    removed afterwards.

    Returns:
        ``(dialogues, ontology)`` - the list of converted dialogues
        (train + validation + test) and the ontology dict.

    Raises:
        FileNotFoundError: neither the extracted data nor ``master.zip`` exists.
        AssertionError: an annotation does not match the ontology or its span.
    """
    original_data_dir = 'Taskmaster-master'
    new_data_dir = 'data'

    if not os.path.exists(original_data_dir):
        original_data_zip = 'master.zip'
        if not os.path.exists(original_data_zip):
            raise FileNotFoundError(f'cannot find original data {original_data_zip} in tm2/, should manually download master.zip from https://github.com/google-research-datasets/Taskmaster/archive/refs/heads/master.zip')
        with ZipFile(original_data_zip) as archive:
            archive.extractall()

    os.makedirs(new_data_dir, exist_ok=True)

    ontology = {'domains': {},
                'intents': {
                    'inform': {'description': 'inform the value of a slot or general information.'}
                },
                'binary_dialogue_acts': set(),
                'state': {}}
    domains = ['flights', 'food-ordering', 'hotels', 'movies', 'music', 'restaurant-search', 'sports']
    for domain in domains:
        domain_ontology = _read_json(os.path.join(original_data_dir, f"TM-2-2020/ontology/{domain}.json"))
        assert len(domain_ontology) == 1
        ontology['domains'][domain] = {'description': descriptions[domain][domain], 'slots': {}}
        ontology['state'][domain] = {}
        for item in list(domain_ontology.values())[0]:
            for anno in item['annotations']:
                slot = anno.strip()
                if slot in anno2slot[domain]:
                    if anno2slot[domain][slot] in [None, False]:
                        # ignored or binary annotations are not slots
                        continue
                    slot = anno2slot[domain][slot]
                ontology['domains'][domain]['slots'][slot] = {
                    'description': descriptions[domain][slot],
                    'is_categorical': False,
                    'possible_values': [],
                }
                ontology['state'][domain][slot] = ''
    # add missing slots to the ontology
    for domain, slot in [('movies', 'price.streaming'), ('restaurant-search', 'phone')]:
        ontology['domains'][domain]['slots'][slot] = {
            'description': descriptions[domain][slot],
            'is_categorical': False,
            'possible_values': [],
        }
        ontology['state'][domain][slot] = ''

    dataset = 'tm2'
    splits = ['train', 'validation', 'test']
    dialogues_by_split = {split: [] for split in splits}
    for domain in domains:
        data = _read_json(os.path.join(original_data_dir, f"TM-2-2020/data/{domain}.json"))
        # random split, train:validation:test = 8:1:1
        # (re-seeded for every domain so each domain's split is reproducible)
        random.seed(42)
        dial_ids = list(range(len(data)))
        random.shuffle(dial_ids)
        train_end = int(0.8*len(dial_ids))
        validation_end = int(0.9*len(dial_ids))
        dial_id2split = {}
        for dial_id in dial_ids[:train_end]:
            dial_id2split[dial_id] = 'train'
        for dial_id in dial_ids[train_end:validation_end]:
            dial_id2split[dial_id] = 'validation'
        for dial_id in dial_ids[validation_end:]:
            dial_id2split[dial_id] = 'test'

        for dial_id, d in tqdm(enumerate(data), desc='processing taskmaster-{}'.format(domain)):
            # delete empty dialogs and invalid dialogs
            if len(d['utterances']) == 0:
                continue
            if len(set([t['speaker'] for t in d['utterances']])) == 1:
                continue
            data_split = dial_id2split[dial_id]
            dialogue_id = f'{dataset}-{data_split}-{len(dialogues_by_split[data_split])}'
            cur_domains = [domain]
            goal = {
                'description': '',
                'inform': {},
                'request': {}
            }
            dialogue = {
                'dataset': dataset,
                'data_split': data_split,
                'dialogue_id': dialogue_id,
                'original_id': d["conversation_id"],
                'domains': cur_domains,
                'goal': goal,
                'turns': []
            }
            turns = format_turns(d['utterances'])
            # state accumulated over the dialogue, starting from empty slots
            prev_state = {domain: copy.deepcopy(ontology['state'][domain])}

            for utt_idx, uttr in enumerate(turns):
                speaker = uttr['speaker']
                turn = {
                    'speaker': speaker,
                    'utterance': uttr['text'],
                    'utt_idx': utt_idx,
                    'dialogue_acts': {
                        'binary': [],
                        'categorical': [],
                        'non-categorical': [],
                    },
                }
                # per-character flags marking text already covered by a kept span
                in_span = [0] * len(turn['utterance'])

                if 'segments' in uttr:
                    # sort the span according to the length
                    segments = sorted(uttr['segments'], key=lambda x: len(x['text']))
                    for segment in segments:
                        # Each conversation was annotated by two workers.
                        # only keep the first annotation for the span
                        item = segment['annotations'][0]
                        intent = 'inform'  # default intent
                        slot = item['name'].split('.', 1)[-1].strip()
                        if slot in anno2slot[domain]:
                            if anno2slot[domain][slot] is None:
                                # skip
                                continue
                            elif anno2slot[domain][slot] is False:
                                # binary dialog act
                                turn['dialogue_acts']['binary'].append({
                                    'intent': intent,
                                    'domain': domain,
                                    'slot': slot,
                                    'value': ''
                                })
                                continue
                            else:
                                slot = anno2slot[domain][slot]
                        assert slot in ontology['domains'][domain]['slots'], f'{domain} {[slot]}'
                        assert turn['utterance'][segment['start_index']:segment['end_index']] == segment['text']
                        # skip overlapped spans, keep the shortest one
                        if sum(in_span[segment['start_index']: segment['end_index']]) > 0:
                            continue
                        in_span[segment['start_index']: segment['end_index']] = [1]*(segment['end_index']-segment['start_index'])
                        turn['dialogue_acts']['non-categorical'].append({
                            'intent': intent,
                            'domain': domain,
                            'slot': slot,
                            'value': segment['text'],
                            'start': segment['start_index'],
                            'end': segment['end_index']
                        })

                turn['dialogue_acts']['non-categorical'] = sorted(turn['dialogue_acts']['non-categorical'], key=lambda x: x['start'])

                # de-duplicate binary acts of the turn and register them in the ontology
                bdas = set()
                for da in turn['dialogue_acts']['binary']:
                    da_tuple = (da['intent'], da['domain'], da['slot'], da['value'],)
                    bdas.add(da_tuple)
                    ontology['binary_dialogue_acts'].add(da_tuple)
                turn['dialogue_acts']['binary'] = [{'intent': bda[0], 'domain': bda[1], 'slot': bda[2], 'value': bda[3]} for bda in sorted(bdas)]

                for da in turn['dialogue_acts']['non-categorical']:
                    slot, value = da['slot'], da['value']
                    assert slot in prev_state[domain]
                    prev_state[domain][slot] = value

                if speaker == 'user':
                    turn['state'] = copy.deepcopy(prev_state)
                else:
                    turn['db_results'] = {}

                dialogue['turns'].append(turn)
            dialogues_by_split[data_split].append(dialogue)

    ontology['binary_dialogue_acts'] = [{'intent': bda[0], 'domain': bda[1], 'slot': bda[2], 'value': bda[3]} for bda in sorted(ontology['binary_dialogue_acts'])]
    dialogues = dialogues_by_split['train']+dialogues_by_split['validation']+dialogues_by_split['test']
    # every JSON file is closed before it is added to the archive below
    _write_json(dialogues[:10], 'dummy_data.json')
    _write_json(dialogues, f'{new_data_dir}/dialogues.json')
    _write_json(ontology, f'{new_data_dir}/ontology.json')
    with ZipFile('data.zip', 'w', ZIP_DEFLATED) as zf:
        for filename in os.listdir(new_data_dir):
            zf.write(f'{new_data_dir}/{filename}')
    rmtree(original_data_dir)
    rmtree(new_data_dir)
    return dialogues, ontology


if __name__ == '__main__':
    preprocess()
In order to gather a wide range of conversational scenarios and linguistic phenomena, workers were given both open-ended as well as highly structured conversational tasks. In all, we used over three dozen sets of instructions while building this corpus. The "instructions" field in data.json provides the exact scenario workers were given to complete each dialog. In this way, conversations involve a wide variety of paths, from those where the customer decides on a movie based on genre, their location, current releases, or from what they already have in mind. In addition, dialogs also include error handling with respect to repair (e.g. "No, I said Tom Cruise."), clarifications (e.g. "Sorry. Did you want the AMC 16 or Century City 16?") and other common conversational hiccups. In some cases instructions are completely open ended e.g. "Pretend you are taking your friend to a movie in Salem, Oregon. Create a conversation where you end up buying two tickets after finding out what is playing in at least two local theaters. Make sure the ticket purchase includes a confirmation of the details by the agent before the purchase, including date, time, movie, theater, and number of tickets." In other cases we restrict the conversational content and structure by offering a partially completed conversation that the workers must finalize or fill in based on certain parameters. These partially completed dialogs are labeled "Auto template" in the "scenario" field shown for each conversation in the data.json file. In some cases, we provided a small KB from which workers would choose movies, theaters, etc. but in most cases (pre-pandemic) workers were told to use the internet to get accurate current details for their dialogs. In any case, all relevant entities are annotated. + +- **How to get the transformed data from original data:** + - Download [master.zip](https://github.com/google-research-datasets/Taskmaster/archive/refs/heads/master.zip). 
+ - Run `python preprocess.py` in the current directory. +- **Main changes of the transformation:** + - Remove dialogs that are empty or only contain one speaker. + - Split the dialogs of each domain into train/validation/test randomly (8:1:1). + - Merge consecutive turns by the same speaker (ignore repeated turns). + - Annotate `dialogue acts` according to the original segment annotations. Add `intent` annotation (`==inform`). The type of `dialogue act` is set to `non-categorical` if the `slot` is not `description.other` or `description.plot`. Otherwise, the type is set to `binary` (and the `value` is empty). If there are multiple spans overlapping, we only keep the shortest one, since we found that this simple strategy can reduce the noise in annotation. + - Add `domain` and `intent` descriptions. + - Rename `api` to `db_results`. + - Add `state` by accumulating `non-categorical dialogue acts` in the order that they appear. +- **Annotations:** + - dialogue acts, state, db_results. + +### Supported Tasks and Leaderboards + +NLU, DST, Policy, NLG, E2E + +### Languages + +English + +### Data Splits + +| split | dialogues | utterances | avg_utt | avg_tokens | avg_domains | cat slot match(state) | cat slot match(goal) | cat slot match(dialogue act) | non-cat slot span(dialogue act) | +|------------|-------------|--------------|-----------|--------------|---------------|-------------------------|------------------------|--------------------------------|-----------------------------------| +| train | 18997 | 380646 | 20.04 | 10.48 | 1 | - | - | - | 100 | +| validation | 2380 | 47531 | 19.97 | 10.38 | 1 | - | - | - | 100 | +| test | 2380 | 48849 | 20.52 | 10.12 | 1 | - | - | - | 100 | +| all | 23757 | 477026 | 20.08 | 10.43 | 1 | - | - | - | 100 | + +1 domains: ['movie'] +- **cat slot match**: how many values of categorical slots are in the possible values of ontology in percentage. 
+- **non-cat slot span**: how many values of non-categorical slots have span annotation in percentage. + +### Citation + +``` +@inproceedings{byrne-etal-2021-tickettalk, + title = "{T}icket{T}alk: Toward human-level performance with end-to-end, transaction-based dialog systems", + author = "Byrne, Bill and + Krishnamoorthi, Karthik and + Ganesh, Saravanan and + Kale, Mihir", + booktitle = "Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)", + month = aug, + year = "2021", + address = "Online", + publisher = "Association for Computational Linguistics", + url = "https://aclanthology.org/2021.acl-long.55", + doi = "10.18653/v1/2021.acl-long.55", + pages = "671--680", +} +``` + +### Licensing Information + +[**CC BY 4.0**](https://creativecommons.org/licenses/by/4.0/) \ No newline at end of file diff --git a/data/unified_datasets/tm3/data.zip b/data/unified_datasets/tm3/data.zip new file mode 100644 index 0000000000000000000000000000000000000000..21455efde30494a3396aaeedeca896e8e1599434 Binary files /dev/null and b/data/unified_datasets/tm3/data.zip differ diff --git a/data/unified_datasets/tm3/dummy_data.json b/data/unified_datasets/tm3/dummy_data.json new file mode 100644 index 0000000000000000000000000000000000000000..48fe1ae8c97e4f7ff625cc582532ce4e91453551 --- /dev/null +++ b/data/unified_datasets/tm3/dummy_data.json @@ -0,0 +1,5507 @@ +[ + { + "dataset": "tm3", + "data_split": "train", + "dialogue_id": "tm3-train-0", + "original_id": "dlg-9962109d-437b-4ed3-b037-980272a3bda1", + "domains": [ + "movie" + ], + "goal": { + "description": "SCENARIO: In the conversation below, a customer is talking to a booking agent to purchase movie tickets. However, the customer’s turns are missing. The details mentioned (theater names, cities, etc.) 
do NOT necessarily reflect real life, but you should pretend they are real for the purposes of this exercise.\n\nYOUR TASK: {{LIST *Pretend you are the customer*., Fill in the *missing turns* (in green) so that the conversation makes sense and sounds natural., Make sure you *follow the instructions* provided in parentheses for each turn. (Start typing to make the instructions move outside the box.), Do not copy the wording. *Use your own words*., *Use your normal speaking style* for this scenario but don’t be overly chatty., *Do NOT* call the booking agent by any name including “sir” “madam” etc.}}\n\n{{HIDE movie_1 name.movie No Time To Die}}\n{{HIDE theater_1 name.theater AMC Mountain 16}}\n{{HIDE theater_2 name.theater AMC Mercado 24}}\n{{HIDE genre_1 name.genre action}}\n{{HIDE movie_1_showtime_1 time.showing 4:30pm}}\n{{HIDE movie_1_showtime_2 time.showing 6:40pm}}\n{{HIDE movie_1_showtime_3 time.showing 9:10pm}}\n{{HIDE num_tickets num.tickets three}}\n{{HIDE date date.showing tonight}}\n{{HIDE success_closing success.closing OK. Enjoy your movie!}}\nBEFORE YOU BEGIN: Provide a value for this detail.\n{{ENTER people people.tickets Who’s all going to the movie? (use a phrase describing *three people* that includes reference to yourself.)}}\n\n{{CONVERSATION\nhint user (In your own words, express interest in buying movie tickets for var_date and specify theater ‘var_theater_1’.)\nforce assistant No problem. Is there a particular type of movie you’re looking for?\nhint user (mention ‘var_genre_1’ in your response)\nforce assistant Sure. I can help with that. Let me check listings at var_theater_2.\nhint user (Correct the agent’s mistake: You wanted ‘var_theater_1’.)\nforce assistant Oh, sorry about that. 
So you’re interested in var_genre_1 films at var_theater_1, right?\nhint user (Confirm the agent’s correction.)\ncall find_movies name.genre var_genre_1 name.theater var_theater_1\nresponse find_movies name.movie var_movie_1\ncall find_showtimes name.theater var_theater_1 date.showing var_date name.movie var_movie_1\nresponse find_showtimes time.showing var_movie_1_showtime_1 time.showing var_movie_1_showtime_2 time.showing var_movie_1_showtime_3\nforce assistant OK. I see one var_genre_1 movie playing at var_theater_1: var_movie_1. Remaining showtimes are var_movie_1_showtime_1, var_movie_1_showtime_2 and var_movie_1_showtime_3. Does any of those work?\nhint user (Answer positively and choose ‘var_movie_1_showtime_3’)\nforce assistant Great. And how many tickets?\nhint user (Include your earlier answer of ‘var_people’ in this response.)\nforce assistant All right. Let me confirm that you’d like var_num_tickets tickets for var_movie_1 at var_theater_1 var_date at var_movie_1_showtime_3. Is that all correct?\nhint user (Confirm.)\nforce assistant Is it OK to go ahead and purchase these tickets?\nhint user (Give positive answer).\ncall book_tickets name.movie var_movie_1 name.theater var_theater_1 time.showing var_movie_1_showtime_1 num.tickets var_num_tickets date.showing var_date\nresponse book_tickets api.response success\nforce assistant OK. Your tickets are purchased and details for how to proceed have been sent to your email address. 
Can I help with anything else?\nhint user (polite negative answer)\nforce assistant var_success_closing\n}}\n\n{{CHECK confirm_native_speaker This conversation task was written by a native English speaker.}}\n{{CHECK confirm_logical The conversation between the user and the assistant is cooperative and logical.}}\n{{CHECK confirm_natural All the individual turns from the user (green boxes) sound natural and familiar.}}\n{{CHECK confirm_casual All the individual turns from the user (green boxes) are casual but polite.}}\n{{CHECK confirm_appropriate All the individual turns from the user (green boxes) are free of offensive or inappropriate language.}}\n", + "inform": {}, + "request": {} + }, + "turns": [ + { + "speaker": "user", + "utterance": "I'd like to buy tickets for movie particularly on AMC Mercado 24.", + "utt_idx": 0, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "", + "name.theater": "", + "date.showing": "", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "No problem. 
Is there a particular type of movie you’re looking for?", + "utt_idx": 1, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I prefer to go for an action movie, Let me know the movie lists & show timings.", + "utt_idx": 2, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.genre", + "value": "action", + "start": 22, + "end": 28 + } + ] + }, + "state": { + "movie": { + "name.movie": "", + "name.theater": "", + "date.showing": "", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Sure. I can help with that. 
Let me check listings at AMC Mercado 24.", + "utt_idx": 3, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "AMC Mercado 24", + "start": 53, + "end": 67 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Please do check at AMC Mercado 24 instead of AMC Mercado 16", + "utt_idx": 4, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "", + "name.theater": "AMC Mercado 24", + "date.showing": "", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Oh, sorry about that. 
So you’re interested in action films at AMC Mountain 16, right?", + "utt_idx": 5, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.genre", + "value": "action", + "start": 46, + "end": 52 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "AMC Mountain 16", + "start": 62, + "end": 77 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes, It is", + "utt_idx": 6, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "", + "name.theater": "AMC Mountain 16", + "date.showing": "", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "OK. I show one action movie playing at AMC Mountain 16: No Time To Die. Remaining showtimes are 4:30pm, 6:40pm and 9:10pm. 
Does any of those work?", + "utt_idx": 7, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.genre", + "value": "action", + "start": 15, + "end": 21 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "AMC Mountain 16:", + "start": 39, + "end": 55 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.movie", + "value": "No Time To Die", + "start": 56, + "end": 70 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "4:30pm", + "start": 96, + "end": 102 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "6:40pm", + "start": 104, + "end": 110 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "9:10pm", + "start": 115, + "end": 121 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Thank you. Make it for 9:10 pm", + "utt_idx": 8, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "9:10 pm", + "start": 23, + "end": 30 + } + ] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16:", + "date.showing": "", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "9:10 pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Great. 
And how many tickets?", + "utt_idx": 9, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I need 9 tickets.", + "utt_idx": 10, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16:", + "date.showing": "", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "9:10 pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "All right. Let me confirm that you’d like three tickets for No Time To Die at AMC Mountain 16 tonight at 9:10pm. Is that all correct?", + "utt_idx": 11, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "num.tickets", + "value": "three", + "start": 42, + "end": 47 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.movie", + "value": "No Time To Die", + "start": 60, + "end": 74 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "AMC Mountain 16", + "start": 78, + "end": 93 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "date.showing", + "value": "tonight", + "start": 94, + "end": 101 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "9:10pm", + "start": 105, + "end": 111 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes, Go ahead. 
Good to go then.", + "utt_idx": 12, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "three", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "9:10pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Is it OK to go ahead and purchase these tickets?", + "utt_idx": 13, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Thank you for your assistance. Have a good one.", + "utt_idx": 14, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "three", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "9:10pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "OK. Your tickets are purchased and details for how to proceed have been sent to your email address. Can I help with anything else?", + "utt_idx": 15, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "No, that is all for the day. 
Thanks!", + "utt_idx": 16, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "three", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "9:10pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "OK. Enjoy your movie!", + "utt_idx": 17, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + } + ] + }, + { + "dataset": "tm3", + "data_split": "train", + "dialogue_id": "tm3-train-1", + "original_id": "dlg-684cfa8c-0f50-4ecb-9543-12b4080d5dec", + "domains": [ + "movie" + ], + "goal": { + "description": "SCENARIO: In the conversation below, a customer is talking to a booking agent to purchase movie tickets. However, the customer’s turns are missing. The details mentioned (theater names, cities, etc.) do NOT necessarily reflect real life, but you should pretend they are real for the purposes of this exercise.\n\nYOUR TASK: {{LIST *Pretend you are the customer*., Fill in the *missing turns* (in green) so that the conversation makes sense and sounds natural., Make sure you *follow the instructions* provided in parentheses for each turn. (Start typing to make the instructions move outside the box.), Do not copy the wording. 
*Use your own words*., *Use your normal speaking style* for this scenario but don’t be overly chatty., *Do NOT* call the booking agent by any name including “sir” “madam” etc.}}\n\n{{HIDE movie_1 name.movie No Time To Die}}\n{{HIDE theater_1 name.theater AMC Mountain 16}}\n{{HIDE theater_2 name.theater AMC Mercado 24}}\n{{HIDE genre_1 name.genre action}}\n{{HIDE movie_1_showtime_1 time.showing 4:30pm}}\n{{HIDE movie_1_showtime_2 time.showing 6:40pm}}\n{{HIDE movie_1_showtime_3 time.showing 9:10pm}}\n{{HIDE num_tickets num.tickets three}}\n{{HIDE date date.showing tonight}}\n{{HIDE success_closing success.closing OK. Enjoy your movie!}}\nBEFORE YOU BEGIN: Provide a value for this detail.\n{{ENTER people people.tickets Who’s all going to the movie? (use a phrase describing *three people* that includes reference to yourself.)}}\n\n{{CONVERSATION\nhint user (In your own words, express interest in buying movie tickets for var_date and specify theater ‘var_theater_1’.)\nforce assistant No problem. Is there a particular type of movie you’re looking for?\nhint user (mention ‘var_genre_1’ in your response)\nforce assistant Sure. I can help with that. Let me check listings at var_theater_2.\nhint user (Correct the agent’s mistake: You wanted ‘var_theater_1’.)\nforce assistant Oh, sorry about that. So you’re interested in var_genre_1 films at var_theater_1, right?\nhint user (Confirm the agent’s correction.)\ncall find_movies name.genre var_genre_1 name.theater var_theater_1\nresponse find_movies name.movie var_movie_1\ncall find_showtimes name.theater var_theater_1 date.showing var_date name.movie var_movie_1\nresponse find_showtimes time.showing var_movie_1_showtime_1 time.showing var_movie_1_showtime_2 time.showing var_movie_1_showtime_3\nforce assistant OK. I see one var_genre_1 movie playing at var_theater_1: var_movie_1. Remaining showtimes are var_movie_1_showtime_1, var_movie_1_showtime_2 and var_movie_1_showtime_3. 
Does any of those work?\nhint user (Answer positively and choose ‘var_movie_1_showtime_3’)\nforce assistant Great. And how many tickets?\nhint user (Include your earlier answer of ‘var_people’ in this response.)\nforce assistant All right. Let me confirm that you’d like var_num_tickets tickets for var_movie_1 at var_theater_1 var_date at var_movie_1_showtime_3. Is that all correct?\nhint user (Confirm.)\nforce assistant Is it OK to go ahead and purchase these tickets?\nhint user (Give positive answer).\ncall book_tickets name.movie var_movie_1 name.theater var_theater_1 time.showing var_movie_1_showtime_1 num.tickets var_num_tickets date.showing var_date\nresponse book_tickets api.response success\nforce assistant OK. Your tickets are purchased and details for how to proceed have been sent to your email address. Can I help with anything else?\nhint user (polite negative answer)\nforce assistant var_success_closing\n}}\n\n{{CHECK confirm_native_speaker This conversation task was written by a native English speaker.}}\n{{CHECK confirm_logical The conversation between the user and the assistant is cooperative and logical.}}\n{{CHECK confirm_natural All the individual turns from the user (green boxes) sound natural and familiar.}}\n{{CHECK confirm_casual All the individual turns from the user (green boxes) are casual but polite.}}\n{{CHECK confirm_appropriate All the individual turns from the user (green boxes) are free of offensive or inappropriate language.}}\n", + "inform": {}, + "request": {} + }, + "turns": [ + { + "speaker": "user", + "utterance": "Can you get me movie tickets for tonight at the AMC Mountain 16?", + "utt_idx": 0, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "date.showing", + "value": "tonight", + "start": 33, + "end": 40 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "AMC Mountain 16", + "start": 48, + "end": 63 + 
} + ] + }, + "state": { + "movie": { + "name.movie": "", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "No problem. Is there a particular type of movie you’re looking for?", + "utt_idx": 1, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Are there any action movies?", + "utt_idx": 2, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.genre", + "value": "action", + "start": 14, + "end": 20 + } + ] + }, + "state": { + "movie": { + "name.movie": "", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Sure. I can help with that. 
Let me check listings at AMC Mercado 24.", + "utt_idx": 3, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "AMC Mercado 24", + "start": 53, + "end": 67 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "No, not the Mercado, the AMC Mountain 16.", + "utt_idx": 4, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "AMC Mountain 16", + "start": 25, + "end": 40 + } + ] + }, + "state": { + "movie": { + "name.movie": "", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Oh, sorry about that. 
So you’re interested in action films at AMC Mountain 16, right?", + "utt_idx": 5, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.genre", + "value": "action", + "start": 46, + "end": 52 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "AMC Mountain 16", + "start": 62, + "end": 77 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "That's correct.", + "utt_idx": 6, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "OK. I see one action movie playing at AMC Mountain 16: No Time To Die. Remaining showtimes are 4:30pm, 6:40pm and 9:10pm. 
Does any of those work?", + "utt_idx": 7, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.genre", + "value": "action", + "start": 14, + "end": 20 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "AMC Mountain 16:", + "start": 38, + "end": 54 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.movie", + "value": "No Time To Die", + "start": 55, + "end": 69 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "4:30pm", + "start": 95, + "end": 101 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "6:40pm", + "start": 103, + "end": 109 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "9:10pm", + "start": 114, + "end": 120 + } + ] + }, + "db_results": { + "movie": [ + { + "index": 6, + "args": { + "name.genre": "action", + "name.theater": "AMC Mountain 16" + }, + "response": { + "name.movie_1": "No Time To Die" + }, + "name": "find_movies" + }, + { + "index": 6, + "args": { + "name.theater": "AMC Mountain 16", + "name.movie": "No Time To Die" + }, + "response": { + "time.showing_1": "4:30pm", + "time.showing_2": "6:40pm", + "time.showing_3": "9:10pm" + }, + "name": "find_showtimes" + } + ] + } + }, + { + "speaker": "user", + "utterance": "Yes, the 9:10pm sounds good.", + "utt_idx": 8, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "9:10pm", + "start": 9, + "end": 15 + } + ] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16:", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + 
"price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "9:10pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Great. And how many tickets?", + "utt_idx": 9, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Me and two others.", + "utt_idx": 10, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16:", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "9:10pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "All right. Let me confirm that you’d like three tickets for No Time To Die at AMC Mountain 16 tonight at 9:10pm. 
Is that all correct?", + "utt_idx": 11, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "num.tickets", + "value": "three", + "start": 42, + "end": 47 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.movie", + "value": "No Time To Die", + "start": 60, + "end": 74 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "AMC Mountain 16", + "start": 78, + "end": 93 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "date.showing", + "value": "tonight", + "start": 94, + "end": 101 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "9:10pm", + "start": 105, + "end": 111 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Affirmative.", + "utt_idx": 12, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "three", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "9:10pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Is it OK to go ahead and purchase these tickets?", + "utt_idx": 13, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Make it so.", + "utt_idx": 14, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + 
"duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "three", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "9:10pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "OK. Your tickets are purchased and details for how to proceed have been sent to your email address. Can I help with anything else?", + "utt_idx": 15, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": { + "movie": [ + { + "index": 14, + "args": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16", + "time.showing": "4:30pm", + "num.tickets": "three", + "date.showing": "tonight" + }, + "response": { + "status": "success" + }, + "name": "book_tickets" + } + ] + } + }, + { + "speaker": "user", + "utterance": "No, that's all.", + "utt_idx": 16, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "three", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "9:10pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "OK. 
Enjoy your movie!", + "utt_idx": 17, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + } + ] + }, + { + "dataset": "tm3", + "data_split": "train", + "dialogue_id": "tm3-train-2", + "original_id": "dlg-0717348f-786f-477a-a87b-7fcade729006", + "domains": [ + "movie" + ], + "goal": { + "description": "SCENARIO: In the conversation below, a customer is talking to a booking agent to purchase movie tickets. However, the customer’s turns are missing. The details mentioned (theater names, cities, etc.) do NOT necessarily reflect real life, but you should pretend they are real for the purposes of this exercise.\n\nYOUR TASK: {{LIST *Pretend you are the customer*., Fill in the *missing turns* (in green) so that the conversation makes sense and sounds natural., Make sure you *follow the instructions* provided in parentheses for each turn. (Start typing to make the instructions move outside the box.), Do not copy the wording. *Use your own words*., *Use your normal speaking style* for this scenario but don’t be overly chatty., *Do NOT* call the booking agent by any name including “sir” “madam” etc.}}\n\n{{HIDE movie_1 name.movie No Time To Die}}\n{{HIDE theater_1 name.theater AMC Mountain 16}}\n{{HIDE theater_2 name.theater AMC Mercado 24}}\n{{HIDE genre_1 name.genre action}}\n{{HIDE movie_1_showtime_1 time.showing 4:30pm}}\n{{HIDE movie_1_showtime_2 time.showing 6:40pm}}\n{{HIDE movie_1_showtime_3 time.showing 9:10pm}}\n{{HIDE num_tickets num.tickets three}}\n{{HIDE date date.showing tonight}}\n{{HIDE success_closing success.closing OK. Enjoy your movie!}}\nBEFORE YOU BEGIN: Provide a value for this detail.\n{{ENTER people people.tickets Who’s all going to the movie? (use a phrase describing *three people* that includes reference to yourself.)}}\n\n{{CONVERSATION\nhint user (In your own words, express interest in buying movie tickets for var_date and specify theater ‘var_theater_1’.)\nforce assistant No problem. 
Is there a particular type of movie you’re looking for?\nhint user (mention ‘var_genre_1’ in your response)\nforce assistant Sure. I can help with that. Let me check listings at var_theater_2.\nhint user (Correct the agent’s mistake: You wanted ‘var_theater_1’.)\nforce assistant Oh, sorry about that. So you’re interested in var_genre_1 films at var_theater_1, right?\nhint user (Confirm the agent’s correction.)\ncall find_movies name.genre var_genre_1 name.theater var_theater_1\nresponse find_movies name.movie var_movie_1\ncall find_showtimes name.theater var_theater_1 date.showing var_date name.movie var_movie_1\nresponse find_showtimes time.showing var_movie_1_showtime_1 time.showing var_movie_1_showtime_2 time.showing var_movie_1_showtime_3\nforce assistant OK. I see one var_genre_1 movie playing at var_theater_1: var_movie_1. Remaining showtimes are var_movie_1_showtime_1, var_movie_1_showtime_2 and var_movie_1_showtime_3. Does any of those work?\nhint user (Answer positively and choose ‘var_movie_1_showtime_3’)\nforce assistant Great. And how many tickets?\nhint user (Include your earlier answer of ‘var_people’ in this response.)\nforce assistant All right. Let me confirm that you’d like var_num_tickets tickets for var_movie_1 at var_theater_1 var_date at var_movie_1_showtime_3. Is that all correct?\nhint user (Confirm.)\nforce assistant Is it OK to go ahead and purchase these tickets?\nhint user (Give positive answer).\ncall book_tickets name.movie var_movie_1 name.theater var_theater_1 time.showing var_movie_1_showtime_1 num.tickets var_num_tickets date.showing var_date\nresponse book_tickets api.response success\nforce assistant OK. Your tickets are purchased and details for how to proceed have been sent to your email address. 
Can I help with anything else?\nhint user (polite negative answer)\nforce assistant var_success_closing\n}}\n\n{{CHECK confirm_native_speaker This conversation task was written by a native English speaker.}}\n{{CHECK confirm_logical The conversation between the user and the assistant is cooperative and logical.}}\n{{CHECK confirm_natural All the individual turns from the user (green boxes) sound natural and familiar.}}\n{{CHECK confirm_casual All the individual turns from the user (green boxes) are casual but polite.}}\n{{CHECK confirm_appropriate All the individual turns from the user (green boxes) are free of offensive or inappropriate language.}}\n", + "inform": {}, + "request": {} + }, + "turns": [ + { + "speaker": "user", + "utterance": "I'd like to see a movie tonight, specifically at the AMC Mountain 16.", + "utt_idx": 0, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "date.showing", + "value": "tonight", + "start": 24, + "end": 31 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "AMC Mountain 16", + "start": 53, + "end": 68 + } + ] + }, + "state": { + "movie": { + "name.movie": "", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "No problem. 
Is there a particular type of movie you’re looking for?", + "utt_idx": 1, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yeah, I want an action movie.", + "utt_idx": 2, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.genre", + "value": "action", + "start": 16, + "end": 22 + } + ] + }, + "state": { + "movie": { + "name.movie": "", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Sure. I can help with that. 
Let me check listings at AMC Mercado 24.", + "utt_idx": 3, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "AMC Mercado 24", + "start": 53, + "end": 67 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Actually, I wanted the AMC Mountain 16.", + "utt_idx": 4, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "AMC Mountain 16", + "start": 23, + "end": 38 + } + ] + }, + "state": { + "movie": { + "name.movie": "", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Oh, sorry about that. 
So you’re interested in action films at AMC Mountain 16, right?", + "utt_idx": 5, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.genre", + "value": "action", + "start": 46, + "end": 52 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "AMC Mountain 16", + "start": 62, + "end": 77 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "That's right.", + "utt_idx": 6, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "OK. I see one action movie playing at AMC Mountain 16: No Time To Die. Remaining showtimes are 4:30pm, 6:40pm and 9:10pm. 
Does any of those work?", + "utt_idx": 7, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.genre", + "value": "action", + "start": 14, + "end": 20 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "AMC Mountain 16:", + "start": 38, + "end": 54 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.movie", + "value": "No Time To Die", + "start": 55, + "end": 69 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "4:30pm", + "start": 95, + "end": 101 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "6:40pm", + "start": 103, + "end": 109 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "9:10pm", + "start": 114, + "end": 120 + } + ] + }, + "db_results": { + "movie": [ + { + "index": 6, + "args": { + "name.genre": "action", + "name.theater": "AMC Mountain 16" + }, + "response": { + "name.movie_1": "No Time To Die" + }, + "name": "find_movies" + }, + { + "index": 6, + "args": { + "name.theater": "AMC Mountain 16", + "name.movie": "No Time To Die" + }, + "response": { + "time.showing_1": "4:30pm", + "time.showing_2": "6:40pm", + "time.showing_3": "9:10pm" + }, + "name": "find_showtimes" + } + ] + } + }, + { + "speaker": "user", + "utterance": "Yes, let's do the 9:10 showing.", + "utt_idx": 8, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16:", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + 
"time.showing": "9:10pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Great. And how many tickets?", + "utt_idx": 9, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Myself, my wife, and my friend.", + "utt_idx": 10, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16:", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "9:10pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "All right. Let me confirm that you’d like three tickets for No Time To Die at AMC Mountain 16 tonight at 9:10pm. 
Is that all correct?", + "utt_idx": 11, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "num.tickets", + "value": "three", + "start": 42, + "end": 47 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.movie", + "value": "No Time To Die", + "start": 60, + "end": 74 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "AMC Mountain 16", + "start": 78, + "end": 93 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "date.showing", + "value": "tonight", + "start": 94, + "end": 101 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "9:10pm", + "start": 105, + "end": 111 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "That is correct.", + "utt_idx": 12, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "three", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "9:10pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Is it OK to go ahead and purchase these tickets?", + "utt_idx": 13, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes it is.", + "utt_idx": 14, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + 
"duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "three", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "9:10pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "OK. Your tickets are purchased and details for how to proceed have been sent to your email address. Can I help with anything else?", + "utt_idx": 15, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": { + "movie": [ + { + "index": 14, + "args": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16", + "time.showing": "4:30pm", + "num.tickets": "three", + "date.showing": "tonight" + }, + "response": { + "status": "success" + }, + "name": "book_tickets" + } + ] + } + }, + { + "speaker": "user", + "utterance": "No, thank you very much.", + "utt_idx": 16, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "three", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "9:10pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "OK. 
Enjoy your movie!", + "utt_idx": 17, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + } + ] + }, + { + "dataset": "tm3", + "data_split": "train", + "dialogue_id": "tm3-train-3", + "original_id": "dlg-9ef3529f-fed0-4d76-8f3c-4d1fcdab9ed1", + "domains": [ + "movie" + ], + "goal": { + "description": "SCENARIO: In the conversation below, a customer is talking to a booking agent to purchase movie tickets. However, the customer’s turns are missing. The details mentioned (theater names, cities, etc.) do NOT necessarily reflect real life, but you should pretend they are real for the purposes of this exercise.\n\nYOUR TASK: {{LIST *Pretend you are the customer*., Fill in the *missing turns* (in green) so that the conversation makes sense and sounds natural., Make sure you *follow the instructions* provided in parentheses for each turn. (Start typing to make the instructions move outside the box.), Do not copy the wording. *Use your own words*., *Use your normal speaking style* for this scenario but don’t be overly chatty., *Do NOT* call the booking agent by any name including “sir” “madam” etc.}}\n\n{{HIDE movie_1 name.movie No Time To Die}}\n{{HIDE theater_1 name.theater AMC Mountain 16}}\n{{HIDE theater_2 name.theater AMC Mercado 24}}\n{{HIDE genre_1 name.genre action}}\n{{HIDE movie_1_showtime_1 time.showing 4:30pm}}\n{{HIDE movie_1_showtime_2 time.showing 6:40pm}}\n{{HIDE movie_1_showtime_3 time.showing 9:10pm}}\n{{HIDE num_tickets num.tickets three}}\n{{HIDE date date.showing tonight}}\n{{HIDE success_closing success.closing OK. Enjoy your movie!}}\nBEFORE YOU BEGIN: Provide a value for this detail.\n{{ENTER people people.tickets Who’s all going to the movie? (use a phrase describing *three people* that includes reference to yourself.)}}\n\n{{CONVERSATION\nhint user (In your own words, express interest in buying movie tickets for var_date and specify theater ‘var_theater_1’.)\nforce assistant No problem. 
Is there a particular type of movie you’re looking for?\nhint user (mention ‘var_genre_1’ in your response)\nforce assistant Sure. I can help with that. Let me check listings at var_theater_2.\nhint user (Correct the agent’s mistake: You wanted ‘var_theater_1’.)\nforce assistant Oh, sorry about that. So you’re interested in var_genre_1 films at var_theater_1, right?\nhint user (Confirm the agent’s correction.)\ncall find_movies name.genre var_genre_1 name.theater var_theater_1\nresponse find_movies name.movie var_movie_1\ncall find_showtimes name.theater var_theater_1 date.showing var_date name.movie var_movie_1\nresponse find_showtimes time.showing var_movie_1_showtime_1 time.showing var_movie_1_showtime_2 time.showing var_movie_1_showtime_3\nforce assistant OK. I see one var_genre_1 movie playing at var_theater_1: var_movie_1. Remaining showtimes are var_movie_1_showtime_1, var_movie_1_showtime_2 and var_movie_1_showtime_3. Does any of those work?\nhint user (Answer positively and choose ‘var_movie_1_showtime_3’)\nforce assistant Great. And how many tickets?\nhint user (Include your earlier answer of ‘var_people’ in this response.)\nforce assistant All right. Let me confirm that you’d like var_num_tickets tickets for var_movie_1 at var_theater_1 var_date at var_movie_1_showtime_3. Is that all correct?\nhint user (Confirm.)\nforce assistant Is it OK to go ahead and purchase these tickets?\nhint user (Give positive answer).\ncall book_tickets name.movie var_movie_1 name.theater var_theater_1 time.showing var_movie_1_showtime_1 num.tickets var_num_tickets date.showing var_date\nresponse book_tickets api.response success\nforce assistant OK. Your tickets are purchased and details for how to proceed have been sent to your email address. 
Can I help with anything else?\nhint user (polite negative answer)\nforce assistant var_success_closing\n}}\n\n{{CHECK confirm_native_speaker This conversation task was written by a native English speaker.}}\n{{CHECK confirm_logical The conversation between the user and the assistant is cooperative and logical.}}\n{{CHECK confirm_natural All the individual turns from the user (green boxes) sound natural and familiar.}}\n{{CHECK confirm_casual All the individual turns from the user (green boxes) are casual but polite.}}\n{{CHECK confirm_appropriate All the individual turns from the user (green boxes) are free of offensive or inappropriate language.}}\n", + "inform": {}, + "request": {} + }, + "turns": [ + { + "speaker": "user", + "utterance": "I'd like to get some tickets to a movie for tonight at AMC Mountain 16.", + "utt_idx": 0, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "date.showing", + "value": "tonight", + "start": 44, + "end": 51 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "AMC Mountain 16", + "start": 55, + "end": 70 + } + ] + }, + "state": { + "movie": { + "name.movie": "", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "No problem. 
Is there a particular type of movie you’re looking for?", + "utt_idx": 1, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "If there is an action movie playing, that would be great.", + "utt_idx": 2, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.genre", + "value": "action", + "start": 15, + "end": 21 + } + ] + }, + "state": { + "movie": { + "name.movie": "", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Sure. I can help with that. 
Let me check listings at AMC Mercado 24.", + "utt_idx": 3, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "AMC Mercado 24", + "start": 53, + "end": 67 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Wait, it's supposed to be AMC Mountain 16.", + "utt_idx": 4, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "AMC Mountain 16", + "start": 26, + "end": 41 + } + ] + }, + "state": { + "movie": { + "name.movie": "", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Oh, sorry about that. 
So you’re interested in action films at AMC Mountain 16, right?", + "utt_idx": 5, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.genre", + "value": "action", + "start": 46, + "end": 52 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "AMC Mountain 16", + "start": 62, + "end": 77 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes, that is right.", + "utt_idx": 6, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "OK. I see one action movie playing at AMC Mountain 16: No Time To Die. Remaining showtimes are 4:30pm, 6:40pm and 9:10pm. 
Does any of those work?", + "utt_idx": 7, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.genre", + "value": "action", + "start": 14, + "end": 20 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "AMC Mountain 16:", + "start": 38, + "end": 54 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.movie", + "value": "No Time To Die", + "start": 55, + "end": 69 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "4:30pm", + "start": 95, + "end": 101 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "6:40pm", + "start": 103, + "end": 109 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "9:10pm", + "start": 114, + "end": 120 + } + ] + }, + "db_results": { + "movie": [ + { + "index": 6, + "args": { + "name.genre": "action", + "name.theater": "AMC Mountain 16" + }, + "response": { + "name.movie_1": "No Time To Die" + }, + "name": "find_movies" + }, + { + "index": 6, + "args": { + "name.theater": "AMC Mountain 16", + "name.movie": "No Time To Die" + }, + "response": { + "time.showing_1": "4:30pm", + "time.showing_2": "6:40pm", + "time.showing_3": "9:10pm" + }, + "name": "find_showtimes" + } + ] + } + }, + { + "speaker": "user", + "utterance": "That will work. 
Let's go with the 9:10 start time.", + "utt_idx": 8, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16:", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "9:10pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Great. And how many tickets?", + "utt_idx": 9, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "They are for myself, my mom, and my brother, so get three tickets please.", + "utt_idx": 10, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16:", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "9:10pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "All right. Let me confirm that you’d like three tickets for No Time To Die at AMC Mountain 16 tonight at 9:10pm. 
Is that all correct?", + "utt_idx": 11, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "num.tickets", + "value": "three", + "start": 42, + "end": 47 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.movie", + "value": "No Time To Die", + "start": 60, + "end": 74 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "AMC Mountain 16", + "start": 78, + "end": 93 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "date.showing", + "value": "tonight", + "start": 94, + "end": 101 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "9:10pm", + "start": 105, + "end": 111 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yep, that's right.", + "utt_idx": 12, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "three", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "9:10pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Is it OK to go ahead and purchase these tickets?", + "utt_idx": 13, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes, please buy them.", + "utt_idx": 14, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + 
"date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "three", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "9:10pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "OK. Your tickets are purchased and details for how to proceed have been sent to your email address. Can I help with anything else?", + "utt_idx": 15, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": { + "movie": [ + { + "index": 14, + "args": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16", + "time.showing": "4:30pm", + "num.tickets": "three", + "date.showing": "tonight" + }, + "response": { + "status": "success" + }, + "name": "book_tickets" + } + ] + } + }, + { + "speaker": "user", + "utterance": "No, that's all I need for today. Thanks a lot for your help.", + "utt_idx": 16, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "three", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "9:10pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "OK. 
Enjoy your movie!", + "utt_idx": 17, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + } + ] + }, + { + "dataset": "tm3", + "data_split": "train", + "dialogue_id": "tm3-train-4", + "original_id": "dlg-317727d0-4881-4e08-a3da-d598ee157605", + "domains": [ + "movie" + ], + "goal": { + "description": "SCENARIO: Pretend you’re *using a digital assistant to purchase tickets for a movie currently showing in theaters*. {{LIST The entire conversation needs to follow the structure given below *in the TASK section*., You already know which movie you want to see as well as all the details (which you can pick from the section below)., The conversation ends with *a successful ticket purchase*., Do not include any extras like “thank you” or “goodbye” or “hello” etc. }}\n\nDETAILS: This is *made up data* which you should use to complete your dialog creation (since there are actually no movies playing anywhere right now due to COVID-19).:\n{{LIST You are interested in seeing either *No Time To Die* OR *Mulan*., There is one theater in your town: *Mountain AMC 16*., Showtimes for Mulan: 4:00pm 6:10pm 8:30pm 10:40pm, Showtimes for No Time To Die: 3:00pm 5:10pm 7:30pm 9:40pm, Make sure to include the number of tickets.}}\n*USE THIS Structure*\nUser: (Express desire to buy movie tickets and include TWO of the details needed for the purchase--one should be the movie name. 
See DETAILS section above.)\nAssistant: (Confirm the TWO details given so far and ask for confirmation *make one of the details incorrect*.)\nUser: (Alert the assistant of the detail that is wrong and give the right value.)\nAssistant: (Correct the wrong value, repeating back the correct value.)\nUser: (Confirms the correction, *THEN adds the THREE remaining details*.)\nAssistant: (Confirms all five details then asks if this is correct and whether they can go ahead with the purchase.)\nUser: (Answers yes.)\nAssistant: (Indicates successful purchase saying ”Your tickets are confirmed and they’ve been sent to your email address.”)\n\nTASK: *Write the transcript* of your spoken conversation according to the structure given above. Make sure it includes one exchange where *one of the details the assistant confirms is incorrect* (movie, theater, showtime, number, tickets or day).\n\nSPEAKING STYLE: {{LIST The assistant is polite and efficient, The assistant’s speaking style should match yours but they are not overly formal or too casual, Don’t call your assistant by name (e.g. “Johnny” or “Alexa” or “Siri” or “Assistant” or “Computer”). 
Just assume they’re already listening to you.}}\n\n{{CONVERSATION}}\n\n{{CHECK confirm_native_speaker This conversation task was written by a native English speaker.}}\n{{CHECK confirm_slots The users discusses *each of these details*: 1) movie, 2) theater, 3) showtime, 4) number of tickets, 5) day.}}\n{{CHECK confirm_error_exchange The conversation includes *one exchange where one of the details the assistant reads back is wrong* and so the user must point it out so it can be corrected.}}\n{{CHECK confirm_details The assistant *confirms all the ticket details* together just before booking.}}\n{{CHECK confirm_purchase The assistant *completes purchasing the tickets* saying *”Your tickets are confirmed and they’ve been sent to your email address.”*}}\n", + "inform": {}, + "request": {} + }, + "turns": [ + { + "speaker": "user", + "utterance": "I would like 2 tickets to see Mulan.", + "utt_idx": 0, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "num.tickets", + "value": "2", + "start": 13, + "end": 14 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.movie", + "value": "Mulan", + "start": 30, + "end": 35 + } + ] + }, + "state": { + "movie": { + "name.movie": "Mulan", + "name.theater": "", + "date.showing": "", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "", + "name.person": "", + "num.tickets": "2", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "What day would you like 2 tickets to see No Time To Die?", + "utt_idx": 1, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "num.tickets", + "value": "2", + "start": 
24, + "end": 25 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.movie", + "value": "No Time To Die", + "start": 41, + "end": 55 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I would not, It should be Mulan", + "utt_idx": 2, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.movie", + "value": "Mulan", + "start": 26, + "end": 31 + } + ] + }, + "state": { + "movie": { + "name.movie": "Mulan", + "name.theater": "", + "date.showing": "", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "", + "name.person": "", + "num.tickets": "2", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "What is incorrect?", + "utt_idx": 3, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Movie Title should Be Mulan.", + "utt_idx": 4, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.movie", + "value": "Mulan", + "start": 22, + "end": 27 + } + ] + }, + "state": { + "movie": { + "name.movie": "Mulan", + "name.theater": "", + "date.showing": "", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "", + "name.person": "", + "num.tickets": "2", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "What day would you like 2 tickets to see Mulan?", + 
"utt_idx": 5, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "num.tickets", + "value": "2", + "start": 24, + "end": 25 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.movie", + "value": "Mulan", + "start": 41, + "end": 46 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Tomorrow", + "utt_idx": 6, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "date.showing", + "value": "Tomorrow", + "start": 0, + "end": 8 + } + ] + }, + "state": { + "movie": { + "name.movie": "Mulan", + "name.theater": "", + "date.showing": "Tomorrow", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "", + "name.person": "", + "num.tickets": "2", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Mulan is showing in one theater, the Mountain AMC 16 at the following times tomorrow: 4:00pm 6:10pm 8:30pm 10:40pm; which showing would you prefer?", + "utt_idx": 7, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.movie", + "value": "Mulan", + "start": 0, + "end": 5 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "Mountain AMC 16", + "start": 37, + "end": 52 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "date.showing", + "value": "tomorrow:", + "start": 76, + "end": 85 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "4:00pm", + "start": 86, + "end": 92 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + 
"value": "6:10pm", + "start": 93, + "end": 99 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "8:30pm", + "start": 100, + "end": 106 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "10:40pm;", + "start": 107, + "end": 115 + } + ] + }, + "db_results": { + "movie": [ + { + "index": 6, + "args": { + "name.movie": "Mulan" + }, + "response": { + "name.theater_1": "Mountain AMC 16" + }, + "name": "find_theaters" + }, + { + "index": 6, + "args": { + "name.movie": "Mulan", + "date.showing": "Tomorrow", + "name.theater": "Mountain AMC 16" + }, + "response": { + "time.showing_1": "4:00pm", + "time.showing_2": "6:10pm", + "time.showing_3": "8:30pm", + "time.showing_4": "10:40pm;" + }, + "name": "find_showtimes" + } + ] + } + }, + { + "speaker": "user", + "utterance": "I would like the 4pm showing.", + "utt_idx": 8, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "4pm", + "start": 17, + "end": 20 + } + ] + }, + "state": { + "movie": { + "name.movie": "Mulan", + "name.theater": "Mountain AMC 16", + "date.showing": "tomorrow:", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "", + "name.person": "", + "num.tickets": "2", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "4pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "You would like 2 tickets for Mulan which is playing at the Mountain AMC 16 at 4pm, is that correct?", + "utt_idx": 9, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "num.tickets", + "value": "2", + "start": 15, + "end": 16 + }, + { + "intent": "inform", + "domain": 
"movie", + "slot": "name.movie", + "value": "Mulan", + "start": 29, + "end": 34 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "Mountain AMC 16", + "start": 59, + "end": 74 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "4pm", + "start": 78, + "end": 81 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes", + "utt_idx": 10, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "Mulan", + "name.theater": "Mountain AMC 16", + "date.showing": "tomorrow:", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "", + "name.person": "", + "num.tickets": "2", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "4pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Which Payment method would you like to use? You can say Default, Visa 1 or Visa 2.", + "utt_idx": 11, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Default", + "utt_idx": 12, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "Mulan", + "name.theater": "Mountain AMC 16", + "date.showing": "tomorrow:", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "", + "name.person": "", + "num.tickets": "2", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "4pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Great, I've purchased those tickets for you. 
A Receipt and the tickets themselves have been sent to your email. I have added the movie to your personal calendar for tomorrow. Enjoy the show.", + "utt_idx": 13, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": { + "movie": [ + { + "index": 12, + "args": { + "name.movie": "Mulan", + "name.theater": "Mountain AMC 16", + "num.tickets": "2", + "time.showing": "4:00pm", + "date.showing": "Tomorrow", + "seating": "any" + }, + "response": { + "status": "success" + }, + "name": "book_tickets" + } + ] + } + } + ] + }, + { + "dataset": "tm3", + "data_split": "train", + "dialogue_id": "tm3-train-5", + "original_id": "dlg-c4439035-6aa6-4373-9349-7b40ab213c48", + "domains": [ + "movie" + ], + "goal": { + "description": "SCENARIO: Pretend you’re *using a digital assistant to purchase tickets for a movie currently showing in theaters*. {{LIST The entire conversation needs to follow the structure given below *in the TASK section*., You already know which movie you want to see as well as all the details (which you can pick from the section below)., The conversation ends with *a successful ticket purchase*., Do not include any extras like “thank you” or “goodbye” or “hello” etc. }}\n\nDETAILS: This is *made up data* which you should use to complete your dialog creation (since there are actually no movies playing anywhere right now due to COVID-19).:\n{{LIST You are interested in seeing either *No Time To Die* OR *Mulan*., There is one theater in your town: *Mountain AMC 16*., Showtimes for Mulan: 4:00pm 6:10pm 8:30pm 10:40pm, Showtimes for No Time To Die: 3:00pm 5:10pm 7:30pm 9:40pm, Make sure to include the number of tickets.}}\n*USE THIS Structure*\nUser: (Express desire to buy movie tickets and include TWO of the details needed for the purchase--one should be the movie name. 
See DETAILS section above.)\nAssistant: (Confirm the TWO details given so far and ask for confirmation *make one of the details incorrect*.)\nUser: (Alert the assistant of the detail that is wrong and give the right value.)\nAssistant: (Correct the wrong value, repeating back the correct value.)\nUser: (Confirms the correction, *THEN adds the THREE remaining details*.)\nAssistant: (Confirms all five details then asks if this is correct and whether they can go ahead with the purchase.)\nUser: (Answers yes.)\nAssistant: (Indicates successful purchase saying ”Your tickets are confirmed and they’ve been sent to your email address.”)\n\nTASK: *Write the transcript* of your spoken conversation according to the structure given above. Make sure it includes one exchange where *one of the details the assistant confirms is incorrect* (movie, theater, showtime, number, tickets or day).\n\nSPEAKING STYLE: {{LIST The assistant is polite and efficient, The assistant’s speaking style should match yours but they are not overly formal or too casual, Don’t call your assistant by name (e.g. “Johnny” or “Alexa” or “Siri” or “Assistant” or “Computer”). 
Just assume they’re already listening to you.}}\n\n{{CONVERSATION}}\n\n{{CHECK confirm_native_speaker This conversation task was written by a native English speaker.}}\n{{CHECK confirm_slots The users discusses *each of these details*: 1) movie, 2) theater, 3) showtime, 4) number of tickets, 5) day.}}\n{{CHECK confirm_error_exchange The conversation includes *one exchange where one of the details the assistant reads back is wrong* and so the user must point it out so it can be corrected.}}\n{{CHECK confirm_details The assistant *confirms all the ticket details* together just before booking.}}\n{{CHECK confirm_purchase The assistant *completes purchasing the tickets* saying *”Your tickets are confirmed and they’ve been sent to your email address.”*}}\n", + "inform": {}, + "request": {} + }, + "turns": [ + { + "speaker": "user", + "utterance": "buy tickets for the movie No Time to Die playing at Mountain AMC 16 today", + "utt_idx": 0, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.movie", + "value": "No Time to Die", + "start": 26, + "end": 40 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "Mountain AMC 16", + "start": 52, + "end": 67 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "date.showing", + "value": "today", + "start": 68, + "end": 73 + } + ] + }, + "state": { + "movie": { + "name.movie": "No Time to Die", + "name.theater": "Mountain AMC 16", + "date.showing": "today", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, you want me to buy tickets for the 
movie \"No Time to Die\" playing at Cinemark, is this correct?", + "utt_idx": 1, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.movie", + "value": "\"No Time to Die\"", + "start": 47, + "end": 63 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "Cinemark", + "start": 75, + "end": 83 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "No, the movie is playing at the Mountain AMC 16 closest to me", + "utt_idx": 2, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "Mountain AMC 16", + "start": 32, + "end": 47 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "location", + "value": "closest to me", + "start": 48, + "end": 61 + } + ] + }, + "state": { + "movie": { + "name.movie": "\"No Time to Die\"", + "name.theater": "Mountain AMC 16", + "date.showing": "today", + "date.release": "", + "duration.movie": "", + "location": "closest to me", + "name.character": "", + "name.genre": "", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, I have changed my theater selection to \"Mountain AMC 16\". 
What showtime and number of tickets would you like?", + "utt_idx": 3, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "\"Mountain AMC 16\"", + "start": 45, + "end": 62 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I would like two tickets for the 7:30pm showtime today", + "utt_idx": 4, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "num.tickets", + "value": "two", + "start": 13, + "end": 16 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.preference", + "value": "7:30pm", + "start": 33, + "end": 39 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "date.showing", + "value": "today", + "start": 49, + "end": 54 + } + ] + }, + "state": { + "movie": { + "name.movie": "\"No Time to Die\"", + "name.theater": "\"Mountain AMC 16\"", + "date.showing": "today", + "date.release": "", + "duration.movie": "", + "location": "closest to me", + "name.character": "", + "name.genre": "", + "name.person": "", + "num.tickets": "two", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "7:30pm", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, just to confirm, I will buy two tickets for the movie \"No Time to Die\" playing at 7:30PM at the Mountain AMC 16 Theater, is this correct?", + "utt_idx": 5, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "num.tickets", + "value": "two", + "start": 34, + "end": 37 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.movie", + "value": "\"No Time to Die\"", + "start": 60, + "end": 76 + }, + { + "intent": "inform", + 
"domain": "movie", + "slot": "time.showing", + "value": "7:30PM", + "start": 88, + "end": 94 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "Mountain AMC 16", + "start": 102, + "end": 117 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes this is correct", + "utt_idx": 6, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "\"No Time to Die\"", + "name.theater": "Mountain AMC 16", + "date.showing": "today", + "date.release": "", + "duration.movie": "", + "location": "closest to me", + "name.character": "", + "name.genre": "", + "name.person": "", + "num.tickets": "two", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "7:30pm", + "time.showing": "7:30PM", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Your tickets are confirmed and they’ve been sent to your email address.", + "utt_idx": 7, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": { + "movie": [ + { + "index": 6, + "args": { + "name.movie": "No Time to Die", + "name.theater": "Mountain AMC 16", + "num.tickets": "two", + "time.showing": "7:30PM", + "date.showing": "today" + }, + "response": { + "status": "success" + }, + "name": "book_tickets" + } + ] + } + } + ] + }, + { + "dataset": "tm3", + "data_split": "train", + "dialogue_id": "tm3-train-6", + "original_id": "dlg-0a9d4930-5dab-4125-a598-6c45f94e92dd", + "domains": [ + "movie" + ], + "goal": { + "description": "SCENARIO: Pretend you’re *using a digital assistant to purchase tickets for a movie currently showing in theaters*. 
{{LIST The entire conversation needs to follow the structure given below *in the TASK section*., You already know which movie you want to see as well as all the details (which you can pick from the section below)., The conversation ends with *a successful ticket purchase*., Do not include any extras like “thank you” or “goodbye” or “hello” etc. }}\n\nDETAILS: This is *made up data* which you should use to complete your dialog creation (since there are actually no movies playing anywhere right now due to COVID-19).:\n{{LIST You are interested in seeing either *No Time To Die* OR *Mulan*., There is one theater in your town: *Mountain AMC 16*., Showtimes for Mulan: 4:00pm 6:10pm 8:30pm 10:40pm, Showtimes for No Time To Die: 3:00pm 5:10pm 7:30pm 9:40pm, Make sure to include the number of tickets.}}\n*USE THIS Structure*\nUser: (Express desire to buy movie tickets and include TWO of the details needed for the purchase--one should be the movie name. See DETAILS section above.)\nAssistant: (Confirm the TWO details given so far and ask for confirmation *make one of the details incorrect*.)\nUser: (Alert the assistant of the detail that is wrong and give the right value.)\nAssistant: (Correct the wrong value, repeating back the correct value.)\nUser: (Confirms the correction, *THEN adds the THREE remaining details*.)\nAssistant: (Confirms all five details then asks if this is correct and whether they can go ahead with the purchase.)\nUser: (Answers yes.)\nAssistant: (Indicates successful purchase saying ”Your tickets are confirmed and they’ve been sent to your email address.”)\n\nTASK: *Write the transcript* of your spoken conversation according to the structure given above. 
Make sure it includes one exchange where *one of the details the assistant confirms is incorrect* (movie, theater, showtime, number, tickets or day).\n\nSPEAKING STYLE: {{LIST The assistant is polite and efficient, The assistant’s speaking style should match yours but they are not overly formal or too casual, Don’t call your assistant by name (e.g. “Johnny” or “Alexa” or “Siri” or “Assistant” or “Computer”). Just assume they’re already listening to you.}}\n\n{{CONVERSATION}}\n\n{{CHECK confirm_native_speaker This conversation task was written by a native English speaker.}}\n{{CHECK confirm_slots The users discusses *each of these details*: 1) movie, 2) theater, 3) showtime, 4) number of tickets, 5) day.}}\n{{CHECK confirm_error_exchange The conversation includes *one exchange where one of the details the assistant reads back is wrong* and so the user must point it out so it can be corrected.}}\n{{CHECK confirm_details The assistant *confirms all the ticket details* together just before booking.}}\n{{CHECK confirm_purchase The assistant *completes purchasing the tickets* saying *”Your tickets are confirmed and they’ve been sent to your email address.”*}}\n", + "inform": {}, + "request": {} + }, + "turns": [ + { + "speaker": "user", + "utterance": "hi i need tickets for a movie?", + "utt_idx": 0, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "", + "name.theater": "", + "date.showing": "", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "hello could you tell me theater name?", + "utt_idx": 1, + "dialogue_acts": { + "binary": [], + 
"categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "name of theater Mountain AMC 16", + "utt_idx": 2, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "Mountain AMC 16", + "start": 16, + "end": 31 + } + ] + }, + "state": { + "movie": { + "name.movie": "", + "name.theater": "Mountain AMC 16", + "date.showing": "", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "fine tell me the movie name?", + "utt_idx": 3, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Time To Die OR Mulan.", + "utt_idx": 4, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.movie", + "value": "Time To Die", + "start": 0, + "end": 11 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.movie", + "value": "Mulan", + "start": 15, + "end": 20 + } + ] + }, + "state": { + "movie": { + "name.movie": "Mulan", + "name.theater": "Mountain AMC 16", + "date.showing": "", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "sorry 
one of your detail is incorrect", + "utt_idx": 5, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "give the correct values", + "utt_idx": 6, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "Mulan", + "name.theater": "Mountain AMC 16", + "date.showing": "", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "the name of the theater Mountain AMC 16.\\rare interested in seeing either No Time To Die OR Mulan.", + "utt_idx": 7, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "Mountain AMC 16", + "start": 24, + "end": 39 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.movie", + "value": "No Time To Di", + "start": 74, + "end": 87 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.movie", + "value": "Mula", + "start": 92, + "end": 96 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "tell me show time those movies?", + "utt_idx": 8, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "Mula", + "name.theater": "Mountain AMC 16", + "date.showing": "", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + 
"review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Showtimes for Mulan: 4:00pm 6:10pm 8:30pm 10:40pm\\rShowtimes for No Time To Die: 3:00pm 5:10pm 7:30pm 9:40pm", + "utt_idx": 9, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.movie", + "value": "Mulan:", + "start": 14, + "end": 20 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "4:00pm", + "start": 21, + "end": 27 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "6:10pm", + "start": 28, + "end": 34 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "8:30pm", + "start": 35, + "end": 41 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "10:40pm", + "start": 42, + "end": 49 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.movie", + "value": "No Time To Die", + "start": 65, + "end": 79 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "3:00p", + "start": 81, + "end": 86 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "5:10p", + "start": 88, + "end": 93 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "7:30p", + "start": 95, + "end": 100 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "9:40p", + "start": 102, + "end": 107 + } + ] + }, + "db_results": { + "movie": [ + { + "index": 8, + "args": { + "name.theater": "Mountain AMC 16", + "name.movie": "Mulan" + }, + "response": { + "time.showing_1": "4:00pm", + "time.showing_2": "6:10pm", + "time.showing_3": "8:30pm", + "time.showing_4": "10:40pm" + }, + "name": "find_showtimes" + }, + { + "index": 8, + "args": { + "name.theater": 
"Mountain AMC 16", + "name.movie": "No Time To Die" + }, + "response": { + "time.showing_1": "3:00pm", + "time.showing_2": "5:10pm", + "time.showing_3": "7:30pm", + "time.showing_4": "9:40pm" + }, + "name": "find_showtimes" + } + ] + } + }, + { + "speaker": "user", + "utterance": "No Time To Die is fine", + "utt_idx": 10, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.movie", + "value": "No Time To Die", + "start": 0, + "end": 14 + } + ] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "Mountain AMC 16", + "date.showing": "", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "9:40p", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "please select the show time", + "utt_idx": 11, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "7:30pm", + "utt_idx": 12, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": ":30pm", + "start": 1, + "end": 6 + } + ] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "Mountain AMC 16", + "date.showing": "", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": ":30pm", + "type.screening": "" + } + } + }, + 
{ + "speaker": "system", + "utterance": "how many tickets you want?", + "utt_idx": 13, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "i want 5 tickets", + "utt_idx": 14, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "num.tickets", + "value": "5", + "start": 7, + "end": 8 + } + ] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "Mountain AMC 16", + "date.showing": "", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "", + "name.person": "", + "num.tickets": "5", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": ":30pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "your details are correct can go ahead with purchase?", + "utt_idx": 15, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "yes", + "utt_idx": 16, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "Mountain AMC 16", + "date.showing": "", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "", + "name.person": "", + "num.tickets": "5", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": ":30pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "please wait i will confirm your tickets", + "utt_idx": 17, + "dialogue_acts": { + "binary": [], + "categorical": [], + 
"non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "thank you", + "utt_idx": 18, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "Mountain AMC 16", + "date.showing": "", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "", + "name.person": "", + "num.tickets": "5", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": ":30pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Your tickets are confirmed and they’ve been sent to your email address", + "utt_idx": 19, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": { + "movie": [ + { + "index": 18, + "args": { + "name.movie": "No Time To Die", + "name.theater": "Mountain AMC 16", + "num.tickets": "5", + "time.showing": "7:30pm", + "date.showing": "today", + "seating": "any" + }, + "response": { + "status": "success" + }, + "name": "book_tickets" + } + ] + } + } + ] + }, + { + "dataset": "tm3", + "data_split": "train", + "dialogue_id": "tm3-train-7", + "original_id": "dlg-0b2cddfe-673d-439e-9bcd-b5356156cbd6", + "domains": [ + "movie" + ], + "goal": { + "description": "SCENARIO: Pretend you’re *using a digital assistant to purchase tickets for a movie currently showing in theaters*. {{LIST The entire conversation needs to follow the structure given below *in the TASK section*., You already know which movie you want to see as well as all the details (which you can pick from the section below)., The conversation ends with *a successful ticket purchase*., Do not include any extras like “thank you” or “goodbye” or “hello” etc. 
}}\n\nDETAILS: This is *made up data* which you should use to complete your dialog creation (since there are actually no movies playing anywhere right now due to COVID-19).:\n{{LIST You are interested in seeing either *No Time To Die* OR *Mulan*., There is one theater in your town: *Mountain AMC 16*., Showtimes for Mulan: 4:00pm 6:10pm 8:30pm 10:40pm, Showtimes for No Time To Die: 3:00pm 5:10pm 7:30pm 9:40pm, Make sure to include the number of tickets.}}\n*USE THIS Structure*\nUser: (Express desire to buy movie tickets and include TWO of the details needed for the purchase--one should be the movie name. See DETAILS section above.)\nAssistant: (Confirm the TWO details given so far and ask for confirmation *make one of the details incorrect*.)\nUser: (Alert the assistant of the detail that is wrong and give the right value.)\nAssistant: (Correct the wrong value, repeating back the correct value.)\nUser: (Confirms the correction, *THEN adds the THREE remaining details*.)\nAssistant: (Confirms all five details then asks if this is correct and whether they can go ahead with the purchase.)\nUser: (Answers yes.)\nAssistant: (Indicates successful purchase saying ”Your tickets are confirmed and they’ve been sent to your email address.”)\n\nTASK: *Write the transcript* of your spoken conversation according to the structure given above. Make sure it includes one exchange where *one of the details the assistant confirms is incorrect* (movie, theater, showtime, number, tickets or day).\n\nSPEAKING STYLE: {{LIST The assistant is polite and efficient, The assistant’s speaking style should match yours but they are not overly formal or too casual, Don’t call your assistant by name (e.g. “Johnny” or “Alexa” or “Siri” or “Assistant” or “Computer”). 
Just assume they’re already listening to you.}}\n\n{{CONVERSATION}}\n\n{{CHECK confirm_native_speaker This conversation task was written by a native English speaker.}}\n{{CHECK confirm_slots The users discusses *each of these details*: 1) movie, 2) theater, 3) showtime, 4) number of tickets, 5) day.}}\n{{CHECK confirm_error_exchange The conversation includes *one exchange where one of the details the assistant reads back is wrong* and so the user must point it out so it can be corrected.}}\n{{CHECK confirm_details The assistant *confirms all the ticket details* together just before booking.}}\n{{CHECK confirm_purchase The assistant *completes purchasing the tickets* saying *”Your tickets are confirmed and they’ve been sent to your email address.”*}}\n", + "inform": {}, + "request": {} + }, + "turns": [ + { + "speaker": "user", + "utterance": "I would like to purchase some tickets to the 4pm showing of Mulan.", + "utt_idx": 0, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "time.preference", + "value": "4pm", + "start": 45, + "end": 48 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.movie", + "value": "Mulan", + "start": 60, + "end": 65 + } + ] + }, + "state": { + "movie": { + "name.movie": "Mulan", + "name.theater": "", + "date.showing": "", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "4pm", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "So let me get this right; you want tickets to the 8:30pm showing of Mulan?", + "utt_idx": 1, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": 
"movie", + "slot": "time.showing", + "value": "8:30pm", + "start": 50, + "end": 56 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.movie", + "value": "Mulan", + "start": 68, + "end": 73 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "No, I wanted tickets to the 4pm showing of Mulan.", + "utt_idx": 2, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "time.preference", + "value": "4pm", + "start": 28, + "end": 31 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.movie", + "value": "Mulan", + "start": 43, + "end": 48 + } + ] + }, + "state": { + "movie": { + "name.movie": "Mulan", + "name.theater": "", + "date.showing": "", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "4pm", + "time.showing": "8:30pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Okay, so you want tickets to the 4pm showing of Mulan?", + "utt_idx": 3, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "4pm", + "start": 33, + "end": 36 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.movie", + "value": "Mulan", + "start": 48, + "end": 53 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Correct. 
I'd like to purchase two tickets to the 4 PM showing of Mulan at the Mountain AMC 16 for this Saturday.", + "utt_idx": 4, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "num.tickets", + "value": "two", + "start": 30, + "end": 33 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.preference", + "value": "4 PM", + "start": 49, + "end": 53 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.movie", + "value": "Mulan", + "start": 65, + "end": 70 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "Mountain AMC 16", + "start": 78, + "end": 93 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "date.showing", + "value": "this Saturday", + "start": 98, + "end": 111 + } + ] + }, + "state": { + "movie": { + "name.movie": "Mulan", + "name.theater": "Mountain AMC 16", + "date.showing": "this Saturday", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "", + "name.person": "", + "num.tickets": "two", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "4 PM", + "time.showing": "4pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "So let me get this right; you would like purchase two tickets to the 4 PM showing of Mulan at the Mountain AMC 16 for this Saturday?", + "utt_idx": 5, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "num.tickets", + "value": "two", + "start": 50, + "end": 53 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "4 PM", + "start": 69, + "end": 73 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.movie", + "value": "Mulan", + "start": 85, + "end": 90 + }, + { + 
"intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "Mountain AMC 16", + "start": 98, + "end": 113 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "date.showing", + "value": "this Saturday", + "start": 118, + "end": 131 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes.", + "utt_idx": 6, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "Mulan", + "name.theater": "Mountain AMC 16", + "date.showing": "this Saturday", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "", + "name.person": "", + "num.tickets": "two", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "4 PM", + "time.showing": "4 PM", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Excellent. Your tickets are confirmed and they’ve been sent to your email address", + "utt_idx": 7, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": { + "movie": [ + { + "index": 6, + "args": { + "name.movie": "Mulan", + "name.theater": "Mountain AMC 16", + "num.tickets": "two", + "time.showing": "4pm", + "date.showing": "this Saturday", + "seating": "any" + }, + "response": { + "status": "success" + }, + "name": "book_tickets" + } + ] + } + } + ] + }, + { + "dataset": "tm3", + "data_split": "train", + "dialogue_id": "tm3-train-8", + "original_id": "dlg-0976855e-c0ce-49de-a217-cbfbd4670d33", + "domains": [ + "movie" + ], + "goal": { + "description": "SCENARIO: In the conversation below, a customer is talking to a booking agent to purchase movie tickets. However, the customer’s turns are missing. The details mentioned (theater names, cities, etc.) 
do NOT necessarily reflect real life, but you should pretend they are real for the purposes of this exercise.\n\nYOUR TASK: {{LIST *Pretend you are the customer*., Fill in the *missing turns* (in green) so that the conversation makes sense and sounds natural., Make sure you *follow the instructions* provided in parentheses for each turn. (Start typing to make the instructions move outside the box.), Do not copy the wording. *Use your own words*., *Use your normal speaking style* for this scenario but don’t be overly chatty., *Do NOT* call the booking agent by any name including “sir” “madam” etc.}}\n\n{{HIDE movie_1 name.movie No Time To Die}}\n{{HIDE theater_1 name.theater AMC Mountain 16}}\n{{HIDE theater_2 name.theater AMC Mercado 24}}\n{{HIDE genre_1 name.genre action}}\n{{HIDE movie_1_showtime_1 time.showing 4:30pm}}\n{{HIDE movie_1_showtime_2 time.showing 6:40pm}}\n{{HIDE movie_1_showtime_3 time.showing 9:10pm}}\n{{HIDE num_tickets num.tickets three}}\n{{HIDE date date.showing tonight}}\n{{HIDE success_closing success.closing OK. Enjoy your movie!}}\nBEFORE YOU BEGIN: Provide a value for this detail.\n{{ENTER people people.tickets Who’s all going to the movie? (use a phrase describing *three people* that includes reference to yourself.)}}\n\n{{CONVERSATION\nhint user (In your own words, express interest in buying movie tickets for var_date and specify theater ‘var_theater_1’.)\nforce assistant No problem. Is there a particular type of movie you’re looking for?\nhint user (mention ‘var_genre_1’ in your response)\nforce assistant Sure. I can help with that. Let me check listings at var_theater_2.\nhint user (Correct the agent’s mistake: You wanted ‘var_theater_1’.)\nforce assistant Oh, sorry about that. 
So you’re interested in var_genre_1 films at var_theater_1, right?\nhint user (Confirm the agent’s correction.)\ncall find_movies name.genre var_genre_1 name.theater var_theater_1\nresponse find_movies name.movie var_movie_1\ncall find_showtimes name.theater var_theater_1 date.showing var_date name.movie var_movie_1\nresponse find_showtimes time.showing var_movie_1_showtime_1 time.showing var_movie_1_showtime_2 time.showing var_movie_1_showtime_3\nforce assistant OK. I see one var_genre_1 movie playing at var_theater_1: var_movie_1. Remaining showtimes are var_movie_1_showtime_1, var_movie_1_showtime_2 and var_movie_1_showtime_3. Does any of those work?\nhint user (Answer positively and choose ‘var_movie_1_showtime_3’)\nforce assistant Great. And how many tickets?\nhint user (Include your earlier answer of ‘var_people’ in this response.)\nforce assistant All right. Let me confirm that you’d like var_num_tickets tickets for var_movie_1 at var_theater_1 var_date at var_movie_1_showtime_3. Is that all correct?\nhint user (Confirm.)\nforce assistant Is it OK to go ahead and purchase these tickets?\nhint user (Give positive answer).\ncall book_tickets name.movie var_movie_1 name.theater var_theater_1 time.showing var_movie_1_showtime_1 num.tickets var_num_tickets date.showing var_date\nresponse book_tickets api.response success\nforce assistant OK. Your tickets are purchased and details for how to proceed have been sent to your email address. 
Can I help with anything else?\nhint user (polite negative answer)\nforce assistant var_success_closing\n}}\n\n{{CHECK confirm_native_speaker This conversation task was written by a native English speaker.}}\n{{CHECK confirm_logical The conversation between the user and the assistant is cooperative and logical.}}\n{{CHECK confirm_natural All the individual turns from the user (green boxes) sound natural and familiar.}}\n{{CHECK confirm_casual All the individual turns from the user (green boxes) are casual but polite.}}\n{{CHECK confirm_appropriate All the individual turns from the user (green boxes) are free of offensive or inappropriate language.}}\n", + "inform": {}, + "request": {} + }, + "turns": [ + { + "speaker": "user", + "utterance": "I would like to buy tickets for a movie tonight at AMC Mountain 16.", + "utt_idx": 0, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "date.showing", + "value": "tonight", + "start": 40, + "end": 47 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "AMC Mountain 16", + "start": 51, + "end": 66 + } + ] + }, + "state": { + "movie": { + "name.movie": "", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "No problem. 
Is there a particular type of movie you’re looking for?", + "utt_idx": 1, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I am looking for something that has action and adventure in it.", + "utt_idx": 2, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.genre", + "value": "action", + "start": 36, + "end": 42 + } + ] + }, + "state": { + "movie": { + "name.movie": "", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Sure. I can help with that. 
Let me check listings at AMC Mercado 24.", + "utt_idx": 3, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "AMC Mercado 24", + "start": 53, + "end": 67 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "No not Mercado, I want it at AMC Mountain 16.", + "utt_idx": 4, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "AMC Mountain 16", + "start": 29, + "end": 44 + } + ] + }, + "state": { + "movie": { + "name.movie": "", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Oh, sorry about that. 
So you’re interested in action films at AMC Mountain 16, right?", + "utt_idx": 5, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.genre", + "value": "action", + "start": 46, + "end": 52 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "AMC Mountain 16", + "start": 62, + "end": 77 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes.", + "utt_idx": 6, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "OK. I see one action movie playing at AMC Mountain 16: No Time To Die. Remaining showtimes are 4:30pm, 6:40pm and 9:10pm. 
Does any of those work?", + "utt_idx": 7, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.genre", + "value": "action", + "start": 14, + "end": 20 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "AMC Mountain 16:", + "start": 38, + "end": 54 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.movie", + "value": "No Time To Die", + "start": 55, + "end": 69 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "4:30pm", + "start": 95, + "end": 101 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "6:40pm", + "start": 103, + "end": 109 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "9:10pm", + "start": 114, + "end": 120 + } + ] + }, + "db_results": { + "movie": [ + { + "index": 6, + "args": { + "name.genre": "action", + "name.theater": "AMC Mountain 16" + }, + "response": { + "name.movie_1": "No Time To Die" + }, + "name": "find_movies" + }, + { + "index": 6, + "args": { + "name.theater": "AMC Mountain 16", + "name.movie": "No Time To Die" + }, + "response": { + "time.showing_1": "4:30pm", + "time.showing_2": "6:40pm", + "time.showing_3": "9:10pm" + }, + "name": "find_showtimes" + } + ] + } + }, + { + "speaker": "user", + "utterance": "Yes that's great. 
I'd like tickets to the 9:10 pm showing.", + "utt_idx": 8, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": ":10 pm", + "start": 43, + "end": 49 + } + ] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16:", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": ":10 pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Great. And how many tickets?", + "utt_idx": 9, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I need three tickets.", + "utt_idx": 10, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16:", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": ":10 pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "All right. Let me confirm that you’d like three tickets for No Time To Die at AMC Mountain 16 tonight at 9:10pm. 
Is that all correct?", + "utt_idx": 11, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "num.tickets", + "value": "three", + "start": 42, + "end": 47 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.movie", + "value": "No Time To Die", + "start": 60, + "end": 74 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "AMC Mountain 16", + "start": 78, + "end": 93 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "date.showing", + "value": "tonight", + "start": 94, + "end": 101 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "9:10pm", + "start": 105, + "end": 111 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes that's correct.", + "utt_idx": 12, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "three", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "9:10pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Is it OK to go ahead and purchase these tickets?", + "utt_idx": 13, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes, thank you.", + "utt_idx": 14, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + 
"date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "three", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "9:10pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "OK. Your tickets are purchased and details for how to proceed have been sent to your email address. Can I help with anything else?", + "utt_idx": 15, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": { + "movie": [ + { + "index": 14, + "args": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16", + "time.showing": "4:30pm", + "num.tickets": "three", + "date.showing": "tonight" + }, + "response": { + "status": "success" + }, + "name": "book_tickets" + } + ] + } + }, + { + "speaker": "user", + "utterance": "No thank you.", + "utt_idx": 16, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "three", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "9:10pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "OK. 
Enjoy your movie!", + "utt_idx": 17, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + } + ] + }, + { + "dataset": "tm3", + "data_split": "train", + "dialogue_id": "tm3-train-9", + "original_id": "dlg-266f509a-b0f9-4316-a644-8b3d55674611", + "domains": [ + "movie" + ], + "goal": { + "description": "SCENARIO: In the conversation below, a customer is talking to a booking agent to purchase movie tickets. However, the customer’s turns are missing. The details mentioned (theater names, cities, etc.) do NOT necessarily reflect real life, but you should pretend they are real for the purposes of this exercise.\n\nYOUR TASK: {{LIST *Pretend you are the customer*., Fill in the *missing turns* (in green) so that the conversation makes sense and sounds natural., Make sure you *follow the instructions* provided in parentheses for each turn. (Start typing to make the instructions move outside the box.), Do not copy the wording. *Use your own words*., *Use your normal speaking style* for this scenario but don’t be overly chatty., *Do NOT* call the booking agent by any name including “sir” “madam” etc.}}\n\n{{HIDE movie_1 name.movie No Time To Die}}\n{{HIDE theater_1 name.theater AMC Mountain 16}}\n{{HIDE theater_2 name.theater AMC Mercado 24}}\n{{HIDE genre_1 name.genre action}}\n{{HIDE movie_1_showtime_1 time.showing 4:30pm}}\n{{HIDE movie_1_showtime_2 time.showing 6:40pm}}\n{{HIDE movie_1_showtime_3 time.showing 9:10pm}}\n{{HIDE num_tickets num.tickets three}}\n{{HIDE date date.showing tonight}}\n{{HIDE success_closing success.closing OK. Enjoy your movie!}}\nBEFORE YOU BEGIN: Provide a value for this detail.\n{{ENTER people people.tickets Who’s all going to the movie? (use a phrase describing *three people* that includes reference to yourself.)}}\n\n{{CONVERSATION\nhint user (In your own words, express interest in buying movie tickets for var_date and specify theater ‘var_theater_1’.)\nforce assistant No problem. 
Is there a particular type of movie you’re looking for?\nhint user (mention ‘var_genre_1’ in your response)\nforce assistant Sure. I can help with that. Let me check listings at var_theater_2.\nhint user (Correct the agent’s mistake: You wanted ‘var_theater_1’.)\nforce assistant Oh, sorry about that. So you’re interested in var_genre_1 films at var_theater_1, right?\nhint user (Confirm the agent’s correction.)\ncall find_movies name.genre var_genre_1 name.theater var_theater_1\nresponse find_movies name.movie var_movie_1\ncall find_showtimes name.theater var_theater_1 date.showing var_date name.movie var_movie_1\nresponse find_showtimes time.showing var_movie_1_showtime_1 time.showing var_movie_1_showtime_2 time.showing var_movie_1_showtime_3\nforce assistant OK. I see one var_genre_1 movie playing at var_theater_1: var_movie_1. Remaining showtimes are var_movie_1_showtime_1, var_movie_1_showtime_2 and var_movie_1_showtime_3. Does any of those work?\nhint user (Answer positively and choose ‘var_movie_1_showtime_3’)\nforce assistant Great. And how many tickets?\nhint user (Include your earlier answer of ‘var_people’ in this response.)\nforce assistant All right. Let me confirm that you’d like var_num_tickets tickets for var_movie_1 at var_theater_1 var_date at var_movie_1_showtime_3. Is that all correct?\nhint user (Confirm.)\nforce assistant Is it OK to go ahead and purchase these tickets?\nhint user (Give positive answer).\ncall book_tickets name.movie var_movie_1 name.theater var_theater_1 time.showing var_movie_1_showtime_1 num.tickets var_num_tickets date.showing var_date\nresponse book_tickets api.response success\nforce assistant OK. Your tickets are purchased and details for how to proceed have been sent to your email address. 
Can I help with anything else?\nhint user (polite negative answer)\nforce assistant var_success_closing\n}}\n\n{{CHECK confirm_native_speaker This conversation task was written by a native English speaker.}}\n{{CHECK confirm_logical The conversation between the user and the assistant is cooperative and logical.}}\n{{CHECK confirm_natural All the individual turns from the user (green boxes) sound natural and familiar.}}\n{{CHECK confirm_casual All the individual turns from the user (green boxes) are casual but polite.}}\n{{CHECK confirm_appropriate All the individual turns from the user (green boxes) are free of offensive or inappropriate language.}}\n", + "inform": {}, + "request": {} + }, + "turns": [ + { + "speaker": "user", + "utterance": "Yes, I would like to buy tickets for tonight. At AMC Mountain 16.", + "utt_idx": 0, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "date.showing", + "value": "tonight", + "start": 37, + "end": 44 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "AMC Mountain 16", + "start": 49, + "end": 64 + } + ] + }, + "state": { + "movie": { + "name.movie": "", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "No problem. 
Is there a particular type of movie you’re looking for?", + "utt_idx": 1, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "I want to see a good action flick.", + "utt_idx": 2, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.genre", + "value": "action", + "start": 21, + "end": 27 + } + ] + }, + "state": { + "movie": { + "name.movie": "", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Sure. I can help with that. 
Let me check listings at AMC Mercado 24.", + "utt_idx": 3, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "AMC Mercado 24", + "start": 53, + "end": 67 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes, but I wanted AMC Mountain 16.", + "utt_idx": 4, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "AMC Mountain 16", + "start": 18, + "end": 33 + } + ] + }, + "state": { + "movie": { + "name.movie": "", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Oh, sorry about that. 
So you’re interested in action films at AMC Mountain 16, right?", + "utt_idx": 5, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.genre", + "value": "action", + "start": 46, + "end": 52 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "AMC Mountain 16", + "start": 62, + "end": 77 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes, that is right.", + "utt_idx": 6, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "OK. I see one action movie playing at AMC Mountain 16: No Time To Die. Remaining showtimes are 4:30pm, 6:40pm and 9:10pm. 
Does any of those work?", + "utt_idx": 7, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "name.genre", + "value": "action", + "start": 14, + "end": 20 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "AMC Mountain 16:", + "start": 38, + "end": 54 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.movie", + "value": "No Time To Die", + "start": 55, + "end": 69 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "4:30pm", + "start": 95, + "end": 101 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "6:40pm", + "start": 103, + "end": 109 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "9:10pm", + "start": 114, + "end": 120 + } + ] + }, + "db_results": { + "movie": [ + { + "index": 6, + "args": { + "name.genre": "action", + "name.theater": "AMC Mountain 16" + }, + "response": { + "name.movie_1": "No Time To Die" + }, + "name": "find_movies" + }, + { + "index": 6, + "args": { + "name.theater": "AMC Mountain 16", + "name.movie": "No Time To Die" + }, + "response": { + "time.showing_1": "4:30pm", + "time.showing_2": "6:40pm", + "time.showing_3": "9:10pm" + }, + "name": "find_showtimes" + } + ] + } + }, + { + "speaker": "user", + "utterance": "Yes, the 9:10 works great.", + "utt_idx": 8, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16:", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + 
"time.showing": "9:10pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Great. And how many tickets?", + "utt_idx": 9, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Just three including myself.", + "utt_idx": 10, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16:", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "9:10pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "All right. Let me confirm that you’d like three tickets for No Time To Die at AMC Mountain 16 tonight at 9:10pm. 
Is that all correct?", + "utt_idx": 11, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [ + { + "intent": "inform", + "domain": "movie", + "slot": "num.tickets", + "value": "three", + "start": 42, + "end": 47 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.movie", + "value": "No Time To Die", + "start": 60, + "end": 74 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "name.theater", + "value": "AMC Mountain 16", + "start": 78, + "end": 93 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "date.showing", + "value": "tonight", + "start": 94, + "end": 101 + }, + { + "intent": "inform", + "domain": "movie", + "slot": "time.showing", + "value": "9:10pm", + "start": 105, + "end": 111 + } + ] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes, that is exactly right.", + "utt_idx": 12, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "three", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "9:10pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "Is it OK to go ahead and purchase these tickets?", + "utt_idx": 13, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + }, + { + "speaker": "user", + "utterance": "Yes.", + "utt_idx": 14, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": 
"", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "three", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "9:10pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "OK. Your tickets are purchased and details for how to proceed have been sent to your email address. Can I help with anything else?", + "utt_idx": 15, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": { + "movie": [ + { + "index": 14, + "args": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16", + "time.showing": "4:30pm", + "num.tickets": "three", + "date.showing": "tonight" + }, + "response": { + "status": "success" + }, + "name": "book_tickets" + } + ] + } + }, + { + "speaker": "user", + "utterance": "No, that is it. You have been a low of help. Thanks again.", + "utt_idx": 16, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "state": { + "movie": { + "name.movie": "No Time To Die", + "name.theater": "AMC Mountain 16", + "date.showing": "tonight", + "date.release": "", + "duration.movie": "", + "location": "", + "name.character": "", + "name.genre": "action", + "name.person": "", + "num.tickets": "three", + "price.ticket": "", + "price.total": "", + "rating.movie": "", + "review.audience": "", + "review.critic": "", + "seating": "", + "time.preference": "", + "time.showing": "9:10pm", + "type.screening": "" + } + } + }, + { + "speaker": "system", + "utterance": "OK. 
Enjoy your movie!", + "utt_idx": 17, + "dialogue_acts": { + "binary": [], + "categorical": [], + "non-categorical": [] + }, + "db_results": {} + } + ] + } +] \ No newline at end of file diff --git a/data/unified_datasets/tm3/preprocess.py b/data/unified_datasets/tm3/preprocess.py new file mode 100644 index 0000000000000000000000000000000000000000..e8c9234beb439f870c805b4362ca60da09ee1c87 --- /dev/null +++ b/data/unified_datasets/tm3/preprocess.py @@ -0,0 +1,253 @@ +from zipfile import ZipFile, ZIP_DEFLATED +import json +import os +import copy +import zipfile +from tqdm import tqdm +import re +from collections import Counter +from shutil import rmtree +from convlab2.util.file_util import read_zipped_json, write_zipped_json +from pprint import pprint +import random +import glob + + +descriptions = { + 'movie': 'Book movie tickets for the user', + 'name.movie': 'Name of the movie, e.g. Joker, Parasite, The Avengers', + 'name.theater': 'Name of the theater, e.g. Century City, AMC Mercado 20', + 'num.tickets': 'Number of tickets, e.g. two, me and my friend, John and I', + 'time.preference': 'Preferred time or range, e.g. around 2pm, later in the evening, 4:30pm', + 'time.showing': 'The showtimes published by the theater, e.g. 5:10pm, 8:30pm', + 'date.showing': 'the date or day of the showing, e.g. today, tonight, tomrrow, April 12th.', + 'location': 'The city, or city and state, zip code and sometimes more specific regions, e.g. downtown', + 'type.screening': 'IMAX, Dolby, 3D, standard, or similar phrases for technology offerings', + 'seating': 'Various phrases from specific "row 1" to "near the back", "on an aisle", etc.', + 'date.release': 'Movie attribute published for the official movie release date.', + 'price.ticket': 'Price per ticket', + 'price.total': 'The total for the purchase of all tickets', + 'name.genre': 'Includes a wide range from classic genres like action, drama, etc. 
anno2slot = {
    "movie": {
        "description.other": False,  # transform to binary dialog act
        "description.plot": False,  # too long, 19 words in avg. transform to binary dialog act
    }
}


def format_turns(ori_turns):
    """Drop deleted/repeated turns and merge consecutive turns of one speaker.

    Speakers are normalised to ``'system'`` (original ``ASSISTANT``, case
    insensitive) or ``'user'`` (anything else).  When the same speaker talks
    twice in a row, the later text is appended to the earlier turn after a
    single space and the later turn's segment offsets are shifted so they
    still index into the merged text.

    Args:
        ori_turns: list of utterance dicts, each with at least ``'speaker'``
            and ``'text'`` and optionally ``'segments'`` (dicts carrying
            ``'start_index'`` / ``'end_index'``).

    Returns:
        A new list of turn dicts.  ``ori_turns`` and the dicts inside it are
        left untouched (everything that is modified is copied first).
    """
    new_turns = []
    previous_speaker = None
    for i, ori_turn in enumerate(ori_turns):
        if ori_turn['text'] == '(deleted)':
            continue
        speaker = 'system' if ori_turn['speaker'].upper() == 'ASSISTANT' else 'user'

        if speaker != previous_speaker:
            # first kept turn, or the speaker switched: start a new turn
            previous_speaker = speaker
            turn = copy.deepcopy(ori_turn)
            turn['speaker'] = speaker
            new_turns.append(turn)
            continue

        # Same speaker keeps talking.  A turn whose text is contained in the
        # immediately preceding raw turn is treated as a repetition.
        if ori_turn['text'] in ori_turns[i-1]['text']:
            continue

        last_turn = new_turns[-1]
        # +1 accounts for the joining space inserted below
        index_shift = len(last_turn['text']) + 1
        last_turn['text'] += ' ' + ori_turn['text']
        if 'segments' in ori_turn:
            merged_segments = last_turn.setdefault('segments', [])
            for ori_segment in ori_turn['segments']:
                # copy so the caller's segment offsets are not shifted in place
                segment = copy.deepcopy(ori_segment)
                segment['start_index'] += index_shift
                segment['end_index'] += index_shift
                merged_segments.append(segment)
    return new_turns


def preprocess():
    """Convert the raw Taskmaster-3 dump into the unified dataset format.

    Reads ``Taskmaster-master/TM-3-2020`` (extracting ``master.zip`` from the
    current directory first if the folder is missing), builds the ontology
    and the dialogues, writes ``dummy_data.json`` and ``data.zip`` to the
    current directory, and finally removes both the extracted original data
    and the temporary ``data`` directory.

    Each data file is split train:validation:test = 8:1:1 with a fixed seed.

    Returns:
        tuple: ``(dialogues, ontology)`` where ``dialogues`` is the list of
        train, validation and test dialogues (in that order).

    Raises:
        FileNotFoundError: neither the extracted data nor ``master.zip`` exist.
    """
    original_data_dir = 'Taskmaster-master'
    new_data_dir = 'data'

    if not os.path.exists(original_data_dir):
        original_data_zip = 'master.zip'
        if not os.path.exists(original_data_zip):
            raise FileNotFoundError(f'cannot find original data {original_data_zip} in tm3/, should manually download master.zip from https://github.com/google-research-datasets/Taskmaster/archive/refs/heads/master.zip')
        with ZipFile(original_data_zip) as archive:
            archive.extractall()

    os.makedirs(new_data_dir, exist_ok=True)

    ontology = {'domains': {},
                'intents': {
                    'inform': {'description': 'inform the value of a slot or general information.'}
                },
                'binary_dialogue_acts': set(),
                'state': {}}
    ontology_path = os.path.join(original_data_dir, "TM-3-2020/ontology/entities.json")
    with open(ontology_path, encoding='utf-8') as f:
        ori_ontology = json.load(f)
    assert len(ori_ontology) == 1
    domain = next(iter(ori_ontology))
    domain_ontology = ori_ontology[domain]
    ontology['domains'][domain] = {'description': descriptions[domain], 'slots': {}}
    ontology['state'][domain] = {}
    for slot in domain_ontology['required']+domain_ontology['optional']:
        ontology['domains'][domain]['slots'][slot] = {
            'description': descriptions[slot],
            'is_categorical': False,
            'possible_values': [],
        }
        # slots listed in anno2slot become binary dialogue acts, not state slots
        if slot not in anno2slot[domain]:
            ontology['state'][domain][slot] = ''

    dataset = 'tm3'
    splits = ['train', 'validation', 'test']
    dialogues_by_split = {split: [] for split in splits}
    # glob order is filesystem dependent; sort so dialogue ids are reproducible
    data_files = sorted(glob.glob(os.path.join(original_data_dir, "TM-3-2020/data/*.json")))
    for data_file in tqdm(data_files, desc='processing taskmaster-{}'.format(domain)):
        with open(data_file, encoding='utf-8') as f:
            data = json.load(f)
        # random split, train:validation:test = 8:1:1
        # A private generator yields the same permutation as seeding the
        # module-level one with 42, without touching global RNG state.
        rng = random.Random(42)
        dial_ids = list(range(len(data)))
        rng.shuffle(dial_ids)
        train_end = int(0.8*len(dial_ids))
        validation_end = int(0.9*len(dial_ids))
        dial_id2split = {}
        for dial_id in dial_ids[:train_end]:
            dial_id2split[dial_id] = 'train'
        for dial_id in dial_ids[train_end:validation_end]:
            dial_id2split[dial_id] = 'validation'
        for dial_id in dial_ids[validation_end:]:
            dial_id2split[dial_id] = 'test'

        for dial_id, d in enumerate(data):
            # delete empty dialogs and invalid dialogs
            if not d['utterances']:
                continue
            if len({t['speaker'] for t in d['utterances']}) == 1:
                continue
            data_split = dial_id2split[dial_id]
            dialogue_id = f'{dataset}-{data_split}-{len(dialogues_by_split[data_split])}'
            cur_domains = [domain]
            goal = {
                'description': d['instructions'],
                'inform': {},
                'request': {}
            }
            dialogue = {
                'dataset': dataset,
                'data_split': data_split,
                'dialogue_id': dialogue_id,
                'original_id': d["conversation_id"],
                'domains': cur_domains,
                'goal': goal,
                'turns': []
            }
            turns = format_turns(d['utterances'])
            prev_state = {domain: copy.deepcopy(ontology['state'][domain])}

            for utt_idx, uttr in enumerate(turns):
                speaker = uttr['speaker']
                turn = {
                    'speaker': speaker,
                    'utterance': uttr['text'],
                    'utt_idx': utt_idx,
                    'dialogue_acts': {
                        'binary': [],
                        'categorical': [],
                        'non-categorical': [],
                    },
                }
                # per-character flag: 1 where an accepted span already covers it
                in_span = [0] * len(turn['utterance'])

                if 'segments' in uttr:
                    # sort the span according to the length
                    segments = sorted(uttr['segments'], key=lambda x: len(x['text']))
                    for segment in segments:
                        # the original compared len(['annotations']), which is
                        # always 1; the check is meant for the segment's list
                        assert len(segment['annotations']) == 1, segment
                        item = segment['annotations'][0]
                        intent = 'inform'  # default intent
                        slot = item['name'].strip()
                        assert slot in ontology['domains'][domain]['slots']
                        if slot in anno2slot[domain]:
                            # binary dialog act
                            turn['dialogue_acts']['binary'].append({
                                'intent': intent,
                                'domain': domain,
                                'slot': slot,
                                'value': ''
                            })
                            continue
                        start, end = segment['start_index'], segment['end_index']
                        assert turn['utterance'][start:end] == segment['text']
                        # skip overlapped spans, keep the shortest one
                        if sum(in_span[start:end]) > 0:
                            continue
                        in_span[start:end] = [1]*(end-start)
                        turn['dialogue_acts']['non-categorical'].append({
                            'intent': intent,
                            'domain': domain,
                            'slot': slot,
                            'value': segment['text'],
                            'start': start,
                            'end': end
                        })

                turn['dialogue_acts']['non-categorical'] = sorted(turn['dialogue_acts']['non-categorical'], key=lambda x: x['start'])

                # de-duplicate binary acts within the turn and record them in the ontology
                bdas = set()
                for da in turn['dialogue_acts']['binary']:
                    da_tuple = (da['intent'], da['domain'], da['slot'], da['value'],)
                    bdas.add(da_tuple)
                    ontology['binary_dialogue_acts'].add(da_tuple)
                turn['dialogue_acts']['binary'] = [{'intent': bda[0], 'domain': bda[1], 'slot': bda[2], 'value': bda[3]} for bda in sorted(bdas)]

                for da in turn['dialogue_acts']['non-categorical']:
                    slot, value = da['slot'], da['value']
                    assert slot in prev_state[domain], da
                    prev_state[domain][slot] = value

                if speaker == 'user':
                    turn['state'] = copy.deepcopy(prev_state)
                else:
                    turn['db_results'] = {}
                    # utt_idx > 0: index -1 would wrap around to the last turn
                    if utt_idx > 0 and 'apis' in turns[utt_idx-1]:
                        turn['db_results'].setdefault(domain, [])
                        turn['db_results'][domain] += turns[utt_idx-1]['apis']

                dialogue['turns'].append(turn)
            dialogues_by_split[data_split].append(dialogue)

    ontology['binary_dialogue_acts'] = [{'intent': bda[0], 'domain': bda[1], 'slot': bda[2], 'value': bda[3]} for bda in sorted(ontology['binary_dialogue_acts'])]
    dialogues = dialogues_by_split['train']+dialogues_by_split['validation']+dialogues_by_split['test']
    # close every output file before zipping so the archive sees flushed content
    with open('dummy_data.json', 'w', encoding='utf-8') as f:
        json.dump(dialogues[:10], f, indent=2, ensure_ascii=False)
    with open(f'{new_data_dir}/dialogues.json', 'w', encoding='utf-8') as f:
        json.dump(dialogues, f, indent=2, ensure_ascii=False)
    with open(f'{new_data_dir}/ontology.json', 'w', encoding='utf-8') as f:
        json.dump(ontology, f, indent=2, ensure_ascii=False)
    with ZipFile('data.zip', 'w', ZIP_DEFLATED) as zf:
        for filename in os.listdir(new_data_dir):
            zf.write(f'{new_data_dir}/{filename}')
    rmtree(original_data_dir)
    rmtree(new_data_dir)
    return dialogues, ontology
'__main__': + preprocess() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..b9fecf8056275f8430c88d292bbb26565cba59a5 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,213 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +allennlp==2.8.0 +argon2-cffi==21.1.0 +argon2-cffi-bindings==21.2.0 +async-timeout==4.0.1 +attrs==21.2.0 +autopep8==1.6.0 +backcall==0.2.0 +backports.csv==1.0.7 +base58==2.1.1 +beautifulsoup4==4.10.0 +bert-score==0.3.11 +bleach==4.1.0 +blis==0.7.5 +boto3==1.20.14 +botocore==1.23.14 +cached-path==0.3.2 +cachetools==4.2.4 +catalogue==2.0.6 +certifi==2021.10.8 +cffi==1.15.0 +chardet==4.0.0 +charset-normalizer==2.0.8 +checklist==0.0.11 +cheroot==8.5.2 +CherryPy==18.6.1 +click==8.0.3 +colorama==0.4.4 +configparser==5.1.0 +cryptography==36.0.0 +cycler==0.11.0 +cymem==2.0.6 +datasets==1.16.1 +debugpy==1.5.1 +decorator==5.1.0 +deepspeech==0.9.3 +defusedxml==0.7.1 +dill==0.3.4 +docker-pycreds==0.4.0 +embeddings==0.0.8 +entrypoints==0.3 +fairscale==0.4.0 +feedparser==6.0.8 +filelock==3.3.2 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +fuzzywuzzy==0.18.0 +gitdb==4.0.9 +GitPython==3.1.24 +google-api-core==2.2.2 +google-auth==2.3.3 +google-cloud-core==2.2.1 +google-cloud-storage==1.43.0 +google-crc32c==1.3.0 +google-resumable-media==2.1.0 +googleapis-common-protos==1.53.0 +gTTS==2.2.3 +h5py==3.6.0 +huggingface-hub==0.1.2 +idna==3.3 +iniconfig==1.1.1 +ipykernel==6.5.1 +ipython==7.30.0 +ipython-genutils==0.2.0 +ipywidgets==7.6.5 +iso-639==0.4.5 +jaraco.classes==3.2.1 +jaraco.collections==3.4.0 +jaraco.functools==3.4.0 +jaraco.text==3.6.0 +jedi==0.18.1 +jieba==0.42.1 +Jinja2==3.0.3 +jmespath==0.10.0 +joblib==1.1.0 +json-lines==0.5.0 +jsonnet==0.17.0 +jsonpatch==1.32 +jsonpointer==2.2 +jsonschema==4.2.1 +jupyter==1.0.0 +jupyter-client==7.1.0 +jupyter-console==6.4.0 +jupyter-core==4.9.1 +jupyterlab-pygments==0.1.2 +jupyterlab-widgets==1.0.2 +kiwisolver==1.3.2 
+langcodes==3.3.0 +lmdb==1.2.1 +lxml==4.6.4 +MarkupSafe==2.0.1 +matplotlib==3.5.1 +matplotlib-inline==0.1.3 +mistune==0.8.4 +more-itertools==8.12.0 +multidict==5.2.0 +multiprocess==0.70.12.2 +munch==2.5.0 +murmurhash==1.0.6 +nbclient==0.5.9 +nbconvert==6.3.0 +nbformat==5.1.3 +nest-asyncio==1.5.1 +nltk==3.6.5 +notebook==6.4.6 +numpy==1.21.4 +overrides==3.1.0 +packaging==21.3 +pandas==1.3.4 +pandocfilters==1.5.0 +parso==0.8.2 +pathtools==0.1.2 +pathy==0.6.1 +patternfork-nosql==3.6 +pdfminer.six==20211012 +pexpect==4.8.0 +pickleshare==0.7.5 +Pillow==8.4.0 +pipdeptree==2.2.0 +pluggy==1.0.0 +portalocker==2.3.2 +portend==3.1.0 +preshed==3.0.6 +prometheus-client==0.12.0 +promise==2.3 +prompt-toolkit==3.0.23 +protobuf==3.19.1 +psutil==5.8.0 +ptyprocess==0.7.0 +py==1.11.0 +pyarrow==6.0.1 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pycodestyle==2.8.0 +pycparser==2.21 +pydantic==1.8.2 +pydub==0.25.1 +Pygments==2.10.0 +pyparsing==3.0.6 +pyrsistent==0.18.0 +pytest==6.2.5 +python-dateutil==2.8.2 +python-docx==0.8.11 +python-Levenshtein==0.12.2 +pytokenizations==0.8.4 +pytz==2021.3 +PyYAML==6.0 +pyzmq==22.3.0 +qtconsole==5.2.1 +QtPy==1.11.2 +quadprog==0.1.10 +regex==2021.11.10 +requests==2.26.0 +rouge-score==0.0.4 +rsa==4.8 +s3transfer==0.5.0 +sacrebleu==2.0.0 +sacremoses==0.0.46 +scikit-learn==1.0.1 +scipy==1.7.3 +Send2Trash==1.8.0 +sentencepiece==0.1.96 +sentry-sdk==1.5.0 +seqeval==1.2.2 +sgmllib3k==1.0.0 +shortuuid==1.0.8 +simplejson==3.17.6 +six==1.16.0 +smart-open==5.2.1 +smmap==5.0.0 +soupsieve==2.3.1 +spacy==3.1.4 +spacy-legacy==3.0.8 +spacy-loggers==1.0.1 +sqlitedict==1.7.0 +srsly==2.4.2 +subprocess32==3.5.4 +tabulate==0.8.9 +tempora==4.1.2 +tensorboardX==2.4.1 +termcolor==1.1.0 +terminado==0.12.1 +testpath==0.5.0 +thinc==8.0.13 +threadpoolctl==3.0.0 +tokenizers==0.10.3 +toml==0.10.2 +torch==1.8.1+cu101 +torchfile==0.1.0 +torchvision==0.9.1+cu101 +tornado==6.1 +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.12.5 +typer==0.4.0 +typing_extensions==4.0.0 +Unidecode==1.3.2 
+urllib3==1.26.7 +visdom==0.1.8.9 +wandb==0.12.7 +wasabi==0.8.2 +wcwidth==0.2.5 +webencodings==0.5.1 +websocket-client==1.2.1 +widgetsnbextension==3.5.2 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zc.lockfile==2.0 diff --git a/setup.py b/setup.py index fda3338b031df119318cabe9c11892eea812bed4..2e426a70fc9a9e6c74bbd1f5a5ae0e4f7cffc8d4 100755 --- a/setup.py +++ b/setup.py @@ -42,6 +42,8 @@ setup( 'scipy', 'torch>=1.6', 'transformers>=4.0', + 'datasets>=1.8', + 'seqeval', 'spacy', 'allennlp', 'simplejson',