diff --git a/convlab/util/__init__.py b/convlab/util/__init__.py index 6a84b7db276389d9bbcd6ba097a0b7bb00440a48..1688e21b80c08562a3ca1e5ca45fd181b57cbc98 100755 --- a/convlab/util/__init__.py +++ b/convlab/util/__init__.py @@ -1,3 +1 @@ -from convlab.util.unified_datasets_util import load_dataset, load_ontology, load_database, \ - load_unified_data, load_nlu_data, load_dst_data, load_policy_data, load_nlg_data, load_e2e_data, load_rg_data, \ - download_unified_datasets, relative_import_module_from_unified_datasets \ No newline at end of file +from convlab.util.unified_datasets_util import * \ No newline at end of file diff --git a/convlab/util/unified_datasets_util.py b/convlab/util/unified_datasets_util.py index 1e3b0c20bd959ea3098b07b813ed98189aac840f..e24658410738b290da97149382c8c89030936679 100644 --- a/convlab/util/unified_datasets_util.py +++ b/convlab/util/unified_datasets_util.py @@ -65,12 +65,14 @@ def relative_import_module_from_unified_datasets(dataset_name, filename, names2i variables.append(eval(f'module.{name}')) return variables -def load_dataset(dataset_name:str, dial_ids_order=None) -> Dict: +def load_dataset(dataset_name:str, dial_ids_order=None, split2ratio={}) -> Dict: """load unified dataset from `data/unified_datasets/$dataset_name` Args: dataset_name (str): unique dataset name in `data/unified_datasets` dial_ids_order (int): idx of shuffled dial order in `data/unified_datasets/$dataset_name/shuffled_dial_ids.json` + split2ratio (dict): a dictionary that maps the data split to the ratio of the data you want to use. 
class Database(BaseDatabase):
    """CamRest restaurant database backed by ``data/CamRestDB.json`` inside ``data.zip``.

    Attributes are set in ``__init__``:
      * ``dbs``: maps the domain name ``'restaurant'`` to the list of records
        parsed from the JSON file.
      * ``slot2dbattr``: maps dialogue-state slot names to database attribute
        names (only ``'price range'`` -> ``'pricerange'``).
    """

    def __init__(self):
        """Extract data.zip and load the database."""
        data_path = download_unified_datasets('camrest', 'data.zip', os.path.dirname(os.path.abspath(__file__)))
        self.dbs = {}
        # Use context managers so that both the archive and the member stream
        # are closed once the JSON has been parsed (the archive used to be
        # left open for the lifetime of the process).
        with ZipFile(data_path) as archive, archive.open('data/CamRestDB.json') as f:
            self.dbs['restaurant'] = json.loads(f.read())
        self.slot2dbattr = {
            'price range': 'pricerange',
        }

    def query(self, domain: str, state: dict, topk: int, ignore_open=False, soft_contraints=(), fuzzy_match_ratio=60) -> list:
        """Return a list of at most ``topk`` entities matching the dialogue state.

        Args:
            domain: must be ``'restaurant'``; anything else fails the assertion.
            state: hard constraints. NOTE(review): annotated as ``dict`` but
                consumed as an iterable of ``(slot, value)`` pairs, e.g.
                ``[['price range', 'expensive']]`` as in the ``__main__`` demo
                below -- confirm the intended type against ``BaseDatabase``.
                Slot names are translated through ``self.slot2dbattr`` and the
                pair ``('area', 'center')`` is rewritten to ``('area', 'centre')``.
            topk: stop and return as soon as this many entities are found.
            ignore_open: accepted for signature compatibility; not read here.
            soft_contraints: ``(attribute, value)`` pairs compared with
                ``fuzz.partial_ratio`` instead of exact equality. They are NOT
                translated through ``self.slot2dbattr``.
            fuzzy_match_ratio: minimum ``fuzz.partial_ratio`` score for a soft
                constraint to count as satisfied.

        Returns:
            Deep copies of the matching records (dicts of slot-value pairs),
            each with an added ``'Ref'`` key holding the record index as a
            zero-padded 8-digit string.
        """
        # query the db
        assert domain == 'restaurant', 'the camrest database only contains the restaurant domain'

        # Values that mean "no constraint on this slot". Kept as a tuple so the
        # membership test uses == and never requires `val` to be hashable.
        no_constraint_values = ("", "dont care", 'not mentioned', "don't care", "dontcare", "do n't care")

        hard_constraints = [
            ('area', 'centre') if ele[0] == 'area' and ele[1] == 'center'
            else (self.slot2dbattr.get(ele[0], ele[0]), ele[1])
            for ele in state
        ]
        # Each entry is ((key, value), use_fuzzy_match). Built once instead of
        # once per database record.
        constraints = [(pair, False) for pair in hard_constraints]
        constraints.extend((pair, True) for pair in soft_contraints)

        found = []
        for i, record in enumerate(self.dbs[domain]):
            for (key, val), fuzzy_match in constraints:
                if val in no_constraint_values:
                    continue
                try:
                    if key not in record:
                        # The record has no such attribute: treat as satisfied.
                        continue
                    db_value = record[key].strip()
                    if db_value == '?':
                        # '?' matches any constraint
                        continue
                    wanted = val.strip().lower()
                    db_value = db_value.lower()
                    if fuzzy_match:
                        if fuzz.partial_ratio(wanted, db_value) < fuzzy_match_ratio:
                            break
                    elif wanted != db_value:
                        break
                except Exception:
                    # Best effort: a constraint that cannot be evaluated (e.g. a
                    # non-string value) is skipped rather than rejecting the
                    # record. Unlike a bare `except:`, this lets
                    # KeyboardInterrupt / SystemExit propagate.
                    continue
            else:
                # No constraint broke out of the loop: the record matches.
                res = deepcopy(record)
                res['Ref'] = '{0:08d}'.format(i)
                found.append(res)
                if len(found) == topk:
                    return found
        return found


if __name__ == '__main__':
    # Smoke test: load the database and run one query.
    db = Database()
    assert issubclass(Database, BaseDatabase)
    assert isinstance(db, BaseDatabase)
    res = db.query("restaurant", [['price range', 'expensive']], topk=3)
    print(res, len(res))
a/data/unified_datasets/camrest/ontology.json +++ /dev/null @@ -1,122 +0,0 @@ -{ - "domains": { - "restaurant": { - "description": "find a restaurant to eat", - "slots": { - "food": { - "description": "food type the restaurant serves", - "is_categorical": false, - "possible_values": [] - }, - "area": { - "description": "area where the restaurant is located", - "is_categorical": true, - "possible_values": [ - "north", - "east", - "west", - "south", - "centre" - ] - }, - "name": { - "description": "name of the restaurant", - "is_categorical": false, - "possible_values": [] - }, - "pricerange": { - "description": "price range of the restaurant", - "is_categorical": true, - "possible_values": [ - "cheap", - "moderate", - "expensive" - ] - }, - "phone": { - "description": "phone number of the restaurant", - "is_categorical": false, - "possible_values": [] - }, - "address": { - "description": "exact location of the restaurant", - "is_categorical": false, - "possible_values": [] - }, - "postcode": { - "description": "postal code of the restaurant", - "is_categorical": false, - "possible_values": [] - } - } - } - }, - "intents": { - "inform": { - "description": "inform user of value of a slot" - }, - "request": { - "description": "ask for value of a slot" - }, - "nooffer": { - "description": "inform user that no restaurant matches his request" - } - }, - "binary_dialogue_act": [ - { - "intent": "request", - "domain": "restaurant", - "slot": "food", - "value": "" - }, - { - "intent": "request", - "domain": "restaurant", - "slot": "address", - "value": "" - }, - { - "intent": "nooffer", - "domain": "restaurant", - "slot": "", - "value": "" - }, - { - "intent": "request", - "domain": "restaurant", - "slot": "area", - "value": "" - }, - { - "intent": "request", - "domain": "restaurant", - "slot": "phone", - "value": "" - }, - { - "intent": "request", - "domain": "restaurant", - "slot": "pricerange", - "value": "" - }, - { - "intent": "request", - "domain": "restaurant", - 
"slot": "postcode", - "value": "" - }, - { - "intent": "request", - "domain": "restaurant", - "slot": "name", - "value": "" - } - ], - "state": { - "restaurant": { - "pricerange": "", - "area": "", - "food": "" - } - } -} \ No newline at end of file diff --git a/data/unified_datasets/taskmaster/README.md b/data/unified_datasets/taskmaster/README.md deleted file mode 100644 index 303a82317a50d1aa4f40bd625ad17ccf97da392a..0000000000000000000000000000000000000000 --- a/data/unified_datasets/taskmaster/README.md +++ /dev/null @@ -1,26 +0,0 @@ -# README - -## Features - -- Annotations: character-level span for non-categorical slots. No slot descriptions. - -Statistics: - -| | \# dialogues | \# utterances | avg. turns | avg. tokens | \# domains | -| ----- | ------------ | ------------- | ---------- | ----------- | ---------- | -| train | 30483 | 540311 | 17.72 | 9.18 | 13 | - -## Main changes - -- each speaker for one turn -- intent is set to **inform** -- not annotate state and state upadte -- span info is provided by original data - -## Original data - -https://github.com/google-research-datasets/Taskmaster - -TM-1: https://github.com/google-research-datasets/Taskmaster/tree/master/TM-1-2019 - -TM-2: https://github.com/google-research-datasets/Taskmaster/tree/master/TM-2-2020 \ No newline at end of file diff --git a/data/unified_datasets/taskmaster/data.zip b/data/unified_datasets/taskmaster/data.zip deleted file mode 100644 index f52a3808df8a413962c71f305c64d437eb196e00..0000000000000000000000000000000000000000 Binary files a/data/unified_datasets/taskmaster/data.zip and /dev/null differ diff --git a/data/unified_datasets/taskmaster/ontology.json b/data/unified_datasets/taskmaster/ontology.json deleted file mode 100644 index 9b5532e2bbcc36039486954e5b7bee1205a33cb5..0000000000000000000000000000000000000000 --- a/data/unified_datasets/taskmaster/ontology.json +++ /dev/null @@ -1,1168 +0,0 @@ -{ - "domains": { - "uber_lyft": { - "description": "order a car for a ride 
inside a city", - "slots": { - "location.from": { - "description": "pickup location", - "is_categorical": false, - "possible_values": [], - "count": 5764, - "in original ontology": true - }, - "location.to": { - "description": "destination of the ride", - "is_categorical": false, - "possible_values": [], - "count": 6026, - "in original ontology": true - }, - "type.ride": { - "description": "type of ride", - "is_categorical": false, - "possible_values": [], - "count": 13317, - "in original ontology": true - }, - "num.people": { - "description": "number of people", - "is_categorical": false, - "possible_values": [], - "count": 2636, - "in original ontology": true - }, - "price.estimate": { - "description": "estimated cost of the ride", - "is_categorical": false, - "possible_values": [], - "count": 4996, - "in original ontology": true - }, - "duration.estimate": { - "description": "estimated duration of the ride", - "is_categorical": false, - "possible_values": [], - "count": 1152, - "in original ontology": true - }, - "time.pickup": { - "description": "time of pickup", - "is_categorical": false, - "possible_values": [], - "count": 4303, - "in original ontology": true - }, - "time.dropoff": { - "description": "time of dropoff", - "is_categorical": false, - "possible_values": [], - "count": 111, - "in original ontology": true - } - } - }, - "movie_ticket": { - "description": "book movie tickets for a film", - "slots": { - "name.movie": { - "description": "name of the movie", - "is_categorical": false, - "possible_values": [], - "count": 8959, - "in original ontology": true - }, - "name.theater": { - "description": "name of the theater", - "is_categorical": false, - "possible_values": [], - "count": 6842, - "in original ontology": true - }, - "num.tickets": { - "description": "number of tickets", - "is_categorical": false, - "possible_values": [], - "count": 7368, - "in original ontology": true - }, - "time.start": { - "description": "start time of the movie", - 
"is_categorical": false, - "possible_values": [], - "count": 14820, - "in original ontology": true - }, - "location.theater": { - "description": "location of the theater", - "is_categorical": false, - "possible_values": [], - "count": 5295, - "in original ontology": true - }, - "price.ticket": { - "description": "price of the ticket", - "is_categorical": false, - "possible_values": [], - "count": 2418, - "in original ontology": true - }, - "type.screening": { - "description": "type of the screening", - "is_categorical": false, - "possible_values": [], - "count": 4579, - "in original ontology": true - }, - "time.end": { - "description": "end time of the movie", - "is_categorical": false, - "possible_values": [], - "count": 10, - "in original ontology": true - }, - "time.duration": { - "description": "duration of the movie", - "is_categorical": false, - "possible_values": [], - "count": 250, - "in original ontology": true - } - } - }, - "restaurant_reservation": { - "description": "searching for a restaurant and make reservation", - "slots": { - "name.restaurant": { - "description": "name of the restaurant", - "is_categorical": false, - "possible_values": [], - "count": 14544, - "in original ontology": true - }, - "name.reservation": { - "description": "name of the person who make the reservation", - "is_categorical": false, - "possible_values": [], - "count": 609, - "in original ontology": true - }, - "num.guests": { - "description": "number of guests", - "is_categorical": false, - "possible_values": [], - "count": 6962, - "in original ontology": true - }, - "time.reservation": { - "description": "time of the reservation", - "is_categorical": false, - "possible_values": [], - "count": 12073, - "in original ontology": true - }, - "type.seating": { - "description": "type of the seating", - "is_categorical": false, - "possible_values": [], - "count": 8983, - "in original ontology": true - }, - "location.restaurant": { - "description": "location of the restaurant", - 
"is_categorical": false, - "possible_values": [], - "count": 4311, - "in original ontology": true - } - } - }, - "coffee_ordering": { - "description": "order a coffee drink from either Starbucks or Peets for pick up", - "slots": { - "location.store": { - "description": "location of the coffee store", - "is_categorical": false, - "possible_values": [], - "count": 5510, - "in original ontology": true - }, - "name.drink": { - "description": "name of the drink", - "is_categorical": false, - "possible_values": [], - "count": 9182, - "in original ontology": true - }, - "size.drink": { - "description": "size of the drink", - "is_categorical": false, - "possible_values": [], - "count": 7804, - "in original ontology": true - }, - "num.drink": { - "description": "number of drinks", - "is_categorical": false, - "possible_values": [], - "count": 848, - "in original ontology": true - }, - "type.milk": { - "description": "type of the milk", - "is_categorical": false, - "possible_values": [], - "count": 8433, - "in original ontology": true - }, - "preference": { - "description": "user preference of the drink", - "is_categorical": false, - "possible_values": [], - "count": 11266, - "in original ontology": true - } - } - }, - "pizza_ordering": { - "description": "order a pizza", - "slots": { - "name.store": { - "description": "name of the pizza store", - "is_categorical": false, - "possible_values": [], - "count": 5127, - "in original ontology": true - }, - "name.pizza": { - "description": "name of the pizza", - "is_categorical": false, - "possible_values": [], - "count": 9208, - "in original ontology": true - }, - "size.pizza": { - "description": "size of the pizza", - "is_categorical": false, - "possible_values": [], - "count": 9661, - "in original ontology": true - }, - "type.topping": { - "description": "type of the topping", - "is_categorical": false, - "possible_values": [], - "count": 20639, - "in original ontology": true - }, - "type.crust": { - "description": "type of the 
crust", - "is_categorical": false, - "possible_values": [], - "count": 5099, - "in original ontology": true - }, - "preference": { - "description": "user preference of the pizza", - "is_categorical": false, - "possible_values": [], - "count": 4998, - "in original ontology": true - }, - "location.store": { - "description": "location of the pizza store", - "is_categorical": false, - "possible_values": [], - "count": 1487, - "in original ontology": true - } - } - }, - "auto_repair": { - "description": "set up an auto repair appointment with a repair shop", - "slots": { - "name.store": { - "description": "name of the repair store", - "is_categorical": false, - "possible_values": [], - "count": 4005, - "in original ontology": true - }, - "name.customer": { - "description": "name of the customer", - "is_categorical": false, - "possible_values": [], - "count": 4547, - "in original ontology": true - }, - "date.appt": { - "description": "date of the appointment", - "is_categorical": false, - "possible_values": [], - "count": 7650, - "in original ontology": true - }, - "time.appt": { - "description": "time of the appointment", - "is_categorical": false, - "possible_values": [], - "count": 9827, - "in original ontology": true - }, - "reason.appt": { - "description": "reason of the appointment", - "is_categorical": false, - "possible_values": [], - "count": 6509, - "in original ontology": true - }, - "name.vehicle": { - "description": "name of the vehicle", - "is_categorical": false, - "possible_values": [], - "count": 5262, - "in original ontology": true - }, - "year.vehicle": { - "description": "year of the vehicle", - "is_categorical": false, - "possible_values": [], - "count": 4561, - "in original ontology": true - }, - "location.store": { - "description": "location of the repair store", - "is_categorical": false, - "possible_values": [], - "count": 709, - "in original ontology": true - } - } - }, - "flights": { - "description": "find a round trip or multi-city flights", - 
"slots": { - "type": { - "description": "type of the flight", - "is_categorical": false, - "possible_values": [], - "count": 1999, - "in original ontology": true - }, - "destination1": { - "description": "the first destination city of the trip", - "is_categorical": false, - "possible_values": [], - "count": 3993, - "in original ontology": true - }, - "destination2": { - "description": "the second destination city of the trip", - "is_categorical": false, - "possible_values": [], - "count": 128, - "in original ontology": true - }, - "origin": { - "description": "the origin city of the trip", - "is_categorical": false, - "possible_values": [], - "count": 2595, - "in original ontology": true - }, - "date.depart_origin": { - "description": "date of departure from origin", - "is_categorical": false, - "possible_values": [], - "count": 3177, - "in original ontology": true - }, - "date.depart_intermediate": { - "description": "date of departure from intermediate", - "is_categorical": false, - "possible_values": [], - "count": 48, - "in original ontology": true - }, - "date.return": { - "description": "date of return", - "is_categorical": false, - "possible_values": [], - "count": 2822, - "in original ontology": true - }, - "time_of_day": { - "description": "time of the flight", - "is_categorical": false, - "possible_values": [], - "count": 3840, - "in original ontology": true - }, - "seating_class": { - "description": "seat type (first class, business class, economy class, etc.", - "is_categorical": false, - "possible_values": [], - "count": 3626, - "in original ontology": true - }, - "seat_location": { - "description": "location of the seat", - "is_categorical": false, - "possible_values": [], - "count": 313, - "in original ontology": true - }, - "stops": { - "description": "non-stop, layovers, etc.", - "is_categorical": false, - "possible_values": [], - "count": 6174, - "in original ontology": true - }, - "price_range": { - "description": "price range of the flight", - 
"is_categorical": false, - "possible_values": [], - "count": 2646, - "in original ontology": true - }, - "num.pax": { - "description": "number of people", - "is_categorical": false, - "possible_values": [], - "count": 437, - "in original ontology": true - }, - "luggage": { - "description": "luggage information", - "is_categorical": false, - "possible_values": [], - "count": 17, - "in original ontology": true - }, - "total_fare": { - "description": "total cost of the trip", - "is_categorical": false, - "possible_values": [], - "count": 1238, - "in original ontology": true - }, - "other_description": { - "description": "other description of the flight", - "is_categorical": false, - "possible_values": [], - "count": 2620, - "in original ontology": true - }, - "from": { - "description": "departure of the flight", - "is_categorical": false, - "possible_values": [], - "count": 1293, - "in original ontology": true - }, - "to": { - "description": "destination of the flight", - "is_categorical": false, - "possible_values": [], - "count": 1979, - "in original ontology": true - }, - "airline": { - "description": "airline of the flight", - "is_categorical": false, - "possible_values": [], - "count": 3981, - "in original ontology": true - }, - "flight_number": { - "description": "the number of the flight", - "is_categorical": false, - "possible_values": [], - "count": 42, - "in original ontology": true - }, - "date": { - "description": "date of the flight", - "is_categorical": false, - "possible_values": [], - "count": 756, - "in original ontology": true - }, - "from.time": { - "description": "departure time of the flight", - "is_categorical": false, - "possible_values": [], - "count": 6440, - "in original ontology": true - }, - "to.time": { - "description": "arrival time of the flight", - "is_categorical": false, - "possible_values": [], - "count": 2571, - "in original ontology": true - }, - "stops.location": { - "description": "location of the stop", - "is_categorical": 
false, - "possible_values": [], - "count": 1097, - "in original ontology": true - }, - "fare": { - "description": "cost of the flight", - "is_categorical": false, - "possible_values": [], - "count": 1475, - "in original ontology": true - } - } - }, - "food_order": { - "description": "order take-out for a particular cuisine choice", - "slots": { - "name.item": { - "description": "name of the item", - "is_categorical": false, - "possible_values": [], - "count": 6080, - "in original ontology": true - }, - "other_description.item": { - "description": "other description of the item", - "is_categorical": false, - "possible_values": [], - "count": 1474, - "in original ontology": true - }, - "type.retrieval": { - "description": "type of the retrieval method", - "is_categorical": false, - "possible_values": [], - "count": 1868, - "in original ontology": true - }, - "total_price": { - "description": "total price", - "is_categorical": false, - "possible_values": [], - "count": 5, - "in original ontology": true - }, - "time.pickup": { - "description": "pick up time", - "is_categorical": false, - "possible_values": [], - "count": 981, - "in original ontology": true - }, - "num.people": { - "description": "number of people", - "is_categorical": false, - "possible_values": [], - "count": 880, - "in original ontology": true - }, - "name.restaurant": { - "description": "name of the restaurant", - "is_categorical": false, - "possible_values": [], - "count": 106, - "in original ontology": true - }, - "type.food": { - "description": "type of food", - "is_categorical": false, - "possible_values": [], - "count": 1247, - "in original ontology": true - }, - "type.meal": { - "description": "type of meal", - "is_categorical": false, - "possible_values": [], - "count": 64, - "in original ontology": true - }, - "location.restaurant": { - "description": "location of the restaurant", - "is_categorical": false, - "possible_values": [], - "count": 8, - "in original ontology": true - }, - 
"rating.restaurant": { - "description": "rating of the restaurant", - "is_categorical": false, - "possible_values": [], - "count": 5, - "in original ontology": true - }, - "price_range": { - "description": "price range of the food", - "is_categorical": false, - "possible_values": [], - "count": 5, - "in original ontology": true - } - } - }, - "hotel": { - "description": "find a hotel using typical preferences", - "slots": { - "name.hotel": { - "description": "name of the hotel", - "is_categorical": false, - "possible_values": [], - "count": 5241, - "in original ontology": true - }, - "location.hotel": { - "description": "location of the hotel", - "is_categorical": false, - "possible_values": [], - "count": 2940, - "in original ontology": true - }, - "sub_location.hotel": { - "description": "rough location of the hotel", - "is_categorical": false, - "possible_values": [], - "count": 1869, - "in original ontology": true - }, - "star_rating": { - "description": "star rating of the hotel", - "is_categorical": false, - "possible_values": [], - "count": 2049, - "in original ontology": true - }, - "customer_rating": { - "description": "customer rating of the hotel", - "is_categorical": false, - "possible_values": [], - "count": 1239, - "in original ontology": true - }, - "price_range": { - "description": "price range of the hotel", - "is_categorical": false, - "possible_values": [], - "count": 2357, - "in original ontology": true - }, - "amenity": { - "description": "amenity of the hotel", - "is_categorical": false, - "possible_values": [], - "count": 19030, - "in original ontology": true - }, - "num.beds": { - "description": "number of beds to book", - "is_categorical": false, - "possible_values": [], - "count": 733, - "in original ontology": true - }, - "type.bed": { - "description": "type of the bed", - "is_categorical": false, - "possible_values": [], - "count": 1423, - "in original ontology": true - }, - "num.rooms": { - "description": "number of rooms to book", - 
"is_categorical": false, - "possible_values": [], - "count": 256, - "in original ontology": true - }, - "check-in_date": { - "description": "check-in date", - "is_categorical": false, - "possible_values": [], - "count": 608, - "in original ontology": true - }, - "check-out_date": { - "description": "check-out date", - "is_categorical": false, - "possible_values": [], - "count": 428, - "in original ontology": true - }, - "date_range": { - "description": "date range of the reservation", - "is_categorical": false, - "possible_values": [], - "count": 2458, - "in original ontology": true - }, - "num.guests": { - "description": "number of guests", - "is_categorical": false, - "possible_values": [], - "count": 1323, - "in original ontology": true - }, - "type.room": { - "description": "type of the room", - "is_categorical": false, - "possible_values": [], - "count": 1840, - "in original ontology": true - }, - "price_per_night": { - "description": "price per night", - "is_categorical": false, - "possible_values": [], - "count": 2357, - "in original ontology": true - }, - "total_fare": { - "description": "total fare", - "is_categorical": false, - "possible_values": [], - "count": 28, - "in original ontology": true - }, - "location": { - "description": "location of the hotel", - "is_categorical": false, - "possible_values": [], - "count": 352, - "in original ontology": true - } - } - }, - "movie": { - "description": "find a movie to watch in theaters or using a streaming service at home", - "slots": { - "name.movie": { - "description": "name of the movie", - "is_categorical": false, - "possible_values": [], - "count": 13413, - "in original ontology": true - }, - "genre": { - "description": "genre of the movie", - "is_categorical": false, - "possible_values": [], - "count": 4982, - "in original ontology": true - }, - "name.theater": { - "description": "name of the theater", - "is_categorical": false, - "possible_values": [], - "count": 2371, - "in original ontology": true - 
}, - "location.theater": { - "description": "location of the theater", - "is_categorical": false, - "possible_values": [], - "count": 2894, - "in original ontology": true - }, - "time.start": { - "description": "start time of the movie", - "is_categorical": false, - "possible_values": [], - "count": 6455, - "in original ontology": true - }, - "time.end": { - "description": "end time of the movie", - "is_categorical": false, - "possible_values": [], - "count": 3, - "in original ontology": true - }, - "price.ticket": { - "description": "price of the ticket", - "is_categorical": false, - "possible_values": [], - "count": 989, - "in original ontology": true - }, - "price.streaming": { - "description": "price of the streaming", - "is_categorical": false, - "possible_values": [], - "count": 397, - "in original ontology": true - }, - "type.screening": { - "description": "type of the screening", - "is_categorical": false, - "possible_values": [], - "count": 1419, - "in original ontology": true - }, - "audience_rating": { - "description": "audience rating", - "is_categorical": false, - "possible_values": [], - "count": 1506, - "in original ontology": true - }, - "movie_rating": { - "description": "film rating", - "is_categorical": false, - "possible_values": [], - "count": 273, - "in original ontology": true - }, - "release_date": { - "description": "release date of the movie", - "is_categorical": false, - "possible_values": [], - "count": 386, - "in original ontology": true - }, - "runtime": { - "description": "running time of the movie", - "is_categorical": false, - "possible_values": [], - "count": 262, - "in original ontology": true - }, - "real_person": { - "description": "name of actors, directors, etc.", - "is_categorical": false, - "possible_values": [], - "count": 3406, - "in original ontology": true - }, - "character": { - "description": "name of character in the movie", - "is_categorical": false, - "possible_values": [], - "count": 1676, - "in original ontology": 
true - }, - "streaming_service": { - "description": "streaming service that provide the movie", - "is_categorical": false, - "possible_values": [], - "count": 2729, - "in original ontology": true - }, - "num.tickets": { - "description": "number of tickets", - "is_categorical": false, - "possible_values": [], - "count": 1045, - "in original ontology": true - }, - "seating": { - "description": "type of seating", - "is_categorical": false, - "possible_values": [], - "count": 13, - "in original ontology": true - } - } - }, - "music": { - "description": "find several tracks to play and then comment on each one", - "slots": { - "name.track": { - "description": "name of the track", - "is_categorical": false, - "possible_values": [], - "count": 4916, - "in original ontology": true - }, - "name.artist": { - "description": "name of the artist", - "is_categorical": false, - "possible_values": [], - "count": 9287, - "in original ontology": true - }, - "name.album": { - "description": "name of the album", - "is_categorical": false, - "possible_values": [], - "count": 1106, - "in original ontology": true - }, - "name.genre": { - "description": "music genre", - "is_categorical": false, - "possible_values": [], - "count": 452, - "in original ontology": true - }, - "type.music": { - "description": "rough type of the music", - "is_categorical": false, - "possible_values": [], - "count": 603, - "in original ontology": true - }, - "describes_track": { - "description": "description of a track to find", - "is_categorical": false, - "possible_values": [], - "count": 2969, - "in original ontology": true - }, - "describes_artist": { - "description": "description of a artist to find", - "is_categorical": false, - "possible_values": [], - "count": 612, - "in original ontology": true - }, - "describes_album": { - "description": "description of an album to find", - "is_categorical": false, - "possible_values": [], - "count": 189, - "in original ontology": true - }, - "describes_genre": { - 
"description": "description of a genre to find", - "is_categorical": false, - "possible_values": [], - "count": 26, - "in original ontology": true - }, - "describes_type.music": { - "description": "description of the music type", - "is_categorical": false, - "possible_values": [], - "count": 375, - "in original ontology": true - } - } - }, - "restaurant": { - "description": "ask for recommendations for a particular type of cuisine", - "slots": { - "name.restaurant": { - "description": "name of the restaurant", - "is_categorical": false, - "possible_values": [], - "count": 8676, - "in original ontology": true - }, - "location": { - "description": "location of the restaurant", - "is_categorical": false, - "possible_values": [], - "count": 5165, - "in original ontology": true - }, - "sub-location": { - "description": "rough location of the restaurant", - "is_categorical": false, - "possible_values": [], - "count": 1210, - "in original ontology": true - }, - "type.food": { - "description": "the cuisine of the restaurant", - "is_categorical": false, - "possible_values": [], - "count": 12412, - "in original ontology": true - }, - "menu_item": { - "description": "item in the menu", - "is_categorical": false, - "possible_values": [], - "count": 1499, - "in original ontology": true - }, - "type.meal": { - "description": "type of meal", - "is_categorical": false, - "possible_values": [], - "count": 2677, - "in original ontology": true - }, - "rating": { - "description": "rating of the restaurant", - "is_categorical": false, - "possible_values": [], - "count": 2951, - "in original ontology": true - }, - "price_range": { - "description": "price range of the restaurant", - "is_categorical": false, - "possible_values": [], - "count": 1930, - "in original ontology": true - }, - "business_hours": { - "description": "business hours of the restaurant", - "is_categorical": false, - "possible_values": [], - "count": 2024, - "in original ontology": true - }, - "name.reservation": { - 
"description": "name of the person who make the reservation", - "is_categorical": false, - "possible_values": [], - "count": 16, - "in original ontology": true - }, - "num.guests": { - "description": "number of guests", - "is_categorical": false, - "possible_values": [], - "count": 179, - "in original ontology": true - }, - "time.reservation": { - "description": "time of the reservation", - "is_categorical": false, - "possible_values": [], - "count": 216, - "in original ontology": true - }, - "date.reservation": { - "description": "date of the reservation", - "is_categorical": false, - "possible_values": [], - "count": 130, - "in original ontology": true - }, - "type.seating": { - "description": "type of the seating", - "is_categorical": false, - "possible_values": [], - "count": 11, - "in original ontology": true - } - } - }, - "sport": { - "description": "discuss facts and stats about players, teams, games, etc. in EPL, MLB, MLS, NBA, NFL", - "slots": { - "name.team": { - "description": "name of the team", - "is_categorical": false, - "possible_values": [], - "count": 19651, - "in original ontology": true - }, - "record.team": { - "description": "record of the team (number of wins and losses)", - "is_categorical": false, - "possible_values": [], - "count": 3338, - "in original ontology": true - }, - "record.games_ahead": { - "description": "number of games ahead", - "is_categorical": false, - "possible_values": [], - "count": 33, - "in original ontology": true - }, - "record.games_back": { - "description": "number of games behind", - "is_categorical": false, - "possible_values": [], - "count": 361, - "in original ontology": true - }, - "place.team": { - "description": "ranking of the team", - "is_categorical": false, - "possible_values": [], - "count": 4075, - "in original ontology": true - }, - "result.match": { - "description": "result of the match", - "is_categorical": false, - "possible_values": [], - "count": 3245, - "in original ontology": true - }, - 
"score.match": { - "description": "score of the match", - "is_categorical": false, - "possible_values": [], - "count": 3241, - "in original ontology": true - }, - "date.match": { - "description": "date of the match", - "is_categorical": false, - "possible_values": [], - "count": 2660, - "in original ontology": true - }, - "day.match": { - "description": "day of the match", - "is_categorical": false, - "possible_values": [], - "count": 4743, - "in original ontology": true - }, - "time.match": { - "description": "time of the match", - "is_categorical": false, - "possible_values": [], - "count": 1283, - "in original ontology": true - }, - "name.player": { - "description": "name of the player", - "is_categorical": false, - "possible_values": [], - "count": 2365, - "in original ontology": true - }, - "position.player": { - "description": "position of the player", - "is_categorical": false, - "possible_values": [], - "count": 2746, - "in original ontology": true - }, - "record.player": { - "description": "record of the player", - "is_categorical": false, - "possible_values": [], - "count": 80, - "in original ontology": true - }, - "name.non_player": { - "description": "name of non-palyer such as the manager, coach", - "is_categorical": false, - "possible_values": [], - "count": 843, - "in original ontology": true - }, - "venue": { - "description": "venue of the match take place", - "is_categorical": false, - "possible_values": [], - "count": 328, - "in original ontology": true - } - } - } - }, - "intents": { - "inform": { - "description": "" - } - }, - "binary_dialogue_act": [], - "state": {} -} \ No newline at end of file diff --git a/data/unified_datasets/taskmaster/original_data.zip b/data/unified_datasets/taskmaster/original_data.zip deleted file mode 100644 index 8a36e95bf829063c7bff98404eb795107baf7b87..0000000000000000000000000000000000000000 Binary files a/data/unified_datasets/taskmaster/original_data.zip and /dev/null differ diff --git 
a/data/unified_datasets/taskmaster/preprocess.py b/data/unified_datasets/taskmaster/preprocess.py deleted file mode 100644 index 3f82931ab1781fb59ff5fb4d6757c6f5b9bfaeea..0000000000000000000000000000000000000000 --- a/data/unified_datasets/taskmaster/preprocess.py +++ /dev/null @@ -1,445 +0,0 @@ -import json -import os -import copy -import zipfile -from tqdm import tqdm -import re -from convlab.util.file_util import read_zipped_json, write_zipped_json -from pprint import pprint - -descriptions = { - "uber_lyft": { - "uber_lyft": "order a car for a ride inside a city", - "location.from": "pickup location", - "location.to": "destination of the ride", - "type.ride": "type of ride", - "num.people": "number of people", - "price.estimate": "estimated cost of the ride", - "duration.estimate": "estimated duration of the ride", - "time.pickup": "time of pickup", - "time.dropoff": "time of dropoff", - }, - "movie_ticket": { - "movie_ticket": "book movie tickets for a film", - "name.movie": "name of the movie", - "name.theater": "name of the theater", - "num.tickets": "number of tickets", - "time.start": "start time of the movie", - "location.theater": "location of the theater", - "price.ticket": "price of the ticket", - "type.screening": "type of the screening", - "time.end": "end time of the movie", - "time.duration": "duration of the movie", - }, - "restaurant_reservation": { - "restaurant_reservation": "searching for a restaurant and make reservation", - "name.restaurant": "name of the restaurant", - "name.reservation": "name of the person who make the reservation", - "num.guests": "number of guests", - "time.reservation": "time of the reservation", - "type.seating": "type of the seating", - "location.restaurant": "location of the restaurant", - }, - "coffee_ordering": { - "coffee_ordering": "order a coffee drink from either Starbucks or Peets for pick up", - "location.store": "location of the coffee store", - "name.drink": "name of the drink", - "size.drink": "size of 
the drink", - "num.drink": "number of drinks", - "type.milk": "type of the milk", - "preference": "user preference of the drink", - }, - "pizza_ordering": { - "pizza_ordering": "order a pizza", - "name.store": "name of the pizza store", - "name.pizza": "name of the pizza", - "size.pizza": "size of the pizza", - "type.topping": "type of the topping", - "type.crust": "type of the crust", - "preference": "user preference of the pizza", - "location.store": "location of the pizza store", - }, - "auto_repair": { - "auto_repair": "set up an auto repair appointment with a repair shop", - "name.store": "name of the repair store", - "name.customer": "name of the customer", - "date.appt": "date of the appointment", - "time.appt": "time of the appointment", - "reason.appt": "reason of the appointment", - "name.vehicle": "name of the vehicle", - "year.vehicle": "year of the vehicle", - "location.store": "location of the repair store", - }, - "flights": { - "flights": "find a round trip or multi-city flights", - "type": "type of the flight", - "destination1": "the first destination city of the trip", - "destination2": "the second destination city of the trip", - "origin": "the origin city of the trip", - "date.depart_origin": "date of departure from origin", - "date.depart_intermediate": "date of departure from intermediate", - "date.return": "date of return", - "time_of_day": "time of the flight", - "seating_class": "seat type (first class, business class, economy class, etc.", - "seat_location": "location of the seat", - "stops": "non-stop, layovers, etc.", - "price_range": "price range of the flight", - "num.pax": "number of people", - "luggage": "luggage information", - "total_fare": "total cost of the trip", - "other_description": "other description of the flight", - "from": "departure of the flight", - "to": "destination of the flight", - "airline": "airline of the flight", - "flight_number": "the number of the flight", - "date": "date of the flight", - "from.time": 
"departure time of the flight", - "to.time": "arrival time of the flight", - "stops.location": "location of the stop", - "fare": "cost of the flight", - }, - "food_order": { - "food_order": "order take-out for a particular cuisine choice", - "name.item": "name of the item", - "other_description.item": "other description of the item", - "type.retrieval": "type of the retrieval method", - "total_price": "total price", - "time.pickup": "pick up time", - "num.people": "number of people", - "name.restaurant": "name of the restaurant", - "type.food": "type of food", - "type.meal": "type of meal", - "location.restaurant": "location of the restaurant", - "rating.restaurant": "rating of the restaurant", - "price_range": "price range of the food", - }, - "hotel": { - "hotel": "find a hotel using typical preferences", - "name.hotel": "name of the hotel", - "location.hotel": "location of the hotel", - "sub_location.hotel": "rough location of the hotel", - "star_rating": "star rating of the hotel", - "customer_rating": "customer rating of the hotel", - "price_range": "price range of the hotel", - "amenity": "amenity of the hotel", - "num.beds": "number of beds to book", - "type.bed": "type of the bed", - "num.rooms": "number of rooms to book", - "check-in_date": "check-in date", - "check-out_date": "check-out date", - "date_range": "date range of the reservation", - "num.guests": "number of guests", - "type.room": "type of the room", - "price_per_night": "price per night", - "total_fare": "total fare", - "location": "location of the hotel", - }, - "movie": { - "movie": "find a movie to watch in theaters or using a streaming service at home", - "name.movie": "name of the movie", - "genre": "genre of the movie", - "name.theater": "name of the theater", - "location.theater": "location of the theater", - "time.start": "start time of the movie", - "time.end": "end time of the movie", - "price.ticket": "price of the ticket", - "price.streaming": "price of the streaming", - 
"type.screening": "type of the screening", - "audience_rating": "audience rating", - "movie_rating": "film rating", - "release_date": "release date of the movie", - "runtime": "running time of the movie", - "real_person": "name of actors, directors, etc.", - "character": "name of character in the movie", - "streaming_service": "streaming service that provide the movie", - "num.tickets": "number of tickets", - "seating": "type of seating", - }, - "music": { - "music": "find several tracks to play and then comment on each one", - "name.track": "name of the track", - "name.artist": "name of the artist", - "name.album": "name of the album", - "name.genre": "music genre", - "type.music": "rough type of the music", - "describes_track": "description of a track to find", - "describes_artist": "description of a artist to find", - "describes_album": "description of an album to find", - "describes_genre": "description of a genre to find", - "describes_type.music": "description of the music type", - }, - "restaurant": { - "restaurant": "ask for recommendations for a particular type of cuisine", - "name.restaurant": "name of the restaurant", - "location": "location of the restaurant", - "sub-location": "rough location of the restaurant", - "type.food": "the cuisine of the restaurant", - "menu_item": "item in the menu", - "type.meal": "type of meal", - "rating": "rating of the restaurant", - "price_range": "price range of the restaurant", - "business_hours": "business hours of the restaurant", - "name.reservation": "name of the person who make the reservation", - "num.guests": "number of guests", - "time.reservation": "time of the reservation", - "date.reservation": "date of the reservation", - "type.seating": "type of the seating", - }, - "sport": { - "sport": "discuss facts and stats about players, teams, games, etc. 
in EPL, MLB, MLS, NBA, NFL", - "name.team": "name of the team", - "record.team": "record of the team (number of wins and losses)", - "record.games_ahead": "number of games ahead", - "record.games_back": "number of games behind", - "place.team": "ranking of the team", - "result.match": "result of the match", - "score.match": "score of the match", - "date.match": "date of the match", - "day.match": "day of the match", - "time.match": "time of the match", - "name.player": "name of the player", - "position.player": "position of the player", - "record.player": "record of the player", - "name.non_player": "name of non-palyer such as the manager, coach", - "venue": "venue of the match take place", - } -} - - -def normalize_domain_name(domain): - if domain == 'auto': - return 'auto_repair' - elif domain == 'pizza': - return 'pizza_ordering' - elif domain == 'coffee': - return 'coffee_ordering' - elif domain == 'uber': - return 'uber_lyft' - elif domain == 'restaurant': - return 'restaurant_reservation' - elif domain == 'movie': - return 'movie_ticket' - elif domain == 'flights': - return 'flights' - elif domain == 'food-ordering': - return 'food_order' - elif domain == 'hotels': - return 'hotel' - elif domain == 'movies': - return 'movie' - elif domain == 'music': - return 'music' - elif domain == 'restaurant-search': - return 'restaurant' - elif domain == 'sports': - return 'sport' - assert 0 - - -def format_turns(ori_turns): - new_turns = [] - previous_speaker = None - utt_idx = 0 - for i, turn in enumerate(ori_turns): - speaker = 'system' if turn['speaker'] == 'ASSISTANT' else 'user' - turn['speaker'] = speaker - if utt_idx == 0 and speaker == 'system': - continue - if turn['text'] == '(deleted)': - continue - if not previous_speaker: - assert speaker != previous_speaker - if speaker != previous_speaker: - previous_speaker = speaker - new_turns.append(copy.deepcopy(turn)) - utt_idx += 1 - else: - # continuous speaking - last_turn = new_turns[-1] - # if 
ori_turns[i-1]['text'] == turn['text']: - # # skip repeat turn - # continue - if turn['text'] in ori_turns[i-1]['text']: - continue - index_shift = len(last_turn['text']) + 1 - last_turn['text'] += ' '+turn['text'] - if 'segments' in turn: - last_turn.setdefault('segments', []) - for segment in turn['segments']: - segment['start_index'] += index_shift - segment['end_index'] += index_shift - last_turn['segments'] += turn['segments'] - if new_turns and new_turns[-1]['speaker'] == 'system': - new_turns = new_turns[:-1] - return new_turns - - -def log_ontology(acts, ontology, ori_ontology): - for item in acts: - intent, domain, slot, value = item['intent'], item['domain'], item['slot'], item['value'] - if domain not in ontology['domains']: - ontology['domains'][domain] = {'description': "", 'slots': {}} - if slot not in ontology['domains'][domain]['slots']: - ontology['domains'][domain]['slots'][slot] = { - 'description': '', - 'is_categorical': False, - 'possible_values': [], - 'count': 1 - } - else: - ontology['domains'][domain]['slots'][slot]['count'] += 1 - ontology['domains'][domain]['slots'][slot]['in original ontology'] = slot in ori_ontology[domain] - if intent is not None and intent not in ontology['intents']: - ontology['intents'][intent] = { - "description": '' - } - - -def preprocess(): - self_dir = os.path.dirname(os.path.abspath(__file__)) - processed_dialogue = [] - ontology = {'domains': {}, - 'intents': {}, - 'binary_dialogue_act': [], - 'state': {}} - original_zipped_path = os.path.join(self_dir, 'original_data.zip') - new_dir = os.path.join(self_dir, 'original_data') - if not os.path.exists(os.path.join(self_dir, 'data.zip')) or not os.path.exists(os.path.join(self_dir, 'ontology.json')): - print('unzip to', new_dir) - print('This may take several minutes') - archive = zipfile.ZipFile(original_zipped_path, 'r') - archive.extractall(self_dir) - files = [ - ('TM-1-2019/woz-dialogs.json', 'TM-1-2019/ontology.json'), - ('TM-1-2019/self-dialogs.json', 
'TM-1-2019/ontology.json'), - ('TM-2-2020/data/flights.json', 'TM-2-2020/ontology/flights.json'), - ('TM-2-2020/data/food-ordering.json', 'TM-2-2020/ontology/food-ordering.json'), - ('TM-2-2020/data/hotels.json', 'TM-2-2020/ontology/hotels.json'), - ('TM-2-2020/data/movies.json', 'TM-2-2020/ontology/movies.json'), - ('TM-2-2020/data/music.json', 'TM-2-2020/ontology/music.json'), - ('TM-2-2020/data/restaurant-search.json', 'TM-2-2020/ontology/restaurant-search.json'), - ('TM-2-2020/data/sports.json', 'TM-2-2020/ontology/sports.json') - ] - idx_count = 1 - total = 0 - - for filename, ontology_filename in files: - data = json.load(open(os.path.join(new_dir, filename))) - ori_ontology = {} - if 'TM-1' in filename: - for domain, item in json.load(open(os.path.join(new_dir, ontology_filename))).items(): - ori_ontology[item["id"]] = {} - for slot in item["required"] + item["optional"]: - ori_ontology[item["id"]][slot] = 0 - else: - domain = normalize_domain_name(filename.split('/')[-1].split('.')[0]) - ori_ontology[domain] = {} - for _, item in json.load(open(os.path.join(new_dir, ontology_filename))).items(): - for group in item: - for anno in group["annotations"]: - ori_ontology[domain][anno] = 0 - for d in ori_ontology: - if d not in ontology['domains']: - ontology['domains'][d] = {'description': descriptions[d][d], 'slots': {}} - for s in ori_ontology[d]: - if s not in ontology['domains'][d]['slots']: - ontology['domains'][d]['slots'][s] = { - 'description': descriptions[d][s], - 'is_categorical': False, - 'possible_values': [], - 'count': 0, - 'in original ontology': True - } - # pprint(ori_ontology) - for ori_sess in tqdm(data, desc='processing taskmaster-{}'.format(filename)): - total += 1 - turns = format_turns(ori_sess['utterances']) - if not turns: - continue - if 'TM-2' in filename: - dial_domain = normalize_domain_name(filename.split('/')[-1].split('.')[0]) - else: - dial_domain = normalize_domain_name(ori_sess['instruction_id'].split('-', 1)[0]) - dialogue = 
{ - "dataset": "taskmaster", - "data_split": "train", - "dialogue_id": 'taskmaster_' + str(idx_count), - "original_id": ori_sess['conversation_id'], - "instruction_id": ori_sess['instruction_id'], - "domains": [ - dial_domain - ], - "turns": [] - } - idx_count += 1 - assert turns[0]['speaker'] == 'user' and turns[-1]['speaker'] == 'user', print(turns) - for utt_idx, uttr in enumerate(turns): - speaker = uttr['speaker'] - turn = { - 'speaker': speaker, - 'utterance': uttr['text'], - 'utt_idx': utt_idx, - 'dialogue_act': { - 'binary': [], - 'categorical': [], - 'non-categorical': [], - }, - } - if speaker == 'user': - turn['state'] = {} - turn['state_update'] = {'categorical': [], 'non-categorical': []} - - if 'segments' in uttr: - for segment in uttr['segments']: - for item in segment['annotations']: - # domain = item['name'].split('.', 1)[0] - domain = dial_domain - - # if domain != item['name'].split('.', 1)[0]: - # print(domain, item['name'].split('.', 1), dialogue["original_id"]) - # assert domain in item['name'].split('.', 1)[0] - - # if item['name'].split('.', 1)[0] != domain: - # print(domain, item['name'].split('.', 1), dialogue["original_id"]) - slot = item['name'].split('.', 1)[-1] - if slot.endswith('.accept') or slot.endswith('.reject'): - slot = slot[:-7] - if slot not in ori_ontology[domain]: - # print(domain, item['name'].split('.', 1), dialogue["original_id"]) - continue - # if domain in ori_ontology: - # ori_ontology[domain][slot] += 1 - # else: - # print(domain, item['name'].split('.', 1), dialogue["original_id"]) - # assert domain in ori_ontology, print(domain, item['name'].split('.', 1), dialogue["original_id"]) - - if not segment['text']: - print(slot) - print(segment) - print() - assert turn['utterance'][segment['start_index']:segment['end_index']] == segment['text'] - turn['dialogue_act']['non-categorical'].append({ - 'intent': 'inform', - 'domain': domain, - 'slot': slot, - 'value': segment['text'].lower(), - 'start': segment['start_index'], 
- 'end': segment['end_index'] - }) - log_ontology(turn['dialogue_act']['non-categorical'], ontology, ori_ontology) - dialogue['turns'].append(turn) - processed_dialogue.append(dialogue) - # pprint(ori_ontology) - # save ontology json - json.dump(ontology, open(os.path.join(self_dir, 'ontology.json'), 'w'), indent=2) - json.dump(processed_dialogue, open('data.json', 'w'), indent=2) - write_zipped_json(os.path.join(self_dir, 'data.zip'), 'data.json') - os.remove('data.json') - else: - # read from file - processed_dialogue = read_zipped_json(os.path.join(self_dir, 'data.zip'), 'data.json') - ontology = json.load(open(os.path.join(self_dir, 'ontology.json'))) - return processed_dialogue, ontology - -if __name__ == '__main__': - preprocess() diff --git a/data/unified_datasets/woz/ontology.json b/data/unified_datasets/woz/ontology.json deleted file mode 100644 index 8f863f679941e2bdf4347a4d6c992a4881b8ef60..0000000000000000000000000000000000000000 --- a/data/unified_datasets/woz/ontology.json +++ /dev/null @@ -1,117 +0,0 @@ -{ - "domains": { - "restaurant": { - "description": "search for a restaurant to dine", - "slots": { - "food": { - "is_categorical": false, - "possible_values": [], - "description": "food type of the restaurant" - }, - "area": { - "is_categorical": true, - "possible_values": [ - "east", - "west", - "center", - "north", - "south" - ], - "description": "area of the restaurant" - }, - "postcode": { - "is_categorical": false, - "possible_values": [], - "description": "postal code of the restaurant" - }, - "phone": { - "is_categorical": false, - "possible_values": [], - "description": "phone number of the restaurant" - }, - "address": { - "is_categorical": false, - "possible_values": [], - "description": "address of the restaurant" - }, - "price range": { - "is_categorical": true, - "possible_values": [ - "expensive", - "moderate", - "cheap" - ], - "description": "price range of the restaurant" - }, - "name": { - "is_categorical": false, - 
"possible_values": [], - "description": "name of the restaurant" - } - } - } - }, - "intents": { - "inform": { - "description": "system informs user the value of a slot" - }, - "request": { - "description": "system asks the user to provide value of a slot" - } - }, - "binary_dialogue_act": [ - { - "intent": "request", - "domain": "restaurant", - "slot": "postcode", - "value": "" - }, - { - "intent": "request", - "domain": "restaurant", - "slot": "phone", - "value": "" - }, - { - "intent": "request", - "domain": "restaurant", - "slot": "address", - "value": "" - }, - { - "intent": "request", - "domain": "restaurant", - "slot": "area", - "value": "" - }, - { - "intent": "request", - "domain": "restaurant", - "slot": "price range", - "value": "" - }, - { - "intent": "request", - "domain": "restaurant", - "slot": "food", - "value": "" - }, - { - "intent": "request", - "domain": "restaurant", - "slot": "name", - "value": "" - } - ], - "state": { - "restaurant": { - "food": "", - "area": "", - "postcode": "", - "phone": "", - "address": "", - "price range": "", - "name": "" - } - } -} \ No newline at end of file