Skip to content
Snippets Groups Projects
Commit 1be8d5dc authored by zqwerty's avatar zqwerty
Browse files

rm unused files

parent b67ccc26
No related branches found
No related tags found
No related merge requests found
{
"domains": {
"restaurant": {
"description": "find a restaurant to eat",
"slots": {
"food": {
"description": "food type the restaurant serves",
"is_categorical": false,
"possible_values": []
},
"area": {
"description": "area where the restaurant is located",
"is_categorical": true,
"possible_values": [
"north",
"east",
"west",
"south",
"centre"
]
},
"name": {
"description": "name of the restaurant",
"is_categorical": false,
"possible_values": []
},
"pricerange": {
"description": "price range of the restaurant",
"is_categorical": true,
"possible_values": [
"cheap",
"moderate",
"expensive"
]
},
"phone": {
"description": "phone number of the restaurant",
"is_categorical": false,
"possible_values": []
},
"address": {
"description": "exact location of the restaurant",
"is_categorical": false,
"possible_values": []
},
"postcode": {
"description": "postal code of the restaurant",
"is_categorical": false,
"possible_values": []
}
}
}
},
"intents": {
"inform": {
"description": "inform user of value of a slot"
},
"request": {
"description": "ask for value of a slot"
},
"nooffer": {
"description": "inform user that no restaurant matches his request"
}
},
"binary_dialogue_act": [
{
"intent": "request",
"domain": "restaurant",
"slot": "food",
"value": ""
},
{
"intent": "request",
"domain": "restaurant",
"slot": "address",
"value": ""
},
{
"intent": "nooffer",
"domain": "restaurant",
"slot": "",
"value": ""
},
{
"intent": "request",
"domain": "restaurant",
"slot": "area",
"value": ""
},
{
"intent": "request",
"domain": "restaurant",
"slot": "phone",
"value": ""
},
{
"intent": "request",
"domain": "restaurant",
"slot": "pricerange",
"value": ""
},
{
"intent": "request",
"domain": "restaurant",
"slot": "postcode",
"value": ""
},
{
"intent": "request",
"domain": "restaurant",
"slot": "name",
"value": ""
}
],
"state": {
"restaurant": {
"pricerange": "",
"area": "",
"food": ""
}
}
}
\ No newline at end of file
# README
## Features
- Annotations: character-level span for non-categorical slots. No slot descriptions.
Statistics:
| | \# dialogues | \# utterances | avg. turns | avg. tokens | \# domains |
| ----- | ------------ | ------------- | ---------- | ----------- | ---------- |
| train | 30483 | 540311 | 17.72 | 9.18 | 13 |
## Main changes
- each speaker for one turn
- intent is set to **inform**
- state and state update are not annotated
- span info is provided by original data
## Original data
https://github.com/google-research-datasets/Taskmaster
TM-1: https://github.com/google-research-datasets/Taskmaster/tree/master/TM-1-2019
TM-2: https://github.com/google-research-datasets/Taskmaster/tree/master/TM-2-2020
\ No newline at end of file
File deleted
{
"domains": {
"uber_lyft": {
"description": "order a car for a ride inside a city",
"slots": {
"location.from": {
"description": "pickup location",
"is_categorical": false,
"possible_values": [],
"count": 5764,
"in original ontology": true
},
"location.to": {
"description": "destination of the ride",
"is_categorical": false,
"possible_values": [],
"count": 6026,
"in original ontology": true
},
"type.ride": {
"description": "type of ride",
"is_categorical": false,
"possible_values": [],
"count": 13317,
"in original ontology": true
},
"num.people": {
"description": "number of people",
"is_categorical": false,
"possible_values": [],
"count": 2636,
"in original ontology": true
},
"price.estimate": {
"description": "estimated cost of the ride",
"is_categorical": false,
"possible_values": [],
"count": 4996,
"in original ontology": true
},
"duration.estimate": {
"description": "estimated duration of the ride",
"is_categorical": false,
"possible_values": [],
"count": 1152,
"in original ontology": true
},
"time.pickup": {
"description": "time of pickup",
"is_categorical": false,
"possible_values": [],
"count": 4303,
"in original ontology": true
},
"time.dropoff": {
"description": "time of dropoff",
"is_categorical": false,
"possible_values": [],
"count": 111,
"in original ontology": true
}
}
},
"movie_ticket": {
"description": "book movie tickets for a film",
"slots": {
"name.movie": {
"description": "name of the movie",
"is_categorical": false,
"possible_values": [],
"count": 8959,
"in original ontology": true
},
"name.theater": {
"description": "name of the theater",
"is_categorical": false,
"possible_values": [],
"count": 6842,
"in original ontology": true
},
"num.tickets": {
"description": "number of tickets",
"is_categorical": false,
"possible_values": [],
"count": 7368,
"in original ontology": true
},
"time.start": {
"description": "start time of the movie",
"is_categorical": false,
"possible_values": [],
"count": 14820,
"in original ontology": true
},
"location.theater": {
"description": "location of the theater",
"is_categorical": false,
"possible_values": [],
"count": 5295,
"in original ontology": true
},
"price.ticket": {
"description": "price of the ticket",
"is_categorical": false,
"possible_values": [],
"count": 2418,
"in original ontology": true
},
"type.screening": {
"description": "type of the screening",
"is_categorical": false,
"possible_values": [],
"count": 4579,
"in original ontology": true
},
"time.end": {
"description": "end time of the movie",
"is_categorical": false,
"possible_values": [],
"count": 10,
"in original ontology": true
},
"time.duration": {
"description": "duration of the movie",
"is_categorical": false,
"possible_values": [],
"count": 250,
"in original ontology": true
}
}
},
"restaurant_reservation": {
"description": "search for a restaurant and make a reservation",
"slots": {
"name.restaurant": {
"description": "name of the restaurant",
"is_categorical": false,
"possible_values": [],
"count": 14544,
"in original ontology": true
},
"name.reservation": {
"description": "name of the person who makes the reservation",
"is_categorical": false,
"possible_values": [],
"count": 609,
"in original ontology": true
},
"num.guests": {
"description": "number of guests",
"is_categorical": false,
"possible_values": [],
"count": 6962,
"in original ontology": true
},
"time.reservation": {
"description": "time of the reservation",
"is_categorical": false,
"possible_values": [],
"count": 12073,
"in original ontology": true
},
"type.seating": {
"description": "type of the seating",
"is_categorical": false,
"possible_values": [],
"count": 8983,
"in original ontology": true
},
"location.restaurant": {
"description": "location of the restaurant",
"is_categorical": false,
"possible_values": [],
"count": 4311,
"in original ontology": true
}
}
},
"coffee_ordering": {
"description": "order a coffee drink from either Starbucks or Peets for pick up",
"slots": {
"location.store": {
"description": "location of the coffee store",
"is_categorical": false,
"possible_values": [],
"count": 5510,
"in original ontology": true
},
"name.drink": {
"description": "name of the drink",
"is_categorical": false,
"possible_values": [],
"count": 9182,
"in original ontology": true
},
"size.drink": {
"description": "size of the drink",
"is_categorical": false,
"possible_values": [],
"count": 7804,
"in original ontology": true
},
"num.drink": {
"description": "number of drinks",
"is_categorical": false,
"possible_values": [],
"count": 848,
"in original ontology": true
},
"type.milk": {
"description": "type of the milk",
"is_categorical": false,
"possible_values": [],
"count": 8433,
"in original ontology": true
},
"preference": {
"description": "user preference of the drink",
"is_categorical": false,
"possible_values": [],
"count": 11266,
"in original ontology": true
}
}
},
"pizza_ordering": {
"description": "order a pizza",
"slots": {
"name.store": {
"description": "name of the pizza store",
"is_categorical": false,
"possible_values": [],
"count": 5127,
"in original ontology": true
},
"name.pizza": {
"description": "name of the pizza",
"is_categorical": false,
"possible_values": [],
"count": 9208,
"in original ontology": true
},
"size.pizza": {
"description": "size of the pizza",
"is_categorical": false,
"possible_values": [],
"count": 9661,
"in original ontology": true
},
"type.topping": {
"description": "type of the topping",
"is_categorical": false,
"possible_values": [],
"count": 20639,
"in original ontology": true
},
"type.crust": {
"description": "type of the crust",
"is_categorical": false,
"possible_values": [],
"count": 5099,
"in original ontology": true
},
"preference": {
"description": "user preference of the pizza",
"is_categorical": false,
"possible_values": [],
"count": 4998,
"in original ontology": true
},
"location.store": {
"description": "location of the pizza store",
"is_categorical": false,
"possible_values": [],
"count": 1487,
"in original ontology": true
}
}
},
"auto_repair": {
"description": "set up an auto repair appointment with a repair shop",
"slots": {
"name.store": {
"description": "name of the repair store",
"is_categorical": false,
"possible_values": [],
"count": 4005,
"in original ontology": true
},
"name.customer": {
"description": "name of the customer",
"is_categorical": false,
"possible_values": [],
"count": 4547,
"in original ontology": true
},
"date.appt": {
"description": "date of the appointment",
"is_categorical": false,
"possible_values": [],
"count": 7650,
"in original ontology": true
},
"time.appt": {
"description": "time of the appointment",
"is_categorical": false,
"possible_values": [],
"count": 9827,
"in original ontology": true
},
"reason.appt": {
"description": "reason of the appointment",
"is_categorical": false,
"possible_values": [],
"count": 6509,
"in original ontology": true
},
"name.vehicle": {
"description": "name of the vehicle",
"is_categorical": false,
"possible_values": [],
"count": 5262,
"in original ontology": true
},
"year.vehicle": {
"description": "year of the vehicle",
"is_categorical": false,
"possible_values": [],
"count": 4561,
"in original ontology": true
},
"location.store": {
"description": "location of the repair store",
"is_categorical": false,
"possible_values": [],
"count": 709,
"in original ontology": true
}
}
},
"flights": {
"description": "find a round trip or multi-city flights",
"slots": {
"type": {
"description": "type of the flight",
"is_categorical": false,
"possible_values": [],
"count": 1999,
"in original ontology": true
},
"destination1": {
"description": "the first destination city of the trip",
"is_categorical": false,
"possible_values": [],
"count": 3993,
"in original ontology": true
},
"destination2": {
"description": "the second destination city of the trip",
"is_categorical": false,
"possible_values": [],
"count": 128,
"in original ontology": true
},
"origin": {
"description": "the origin city of the trip",
"is_categorical": false,
"possible_values": [],
"count": 2595,
"in original ontology": true
},
"date.depart_origin": {
"description": "date of departure from origin",
"is_categorical": false,
"possible_values": [],
"count": 3177,
"in original ontology": true
},
"date.depart_intermediate": {
"description": "date of departure from intermediate",
"is_categorical": false,
"possible_values": [],
"count": 48,
"in original ontology": true
},
"date.return": {
"description": "date of return",
"is_categorical": false,
"possible_values": [],
"count": 2822,
"in original ontology": true
},
"time_of_day": {
"description": "time of the flight",
"is_categorical": false,
"possible_values": [],
"count": 3840,
"in original ontology": true
},
"seating_class": {
"description": "seat type (first class, business class, economy class, etc.)",
"is_categorical": false,
"possible_values": [],
"count": 3626,
"in original ontology": true
},
"seat_location": {
"description": "location of the seat",
"is_categorical": false,
"possible_values": [],
"count": 313,
"in original ontology": true
},
"stops": {
"description": "non-stop, layovers, etc.",
"is_categorical": false,
"possible_values": [],
"count": 6174,
"in original ontology": true
},
"price_range": {
"description": "price range of the flight",
"is_categorical": false,
"possible_values": [],
"count": 2646,
"in original ontology": true
},
"num.pax": {
"description": "number of people",
"is_categorical": false,
"possible_values": [],
"count": 437,
"in original ontology": true
},
"luggage": {
"description": "luggage information",
"is_categorical": false,
"possible_values": [],
"count": 17,
"in original ontology": true
},
"total_fare": {
"description": "total cost of the trip",
"is_categorical": false,
"possible_values": [],
"count": 1238,
"in original ontology": true
},
"other_description": {
"description": "other description of the flight",
"is_categorical": false,
"possible_values": [],
"count": 2620,
"in original ontology": true
},
"from": {
"description": "departure of the flight",
"is_categorical": false,
"possible_values": [],
"count": 1293,
"in original ontology": true
},
"to": {
"description": "destination of the flight",
"is_categorical": false,
"possible_values": [],
"count": 1979,
"in original ontology": true
},
"airline": {
"description": "airline of the flight",
"is_categorical": false,
"possible_values": [],
"count": 3981,
"in original ontology": true
},
"flight_number": {
"description": "the number of the flight",
"is_categorical": false,
"possible_values": [],
"count": 42,
"in original ontology": true
},
"date": {
"description": "date of the flight",
"is_categorical": false,
"possible_values": [],
"count": 756,
"in original ontology": true
},
"from.time": {
"description": "departure time of the flight",
"is_categorical": false,
"possible_values": [],
"count": 6440,
"in original ontology": true
},
"to.time": {
"description": "arrival time of the flight",
"is_categorical": false,
"possible_values": [],
"count": 2571,
"in original ontology": true
},
"stops.location": {
"description": "location of the stop",
"is_categorical": false,
"possible_values": [],
"count": 1097,
"in original ontology": true
},
"fare": {
"description": "cost of the flight",
"is_categorical": false,
"possible_values": [],
"count": 1475,
"in original ontology": true
}
}
},
"food_order": {
"description": "order take-out for a particular cuisine choice",
"slots": {
"name.item": {
"description": "name of the item",
"is_categorical": false,
"possible_values": [],
"count": 6080,
"in original ontology": true
},
"other_description.item": {
"description": "other description of the item",
"is_categorical": false,
"possible_values": [],
"count": 1474,
"in original ontology": true
},
"type.retrieval": {
"description": "type of the retrieval method",
"is_categorical": false,
"possible_values": [],
"count": 1868,
"in original ontology": true
},
"total_price": {
"description": "total price",
"is_categorical": false,
"possible_values": [],
"count": 5,
"in original ontology": true
},
"time.pickup": {
"description": "pick up time",
"is_categorical": false,
"possible_values": [],
"count": 981,
"in original ontology": true
},
"num.people": {
"description": "number of people",
"is_categorical": false,
"possible_values": [],
"count": 880,
"in original ontology": true
},
"name.restaurant": {
"description": "name of the restaurant",
"is_categorical": false,
"possible_values": [],
"count": 106,
"in original ontology": true
},
"type.food": {
"description": "type of food",
"is_categorical": false,
"possible_values": [],
"count": 1247,
"in original ontology": true
},
"type.meal": {
"description": "type of meal",
"is_categorical": false,
"possible_values": [],
"count": 64,
"in original ontology": true
},
"location.restaurant": {
"description": "location of the restaurant",
"is_categorical": false,
"possible_values": [],
"count": 8,
"in original ontology": true
},
"rating.restaurant": {
"description": "rating of the restaurant",
"is_categorical": false,
"possible_values": [],
"count": 5,
"in original ontology": true
},
"price_range": {
"description": "price range of the food",
"is_categorical": false,
"possible_values": [],
"count": 5,
"in original ontology": true
}
}
},
"hotel": {
"description": "find a hotel using typical preferences",
"slots": {
"name.hotel": {
"description": "name of the hotel",
"is_categorical": false,
"possible_values": [],
"count": 5241,
"in original ontology": true
},
"location.hotel": {
"description": "location of the hotel",
"is_categorical": false,
"possible_values": [],
"count": 2940,
"in original ontology": true
},
"sub_location.hotel": {
"description": "rough location of the hotel",
"is_categorical": false,
"possible_values": [],
"count": 1869,
"in original ontology": true
},
"star_rating": {
"description": "star rating of the hotel",
"is_categorical": false,
"possible_values": [],
"count": 2049,
"in original ontology": true
},
"customer_rating": {
"description": "customer rating of the hotel",
"is_categorical": false,
"possible_values": [],
"count": 1239,
"in original ontology": true
},
"price_range": {
"description": "price range of the hotel",
"is_categorical": false,
"possible_values": [],
"count": 2357,
"in original ontology": true
},
"amenity": {
"description": "amenity of the hotel",
"is_categorical": false,
"possible_values": [],
"count": 19030,
"in original ontology": true
},
"num.beds": {
"description": "number of beds to book",
"is_categorical": false,
"possible_values": [],
"count": 733,
"in original ontology": true
},
"type.bed": {
"description": "type of the bed",
"is_categorical": false,
"possible_values": [],
"count": 1423,
"in original ontology": true
},
"num.rooms": {
"description": "number of rooms to book",
"is_categorical": false,
"possible_values": [],
"count": 256,
"in original ontology": true
},
"check-in_date": {
"description": "check-in date",
"is_categorical": false,
"possible_values": [],
"count": 608,
"in original ontology": true
},
"check-out_date": {
"description": "check-out date",
"is_categorical": false,
"possible_values": [],
"count": 428,
"in original ontology": true
},
"date_range": {
"description": "date range of the reservation",
"is_categorical": false,
"possible_values": [],
"count": 2458,
"in original ontology": true
},
"num.guests": {
"description": "number of guests",
"is_categorical": false,
"possible_values": [],
"count": 1323,
"in original ontology": true
},
"type.room": {
"description": "type of the room",
"is_categorical": false,
"possible_values": [],
"count": 1840,
"in original ontology": true
},
"price_per_night": {
"description": "price per night",
"is_categorical": false,
"possible_values": [],
"count": 2357,
"in original ontology": true
},
"total_fare": {
"description": "total fare",
"is_categorical": false,
"possible_values": [],
"count": 28,
"in original ontology": true
},
"location": {
"description": "location of the hotel",
"is_categorical": false,
"possible_values": [],
"count": 352,
"in original ontology": true
}
}
},
"movie": {
"description": "find a movie to watch in theaters or using a streaming service at home",
"slots": {
"name.movie": {
"description": "name of the movie",
"is_categorical": false,
"possible_values": [],
"count": 13413,
"in original ontology": true
},
"genre": {
"description": "genre of the movie",
"is_categorical": false,
"possible_values": [],
"count": 4982,
"in original ontology": true
},
"name.theater": {
"description": "name of the theater",
"is_categorical": false,
"possible_values": [],
"count": 2371,
"in original ontology": true
},
"location.theater": {
"description": "location of the theater",
"is_categorical": false,
"possible_values": [],
"count": 2894,
"in original ontology": true
},
"time.start": {
"description": "start time of the movie",
"is_categorical": false,
"possible_values": [],
"count": 6455,
"in original ontology": true
},
"time.end": {
"description": "end time of the movie",
"is_categorical": false,
"possible_values": [],
"count": 3,
"in original ontology": true
},
"price.ticket": {
"description": "price of the ticket",
"is_categorical": false,
"possible_values": [],
"count": 989,
"in original ontology": true
},
"price.streaming": {
"description": "price of the streaming",
"is_categorical": false,
"possible_values": [],
"count": 397,
"in original ontology": true
},
"type.screening": {
"description": "type of the screening",
"is_categorical": false,
"possible_values": [],
"count": 1419,
"in original ontology": true
},
"audience_rating": {
"description": "audience rating",
"is_categorical": false,
"possible_values": [],
"count": 1506,
"in original ontology": true
},
"movie_rating": {
"description": "film rating",
"is_categorical": false,
"possible_values": [],
"count": 273,
"in original ontology": true
},
"release_date": {
"description": "release date of the movie",
"is_categorical": false,
"possible_values": [],
"count": 386,
"in original ontology": true
},
"runtime": {
"description": "running time of the movie",
"is_categorical": false,
"possible_values": [],
"count": 262,
"in original ontology": true
},
"real_person": {
"description": "name of actors, directors, etc.",
"is_categorical": false,
"possible_values": [],
"count": 3406,
"in original ontology": true
},
"character": {
"description": "name of character in the movie",
"is_categorical": false,
"possible_values": [],
"count": 1676,
"in original ontology": true
},
"streaming_service": {
"description": "streaming service that provides the movie",
"is_categorical": false,
"possible_values": [],
"count": 2729,
"in original ontology": true
},
"num.tickets": {
"description": "number of tickets",
"is_categorical": false,
"possible_values": [],
"count": 1045,
"in original ontology": true
},
"seating": {
"description": "type of seating",
"is_categorical": false,
"possible_values": [],
"count": 13,
"in original ontology": true
}
}
},
"music": {
"description": "find several tracks to play and then comment on each one",
"slots": {
"name.track": {
"description": "name of the track",
"is_categorical": false,
"possible_values": [],
"count": 4916,
"in original ontology": true
},
"name.artist": {
"description": "name of the artist",
"is_categorical": false,
"possible_values": [],
"count": 9287,
"in original ontology": true
},
"name.album": {
"description": "name of the album",
"is_categorical": false,
"possible_values": [],
"count": 1106,
"in original ontology": true
},
"name.genre": {
"description": "music genre",
"is_categorical": false,
"possible_values": [],
"count": 452,
"in original ontology": true
},
"type.music": {
"description": "rough type of the music",
"is_categorical": false,
"possible_values": [],
"count": 603,
"in original ontology": true
},
"describes_track": {
"description": "description of a track to find",
"is_categorical": false,
"possible_values": [],
"count": 2969,
"in original ontology": true
},
"describes_artist": {
"description": "description of an artist to find",
"is_categorical": false,
"possible_values": [],
"count": 612,
"in original ontology": true
},
"describes_album": {
"description": "description of an album to find",
"is_categorical": false,
"possible_values": [],
"count": 189,
"in original ontology": true
},
"describes_genre": {
"description": "description of a genre to find",
"is_categorical": false,
"possible_values": [],
"count": 26,
"in original ontology": true
},
"describes_type.music": {
"description": "description of the music type",
"is_categorical": false,
"possible_values": [],
"count": 375,
"in original ontology": true
}
}
},
"restaurant": {
"description": "ask for recommendations for a particular type of cuisine",
"slots": {
"name.restaurant": {
"description": "name of the restaurant",
"is_categorical": false,
"possible_values": [],
"count": 8676,
"in original ontology": true
},
"location": {
"description": "location of the restaurant",
"is_categorical": false,
"possible_values": [],
"count": 5165,
"in original ontology": true
},
"sub-location": {
"description": "rough location of the restaurant",
"is_categorical": false,
"possible_values": [],
"count": 1210,
"in original ontology": true
},
"type.food": {
"description": "the cuisine of the restaurant",
"is_categorical": false,
"possible_values": [],
"count": 12412,
"in original ontology": true
},
"menu_item": {
"description": "item in the menu",
"is_categorical": false,
"possible_values": [],
"count": 1499,
"in original ontology": true
},
"type.meal": {
"description": "type of meal",
"is_categorical": false,
"possible_values": [],
"count": 2677,
"in original ontology": true
},
"rating": {
"description": "rating of the restaurant",
"is_categorical": false,
"possible_values": [],
"count": 2951,
"in original ontology": true
},
"price_range": {
"description": "price range of the restaurant",
"is_categorical": false,
"possible_values": [],
"count": 1930,
"in original ontology": true
},
"business_hours": {
"description": "business hours of the restaurant",
"is_categorical": false,
"possible_values": [],
"count": 2024,
"in original ontology": true
},
"name.reservation": {
"description": "name of the person who makes the reservation",
"is_categorical": false,
"possible_values": [],
"count": 16,
"in original ontology": true
},
"num.guests": {
"description": "number of guests",
"is_categorical": false,
"possible_values": [],
"count": 179,
"in original ontology": true
},
"time.reservation": {
"description": "time of the reservation",
"is_categorical": false,
"possible_values": [],
"count": 216,
"in original ontology": true
},
"date.reservation": {
"description": "date of the reservation",
"is_categorical": false,
"possible_values": [],
"count": 130,
"in original ontology": true
},
"type.seating": {
"description": "type of the seating",
"is_categorical": false,
"possible_values": [],
"count": 11,
"in original ontology": true
}
}
},
"sport": {
"description": "discuss facts and stats about players, teams, games, etc. in EPL, MLB, MLS, NBA, NFL",
"slots": {
"name.team": {
"description": "name of the team",
"is_categorical": false,
"possible_values": [],
"count": 19651,
"in original ontology": true
},
"record.team": {
"description": "record of the team (number of wins and losses)",
"is_categorical": false,
"possible_values": [],
"count": 3338,
"in original ontology": true
},
"record.games_ahead": {
"description": "number of games ahead",
"is_categorical": false,
"possible_values": [],
"count": 33,
"in original ontology": true
},
"record.games_back": {
"description": "number of games behind",
"is_categorical": false,
"possible_values": [],
"count": 361,
"in original ontology": true
},
"place.team": {
"description": "ranking of the team",
"is_categorical": false,
"possible_values": [],
"count": 4075,
"in original ontology": true
},
"result.match": {
"description": "result of the match",
"is_categorical": false,
"possible_values": [],
"count": 3245,
"in original ontology": true
},
"score.match": {
"description": "score of the match",
"is_categorical": false,
"possible_values": [],
"count": 3241,
"in original ontology": true
},
"date.match": {
"description": "date of the match",
"is_categorical": false,
"possible_values": [],
"count": 2660,
"in original ontology": true
},
"day.match": {
"description": "day of the match",
"is_categorical": false,
"possible_values": [],
"count": 4743,
"in original ontology": true
},
"time.match": {
"description": "time of the match",
"is_categorical": false,
"possible_values": [],
"count": 1283,
"in original ontology": true
},
"name.player": {
"description": "name of the player",
"is_categorical": false,
"possible_values": [],
"count": 2365,
"in original ontology": true
},
"position.player": {
"description": "position of the player",
"is_categorical": false,
"possible_values": [],
"count": 2746,
"in original ontology": true
},
"record.player": {
"description": "record of the player",
"is_categorical": false,
"possible_values": [],
"count": 80,
"in original ontology": true
},
"name.non_player": {
"description": "name of a non-player such as the manager or coach",
"is_categorical": false,
"possible_values": [],
"count": 843,
"in original ontology": true
},
"venue": {
"description": "venue where the match takes place",
"is_categorical": false,
"possible_values": [],
"count": 328,
"in original ontology": true
}
}
}
},
"intents": {
"inform": {
"description": ""
}
},
"binary_dialogue_act": [],
"state": {}
}
\ No newline at end of file
File deleted
import json
import os
import copy
import zipfile
from tqdm import tqdm
import re
from convlab.util.file_util import read_zipped_json, write_zipped_json
from pprint import pprint
# Natural-language descriptions used to fill the generated ontology.
# Outer keys are the normalized domain names. Inside each domain, the entry
# keyed by the domain name itself is the description of the domain; every
# other entry is the description of one slot of that domain.
descriptions = {
    "uber_lyft": {
        "uber_lyft": "order a car for a ride inside a city",
        "location.from": "pickup location",
        "location.to": "destination of the ride",
        "type.ride": "type of ride",
        "num.people": "number of people",
        "price.estimate": "estimated cost of the ride",
        "duration.estimate": "estimated duration of the ride",
        "time.pickup": "time of pickup",
        "time.dropoff": "time of dropoff",
    },
    "movie_ticket": {
        "movie_ticket": "book movie tickets for a film",
        "name.movie": "name of the movie",
        "name.theater": "name of the theater",
        "num.tickets": "number of tickets",
        "time.start": "start time of the movie",
        "location.theater": "location of the theater",
        "price.ticket": "price of the ticket",
        "type.screening": "type of the screening",
        "time.end": "end time of the movie",
        "time.duration": "duration of the movie",
    },
    "restaurant_reservation": {
        "restaurant_reservation": "searching for a restaurant and make reservation",
        "name.restaurant": "name of the restaurant",
        "name.reservation": "name of the person who make the reservation",
        "num.guests": "number of guests",
        "time.reservation": "time of the reservation",
        "type.seating": "type of the seating",
        "location.restaurant": "location of the restaurant",
    },
    "coffee_ordering": {
        "coffee_ordering": "order a coffee drink from either Starbucks or Peets for pick up",
        "location.store": "location of the coffee store",
        "name.drink": "name of the drink",
        "size.drink": "size of the drink",
        "num.drink": "number of drinks",
        "type.milk": "type of the milk",
        "preference": "user preference of the drink",
    },
    "pizza_ordering": {
        "pizza_ordering": "order a pizza",
        "name.store": "name of the pizza store",
        "name.pizza": "name of the pizza",
        "size.pizza": "size of the pizza",
        "type.topping": "type of the topping",
        "type.crust": "type of the crust",
        "preference": "user preference of the pizza",
        "location.store": "location of the pizza store",
    },
    "auto_repair": {
        "auto_repair": "set up an auto repair appointment with a repair shop",
        "name.store": "name of the repair store",
        "name.customer": "name of the customer",
        "date.appt": "date of the appointment",
        "time.appt": "time of the appointment",
        "reason.appt": "reason of the appointment",
        "name.vehicle": "name of the vehicle",
        "year.vehicle": "year of the vehicle",
        "location.store": "location of the repair store",
    },
    "flights": {
        "flights": "find a round trip or multi-city flights",
        "type": "type of the flight",
        "destination1": "the first destination city of the trip",
        "destination2": "the second destination city of the trip",
        "origin": "the origin city of the trip",
        "date.depart_origin": "date of departure from origin",
        "date.depart_intermediate": "date of departure from intermediate",
        "date.return": "date of return",
        "time_of_day": "time of the flight",
        # Fixed: the closing parenthesis was missing in the original text.
        "seating_class": "seat type (first class, business class, economy class, etc.)",
        "seat_location": "location of the seat",
        "stops": "non-stop, layovers, etc.",
        "price_range": "price range of the flight",
        "num.pax": "number of people",
        "luggage": "luggage information",
        "total_fare": "total cost of the trip",
        "other_description": "other description of the flight",
        "from": "departure of the flight",
        "to": "destination of the flight",
        "airline": "airline of the flight",
        "flight_number": "the number of the flight",
        "date": "date of the flight",
        "from.time": "departure time of the flight",
        "to.time": "arrival time of the flight",
        "stops.location": "location of the stop",
        "fare": "cost of the flight",
    },
    "food_order": {
        "food_order": "order take-out for a particular cuisine choice",
        "name.item": "name of the item",
        "other_description.item": "other description of the item",
        "type.retrieval": "type of the retrieval method",
        "total_price": "total price",
        "time.pickup": "pick up time",
        "num.people": "number of people",
        "name.restaurant": "name of the restaurant",
        "type.food": "type of food",
        "type.meal": "type of meal",
        "location.restaurant": "location of the restaurant",
        "rating.restaurant": "rating of the restaurant",
        "price_range": "price range of the food",
    },
    "hotel": {
        "hotel": "find a hotel using typical preferences",
        "name.hotel": "name of the hotel",
        "location.hotel": "location of the hotel",
        "sub_location.hotel": "rough location of the hotel",
        "star_rating": "star rating of the hotel",
        "customer_rating": "customer rating of the hotel",
        "price_range": "price range of the hotel",
        "amenity": "amenity of the hotel",
        "num.beds": "number of beds to book",
        "type.bed": "type of the bed",
        "num.rooms": "number of rooms to book",
        "check-in_date": "check-in date",
        "check-out_date": "check-out date",
        "date_range": "date range of the reservation",
        "num.guests": "number of guests",
        "type.room": "type of the room",
        "price_per_night": "price per night",
        "total_fare": "total fare",
        "location": "location of the hotel",
    },
    "movie": {
        "movie": "find a movie to watch in theaters or using a streaming service at home",
        "name.movie": "name of the movie",
        "genre": "genre of the movie",
        "name.theater": "name of the theater",
        "location.theater": "location of the theater",
        "time.start": "start time of the movie",
        "time.end": "end time of the movie",
        "price.ticket": "price of the ticket",
        "price.streaming": "price of the streaming",
        "type.screening": "type of the screening",
        "audience_rating": "audience rating",
        "movie_rating": "film rating",
        "release_date": "release date of the movie",
        "runtime": "running time of the movie",
        "real_person": "name of actors, directors, etc.",
        "character": "name of character in the movie",
        "streaming_service": "streaming service that provide the movie",
        "num.tickets": "number of tickets",
        "seating": "type of seating",
    },
    "music": {
        "music": "find several tracks to play and then comment on each one",
        "name.track": "name of the track",
        "name.artist": "name of the artist",
        "name.album": "name of the album",
        "name.genre": "music genre",
        "type.music": "rough type of the music",
        "describes_track": "description of a track to find",
        # Fixed: "a artist" -> "an artist".
        "describes_artist": "description of an artist to find",
        "describes_album": "description of an album to find",
        "describes_genre": "description of a genre to find",
        "describes_type.music": "description of the music type",
    },
    "restaurant": {
        "restaurant": "ask for recommendations for a particular type of cuisine",
        "name.restaurant": "name of the restaurant",
        "location": "location of the restaurant",
        "sub-location": "rough location of the restaurant",
        "type.food": "the cuisine of the restaurant",
        "menu_item": "item in the menu",
        "type.meal": "type of meal",
        "rating": "rating of the restaurant",
        "price_range": "price range of the restaurant",
        "business_hours": "business hours of the restaurant",
        "name.reservation": "name of the person who make the reservation",
        "num.guests": "number of guests",
        "time.reservation": "time of the reservation",
        "date.reservation": "date of the reservation",
        "type.seating": "type of the seating",
    },
    "sport": {
        "sport": "discuss facts and stats about players, teams, games, etc. in EPL, MLB, MLS, NBA, NFL",
        "name.team": "name of the team",
        "record.team": "record of the team (number of wins and losses)",
        "record.games_ahead": "number of games ahead",
        "record.games_back": "number of games behind",
        "place.team": "ranking of the team",
        "result.match": "result of the match",
        "score.match": "score of the match",
        "date.match": "date of the match",
        "day.match": "day of the match",
        "time.match": "time of the match",
        "name.player": "name of the player",
        "position.player": "position of the player",
        "record.player": "record of the player",
        # Fixed: "non-palyer" typo.
        "name.non_player": "name of non-player such as the manager, coach",
        "venue": "venue of the match take place",
    }
}
# Raw Taskmaster domain identifier -> domain name used in the generated ontology.
_DOMAIN_NAME_MAP = {
    'auto': 'auto_repair',
    'pizza': 'pizza_ordering',
    'coffee': 'coffee_ordering',
    'uber': 'uber_lyft',
    'restaurant': 'restaurant_reservation',
    'movie': 'movie_ticket',
    'flights': 'flights',
    'food-ordering': 'food_order',
    'hotels': 'hotel',
    'movies': 'movie',
    'music': 'music',
    'restaurant-search': 'restaurant',
    'sports': 'sport',
}


def normalize_domain_name(domain):
    """Map a raw Taskmaster domain identifier to its normalized name.

    Args:
        domain: raw identifier, e.g. ``'auto'`` or ``'restaurant-search'``.

    Returns:
        The normalized domain name, e.g. ``'auto_repair'`` or ``'restaurant'``.

    Raises:
        AssertionError: if ``domain`` is not one of the known identifiers.
    """
    try:
        return _DOMAIN_NAME_MAP[domain]
    except (KeyError, TypeError):
        # Raised explicitly instead of `assert 0`: a bare assert is removed
        # under `python -O`, which would make this function return None.
        raise AssertionError('unknown domain: {!r}'.format(domain))
def format_turns(ori_turns):
    """Clean a raw utterance list into strictly alternating user/system turns.

    The following rules are applied in order to every raw turn:
      * the speaker ``'ASSISTANT'`` becomes ``'system'``, anything else ``'user'``;
      * system turns that come before the first kept turn are dropped;
      * turns whose text is ``'(deleted)'`` are dropped;
      * a turn by the same speaker as the last kept turn is merged into it
        (texts joined with one space, segment indices shifted accordingly),
        unless its text is contained in the text of the raw turn right before
        it, in which case it is treated as a repeat and dropped.
    Finally a trailing system turn is removed, so a non-empty result starts
    and ends with a user turn.

    The input is not modified: kept turns and merged segments are deep copies,
    so calling this function again on the same data gives the same result.

    Args:
        ori_turns: list of dicts with at least ``'speaker'`` and ``'text'``;
            an optional ``'segments'`` list holds dicts with ``'start_index'``
            and ``'end_index'`` offsets into ``'text'``.

    Returns:
        A new list of turn dicts with ``'speaker'`` in ``{'user', 'system'}``.
    """
    new_turns = []
    previous_speaker = None
    for i, ori_turn in enumerate(ori_turns):
        speaker = 'system' if ori_turn['speaker'] == 'ASSISTANT' else 'user'
        if not new_turns and speaker == 'system':
            # the dialogue must start with a user turn
            continue
        if ori_turn['text'] == '(deleted)':
            continue

        if speaker != previous_speaker:
            # speaker changed: start a new turn, working on a copy so the
            # caller's data keeps its original speaker label
            turn = copy.deepcopy(ori_turn)
            turn['speaker'] = speaker
            new_turns.append(turn)
            previous_speaker = speaker
            continue

        # continuous speaking: same speaker as the last kept turn
        if ori_turn['text'] in ori_turns[i - 1]['text']:
            # skip repeated text
            continue
        last_turn = new_turns[-1]
        # +1 accounts for the joining space
        index_shift = len(last_turn['text']) + 1
        last_turn['text'] += ' ' + ori_turn['text']
        if 'segments' in ori_turn:
            # shift copies, not the caller's segment dicts
            shifted_segments = copy.deepcopy(ori_turn['segments'])
            for segment in shifted_segments:
                segment['start_index'] += index_shift
                segment['end_index'] += index_shift
            last_turn.setdefault('segments', []).extend(shifted_segments)

    if new_turns and new_turns[-1]['speaker'] == 'system':
        new_turns = new_turns[:-1]
    return new_turns
def log_ontology(acts, ontology, ori_ontology):
    """Record the domains, slots and intents of ``acts`` in ``ontology``.

    For every act the slot's occurrence counter is created (at 1) or
    incremented, the slot is flagged with whether it appears in
    ``ori_ontology`` for that domain, and a previously unseen non-None intent
    is registered with an empty description. ``ontology`` is updated in place.

    Args:
        acts: iterable of dicts with keys 'intent', 'domain', 'slot', 'value'.
        ontology: dict with 'domains' and 'intents' sub-dicts.
        ori_ontology: dict mapping a domain name to a container of its
            original slot names.
    """
    for act in acts:
        intent, domain, slot, _ = act['intent'], act['domain'], act['slot'], act['value']

        domain_entry = ontology['domains'].setdefault(domain, {'description': "", 'slots': {}})
        slot_table = domain_entry['slots']
        if slot in slot_table:
            slot_table[slot]['count'] += 1
        else:
            slot_table[slot] = {
                'description': '',
                'is_categorical': False,
                'possible_values': [],
                'count': 1
            }
        slot_table[slot]['in original ontology'] = slot in ori_ontology[domain]

        if intent is None:
            continue
        if intent not in ontology['intents']:
            ontology['intents'][intent] = {"description": ''}
def _load_json(path):
    """Parse one JSON file and close its handle as soon as parsing is done."""
    with open(path) as f:
        return json.load(f)


def preprocess():
    """Build, or load if already built, the processed Taskmaster dialogues and ontology.

    When ``data.zip`` or ``ontology.json`` is missing next to this file,
    ``original_data.zip`` is extracted, every listed Taskmaster data file is
    converted, and both outputs are written: ``ontology.json`` next to this
    file and ``data.zip`` containing ``data.json``. The intermediate
    ``data.json`` is created in, and removed from, the current working
    directory. Otherwise both outputs are simply read back from disk.

    Returns:
        A ``(processed_dialogue, ontology)`` tuple: the list of converted
        dialogues and the ontology dict.

    Raises:
        AssertionError: if a cleaned dialogue does not start and end with a
            user turn, or if a segment's text does not match the span its
            indices select in the utterance.
    """
    self_dir = os.path.dirname(os.path.abspath(__file__))
    processed_dialogue = []
    ontology = {'domains': {},
                'intents': {},
                'binary_dialogue_act': [],
                'state': {}}
    original_zipped_path = os.path.join(self_dir, 'original_data.zip')
    new_dir = os.path.join(self_dir, 'original_data')
    data_zip_path = os.path.join(self_dir, 'data.zip')
    ontology_path = os.path.join(self_dir, 'ontology.json')
    if not os.path.exists(data_zip_path) or not os.path.exists(ontology_path):
        print('unzip to', new_dir)
        print('This may take several minutes')
        # context manager so the archive handle is released after extraction
        with zipfile.ZipFile(original_zipped_path, 'r') as archive:
            archive.extractall(self_dir)
        # (dialogue file, ontology file) pairs, relative to new_dir
        files = [
            ('TM-1-2019/woz-dialogs.json', 'TM-1-2019/ontology.json'),
            ('TM-1-2019/self-dialogs.json', 'TM-1-2019/ontology.json'),
            ('TM-2-2020/data/flights.json', 'TM-2-2020/ontology/flights.json'),
            ('TM-2-2020/data/food-ordering.json', 'TM-2-2020/ontology/food-ordering.json'),
            ('TM-2-2020/data/hotels.json', 'TM-2-2020/ontology/hotels.json'),
            ('TM-2-2020/data/movies.json', 'TM-2-2020/ontology/movies.json'),
            ('TM-2-2020/data/music.json', 'TM-2-2020/ontology/music.json'),
            ('TM-2-2020/data/restaurant-search.json', 'TM-2-2020/ontology/restaurant-search.json'),
            ('TM-2-2020/data/sports.json', 'TM-2-2020/ontology/sports.json')
        ]
        idx_count = 1
        for filename, ontology_filename in files:
            data = _load_json(os.path.join(new_dir, filename))
            raw_ontology = _load_json(os.path.join(new_dir, ontology_filename))
            # file name without directory and extension, e.g. 'food-ordering'
            file_stem = filename.split('/')[-1].split('.')[0]

            # ori_ontology: domain -> {slot name: 0} for the slots that the
            # original ontology file declares
            ori_ontology = {}
            if 'TM-1' in filename:
                # one ontology file covers several domains, keyed by item["id"]
                for item in raw_ontology.values():
                    ori_ontology[item["id"]] = {}
                    for slot in item["required"] + item["optional"]:
                        ori_ontology[item["id"]][slot] = 0
            else:
                # one ontology file per domain; the domain comes from the file name
                domain = normalize_domain_name(file_stem)
                ori_ontology[domain] = {}
                for item in raw_ontology.values():
                    for group in item:
                        for anno in group["annotations"]:
                            ori_ontology[domain][anno] = 0

            # register every declared domain/slot up front with count 0
            for d in ori_ontology:
                if d not in ontology['domains']:
                    ontology['domains'][d] = {'description': descriptions[d][d], 'slots': {}}
                for s in ori_ontology[d]:
                    if s not in ontology['domains'][d]['slots']:
                        ontology['domains'][d]['slots'][s] = {
                            'description': descriptions[d][s],
                            'is_categorical': False,
                            'possible_values': [],
                            'count': 0,
                            'in original ontology': True
                        }

            for ori_sess in tqdm(data, desc='processing taskmaster-{}'.format(filename)):
                turns = format_turns(ori_sess['utterances'])
                if not turns:
                    continue
                if 'TM-2' in filename:
                    dial_domain = normalize_domain_name(file_stem)
                else:
                    # the domain is the prefix of the instruction id
                    dial_domain = normalize_domain_name(ori_sess['instruction_id'].split('-', 1)[0])
                dialogue = {
                    "dataset": "taskmaster",
                    "data_split": "train",
                    "dialogue_id": 'taskmaster_' + str(idx_count),
                    "original_id": ori_sess['conversation_id'],
                    "instruction_id": ori_sess['instruction_id'],
                    "domains": [
                        dial_domain
                    ],
                    "turns": []
                }
                idx_count += 1
                # pass the turns as the assertion message (the original passed
                # print(turns), i.e. None)
                assert turns[0]['speaker'] == 'user' and turns[-1]['speaker'] == 'user', turns
                for utt_idx, uttr in enumerate(turns):
                    speaker = uttr['speaker']
                    turn = {
                        'speaker': speaker,
                        'utterance': uttr['text'],
                        'utt_idx': utt_idx,
                        'dialogue_act': {
                            'binary': [],
                            'categorical': [],
                            'non-categorical': [],
                        },
                    }
                    if speaker == 'user':
                        turn['state'] = {}
                        turn['state_update'] = {'categorical': [], 'non-categorical': []}

                    if 'segments' in uttr:
                        for segment in uttr['segments']:
                            for item in segment['annotations']:
                                # every annotation is attributed to the dialogue's
                                # domain, not to the prefix of the annotation name
                                domain = dial_domain
                                slot = item['name'].split('.', 1)[-1]
                                if slot.endswith('.accept') or slot.endswith('.reject'):
                                    # both suffixes are 7 characters long
                                    slot = slot[:-7]
                                if slot not in ori_ontology[domain]:
                                    # ignore slots the original ontology does not declare
                                    continue
                                if not segment['text']:
                                    print(slot)
                                    print(segment)
                                    print()
                                assert turn['utterance'][segment['start_index']:segment['end_index']] == segment['text']
                                turn['dialogue_act']['non-categorical'].append({
                                    'intent': 'inform',
                                    'domain': domain,
                                    'slot': slot,
                                    'value': segment['text'].lower(),
                                    'start': segment['start_index'],
                                    'end': segment['end_index']
                                })
                    log_ontology(turn['dialogue_act']['non-categorical'], ontology, ori_ontology)
                    dialogue['turns'].append(turn)
                processed_dialogue.append(dialogue)

        # save ontology json
        with open(ontology_path, 'w') as f:
            json.dump(ontology, f, indent=2)
        # data.json must be fully written and closed before it is zipped
        with open('data.json', 'w') as f:
            json.dump(processed_dialogue, f, indent=2)
        write_zipped_json(data_zip_path, 'data.json')
        os.remove('data.json')
    else:
        # read from file
        processed_dialogue = read_zipped_json(data_zip_path, 'data.json')
        ontology = _load_json(ontology_path)
    return processed_dialogue, ontology
# Run the preprocessing when this file is executed as a script; the returned
# (processed_dialogue, ontology) pair is discarded here.
if __name__ == '__main__':
    preprocess()
{
"domains": {
"restaurant": {
"description": "search for a restaurant to dine",
"slots": {
"food": {
"is_categorical": false,
"possible_values": [],
"description": "food type of the restaurant"
},
"area": {
"is_categorical": true,
"possible_values": [
"east",
"west",
"center",
"north",
"south"
],
"description": "area of the restaurant"
},
"postcode": {
"is_categorical": false,
"possible_values": [],
"description": "postal code of the restaurant"
},
"phone": {
"is_categorical": false,
"possible_values": [],
"description": "phone number of the restaurant"
},
"address": {
"is_categorical": false,
"possible_values": [],
"description": "address of the restaurant"
},
"price range": {
"is_categorical": true,
"possible_values": [
"expensive",
"moderate",
"cheap"
],
"description": "price range of the restaurant"
},
"name": {
"is_categorical": false,
"possible_values": [],
"description": "name of the restaurant"
}
}
}
},
"intents": {
"inform": {
"description": "system informs user the value of a slot"
},
"request": {
"description": "system asks the user to provide value of a slot"
}
},
"binary_dialogue_act": [
{
"intent": "request",
"domain": "restaurant",
"slot": "postcode",
"value": ""
},
{
"intent": "request",
"domain": "restaurant",
"slot": "phone",
"value": ""
},
{
"intent": "request",
"domain": "restaurant",
"slot": "address",
"value": ""
},
{
"intent": "request",
"domain": "restaurant",
"slot": "area",
"value": ""
},
{
"intent": "request",
"domain": "restaurant",
"slot": "price range",
"value": ""
},
{
"intent": "request",
"domain": "restaurant",
"slot": "food",
"value": ""
},
{
"intent": "request",
"domain": "restaurant",
"slot": "name",
"value": ""
}
],
"state": {
"restaurant": {
"food": "",
"area": "",
"postcode": "",
"phone": "",
"address": "",
"price range": "",
"name": ""
}
}
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment