Skip to content
Snippets Groups Projects
Commit 87181760 authored by zqwerty's avatar zqwerty
Browse files

remove booking domain in multiwoz21 and update its README

parent 2d9980ea
No related branches found
No related tags found
No related merge requests found
......@@ -19,6 +19,7 @@ MultiWOZ 2.1 fixed the noise in state annotations and dialogue utterances. It al
- Correct some non-categorical slots' values and provide character level span annotation.
- Concatenate multiple values in user goal & state using `|`.
- Add `booked` information in system turns from original belief states.
- Remove the `Booking` domain and remap all booking-related dialogue acts to unify the annotation of booking actions across domains; see `booking_remapper.py`.
- **Annotations:**
- user goal, dialogue acts, state.
......@@ -34,14 +35,14 @@ English
| split | dialogues | utterances | avg_utt | avg_tokens | avg_domains | cat slot match(state) | cat slot match(goal) | cat slot match(dialogue act) | non-cat slot span(dialogue act) |
|------------|-------------|--------------|-----------|--------------|---------------|-------------------------|------------------------|--------------------------------|-----------------------------------|
| train | 8438 | 113556 | 13.46 | 13.23 | 3.39 | 98.84 | 99.48 | 86.39 | 98.22 |
| validation | 1000 | 14748 | 14.75 | 13.5 | 3.64 | 98.84 | 99.46 | 86.59 | 98.17 |
| test | 1000 | 14744 | 14.74 | 13.5 | 3.59 | 99.21 | 99.32 | 85.83 | 98.58 |
| all | 10438 | 143048 | 13.7 | 13.28 | 3.44 | 98.88 | 99.47 | 86.36 | 98.25 |
| train | 8438 | 113556 | 13.46 | 13.23 | 2.8 | 98.84 | 99.48 | 86.39 | 98.22 |
| validation | 1000 | 14748 | 14.75 | 13.5 | 2.98 | 98.84 | 99.46 | 86.59 | 98.17 |
| test | 1000 | 14744 | 14.74 | 13.5 | 2.93 | 99.21 | 99.32 | 85.83 | 98.58 |
| all | 10438 | 143048 | 13.7 | 13.28 | 2.83 | 98.88 | 99.47 | 86.35 | 98.25 |
9 domains: ['attraction', 'hotel', 'taxi', 'restaurant', 'train', 'police', 'hospital', 'booking', 'general']
- **cat slot match**: how many values of categorical slots are in the possible values of ontology.
- **non-cat slot span**: how many values of non-categorical slots have span annotation.
8 domains: ['attraction', 'hotel', 'taxi', 'restaurant', 'train', 'police', 'hospital', 'general']
- **cat slot match**: the percentage of categorical slot values that appear in the ontology's possible values.
- **non-cat slot span**: the percentage of non-categorical slot values that have a span annotation.
### Citation
......
No preview for this file type
......@@ -6,7 +6,6 @@
"original_id": "SNG01856.json",
"domains": [
"hotel",
"booking",
"general"
],
"goal": {
......@@ -202,11 +201,6 @@
}
],
"binary": [
{
"intent": "inform",
"domain": "booking",
"slot": ""
},
{
"intent": "inform",
"domain": "hotel",
......@@ -307,8 +301,8 @@
"categorical": [
{
"intent": "nobook",
"domain": "booking",
"slot": "day",
"domain": "hotel",
"slot": "book day",
"value": "Tuesday"
}
],
......@@ -316,13 +310,13 @@
"binary": [
{
"intent": "request",
"domain": "booking",
"domain": "hotel",
"slot": "book stay"
},
{
"intent": "request",
"domain": "booking",
"slot": "day"
"domain": "hotel",
"slot": "book day"
}
]
},
......@@ -404,8 +398,8 @@
"categorical": [],
"non-categorical": [
{
"intent": "book",
"domain": "booking",
"intent": "inform",
"domain": "hotel",
"slot": "ref",
"value": "7GAWK763",
"start": 46,
......@@ -417,6 +411,11 @@
"intent": "reqmore",
"domain": "general",
"slot": ""
},
{
"intent": "book",
"domain": "hotel",
"slot": ""
}
]
},
......@@ -989,7 +988,6 @@
"domains": [
"hotel",
"train",
"booking",
"general"
],
"goal": {
......@@ -1389,7 +1387,7 @@
"categorical": [],
"non-categorical": [
{
"intent": "offerbooked",
"intent": "inform",
"domain": "train",
"slot": "ref",
"value": "A9NHSO9Y",
......@@ -1397,7 +1395,13 @@
"end": 69
}
],
"binary": []
"binary": [
{
"intent": "book",
"domain": "train",
"slot": ""
}
]
},
"db_results": {},
"booked": {
......@@ -1572,8 +1576,8 @@
"binary": [
{
"intent": "request",
"domain": "booking",
"slot": "day"
"domain": "hotel",
"slot": "book day"
}
]
},
......@@ -1675,15 +1679,21 @@
"categorical": [],
"non-categorical": [
{
"intent": "book",
"domain": "booking",
"intent": "inform",
"domain": "hotel",
"slot": "ref",
"value": "5NAWGJDC",
"start": 46,
"end": 54
}
],
"binary": []
"binary": [
{
"intent": "book",
"domain": "hotel",
"slot": ""
}
]
},
"db_results": {},
"booked": {
......@@ -2179,8 +2189,7 @@
"domains": [
"hotel",
"train",
"general",
"booking"
"general"
],
"goal": {
"description": "You are looking for information in Cambridge. You are looking for a <span class='emphasis'>place to stay</span>. The hotel should be in the <span class='emphasis'>east</span> and <span class='emphasis'>doesn't need to include internet</span>. The hotel should <span class='emphasis'>include free parking</span> and should be in the type of <span class='emphasis'>guesthouse</span>. If there is no such hotel, how about one that has <span class='emphasis'>free wifi</span>. Make sure you get <span class='emphasis'>address</span>. You are also looking for a <span class='emphasis'>train</span>. The train should leave on <span class='emphasis'>wednesday</span> and should <span class='emphasis'>arrive by 11:30</span>. The train should go to <span class='emphasis'>cambridge</span> and should depart from <span class='emphasis'>ely</span>. Once you find the train you want to make a booking for <span class='emphasis'>8 people</span>. Make sure you get the <span class='emphasis'>reference number</span>",
......@@ -2594,11 +2603,6 @@
}
],
"binary": [
{
"intent": "inform",
"domain": "booking",
"slot": ""
},
{
"intent": "inform",
"domain": "hotel",
......@@ -3024,7 +3028,7 @@
"categorical": [],
"non-categorical": [
{
"intent": "offerbooked",
"intent": "inform",
"domain": "train",
"slot": "ref",
"value": "7ZCVR4Q3",
......@@ -3032,7 +3036,7 @@
"end": 106
},
{
"intent": "offerbooked",
"intent": "inform",
"domain": "train",
"slot": "price",
"value": "35.2 GBP",
......@@ -3040,7 +3044,13 @@
"end": 49
}
],
"binary": []
"binary": [
{
"intent": "book",
"domain": "train",
"slot": ""
}
]
},
"db_results": {},
"booked": {
......@@ -3153,7 +3163,6 @@
"domains": [
"hotel",
"train",
"booking",
"general"
],
"goal": {
......@@ -3902,13 +3911,7 @@
"dialogue_acts": {
"categorical": [],
"non-categorical": [],
"binary": [
{
"intent": "inform",
"domain": "booking",
"slot": ""
}
]
"binary": []
},
"db_results": {},
"booked": {
......@@ -3987,7 +3990,7 @@
"binary": [
{
"intent": "request",
"domain": "booking",
"domain": "hotel",
"slot": "book stay"
}
]
......@@ -4080,8 +4083,8 @@
"binary": [
{
"intent": "request",
"domain": "booking",
"slot": "day"
"domain": "hotel",
"slot": "book day"
}
]
},
......@@ -4161,16 +4164,16 @@
"categorical": [],
"non-categorical": [
{
"intent": "book",
"domain": "booking",
"intent": "inform",
"domain": "hotel",
"slot": "ref",
"value": "04CSEO7Q",
"start": 132,
"end": 140
},
{
"intent": "book",
"domain": "booking",
"intent": "inform",
"domain": "hotel",
"slot": "name",
"value": "Acorn guest house",
"start": 89,
......@@ -4182,6 +4185,11 @@
"intent": "reqmore",
"domain": "general",
"slot": ""
},
{
"intent": "book",
"domain": "hotel",
"slot": ""
}
]
},
......@@ -5778,7 +5786,7 @@
"dialogue_acts": {
"categorical": [
{
"intent": "offerbooked",
"intent": "inform",
"domain": "train",
"slot": "day",
"value": "Wednesday"
......@@ -5786,7 +5794,7 @@
],
"non-categorical": [
{
"intent": "offerbooked",
"intent": "inform",
"domain": "train",
"slot": "ref",
"value": "xu1qlhvw",
......@@ -5794,7 +5802,7 @@
"end": 111
},
{
"intent": "offerbooked",
"intent": "inform",
"domain": "train",
"slot": "departure",
"value": "Cambridge",
......@@ -5802,7 +5810,7 @@
"end": 51
},
{
"intent": "offerbooked",
"intent": "inform",
"domain": "train",
"slot": "book people",
"value": "one",
......@@ -5810,7 +5818,7 @@
"end": 16
},
{
"intent": "offerbooked",
"intent": "inform",
"domain": "train",
"slot": "train id",
"value": "TR1879",
......@@ -5818,7 +5826,7 @@
"end": 31
},
{
"intent": "offerbooked",
"intent": "inform",
"domain": "train",
"slot": "destination",
"value": "Peterborough",
......@@ -5826,7 +5834,7 @@
"end": 68
},
{
"intent": "offerbooked",
"intent": "inform",
"domain": "train",
"slot": "leave at",
"value": "13:06",
......@@ -5839,6 +5847,11 @@
"intent": "reqmore",
"domain": "general",
"slot": ""
},
{
"intent": "book",
"domain": "train",
"slot": ""
}
]
},
......
......@@ -432,49 +432,6 @@ ontology = {
}
}
},
"booking": {
"description": "booking for taxi, restaurant, hotel, train, etc.",
"slots":{
"day": {
"description": "day of the booking",
"is_categorical": True,
"possible_values": [
"monday",
"tuesday",
"wednesday",
"thursday",
"friday",
"saturday",
"sunday"
]
},
"time": {
"description": "time of the booking",
"is_categorical": False,
"possible_values": []
},
"book people": {
"description": "number of people for the booking",
"is_categorical": False,
"possible_values": []
},
"book stay": {
"description": "length of stay at the hotel",
"is_categorical": False,
"possible_values": []
},
"name": {
"description": "name of the booked entity",
"is_categorical": False,
"possible_values": []
},
"ref": {
"description": "reference number of the booking",
"is_categorical": False,
"possible_values": []
}
}
},
"general":{
"description": "general domain without slots",
"slots": {}
......@@ -769,7 +726,7 @@ def preprocess():
dialogues_by_split = {split:[] for split in splits}
sent_tokenizer = PunktSentenceTokenizer()
word_tokenizer = TreebankWordTokenizer()
booking_remapper = BookingActRemapper(init_ontology)
booking_remapper = BookingActRemapper(ontology)
for ori_dialog_id, ori_dialog in tqdm(original_data.items()):
if ori_dialog_id in val_list:
split = 'validation'
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment