diff --git a/data/unified_datasets/multiwoz21/original_data.zip b/data/unified_datasets/multiwoz21/MultiWOZ_2.1.zip
similarity index 66%
rename from data/unified_datasets/multiwoz21/original_data.zip
rename to data/unified_datasets/multiwoz21/MultiWOZ_2.1.zip
index b27361772be980013a94c91898fb61e3e122ba8e..c30d869f77e86fe2bf19245120e771f28cb32bfa 100644
Binary files a/data/unified_datasets/multiwoz21/original_data.zip and b/data/unified_datasets/multiwoz21/MultiWOZ_2.1.zip differ
diff --git a/data/unified_datasets/multiwoz21/data.zip b/data/unified_datasets/multiwoz21/data.zip
index 7f60019dfc4a68728474baf9e8d3d73305b46082..51ac6b5f326b2c9c6d078c433c89b9e0010301cd 100644
Binary files a/data/unified_datasets/multiwoz21/data.zip and b/data/unified_datasets/multiwoz21/data.zip differ
diff --git a/data/unified_datasets/multiwoz21/dummy_data.json b/data/unified_datasets/multiwoz21/dummy_data.json
new file mode 100644
index 0000000000000000000000000000000000000000..9945b92915b0f9739d8b31b3fd112f8ab86f2cb3
--- /dev/null
+++ b/data/unified_datasets/multiwoz21/dummy_data.json
@@ -0,0 +1,5896 @@
+[
+  {
+    "dataset": "multiwoz21",
+    "data_split": "train",
+    "dialogue_id": "multiwoz21-train-0",
+    "original_id": "SNG01856.json",
+    "domains": [
+      "hotel",
+      "booking",
+      "general"
+    ],
+    "goal": {
+      "description": "You are looking for a <span class='emphasis'>place to stay</span>. The hotel should be in the <span class='emphasis'>cheap</span> price range and should be in the type of <span class='emphasis'>hotel</span>. The hotel should <span class='emphasis'>include free parking</span> and should <span class='emphasis'>include free wifi</span>. Once you find the <span class='emphasis'>hotel</span> you want to book it for <span class='emphasis'>6 people</span> and <span class='emphasis'>3 nights</span> starting from <span class='emphasis'>tuesday</span>. If the booking fails how about <span class='emphasis'>2 nights</span>. Make sure you get the <span class='emphasis'>reference number</span>",
+      "inform": {
+        "hotel": {
+          "type": "hotel",
+          "parking": "yes",
+          "price range": "cheap",
+          "internet": "yes",
+          "book stay": "3|2",
+          "book day": "tuesday",
+          "book people": "6"
+        }
+      },
+      "request": {
+        "hotel": {}
+      }
+    },
+    "turns": [
+      {
+        "speaker": "user",
+        "utterance": "am looking for a place to to stay that has cheap price range it should be in a type of hotel",
+        "utt_idx": 0,
+        "dialogue_acts": {
+          "categorical": [
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "price range",
+              "value": "cheap"
+            }
+          ],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "type",
+              "value": "hotel",
+              "start": 87,
+              "end": 92
+            }
+          ],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "cheap",
+            "stars": "",
+            "internet": "",
+            "type": "hotel",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "",
+            "day": "",
+            "arrive by": "",
+            "departure": "",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "Okay, do you have a specific area you want to stay in?",
+        "utt_idx": 1,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "hotel",
+              "slot": "area",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "no, i just need to make sure it's cheap. oh, and i need parking",
+        "utt_idx": 2,
+        "dialogue_acts": {
+          "categorical": [
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "parking",
+              "value": "yes"
+            }
+          ],
+          "non-categorical": [],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "yes",
+            "price range": "cheap",
+            "stars": "",
+            "internet": "",
+            "type": "hotel",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "",
+            "day": "",
+            "arrive by": "",
+            "departure": "",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "I found 1 cheap hotel for you that includes parking. Do you like me to book it?",
+        "utt_idx": 3,
+        "dialogue_acts": {
+          "categorical": [
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "price range",
+              "value": "cheap"
+            }
+          ],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "choice",
+              "value": "1",
+              "start": 8,
+              "end": 9
+            }
+          ],
+          "binary": [
+            {
+              "intent": "inform",
+              "domain": "booking",
+              "slot": "",
+              "value": ""
+            },
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "parking",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "Yes, please. 6 people 3 nights starting on tuesday.",
+        "utt_idx": 4,
+        "dialogue_acts": {
+          "categorical": [
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "book day",
+              "value": "tuesday"
+            }
+          ],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "book stay",
+              "value": "3",
+              "start": 22,
+              "end": 23
+            },
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "book people",
+              "value": "6",
+              "start": 13,
+              "end": 14
+            }
+          ],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "yes",
+            "price range": "cheap",
+            "stars": "",
+            "internet": "",
+            "type": "hotel",
+            "book stay": "3",
+            "book day": "tuesday",
+            "book people": "6"
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "",
+            "day": "",
+            "arrive by": "",
+            "departure": "",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "I am sorry but I wasn't able to book that for you for Tuesday. Is there another day you would like to stay or perhaps a shorter stay?",
+        "utt_idx": 5,
+        "dialogue_acts": {
+          "categorical": [
+            {
+              "intent": "nobook",
+              "domain": "booking",
+              "slot": "day",
+              "value": "Tuesday"
+            }
+          ],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "booking",
+              "slot": "book stay",
+              "value": ""
+            },
+            {
+              "intent": "request",
+              "domain": "booking",
+              "slot": "day",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "how about only 2 nights.",
+        "utt_idx": 6,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "book stay",
+              "value": "2",
+              "start": 15,
+              "end": 16
+            }
+          ],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "yes",
+            "price range": "cheap",
+            "stars": "",
+            "internet": "",
+            "type": "hotel",
+            "book stay": "2",
+            "book day": "tuesday",
+            "book people": "6"
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "",
+            "day": "",
+            "arrive by": "",
+            "departure": "",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "Booking was successful.\nReference number is : 7GAWK763. Anything else I can do for you?",
+        "utt_idx": 7,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "book",
+              "domain": "booking",
+              "slot": "ref",
+              "value": "7GAWK763",
+              "start": 46,
+              "end": 54
+            }
+          ],
+          "binary": [
+            {
+              "intent": "reqmore",
+              "domain": "general",
+              "slot": "",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "No, that will be all. Good bye.",
+        "utt_idx": 8,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "bye",
+              "domain": "general",
+              "slot": "",
+              "value": ""
+            }
+          ]
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "yes",
+            "price range": "cheap",
+            "stars": "",
+            "internet": "",
+            "type": "hotel",
+            "book stay": "2",
+            "book day": "tuesday",
+            "book people": "6"
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "",
+            "day": "",
+            "arrive by": "",
+            "departure": "",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "Thank you for using our services.",
+        "utt_idx": 9,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "bye",
+              "domain": "general",
+              "slot": "",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      }
+    ]
+  },
+  {
+    "dataset": "multiwoz21",
+    "data_split": "train",
+    "dialogue_id": "multiwoz21-train-1",
+    "original_id": "SNG0129.json",
+    "domains": [
+      "police",
+      "general"
+    ],
+    "goal": {
+      "description": "You were <span class='emphasis'>robbed</span> and are looking for help. Make sure you get <span class='emphasis'>postcode</span>, <span class='emphasis'>address</span>, and <span class='emphasis'>phone number</span>",
+      "inform": {
+        "police": {}
+      },
+      "request": {
+        "police": {
+          "postcode": "",
+          "address": "",
+          "phone": ""
+        }
+      }
+    },
+    "turns": [
+      {
+        "speaker": "user",
+        "utterance": "Hello, I have been robbed.  Can you please help me get in touch with the police?",
+        "utt_idx": 0,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "inform",
+              "domain": "police",
+              "slot": "",
+              "value": ""
+            }
+          ]
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "",
+            "day": "",
+            "arrive by": "",
+            "departure": "",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "Parkside Police Station is in Parkside, Cambridge. Their number is 01223358966. Anything else I can do for you?",
+        "utt_idx": 1,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "police",
+              "slot": "name",
+              "value": "Parkside Police Station",
+              "start": 0,
+              "end": 23
+            },
+            {
+              "intent": "inform",
+              "domain": "police",
+              "slot": "phone",
+              "value": "01223358966",
+              "start": 67,
+              "end": 78
+            },
+            {
+              "intent": "inform",
+              "domain": "police",
+              "slot": "address",
+              "value": "Parkside, Cambridge",
+              "start": 30,
+              "end": 49
+            }
+          ],
+          "binary": []
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "Can I please have the postcode as well?",
+        "utt_idx": 2,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "police",
+              "slot": "postcode",
+              "value": ""
+            }
+          ]
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "",
+            "day": "",
+            "arrive by": "",
+            "departure": "",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "The postcode for the Parkside Police Station is CB11JG. Can I help you with anything else?",
+        "utt_idx": 3,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "police",
+              "slot": "name",
+              "value": "Parkside Police Station",
+              "start": 21,
+              "end": 44
+            },
+            {
+              "intent": "inform",
+              "domain": "police",
+              "slot": "postcode",
+              "value": "CB11JG",
+              "start": 48,
+              "end": 54
+            }
+          ],
+          "binary": []
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "Was Parkside the address of the police station? If not, can I have the address please?",
+        "utt_idx": 4,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "police",
+              "slot": "address",
+              "value": ""
+            }
+          ]
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "",
+            "day": "",
+            "arrive by": "",
+            "departure": "",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "Yes, Parkside is the address.",
+        "utt_idx": 5,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": []
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "Thank you that will be all for now.",
+        "utt_idx": 6,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "thank",
+              "domain": "general",
+              "slot": "",
+              "value": ""
+            }
+          ]
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "",
+            "day": "",
+            "arrive by": "",
+            "departure": "",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "Great. Thank you for contacting Cambridge Towninfo Centre.",
+        "utt_idx": 7,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "welcome",
+              "domain": "general",
+              "slot": "",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "You were great. Goodbye.",
+        "utt_idx": 8,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "",
+            "day": "",
+            "arrive by": "",
+            "departure": "",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "We are happy to help. Have a good day!",
+        "utt_idx": 9,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "bye",
+              "domain": "general",
+              "slot": "",
+              "value": ""
+            },
+            {
+              "intent": "welcome",
+              "domain": "general",
+              "slot": "",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      }
+    ]
+  },
+  {
+    "dataset": "multiwoz21",
+    "data_split": "train",
+    "dialogue_id": "multiwoz21-train-2",
+    "original_id": "MUL2168.json",
+    "domains": [
+      "hotel",
+      "train",
+      "booking",
+      "general"
+    ],
+    "goal": {
+      "description": "You are planning your trip in Cambridge. You are looking for a <span class='emphasis'>train</span>. The train should go to <span class='emphasis'>cambridge</span> and should <span class='emphasis'>arrive by 20:45</span>. The train should depart from <span class='emphasis'>birmingham new street</span> and should leave on <span class='emphasis'>wednesday</span>. Once you find the train you want to make a booking for <span class='emphasis'>5 people</span>. Make sure you get the <span class='emphasis'>reference number</span>. You are also looking for a <span class='emphasis'>place to stay</span>. The hotel should have <span class='emphasis'>a star of 4</span> and should <span class='emphasis'>include free wifi</span>. The hotel <span class='emphasis'>doesn't need to have free parking</span> and should be in the <span class='emphasis'>cheap</span> price range. If there is no such hotel, how about one that has <span class='emphasis'>free parking</span>. Once you find the <span class='emphasis'>hotel</span> you want to book it for <span class='emphasis'>the same group of people</span> and <span class='emphasis'>5 nights</span> starting from <span class='emphasis'>the same day</span>. Make sure you get the <span class='emphasis'>reference number</span>",
+      "inform": {
+        "hotel": {
+          "parking": "no|yes",
+          "price range": "cheap|cheap",
+          "stars": "4|4",
+          "internet": "yes|yes",
+          "book stay": "5",
+          "book day": "wednesday",
+          "book people": "5"
+        },
+        "train": {
+          "destination": "cambridge",
+          "day": "wednesday",
+          "arrive by": "20:45",
+          "departure": "birmingham new street",
+          "book people": "5"
+        }
+      },
+      "request": {
+        "hotel": {},
+        "train": {}
+      }
+    },
+    "turns": [
+      {
+        "speaker": "user",
+        "utterance": "Hi, I'm looking for a train that is going to cambridge and arriving there by 20:45, is there anything like that?",
+        "utt_idx": 0,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "destination",
+              "value": "cambridge",
+              "start": 45,
+              "end": 54
+            },
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "arrive by",
+              "value": "20:45",
+              "start": 77,
+              "end": 82
+            }
+          ],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "cambridge",
+            "day": "",
+            "arrive by": "20:45",
+            "departure": "",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "There are over 1,000 trains like that.  Where will you be departing from?",
+        "utt_idx": 1,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "choice",
+              "value": "over 1",
+              "start": 10,
+              "end": 16
+            },
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "choice",
+              "value": "000"
+            }
+          ],
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "train",
+              "slot": "departure",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "I am departing from birmingham new street.",
+        "utt_idx": 2,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "departure",
+              "value": "birmingham new street",
+              "start": 20,
+              "end": 41
+            }
+          ],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "cambridge",
+            "day": "",
+            "arrive by": "20:45",
+            "departure": "birmingham new street",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "Can you confirm your desired travel day?",
+        "utt_idx": 3,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "train",
+              "slot": "day",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "I would like to leave on wednesday",
+        "utt_idx": 4,
+        "dialogue_acts": {
+          "categorical": [
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "day",
+              "value": "wednesday"
+            }
+          ],
+          "non-categorical": [],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "cambridge",
+            "day": "wednesday",
+            "arrive by": "20:45",
+            "departure": "birmingham new street",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "I show a train leaving birmingham new street at 17:40 and arriving at 20:23 on Wednesday.  Will this work for you?",
+        "utt_idx": 5,
+        "dialogue_acts": {
+          "categorical": [
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "day",
+              "value": "Wednesday"
+            }
+          ],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "arrive by",
+              "value": "20:23",
+              "start": 70,
+              "end": 75
+            },
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "leave at",
+              "value": "17:40",
+              "start": 48,
+              "end": 53
+            },
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "departure",
+              "value": "birmingham new street",
+              "start": 23,
+              "end": 44
+            }
+          ],
+          "binary": []
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "That will, yes. Please make a booking for 5 people please.",
+        "utt_idx": 6,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "book people",
+              "value": "5",
+              "start": 42,
+              "end": 43
+            }
+          ],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "cambridge",
+            "day": "wednesday",
+            "arrive by": "20:45",
+            "departure": "birmingham new street",
+            "book people": "5"
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "I've booked your train tickets, and your reference number is A9NHSO9Y.",
+        "utt_idx": 7,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "offerbooked",
+              "domain": "train",
+              "slot": "ref",
+              "value": "A9NHSO9Y",
+              "start": 61,
+              "end": 69
+            }
+          ],
+          "binary": []
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "Thanks so much. I would also need a place to say. I am looking for something with 4 stars and has free wifi. ",
+        "utt_idx": 8,
+        "dialogue_acts": {
+          "categorical": [
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "internet",
+              "value": "yes"
+            },
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "stars",
+              "value": "4"
+            }
+          ],
+          "non-categorical": [],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "4",
+            "internet": "yes",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "cambridge",
+            "day": "wednesday",
+            "arrive by": "20:45",
+            "departure": "birmingham new street",
+            "book people": "5"
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "How about the cambridge belfry?  It has all the attributes you requested and a great name!  Maybe even a real belfry?",
+        "utt_idx": 9,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "recommend",
+              "domain": "hotel",
+              "slot": "name",
+              "value": "the cambridge belfry",
+              "start": 10,
+              "end": 30
+            }
+          ],
+          "binary": []
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "That sounds great, could you make a booking for me please?",
+        "utt_idx": 10,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "cambridge belfry",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "4",
+            "internet": "yes",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "cambridge",
+            "day": "wednesday",
+            "arrive by": "20:45",
+            "departure": "birmingham new street",
+            "book people": "5"
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "What day would you like your booking for?",
+        "utt_idx": 11,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "booking",
+              "slot": "day",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "Please book it for Wednesday for 5 people and 5 nights, please.",
+        "utt_idx": 12,
+        "dialogue_acts": {
+          "categorical": [
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "book day",
+              "value": "wednesday"
+            }
+          ],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "book stay",
+              "value": "5",
+              "start": 46,
+              "end": 47
+            },
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "book people",
+              "value": "5",
+              "start": 33,
+              "end": 34
+            }
+          ],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "cambridge belfry",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "4",
+            "internet": "yes",
+            "type": "",
+            "book stay": "5",
+            "book day": "wednesday",
+            "book people": "5"
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "cambridge",
+            "day": "wednesday",
+            "arrive by": "20:45",
+            "departure": "birmingham new street",
+            "book people": "5"
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "Booking was successful. Reference number is : 5NAWGJDC.",
+        "utt_idx": 13,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "book",
+              "domain": "booking",
+              "slot": "ref",
+              "value": "5NAWGJDC",
+              "start": 46,
+              "end": 54
+            }
+          ],
+          "binary": []
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "Thank you, goodbye",
+        "utt_idx": 14,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "thank",
+              "domain": "general",
+              "slot": "",
+              "value": ""
+            }
+          ]
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "cambridge belfry",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "4",
+            "internet": "yes",
+            "type": "",
+            "book stay": "5",
+            "book day": "wednesday",
+            "book people": "5"
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "cambridge",
+            "day": "wednesday",
+            "arrive by": "20:45",
+            "departure": "birmingham new street",
+            "book people": "5"
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "Goodbye. If you think of anything else you need don't hesitate to contact us.",
+        "utt_idx": 15,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "bye",
+              "domain": "general",
+              "slot": "",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      }
+    ]
+  },
+  {
+    "dataset": "multiwoz21",
+    "data_split": "train",
+    "dialogue_id": "multiwoz21-train-3",
+    "original_id": "SNG01445.json",
+    "domains": [
+      "police",
+      "general"
+    ],
+    "goal": {
+      "description": "You were <span class='emphasis'>robbed</span> and are looking for help. Make sure you get <span class='emphasis'>address</span>, <span class='emphasis'>phone number</span>, and <span class='emphasis'>postcode</span>",
+      "inform": {
+        "police": {}
+      },
+      "request": {
+        "police": {
+          "address": "",
+          "phone": "",
+          "postcode": ""
+        }
+      }
+    },
+    "turns": [
+      {
+        "speaker": "user",
+        "utterance": "I was robbed a few minutes ago and need assistance.",
+        "utt_idx": 0,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "",
+            "day": "",
+            "arrive by": "",
+            "departure": "",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "Please contact the Parkside, Cambridge police station.",
+        "utt_idx": 1,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "police",
+              "slot": "address",
+              "value": "Parkside, Cambridge",
+              "start": 19,
+              "end": 38
+            }
+          ],
+          "binary": []
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "I need the address including postcode of the police station and also the phone number.",
+        "utt_idx": 2,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "police",
+              "slot": "address",
+              "value": ""
+            },
+            {
+              "intent": "request",
+              "domain": "police",
+              "slot": "postcode",
+              "value": ""
+            },
+            {
+              "intent": "request",
+              "domain": "police",
+              "slot": "phone",
+              "value": ""
+            }
+          ]
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "",
+            "day": "",
+            "arrive by": "",
+            "departure": "",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "Of course. Their address is Parkside, Cambridge postcode CB11JG and phone number is 01223358966.",
+        "utt_idx": 3,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "police",
+              "slot": "phone",
+              "value": "01223358966",
+              "start": 84,
+              "end": 95
+            },
+            {
+              "intent": "inform",
+              "domain": "police",
+              "slot": "address",
+              "value": "Parkside, Cambridge",
+              "start": 28,
+              "end": 47
+            },
+            {
+              "intent": "inform",
+              "domain": "police",
+              "slot": "postcode",
+              "value": "CB11JG",
+              "start": 57,
+              "end": 63
+            }
+          ],
+          "binary": []
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "Thank you. That's all I needed.",
+        "utt_idx": 4,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "thank",
+              "domain": "general",
+              "slot": "",
+              "value": ""
+            }
+          ]
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "",
+            "day": "",
+            "arrive by": "",
+            "departure": "",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "You are more than welcome!\n",
+        "utt_idx": 5,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "greet",
+              "domain": "general",
+              "slot": "",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "Ok, have a good day. Goodbye.",
+        "utt_idx": 6,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "",
+            "day": "",
+            "arrive by": "",
+            "departure": "",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "You are very welcome. Goodbye.",
+        "utt_idx": 7,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "bye",
+              "domain": "general",
+              "slot": "",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      }
+    ]
+  },
+  {
+    "dataset": "multiwoz21",
+    "data_split": "train",
+    "dialogue_id": "multiwoz21-train-4",
+    "original_id": "MUL2105.json",
+    "domains": [
+      "hotel",
+      "train",
+      "general",
+      "booking"
+    ],
+    "goal": {
+      "description": "You are looking for information in Cambridge. You are looking for a <span class='emphasis'>place to stay</span>. The hotel should be in the <span class='emphasis'>east</span> and <span class='emphasis'>doesn't need to include internet</span>. The hotel should <span class='emphasis'>include free parking</span> and should be in the type of <span class='emphasis'>guesthouse</span>. If there is no such hotel, how about one that has <span class='emphasis'>free wifi</span>. Make sure you get <span class='emphasis'>address</span>. You are also looking for a <span class='emphasis'>train</span>. The train should leave on <span class='emphasis'>wednesday</span> and should <span class='emphasis'>arrive by 11:30</span>. The train should go to <span class='emphasis'>cambridge</span> and should depart from <span class='emphasis'>ely</span>. Once you find the train you want to make a booking for <span class='emphasis'>8 people</span>. Make sure you get the <span class='emphasis'>reference number</span>",
+      "inform": {
+        "hotel": {
+          "parking": "yes|yes",
+          "type": "guesthouse|guesthouse",
+          "internet": "no|yes",
+          "area": "east|centre"
+        },
+        "train": {
+          "destination": "cambridge",
+          "day": "wednesday",
+          "arrive by": "11:30",
+          "departure": "ely",
+          "book people": "8"
+        }
+      },
+      "request": {
+        "hotel": {
+          "address": ""
+        },
+        "train": {}
+      }
+    },
+    "turns": [
+      {
+        "speaker": "user",
+        "utterance": "Hi. I'm looking for a hotel in the east.  The internet is not needed.",
+        "utt_idx": 0,
+        "dialogue_acts": {
+          "categorical": [
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "area",
+              "value": "east"
+            }
+          ],
+          "non-categorical": [],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "east",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "no",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "",
+            "day": "",
+            "arrive by": "",
+            "departure": "",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "There are no hotels that do not have internet, but 7 which do have it. Do you have a specific price range you'd prefer?",
+        "utt_idx": 1,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "choice",
+              "value": "7",
+              "start": 51,
+              "end": 52
+            }
+          ],
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "hotel",
+              "slot": "price range",
+              "value": ""
+            },
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "internet",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "no, but it should have free parking, please.",
+        "utt_idx": 2,
+        "dialogue_acts": {
+          "categorical": [
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "parking",
+              "value": "yes"
+            },
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "price range",
+              "value": "dontcare"
+            }
+          ],
+          "non-categorical": [],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "east",
+            "parking": "yes",
+            "price range": "dontcare",
+            "stars": "",
+            "internet": "no",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "",
+            "day": "",
+            "arrive by": "",
+            "departure": "",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "The Express by Holiday Inn Cambridge is in the area and has free parking, would you like more information?",
+        "utt_idx": 3,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "name",
+              "value": "Express by Holiday Inn Cambridge",
+              "start": 4,
+              "end": 36
+            }
+          ],
+          "binary": [
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "parking",
+              "value": ""
+            },
+            {
+              "intent": "reqmore",
+              "domain": "general",
+              "slot": "",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "Is that a guesthouse or a hotel? I'd really prefer a guesthouse.",
+        "utt_idx": 4,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "type",
+              "value": "guesthouse",
+              "start": 10,
+              "end": 20
+            }
+          ],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "express by holiday inn cambridge",
+            "area": "east",
+            "parking": "yes",
+            "price range": "dontcare",
+            "stars": "",
+            "internet": "no",
+            "type": "guesthouse",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "",
+            "day": "",
+            "arrive by": "",
+            "departure": "",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "It's a hotel. There are 5 guesthouses in the area. Do you prefer cheap or moderate for the price range?",
+        "utt_idx": 5,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "choice",
+              "value": "5",
+              "start": 24,
+              "end": 25
+            },
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "type",
+              "value": "hotel",
+              "start": 7,
+              "end": 12
+            },
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "type",
+              "value": "guesthouse",
+              "start": 26,
+              "end": 36
+            }
+          ],
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "hotel",
+              "slot": "price range",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "The price doesn't really matter. I just need free parking. It doesn't really need to have internet though. ",
+        "utt_idx": 6,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "east",
+            "parking": "yes",
+            "price range": "dontcare",
+            "stars": "",
+            "internet": "no",
+            "type": "guesthouse",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "",
+            "day": "",
+            "arrive by": "",
+            "departure": "",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "There are 5 guesthouses that have free parking. Should I book one of them for you?",
+        "utt_idx": 7,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "type",
+              "value": "guesthouse",
+              "start": 12,
+              "end": 22
+            },
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "choice",
+              "value": "5",
+              "start": 10,
+              "end": 11
+            }
+          ],
+          "binary": [
+            {
+              "intent": "inform",
+              "domain": "booking",
+              "slot": "",
+              "value": ""
+            },
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "parking",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "Okay, none of them DON'T offer free wifi? If not, I'll need the address for one that does have wifi, please. Tell me about your favorite.",
+        "utt_idx": 8,
+        "dialogue_acts": {
+          "categorical": [
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "internet",
+              "value": "yes"
+            }
+          ],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "hotel",
+              "slot": "address",
+              "value": ""
+            }
+          ]
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "east",
+            "parking": "yes",
+            "price range": "dontcare",
+            "stars": "",
+            "internet": "no",
+            "type": "guesthouse",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "",
+            "day": "",
+            "arrive by": "",
+            "departure": "",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "The allenbell is a guesthouse on the east.  The addres sis 517a coldham lane post code cb13js.",
+        "utt_idx": 9,
+        "dialogue_acts": {
+          "categorical": [
+            {
+              "intent": "recommend",
+              "domain": "hotel",
+              "slot": "area",
+              "value": "east"
+            }
+          ],
+          "non-categorical": [
+            {
+              "intent": "recommend",
+              "domain": "hotel",
+              "slot": "address",
+              "value": "517a coldham lane",
+              "start": 59,
+              "end": 76
+            },
+            {
+              "intent": "recommend",
+              "domain": "hotel",
+              "slot": "postcode",
+              "value": "cb13js",
+              "start": 87,
+              "end": 93
+            },
+            {
+              "intent": "recommend",
+              "domain": "hotel",
+              "slot": "name",
+              "value": "allenbell",
+              "start": 4,
+              "end": 13
+            },
+            {
+              "intent": "recommend",
+              "domain": "hotel",
+              "slot": "type",
+              "value": "guesthouse",
+              "start": 19,
+              "end": 29
+            }
+          ],
+          "binary": []
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "Great. Can you help me with a train as well? I'm looking to come in from Ely.",
+        "utt_idx": 10,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "departure",
+              "value": "Ely",
+              "start": 73,
+              "end": 76
+            }
+          ],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "east",
+            "parking": "yes",
+            "price range": "dontcare",
+            "stars": "",
+            "internet": "no",
+            "type": "guesthouse",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "",
+            "day": "",
+            "arrive by": "",
+            "departure": "ely",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "What is your destination? And do you have any preference for day, departure or arrival time?",
+        "utt_idx": 11,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "train",
+              "slot": "day",
+              "value": ""
+            },
+            {
+              "intent": "request",
+              "domain": "train",
+              "slot": "destination",
+              "value": ""
+            },
+            {
+              "intent": "request",
+              "domain": "train",
+              "slot": "leave at",
+              "value": ""
+            },
+            {
+              "intent": "request",
+              "domain": "train",
+              "slot": "arrive by",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "I want to go from cambridge to ely, no preference for time but I need to book for 8 people.",
+        "utt_idx": 12,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "destination",
+              "value": "ely",
+              "start": 31,
+              "end": 34
+            },
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "departure",
+              "value": "cambridge",
+              "start": 18,
+              "end": 27
+            }
+          ],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "east",
+            "parking": "yes",
+            "price range": "dontcare",
+            "stars": "",
+            "internet": "no",
+            "type": "guesthouse",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "dontcare",
+            "destination": "ely",
+            "day": "",
+            "arrive by": "dontcare",
+            "departure": "cambridge",
+            "book people": "8"
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "There are trains that run every two hours every day starting at 5:50. Do you know what day you'd like to travel?",
+        "utt_idx": 13,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "leave at",
+              "value": "starting at 5:50",
+              "start": 52,
+              "end": 68
+            }
+          ],
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "train",
+              "slot": "day",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "I'm looking for Wednesday , arrive by 11:30 for 8 people Please.",
+        "utt_idx": 14,
+        "dialogue_acts": {
+          "categorical": [
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "day",
+              "value": "wednesday"
+            }
+          ],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "arrive by",
+              "value": "11:30",
+              "start": 38,
+              "end": 43
+            },
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "book people",
+              "value": "8",
+              "start": 48,
+              "end": 49
+            }
+          ],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "east",
+            "parking": "yes",
+            "price range": "dontcare",
+            "stars": "",
+            "internet": "no",
+            "type": "guesthouse",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "dontcare",
+            "destination": "ely",
+            "day": "wednesday",
+            "arrive by": "11:30",
+            "departure": "cambridge",
+            "book people": "8"
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "Booking was successful, the total fee is 35.2 GBP payable at the station. You reference number is 7ZCVR4Q3. Can I help you with anything else?",
+        "utt_idx": 15,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "offerbooked",
+              "domain": "train",
+              "slot": "ref",
+              "value": "7ZCVR4Q3",
+              "start": 98,
+              "end": 106
+            },
+            {
+              "intent": "offerbooked",
+              "domain": "train",
+              "slot": "price",
+              "value": "35.2 GBP",
+              "start": 41,
+              "end": 49
+            }
+          ],
+          "binary": []
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "No, I don't need anything else right now.  Thank you for your assistance.  Good Bye.",
+        "utt_idx": 16,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "bye",
+              "domain": "general",
+              "slot": "",
+              "value": ""
+            }
+          ]
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "east",
+            "parking": "yes",
+            "price range": "dontcare",
+            "stars": "",
+            "internet": "no",
+            "type": "guesthouse",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "dontcare",
+            "destination": "ely",
+            "day": "wednesday",
+            "arrive by": "11:30",
+            "departure": "cambridge",
+            "book people": "8"
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "Thank you and enjoy your visit. Have a great day.",
+        "utt_idx": 17,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "bye",
+              "domain": "general",
+              "slot": "",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      }
+    ]
+  },
+  {
+    "dataset": "multiwoz21",
+    "data_split": "train",
+    "dialogue_id": "multiwoz21-train-5",
+    "original_id": "PMUL1690.json",
+    "domains": [
+      "hotel",
+      "train",
+      "booking",
+      "general"
+    ],
+    "goal": {
+      "description": "You are planning your trip in Cambridge. You are looking for a <span class='emphasis'>train</span>. The train should depart from <span class='emphasis'>norwich</span> and should <span class='emphasis'>leave after 21:15</span>. The train should leave on <span class='emphasis'>monday</span> and should go to <span class='emphasis'>cambridge</span>. Make sure you get <span class='emphasis'>arrival time</span>, <span class='emphasis'>travel time</span>, and <span class='emphasis'>price</span>. You are also looking for a <span class='emphasis'>place to stay</span>. The hotel should have <span class='emphasis'>a star of 4</span> and should be in the <span class='emphasis'>north</span>. The hotel should be in the <span class='emphasis'>moderate</span> price range and should be in the type of <span class='emphasis'>hotel</span>. If there is no such hotel, how about one that is in <span class='emphasis'>the type of guesthouse</span>. Once you find the <span class='emphasis'>hotel</span> you want to book it for <span class='emphasis'>4 people</span> and <span class='emphasis'>2 nights</span> starting from <span class='emphasis'>the same day</span>. Make sure you get the <span class='emphasis'>reference number</span>",
+      "inform": {
+        "hotel": {
+          "type": "hotel|guesthouse",
+          "price range": "moderate|moderate",
+          "stars": "4|4",
+          "area": "north|north",
+          "book stay": "2",
+          "book day": "monday",
+          "book people": "4"
+        },
+        "train": {
+          "leave at": "21:15",
+          "destination": "cambridge",
+          "day": "monday",
+          "departure": "norwich"
+        }
+      },
+      "request": {
+        "hotel": {},
+        "train": {
+          "arrive by": "",
+          "duration": "",
+          "price": ""
+        }
+      }
+    },
+    "turns": [
+      {
+        "speaker": "user",
+        "utterance": "I need to take a train from Norwich, leaving after 21:15.",
+        "utt_idx": 0,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "departure",
+              "value": "Norwich",
+              "start": 28,
+              "end": 35
+            },
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "leave at",
+              "value": "21:15",
+              "start": 51,
+              "end": 56
+            }
+          ],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "21:15",
+            "destination": "",
+            "day": "",
+            "arrive by": "",
+            "departure": "norwich",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "Where would you like to go to?",
+        "utt_idx": 1,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "train",
+              "slot": "destination",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "I would like to go to Ely please.",
+        "utt_idx": 2,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "destination",
+              "value": "Ely",
+              "start": 22,
+              "end": 25
+            }
+          ],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "ely",
+            "day": "",
+            "arrive by": "",
+            "departure": "norwich",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "I don't have any listings for trains leaving Norwich and arriving in Ely. Would you like to try a different departure station?",
+        "utt_idx": 3,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "nooffer",
+              "domain": "train",
+              "slot": "destination",
+              "value": "Ely",
+              "start": 69,
+              "end": 72
+            },
+            {
+              "intent": "nooffer",
+              "domain": "train",
+              "slot": "departure",
+              "value": "Norwich",
+              "start": 45,
+              "end": 52
+            }
+          ],
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "train",
+              "slot": "departure",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "I meant Cambridge please. I would like to arrive in Cambridge on Monday after 21:15.",
+        "utt_idx": 4,
+        "dialogue_acts": {
+          "categorical": [
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "day",
+              "value": "monday"
+            }
+          ],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "destination",
+              "value": "Cambridge",
+              "start": 8,
+              "end": 17
+            },
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "leave at",
+              "value": "21:15",
+              "start": 78,
+              "end": 83
+            }
+          ],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "ely",
+            "day": "",
+            "arrive by": "",
+            "departure": "norwich",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "Ok I will book that for you and get you a confirmation number",
+        "utt_idx": 5,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "offerbook",
+              "domain": "train",
+              "slot": "",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "I don't need anything booked.  I just need to get the arrival time, travel time and price of a train from norwich to cambridge leaving after 21:15.",
+        "utt_idx": 6,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "destination",
+              "value": "cambridge",
+              "start": 117,
+              "end": 126
+            },
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "departure",
+              "value": "norwich",
+              "start": 106,
+              "end": 113
+            },
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "leave at",
+              "value": "21:15",
+              "start": 141,
+              "end": 146
+            }
+          ],
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "train",
+              "slot": "arrive by",
+              "value": ""
+            },
+            {
+              "intent": "request",
+              "domain": "train",
+              "slot": "duration",
+              "value": ""
+            },
+            {
+              "intent": "request",
+              "domain": "train",
+              "slot": "price",
+              "value": ""
+            }
+          ]
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "21:15",
+            "destination": "cambridge",
+            "day": "monday",
+            "arrive by": "",
+            "departure": "norwich",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "There are trains arriving at 22:35, 23:35 and 24:35. They are 17.60 pounds and 79 minutes each.",
+        "utt_idx": 7,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "price",
+              "value": "17.60 pounds",
+              "start": 62,
+              "end": 74
+            },
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "duration",
+              "value": "79 minutes",
+              "start": 79,
+              "end": 89
+            },
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "arrive by",
+              "value": "22:35",
+              "start": 29,
+              "end": 34
+            },
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "arrive by",
+              "value": "23:35",
+              "start": 36,
+              "end": 41
+            },
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "arrive by",
+              "value": "24:35",
+              "start": 46,
+              "end": 51
+            }
+          ],
+          "binary": []
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "Great, thanks! I'm also looking for a 4 star hotel in the north. Can you please help with that? ",
+        "utt_idx": 8,
+        "dialogue_acts": {
+          "categorical": [
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "area",
+              "value": "north"
+            },
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "stars",
+              "value": "4"
+            }
+          ],
+          "non-categorical": [],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "north",
+            "parking": "",
+            "price range": "",
+            "stars": "4",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "21:15",
+            "destination": "cambridge",
+            "day": "monday",
+            "arrive by": "",
+            "departure": "norwich",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "There are several that meet your criteria. Is there a price range you are interested in?",
+        "utt_idx": 9,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "choice",
+              "value": "several",
+              "start": 10,
+              "end": 17
+            }
+          ],
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "hotel",
+              "slot": "price range",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "I would like to keep it in the moderate range, please.",
+        "utt_idx": 10,
+        "dialogue_acts": {
+          "categorical": [
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "price range",
+              "value": "moderate"
+            }
+          ],
+          "non-categorical": [],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "north",
+            "parking": "",
+            "price range": "moderate",
+            "stars": "4",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "21:15",
+            "destination": "cambridge",
+            "day": "monday",
+            "arrive by": "",
+            "departure": "norwich",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "I see 7 options. They are all guesthouses. Do you have preferences for internet and parking?",
+        "utt_idx": 11,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "type",
+              "value": "guesthouses",
+              "start": 30,
+              "end": 41
+            },
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "choice",
+              "value": "7",
+              "start": 6,
+              "end": 7
+            },
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "choice",
+              "value": "all",
+              "start": 26,
+              "end": 29
+            }
+          ],
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "hotel",
+              "slot": "internet",
+              "value": ""
+            },
+            {
+              "intent": "request",
+              "domain": "hotel",
+              "slot": "parking",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "I do not have a preference on those things.",
+        "utt_idx": 12,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "north",
+            "parking": "",
+            "price range": "moderate",
+            "stars": "4",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "21:15",
+            "destination": "cambridge",
+            "day": "monday",
+            "arrive by": "",
+            "departure": "norwich",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "Shall I try to start and book you into one?",
+        "utt_idx": 13,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "inform",
+              "domain": "booking",
+              "slot": "",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "Please book it and provide the reference number.",
+        "utt_idx": 14,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "hotel",
+              "slot": "ref",
+              "value": ""
+            }
+          ]
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "north",
+            "parking": "",
+            "price range": "moderate",
+            "stars": "4",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "21:15",
+            "destination": "cambridge",
+            "day": "monday",
+            "arrive by": "",
+            "departure": "norwich",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "How many nights will you be staying?",
+        "utt_idx": 15,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "booking",
+              "slot": "book stay",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "Please book the hotel for 4 people and 2 nights.  Thank you.",
+        "utt_idx": 16,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "book stay",
+              "value": "2",
+              "start": 39,
+              "end": 40
+            },
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "book people",
+              "value": "4",
+              "start": 26,
+              "end": 27
+            }
+          ],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "north",
+            "parking": "",
+            "price range": "moderate",
+            "stars": "4",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "21:15",
+            "destination": "cambridge",
+            "day": "monday",
+            "arrive by": "",
+            "departure": "norwich",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "What day will you be arriving?",
+        "utt_idx": 17,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "booking",
+              "slot": "day",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "the same day... monday please.",
+        "utt_idx": 18,
+        "dialogue_acts": {
+          "categorical": [
+            {
+              "intent": "inform",
+              "domain": "hotel",
+              "slot": "book day",
+              "value": "monday"
+            }
+          ],
+          "non-categorical": [],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "north",
+            "parking": "",
+            "price range": "moderate",
+            "stars": "4",
+            "internet": "",
+            "type": "",
+            "book stay": "2",
+            "book day": "monday",
+            "book people": "4"
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "21:15",
+            "destination": "cambridge",
+            "day": "monday",
+            "arrive by": "",
+            "departure": "norwich",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "I apologize, some of our new workers have phobias of the booking system, I booked you at Acorn guest house, the reference number is 04CSEO7Q any other questions?",
+        "utt_idx": 19,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "book",
+              "domain": "booking",
+              "slot": "ref",
+              "value": "04CSEO7Q",
+              "start": 132,
+              "end": 140
+            },
+            {
+              "intent": "book",
+              "domain": "booking",
+              "slot": "name",
+              "value": "Acorn guest house",
+              "start": 89,
+              "end": 106
+            }
+          ],
+          "binary": [
+            {
+              "intent": "reqmore",
+              "domain": "general",
+              "slot": "",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "That's all I need for today.  Thanks for your help!",
+        "utt_idx": 20,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "thank",
+              "domain": "general",
+              "slot": "",
+              "value": ""
+            }
+          ]
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "north",
+            "parking": "",
+            "price range": "moderate",
+            "stars": "4",
+            "internet": "",
+            "type": "",
+            "book stay": "2",
+            "book day": "monday",
+            "book people": "4"
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "21:15",
+            "destination": "cambridge",
+            "day": "monday",
+            "arrive by": "",
+            "departure": "norwich",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "I'm happy to help, and I hope you enjoy your stay!",
+        "utt_idx": 21,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "bye",
+              "domain": "general",
+              "slot": "",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      }
+    ]
+  },
+  {
+    "dataset": "multiwoz21",
+    "data_split": "train",
+    "dialogue_id": "multiwoz21-train-6",
+    "original_id": "MUL2395.json",
+    "domains": [
+      "attraction",
+      "restaurant",
+      "general"
+    ],
+    "goal": {
+      "description": "You are planning your trip in Cambridge. You are looking for <span class='emphasis'>places to go</span> in town. The attraction should be in the <span class='emphasis'>centre</span>. Make sure you get <span class='emphasis'>entrance fee</span>, <span class='emphasis'>attraction type</span>, and <span class='emphasis'>address</span>. You are also looking for a <span class='emphasis'>place to dine</span>. The restaurant should be in the <span class='emphasis'>expensive</span> price range and should be <span class='emphasis'>in the same area as the attraction</span>. The restaurant should serve <span class='emphasis'>caribbean</span> food. If there is no such restaurant, how about one that serves <span class='emphasis'>turkish</span> food. Make sure you get <span class='emphasis'>phone number</span>, <span class='emphasis'>postcode</span>, and <span class='emphasis'>address</span>",
+      "inform": {
+        "attraction": {
+          "area": "east|centre"
+        },
+        "restaurant": {
+          "food": "caribbean|turkish",
+          "price range": "expensive|expensive",
+          "area": "centre|centre"
+        }
+      },
+      "request": {
+        "attraction": {
+          "entrance fee": "",
+          "type": "",
+          "address": ""
+        },
+        "restaurant": {
+          "phone": "",
+          "postcode": "",
+          "address": ""
+        }
+      }
+    },
+    "turns": [
+      {
+        "speaker": "user",
+        "utterance": "I am planning a trip to town and want to sight see a bit. Can you let me know some attractions I may be interested in?",
+        "utt_idx": 0,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "inform",
+              "domain": "attraction",
+              "slot": "",
+              "value": ""
+            }
+          ]
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "",
+            "day": "",
+            "arrive by": "",
+            "departure": "",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "There are 79 attractions in the city. Do you have a specific type of attraction or specific area of the attraction that you are interested in?",
+        "utt_idx": 1,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "attraction",
+              "slot": "choice",
+              "value": "79",
+              "start": 10,
+              "end": 12
+            }
+          ],
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "attraction",
+              "slot": "area",
+              "value": ""
+            },
+            {
+              "intent": "request",
+              "domain": "attraction",
+              "slot": "type",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "I'd like something in the centre.",
+        "utt_idx": 2,
+        "dialogue_acts": {
+          "categorical": [
+            {
+              "intent": "inform",
+              "domain": "attraction",
+              "slot": "area",
+              "value": "centre"
+            }
+          ],
+          "non-categorical": [],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": "centre"
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "",
+            "day": "",
+            "arrive by": "",
+            "departure": "",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "Great! How about All Saints Church? It has fantastic architecture and free entrance. Would you like more information?",
+        "utt_idx": 3,
+        "dialogue_acts": {
+          "categorical": [
+            {
+              "intent": "recommend",
+              "domain": "attraction",
+              "slot": "type",
+              "value": "architecture"
+            }
+          ],
+          "non-categorical": [
+            {
+              "intent": "recommend",
+              "domain": "attraction",
+              "slot": "entrance fee",
+              "value": "free",
+              "start": 70,
+              "end": 74
+            },
+            {
+              "intent": "recommend",
+              "domain": "attraction",
+              "slot": "name",
+              "value": "All Saints Church",
+              "start": 17,
+              "end": 34
+            }
+          ],
+          "binary": []
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "Could I get the address for it? I would also like an expensive place to eat around it. ",
+        "utt_idx": 4,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "attraction",
+              "slot": "address",
+              "value": ""
+            }
+          ]
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": "centre"
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "expensive",
+            "name": "",
+            "area": "centre",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "",
+            "day": "",
+            "arrive by": "",
+            "departure": "",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "The address is Jesus Lane. What type of food would you like to eat?",
+        "utt_idx": 5,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "attraction",
+              "slot": "address",
+              "value": "Jesus Lane",
+              "start": 15,
+              "end": 25
+            }
+          ],
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "restaurant",
+              "slot": "food",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "I'm in the mood for Caribbean food.",
+        "utt_idx": 6,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "restaurant",
+              "slot": "food",
+              "value": "Caribbean",
+              "start": 20,
+              "end": 29
+            }
+          ],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": "centre"
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "caribbean",
+            "price range": "expensive",
+            "name": "",
+            "area": "centre",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "",
+            "day": "",
+            "arrive by": "",
+            "departure": "",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "Unfortunately, there are no expensive caribbean restaurants in that area. Would you like me to check a different type of cuisine, or another area?",
+        "utt_idx": 7,
+        "dialogue_acts": {
+          "categorical": [
+            {
+              "intent": "nooffer",
+              "domain": "restaurant",
+              "slot": "price range",
+              "value": "expensive"
+            }
+          ],
+          "non-categorical": [
+            {
+              "intent": "nooffer",
+              "domain": "restaurant",
+              "slot": "food",
+              "value": "caribbean",
+              "start": 38,
+              "end": 47
+            }
+          ],
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "restaurant",
+              "slot": "food",
+              "value": ""
+            },
+            {
+              "intent": "request",
+              "domain": "restaurant",
+              "slot": "area",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "Yes, I also love turkish food. Is there something in the center that's expensive? Also, what type of attraction is All Saints Church?",
+        "utt_idx": 8,
+        "dialogue_acts": {
+          "categorical": [
+            {
+              "intent": "inform",
+              "domain": "restaurant",
+              "slot": "price range",
+              "value": "expensive"
+            }
+          ],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "attraction",
+              "slot": "name",
+              "value": "All Saints Church",
+              "start": 115,
+              "end": 132
+            },
+            {
+              "intent": "inform",
+              "domain": "restaurant",
+              "slot": "food",
+              "value": "turkish",
+              "start": 17,
+              "end": 24
+            }
+          ],
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "attraction",
+              "slot": "type",
+              "value": ""
+            }
+          ]
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "all saints church",
+            "area": "centre"
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "turkish",
+            "price range": "expensive",
+            "name": "",
+            "area": "centre",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "",
+            "day": "",
+            "arrive by": "",
+            "departure": "",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "The Meze Bar Restaurant is an expensive Turkish restaurant in the city centre.  The All Saints Church is an architecture attraction.",
+        "utt_idx": 9,
+        "dialogue_acts": {
+          "categorical": [
+            {
+              "intent": "inform",
+              "domain": "restaurant",
+              "slot": "area",
+              "value": "the city centre"
+            },
+            {
+              "intent": "inform",
+              "domain": "restaurant",
+              "slot": "price range",
+              "value": "expensive"
+            },
+            {
+              "intent": "inform",
+              "domain": "attraction",
+              "slot": "type",
+              "value": "architecture"
+            }
+          ],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "restaurant",
+              "slot": "food",
+              "value": "Turkish",
+              "start": 40,
+              "end": 47
+            },
+            {
+              "intent": "inform",
+              "domain": "restaurant",
+              "slot": "name",
+              "value": "The Meze Bar Restaurant",
+              "start": 0,
+              "end": 23
+            },
+            {
+              "intent": "inform",
+              "domain": "attraction",
+              "slot": "name",
+              "value": "The All Saints Church",
+              "start": 80,
+              "end": 101
+            }
+          ],
+          "binary": []
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "Can I have the phone number, address and postcode for the restaurant please?",
+        "utt_idx": 10,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "restaurant",
+              "slot": "phone",
+              "value": ""
+            },
+            {
+              "intent": "request",
+              "domain": "restaurant",
+              "slot": "postcode",
+              "value": ""
+            },
+            {
+              "intent": "request",
+              "domain": "restaurant",
+              "slot": "address",
+              "value": ""
+            }
+          ]
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "all saints church",
+            "area": "centre"
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "turkish",
+            "price range": "expensive",
+            "name": "meze bar",
+            "area": "centre",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "",
+            "day": "",
+            "arrive by": "",
+            "departure": "",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "I don't have a phone number available, but the address is 196 Mill Road City Centre, postal code cb13nf. Anything else I can do for you? ",
+        "utt_idx": 11,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "restaurant",
+              "slot": "postcode",
+              "value": "cb13nf",
+              "start": 97,
+              "end": 103
+            },
+            {
+              "intent": "inform",
+              "domain": "restaurant",
+              "slot": "address",
+              "value": "196 Mill Road City Centre",
+              "start": 58,
+              "end": 83
+            }
+          ],
+          "binary": [
+            {
+              "intent": "reqmore",
+              "domain": "general",
+              "slot": "",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "Great that's all that I needed to know, thank you!",
+        "utt_idx": 12,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "thank",
+              "domain": "general",
+              "slot": "",
+              "value": ""
+            }
+          ]
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "all saints church",
+            "area": "centre"
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "turkish",
+            "price range": "expensive",
+            "name": "meze bar",
+            "area": "centre",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "",
+            "day": "",
+            "arrive by": "",
+            "departure": "",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "You're welcome, feel free to contact us about anything else you need.",
+        "utt_idx": 13,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "bye",
+              "domain": "general",
+              "slot": "",
+              "value": ""
+            },
+            {
+              "intent": "welcome",
+              "domain": "general",
+              "slot": "",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      }
+    ]
+  },
+  {
+    "dataset": "multiwoz21",
+    "data_split": "train",
+    "dialogue_id": "multiwoz21-train-7",
+    "original_id": "SNG0190.json",
+    "domains": [
+      "hospital",
+      "general"
+    ],
+    "goal": {
+      "description": "You got injured and are looking for a <span class='emphasis'>hospital</span> nearby. Make sure you get <span class='emphasis'>address</span> and <span class='emphasis'>phone number</span>",
+      "inform": {
+        "hospital": {}
+      },
+      "request": {
+        "hospital": {
+          "address": "",
+          "phone": ""
+        }
+      }
+    },
+    "turns": [
+      {
+        "speaker": "user",
+        "utterance": "I've been injured and need to find a hospital nearby.",
+        "utt_idx": 0,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "inform",
+              "domain": "hospital",
+              "slot": "",
+              "value": ""
+            }
+          ]
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "",
+            "day": "",
+            "arrive by": "",
+            "departure": "",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "The Addenbrookes Hospital is located at Hills Rd., Cambridge. Are you looking for a specific department?",
+        "utt_idx": 1,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "hospital",
+              "slot": "department",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "No, I just need the general phone number, please.",
+        "utt_idx": 2,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "hospital",
+              "slot": "phone",
+              "value": ""
+            }
+          ]
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "",
+            "day": "",
+            "arrive by": "",
+            "departure": "",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "01223245151, is the number. Do you need anything else?",
+        "utt_idx": 3,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "hospital",
+              "slot": "phone",
+              "value": "01223245151",
+              "start": 0,
+              "end": 11
+            }
+          ],
+          "binary": []
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "Thank you that will do.",
+        "utt_idx": 4,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "thank",
+              "domain": "general",
+              "slot": "",
+              "value": ""
+            }
+          ]
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "",
+            "day": "",
+            "arrive by": "",
+            "departure": "",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "Feel better soon! Have a better night! ",
+        "utt_idx": 5,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "bye",
+              "domain": "general",
+              "slot": "",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      }
+    ]
+  },
+  {
+    "dataset": "multiwoz21",
+    "data_split": "train",
+    "dialogue_id": "multiwoz21-train-8",
+    "original_id": "PMUL1170.json",
+    "domains": [
+      "attraction",
+      "train",
+      "general"
+    ],
+    "goal": {
+      "description": "You are planning your trip in Cambridge. You are looking for a <span class='emphasis'>train</span>. The train should depart from <span class='emphasis'>cambridge</span> and should leave on <span class='emphasis'>wednesday</span>. The train should <span class='emphasis'>leave after 12:45</span> and should go to <span class='emphasis'>peterborough</span>. Make sure you get <span class='emphasis'>train ID</span>, <span class='emphasis'>arrival time</span>, and <span class='emphasis'>price</span>. You are also looking for <span class='emphasis'>places to go</span> in town. The attraction should be in the <span class='emphasis'>centre</span> and should be in the type of <span class='emphasis'>museum</span>. Make sure you get <span class='emphasis'>address</span>, <span class='emphasis'>postcode</span>, and <span class='emphasis'>phone number</span>",
+      "inform": {
+        "attraction": {
+          "type": "museum",
+          "area": "centre"
+        },
+        "train": {
+          "leave at": "12:45",
+          "destination": "peterborough",
+          "day": "wednesday",
+          "departure": "cambridge"
+        }
+      },
+      "request": {
+        "attraction": {
+          "address": "",
+          "postcode": "",
+          "phone": ""
+        },
+        "train": {
+          "train id": "",
+          "arrive by": "",
+          "price": ""
+        }
+      }
+    },
+    "turns": [
+      {
+        "speaker": "user",
+        "utterance": "I need to take a train out of Cambridge, I will be leaving town on Wednesday.",
+        "utt_idx": 0,
+        "dialogue_acts": {
+          "categorical": [
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "day",
+              "value": "wednesday"
+            }
+          ],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "departure",
+              "value": "Cambridge",
+              "start": 30,
+              "end": 39
+            }
+          ],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "",
+            "day": "wednesday",
+            "arrive by": "",
+            "departure": "cambridge",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "There are 5 trains out of Cambridge on Wednesday. Do you have a departure time in mind? ",
+        "utt_idx": 1,
+        "dialogue_acts": {
+          "categorical": [
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "day",
+              "value": "Wednesday"
+            }
+          ],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "choice",
+              "value": "5",
+              "start": 10,
+              "end": 11
+            },
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "departure",
+              "value": "Cambridge",
+              "start": 26,
+              "end": 35
+            }
+          ],
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "train",
+              "slot": "leave at",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "I'd like to go to peterborough and leave after 12:45, I have to attend a meeting beforehand.",
+        "utt_idx": 2,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "destination",
+              "value": "peterborough",
+              "start": 18,
+              "end": 30
+            },
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "leave at",
+              "value": "12:45",
+              "start": 47,
+              "end": 52
+            }
+          ],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "12:45",
+            "destination": "peterborough",
+            "day": "wednesday",
+            "arrive by": "",
+            "departure": "cambridge",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "TR1879 leaves at 13:06 on Wednesday.  Will that work for you?",
+        "utt_idx": 3,
+        "dialogue_acts": {
+          "categorical": [
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "day",
+              "value": "Wednesday"
+            }
+          ],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "leave at",
+              "value": "13:06",
+              "start": 17,
+              "end": 22
+            },
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "train id",
+              "value": "TR1879",
+              "start": 0,
+              "end": 6
+            }
+          ],
+          "binary": []
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "What is the price of the fair and could you tell me what is the arrival time into Peterborough?",
+        "utt_idx": 4,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "train",
+              "slot": "arrive by",
+              "value": ""
+            },
+            {
+              "intent": "request",
+              "domain": "train",
+              "slot": "price",
+              "value": ""
+            }
+          ]
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "12:45",
+            "destination": "peterborough",
+            "day": "wednesday",
+            "arrive by": "",
+            "departure": "cambridge",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "The price is 16.50 pounds and the train will arrive in Peterborough by 13:56 Wednesday.  Would you like me to book a ticket for you?",
+        "utt_idx": 5,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "price",
+              "value": "16.50 pounds",
+              "start": 13,
+              "end": 25
+            },
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "destination",
+              "value": "Peterborough",
+              "start": 55,
+              "end": 67
+            },
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "arrive by",
+              "value": "13:56",
+              "start": 71,
+              "end": 76
+            }
+          ],
+          "binary": [
+            {
+              "intent": "offerbook",
+              "domain": "train",
+              "slot": "",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "Yes, one ticket please, can I also get the reference number?",
+        "utt_idx": 6,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "train",
+              "slot": "book people",
+              "value": "one",
+              "start": 5,
+              "end": 8
+            }
+          ],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "12:45",
+            "destination": "peterborough",
+            "day": "wednesday",
+            "arrive by": "",
+            "departure": "cambridge",
+            "book people": "1"
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "I booked you one seat on TR1879 departing Cambridge for Peterborough on Wednesday at 13:06, Reference: xu1qlhvw. Can I help further today?",
+        "utt_idx": 7,
+        "dialogue_acts": {
+          "categorical": [
+            {
+              "intent": "offerbooked",
+              "domain": "train",
+              "slot": "day",
+              "value": "Wednesday"
+            }
+          ],
+          "non-categorical": [
+            {
+              "intent": "offerbooked",
+              "domain": "train",
+              "slot": "ref",
+              "value": "xu1qlhvw",
+              "start": 103,
+              "end": 111
+            },
+            {
+              "intent": "offerbooked",
+              "domain": "train",
+              "slot": "departure",
+              "value": "Cambridge",
+              "start": 42,
+              "end": 51
+            },
+            {
+              "intent": "offerbooked",
+              "domain": "train",
+              "slot": "book people",
+              "value": "one",
+              "start": 13,
+              "end": 16
+            },
+            {
+              "intent": "offerbooked",
+              "domain": "train",
+              "slot": "train id",
+              "value": "TR1879",
+              "start": 25,
+              "end": 31
+            },
+            {
+              "intent": "offerbooked",
+              "domain": "train",
+              "slot": "destination",
+              "value": "Peterborough",
+              "start": 56,
+              "end": 68
+            },
+            {
+              "intent": "offerbooked",
+              "domain": "train",
+              "slot": "leave at",
+              "value": "13:06",
+              "start": 85,
+              "end": 90
+            }
+          ],
+          "binary": [
+            {
+              "intent": "reqmore",
+              "domain": "general",
+              "slot": "",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "I am also looking for a museum in centre. Could you give me the address, postcode and phone number of one? ",
+        "utt_idx": 8,
+        "dialogue_acts": {
+          "categorical": [
+            {
+              "intent": "inform",
+              "domain": "attraction",
+              "slot": "area",
+              "value": "centre"
+            },
+            {
+              "intent": "inform",
+              "domain": "attraction",
+              "slot": "type",
+              "value": "museum"
+            }
+          ],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "attraction",
+              "slot": "address",
+              "value": ""
+            },
+            {
+              "intent": "request",
+              "domain": "attraction",
+              "slot": "postcode",
+              "value": ""
+            },
+            {
+              "intent": "request",
+              "domain": "attraction",
+              "slot": "phone",
+              "value": ""
+            }
+          ]
+        },
+        "state": {
+          "attraction": {
+            "type": "museum",
+            "name": "",
+            "area": "centre"
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "12:45",
+            "destination": "peterborough",
+            "day": "wednesday",
+            "arrive by": "",
+            "departure": "cambridge",
+            "book people": "1"
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "The Broughton House Gallery is located at 98 King Street, postcode cb11ln. Their phone number is 01223314960. Is there anything else you need?",
+        "utt_idx": 9,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "attraction",
+              "slot": "phone",
+              "value": "01223314960",
+              "start": 97,
+              "end": 108
+            },
+            {
+              "intent": "inform",
+              "domain": "attraction",
+              "slot": "address",
+              "value": "98 King Street",
+              "start": 42,
+              "end": 56
+            },
+            {
+              "intent": "inform",
+              "domain": "attraction",
+              "slot": "name",
+              "value": "Broughton House Gallery",
+              "start": 4,
+              "end": 27
+            },
+            {
+              "intent": "inform",
+              "domain": "attraction",
+              "slot": "postcode",
+              "value": "cb11ln",
+              "start": 67,
+              "end": 73
+            }
+          ],
+          "binary": [
+            {
+              "intent": "reqmore",
+              "domain": "general",
+              "slot": "",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "That's all I need right now. Thanks, you have been very helpful!",
+        "utt_idx": 10,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "thank",
+              "domain": "general",
+              "slot": "",
+              "value": ""
+            }
+          ]
+        },
+        "state": {
+          "attraction": {
+            "type": "museum",
+            "name": "",
+            "area": "centre"
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "12:45",
+            "destination": "peterborough",
+            "day": "wednesday",
+            "arrive by": "",
+            "departure": "cambridge",
+            "book people": "1"
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "You're welcome, I hope you enjoy your time in Cambridge!",
+        "utt_idx": 11,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "bye",
+              "domain": "general",
+              "slot": "",
+              "value": ""
+            },
+            {
+              "intent": "welcome",
+              "domain": "general",
+              "slot": "",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      }
+    ]
+  },
+  {
+    "dataset": "multiwoz21",
+    "data_split": "train",
+    "dialogue_id": "multiwoz21-train-9",
+    "original_id": "SNG01741.json",
+    "domains": [
+      "hospital",
+      "general"
+    ],
+    "goal": {
+      "description": "You want to find a <span class='emphasis'>hospital</span> in town. The hospital should have the <span class='emphasis'>paediatric clinic</span> department. Make sure you get <span class='emphasis'>address</span>, <span class='emphasis'>postcode</span>, and <span class='emphasis'>phone number</span>",
+      "inform": {
+        "hospital": {
+          "department": "paediatric clinic"
+        }
+      },
+      "request": {
+        "hospital": {
+          "address": "",
+          "postcode": "",
+          "phone": ""
+        }
+      }
+    },
+    "turns": [
+      {
+        "speaker": "user",
+        "utterance": "I am looking in town  for a hospital with a paediatric clinic",
+        "utt_idx": 0,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "hospital",
+              "slot": "department",
+              "value": "paediatric clinic",
+              "start": 44,
+              "end": 61
+            }
+          ],
+          "binary": []
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "",
+            "day": "",
+            "arrive by": "",
+            "departure": "",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "Addenbrookes Hosptial on Hills Rd. in Cambridge has a paediatric clinic.",
+        "utt_idx": 1,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "hospital",
+              "slot": "department",
+              "value": "paediatric clinic",
+              "start": 54,
+              "end": 71
+            }
+          ],
+          "binary": []
+        },
+        "db_results": {}
+      },
+      {
+        "speaker": "user",
+        "utterance": "I need the address and phone number and postal code",
+        "utt_idx": 2,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "hospital",
+              "slot": "address",
+              "value": ""
+            },
+            {
+              "intent": "request",
+              "domain": "hospital",
+              "slot": "postcode",
+              "value": ""
+            },
+            {
+              "intent": "request",
+              "domain": "hospital",
+              "slot": "phone",
+              "value": ""
+            }
+          ]
+        },
+        "state": {
+          "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+          },
+          "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+          },
+          "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+          },
+          "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+          },
+          "train": {
+            "leave at": "",
+            "destination": "",
+            "day": "",
+            "arrive by": "",
+            "departure": "",
+            "book people": ""
+          }
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "Great. Thank you and goodbye.",
+        "utt_idx": 3,
+        "dialogue_acts": {
+          "categorical": [],
+          "non-categorical": [],
+          "binary": [
+            {
+              "intent": "bye",
+              "domain": "general",
+              "slot": "",
+              "value": ""
+            }
+          ]
+        },
+        "db_results": {}
+      }
+    ]
+  }
+]
\ No newline at end of file
diff --git a/data/unified_datasets/multiwoz21/ontology.json b/data/unified_datasets/multiwoz21/ontology.json
deleted file mode 100644
index b976054f3b6dc47c327c75f545549dfe3563ad01..0000000000000000000000000000000000000000
--- a/data/unified_datasets/multiwoz21/ontology.json
+++ /dev/null
@@ -1,1895 +0,0 @@
-{
-    "domains": {
-        "taxi": {
-            "description": "taxi information query system",
-            "slots": {
-                "destination": {
-                    "description": "destination of taxi",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "departure": {
-                    "description": "departure location of taxi",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "leaveAt": {
-                    "description": "leaving time of taxi",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "arriveBy": {
-                    "description": "arrival time of taxi",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "taxi_phone": {
-                    "description": "taxi phone number",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "taxi_types": {
-                    "description": "taxi type",
-                    "is_categorical": false,
-                    "possible_values": []
-                }
-            }
-        },
-        "restaurant": {
-            "description": "restaurant information query system",
-            "slots": {
-                "food": {
-                    "description": "food type for the restaurant",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "name": {
-                    "description": "name of the restaurant",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "pricerange": {
-                    "description": "price budget for the restaurant",
-                    "is_categorical": true,
-                    "possible_values": [
-                        "not moderate",
-                        "ch",
-                        "high - end",
-                        "luxury options",
-                        "priced cheaply",
-                        "moderate",
-                        "various",
-                        "all price ranges",
-                        "cheap price range",
-                        "low cost",
-                        "assorted",
-                        "cheap priced",
-                        "moderate or expensive priced",
-                        "expensive price",
-                        "your",
-                        "most expensive",
-                        "moderate and expensive",
-                        "reasonable",
-                        "moderately expensive",
-                        "moderate|cheap",
-                        "fairly expensive",
-                        "upscale",
-                        "quite inexpensive",
-                        "boderate",
-                        "affordable",
-                        "expinsive",
-                        "they vary",
-                        "more moderate",
-                        "expensive / upscale",
-                        "high class",
-                        "moderately price",
-                        "reasonably priced",
-                        "a little expensive but worth it",
-                        "middle",
-                        "any price point",
-                        "priced moderately",
-                        "cherap",
-                        "moderately prices",
-                        "cheap to expensive",
-                        "or otherwise",
-                        "inexpensive",
-                        "cheaply priced",
-                        "adforable",
-                        "that range",
-                        "great prices",
-                        "rather expensive",
-                        "expensive side",
-                        "moderate range",
-                        "cheaply - priced",
-                        "more expensive",
-                        "varying price",
-                        "similar price range",
-                        "this price range",
-                        "expensive",
-                        "expensive or moderate",
-                        "not cheap",
-                        "the least expensive",
-                        "pretty expensive",
-                        "high end",
-                        "not expensive",
-                        "less expensive",
-                        "moderatley priced",
-                        "most affordable price range",
-                        "budget - friendly",
-                        "expensive and moderately priced",
-                        "moderatre",
-                        "cheap range",
-                        "or the moderately priced",
-                        "fairly cheap",
-                        "not too expensive",
-                        "east",
-                        "fairly inexpensive",
-                        "regardless of price",
-                        "north",
-                        "mostly expensive and moderately priced",
-                        "cheap",
-                        "budget conscious",
-                        "same price range",
-                        "very expensive",
-                        "not too pricey",
-                        "any price range",
-                        "quite expensive",
-                        "that price range",
-                        "your price range",
-                        "every price point",
-                        "expensively priced",
-                        "varying price range",
-                        "not - so - expensive",
-                        "all",
-                        "vietnamese",
-                        "any",
-                        "moderately - priced",
-                        "that price",
-                        "moderate pricing",
-                        "do nt care",
-                        "modest",
-                        "pricey",
-                        "expensive but worth every penny",
-                        "cheaper",
-                        "pretty cheap",
-                        "moderate price",
-                        "ranging from cheap to expensive",
-                        "quite low",
-                        "moderate priced",
-                        "centre",
-                        "the same",
-                        "expensive range",
-                        "relatively cheap",
-                        "cheap or expensive",
-                        "epensive",
-                        "moderate and one in the cheap range",
-                        "budget friendly",
-                        "fine",
-                        "on the cheap side",
-                        "expensive price range",
-                        "mostly expensive",
-                        "moderately priced",
-                        "relatively expensive",
-                        "moderately",
-                        "moderatly",
-                        "on the pricey side",
-                        "low priced",
-                        "expensively",
-                        "moderate price range",
-                        "moderatly priced",
-                        "do n't care"
-                    ]
-                },
-                "address": {
-                    "description": "exact location of the restaurant",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "area": {
-                    "description": "area or place of the restaurant",
-                    "is_categorical": true,
-                    "possible_values": [
-                        "near the centre",
-                        "east area",
-                        "northern parts of town",
-                        "any part of town",
-                        "west cambridge",
-                        "town of centre",
-                        "centre of cambridge",
-                        "north or centre",
-                        "westside",
-                        "city centre",
-                        "the west end",
-                        "the south of town",
-                        "the city center",
-                        "elsewhere in the city",
-                        "town center",
-                        "the west side",
-                        "south of cambridge",
-                        "south area of town",
-                        "here",
-                        "north side of town",
-                        "on the east",
-                        "throughout the city",
-                        "south area",
-                        "centre city",
-                        "city",
-                        "in the north",
-                        "not",
-                        "east side",
-                        "not in the west",
-                        "here in the city",
-                        "the south area of town",
-                        "that part of town",
-                        "the east",
-                        "west of town",
-                        "centere",
-                        "in the city .",
-                        "south part of time",
-                        "mexican",
-                        "the east side",
-                        "south part of town",
-                        "west part of town .",
-                        "southern",
-                        "any where in the city",
-                        "west area",
-                        "north cambridge",
-                        "the south area of the city",
-                        "almost every area of town",
-                        "ely",
-                        "the north end",
-                        "south cambridge",
-                        "the are",
-                        "east side of town",
-                        "southside",
-                        "east side of the city",
-                        "by the airport",
-                        "around there",
-                        "all of cambridge",
-                        "the town centre",
-                        "nearby",
-                        "town centre",
-                        "the center part of town",
-                        "other parts of town",
-                        "not in the south of town",
-                        "chesterton",
-                        "the centre of town",
-                        "east area of town",
-                        "the east area",
-                        "southend",
-                        "ctre",
-                        "same area",
-                        "anywhere in cambridge",
-                        "north side of cambridge",
-                        "same area as the park",
-                        "south side of town",
-                        "the centre area",
-                        "the same area as the botanic gardens",
-                        "south part of the city",
-                        "the area you have chosen",
-                        "in the centre",
-                        "northern part of cambridge",
-                        "east section",
-                        "east|south",
-                        "the east part of town",
-                        "this town",
-                        "the same area",
-                        "west area of town",
-                        "town centre area",
-                        "the southern area",
-                        "northern part",
-                        "southern area",
-                        "the east side of town",
-                        "expensive",
-                        "east section of town",
-                        "same",
-                        "in cambridge",
-                        "north side of chersteron",
-                        "towns centre",
-                        "in town",
-                        "west side",
-                        "in the east",
-                        "centrally",
-                        "west side area",
-                        "all over town",
-                        "centre region",
-                        "anywhere in town",
-                        "closely located",
-                        "west part of town",
-                        "cambridge",
-                        "downtown",
-                        "south end",
-                        "close it city centre",
-                        "in the city centre",
-                        "close to the hotel",
-                        "east",
-                        "north and west",
-                        "west end of town",
-                        "the south part of town",
-                        "the north side",
-                        "any area",
-                        "the area of west",
-                        "center of the town",
-                        "the area",
-                        "the centre part of town",
-                        "north",
-                        "wet part of town",
-                        "other part of the town",
-                        "cheap",
-                        "north part of town",
-                        "centre area of the town",
-                        "cetre",
-                        "in town at all",
-                        "central",
-                        "east part of town",
-                        "near the center of town",
-                        "the city centre",
-                        "north end",
-                        "east of town",
-                        "centreof",
-                        "west",
-                        "the north side of town",
-                        "in the west",
-                        "same side of town as your hotel",
-                        "the west of town",
-                        "centre area of town",
-                        "centrem",
-                        "east of cambridge",
-                        "center of town",
-                        "anywhere",
-                        "throughout the area",
-                        "central area",
-                        "south of town",
-                        "the west area",
-                        "the center of town",
-                        "centre part of town",
-                        "the center",
-                        "north side",
-                        "the south",
-                        "the west side of town",
-                        "west part of the city",
-                        "center",
-                        "any",
-                        "the north",
-                        "east end",
-                        "the west part of town",
-                        "west end",
-                        "center cambridge",
-                        "the centre area of town",
-                        "centre area",
-                        "northside",
-                        "centre area of the city",
-                        "center area of town",
-                        "centrally located",
-                        "the south side",
-                        "around the college",
-                        "do nt care",
-                        "thai",
-                        "the west",
-                        "south side",
-                        "cambridge centre",
-                        "town",
-                        "west of cambridge",
-                        "near clare hall",
-                        "south of town .",
-                        "n the centre",
-                        "centre of the city",
-                        "the north area",
-                        "that",
-                        "this area",
-                        "central region of town",
-                        "centre",
-                        "north of town",
-                        "these areas",
-                        "the same",
-                        "south part",
-                        "that area of town",
-                        "in the city",
-                        "that area",
-                        "city center",
-                        "south",
-                        "close to your location",
-                        "north area of town",
-                        "all over cambridge",
-                        "the south side of town",
-                        "within this area",
-                        "near centre",
-                        "the centre",
-                        "centre of town .",
-                        "the north part of town",
-                        "the north side .",
-                        "same area as the hotel",
-                        "and centre",
-                        "west side of town",
-                        "westies",
-                        "around town",
-                        "east part of tow",
-                        "eastside",
-                        "close to that area",
-                        "all over the city",
-                        "south side of the town",
-                        "centre cambridge",
-                        "do n't care",
-                        "north area",
-                        "centre of town"
-                    ]
-                },
-                "postcode": {
-                    "description": "postcode of the restaurant",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "phone": {
-                    "description": "restaurant phone number",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "people": {
-                    "description": "number of people booking the restaurant",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "time": {
-                    "description": "time of the restaurant booking",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "day": {
-                    "description": "day of the restaurant booking",
-                    "is_categorical": true,
-                    "possible_values": [
-                        "saturday",
-                        "wednesday",
-                        "tuesday",
-                        "w",
-                        "saturday|thursday",
-                        "monday",
-                        "friday",
-                        "thursday",
-                        "sunday",
-                        "sunday|thursday"
-                    ]
-                },
-                "choice": {
-                    "description": "number of restaurants meeting requests of user",
-                    "is_categorical": false,
-                    "possible_values": []
-                }
-            }
-        },
-        "attraction": {
-            "description": "an entertainment that is offered to the public",
-            "slots": {
-                "address": {
-                    "description": "details of where the attraction is",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "postcode": {
-                    "description": "postcode of the attraction",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "phone": {
-                    "description": "phone number of the attraction",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "entrance fee": {
-                    "description": "the fee charged for admission to the attraction",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "type": {
-                    "description": "type of the attraction",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "area": {
-                    "description": "area or place of the attraction",
-                    "is_categorical": true,
-                    "possible_values": [
-                        "east area",
-                        "near the centre",
-                        "west cambridge",
-                        "centre of cambridge",
-                        "other areas",
-                        "westside",
-                        "the west - side",
-                        "city centre",
-                        "central district",
-                        "near the restaurant",
-                        "whole of cambridge",
-                        "the west end",
-                        "entre",
-                        "various",
-                        "northern area",
-                        "town center",
-                        "west of your city",
-                        "the west side",
-                        "south of cambridge",
-                        "west part of cambridge",
-                        "norwich",
-                        "throughout the city",
-                        "centre city",
-                        "south area",
-                        "city",
-                        "very close in the same area",
-                        "centre by galleria",
-                        "east side",
-                        "northern",
-                        "that part of town",
-                        "the east",
-                        "same general area",
-                        "went side of town",
-                        "southern cambridge",
-                        "the east side",
-                        "south part of town",
-                        "in that area",
-                        "to the south",
-                        "clifton way",
-                        "there",
-                        "west area",
-                        "in the south",
-                        "center are",
-                        "south cambridge",
-                        "ely",
-                        "east side of town",
-                        "central cambridge",
-                        "south park of town",
-                        "the south area",
-                        "centre area of cambridge",
-                        "west area of the city",
-                        "several different parts of town",
-                        "cambridge 's centre",
-                        "nearby",
-                        "town centre",
-                        "the center part of town",
-                        "near the hotel",
-                        "museum",
-                        "the area you 're looking for",
-                        "other parts of town",
-                        "most of them are in the center",
-                        "the centre of town",
-                        "northern area of town",
-                        "east area of town",
-                        "north of the city",
-                        "different area",
-                        "same area",
-                        "east section",
-                        "south side of town",
-                        "the centre area",
-                        "in the centre",
-                        "in the center",
-                        "western part of town",
-                        "centre area .",
-                        "south part of cambridge",
-                        "the same road",
-                        "cetnre of town",
-                        "the same area",
-                        "town centre area",
-                        "the east part of town",
-                        "center area",
-                        "west area of town",
-                        "cenre",
-                        "center of town .",
-                        "cambridge leisure park",
-                        "church area",
-                        "that area .",
-                        "near ely",
-                        "in that side",
-                        "east cambridge",
-                        "same",
-                        "in cambridge",
-                        "towns centre",
-                        "that side of town",
-                        "another area",
-                        "in town",
-                        "west side",
-                        "west end of the city",
-                        "the centry area",
-                        "in the east",
-                        "we",
-                        "all over town",
-                        "all around the city",
-                        "city centre .",
-                        "center of cambridge",
-                        "centre region",
-                        "west part of town",
-                        "cambridge",
-                        "cent",
-                        "western part of the town",
-                        "downtown",
-                        "south end",
-                        "east",
-                        "this side of town",
-                        "same area as hotel",
-                        "center of the town",
-                        "west end of town",
-                        "the area",
-                        "eat",
-                        "northern cambridge",
-                        "north",
-                        "same part of town as your restaurant",
-                        "the centre part of town",
-                        "north part of town",
-                        "central",
-                        "close to the center of town",
-                        "east part of town",
-                        "same part",
-                        "western cambridge",
-                        "the city centre",
-                        "east of town",
-                        "west",
-                        "the eastside",
-                        "north in milton",
-                        "the west of town",
-                        "centre area of town",
-                        "ce",
-                        "east of cambridge",
-                        "the east of the town",
-                        "close to the restaurant",
-                        "the west area of town",
-                        "center of town",
-                        "the south near your hotel",
-                        "south of town",
-                        "the west area",
-                        "centre part of town",
-                        "the center",
-                        "north side",
-                        "north section of cambridge",
-                        "the north of the city",
-                        "all of the other areas",
-                        "the south",
-                        "all",
-                        "the west side of town",
-                        "west part of the city",
-                        "center",
-                        "the north",
-                        "your desired location",
-                        "wet end",
-                        "center part of town",
-                        "cb30aq",
-                        "west end",
-                        "in the area",
-                        "centre area",
-                        "centrally located",
-                        "the south side",
-                        "city 's centre",
-                        "do nt care",
-                        "the west",
-                        "east end of town",
-                        "every area except the north",
-                        "south side",
-                        "centre of the city",
-                        "that",
-                        "this area",
-                        "centre",
-                        "north of town",
-                        "right in the center of town",
-                        "city cenre",
-                        "centre of town ?",
-                        "that area",
-                        "city center",
-                        "south",
-                        "same area as tandoori palace",
-                        "centre|west",
-                        "central zone",
-                        "the centre",
-                        "the center area",
-                        "west side of town",
-                        "western area of town",
-                        "the center of the park",
-                        "west side of the city",
-                        "eastside",
-                        "northend",
-                        "the area you are looking for",
-                        "all over the city",
-                        "western",
-                        "on the centre",
-                        "museums",
-                        "north area",
-                        "centre of town",
-                        "center of the city"
-                    ]
-                },
-                "name": {
-                    "description": "name of the attraction",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "pricerange": {
-                    "description": "the price range for the attraction, from cheap to expensive",
-                    "is_categorical": true,
-                    "possible_values": [
-                        "free",
-                        "no entrance fee",
-                        "entrance fee",
-                        "unaware of what their entrance fee is",
-                        "not listed",
-                        "cheap",
-                        "we do n't have any information",
-                        "2 pounds",
-                        "do n't see a price listed",
-                        "from free to 2 pounds 50",
-                        "4 pounds",
-                        "moderately priced",
-                        "neither prices are listed",
-                        "expensive",
-                        "5 pounds",
-                        "they do n't have the entrance fee posted",
-                        "free admission",
-                        "not sure of the fee",
-                        "5 pound entrance fee",
-                        "do n't have information",
-                        "3.50 pounds"
-                    ]
-                },
-                "choice": {
-                    "description": "number of attractions matching requests of user",
-                    "is_categorical": false,
-                    "possible_values": []
-                }
-            }
-        },
-        "booking": {
-            "description": "to arrange with a taxi, restaurant, train, etc.",
-            "slots": {
-                "time": {
-                    "description": "time for an order",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "day": {
-                    "description": "day for an order, from monday to sunday",
-                    "is_categorical": true,
-                    "possible_values": [
-                        "saturday",
-                        "wednesday",
-                        "saturday night thru monday",
-                        "vmhj6y3i",
-                        "6",
-                        "weds",
-                        "tuesday through friday",
-                        "tuesday instead of thursday",
-                        "2",
-                        "that",
-                        "tues",
-                        "sunday 18/06/2017",
-                        "tuesday 's",
-                        "today",
-                        "tonight",
-                        "that time frame",
-                        "tuesday",
-                        "at that time",
-                        "1",
-                        "this evening",
-                        "sunday and monday",
-                        "the same day",
-                        "sundar",
-                        "monday",
-                        "friday",
-                        "thursday",
-                        "sunday",
-                        "we d"
-                    ]
-                },
-                "stay": {
-                    "description": "for how long the user wish to be at a place",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "people": {
-                    "description": "how many person the order is for",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "name": {
-                    "description": "name of the ordered place",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "Ref": {
-                    "description": "reference number of the order",
-                    "is_categorical": false,
-                    "possible_values": []
-                }
-            }
-        },
-        "train": {
-            "description": "query and order a train",
-            "slots": {
-                "destination": {
-                    "description": "destination of the train",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "arriveBy": {
-                    "description": "arrival time of the train",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "departure": {
-                    "description": "departure location of the train",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "duration": {
-                    "description": "the length of time the train trip lasts",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "people": {
-                    "description": "number of people booking for train",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "day": {
-                    "description": "day of the train",
-                    "is_categorical": true,
-                    "possible_values": [
-                        "cambridge",
-                        "fr",
-                        "saturday",
-                        "wednesday",
-                        "13:07",
-                        "everday",
-                        "weds",
-                        "sat",
-                        "sundays",
-                        "train",
-                        "that",
-                        "between friday and wednesday",
-                        "all week",
-                        "this day",
-                        "every day",
-                        "tuesday",
-                        "saturdays",
-                        "tr2519",
-                        "every",
-                        "other days are available",
-                        "mondays",
-                        "we",
-                        "monday",
-                        "friday",
-                        "thursday",
-                        "frday",
-                        "sunday",
-                        "daily",
-                        "that day",
-                        "fiday"
-                    ]
-                },
-                "Ref": {
-                    "description": "reference number of the order",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "leaveAt": {
-                    "description": "leaving time for the train",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "price": {
-                    "description": "price for the train ticket",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "choice": {
-                    "description": "number of trains that meets requests of the user",
-                    "is_categorical": false,
-                    "possible_values": []
-                }
-            }
-        },
-        "hotel": {
-            "description": "to query hotel information and place an order",
-            "slots": {
-                "internet": {
-                    "description": "internet option at the hotel",
-                    "is_categorical": true,
-                    "possible_values": [
-                        "free internet",
-                        "free",
-                        "y",
-                        "do nt care",
-                        "yes",
-                        "none",
-                        "no",
-                        "4"
-                    ]
-                },
-                "area": {
-                    "description": "area or place of the hotel",
-                    "is_categorical": true,
-                    "possible_values": [
-                        "near the centre",
-                        "any part of town",
-                        "east area",
-                        "different parts of the city",
-                        "north end of the city",
-                        "centre of cambridge",
-                        "near the restaurant",
-                        "city centre",
-                        "throughout cambridge",
-                        "different part of town",
-                        "eastern part of the city",
-                        "the west end",
-                        "various",
-                        "northern area",
-                        "the northern part of town",
-                        "northern part of town",
-                        "the west side",
-                        "on the west",
-                        "south area of town",
-                        "centrally - located",
-                        "north side of town",
-                        "central location",
-                        "centra",
-                        "throughout the city",
-                        "not in the south",
-                        "south area",
-                        "centre city",
-                        "in the north",
-                        "east side",
-                        "northern side of town",
-                        "different parts of town",
-                        "you are interested in",
-                        "northern",
-                        "that part of town",
-                        "west of town",
-                        "the east",
-                        "all areas",
-                        "all parts of the city",
-                        "the east side",
-                        "south part of town",
-                        "south end of the city",
-                        "there",
-                        "west area",
-                        "north cambridge",
-                        "the north end",
-                        "south cambridge",
-                        "east side of town",
-                        "southside",
-                        "the south area",
-                        "or west ?",
-                        "another area of town",
-                        "all of cambridge",
-                        "the town centre",
-                        "east near other shops and boutiques",
-                        "town centre",
-                        "on the west side",
-                        "all over",
-                        "the centre of the city",
-                        "other parts of town",
-                        "the centre of town",
-                        "east area of town",
-                        "the east area",
-                        "the westside",
-                        "southend",
-                        "west|centre",
-                        "northe part of town",
-                        "anywhere in cambridge",
-                        "different area",
-                        "same area",
-                        "south side of town",
-                        "the centre area",
-                        "on the south",
-                        "in the centre",
-                        "in the center",
-                        "wast",
-                        "all across town",
-                        "south end of town",
-                        "except in the north",
-                        "and east",
-                        "place to be a guesthouse",
-                        "the same area",
-                        "other parts of the city",
-                        "the east side of town",
-                        "somewhere else",
-                        "and in the centre",
-                        "east section of town",
-                        "east cambridge",
-                        "across cambridge",
-                        "north of cambridge",
-                        "next door",
-                        "except in the east",
-                        "in cambridge",
-                        "north part of the city",
-                        "that side of town",
-                        "south cambridge area",
-                        "in town",
-                        "west side",
-                        "west end of the city",
-                        "we",
-                        "centrally",
-                        "west side near the restaurant",
-                        "all over town",
-                        "southern part of town",
-                        "all around the city",
-                        "west part of town",
-                        "cambridge",
-                        "downtown",
-                        "south end",
-                        "east",
-                        "east and the north .",
-                        "norht",
-                        "the north side",
-                        "any area",
-                        "center of the town",
-                        "the area",
-                        "eat",
-                        "northern cambridge",
-                        "north",
-                        "cheap",
-                        "north part of town",
-                        "centre part",
-                        "east part of town",
-                        "western cambridge",
-                        "north end of town",
-                        "north end",
-                        "east of town",
-                        "several areas of town",
-                        "west",
-                        "eastern",
-                        "in the west",
-                        "everywhere but the city centre",
-                        "centre area of town",
-                        "center of town",
-                        "west part",
-                        "the town center",
-                        "south of town",
-                        "the center of town",
-                        "the west area",
-                        "centre part of town",
-                        "nborth",
-                        "north side",
-                        "the north of the city",
-                        "the south",
-                        "west part of the city",
-                        "center",
-                        "the north",
-                        "west end",
-                        "same side of town",
-                        "in the area",
-                        "any part of the city",
-                        "centre area",
-                        "not to far from the restaurant",
-                        "northside",
-                        "on the eastside",
-                        "close to where you 'll be dining",
-                        "the east end of town",
-                        "various parts of the city",
-                        "west areas of town",
-                        "centrally located",
-                        "the south side",
-                        "north part of town .",
-                        "do nt care",
-                        "the west",
-                        "same area as the restaurant",
-                        "that vicinity",
-                        "nearby the restaurant",
-                        "south side",
-                        "east part of time",
-                        "centre of the city",
-                        "south closer to the museum",
-                        "north park of town",
-                        "centre",
-                        "north of town",
-                        "north and centre",
-                        "the same",
-                        "on the north",
-                        "that area of town",
-                        "that area",
-                        "in the city",
-                        "el shaddai",
-                        "city center",
-                        "south",
-                        "the city 's south side",
-                        "north area of town",
-                        "near the museum",
-                        "eastern cambridge",
-                        "near centre",
-                        "the easy",
-                        "a different area",
-                        "the centre",
-                        "the north part of town",
-                        "west side of town",
-                        "across town",
-                        "eastside",
-                        "northend",
-                        "north par of town",
-                        "the centre of cambridge",
-                        "all over the city",
-                        "north location",
-                        "centre cambridge",
-                        "that region",
-                        "north area",
-                        "centre of town"
-                    ]
-                },
-                "stars": {
-                    "description": "star rating of the hotel",
-                    "is_categorical": true,
-                    "possible_values": [
-                        "4|5",
-                        "several",
-                        "0 to 4",
-                        "no star rating",
-                        "do nt care",
-                        "one",
-                        "does not show",
-                        "foru",
-                        "does not have",
-                        "different star ratings",
-                        "unrated",
-                        "2",
-                        "four starts",
-                        "4-star",
-                        "four - star",
-                        "3|4",
-                        "yes",
-                        "three",
-                        "four",
-                        "lower",
-                        "four stars",
-                        "zero",
-                        "ranging from 2 - 4 stars",
-                        "two",
-                        "1",
-                        "3",
-                        "four star",
-                        "0-star",
-                        "five",
-                        "drop the star rating",
-                        "2-star",
-                        "no",
-                        "3-star",
-                        "5-star",
-                        "0",
-                        "1-star",
-                        "not rated",
-                        "5",
-                        "not as fancy",
-                        "4"
-                    ]
-                },
-                "parking": {
-                    "description": "parking facility at the hotel",
-                    "is_categorical": true,
-                    "possible_values": [
-                        "n",
-                        "free",
-                        "do nt care",
-                        "yes",
-                        "none",
-                        "no",
-                        "free parking"
-                    ]
-                },
-                "phone": {
-                    "description": "hotel phone number",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "name": {
-                    "description": "name of the hotel",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "pricerange": {
-                    "description": "price budget of the hotel",
-                    "is_categorical": true,
-                    "possible_values": [
-                        "cheapest",
-                        "economically priced",
-                        "free",
-                        "all different price ranges",
-                        "more moderately priced",
-                        "expensively - priced",
-                        "ca n't view that information",
-                        "moderate",
-                        "super cheap",
-                        "low cost",
-                        "cheap price range",
-                        "lower",
-                        "affordable pricing",
-                        "moderate to cheap",
-                        "cheap priced",
-                        "inexpensively - priced",
-                        "mostly in the cheap to moderate price range",
-                        "moderately pricing",
-                        "guesthouses",
-                        "moderate|cheap",
-                        "moderately priceed",
-                        "very cheap",
-                        "even cheaper",
-                        "upscale",
-                        "other ranges",
-                        "moderate or cheap",
-                        "cheaply",
-                        "affordable",
-                        "different price range",
-                        "does not say",
-                        "moderately price",
-                        "expensive or cheap",
-                        "priced moderately",
-                        "moderatly price",
-                        "moderate price point",
-                        "moderately prices",
-                        "cheap to expensive",
-                        "on the more expensive side",
-                        "inexpensive",
-                        "the cheapest",
-                        "cheaply priced",
-                        "that range",
-                        "rather expensive",
-                        "your price",
-                        "moderate range",
-                        "cheaply - priced",
-                        "more expensive",
-                        "expensive",
-                        "much cheaper",
-                        "cheap side",
-                        "slightly more expensive",
-                        "$100",
-                        "cheaper than the others",
-                        "pretty expensive",
-                        "not expensive",
-                        "cheaply prices",
-                        "moderatley priced",
-                        "modrate",
-                        "cheap range",
-                        "espensive",
-                        "chear",
-                        "fairly cheap",
-                        "oderately priced",
-                        "moderate in price",
-                        "different",
-                        "very affordable",
-                        "your chosen",
-                        "unfortunately do not have the price",
-                        "north",
-                        "less costly",
-                        "different price ranges",
-                        "cheap",
-                        "very inexpensive",
-                        "2",
-                        "moderately priced .",
-                        "moderately to expensively priced",
-                        "economical",
-                        "same price range",
-                        "moderate to cheap range",
-                        "cheap to moderate",
-                        "quite expensive",
-                        "that price range",
-                        "cheap|moderate",
-                        "your price range",
-                        "varying price ranges",
-                        "expensive to moderate",
-                        "expensively priced",
-                        "epxensive",
-                        "or expensive ?",
-                        "all",
-                        "higher price range",
-                        "any",
-                        "moderately - priced",
-                        "hotel",
-                        "moderate pricing",
-                        "do nt care",
-                        "lower end",
-                        "quite cheap",
-                        "more budget - friendly",
-                        "cheaper",
-                        "fairly cheap compared to other hotels",
-                        "pretty cheap",
-                        "moderate price",
-                        "that",
-                        "moderate priced",
-                        "centre",
-                        "the same",
-                        "expensive range",
-                        "relatively cheap",
-                        "moderate prices",
-                        "hotels",
-                        "cheap>moderate",
-                        "a little pricey",
-                        "on the cheap side",
-                        "moderately - priced or cheap",
-                        "expensive price range",
-                        "moderately priced",
-                        "same price",
-                        "moderately",
-                        "budget - priced",
-                        "moderately pried",
-                        "moderate price range",
-                        "great",
-                        "moderatly priced",
-                        "extremely reasonable",
-                        "cheap or moderate",
-                        "cheaper side"
-                    ]
-                },
-                "people": {
-                    "description": "number of people for the hotel booking",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "postcode": {
-                    "description": "postcode of the hotel",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "address": {
-                    "description": "exact location of the hotel",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "type": {
-                    "description": "what is the type of the hotel",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "stay": {
-                    "description": "length of stay at the hotel",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "day": {
-                    "description": "day of the hotel booking",
-                    "is_categorical": true,
-                    "possible_values": [
-                        "next friday",
-                        "saturday",
-                        "wednesday",
-                        "tuesday",
-                        "saturday|tuesday",
-                        "t",
-                        "sunday>monday",
-                        "friday>tuesday",
-                        "monday",
-                        "friday",
-                        "thursday",
-                        "monday<thursday",
-                        "sunday",
-                        "wednesday|friday",
-                        "monda"
-                    ]
-                },
-                "choice": {
-                    "description": "number of hotels that meets requests of the user",
-                    "is_categorical": false,
-                    "possible_values": []
-                }
-            }
-        },
-        "police": {
-            "description": "find police stations",
-            "slots": {
-                "address": {
-                    "description": "exact location of the police station",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "postcode": {
-                    "description": "postcode of the police station",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "phone": {
-                    "description": "police station phone number",
-                    "is_categorical": false,
-                    "possible_values": []
-                }
-            }
-        }
-    },
-    "intents": {
-        "inform": {
-            "description": "inform user of value for a certain slot"
-        },
-        "request": {
-            "description": "ask for value of a slot"
-        },
-        "nobook": {
-            "description": "inform user of booking failure"
-        },
-        "reqmore": {
-            "description": "ask user for more instructions"
-        },
-        "book": {
-            "description": "place an order for user"
-        },
-        "bye": {
-            "description": "end a conversation and say goodbye to user"
-        },
-        "thank": {
-            "description": "express gratitude"
-        },
-        "welcome": {
-            "description": "welcome"
-        },
-        "recommend": {
-            "description": "recommend a choice for user request"
-        },
-        "offerbook": {
-            "description": "offer to place an order for user"
-        },
-        "offerbooked": {
-            "description": "inform user that an order is succussful"
-        },
-        "greet": {
-            "description": "express greeting"
-        },
-        "nooffer": {
-            "description": "inform user that no options matches user request"
-        },
-        "select": {
-            "description": "provide several choices for user to choose from"
-        }
-    },
-    "binary_dialogue_act": [
-        {
-            "intent": "request",
-            "domain": "hotel",
-            "slot": "area",
-            "value": ""
-        },
-        {
-            "intent": "inform",
-            "domain": "booking",
-            "slot": "",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "booking",
-            "slot": "stay",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "booking",
-            "slot": "day",
-            "value": ""
-        },
-        {
-            "intent": "reqmore",
-            "domain": "",
-            "slot": "",
-            "value": ""
-        },
-        {
-            "intent": "bye",
-            "domain": "",
-            "slot": "",
-            "value": ""
-        },
-        {
-            "intent": "thank",
-            "domain": "",
-            "slot": "",
-            "value": ""
-        },
-        {
-            "intent": "welcome",
-            "domain": "",
-            "slot": "",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "hotel",
-            "slot": "pricerange",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "train",
-            "slot": "leaveAt",
-            "value": ""
-        },
-        {
-            "intent": "offerbook",
-            "domain": "train",
-            "slot": "",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "train",
-            "slot": "duration",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "train",
-            "slot": "price",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "train",
-            "slot": "departure",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "train",
-            "slot": "day",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "taxi",
-            "slot": "leaveAt",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "taxi",
-            "slot": "arriveBy",
-            "value": ""
-        },
-        {
-            "intent": "greet",
-            "domain": "",
-            "slot": "",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "hotel",
-            "slot": "address",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "train",
-            "slot": "destination",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "train",
-            "slot": "arriveBy",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "hotel",
-            "slot": "internet",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "hotel",
-            "slot": "parking",
-            "value": ""
-        },
-        {
-            "intent": "inform",
-            "domain": "attraction",
-            "slot": "",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "attraction",
-            "slot": "area",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "attraction",
-            "slot": "type",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "attraction",
-            "slot": "address",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "restaurant",
-            "slot": "food",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "restaurant",
-            "slot": "area",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "restaurant",
-            "slot": "phone",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "restaurant",
-            "slot": "postcode",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "restaurant",
-            "slot": "address",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "attraction",
-            "slot": "postcode",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "attraction",
-            "slot": "phone",
-            "value": ""
-        },
-        {
-            "intent": "nobook",
-            "domain": "booking",
-            "slot": "",
-            "value": ""
-        },
-        {
-            "intent": "inform",
-            "domain": "train",
-            "slot": "",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "train",
-            "slot": "people",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "booking",
-            "slot": "people",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "hotel",
-            "slot": "stars",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "hotel",
-            "slot": "type",
-            "value": ""
-        },
-        {
-            "intent": "select",
-            "domain": "hotel",
-            "slot": "",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "hotel",
-            "slot": "postcode",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "attraction",
-            "slot": "entrance fee",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "restaurant",
-            "slot": "pricerange",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "train",
-            "slot": "Ref",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "booking",
-            "slot": "time",
-            "value": ""
-        },
-        {
-            "intent": "book",
-            "domain": "booking",
-            "slot": "",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "hotel",
-            "slot": "phone",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "attraction",
-            "slot": "pricerange",
-            "value": ""
-        },
-        {
-            "intent": "inform",
-            "domain": "hotel",
-            "slot": "",
-            "value": ""
-        },
-        {
-            "intent": "select",
-            "domain": "train",
-            "slot": "",
-            "value": ""
-        },
-        {
-            "intent": "select",
-            "domain": "restaurant",
-            "slot": "",
-            "value": ""
-        },
-        {
-            "intent": "inform",
-            "domain": "restaurant",
-            "slot": "",
-            "value": ""
-        },
-        {
-            "intent": "inform",
-            "domain": "taxi",
-            "slot": "",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "taxi",
-            "slot": "departure",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "restaurant",
-            "slot": "name",
-            "value": ""
-        },
-        {
-            "intent": "nooffer",
-            "domain": "attraction",
-            "slot": "",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "taxi",
-            "slot": "destination",
-            "value": ""
-        },
-        {
-            "intent": "nooffer",
-            "domain": "hotel",
-            "slot": "",
-            "value": ""
-        },
-        {
-            "intent": "select",
-            "domain": "attraction",
-            "slot": "",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "taxi",
-            "slot": "taxi_phone",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "taxi",
-            "slot": "taxi_types",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "attraction",
-            "slot": "name",
-            "value": ""
-        },
-        {
-            "intent": "nooffer",
-            "domain": "restaurant",
-            "slot": "",
-            "value": ""
-        },
-        {
-            "intent": "nooffer",
-            "domain": "train",
-            "slot": "",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "hotel",
-            "slot": "name",
-            "value": ""
-        },
-        {
-            "intent": "recommend",
-            "domain": "hotel",
-            "slot": "",
-            "value": ""
-        },
-        {
-            "intent": "recommend",
-            "domain": "attraction",
-            "slot": "",
-            "value": ""
-        },
-        {
-            "intent": "offerbooked",
-            "domain": "train",
-            "slot": "",
-            "value": ""
-        },
-        {
-            "intent": "recommend",
-            "domain": "restaurant",
-            "slot": "",
-            "value": ""
-        }
-    ],
-    "state": {
-        "hotel": {
-            "name": "",
-            "area": "",
-            "parking": "",
-            "pricerange": "",
-            "stars": "",
-            "internet": "",
-            "type": "",
-            "stay": "",
-            "day": "",
-            "people": ""
-        },
-        "train": {
-            "leaveAt": "",
-            "destination": "",
-            "day": "",
-            "arriveBy": "",
-            "departure": "",
-            "people": ""
-        },
-        "taxi": {
-            "leaveAt": "",
-            "destination": "",
-            "departure": "",
-            "arriveBy": ""
-        },
-        "restaurant": {
-            "food": "",
-            "pricerange": "",
-            "name": "",
-            "area": "",
-            "time": "",
-            "day": "",
-            "people": ""
-        },
-        "attraction": {
-            "type": "",
-            "name": "",
-            "area": ""
-        }
-    }
-}
\ No newline at end of file
diff --git a/data/unified_datasets/multiwoz21/preprocess.py b/data/unified_datasets/multiwoz21/preprocess.py
index 5ca81e2e399d8a816e3be6d4ae33babfc2f1597e..9f1c0cbc31eb761d02f72833782c8e863ede87d5 100644
--- a/data/unified_datasets/multiwoz21/preprocess.py
+++ b/data/unified_datasets/multiwoz21/preprocess.py
@@ -1,299 +1,609 @@
 import copy
 import re
-import zipfile
+from zipfile import ZipFile, ZIP_DEFLATED
+from shutil import copy2
 import json
 import os
 from tqdm import tqdm
-import sys
-import difflib
-from fuzzywuzzy import fuzz
-sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
-
-from convlab2.util.file_util import read_zipped_json, write_zipped_json
-import logging
-
-
-logging.basicConfig(level=logging.INFO)
-self_dir = (os.path.abspath(os.getcwd()))
-
-REF_SYS_DA = {
-    'Attraction': {
-        'Addr': "address", 'Area': "area", 'Choice': "choice",
-        'Fee': "entrance fee", 'Name': "name", 'Phone': "phone",
-        'Post': "postcode", 'Price': "pricerange", 'Type': "type",
-        'none': None, 'Open': None
-    },
-    'Hospital': {
-        'Department': 'department', 'Addr': 'address', 'Post': 'postcode',
-        'Phone': 'phone', 'none': None
-    },
-    'Booking': {
-        'Day': 'day', 'Name': 'name', 'People': 'people',
-        'Ref': 'Ref', 'Stay': 'stay', 'Time': 'time',
-        'none': None
-    },
-    'Hotel': {
-        'Addr': "address", 'Area': "area", 'Choice': "choice",
-        'Internet': "internet", 'Name': "name", 'Parking': "parking",
-        'Phone': "phone", 'Post': "postcode", 'Price': "pricerange",
-        'Stars': "stars", 'Type': "type", 'Stay': 'stay', 'Day': 'day', 'People': 'people',
-        'none': None
-    },
-    'Restaurant': {
-        'Addr': "address", 'Area': "area", 'Choice': "choice",
-        'Name': "name", 'Food': "food", 'Phone': "phone",
-        'Post': "postcode", 'Price': "pricerange",
-        'Time': 'time', 'Day': 'day', 'People': 'people',
-        'none': None
-    },
-    'Taxi': {
-        'Arrive': "arriveBy", 'Car': "taxi_types", 'Depart': "departure",
-        'Dest': "destination", 'Leave': "leaveAt", 'Phone': "taxi_phone",
-        'none': None
-    },
-    'Train': {
-        'Arrive': "arriveBy", 'Choice': "choice", 'Day': "day",
-        'Depart': "departure", 'Dest': "destination",
-        'Leave': "leaveAt", 'People': "people", 'Ref': "Ref",
-        'Time': "duration", 'none': None, 'Ticket': 'price',
-    },
-    'Police': {
-        'Addr': "address", 'Post': "postcode", 'Phone': "phone", 'none': None
-    },
-}
-
-# taxi restaurant attraction train
-slot_to_type = {
-    'taxi-destination': 'non',
-    'taxi-departure': 'non',
-    'taxi-leaveAt': 'non',
-    'taxi-arriveBy': 'non',
-    'restaurant-food': 'non',
-    'restaurant-name': 'non',
-    'attraction-address': 'non',
-    'attraction-postcode': 'non',
-    'restaurant-pricerange': 'cat',
-    'restaurant-address': 'non',
-    'restaurant-area': 'cat',
-    'restaurant-postcode': 'non',
-    'attraction-phone': 'non',
-    'attraction-entrance fee': 'non',
-    'booking-time': 'non',
-    'booking-day': 'cat',
-    'attraction-type': 'non',
-    'attraction-area': 'cat',
-    'train-destination': 'non',
-    'train-arriveBy': 'non',
-    'train-departure': 'non',
-    'hotel-internet': 'cat',
-    'hotel-area': 'cat',
-    'booking-stay': 'non',
-    'booking-people': 'non',
-    'train-duration': 'non',
-    'train-people': 'non',
-    'train-day': 'cat',
-    'train-Ref': 'non',
-    'hotel-stars': 'cat',
-    'train-leaveAt': 'non',
-    'train-price': 'non',
-    'hotel-parking': 'cat',
-    'hotel-phone': 'non',
-    'hotel-name': 'non',
-    'hotel-pricerange': 'cat',
-    'hotel-people': 'non',
-    'restaurant-phone': 'non',
-    'hotel-postcode': 'non',
-    'hotel-address': 'non',
-    'attraction-name': 'non',
-    'hotel-type': 'non',
-    'restaurant-people': 'non',
-    'train-choice': 'non',
-    'attraction-pricerange': 'cat',
-    'hotel-stay': 'non',
-    'booking-name': 'non',
-    'booking-Ref': 'non',
-    'restaurant-time': 'non',
-    'restaurant-day': 'cat',
-    'hotel-day': 'cat',
-    'hotel-choice': 'non',
-    'restaurant-choice': 'non',
-    'attraction-choice': 'non',
-    'taxi-taxi_phone': 'non',
-    'taxi-taxi_types': 'non',
-    'police-address': 'non',
-    'police-postcode': 'non',
-    'police-phone': 'non'
-}
-
-state_cat_slot_value_dict = {
-    "hotel-pricerange": {
-        "cheap": 735,
-        "moderate": 1063,
-        "expensive": 594,
-    },
-    "hotel-parking": {
-        "yes": 1809,
-        "no": 126,
-        "free": 4,
-    },
-    "hotel-day": {
-        "tuesday": 385,
-        "wednesday": 410,
-        "monday": 365,
-        "saturday": 407,
-        "friday": 393,
-        "thursday": 384,
-        "sunday": 369,
-    },
-    "train-day": {
-        "wednesday": 533,
-        "monday": 533,
-        "saturday": 543,
-        "thursday": 547,
-        "friday": 563,
-        "tuesday": 553,
-        "sunday": 613,
-    },
-    "hotel-stars": {
-        "4": 1263,
-        "2": 193,
-        "0": 201,
-        "3": 401,
-        "5": 45,
-        "1": 45,
-    },
-    "hotel-internet": {
-        "yes": 1841,
-        "no": 79,
-        "free": 2
-    },
-    "hotel-area": {
-        "east": 416,
-        "north": 717,
-        "centre": 538,
-        "south": 289,
-        "west": 316,
-    },
-    "attraction-area": {
-        "centre": 1290,
-        "west": 332,
-        "north": 155,
-        "south": 240,
-        "east": 272,
-    },
-    "restaurant-pricerange": {
-        "expensive": 1477,
-        "cheap": 758,
-        "moderate": 1028,
+from collections import Counter
+from pprint import pprint
+from nltk.tokenize import TreebankWordTokenizer, PunktSentenceTokenizer
+
+init_ontology = {
+    "domains": { # descriptions are adapted from multiwoz22, but is_categorical may be different
+        "attraction": {
+            "description": "find an attraction",
+            "slots": {
+                "area": {
+                    "description": "area to search for attractions",
+                    "is_categorical": True,
+                    "possible_values": [
+                        "centre",
+                        "east",
+                        "north",
+                        "south",
+                        "west"
+                    ]
+                },
+                "name": {
+                    "description": "name of the attraction",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "type": {
+                    "description": "type of the attraction",
+                    "is_categorical": True,
+                    "possible_values": [
+                        "architecture",
+                        "boat",
+                        "cinema",
+                        "college",
+                        "concerthall",
+                        "entertainment",
+                        "museum",
+                        "multiple sports",
+                        "nightclub",
+                        "park",
+                        "swimmingpool",
+                        "theatre"
+                    ]
+                },
+                "entrance fee": {
+                    "description": "how much is the entrance fee",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "open hours": {
+                    "description": "open hours of the attraction",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "address": {
+                    "description": "address of the attraction",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "phone": {
+                    "description": "phone number of the attraction",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "postcode": {
+                    "description": "postcode of the attraction",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "choice": {
+                    "description": "number of attractions that meet the requirement",
+                    "is_categorical": False,
+                    "possible_values": []
+                }
+            }
+        },
+        "hotel": {
+            "description": "find and book a hotel",
+            "slots": {
+                "internet": {
+                    "description": "whether the hotel has internet",
+                    "is_categorical": True,
+                    "possible_values": [
+                        "free",
+                        "no",
+                        "yes"
+                    ]
+                },
+                "parking": {
+                    "description": "whether the hotel has parking",
+                    "is_categorical": True,
+                    "possible_values": [
+                        "free",
+                        "no",
+                        "yes"
+                    ]
+                },
+                "area": {
+                    "description": "area or place of the hotel",
+                    "is_categorical": True,
+                    "possible_values": [
+                        "centre",
+                        "east",
+                        "north",
+                        "south",
+                        "west"
+                    ]
+                },
+                "stars": {
+                    "description": "star rating of the hotel",
+                    "is_categorical": True,
+                    "possible_values": [
+                        "0",
+                        "1",
+                        "2",
+                        "3",
+                        "4",
+                        "5"
+                    ]
+                },
+                "price range": {
+                    "description": "price budget of the hotel",
+                    "is_categorical": True,
+                    "possible_values": [
+                        "expensive",
+                        "cheap",
+                        "moderate"
+                    ]
+                },
+                "type": {
+                    "description": "what is the type of the hotel",
+                    "is_categorical": False,
+                    "possible_values": [
+                        "guesthouse",
+                        "hotel"
+                    ]
+                },
+                "name": {
+                    "description": "name of the hotel",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "book people": {
+                    "description": "number of people for the hotel booking",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "book stay": {
+                    "description": "length of stay at the hotel",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "book day": {
+                    "description": "day of the hotel booking",
+                    "is_categorical": True,
+                    "possible_values": [
+                        "monday",
+                        "tuesday",
+                        "wednesday",
+                        "thursday",
+                        "friday",
+                        "saturday",
+                        "sunday"
+                    ]
+                },
+                "phone": {
+                    "description": "phone number of the hotel",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "postcode": {
+                    "description": "postcode of the hotel",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "address": {
+                    "description": "address of the hotel",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "ref": {
+                    "description": "reference number of the hotel booking",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "choice": {
+                    "description": "number of hotels that meet the requirement",
+                    "is_categorical": False,
+                    "possible_values": []
+                }
+            }
+        },
+        "taxi": {
+            "description": "rent taxi to travel",
+            "slots": {
+                "destination": {
+                    "description": "destination of taxi",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "departure": {
+                    "description": "departure location of taxi",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "leave at": {
+                    "description": "leaving time of taxi",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "arrive by": {
+                    "description": "arrival time of taxi",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "phone": {
+                    "description": "phone number of the taxi",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "type": {
+                    "description": "car type of the taxi",
+                    "is_categorical": False,
+                    "possible_values": []
+                }
+            }
+        },
+        "restaurant": {
+            "description": "find and book a restaurant",
+            "slots": {
+                "price range": {
+                    "description": "price budget for the restaurant",
+                    "is_categorical": True,
+                    "possible_values": [
+                        "cheap",
+                        "expensive",
+                        "moderate"
+                    ]
+                },
+                "area": {
+                    "description": "area or place of the restaurant",
+                    "is_categorical": True,
+                    "possible_values": [
+                        "centre",
+                        "east",
+                        "north",
+                        "south",
+                        "west"
+                    ]
+                },
+                "food": {
+                    "description": "the cuisine of the restaurant",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "name": {
+                    "description": "name of the restaurant",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "address": {
+                    "description": "address of the restaurant",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "postcode": {
+                    "description": "postcode of the restaurant",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "phone": {
+                    "description": "phone number of the restaurant",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "book people": {
+                    "description": "number of people for the restaurant booking",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "book time": {
+                    "description": "time of the restaurant booking",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "book day": {
+                    "description": "day of the restaurant booking",
+                    "is_categorical": True,
+                    "possible_values": [
+                        "monday",
+                        "tuesday",
+                        "wednesday",
+                        "thursday",
+                        "friday",
+                        "saturday",
+                        "sunday"
+                    ]
+                },
+                "ref": {
+                    "description": "reference number of the restaurant booking",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "choice": {
+                    "description": "number of restaurants that meet the requirement",
+                    "is_categorical": False,
+                    "possible_values": []
+                }
+            }
+        },
+        "train": {
+            "description": "find a train to travel",
+            "slots": {
+                "destination": {
+                    "description": "destination of the train",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "arrive by": {
+                    "description": "arrival time of the train",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "departure": {
+                    "description": "departure location of the train",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "leave at": {
+                    "description": "leaving time for the train",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "duration": {
+                    "description": "duration of the travel",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "book people": {
+                    "description": "number of people booking for train",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "day": {
+                    "description": "day of the train",
+                    "is_categorical": True,
+                    "possible_values": [
+                        "monday",
+                        "tuesday",
+                        "wednesday",
+                        "thursday",
+                        "friday",
+                        "saturday",
+                        "sunday"
+                    ]
+                },
+                "ref": {
+                    "description": "reference number of the train booking",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "price": {
+                    "description": "price of the train ticket",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "train id": {
+                    "description": "id of the train",
+                    "is_categorical": False
+                },
+                "choice": {
+                    "description": "number of trains that meet the requirement",
+                    "is_categorical": False,
+                    "possible_values": []
+                }
+            }
+        },
+        "police": {
+            "description": "find a police station for help",
+            "slots": {
+                "name": {
+                    "description": "name of the police station",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "address": {
+                    "description": "address of the police station",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "postcode": {
+                    "description": "postcode of the police station",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "phone": {
+                    "description": "phone number of the police station",
+                    "is_categorical": False,
+                    "possible_values": []
+                }
+            }
+        },
+        "hospital": {
+            "description": "find a hospital for help",
+            "slots": {
+                "department": {
+                    "description": "specific department of the hospital",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "address": {
+                    "description": "address of the hospital",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "phone": {
+                    "description": "phone number of the hospital",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "postcode": {
+                    "description": "postcode of the hospital",
+                    "is_categorical": False,
+                    "possible_values": []
+                }
+            }
+        },
+        "booking": {
+            "description": "booking for taxi, restaurant, hotel, train, etc.",
+            "slots":{
+                "day": {
+                    "description": "day of the booking",
+                    "is_categorical": True,
+                    "possible_values": [
+                        "monday",
+                        "tuesday",
+                        "wednesday",
+                        "thursday",
+                        "friday",
+                        "saturday",
+                        "sunday"
+                    ]
+                },
+                "time": {
+                    "description": "time of the booking",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "book people": {
+                    "description": "number of people for the booking",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "book stay": {
+                    "description": "length of stay at the hotel",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "name": {
+                    "description": "name of the booked entity",
+                    "is_categorical": False,
+                    "possible_values": []
+                },
+                "ref": {
+                    "description": "reference number of the booking",
+                    "is_categorical": False,
+                    "possible_values": []
+                }
+            }
+        },
+        "general":{
+            "description": "general domain without slots",
+            "slots": {}
+        }
     },
-    "restaurant-area": {
-        "centre": 1745,
-        "south": 398,
-        "north": 390,
-        "east": 360,
-        "west": 423,
+    "intents": {
+        "inform": {
+            "description": "inform the value of a slot"
+        },
+        "request": {
+            "description": "ask for the value of a slot"
+        },
+        "nobook": {
+            "description": "inform the user that the booking is failed"
+        },
+        "reqmore": {
+            "description": "ask the user for more instructions"
+        },
+        "book": {
+            "description": "book something for the user"
+        },
+        "bye": {
+            "description": "say goodbye to the user and end the conversation"
+        },
+        "thank": {
+            "description": "thanks for the help"
+        },
+        "welcome": {
+            "description": "you're welcome"
+        },
+        "greet": {
+            "description": "express greeting"
+        },
+        "recommend": {
+            "description": "recommend a choice to the user"
+        },
+        "select": {
+            "description": "provide several choices for the user"
+        },
+        "offerbook": {
+            "description": "ask the user if he or she needs booking"
+        },
+        "offerbooked": {
+            "description": "provide information about the booking"
+        },
+        "nooffer": {
+            "description": "inform the user that there is no result satisfies user requirements"
+        }
     },
-    "restaurant-day": {
-        "thursday": 362,
-        "wednesday": 412,
-        "friday": 395,
-        "monday": 383,
-        "sunday": 399,
-        "saturday": 421,
-        "tuesday": 350,
+    "binary_dialogue_acts": set(), # from data
+    "state": {
+        "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+        },
+        "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "price range": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "book stay": "",
+            "book day": "",
+            "book people": ""
+        },
+        "restaurant": {
+            "food": "",
+            "price range": "",
+            "name": "",
+            "area": "",
+            "book time": "",
+            "book day": "",
+            "book people": ""
+        },
+        "taxi": {
+            "leave at": "",
+            "destination": "",
+            "departure": "",
+            "arrive by": ""
+        },
+        "train": {
+            "leave at": "",
+            "destination": "",
+            "day": "",
+            "arrive by": "",
+            "departure": "",
+            "book people": ""
+        }
     }
 }
 
-
-synonyms = [
-    ["el shaddia guesthouse", "el shaddai"],
-    [ "peterborough", "peterbourgh"],
-    ["night club", "nightclub", 'nightclubs'],
-    ["boat", "boating"],
-    ["portugese", "portuguese"],
-    ["guesthouse", "guest house"],
-    ["seafood", "sea food"],
-    ["christ 's college", "christ college"],
-    ["huntingdon marriott hotel"]
-]
-
-state_cat_slot_ds = [k for k, v in slot_to_type.items() if v == 'cat']
-
-da_cat_slot_values = {
-    # 'hotel-stay': ['1', '2', '3', '4', '5'],
-    'hotel-internet': ['free', 'no', 'none', 'yes'],
-    'hotel-parking': ['free', 'no', 'none', 'yes']
-}
-
-state_cat_slot_values = {}
-
-multiwoz_desc = {
-    'taxi': {
-        'domain': 'taxi information query system',
-        'taxi_phone': 'taxi phone number',
-        'taxi_types': 'taxi type',
-    },
-    'restaurant': {
-        'domain': 'restaurant information query system',
-        'address': 'exact location of the restaurant',
-        'postcode': 'postcode of the restaurant',
-        'phone': 'restaurant phone number',
-        'choice': 'number of restaurants meeting requests of user',
-    },
+slot_name_map = {
+    'addr': "address", 
+    'post': "postcode",
+    'pricerange': "price range",
+    'arrive': "arrive by",
+    'arriveby': "arrive by",
+    'leave': "leave at",
+    'leaveat': "leave at",
+    'depart': "departure",
+    'dest': "destination",
+    'fee': "entrance fee", 
+    'open': 'open hours',
+    'car': "type",
+    'car type': "type",
+    'ticket': 'price', 
+    'trainid': 'train id',
+    'id': 'train id',
+    'people': 'book people',
+    'stay': 'book stay',
+    'none': '',
     'attraction': {
-        'domain': 'an entertainment that is offered to the public',
-        'address': 'details of where the attraction is',
-        'postcode': 'postcode of the attraction',
-        'phone': 'phone number of the attraction',
-        'entrance fee': 'the fee charged for admission to the attraction',
-        'pricerange': 'the price range for the attraction, from cheap to expensive',
-        'choice': 'number of attractions matching requests of user'
-    },
-    'booking': {
-        'domain': 'to arrange with a taxi, restaurant, train, etc.',
-        'time': 'time for an order',
-        'day': 'day for an order, from monday to sunday',
-        'stay': 'for how long the user wish to be at a place',
-        'people': 'how many person the order is for',
-        'name': 'name of the ordered place',
-        'Ref': 'reference number of the order'
-    },
-    'train': {
-        'domain': 'query and order a train',
-        'duration': 'the length of time the train trip lasts',
-        'Ref': 'reference number of the order',
-        'price': 'price for the train ticket',
-        'choice': 'number of trains that meets requests of the user',
+        'price': 'entrance fee'
     },
+    'hospital': {},
     'hotel': {
-        'domain': 'to query hotel information and place an order',
-        'address': 'exact location of the hotel',
-        'postcode': 'postcode of the hotel',
-        'phone': 'hotel phone number',
-        'choice': 'number of hotels that meets requests of the user',
+        'day': 'book day', 'price': "price range"
     },
-    'police': {
-        'domain': 'find police stations',
-        'address': 'exact location of the police station',
-        'postcode': 'postcode of the police station',
-        'phone': 'police station phone number',
+    'restaurant': {
+        'day': 'book day', 'time': 'book time', 'price': "price range"
     },
-    'intents': {
-        'inform': 'inform user of value for a certain slot',
-        'request': 'ask for value of a slot',
-        'nobook': 'inform user of booking failure',
-        'reqmore': 'ask user for more instructions',
-        'book': 'place an order for user',
-        'bye': 'end a conversation and say goodbye to user',
-        'thank': 'express gratitude',
-        'welcome': 'welcome',
-        'offerbooked': 'inform user that an order is succussful',
-        'recommend': 'recommend a choice for user request',
-        'greet': 'express greeting',
-        'nooffer': 'inform user that no options matches user request',
-        'offerbook': 'offer to place an order for user',
-        'select': 'provide several choices for user to choose from',
-    }
+    'taxi': {},
+    'train': {
+        'day': 'day', 'time': "duration"
+    },
+    'police': {},
+    'booking': {}
 }
 
 digit2word = {
@@ -301,1131 +611,282 @@ digit2word = {
     '6': 'six', '7': 'seven', '8': 'eight', '9': 'nine', '10': 'ten'
 }
 
-
-def pharse_in_sen(phrase, sen):
-    '''
-    match value in the sentence
-    :param phrase: str
-    :param sen: str
-    :return: start, end if matched, else None, None
-    '''
-    assert isinstance(phrase, str)
-    pw = '(^|[\s,\.:\?!-])(?P<v>{})([\s,\.:\?!-]|$)'
-    pn = '(^|[\s\?!-]|\D[,\.:])(?P<v>{})($|[\s\?!-]|[,\.:]\D|[,\.:]$)'
-
-    if phrase.isdigit() and phrase in digit2word:
-        phrase = digit2word[phrase]
-        p = re.compile(pw.format(re.escape(phrase)), re.I)
-        m = re.search(p, sen)
-        if m:
-            num = len(re.findall(p, sen))
-            # if num > 1:
-            #     match['>1'] += 1
-            # else:
-            #     match['1'] += 1
-            return m.span('v'), num
-    # match['0'] += 1
-    if phrase.isdigit():
-        pattern = pn
-    else:
-        pattern = pw
-    p = re.compile(pattern.format(re.escape(phrase)), re.I)
-    m = re.search(p, sen)
-    if m:
-        num = len(re.findall(p, sen))
-        # if num > 1:
-        #     match['>1'] += 1
-        # else:
-        #     match['1'] += 1
-        return m.span('v'), num
-    return (None, None), 0
-
-
-
-
-
-def update_state(state, update):
-    # print('======================')
-    # print(state)
-    # print(update)
-    # print('======================')
-
-    for service, service_update in update.items():
-        if service not in state:
-            state[service] = copy.deepcopy(service_update)
+cnt_domain_slot = Counter()
+
+def normalize_domain_slot_value(domain, slot, value):
+    global init_ontology, slot_name_map
+    domain = domain.lower()
+    slot = slot.lower()
+    value = value.strip()
+    if value in ['do nt care', "do n't care"]:
+        value = 'dontcare'
+    if value in ['?', 'none', 'not mentioned']:
+        value = ""
+    if domain not in init_ontology['domains']:
+        raise Exception(f'{domain} not in ontology')
+    if slot not in init_ontology['domains'][domain]['slots']:
+        if slot in slot_name_map:
+            slot = slot_name_map[slot]
+        elif slot in slot_name_map[domain]:
+            slot = slot_name_map[domain][slot]
         else:
-            state[service].update(update[service])
+            raise Exception(f'{domain}-{slot} not in ontology')
+    assert slot=='' or slot in init_ontology['domains'][domain]['slots'], f'{(domain, slot, value)} not in ontology'
+    return domain, slot, value
 
-
-def convert_da(utt, da_dict, binary_ont, intent_ont, did, tid, da_cat_slot_values):
+def convert_da(da_dict, utt, sent_tokenizer, word_tokenizer):
     '''
-     convert multiwoz dialogue acts to required format
+    convert multiwoz dialogue acts to required format and update ontology
+    :param da_dict: dict[(intent, domain, slot, value)] = [word_start, word_end]
     :param utt: user or system utt
-    :param da_dict: multiwoz da
-    :param binary_ont: binary ontology
-    :param intent_ont: intent ontology
-    :return:
     '''
+    global init_ontology, digit2word, cnt_domain_slot
+
     converted_da = {
         'categorical': [],
         'non-categorical': [],
         'binary': []
     }
-
-    for Domain_Act, S, v in da_dict:
-        Domain, Act = Domain_Act.split('-')
-        if Domain.lower() in ['police', 'hospital', 'bus']:
-            continue
-
-        if Act.lower() not in intent_ont:
-            intent_ont[Act.lower()] = {}
-
-        # general domain is converted to empty domain. e.g. thank, bye
-        if Domain == 'general':
-            assert S == 'none'
-            assert v == 'none'
-            converted_dict = {
-                'intent': Act.lower(),
-                'domain': '',
-                'slot': '',
-                'value': ''
-            }
-            converted_da['binary'].append(converted_dict)
-
-            if converted_dict not in binary_ont:
-                binary_ont.append(converted_dict)
-            continue
-
-
-
-        try:
-            reformated_slot = REF_SYS_DA[Domain][S]
-        except:
-            # print('44444444444444444444444444444444')
-            # print(Domain, S)
-            # logging.info('slot not in REF_SYS_DA, drop')
-            continue
-
-        # if slot is None, da should be converted into binary
-        if reformated_slot is None:
-            if not (S == 'none' and v == 'none'):
-                # mainly for `Open` slot
-                # print('11111111111111111111')
-                # print(Domain_Act, S, v)
-                continue
-            # Booking-Inform none none
-            # Police-Inform none none
-            # Train-OfferBook none none
-            converted_dict = {
-                'intent': Act.lower(),
-                'domain': Domain.lower(),
-                'slot': '',
-                'value': ''
-            }
-            converted_da['binary'].append(converted_dict)
-            if converted_dict not in binary_ont:
-                binary_ont.append(converted_dict)
-            continue
-
-        reformated_domain_slot = Domain.lower() + '-' + reformated_slot
-
-        if Act.lower() == 'request':
-            converted_dict = {
-                'intent': 'request',
-                'domain': Domain.lower(),
-                'slot': reformated_slot,
-                'value': ''
-            }
-            converted_da['binary'].append(converted_dict)
-
-            if converted_dict not in binary_ont:
-                binary_ont.append(converted_dict)
-            continue
-
-        # vs = da_dict[(Domain_Act, S)]['values']
-
-        if reformated_domain_slot in slot_to_type and slot_to_type[reformated_domain_slot] == 'cat':
-            origin_v = v
-            v = v.lower()
-            # if reformated_domain_slot in cat_slot_proj:
-            #     v = cat_slot_proj[reformated_domain_slot][v]
-            if reformated_domain_slot not in da_cat_slot_values:
-                da_cat_slot_values[reformated_domain_slot] = []
-            # if v not in cat_slot_values[reformated_domain_slot]:
-            da_cat_slot_values[reformated_domain_slot].append(v)
+    sentences = sent_tokenizer.tokenize(utt)
+    sent_spans = sent_tokenizer.span_tokenize(utt)
+    tokens = [token for sent in sentences for token in word_tokenizer.tokenize(sent)]
+    token_spans = [(sent_span[0]+token_span[0], sent_span[0]+token_span[1]) for sent, sent_span in zip(sentences, sent_spans) for token_span in word_tokenizer.span_tokenize(sent)]
+    # assert len(tokens) == len(token_spans)
+    # for token, span in zip(tokens, token_spans):
+    #     if utt[span[0]:span[1]] != '"':
+    #         assert utt[span[0]:span[1]] == token
+
+    for (intent, domain, slot, value), span in da_dict.items():
+        if intent == 'request' or slot == '' or value == '':
+            # binary dialog acts
+            init_ontology['binary_dialogue_acts'].add((intent, domain, slot, value,))
+            converted_da['binary'].append({
+                'intent': intent,
+                'domain': domain,
+                'slot': slot,
+                'value': value
+            })
+        elif init_ontology['domains'][domain]['slots'][slot]['is_categorical']:
+            # categorical dialog acts
             converted_da['categorical'].append({
-                'intent': Act.lower(),
-                'domain': Domain.lower(),
-                'slot': reformated_slot,
-                'value': v
+                'intent': intent,
+                'domain': domain,
+                'slot': slot,
+                'value': value
             })
-            if 'start_word' in da_dict[(Domain_Act, S, origin_v)]:
-                start_ws = da_dict[(Domain_Act, S, origin_v)]['start_word']
-                end_ws = da_dict[(Domain_Act, S, origin_v)]['end_word']
-                utt_list = utt.split()
-                for start_w, end_w in zip(start_ws, end_ws):
-                    if start_w > len(utt_list) or end_w > len(utt_list):
-                        continue
-                    start_ch = 0
-                    for i in range(start_w):
-                        start_ch += len(utt_list[i]) + 1
-                    end_ch = start_ch
-                    for i in range(start_w, end_w):
-                        end_ch += len(utt_list[i]) + 1
-                    try:
-                        end_ch += len(utt_list[end_w])
-                    except:
-                        print(utt_list, start_w, end_w)
-                    if not utt[start_ch: end_ch] == origin_v:
-                        # print('2222222222222222222222222')
-                        # print('\n'.join([v, utt[start_ch: end_ch - 1]]))
-                        continue
-
-                    else:
-                        converted_da['categorical'][-1].update({
-                            'start': start_ch,
-                            'end': end_ch
-                        })
-                        break
-
         else:
-            if 'start_word' not in da_dict[(Domain_Act, S, v)]:
-                # todo no span annotation
-                converted_da['non-categorical'].append({
-                    'intent': Act.lower(),
-                    'domain': Domain.lower(),
-                    'slot': reformated_slot,
-                    'value': v
-                })
-                continue
-
-            start_ws = da_dict[(Domain_Act, S, v)]['start_word']
-            end_ws = da_dict[(Domain_Act, S, v)]['end_word']
-            utt_list = utt.split()
-            found = True
-            for start_w, end_w in zip(start_ws, end_ws):
-                if start_w > len(utt_list) or end_w > len(utt_list):
-                    continue
-                start_ch = 0
-                for i in range(start_w):
-                    start_ch += len(utt_list[i]) + 1
-                end_ch = start_ch
-                for i in range(start_w, end_w):
-                    end_ch += len(utt_list[i]) + 1
-                try:
-                    end_ch += len(utt_list[end_w])
-                except:
-                    print(utt_list, start_w, end_w, v)
-                if not utt[start_ch: end_ch] == v:
-                    # print('2222222222222222222222222')
-                    # print('\n'.join([v, utt[start_ch: end_ch - 1]]))
-                    continue
-
+            # non-categorical dialog acts
+            converted_da['non-categorical'].append({
+                'intent': intent,
+                'domain': domain,
+                'slot': slot,
+                'value': value
+            })
+            # correct some value and try to give char level span
+            match = False
+            ori_value = value
+            value = value.lower()
+            if span and span[0] <= span[1]:
+                # use original span annotation, but tokenizations are different
+                start_word, end_word = span
+                if end_word >= len(tokens):
+                    # due to different tokenization, sometimes will out of index
+                    delta = end_word - len(tokens) + 1
+                    start_word -= delta
+                    end_word -= delta
+                start_char, end_char = token_spans[start_word][0], token_spans[end_word][1]
+                value_span = utt[start_char:end_char].lower()
+                match = True
+                if value_span == value:
+                    cnt_domain_slot['span match'] += 1
+                elif value.isdigit() and value in digit2word and digit2word[value] == value_span:
+                    # !!!CHANGE VALUE: value is digit but value span is word
+                    cnt_domain_slot['digit value match'] += 1
+                elif ''.join(value.split()) == ''.join(value_span.split()):
+                    # !!!CHANGE VALUE: equal when remove blank
+                    cnt_domain_slot['remove blank'] += 1
+                elif value in value_span:
+                    # value in value_span
+                    start_char += value_span.index(value)
+                    end_char = start_char + len(value)
+                    assert utt[start_char:end_char].lower() == value, f'{[value, utt[start_char:end_char], utt]}'
+                    cnt_domain_slot['value in span'] += 1
+                elif ':' in value and value == '0'+value_span:
+                    # !!!CHANGE VALUE: time x:xx == 0x:xx
+                    cnt_domain_slot['x:xx == 0x:xx'] += 1
                 else:
-                    found = True
-                    converted_da['non-categorical'].append({
-                        'intent': Act.lower(),
-                        'domain': Domain.lower(),
-                        'slot': reformated_slot,
-                        'value': v,
-                        'start': start_ch,
-                        'end': end_ch
-                    })
-                    break
-
-            if not found:
-                converted_da['non-categorical'].append({
-                    'intent': Act.lower(),
-                    'domain': Domain.lower(),
-                    'slot': reformated_slot,
-                    'value': v
-                })
-    return converted_da
-
-
-def get_state_update(prev_state, cur_state, dialog, did, tid, utt, coref_dict, slot_notfound_dict, da_cat_slot_values):
-    prev_turns = dialog['turns']
-    state_update = {'categorical': [], 'non-categorical': []}
-    notfoundnum = 0
-    total_value = 0
-
-    diff_state = {}
-    if prev_state is None:
-        diff_state = {domain: {slot: value for slot, value in cur_state[domain].items() if value != ''} for domain in
-                      cur_state}
-    else:
-        assert len(prev_state) == len(cur_state), print(prev_state, cur_state)
-        for domain, domain_state in prev_state.items():
-            if domain not in diff_state:
-                diff_state[domain] = {}
-            for slot, value in domain_state.items():
-                if value != cur_state[domain][slot]:
-                    # assert len(cur_state[domain][slot]) > 0, print(did, tid, domain, slot, utt)
-                    diff_state[domain][slot] = cur_state[domain][slot]
-
-    ret_diff_state = copy.deepcopy(diff_state)
-
-
-
-    for domain in diff_state:
-        for slot in diff_state[domain]:
-
-            total_value += 1
-            fix_or = False
-            if '|' in diff_state[domain][slot]:
-                value = diff_state[domain][slot].split('|')[0]
-            else:
-                value = diff_state[domain][slot]
-
-            # if dialog['original_id'] == 'PMUL2512' and tid == 17 and value == '02:45':
-            #     value = '2:45'
-
-            value_list = [value]
-            for _synonyms in synonyms:
-                if value in _synonyms:
-                    value_list = _synonyms
-
-            value_list.extend(get_time_variants(value))
-            value_list.extend(get_genitive_variants(value))
-            value_list.extend(get_bb_variants(value))
-
-            if value.endswith(' restaurant'):
-                value_list.append(value.split(' restaurant')[0])
-            if value.endswith(' hotel'):
-                value_list.append(value.split(' hotel')[0])
-            found = False
-            for value in value_list:
-                # categorical slots
-                if slot in ['internet', 'parking', 'pricerange', 'day', 'area', 'stars']:
-                    reformated_domain_slot = '-'.join([domain, slot])
-                    if reformated_domain_slot in state_cat_slot_value_dict and (value in state_cat_slot_value_dict[reformated_domain_slot] or value in ['dontcare', '', 'none', 'not mentioned']):
-                        state_update['categorical'].append({
-                            'domain': domain,
-                            'slot': slot,
-                            'value': diff_state[domain][slot]
-                        })
-                        if domain + '-' + slot not in da_cat_slot_values:
-                            da_cat_slot_values[domain + '-' + slot] = [diff_state[domain][slot]]
-                        da_cat_slot_values[domain + '-' + slot].append(diff_state[domain][slot])
-                        if value != diff_state[domain][slot]:
-                            state_update['categorical'][-1].update({'fixed_value': value})
-                            ret_diff_state[domain][slot] = value
-                        else :
-                            for _turn in prev_turns[::-1]:
-                                found = False
-                                for da in _turn['dialogue_act']['categorical']:
-                                    if da['value'] == value:
-                                        if 'start' in da:
-                                            state_update['categorical'][-1].update({
-                                                'utt_idx': _turn['utt_idx'],
-                                                'start': da['start'],
-                                                'end': da['end'],
-                                                'from': 'prev_da_span'
-                                            })
-                                            found = True
-                                            break
-                                if found:
-                                    break
-                    else:
-                        state_update['categorical'].append({
-                            'domain': domain,
-                            'slot': slot,
-                            'value': diff_state[domain][slot],
-                            'fixed_value': 'not found'
-                        })
-                        if domain + '-' + slot not in da_cat_slot_values:
-                            da_cat_slot_values[domain + '-' + slot] = [diff_state[domain][slot]]
-                        da_cat_slot_values[domain + '-' + slot].append(diff_state[domain][slot])
-                        ret_diff_state[domain][slot] = 'not found'
-                        notfoundnum += 1
-                    # reformated_domain_slot = '-'.join([domain, slot]
-                    found = True
-                    break
-
-                # process value ---> none
-                assert value not in ['none', 'not mentioned']
-                if value in ['', 'dontcare']:
-                    # if reformated_domain_slot not in state_cat_slot_values:
-                    #     state_cat_slot_values[reformated_domain_slot] = []
-                    # # if v not in cat_slot_values[reformated_domain_slot]:
-                    # state_cat_slot_values[reformated_domain_slot].append(value)
-                    state_update['non-categorical'].append({
-                        'domain': domain,
-                        'slot': slot,
-                        'value': diff_state[domain][slot]
-                    })
-                    found = True
-                    break
-
-                # first look for values in coref_dict
-                for _Domain_Act, _Slot, _value in coref_dict:
-                    _domain, _act = _Domain_Act.lower().split('-')
-                    _slot = _Slot.lower()
-                    _coref_value = coref_dict[(_Domain_Act, _Slot, _value)]['coref_value']
-                    if _coref_value == '':
-                        continue
-                    _coref_turn = coref_dict[(_Domain_Act, _Slot, _value)]['turn']
-                    if _coref_turn == -1:
-                        continue
-                    _coref_pos = coref_dict[(_Domain_Act, _Slot, _value)]['pos']
-                    if _coref_pos == '':
-                        continue
-                    _utt = coref_dict[(_Domain_Act, _Slot, _value)]['utt']
-                    if _domain == domain and _slot == slot and value == _coref_value:
-
-                        start_w, end_w = [int(p) for p in _coref_pos.split('-')]
-                        utt_list = _utt.split()
-                        start_ch = 0
-                        for i in range(start_w):
-                            start_ch += len(utt_list[i]) + 1
-                        end_ch = start_ch
-                        for i in range(start_w, end_w + 1):
-                            end_ch += len(utt_list[i]) + 1
-                        end_ch -= 1
-
-                        if not _utt[start_ch: end_ch] == _coref_value:
-                            # print(111111111111111111111111111111111)
-                            # print(_utt[start_ch: end_ch], _coref_value)
-                            continue
-
-                        state_update['non-categorical'].append({
-                            'domain': domain,
-                            'slot': slot,
-                            'value': diff_state[domain][slot],
-                            'from': 'coref',
-                            'utt_idx': _coref_turn,
-                            'start': start_ch,
-                            'end': end_ch
-                        })
-                        if value != diff_state[domain][slot]:
-                            state_update['categorical'][-1].update({'fixed_value': value})
-                            ret_diff_state[domain][slot] = value
-                        found = True
-
-                if found:
-                    break
-
-                # from da annotation
-                for _turn in prev_turns[::-1]:
-                    for da in _turn['dialogue_act']['non-categorical']:
-                        # if da['domain'] == domain and da['slot'] == slot and fuzz.ratio(da['value'], value) > 85:
-                            # if not da['value'] == value:
-                            #     print(1111111111111111)
-                            #     print(value, da['value'])
-
-                        if fuzz.ratio(da['value'], value) > 85:
-
-                            if 'start' in da:
-                                found = True
-                                state_update['non-categorical'].append({
-                                    'domain': domain,
-                                    'slot': slot,
-                                    # 'value': da['value'],
-                                    'value': diff_state[domain][slot],
-                                    'utt_idx': _turn['utt_idx'],
-                                    'start': da['start'],
-                                    'end': da['end'],
-                                    'from': 'prev_da_span'
-                                })
-                                if value != diff_state[domain][slot]:
-                                    state_update['non-categorical'][-1].update({'fixed_value': value})
-                                    ret_diff_state[domain][slot] = value
-                                if da['value'] != value:
-                                    state_update['non-categorical'][-1].update({'fixed_value':da['value']})
-                                    ret_diff_state[domain][slot] = da['value']
-
-                                break
-                    if found:
-                        break
-
-                if found:
-                    break
-
-                # from utterance
-                for _turn in prev_turns[::-1]:
-                    _utt = _turn['utterance']
-                    (start, end), num = pharse_in_sen(str(value), _utt)
-                    if num:
-                        assert value.lower() == _utt[start:end].lower() \
-                               or digit2word[value].lower() == _utt[start:end].lower()
-                        found = True
-                        state_update['non-categorical'].append({
-                            'domain': domain,
-                            'slot': slot,
-                            'value': diff_state[domain][slot],
-                            # 'value': _utt[start:end].lower(),
-                            # 'fixed_value': _utt[start:end].lower(),
-                            'from': 'prev_utt',
-                            'utt_idx': _turn['utt_idx'],
-                            'start': start,
-                            'end': end
-                        })
-                        if value != diff_state[domain][slot]:
-                            state_update['non-categorical'][-1].update({'fixed_value': value})
-                            ret_diff_state[domain][slot] = value
-                        if value != _utt[start:end].lower():
-                            state_update['non-categorical'][-1].update({'fixed_value': _utt[start:end].lower()})
-                            ret_diff_state[domain][slot] = _utt[start:end].lower()
-                        found = True
-                        break
-                if found:
-                    break
-
-                # from utterance
-                if not value.isdigit():
-                    for _turn in prev_turns[::-1]:
-                        _utt = _turn['utterance']
-
-                        s = difflib.SequenceMatcher(None, _utt, value)
-                        matches = s.get_matching_blocks()
-
-                        for i, j, n in matches:
-                            possible_value = _utt[i: i+len(value)]
-
-                            if i+ len(value) < len(_utt) and _utt[i+len(value)] not in [ ' ', ',', '.', '?', '!', '/'] :
-                                possible_value += _utt[i+len(value):].split()[0]
-
-                                if possible_value.startswith('th '):
-                                    possible_value = possible_value[3:]
-                                    i += 3
-                            if i > 0 and _utt[i-1] not in [ ' ', ',', '.', '?', '!', '/']:
-                                # cut first incomplete word
-                                if len(possible_value.split()) > 1:
-                                    i += len(possible_value.split()[0]) + 1
-                                    possible_value = ' '.join(possible_value.split()[1:])
-
-
-                                # prepend first incomplete word
-                                # possible_value = _utt[:i].split()[-1] + possible_value
-                                # i -= len(_utt[:i].split()[-1])
-
-
-                            if fuzz.token_sort_ratio(value, possible_value) > 92 or possible_value.startswith('ashley hotel and lovell lodge') :
-                                found = True
-
-                                state_update['non-categorical'].append({
-                                            'domain': domain,
-                                            'slot': slot,
-                                            'value': diff_state[domain][slot],
-                                            # 'value': possible_value,
-                                            # 'fixed_value': possible_value,
-                                            'from':'prev_utt',
-                                            'utt_idx': _turn['utt_idx'],
-                                            'start': i,
-                                            'end': i+len(possible_value)
-                                        })
-                                if value != diff_state[domain][slot]:
-                                    state_update['non-categorical'][-1].update({'fixed_value': value})
-                                    ret_diff_state[domain][slot] = value
-                                if possible_value != value:
-                                    state_update['non-categorical'][-1].update({'fixed_value': possible_value})
-                                    ret_diff_state[domain][slot] = possible_value
-                                break
-                    #             assert _utt[i:i+len(possible_value)] == possible_value, print(_utt, _utt[i:i+len(possible_value)], possible_value)
-                    #             break
-                                # if not possible_value == value:
-                                #             print(3333333333333333)
-                                #             print(value)
-                                #             print(possible_value)
-                            if found:
-                                break
-                        if found:
+                    # span mismatch, search near 1-2 words
+                    for window in range(1,3):
+                        start = max(0,start_word-window)
+                        end = min(len(token_spans)-1,end_word+window)
+                        large_span = utt[token_spans[start][0]:token_spans[end][1]].lower()
+                        if value in large_span:
+                            start_char = token_spans[start][0] + large_span.index(value)
+                            end_char = start_char + len(value)
+                            assert utt[start_char:end_char].lower() == value, f'{[value, utt[start_char:end_char], utt]}'
+                            cnt_domain_slot[f'window={window}'] += 1
                             break
-
-                if found:
-                    break
-            if not found:
-                #                 print('3333333333333333333')
-                #                 print(did, tid)
-                #                 print(domain, slot, value)
-                #                 print([_t['utterance'] for _t in prev_turns])
-                # assert slot not in ['internet', 'parking', 'pricerange', 'day', 'area', 'stars']
-
-                if (domain, slot) not in slot_notfound_dict:
-                    slot_notfound_dict[(domain, slot)] = 1
-                else:
-                    slot_notfound_dict[(domain, slot)] += 1
-                state_update['non-categorical'].append({
-                    'domain': domain,
-                    'slot': slot,
-                    'value': diff_state[domain][slot],
-                    'fixed_value': 'not found'
-                })
-                ret_diff_state[domain][slot] = 'not found'
-                notfoundnum += 1
-    return state_update, notfoundnum, total_value, ret_diff_state
-
-
-def merge_data_annotation():
-    extract_dir = os.path.join(self_dir, 'original_data')
-    data21 = json.load(open(os.path.join(self_dir, extract_dir, 'data.json')))
-    # data21_train = json.load(open(os.path.join(self_dir, extract_dir, 'train.json')))
-    # data21_val = json.load(open(os.path.join(self_dir, extract_dir, 'val.json')))
-    # data21_test = json.load(open(os.path.join(self_dir, extract_dir, 'test.json')))
-    # data21 = {}
-    # data21.update(data21_train)
-    # data21.update(data21_val)
-    # data21.update(data21_test)
-
-    # update_from_25_cnt = 0
-    # total_turn = 0
-    # for dial_id, dialog in data21.items():
-    #     dial_id = dial_id + '.json'
-    #     assert dial_id in data25
-    #     for i, _turn in enumerate(dialog['log']):
-    #         total_turn += 1
-    #         if _turn['text'] == data25[dial_id]['log'][i]['text']:
-    #             _turn['span_info'].extend(copy.deepcopy(data25[dial_id]['log'][i]['span_info']))
-    #             # _turn['span_info'] = list(set(_turn['span_info']))
-    #             # _turn['dialog_act'].update(copy.deepcopy(data25[dial_id]['log'][i]['dialog_act']))
-    #             for Domain_Intent in data25[dial_id]['log'][i]['dialog_act']:
-    #                 if Domain_Intent in _turn['dialog_act']:
-    #                     _turn['dialog_act'][Domain_Intent].extend(data25[dial_id]['log'][i]['dialog_act'][Domain_Intent])
-    #                 else:
-    #                     _turn['dialog_act'][Domain_Intent] = copy.deepcopy(data25[dial_id]['log'][i]['dialog_act'][Domain_Intent])
-    #                 # _turn['dialog_act'][Domain_Intent] = list(set(_turn['dialog_act'][Domain_Intent]))
-    #             if 'coreference' in data25[dial_id]['log'][i]:
-    #                 _turn['coreference'] = copy.deepcopy(data25[dial_id]['log'][i]['coreference'])
-    #             update_from_25_cnt += 1
-    #         else:
-    #             # print('==============multiwoz21=================')
-    #             # print(_turn['text'])
-    #             # print('==============multiwoz25=================')
-    #             # print(data25[dial_id]['log'][i]['text'])
-    #             continue
-    #
-    # print('{}/{} turns update from multiwoz25 data'.format(update_from_25_cnt, total_turn))
-    return data21
-
-
-def preprocess(da_cat_slot_values, state_cat_slot_values):
-    all_data = []
-    binary_ont = []
-    intent_ont = {}
-    state_ont = {}
-
-    data_splits = ['train', 'val', 'test']
-    # data_splits = ['test']
-    extract_dir = os.path.join(self_dir, 'original_data')
-    num_train_dialogue = 0
-    num_train_utt = 0
-
-    num_match_error_da_span = 0
-
-    if not os.path.exists('data.zip') or not os.path.exists('ontology.json'):
-        # for data_split in data_splits:
-        # data_zip_file = os.path.join(self_dir, 'original_data.zip')
-        # if not os.path.exists(data_zip_file):
-        #     raise FileNotFoundError(data_zip_file)
-
-        # logging.info('unzip multiwoz data to {}'.format(extract_dir))
-        # archive = zipfile.ZipFile(data_zip_file, 'r')
-        # archive.extractall(extract_dir)
-
-        data = merge_data_annotation()
-        # exit()
-        # data = json.load(open(os.path.join(self_dir, extract_dir, 'data_meta_fixed.json')))
-        train_list = open(os.path.join(self_dir, extract_dir, 'trainListFile')).read().split()
-        val_list = open(os.path.join(self_dir, extract_dir, 'valListFile')).read().split()
-        test_list = open(os.path.join(self_dir, extract_dir, 'testListFile')).read().split()
-
-        total_not_found_slot = 0
-        total_slot = 0
-        total_turn = 0
-        total_not_found_turn = 0
-        total_not_found_state = 0
-
-        slot_notfound_dict = {}
-
-        dialog_idx = 0
-        for dialog_id, dialog in tqdm(data.items()):
-
-            acc_not_found_flag = False
-
-            coref_dict = {}
-
-            data_split = None
-            for _split in data_splits:
-                if dialog_id.strip('.json') in eval(_split + '_list'):
-                    data_split = _split
-                    break
-            # assert data_split is not None
-            # if data_split != 'test':
-            #     continue
-            if data_split is None:
-                continue
-
-            if data_split == 'train':
-                num_train_dialogue += len(data)
-
-            dialog_idx += 1
-            # if dialog_idx > 10:
-            #     break
-            converted_dialogue = {
-                'dataset': 'multiwoz21',
-                'data_split': data_split,
-                'dialogue_id': 'multiwoz21_' + str(dialog_idx),
-                'original_id': dialog_id,
-                'domains': [d for d in dialog['goal'] if
-                            len(dialog['goal'][d]) != 0 and d in multiwoz_desc and d not in ['police', 'hospital', 'bus']],
-                'turns': [],
-            }
-
-            if data_split == 'train':
-                num_train_utt += len(dialog['log'])
-
-            prev_state = None
-            accum_fixed_state = {}
-            for turn_id, turn in enumerate(dialog['log']):
-
-                utt = turn['text'].lower()
-                # for several wrong words
-                utt = utt.replace('seeuni', 'see uni')
-
-                utt = ' '.join(utt.split())
-                utt = utt.replace(' im ', ' i\'m ')
-                utt = utt.replace(' dont ', ' don\'t ')
-                utt = utt.replace(' thats ', ' that\'s ')
-                utt = utt.replace('idon\'t', ' i don\'t ')
-                utt = utt.replace('wedon\'t ', 'we don\'t ')
-                utt = utt.replace('id be ', 'i\'d be ')
-                # utt = utt.replace('cambridgethat\'svery ', 'cambridge that\'s very')
-                utt = re.sub(r'^im ', 'i\'m ', utt)
-                utt = re.sub(r'^whats ', 'what\'s ', utt)
-                utt = re.sub(r'^id ', 'i\'d ', utt)
-                utt = re.sub(r'^thats ', 'that\'s ', utt)
-
-                utt = re.sub( r'([a-zA-Z0-9])([,.!\'-\?"~])', r'\1 \2', utt)
-                utt = re.sub(r'([,.!\'-\?"~])([a-zA-Z0-9])', r'\1 \2', utt)
-
-                das = turn.get('dialog_act', [])
-                role = 'user' if turn_id % 2 == 0 else 'system'
-                spans = turn.get('span_info', [])
-
-                da_dict = {}
-                for Domain_Act in das:
-                    Domain = Domain_Act.split('-')[0]
-                    if Domain.lower() not in converted_dialogue['domains'] and Domain.lower() not in ['general', 'booking']:
-                        continue
-
-                    Svs = das[Domain_Act]
-                    for S, v in Svs:
-                        v = v.lower()
-                        if v.startswith('th '):
-                            # print(v)
-                            v = v[3:]
-                        if v.startswith('he '):
-                            # print(v)
-                            v = v[3:]
-
-                        if (Domain_Act, S, v) not in da_dict:
-                            da_dict[(Domain_Act, S, v)] = {}
-
-                for span in spans:
-                    Domain_Act, S, v, start_word, end_word = span
-                    v = v.lower()
-                    if not (Domain_Act, S, v) in da_dict:
-                        # logging.info('span da annotation not found in multiwoz da label')
-                        # logging.info(dialog_id, turn_id)
-                        # logging.info((Domain_Act, S, v))
-                        # logging.info(da_dict)
-                        num_match_error_da_span += 1
                     else:
-                        if v.startswith('th '):
-                            # print(v)
-                            v = v[3:]
-                            start_word += 3
-                        if v.startswith('he '):
-                            # print(v)
-                            v = v[3:]
-                            start_word += 3
-
-                        if 'start_word' not in da_dict[(Domain_Act, S, v)]:
-                            da_dict[(Domain_Act, S, v)]['start_word'] = []
-                            da_dict[(Domain_Act, S, v)]['end_word'] = []
-
-                        da_dict[(Domain_Act, S, v)]['start_word'].append(start_word)
-                        da_dict[(Domain_Act, S, v)]['end_word'].append(end_word)
-
-                converted_turn = {
-                    'utt_idx': turn_id,
-                    'speaker': role,
-                    'utterance': utt,
-                    'dialogue_act': convert_da(utt, da_dict, binary_ont, intent_ont, dialog_id, turn_id, da_cat_slot_values),
-                }
-
-                # for state annotations
-                if role == 'system':
-                    turn_state = turn['metadata']
-                    cur_state = {}
-                    for domain in turn_state:
-                        if domain in ['police', 'hospital', 'bus']:
-                            continue
-                        if domain not in converted_dialogue['domains']:
-                            continue
-                        cur_state[domain] = {}
-                        for subdomain in ['semi', 'book']:
-                            for slot in turn_state[domain][subdomain]:
-                                if slot == 'booked':
-                                    continue
-                                if slot == 'ticket':  # or (domain == 'train' and slot == 'people'):
-                                    # for cases where domain slot exists in REF but not in state
-                                    # because of check in evaluate.py
-                                    continue
-
-                                else:
-                                    fixed_slot = slot
-                                state_ds = domain + '-' + fixed_slot
-                                if state_ds not in slot_to_type:
-                                    logging.info('state slot not defined in da list')
-                                    logging.info(state_ds)
-                                if turn_state[domain][subdomain][slot] in ['', [], 'not mentioned', 'none']:
-                                    cur_state[domain][fixed_slot] = ""
-                                else:
-                                    if turn_state[domain][subdomain][slot].startswith('th '):
-                                        # print('state')
-                                        # print(turn_state[domain][subdomain][slot])
-                                        turn_state[domain][subdomain][slot] = turn_state[domain][subdomain][slot][3:]
-                                    if turn_state[domain][subdomain][slot].startswith('he '):
-                                        # print('state')
-                                        # print(turn_state[domain][subdomain][slot])
-                                        turn_state[domain][subdomain][slot] = turn_state[domain][subdomain][slot][3:]
-
-                                    cur_state[domain][fixed_slot] = turn_state[domain][subdomain][slot]
-
-                                if domain not in state_ont:
-                                    state_ont[domain] = []
-                                if fixed_slot not in state_ont[domain]:
-                                    state_ont[domain].append(fixed_slot)
-
-                        if domain == 'train' and 'people' not in cur_state[domain]:
-                            cur_state[domain]['people'] = ''
-                        # if len(converted_turn['state'][domain]) == 0:
-                        #     converted_turn['state'].pop(domain)
-                        if len(converted_dialogue['turns']) > 0:
-                            # move state from system side to user side
-                            converted_dialogue['turns'][-1]['state'] = copy.deepcopy(cur_state)
-
-                    # for state update annotations
-                    state_update, _notfoundslot, _totalslot, ret_diff_state = get_state_update(prev_state, cur_state, converted_dialogue,
-                                                                               dialog_id, turn_id, turn['text'], coref_dict,
-                                                                               slot_notfound_dict, da_cat_slot_values)
-
-                    update_state(accum_fixed_state, ret_diff_state)
-                    for domain in accum_fixed_state:
-                        for slot in accum_fixed_state[domain]:
-                            assert isinstance(accum_fixed_state[domain][slot], str), print(accum_fixed_state[domain][slot])
-
-                    if _notfoundslot == 0:
-                        # for slot in state_update['categorical']:
-                        #     assert 'fixed_value' not in slot
-                        for slot in state_update['non-categorical']:
-                            if slot['value'] not in ['', 'dontcare']:
-                                assert 'utt_idx' in slot
-
-                    else:
-                        flag = False
-                        for slot in state_update['categorical']:
-                            if 'fixed_value' in slot:
-                                flag = True
-                                break
-                        for slot in state_update['non-categorical']:
-                            if 'utt_idx' not in slot:
-                                flag = True
-                                break
-                        assert flag, print(flag, state_update['non-categorical'])
-
-                    total_turn += 1
-                    total_slot += _totalslot
-                    total_not_found_slot += _notfoundslot
-                    total_not_found_turn += 1 if _notfoundslot > 0 else 0
-                    if _notfoundslot > 0:
-                        acc_not_found_flag = True
-                    if acc_not_found_flag:
-                        total_not_found_state += 1
-
-                    coref_dict = {}
-                    converted_dialogue['turns'][-1]['state_update'] = copy.deepcopy(state_update)
-                    converted_dialogue['turns'][-1]['fixed_state'] = copy.deepcopy(accum_fixed_state)
-                    if 'state' not in converted_dialogue['turns'][-1]:
-                        converted_dialogue['turns'][-1]['state'] = {}
-                    prev_state = copy.deepcopy(cur_state)
-
-                converted_dialogue['turns'].append(converted_turn)
-
-                if 'coreference' in turn:
-                    for Domain_Act in turn['coreference']:
-                        for Slot, value, coref, coref_turn, coref_pos in turn['coreference'][Domain_Act]:
-                            value = value.lower()
-                            coref_dict[(Domain_Act, Slot, value)] = {'turn': coref_turn, 'pos': coref_pos,
-                                                                     'coref_value': coref,
-                                                                     'utt': converted_dialogue['turns'][coref_turn][
-                                                                         'utterance']}
-
-            check_spans(converted_dialogue)
-            # postprocess_update_spans(converted_dialogue)
-            if converted_dialogue['turns'][-1]['speaker'] == 'system':
-                converted_dialogue['turns'].pop(-1)
-            all_data.append(converted_dialogue)
-
-        print('total_turn', total_turn)
-        print('total_not_found_turn', total_not_found_turn)
-        print('total_slot', total_slot)
-        print('total_not_found_slot', total_not_found_slot)
-        print('total_not_found_state', total_not_found_state)
-        print(slot_notfound_dict)
-        from collections import Counter
-        # print({k : dict(Counter(v)) for k, v in cat_slot_values.items()})
-        json.dump({k : dict(Counter(v)) for k, v in state_cat_slot_values.items()}, open(os.path.join(self_dir, 'cat_slot_values.json'), 'w'), indent=4)
-        cat_slot_values = {k: list(set(v)) for k, v in state_cat_slot_values.items()}
-        da_cat_slot_values = {k: list(set(v)) for k, v in da_cat_slot_values.items()}
-
-        json.dump(all_data, open('data.json', 'w'), indent=4)
-        write_zipped_json(os.path.join(self_dir, './data.zip'), 'data.json')
-        os.remove('data.json')
-
-        new_ont = {
-            'domains': {},
-            'intents': {},
-            'binary_dialogue_act': {}
-        }
-
-        for d_s in slot_to_type:
-            d, s = d_s.split('-')
-            if d not in new_ont['domains']:
-                new_ont['domains'][d] = {
-                    'description': multiwoz_desc[d]['domain'],
-                    'slots': {}
-                }
-            domain_ont = new_ont['domains'][d]
-            assert s not in domain_ont
-            domain_ont['slots'][s] = {
-                'description': multiwoz_desc[d][s] if s in multiwoz_desc[d] else '',
-                'is_categorical': d_s in state_cat_slot_ds,
-                'possible_values': da_cat_slot_values[d_s] if d_s in state_cat_slot_ds else []
-            }
-            domain_ont['slots'][s]['possible_values'] = [_ for _ in domain_ont['slots'][s]['possible_values'] if _ not in ['dontcare', '']]
-
-        new_ont['state'] = {}
-        # print(state_cat_slot_value_dict)
-        print(state_ont)
-        for d in state_ont:
-            new_ont['state'][d] = {}
-            for s in state_ont[d]:
-                d_s = '-'.join([d, s])
-                new_ont['state'][d][s] = ''
-
-        new_ont['intents'] = {i: {'description': multiwoz_desc['intents'][i]} for i in intent_ont}
-        new_ont['binary_dialogue_act'] = binary_ont
-
-        slot_desc = json.load(open(os.path.join(self_dir, extract_dir, './slot_descriptions.json')))
-        for domain_slot in slot_desc:
-            _domain, _slot = domain_slot.split('-')
-            _desc = slot_desc[domain_slot][0]
-            if _slot == 'arriveby':
-                _slot = 'arriveBy'
-            elif _slot == 'leaveat':
-                _slot = 'leaveAt'
-            if 'book' in _slot:
-                _slot = _slot.replace('book ', '')
-            if not _domain in new_ont['state']:
-                # logging.info('domain {} not in state domains'.format(_domain))
-                continue
-            if _domain in new_ont['domains'] and _slot in new_ont['domains'][_domain]['slots']:
-                new_ont['domains'][_domain]['slots'][_slot]['description'] = _desc
-            if not _slot in new_ont['state'][_domain]:
-                logging.info('domain {} slot {} not in state'.format(_domain, _slot))
-                continue
-            # new_ont['state'][_domain][_slot] = ""
-            assert _domain in new_ont['domains'], print(_domain)
-            assert _slot in new_ont['domains'][_domain]['slots']
-
-        logging.info('num_match_error_da_span {}'.format(num_match_error_da_span))
-        json.dump(new_ont, open(os.path.join(self_dir, './ontology.json'), 'w'), indent=4)
-
-    else:
-        all_data = read_zipped_json(os.path.join(self_dir, './data.zip'), 'data.json')
-        new_ont = json.load(open(os.path.join(self_dir, './ontology.json'), 'r'))
-    logging.info('# dialogue: {}, # turn: {}'.format(num_train_dialogue, num_train_utt))
-    return all_data, new_ont
-
-
-# def postprocess_update_spans(dialog):
-#     changed_utt_idx_and_position = {}
-#     for turn in dialog['turns']:
-#         if turn['speaker'] != 'user':
-#             continue
-#         changed = False
-#         for _update in turn['state_update']['non-categorical']:
-#             if 'utt_idx' in _update:
-#                 utt_idx = _update['utt_idx']
-#                 start = _update['start']
-#                 end = _update['end']
-#
-#                 # assume at most one word changes for every utterance
-#                 if turn['utt_idx'] not in changed_utt_idx_and_position:
-#                     if utt_idx == turn['utt_idx'] and start-1 > -1 and turn['utterance'][start-1] not in [' ']:
-#                         changed_utt_idx_and_position[turn['utt_idx']] = start
-#                         print('=======================')
-#                         print(dialog['original_id'])
-#                         print(turn['utterance'])
-#                         print(json.dumps(_update, indent=2))
-#                         print(turn['utterance'][start: end])
-#                         turn['utterance'] = turn['utterance'][:start] + ' ' + turn['utterance'][start:]
-#                         print(turn['utterance'])
-#                         _update['start'] += 1
-#                         _update['end'] += 1
-#                         changed = True
-#                 if utt_idx not in changed_utt_idx_and_position:
-#                     continue
-#                 else:
-#                     value = _update['fixed_value'] if 'fixed_value' in _update and _update['fixed_value'] != 'not found' else _update['value']
-#                     if start >= changed_utt_idx_and_position[utt_idx]:
-#                         if dialog['turns'][utt_idx]['utterance'][_update['start']: _update['end']] != value:
-#                             assert dialog['turns'][utt_idx]['utterance'][_update['start']+1: _update['end']+1] == value, print(dialog['turns'][utt_idx]['utterance'], dialog['turns'][utt_idx]['utterance'][_update['start']+1: _update['end']+1])
-#                             _update['start'] += 1
-#                             _update['end'] += 1
-#                     elif start < changed_utt_idx_and_position[utt_idx] < end:
-#                         if dialog['turns'][utt_idx]['utterance'][_update['start']: _update['end']] != value:
-#                             assert (dialog['turns'][utt_idx]['utterance'][_update['start']: _update['end']+1]).replace(' ', '') == value.replace(' ', ''), print(dialog['turns'][utt_idx]['utterance'], dialog['turns'][utt_idx]['utterance'][_update['start']: _update['end']+1], value)
-#                             print('fix')
-#                             print(_update)
-#                             _update['end'] += 1
-#                             _update['fixed_value'] = turn['utterance'][_update['start']: _update['end'] + 1].strip()
-#                             print(_update)
-#         if changed:
-#             for _update in turn['state_update']['non-categorical']:
-#                 if 'utt_idx' in _update:
-#                     utt_idx = _update['utt_idx']
-#                     start = _update['start']
-#                     end = _update['end']
-#
-#                     if utt_idx not in changed_utt_idx_and_position:
-#                         continue
-#                     else:
-#                         value = _update['fixed_value'] if 'fixed_value' in _update and _update[
-#                             'fixed_value'] != 'not found' else _update['value']
-#                         if start >= changed_utt_idx_and_position[utt_idx]:
-#                             if dialog['turns'][utt_idx]['utterance'][_update['start']: _update['end']] != value:
-#                                 assert dialog['turns'][utt_idx]['utterance'][_update['start'] + 1: _update['end'] + 1] == value
-#                                 _update['start'] += 1
-#                                 _update['end'] += 1
-#                         elif start < changed_utt_idx_and_position[utt_idx] < end:
-#                             if dialog['turns'][utt_idx]['utterance'][_update['start']: _update['end']] != value:
-#                                 print('====================fix===================')
-#                                 print(_update)
-#                                 assert (dialog['turns'][utt_idx]['utterance'][_update['start']: _update['end']+1]).replace(' ', '') == value.replace(' ', ''), print(dialog['turns'][utt_idx]['utterance'], dialog['turns'][utt_idx]['utterance'][_update['start']+1: _update['end']+1])
-#                                 _update['end'] += 1
-#                                 _update['fixed_value'] = dialog['turns'][utt_idx]['utterance'][_update['start']: _update['end'] + 1]
-#                                 print(_update)
-#     for turn in dialog['turns']:
-#         if turn['speaker'] != 'user':
-#             continue
-#         for _update in turn['state_update']['non-categorical']:
-#             if 'utt_idx' in _update:
-#                 value = _update['fixed_value'] if 'fixed_value' in _update and _update[
-#                     'fixed_value'] != 'not found' else _update['value']
-#                 utt_idx = _update['utt_idx']
-#                 start = _update['start']
-#                 end = _update['end']
-#                 if dialog['turns'][utt_idx]['utterance'][start] == ' ':
-#                     _update['start'] += 1
-#                     _update['fixed_value'] = value[1:]
-#                     value = value[1:]
-#                     start += 1
-#                 assert dialog['turns'][utt_idx]['utterance'][start: end] == value, print(json.dumps(turn, indent=4), [c for c in dialog['turns'][utt_idx]['utterance'][start: end]], [c for c in value])
-#     return dialog
-
+                        # still not found
+                        match = False
+            
+            if match:
+                converted_da['non-categorical'][-1]['value'] = utt[start_char:end_char]
+                converted_da['non-categorical'][-1]['start'] = start_char
+                converted_da['non-categorical'][-1]['end'] = end_char
+                cnt_domain_slot['have span'] += 1
+            else:
+                cnt_domain_slot['no span'] += 1
+    return converted_da
 
-def get_time_variants(time_text):
-    value_list = [time_text]
-    pattern_time = r'(\d{1,2}:\d{2})(\s)?(am|pm|AM|PM)?'
-    match_times = re.findall(pattern_time, time_text)
-    if len(match_times) < 1:
-        return []
-    match_time = match_times[0]
+def preprocess():
+    original_data_dir = 'MultiWOZ_2.1'
+    new_data_dir = 'data'
 
-    am_flag = match_time[2] in ['am', 'AM']
-    pm_flag = match_time[2] in ['pm', 'PM']
-    no_am_pm_flag = match_time[2] == ''
-    if am_flag:
-        # 4:00am -> 4:00
-        value_list.append(match_time[0])
-        if len(match_time[0]) == 4:
-            # 4:00 -> 04:00
-            value_list.append('0' + match_time[0])
-    if pm_flag:
-        # 4:00pm -> 16:00
-        hour, min = match_time[0].split(':')
-        hour = int(hour)
-        new_hour = 12 + hour
-        value_list.append(str(new_hour)+':'+min)
-    if no_am_pm_flag:
-        hour, min = match_time[0].split(':')
-        hour = int(hour)
-        if hour > 12:
-            new_hour = hour - 12
-            value_list.append(str(new_hour) + ':' + min + 'pm')
-            value_list.append(str(new_hour) + ':' + min + ' pm')
-            value_list.append(str(new_hour) + ':' + min)
-            if min == '00':
-                value_list.append(str(new_hour) + 'pm')
-                value_list.append(str(new_hour) + ' pm')
-                value_list.append(str(new_hour))
+    if not os.path.exists(original_data_dir):
+        # MultiWOZ_2.1.zip must be downloaded manually from
+        # https://github.com/budzianowski/multiwoz/blob/master/data/MultiWOZ_2.1.zip
+        original_data_zip = 'MultiWOZ_2.1.zip'
+        if not os.path.exists(original_data_zip):
+            raise FileNotFoundError(f'cannot find original data: {original_data_zip}')
         else:
-            value_list.append(str(hour) + ':' + min + 'am')
-            value_list.append(str(hour) + ':' + min + ' am')
-            value_list.append(str(hour) + ':' + min)
-            if min == '00':
-                value_list.append(str(hour) + 'am')
-                value_list.append(str(hour) + ' am')
-                value_list.append(str(hour))
-        if len(match_time[0]) == 5 and match_time[0][0] == '0':
-            value_list.append(match_time[0][1:])
-        value_list.append(''.join(match_time[0].split(':')))
-
-    return value_list
-
-
-def get_genitive_variants(value):
-    ret_list = []
-    value_genitive_format = r"(?=\w)s(?=\s)"
-    value_pattern = re.compile(value_genitive_format)
-
-    span_genitive_value = re.sub(value_pattern, " 's", value)
-    if span_genitive_value != value:
-        ret_list.append(span_genitive_value)
-    span_genitive_value = re.sub(value_pattern, "'s", value)
-    if span_genitive_value != value:
-        ret_list.append(span_genitive_value)
-    # if len(ret_list) > 0:
-    #     print('=============================')
-    #     print(value)
-    #     print(re.findall(value_pattern, value))
-    #     print(ret_list)
-    return ret_list
-
-
-def check_spans(dialog):
-    for turn in dialog['turns']:
-        if turn['speaker'] != 'user':
-            continue
-        for _update in turn['state_update']['non-categorical']:
-            if 'utt_idx' in _update:
-                value = _update['fixed_value'] if 'fixed_value' in _update and _update[
-                    'fixed_value'] != 'not found' else _update['value']
-                utt_idx = _update['utt_idx']
-                start = _update['start']
-                end = _update['end']
-                assert dialog['turns'][utt_idx]['utterance'][start:end] == value, print(dialog['turns'][utt_idx]['utterance'], dialog['turns'][utt_idx]['utterance'][start:end])
-
+            archive = ZipFile(original_data_zip)
+            archive.extractall()
+    
+    os.makedirs(new_data_dir, exist_ok=True)
+    for filename in os.listdir(original_data_dir):
+        if 'db' in filename:
+            copy2(f'{original_data_dir}/{filename}', new_data_dir)
+    
+    original_data = json.load(open(f'{original_data_dir}/data.json'))
+    global init_ontology, cnt_domain_slot
+
+    val_list = set(open(f'{original_data_dir}/valListFile.txt').read().split())
+    test_list = set(open(f'{original_data_dir}/testListFile.txt').read().split())
+    dataset = 'multiwoz21'
+    splits = ['train', 'validation', 'test']
+    dialogues_by_split = {split:[] for split in splits}
+    sent_tokenizer = PunktSentenceTokenizer()
+    word_tokenizer = TreebankWordTokenizer()
+    for ori_dialog_id, ori_dialog in tqdm(original_data.items()):
+        if ori_dialog_id in val_list:
+            split = 'validation'
+        elif ori_dialog_id in test_list:
+            split = 'test'
+        else:
+            split = 'train'
+        dialogue_id = f'{dataset}-{split}-{len(dialogues_by_split[split])}'
+
+        # get user goal and involved domains
+        cur_domains = []
+        goal = {
+            'description': '. '.join(ori_dialog['goal']['message']),
+            'inform': {},
+            'request': {}
+        }
+        for k, v in ori_dialog['goal'].items():
+            if len(v) != 0 and k in init_ontology['domains']:
+                cur_domains.append(k)
+                goal['inform'][k] = {}
+                goal['request'][k] = {}
+                for attr in ['fail_info', 'info', 'fail_book', 'book']:
+                    if attr in v:
+                        for slot, value in v[attr].items():
+                            if 'invalid' in slot:
+                                continue
+                            domain, slot, value = normalize_domain_slot_value(k, slot, value)
+                            if slot in goal['inform'][domain]:
+                                goal['inform'][domain][slot] += '|'+value
+                            else:
+                                goal['inform'][domain][slot] = value
+                if 'reqt' in v:
+                    for slot in v['reqt']:
+                        domain, slot, _ = normalize_domain_slot_value(k, slot, '')
+                        goal['request'][domain][slot] = ''
+
+        dialogue = {
+            'dataset': dataset,
+            'data_split': split,
+            'dialogue_id': dialogue_id,
+            'original_id': ori_dialog_id,
+            'domains': cur_domains, # will be updated by dialog_acts and state
+            'goal': goal,
+            'turns': []
+        }
 
+        for turn_id, turn in enumerate(ori_dialog['log']):
+            # correct some grammar errors in the text, mainly following tokenization.md in MultiWOZ_2.1
+            text = turn['text']
+            text = re.sub(" Im ", " I'm ", text)
+            text = re.sub(" im ", " i'm ", text)
+            text = re.sub(r"^Im ", "I'm ", text)
+            text = re.sub(r"^im ", "i'm ", text)
+            text = re.sub("theres", "there's", text)
+            text = re.sub("dont", "don't", text)
+            text = re.sub("whats", "what's", text)
+            text = re.sub('thats', "that's", text)
+            utt = text
+            speaker = 'user' if turn_id % 2 == 0 else 'system'
+
+            das = turn.get('dialog_act', [])    
+            spans = turn.get('span_info', [])
+            da_dict = {}
+            # transform DA
+            for Domain_Intent in das:
+                domain, intent = Domain_Intent.lower().split('-')
+                assert intent in init_ontology['intents'], f'{ori_dialog_id}:{turn_id}:da\t{intent} not in ontology'
+                for Slot, value in das[Domain_Intent]:
+                    domain, slot, value = normalize_domain_slot_value(domain, Slot, value)
+                    if domain not in cur_domains:
+                        # update original cur_domains
+                        cur_domains.append(domain)
+                    da_dict[(intent, domain, slot, value,)] = []
+
+            for span in spans:
+                Domain_Intent, Slot, value, start_word, end_word = span
+                domain, intent = Domain_Intent.lower().split('-')
+                domain, slot, value = normalize_domain_slot_value(domain, Slot, value)
+                assert (intent, domain, slot, value,) in da_dict
+                da_dict[(intent, domain, slot, value,)] = [start_word, end_word]
+
+            dialogue_acts = convert_da(da_dict, utt, sent_tokenizer, word_tokenizer) # will also update ontology
+
+            dialogue['turns'].append({
+                'speaker': speaker,
+                'utterance': utt,
+                'utt_idx': turn_id,
+                'dialogue_acts': dialogue_acts,
+            })
 
-def get_bb_variants(value):
-    ret_list = []
-    if 'bed and breakfast' in value:
-        ret_list.append(value.replace('bed and breakfast', 'b & b'))
-    return ret_list
+            if speaker == 'system':
+                # attach the state built from this turn's metadata to the preceding user turn
+                # and give this system turn an empty db_results placeholder
+                turn_state = turn['metadata']
+                cur_state = copy.deepcopy(init_ontology['state'])
+                for domain in turn_state:
+                    if domain not in cur_state:
+                        continue
+                    for subdomain in ['semi', 'book']:
+                        for slot, value in turn_state[domain][subdomain].items():
+                            if slot in ['booked', 'ticket']:
+                                continue
+                            _, slot, value = normalize_domain_slot_value(domain, slot, value)
+                            cur_state[domain][slot] = value
+                dialogue['turns'][-2]['state'] = cur_state
+                dialogue['turns'][-1]['db_results'] = {}
+        dialogues_by_split[split].append(dialogue)
+    # pprint(cnt_domain_slot.most_common())
+    dialogues = []
+    for split in splits:
+        dialogues += dialogues_by_split[split]
+    init_ontology['binary_dialogue_acts'] = [{'intent':bda[0],'domain':bda[1],'slot':bda[2],'value':bda[3]} for bda in init_ontology['binary_dialogue_acts']]
+    json.dump(dialogues[:10], open(f'dummy_data.json', 'w'), indent=2)
+    json.dump(dialogues, open(f'{new_data_dir}/dialogues.json', 'w'), indent=2)
+    json.dump(init_ontology, open(f'{new_data_dir}/ontology.json', 'w'), indent=2)
+    with ZipFile('data.zip', 'w', ZIP_DEFLATED) as zf:
+        for filename in os.listdir(new_data_dir):
+            zf.write(f'{new_data_dir}/{filename}')
+    return dialogues, init_ontology
 
 if __name__ == '__main__':
-    preprocess(da_cat_slot_values, state_cat_slot_values)
\ No newline at end of file
+    preprocess()
\ No newline at end of file