diff --git a/data/unified_datasets/README.md b/data/unified_datasets/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..a22a057e191e4bc2b75c0290e2d9cc8a09c23ffc
--- /dev/null
+++ b/data/unified_datasets/README.md
@@ -0,0 +1,561 @@
+# Unified data format with example
+
+Under `data/unified_datasets` directory.
+
+A single-turn sample is represented as a dialogue with one turn.
+
+Each dataset has at least 4 files:
+
+- `README.md`: dataset description and the main changes from original data to processed data.
+- `preprocess.py`: Python script that preprocesses the data. Running `python preprocess.py` produces the following two files. The structure of `preprocess.py` should be:
+
+```python
+def preprocess():
+    pass
+if __name__ == '__main__':
+    preprocess()
+```
+
+- `ontology.json`: dataset ontology, contains descriptions, state definition, etc.
+- `data.zip`: contains `data.json`.
+
+### README
+
+- Data source: publication, original data download link, etc.
+- Data description:
+  - Annotations: whether the dataset has dialogue act and belief state annotations.
+  - Statistics: \# domains, \# dialogues, \# utterances, Avg. turns, Avg. tokens (split by space), etc.
+- Main changes from original data to processed data.
+
+### Ontology
+
+`ontology.json`: a *dict* containing:
+
+- `domains`: (*dict*) descriptions for domains, slots. Must contain all slots in the state and non-binary dialogue acts.
+  - `$domain_name`: (*dict*)
+    - `description`: (*str*) description for this domain.
+    - `slots`: (*dict*)
+      - `$slot_name`: (*dict*)
+        - `description`: (*str*) description for this slot.
+        - `is_categorical`: (*bool*) categorical slot or not.
+        - `possible_values`: (*list*) List of possible values the slot can take. If the slot is a categorical slot, it is a complete list of all the possible values. If the slot is a non categorical slot, it is either an empty list or a small sample of all the values taken by the slot.
+
+- `intents`: (*dict*) descriptions for intents.
+  - `$intent_name`: (*dict*)
+    - `description`: (*str*) description for this intent.
+- `binary_dialogue_act`: (*list* of *dict*) special dialogue acts whose value may not be present in the utterance, e.g. requesting the address of a hotel.
+  - `{"intent": (str), "domain": (str), "slot": (str), "value": (str)}`. domain, slot, value may be empty.
+- `state`: (*dict*) belief state of all domains.
+  - `$domain_name`: (*dict*)
+    - `$slot_name: ""`: slot with empty value. Note that these slots are a subset of the slots defined under `domains` above.
+
+### Dialogues
+
+`data.json`: a *list* of dialogues containing:
+
+- `dataset`: (*str*) dataset name, must be one of  ['schema', 'multiwoz', 'camrest', 'woz', ...], and be the same as the current dataset.
+- `data_split`: (*str*) in [train, val, test].
+- `dialogue_id`: (*str*) use dataset name as prefix, add count.
+- `domains`: (*list*) domains in this dialogue.
+- `turns`: (*list* of *dict*)
+  - `speaker`: (*str*) "user" or "system". **User side first, user side final**, "user" and "system" appear alternately?
+  - `utterance`: (*str*) sentence.
+  - `utt_idx`: (*int*) index of this turn, i.e. `turns[utt_idx]` gives the current turn.
+  - `dialogue_act`: (*dict*)
+    - `categorical`: (*list* of *dict*) for categorical slots.
+      - `{"intent": (str), "domain": (str), "slot": (str), "value": (str)}`. Value sets are defined in the ontology.
+    - `non-categorical` (*list* of *dict*) for non-categorical slots.
+      - `{"intent": (str), "domain": (str), "slot": (str), "value": (str), "start": (int), "end": (int)}`. `start` and `end` are character indexes for the value span.
+    - `binary` (*list* of *dict*) for binary dialogue acts in ontology.
+      - `{"intent": (str), "domain": (str), "slot": (str), "value": (str)}`. Possible dialogue acts are listed in the `ontology['binary_dialogue_act']`.
+  - `state`: (*dict*, optional, user side) belief state; the full set of slots is shown in `ontology['state']`.
+    - `$domain_name`: (*dict*) contains all slots in this domain.
+      - `$slot_name`: (*str*) value for this slot.
+  - `state_update`: (*dict*, optional, user side) records the difference of states between the current turn and the last turn.
+    - `categorical`: (*list* of *dict*) for categorical slots.
+      - `{"domain": (str), "slot": (str), "value": (str)}`. Value sets are defined in the ontology (**dontcare** may not be included).
+    - `non-categorical` (*list* of *dict*) for non-categorical slots.
+      - `{"domain": (str), "slot": (str), "value": (str), "utt_idx": (int), "start": (int), "end": (int)}`. `utt_idx` is the index of the utterance that contains the value. `start` and `end` are character indexes of the value span in that utterance, so `turns[utt_idx]['utterance'][start:end]` gives the value.
+
+Other attributes are optional.
+
+Run `python evaluate.py $dataset` to check the validity of the processed dataset.
+
+## Example of Schema Dataset
+
+```json
+	{
+    "dataset": "schema",
+    "data_split": "train",
+    "dialogue_id": "schema_535",
+    "original_id": "5_00022",
+    "domains": [
+      "event_2"
+    ],
+    "turns": [
+      {
+        "speaker": "user",
+        "utterance": "I feel like going out to do something in Oakland. I've heard the Raiders Vs Bengals game should be good.",
+        "utt_idx": 0,
+        "dialogue_act": {
+          "binary": [
+            {
+              "intent": "inform_intent",
+              "domain": "event_2",
+              "slot": "intent",
+              "value": "geteventdates"
+            }
+          ],
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "event_2",
+              "slot": "event_name",
+              "value": "raiders vs bengals",
+              "start": 65,
+              "end": 83
+            },
+            {
+              "intent": "inform",
+              "domain": "event_2",
+              "slot": "city",
+              "value": "oakland",
+              "start": 41,
+              "end": 48
+            }
+          ]
+        },
+        "state": {
+          "event_2": {
+            "event_type": "",
+            "category": "",
+            "event_name": "raiders vs bengals",
+            "date": "",
+            "time": "",
+            "number_of_tickets": "",
+            "city": "oakland",
+            "venue": "",
+            "venue_address": ""
+          }
+        },
+        "state_update": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "domain": "event_2",
+              "slot": "city",
+              "value": "oakland",
+              "utt_idx": 0,
+              "start": 41,
+              "end": 48
+            },
+            {
+              "domain": "event_2",
+              "slot": "event_name",
+              "value": "raiders vs bengals",
+              "utt_idx": 0,
+              "start": 65,
+              "end": 83
+            }
+          ]
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "The Raiders Vs Bengals game is at Oakland-Alameda County Coliseum today.",
+        "utt_idx": 1,
+        "dialogue_act": {
+          "binary": [],
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "offer",
+              "domain": "event_2",
+              "slot": "date",
+              "value": "today",
+              "start": 66,
+              "end": 71
+            },
+            {
+              "intent": "offer",
+              "domain": "event_2",
+              "slot": "event_name",
+              "value": "raiders vs bengals",
+              "start": 4,
+              "end": 22
+            },
+            {
+              "intent": "offer",
+              "domain": "event_2",
+              "slot": "venue",
+              "value": "oakland-alameda county coliseum",
+              "start": 34,
+              "end": 65
+            }
+          ]
+        }
+      },
+      {
+        "speaker": "user",
+        "utterance": "What time does it start?",
+        "utt_idx": 2,
+        "dialogue_act": {
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "event_2",
+              "slot": "time",
+              "value": ""
+            }
+          ],
+          "categorical": [],
+          "non-categorical": []
+        },
+        "state": {
+          "event_2": {
+            "event_type": "",
+            "category": "",
+            "event_name": "raiders vs bengals",
+            "date": "",
+            "time": "",
+            "number_of_tickets": "",
+            "city": "oakland",
+            "venue": "",
+            "venue_address": ""
+          }
+        },
+        "state_update": {
+          "categorical": [],
+          "non-categorical": []
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "It starts at 7 pm.",
+        "utt_idx": 3,
+        "dialogue_act": {
+          "binary": [],
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "event_2",
+              "slot": "time",
+              "value": "7 pm",
+              "start": 13,
+              "end": 17
+            }
+          ]
+        }
+      },
+      {
+        "speaker": "user",
+        "utterance": "That sounds fine.",
+        "utt_idx": 4,
+        "dialogue_act": {
+          "binary": [
+            {
+              "intent": "select",
+              "domain": "event_2",
+              "slot": "",
+              "value": ""
+            }
+          ],
+          "categorical": [],
+          "non-categorical": []
+        },
+        "state": {
+          "event_2": {
+            "event_type": "",
+            "category": "",
+            "event_name": "raiders vs bengals",
+            "date": "today",
+            "time": "",
+            "number_of_tickets": "",
+            "city": "oakland",
+            "venue": "",
+            "venue_address": ""
+          }
+        },
+        "state_update": {
+          "categorical": [],
+          "non-categorical": [
+            {
+              "domain": "event_2",
+              "slot": "date",
+              "value": "today",
+              "utt_idx": 1,
+              "start": 66,
+              "end": 71
+            }
+          ]
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "Do you want to get tickets for it?",
+        "utt_idx": 5,
+        "dialogue_act": {
+          "binary": [
+            {
+              "intent": "offer_intent",
+              "domain": "event_2",
+              "slot": "intent",
+              "value": "buyeventtickets"
+            }
+          ],
+          "categorical": [],
+          "non-categorical": []
+        }
+      },
+      {
+        "speaker": "user",
+        "utterance": "Yes, can you buy 3 tickets for me?",
+        "utt_idx": 6,
+        "dialogue_act": {
+          "binary": [
+            {
+              "intent": "affirm_intent",
+              "domain": "event_2",
+              "slot": "",
+              "value": ""
+            }
+          ],
+          "categorical": [
+            {
+              "intent": "inform",
+              "domain": "event_2",
+              "slot": "number_of_tickets",
+              "value": "3"
+            }
+          ],
+          "non-categorical": []
+        },
+        "state": {
+          "event_2": {
+            "event_type": "",
+            "category": "",
+            "event_name": "raiders vs bengals",
+            "date": "today",
+            "time": "",
+            "number_of_tickets": "3",
+            "city": "oakland",
+            "venue": "",
+            "venue_address": ""
+          }
+        },
+        "state_update": {
+          "categorical": [
+            {
+              "domain": "event_2",
+              "slot": "number_of_tickets",
+              "value": "3"
+            }
+          ],
+          "non-categorical": []
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "Sure. I will go ahead and buy 3 tickets for the Raiders Vs Bengals game in Oakland today. Is that right?",
+        "utt_idx": 7,
+        "dialogue_act": {
+          "binary": [],
+          "categorical": [
+            {
+              "intent": "confirm",
+              "domain": "event_2",
+              "slot": "number_of_tickets",
+              "value": "3"
+            }
+          ],
+          "non-categorical": [
+            {
+              "intent": "confirm",
+              "domain": "event_2",
+              "slot": "event_name",
+              "value": "raiders vs bengals",
+              "start": 48,
+              "end": 66
+            },
+            {
+              "intent": "confirm",
+              "domain": "event_2",
+              "slot": "date",
+              "value": "today",
+              "start": 83,
+              "end": 88
+            },
+            {
+              "intent": "confirm",
+              "domain": "event_2",
+              "slot": "city",
+              "value": "oakland",
+              "start": 75,
+              "end": 82
+            }
+          ]
+        }
+      },
+      {
+        "speaker": "user",
+        "utterance": "Yes, that's good. What's the address?",
+        "utt_idx": 8,
+        "dialogue_act": {
+          "binary": [
+            {
+              "intent": "request",
+              "domain": "event_2",
+              "slot": "venue_address",
+              "value": ""
+            },
+            {
+              "intent": "affirm",
+              "domain": "",
+              "slot": "",
+              "value": ""
+            }
+          ],
+          "categorical": [],
+          "non-categorical": []
+        },
+        "state": {
+          "event_2": {
+            "event_type": "",
+            "category": "",
+            "event_name": "raiders vs bengals",
+            "date": "today",
+            "time": "",
+            "number_of_tickets": "3",
+            "city": "oakland",
+            "venue": "",
+            "venue_address": ""
+          }
+        },
+        "state_update": {
+          "categorical": [],
+          "non-categorical": []
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "The game is at 7000 Coliseum Way. I've bought the tickets.",
+        "utt_idx": 9,
+        "dialogue_act": {
+          "binary": [
+            {
+              "intent": "notify_success",
+              "domain": "event_2",
+              "slot": "",
+              "value": ""
+            }
+          ],
+          "categorical": [],
+          "non-categorical": [
+            {
+              "intent": "inform",
+              "domain": "event_2",
+              "slot": "venue_address",
+              "value": "7000 coliseum way",
+              "start": 15,
+              "end": 32
+            }
+          ]
+        }
+      },
+      {
+        "speaker": "user",
+        "utterance": "Thanks! That's all.",
+        "utt_idx": 10,
+        "dialogue_act": {
+          "binary": [
+            {
+              "intent": "thank_you",
+              "domain": "",
+              "slot": "",
+              "value": ""
+            }
+          ],
+          "categorical": [],
+          "non-categorical": []
+        },
+        "state": {
+          "event_2": {
+            "event_type": "",
+            "category": "",
+            "event_name": "raiders vs bengals",
+            "date": "today",
+            "time": "",
+            "number_of_tickets": "3",
+            "city": "oakland",
+            "venue": "",
+            "venue_address": ""
+          }
+        },
+        "state_update": {
+          "categorical": [],
+          "non-categorical": []
+        }
+      },
+      {
+        "speaker": "system",
+        "utterance": "Need help with anything else?",
+        "utt_idx": 11,
+        "dialogue_act": {
+          "binary": [
+            {
+              "intent": "req_more",
+              "domain": "",
+              "slot": "",
+              "value": ""
+            }
+          ],
+          "categorical": [],
+          "non-categorical": []
+        }
+      },
+      {
+        "speaker": "user",
+        "utterance": "No, thank you.",
+        "utt_idx": 12,
+        "dialogue_act": {
+          "binary": [
+            {
+              "intent": "negate",
+              "domain": "",
+              "slot": "",
+              "value": ""
+            },
+            {
+              "intent": "thank_you",
+              "domain": "",
+              "slot": "",
+              "value": ""
+            }
+          ],
+          "categorical": [],
+          "non-categorical": []
+        },
+        "state": {
+          "event_2": {
+            "event_type": "",
+            "category": "",
+            "event_name": "raiders vs bengals",
+            "date": "today",
+            "time": "",
+            "number_of_tickets": "3",
+            "city": "oakland",
+            "venue": "",
+            "venue_address": ""
+          }
+        },
+        "state_update": {
+          "categorical": [],
+          "non-categorical": []
+        }
+      }
+    ]
+  }
+```
+
diff --git a/data/unified_datasets/camrest/README.md b/data/unified_datasets/camrest/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ae06dbe9dd5cd62c980f6b9bc731a1542bfd1aad
--- /dev/null
+++ b/data/unified_datasets/camrest/README.md
@@ -0,0 +1,24 @@
+# README
+
+## Features
+
+- Annotations: dialogue act, character-level span for non-categorical slots.
+
+Statistics: 
+
+|       | \# dialogues | \# utterances | avg. turns | avg. tokens | \# domains |
+| ----- | ------------ | ------------- | ---------- | ----------- | ---------- |
+| train | 406         | 2936         | 7.23     | 11.36       | 1          |
+| dev | 135         | 941         | 6.97      | 11.99       | 1          |
+| test | 135         | 935         | 6.93       | 11.87       | 1          |
+
+## Main changes
+
+- domain is set to **restaurant**
+- ignore some rare pair
+- 3 values are not found in original utterances
+- **dontcare** values in non-categorical slots are calculated in `evaluate.py` so `da_match` in evaluation is lower than actual number.
+
+## Original data
+
+camrest used in convlab2, included in `data/` path
\ No newline at end of file
diff --git a/data/unified_datasets/camrest/data.zip b/data/unified_datasets/camrest/data.zip
new file mode 100644
index 0000000000000000000000000000000000000000..b0d3db9f816f377f431e33d4a43ab0b9eb668f2a
Binary files /dev/null and b/data/unified_datasets/camrest/data.zip differ
diff --git a/data/unified_datasets/camrest/ontology.json b/data/unified_datasets/camrest/ontology.json
new file mode 100644
index 0000000000000000000000000000000000000000..b5617f7ef4d441674a361a280acdf3123468d53a
--- /dev/null
+++ b/data/unified_datasets/camrest/ontology.json
@@ -0,0 +1,122 @@
+{
+    "domains": {
+        "restaurant": {
+            "description": "find a restaurant to eat",
+            "slots": {
+                "food": {
+                    "description": "food type the restaurant serves",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "area": {
+                    "description": "area where the restaurant is located",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "north",
+                        "east",
+                        "west",
+                        "south",
+                        "centre"
+                    ]
+                },
+                "name": {
+                    "description": "name of the restaurant",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "pricerange": {
+                    "description": "price range of the restaurant",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "cheap",
+                        "moderate",
+                        "expensive"
+                    ]
+                },
+                "phone": {
+                    "description": "phone number of the restaurant",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "address": {
+                    "description": "exact location of the restaurant",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "postcode": {
+                    "description": "postal code of the restaurant",
+                    "is_categorical": false,
+                    "possible_values": []
+                }
+            }
+        }
+    },
+    "intents": {
+        "inform": {
+            "description": "inform user of value of a slot"
+        },
+        "request": {
+            "description": "ask for value of a slot"
+        },
+        "nooffer": {
+            "description": "inform user that no restaurant matches his request"
+        }
+    },
+    "binary_dialogue_act": [
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "food",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "address",
+            "value": ""
+        },
+        {
+            "intent": "nooffer",
+            "domain": "restaurant",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "area",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "phone",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "pricerange",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "postcode",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "name",
+            "value": ""
+        }
+    ],
+    "state": {
+        "restaurant": {
+            "pricerange": "",
+            "area": "",
+            "food": ""
+        }
+    }
+}
\ No newline at end of file
diff --git a/data/unified_datasets/camrest/original_data.zip b/data/unified_datasets/camrest/original_data.zip
new file mode 100644
index 0000000000000000000000000000000000000000..ab07af894f6508a78e7baf21d914978fe19e16a3
Binary files /dev/null and b/data/unified_datasets/camrest/original_data.zip differ
diff --git a/data/unified_datasets/camrest/preprocess.py b/data/unified_datasets/camrest/preprocess.py
new file mode 100644
index 0000000000000000000000000000000000000000..510df266aa4d528bfe19fd5c496be5d2f96521cb
--- /dev/null
+++ b/data/unified_datasets/camrest/preprocess.py
@@ -0,0 +1,350 @@
+import zipfile
+import json
+import os
+import copy
+import logging
+
+logging.basicConfig(level=logging.INFO)
+import sys
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+# print(sys.path[-1])
+
+from convlab2.util.file_util import read_zipped_json, write_zipped_json
+
+self_dir = os.path.dirname(os.path.abspath(__file__))
+
+cat_slot_values = {  # categorical slots -> their allowed values ('dontcare' is accepted in addition)
+    'area': ['north', 'east', 'west', 'south', 'centre'],
+    'pricerange': ['cheap', 'moderate', 'expensive']
+}
+
+camrest_desc = {  # NOTE(review): presumably the source of the descriptions in ontology.json - confirm
+    'restaurant': {
+        'domain': 'find a restaurant to eat',  # describes the domain itself; 'domain' is not a slot
+        'food': 'food type the restaurant serves',
+        'area': 'area where the restaurant is located',
+        'name': 'name of the restaurant',
+        'pricerange': 'price range of the restaurant',
+        'phone': 'phone number of the restaurant',
+        'address': 'exact location of the restaurant',
+        'postcode': 'postal code of the restaurant',
+    },
+    'intents': {  # one description per intent name
+        'inform': 'inform user of value of a slot',
+        'request': 'ask for value of a slot',
+        'nooffer': 'inform user that no restaurant matches his request'
+    }
+}
+
+all_slots = ['food', 'area', 'name', 'pricerange', 'phone', 'address', 'postcode']  # slot names convert_state accepts
+
+
+def convert_da(utt, da, all_intent, all_binary_das):
+    """Convert one CamRest dialogue act into the unified three-list format.
+
+    Args:
+        utt: utterance text that is searched for non-categorical values.
+        da: dict mapping an intent to an iterable of (slot, value) pairs.
+        all_intent: list of intents seen so far; unseen intents are
+            appended in place.
+        all_binary_das: list of binary acts seen so far; unseen acts are
+            appended in place.
+
+    Returns:
+        dict with the lists 'binary', 'categorical' and 'non-categorical'.
+        `nooffer` and `request` acts are binary, slots listed in the
+        module-level `cat_slot_values` are categorical, and every other slot
+        is non-categorical with `start`/`end` character offsets whenever its
+        value occurs in `utt`.
+
+    Raises:
+        AssertionError: if a categorical value is neither listed in
+            `cat_slot_values` nor equal to 'dontcare'.
+    """
+    domain = 'restaurant'
+    result = {
+        'binary': [],
+        'categorical': [],
+        'non-categorical': []
+    }
+
+    def add_binary(intent, slot):
+        # Binary acts never carry a value. The per-turn list and the global
+        # registry receive separate dict objects, so neither aliases the other.
+        act = {
+            'intent': intent,
+            'domain': domain,
+            'slot': slot,
+            'value': ''
+        }
+        result['binary'].append(act)
+        if act not in all_binary_das:
+            all_binary_das.append(dict(act))
+
+    for intent_name, slot_values in da.items():
+        # Remember every intent that occurs in the data.
+        if intent_name not in all_intent:
+            all_intent.append(intent_name)
+
+        if intent_name == 'nooffer':
+            # `nooffer` is recorded without a slot; its pairs are not inspected.
+            add_binary(intent_name, '')
+            continue
+
+        for slot, value in slot_values:
+            # Any raw value containing "care" is mapped to 'dontcare'.
+            if 'care' in value:
+                value = 'dontcare'
+            slot = slot.lower()
+            value = value.lower()
+
+            if intent_name == 'request':
+                # Requests only name the slot that is asked for.
+                add_binary(intent_name, slot)
+                continue
+
+            if slot in cat_slot_values:
+                assert value in cat_slot_values[slot] + ['dontcare']
+                result['categorical'].append({
+                    'intent': intent_name,
+                    'domain': domain,
+                    'slot': slot,
+                    'value': value
+                })
+                continue
+
+            # Non-categorical slot: anchor the value at its first occurrence.
+            begin = utt.find(value)
+            if begin == -1:
+                # Not found in the utterance (e.g. 'dontcare'): keep the act
+                # but leave out the character span.
+                result['non-categorical'].append({
+                    'intent': intent_name,
+                    'domain': domain,
+                    'slot': slot,
+                    'value': value
+                })
+                continue
+
+            finish = begin + len(value)
+            span_text = utt[begin:finish]
+            result['non-categorical'].append({
+                'intent': intent_name,
+                'domain': domain,
+                'slot': slot,
+                'value': span_text,
+                'start': begin,
+                'end': finish
+            })
+            # Sanity check: the recorded span reproduces the value exactly.
+            assert span_text == value
+
+    return result
+
+
+def convert_state(state, state_slots):
+    """Build the belief state from the `inform` acts in `state`.
+
+    Returns {'restaurant': {slot: value}} with '' for every unset slot of
+    `state_slots`; slots outside the module-level `all_slots` are skipped.
+    Raises ValueError for a known slot that is missing from `state_slots`.
+    """
+    ret_state = {'restaurant': {k: '' for k in state_slots}}
+    for da in state:
+        if da['act'] != 'inform':
+            continue
+        for s, v in da['slots']:
+            s, v = s.lower(), v.lower()
+            if s not in all_slots:
+                logging.info('state slot {} not in all_slots!'.format(s))
+                continue
+            # Fail with a clear error; a bare `raise` has no exception to re-raise here.
+            if s not in state_slots:
+                raise ValueError('state slot {} not in state_slots'.format(s))
+            ret_state['restaurant'][s] = v
+    return ret_state
+
+
+def get_state_update(prev_state, cur_state, prev_turns, cur_user_da, dialog_id):
+    """Describe which slot values changed since the previous user turn.
+
+    Args:
+        prev_state: slot -> value dict of the previous user turn.
+        cur_state: slot -> value dict of the current user turn.
+        prev_turns: turns converted so far; its length serves as the
+            `utt_idx` of the current user utterance.
+        cur_user_da: non-categorical dialogue acts of the current user turn.
+        dialog_id: not used by this function.
+
+    Returns:
+        dict with the lists 'categorical' and 'non-categorical'. An entry of
+        the latter carries `utt_idx`, `start` and `end` when the value could
+        be traced to a user act of this turn or to an act of the last turn
+        in `prev_turns`; otherwise it only has domain, slot and value.
+    """
+    domain = 'restaurant'
+    update = {'categorical': [], 'non-categorical': []}
+    non_cat = update['non-categorical']
+
+    for slot, value in cur_state.items():
+        # Only non-empty values that differ from the previous state count.
+        if slot in prev_state and prev_state[slot] == value:
+            continue
+        if value == '':
+            continue
+
+        plain = {'domain': domain, 'slot': slot, 'value': value}
+        if slot in cat_slot_values:
+            assert value in cat_slot_values[slot] + ['dontcare']
+            update['categorical'].append(plain)
+            continue
+
+        # Non-categorical slot: first look at what the user just said.
+        user_hits = [
+            act for act in cur_user_da
+            if act['slot'] == slot and act['value'] == value
+        ]
+        for act in user_hits:
+            if value != 'dontcare' and 'start' in act:
+                non_cat.append(dict(plain, utt_idx=len(prev_turns),
+                                    start=act['start'], end=act['end']))
+            else:
+                non_cat.append(dict(plain))
+        if user_hits:
+            continue
+
+        # Otherwise fall back to the spans annotated on the preceding turn.
+        if prev_turns:
+            last_acts = prev_turns[-1]['dialogue_act']['non-categorical']
+        else:
+            last_acts = []
+        sys_hits = [
+            act for act in last_acts
+            if act['slot'] == slot and act['value'] == value and 'start' in act
+        ]
+        for act in sys_hits:
+            non_cat.append(dict(plain, utt_idx=len(prev_turns) - 1,
+                                start=act['start'], end=act['end']))
+        if not sys_hits:
+            # No span available anywhere: record the bare slot-value pair.
+            non_cat.append(dict(plain))
+
+    return update
+
+
+def preprocess():
+    """Convert CamRest to the unified format, or load the cached result; returns `(all_data, new_ont)`."""
+    original_zipped_path = os.path.join(self_dir, 'original_data.zip')
+    if not os.path.exists(original_zipped_path):
+        raise FileNotFoundError(original_zipped_path)
+    data_zip_path = os.path.join(self_dir, 'data.zip')
+    ontology_path = os.path.join(self_dir, 'ontology.json')
+    need_build = not (os.path.exists(data_zip_path) and os.path.exists(ontology_path))  # anchored at self_dir, not the cwd
+    if need_build:
+        with zipfile.ZipFile(original_zipped_path, 'r') as archive:
+            archive.extractall(self_dir)
+    all_data = []
+    all_intent = []
+    all_binary_das = []
+    all_state_slots = ['pricerange', 'area', 'food']
+
+    data_splits = ['train', 'val', 'test']
+    extract_dir = os.path.join(self_dir, 'original_data')
+
+    if need_build:
+        # dialogue ids are numbered consecutively across all splits
+        dialog_id = 1
+        for data_split in data_splits:
+            with open(os.path.join(extract_dir, '{}.json'.format(data_split))) as f:
+                data = json.load(f)
+            for d in data:
+
+                dialogue = d['dial']
+                converted_dialogue = {
+                    'dataset': 'camrest',
+                    'data_split': data_split,
+                    'dialogue_id': 'camrest_' + str(dialog_id),
+                    'original_id': d['dialogue_id'],
+                    'domains': ['restaurant'],
+                    'turns': []
+                }
+
+                prev_state = {'restaurant': {}}  # the state update of the first turn is computed against an empty state
+                for turn in dialogue:
+                    usr_text = turn['usr']['transcript'].lower()
+                    usr_da = turn['usr']['dialog_act']
+
+                    sys_text = turn['sys']['sent'].lower()
+                    sys_da = turn['sys']['dialog_act']
+
+                    cur_state = convert_state(turn['usr']['slu'], all_state_slots)
+                    cur_user_da = convert_da(usr_text, usr_da, all_intent, all_binary_das)
+
+                    usr_turn = {
+                        'utt_idx': len(converted_dialogue['turns']),
+                        'speaker': 'user',
+                        'utterance': usr_text,
+                        'dialogue_act': cur_user_da,
+                        'state': copy.deepcopy(cur_state),
+                        'state_update': get_state_update(prev_state['restaurant'], cur_state['restaurant'], converted_dialogue['turns'], cur_user_da['non-categorical'], converted_dialogue['dialogue_id'])
+                    }
+
+                    sys_turn = {
+                        'utt_idx': len(converted_dialogue['turns'])+1,
+                        'speaker': 'system',
+                        'utterance': sys_text,
+                        'dialogue_act': convert_da(sys_text, sys_da, all_intent, all_binary_das),
+                    }
+
+                    prev_state = copy.deepcopy(cur_state)
+
+                    converted_dialogue['turns'].append(usr_turn)
+                    converted_dialogue['turns'].append(sys_turn)
+                if converted_dialogue['turns'] and converted_dialogue['turns'][-1]['speaker'] == 'system':
+                    converted_dialogue['turns'].pop(-1)  # drop the trailing system turn so the dialogue ends on a user turn
+                all_data.append(converted_dialogue)
+                dialog_id += 1
+        with open('./data.json', 'w') as f:  # temporary file in the cwd; zipped and removed right below
+            json.dump(all_data, f, indent=4)
+        write_zipped_json(data_zip_path, 'data.json')
+        os.remove('data.json')
+
+        new_ont = {
+            'domains': {},
+            'intents': {},
+            'binary_dialogue_act': [],
+            'state': {}
+        }
+
+        new_ont['state']['restaurant'] = {}
+        for ss in all_state_slots:
+            new_ont['state']['restaurant'][ss] = ''
+
+        for b in all_binary_das:
+            new_ont['binary_dialogue_act'].append(b)
+
+        for intent in all_intent:
+            new_ont['intents'][intent] = {'description': camrest_desc['intents'][intent]}
+
+        new_ont['domains']['restaurant'] = {
+            'description': camrest_desc['restaurant']['domain'],
+            'slots': {}
+        }
+        for s in all_slots:
+            new_ont['domains']['restaurant']['slots'][s] = {
+                "description": camrest_desc['restaurant'][s],
+                "is_categorical": s in cat_slot_values,
+                "possible_values": cat_slot_values[s] if s in cat_slot_values else []
+            }
+        with open(ontology_path, 'w') as f:
+            json.dump(new_ont, f, indent=4)
+
+    else:
+        all_data = read_zipped_json(data_zip_path, 'data.json')
+        with open(ontology_path, 'r') as f:
+            new_ont = json.load(f)
+    return all_data, new_ont
+
+
+if __name__ == '__main__':  # script entry point: run the conversion (or load the cached outputs)
+    preprocess()
diff --git a/data/unified_datasets/evaluate.py b/data/unified_datasets/evaluate.py
new file mode 100644
index 0000000000000000000000000000000000000000..1c68f384db76c34a6e05a4e3993af227e2e3be3c
--- /dev/null
+++ b/data/unified_datasets/evaluate.py
@@ -0,0 +1,334 @@
+import json
+import os
+from copy import deepcopy
+
+special_values = ['dontcare', '']  # always-allowed values; must not be listed in possible_values and are skipped by span-match counting
+
+
+def check_ontology(name):
+    """Load `<name>/ontology.json`, validate its structure and return it.
+
+    The returned dict has categorical `possible_values` lower-cased and gains a `bda_set` key
+    (set of binary-dialogue-act value tuples). Hard errors raise `AssertionError`. Layout:
+    ontology: {
+        "domains": {
+            domain name: {
+                "description": domain description,
+                "slots": {
+                    slot name: {
+                        "description": slot description
+                        // possible_values must be non-empty when is_categorical is True
+                        "is_categorical": is_categorical,
+                        "possible_values": [possible_values...]
+                    }
+                }
+            }
+        },
+        "intents": {
+            intent name: {
+                "description": intent description
+            }
+        },
+        "binary_dialogue_act": [
+            {
+                "intent": intent name,
+                "domain": domain name
+                "slot": slot name,
+                "value": some value
+            }
+        ],
+        "state": {
+            domain name: {
+                slot name: ""
+            }
+        }
+    }
+    """
+    ontology_file = os.path.join(f'{name}', 'ontology.json')
+    assert os.path.exists(ontology_file), f'ontology file should named {ontology_file}'
+    with open(ontology_file) as f:
+        ontology = json.load(f)
+
+    # record issues in ontology
+    descriptions = {
+        # a flag flips to False as soon as one entry of that kind has an empty description
+        "domains": True,
+        "slots": True,
+        "intents": True,
+    }
+    for domain_name, domain in ontology['domains'].items():
+        if not domain['description']:
+            descriptions["domains"] = False
+        # a domain is not required to appear in ontology['state'];
+        # the reverse direction (state domains must be defined here) is asserted further down
+        for slot_name, slot in domain["slots"].items():
+            if not slot["description"]:
+                descriptions["slots"] = False
+            if slot["is_categorical"]:
+                assert slot["possible_values"], f'ONTOLOGY\tcategorical slot {domain_name}-{slot_name} has no possible values'
+                slot['possible_values'] = list(map(str.lower, slot['possible_values']))
+                for value in special_values:
+                    assert value not in slot['possible_values'], f'ONTOLOGY\tspecial value `{value}` should not present in possible values'
+
+    for intent in ontology["intents"].values():
+        if not intent["description"]:
+            descriptions["intents"] = False
+
+    binary_dialogue_acts = set()
+    for bda in ontology['binary_dialogue_act']:
+        assert bda['intent'] is None or bda["intent"] in ontology['intents'], f'ONTOLOGY\tundefined intent in binary dialog act: {bda}'
+        binary_dialogue_acts.add(tuple(bda.values()))  # membership tests elsewhere rely on the same key order
+    ontology['bda_set'] = binary_dialogue_acts
+
+    assert 'state' in ontology, 'ONTOLOGY\tno state'
+    redundant_value = False
+    for domain_name, domain in ontology['state'].items():
+        assert domain_name in ontology['domains'], f'ONTOLOGY\tstate domain {domain_name} is not defined in domains'
+        for slot_name, value in domain.items():
+            assert slot_name in ontology['domains'][domain_name]['slots'], f'ONTOLOGY\tstate slot {domain_name}-{slot_name} is not defined'
+            if value:
+                redundant_value = True
+
+    if redundant_value:
+        print('ONTOLOGY: redundant value description in state')
+
+    # warn (without failing) about every kind of entry that lacks a description
+    for description, value in descriptions.items():
+        if not value:
+            print(f'description of {description} is incomplete')
+    return ontology
+
+
+def check_data(name, ontology):
+    """Validate `<name>/data.zip` against `ontology` and print dataset statistics.
+
+    Each dialogue is checked for id format, split, domains, speaker alternation and
+    dialogue acts; when the ontology defines a state, replaying every user turn's
+    `state_update` must reproduce the annotated `state`. Violations raise (mostly
+    `AssertionError`). Prints a message and returns early if data.zip is missing.
+    """
+
+    from zipfile import ZipFile
+    data_file = os.path.join(f'{name}', 'data.zip')
+    if not os.path.exists(data_file):
+        print('cannot find data.zip')
+        return
+
+    print('loading data')
+    with ZipFile(data_file) as zipfile:
+        with zipfile.open('data.json', 'r') as f:
+            data = json.load(f)
+
+    all_id = set()
+    splits = ['train', 'val', 'test']
+    da_values = 0  # non-categorical dialogue-act values that are not special values
+    da_matches = 0  # ... of which the annotated span equals the value (case-insensitive)
+    state_values = 0  # same two counters for non-categorical state updates
+    state_matches = 0
+    stat_keys = ['dialogues', 'utterances', 'tokens', 'domains']
+    stat = {
+        split: {
+            key: 0 for key in stat_keys
+        } for split in splits
+    }
+
+    # validate every dialogue; the counters above accumulate over the whole dataset
+
+    for dialogue in data:
+        dialogue_id = dialogue['dialogue_id']
+        assert isinstance(dialogue_id, str), '`dialogue_id` is expected to be str type'
+
+        assert dialogue['dataset'] == name, f'{dialogue_id}\tinconsistent dataset name: {dialogue["dataset"]}'
+
+        split = dialogue['data_split']
+        assert split in splits, f'unknown split: `{split}`'
+        cur_stat = stat[split]
+        cur_stat['dialogues'] += 1
+        try:
+            prefix, num = dialogue_id.rsplit('_', 1)  # split on the last `_` so dataset names may contain underscores
+            assert prefix == name
+            int(num)    # try converting to int
+        except (ValueError, AssertionError) as err:
+            print(f'{dialogue_id}\twrong dialogue id format: {dialogue_id}')
+            raise Exception(f'{dialogue_id}\twrong dialogue id format') from err
+        assert dialogue_id not in all_id, f'multiple dialogue id: {dialogue_id}'
+        all_id.add(dialogue_id)
+
+        cur_domains = dialogue['domains']
+        assert isinstance(cur_domains, list), f'{dialogue_id}\t`domains` is expected to be list type, '
+        assert len(set(cur_domains)) == len(cur_domains), f'{dialogue_id}\trepeated domains'
+        cur_stat['domains'] += len(cur_domains)
+        cur_domains = set(cur_domains)
+        for domain_name in cur_domains:
+            assert domain_name in ontology['domains'], f'{dialogue_id}\tundefined current domain: {domain_name}'
+
+        turns = dialogue['turns']
+        cur_stat['utterances'] += len(turns)
+        assert turns, f'{dialogue_id}\tempty turn'
+
+        assert turns[0]['speaker'] == 'user', f'{dialogue_id}\tnot start with user role'
+        if ontology['state']:
+            # update cur_state with state_update every turn, and compare it with state annotation
+            cur_state = {
+                domain_name: deepcopy(ontology['state'][domain_name]) for domain_name in cur_domains
+            }
+        # check dialog act
+        for turn_id, turn in enumerate(turns):
+            assert turn['speaker'] in ['user', 'system'], f'{dialogue_id}:{turn_id}\tunknown speaker value: {turn["speaker"]}'
+            assert turn_id == turn['utt_idx'], f'{dialogue_id}:{turn_id}\twrong utt_idx'
+            if turn_id > 0:
+                assert turns[turn_id - 1]['speaker'] != turn['speaker'], f'{dialogue_id}:{turn_id}\tuser and system should speak alternatively'
+
+            utterance = turn['utterance']
+            cur_stat['tokens'] += len(utterance.strip().split(' '))
+            dialogue_acts = turn['dialogue_act']
+
+            # check domain-slot-value
+            # prefix: error prefix
+            def check_dsv(domain_name, slot_name, value, categorical, prefix):
+                assert domain_name in cur_domains or domain_name == 'booking', f'{prefix}\t{domain_name} not presented in current domains'
+                domain = ontology['domains'][domain_name]
+                assert slot_name in domain['slots'], f'{prefix}\t{slot_name} not presented in domain {domain_name}'
+                slot = domain['slots'][slot_name]
+                if categorical:
+                    assert slot['is_categorical'], f'{prefix}\t{domain_name}-{slot_name} is not categorical'
+                    value = value.lower()
+                    assert value in special_values or value in slot['possible_values'], f'{prefix}\t`{value}` not presented in possible values of' \
+                                                             f' {domain_name}-{slot_name}: {slot["possible_values"]}'
+                else:
+                    assert not slot['is_categorical'], f'{prefix}\t{domain_name}-{slot_name} is not non-categorical'
+
+            def check_da(da, categorical):
+                assert da['intent'] in ontology['intents'], f'{dialogue_id}:{turn_id}\tundefined intent {da["intent"]}'
+                check_dsv(da['domain'], da['slot'], da['value'], categorical, f'{dialogue_id}:{turn_id}:da')
+
+            for da in dialogue_acts['categorical']:
+                check_da(da, True)
+            for da in dialogue_acts['non-categorical']:
+                check_da(da, False)
+                # values only match after .strip() in some case, it's the issue of pre-processing
+                if da['value'] not in special_values:
+                    da_values += 1
+                    assert 'start' in da and 'end' in da or 'start' not in da and 'end' not in da, \
+                        f'{dialogue_id}:{turn_id}\tstart and end field in da should both present or neither not present'
+                    if 'start' in da:
+                        value = utterance[da['start']:da['end']]
+                        if da['value'].lower() == value.lower():
+                            da_matches += 1
+
+            for da in dialogue_acts['binary']:
+                assert tuple(da.values()) in ontology['bda_set'], f'{dialogue_id}:{turn_id}\tbinary dialog act {da} not present in ontology'
+                # do not check domain-slot-value in binary dialogue acts
+
+            if turn['speaker'] == 'user':
+                assert 'state' in turn and 'state_update' in turn, f"{dialogue_id}:{turn_id}\tstate and state_update must present in user's role"
+                state_update = turn['state_update']
+
+                def apply_update(update, categorical):
+                    domain_name = update['domain']
+                    slot_name = update['slot']
+                    value = update['value']
+                    check_dsv(domain_name, slot_name, value, categorical, f'{dialogue_id}:{turn_id}:state_update')
+                    cur_state[domain_name][slot_name] = value
+                if ontology['state']:
+                    for update in state_update['categorical']:
+                        apply_update(update, True)
+                    for update in state_update['non-categorical']:
+                        apply_update(update, False)
+                        value = update['value']
+                        if value not in special_values:
+                            state_values += 1
+                            if 'utt_idx' in update:
+                                if turns[update['utt_idx']]['utterance'][update['start']:update['end']].lower() == update['value'].lower():
+                                    state_matches += 1
+                                else:
+                                    print('value in utt:\t', turns[update['utt_idx']]['utterance'][update['start']:update['end']].strip())
+                                    print('value in state:\t', update['value'])
+
+                    assert cur_state == turn['state'], f'{dialogue_id}:{turn_id}:state_update incorrect state or state update calculation'
+
+            else:
+                assert 'state' not in turn and 'state_update' not in turn, f"{dialogue_id}:{turn_id}\tstate or state_update cannot present in system's role"
+
+        assert turns[-1]['speaker'] == 'user', f'{dialogue_id} dialog must end with user role'
+
+    if da_values:
+        print('da values match rate:    {:.3f}'.format(da_matches * 100 / da_values))
+    if state_values:
+        print('state values match rate: {:.3f}'.format(state_matches * 100 / state_values))
+
+    all_stat = {key: 0 for key in stat_keys}
+    for key in stat_keys:
+        all_stat[key] = sum(stat[split][key] for split in splits)
+    stat['all'] = all_stat
+
+    for split in splits + ['all']:
+        cur_stat = stat[split]
+        if cur_stat['dialogues']:
+            cur_stat['avg_utt'] = round(cur_stat['utterances'] / cur_stat['dialogues'], 2)
+            cur_stat['avg_tokens'] = round(cur_stat['tokens'] / cur_stat['utterances'], 2)
+            cur_stat['avg_domains'] = round(cur_stat.pop('domains') / cur_stat['dialogues'], 2)
+        else:
+            del stat[split]  # splits without dialogues are left out of the report
+    print(f'domains: {len(ontology["domains"])}')
+    print(json.dumps(stat, indent=4))
+
+
+if __name__ == '__main__':  # CLI: validate the named dataset directories, resolved relative to the cwd
+    from argparse import ArgumentParser
+
+    parser = ArgumentParser(description="evaluate pre-processed datasets")
+    parser.add_argument('datasets', metavar='dataset_name', nargs='*', help='dataset names to be evaluated')
+    parser.add_argument('--all', action='store_true', help='evaluate all datasets')
+    parser.add_argument('--no-int', action='store_true', help='not interrupted by exception')
+    parser.add_argument('--preprocess', '-p', action='store_true', help='run preprocess automatically')
+    args = parser.parse_args()
+
+    if args.all:
+        datasets = sorted(d for d in os.listdir() if os.path.isdir(d) and not d.startswith(('.', '_')))  # skip hidden/__pycache__ dirs
+    else:
+        datasets = args.datasets
+    if not datasets:
+        print('no dataset specified')
+        parser.print_help()
+        raise SystemExit(1)
+
+    print('datasets to be evaluated:', datasets)
+
+    fail = []
+
+    for name in datasets:
+        try:
+            print('')
+            if not os.path.isdir(name):
+                print(f'dataset {name} not found')
+                continue
+
+            print(f'checking {name}')
+            preprocess_file = os.path.join(f'{name}', 'preprocess.py')
+            if not os.path.exists(preprocess_file):
+                print('no preprocess.py')
+                if args.preprocess:
+                    print(f'skip evaluation of {name}')
+                    continue
+            if args.preprocess:
+                print('pre-processing')
+                import importlib
+                os.chdir(name)  # the preprocess script is run from inside the dataset directory
+                try:
+                    preprocess = importlib.import_module(f'{name}.preprocess')
+                    preprocess.preprocess()
+                finally:
+                    os.chdir('..')  # always restore the cwd so later datasets still resolve after a failure
+            ontology = check_ontology(name)
+            check_data(name, ontology)
+        except Exception as e:
+            if not args.no_int:
+                raise
+            print(f'{name} failed: {e!r}')  # with --no-int, report the error instead of dropping it silently
+            fail.append(name)
+
+    if not fail:
+        print('all datasets passed test')
+    else:
+        print('failed dataset(s):', fail)
diff --git a/data/unified_datasets/frames/README.md b/data/unified_datasets/frames/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..bd1fc1ba33b1f19b8dc41ca6b361eb2478564cb7
--- /dev/null
+++ b/data/unified_datasets/frames/README.md
@@ -0,0 +1,23 @@
+# README
+
+## Features
+
+- Annotations: dialogue act, character-level span for non-categorical slots.
+
+Statistics: 
+
+|       | \# dialogues | \# utterances | avg. turns | avg. tokens | \# domains |
+| ----- | ------------ | ------------- | ---------- | ----------- | ---------- |
+| train | 1369         | 19445         | 14.2       | 12.71       | 1          |
+
+## Main changes
+
+- domain is set to **travel**
+- slot-value pair changes: intent-book => book-"True", action-book => booked-"True"
+- some rare slot-value pairs are ignored
+- state and state update are not annotated
+- span info is obtained by string matching, covering 96.4% of non-categorical values
+
+## Original data
+
+https://www.microsoft.com/en-us/research/project/frames-dataset/#!download
\ No newline at end of file
diff --git a/data/unified_datasets/frames/data.zip b/data/unified_datasets/frames/data.zip
new file mode 100644
index 0000000000000000000000000000000000000000..d6b73c808d8682571e2cd060a2937c34abd06e4e
Binary files /dev/null and b/data/unified_datasets/frames/data.zip differ
diff --git a/data/unified_datasets/frames/ontology.json b/data/unified_datasets/frames/ontology.json
new file mode 100644
index 0000000000000000000000000000000000000000..4101d15b77135162e4cad5af5d433a434ad5c61d
--- /dev/null
+++ b/data/unified_datasets/frames/ontology.json
@@ -0,0 +1,1488 @@
+{
+  "domains": {
+    "travel": {
+      "description": "Book a vacation package containing round-trip flights and a hotel.",
+      "slots": {
+        "dst_city": {
+          "description": "Destination city",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "or_city": {
+          "description": "Origin city",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "str_date": {
+          "description": "Start date for the trip",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "n_adults": {
+          "description": "Number of adults",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "budget": {
+          "description": "The amount of money that the user has available to spend for the trip.",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "flex": {
+          "description": "Boolean value indicating whether the constraints are flexible",
+          "is_categorical": true,
+          "possible_values": [
+            "false",
+            "true"
+          ]
+        },
+        "duration": {
+          "description": "Duration of the trip",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "ref_anaphora": {
+          "description": "Words used to refer to a frame",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "price": {
+          "description": "Price of the trip including flights and hotel",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "end_date": {
+          "description": "End date for the trip",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "max_duration": {
+          "description": "Maximum number of days for the trip",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "name": {
+          "description": "Name of the hotel",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "category": {
+          "description": "Rating of the hotel (in number of stars)",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "wifi": {
+          "description": "Boolean value indicating whether or not the hotel offers free wifi",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "dep_time_or": {
+          "description": "Time of departure from origin city",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "n_children": {
+          "description": "Number of children",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "gst_rating": {
+          "description": "Rating of the hotel by guests (in number of stars)",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "parking": {
+          "description": "Boolean value indicating whether or not the hotel offers free parking",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "arr_time_or": {
+          "description": "Time of arrival to origin city",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "breakfast": {
+          "description": "Boolean value indicating whether or not the hotel offers free breakfast",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "count": {
+          "description": "Number of different packages",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "seat": {
+          "description": "Seat type (economy or business)",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "count_name": {
+          "description": "Number of different hotels",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "budget_ok": {
+          "description": "Boolean value indicating whether the package fits the budget",
+          "is_categorical": true,
+          "possible_values": [
+            "true"
+          ]
+        },
+        "arr_time_dst": {
+          "description": "Time of arrival to destination",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "dep_time_dst": {
+          "description": "Time of departure from destination",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "gym": {
+          "description": "Boolean value indicating whether or not the hotel offers gym",
+          "is_categorical": true,
+          "possible_values": [
+            "true"
+          ]
+        },
+        "spa": {
+          "description": "Boolean value indicating whether or not the hotel offers spa",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "downtown": {
+          "description": "Boolean value indicating whether or not the hotel is in the heart of the city",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "count_dst_city": {
+          "description": "Number of destination cities",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "min_duration": {
+          "description": "Minimum number of days for the trip",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "airport": {
+          "description": "Boolean value indicating whether or not the hotel is in the vicinity of an airport",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "beach": {
+          "description": "Boolean value indicating whether or not the hotel is in the vicinity of a beach",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "museum": {
+          "description": "Boolean value indicating whether or not the hotel is in the vicinity of a museum",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "theatre": {
+          "description": "Boolean value indicating whether or not the hotel is in the vicinity of a theatre",
+          "is_categorical": true,
+          "possible_values": [
+            "true"
+          ]
+        },
+        "park": {
+          "description": "Boolean value indicating whether or not the hotel is in the vicinity of a park",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "market": {
+          "description": "Boolean value indicating whether or not the hotel is in the vicinity of a market",
+          "is_categorical": true,
+          "possible_values": [
+            "true"
+          ]
+        },
+        "shopping": {
+          "description": "Boolean value indicating whether or not the hotel is in the vicinity of a shopping center",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "university": {
+          "description": "Boolean value indicating whether or not the hotel is in the vicinity of a university",
+          "is_categorical": true,
+          "possible_values": [
+            "true"
+          ]
+        },
+        "mall": {
+          "description": "Boolean value indicating whether or not the hotel is in the vicinity of a mall",
+          "is_categorical": true,
+          "possible_values": [
+            "true"
+          ]
+        },
+        "palace": {
+          "description": "Boolean value indicating whether or not the hotel is in the vicinity of a palace",
+          "is_categorical": true,
+          "possible_values": [
+            "true"
+          ]
+        },
+        "cathedral": {
+          "description": "Boolean value indicating whether or not the hotel is in the vicinity of a cathedral",
+          "is_categorical": true,
+          "possible_values": [
+            "true"
+          ]
+        }
+      }
+    }
+  },
+  "intents": {
+    "inform": {
+      "description": "Inform a slot value"
+    },
+    "suggest": {
+      "description": "Suggest a slot value or package that does not match the user's constraints"
+    },
+    "no_result": {
+      "description": "Tell the user that the database returned no results"
+    },
+    "negate": {
+      "description": "Negate something said by the other speaker"
+    },
+    "switch_frame": {
+      "description": "Switch to a frame"
+    },
+    "request": {
+      "description": "Ask for the value of a particular slot"
+    },
+    "affirm": {
+      "description": "Affirm something said by the other speaker"
+    },
+    "offer": {
+      "description": "Offer a package to the user"
+    },
+    "request_alts": {
+      "description": "Ask for other possibilities"
+    },
+    "request_compare": {
+      "description": "Ask the wizard to compare packages"
+    },
+    "confirm": {
+      "description": "Ask the other speaker to confirm a given slot value"
+    },
+    "hearmore": {
+      "description": "Ask the user if she'd like to hear more about a given package"
+    },
+    "moreinfo": {
+      "description": "Ask for more information on a given set of results"
+    }
+  },
+  "binary_dialogue_act": [
+    {
+      "intent": "affirm",
+      "domain": "travel",
+      "slot": "book",
+      "value": "true"
+    },
+    {
+      "intent": "affirm",
+      "domain": "travel",
+      "slot": "booked",
+      "value": "true"
+    },
+    {
+      "intent": "affirm",
+      "domain": "travel",
+      "slot": "str_date",
+      "value": ""
+    },
+    {
+      "intent": "affirm",
+      "domain": "travel",
+      "slot": "end_date",
+      "value": ""
+    },
+    {
+      "intent": "affirm",
+      "domain": "travel",
+      "slot": "n_adults",
+      "value": ""
+    },
+    {
+      "intent": "affirm",
+      "domain": "travel",
+      "slot": "budget",
+      "value": ""
+    },
+    {
+      "intent": "affirm",
+      "domain": "travel",
+      "slot": "duration",
+      "value": ""
+    },
+    {
+      "intent": "affirm",
+      "domain": "travel",
+      "slot": "dst_city",
+      "value": ""
+    },
+    {
+      "intent": "affirm",
+      "domain": "travel",
+      "slot": "name",
+      "value": ""
+    },
+    {
+      "intent": "affirm",
+      "domain": "travel",
+      "slot": "seat",
+      "value": ""
+    },
+    {
+      "intent": "affirm",
+      "domain": "travel",
+      "slot": "count",
+      "value": ""
+    },
+    {
+      "intent": "affirm",
+      "domain": "travel",
+      "slot": "count_name",
+      "value": ""
+    },
+    {
+      "intent": "affirm",
+      "domain": "travel",
+      "slot": "category",
+      "value": ""
+    },
+    {
+      "intent": "affirm",
+      "domain": "travel",
+      "slot": "n_children",
+      "value": ""
+    },
+    {
+      "intent": "affirm",
+      "domain": "travel",
+      "slot": "amenities",
+      "value": "true"
+    },
+    {
+      "intent": "affirm",
+      "domain": "travel",
+      "slot": "breakfast",
+      "value": ""
+    },
+    {
+      "intent": "affirm",
+      "domain": "travel",
+      "slot": "gst_rating",
+      "value": ""
+    },
+    {
+      "intent": "affirm",
+      "domain": "travel",
+      "slot": "max_duration",
+      "value": ""
+    },
+    {
+      "intent": "affirm",
+      "domain": "travel",
+      "slot": "or_city",
+      "value": ""
+    },
+    {
+      "intent": "affirm",
+      "domain": "travel",
+      "slot": "price",
+      "value": ""
+    },
+    {
+      "intent": "confirm",
+      "domain": "travel",
+      "slot": "str_date",
+      "value": ""
+    },
+    {
+      "intent": "confirm",
+      "domain": "travel",
+      "slot": "end_date",
+      "value": ""
+    },
+    {
+      "intent": "confirm",
+      "domain": "travel",
+      "slot": "or_city",
+      "value": "dontcare"
+    },
+    {
+      "intent": "confirm",
+      "domain": "travel",
+      "slot": "book",
+      "value": "true"
+    },
+    {
+      "intent": "confirm",
+      "domain": "travel",
+      "slot": "n_adults",
+      "value": ""
+    },
+    {
+      "intent": "confirm",
+      "domain": "travel",
+      "slot": "dst_city",
+      "value": ""
+    },
+    {
+      "intent": "confirm",
+      "domain": "travel",
+      "slot": "booked",
+      "value": "true"
+    },
+    {
+      "intent": "confirm",
+      "domain": "travel",
+      "slot": "budget",
+      "value": ""
+    },
+    {
+      "intent": "confirm",
+      "domain": "travel",
+      "slot": "max_duration",
+      "value": ""
+    },
+    {
+      "intent": "confirm",
+      "domain": "travel",
+      "slot": "price",
+      "value": ""
+    },
+    {
+      "intent": "confirm",
+      "domain": "travel",
+      "slot": "dst_city",
+      "value": "dontcare"
+    },
+    {
+      "intent": "confirm",
+      "domain": "travel",
+      "slot": "budget",
+      "value": "dontcare"
+    },
+    {
+      "intent": "hearmore",
+      "domain": "travel",
+      "slot": "amenities",
+      "value": ""
+    },
+    {
+      "intent": "hearmore",
+      "domain": "travel",
+      "slot": "price",
+      "value": ""
+    },
+    {
+      "intent": "inform",
+      "domain": "travel",
+      "slot": "book",
+      "value": "true"
+    },
+    {
+      "intent": "inform",
+      "domain": "travel",
+      "slot": "budget",
+      "value": ""
+    },
+    {
+      "intent": "inform",
+      "domain": "travel",
+      "slot": "amenities",
+      "value": "dontcare"
+    },
+    {
+      "intent": "inform",
+      "domain": "travel",
+      "slot": "booked",
+      "value": "true"
+    },
+    {
+      "intent": "inform",
+      "domain": "travel",
+      "slot": "str_date",
+      "value": "dontcare"
+    },
+    {
+      "intent": "inform",
+      "domain": "travel",
+      "slot": "budget",
+      "value": "dontcare"
+    },
+    {
+      "intent": "inform",
+      "domain": "travel",
+      "slot": "end_date",
+      "value": "dontcare"
+    },
+    {
+      "intent": "inform",
+      "domain": "travel",
+      "slot": "max_duration",
+      "value": "dontcare"
+    },
+    {
+      "intent": "inform",
+      "domain": "travel",
+      "slot": "dst_city",
+      "value": "dontcare"
+    },
+    {
+      "intent": "inform",
+      "domain": "travel",
+      "slot": "dst_city",
+      "value": ""
+    },
+    {
+      "intent": "inform",
+      "domain": "travel",
+      "slot": "str_date",
+      "value": ""
+    },
+    {
+      "intent": "inform",
+      "domain": "travel",
+      "slot": "end_date",
+      "value": ""
+    },
+    {
+      "intent": "inform",
+      "domain": "travel",
+      "slot": "or_city",
+      "value": "dontcare"
+    },
+    {
+      "intent": "inform",
+      "domain": "travel",
+      "slot": "n_adults",
+      "value": ""
+    },
+    {
+      "intent": "inform",
+      "domain": "travel",
+      "slot": "n_children",
+      "value": ""
+    },
+    {
+      "intent": "inform",
+      "domain": "travel",
+      "slot": "dep_time_or",
+      "value": ""
+    },
+    {
+      "intent": "inform",
+      "domain": "travel",
+      "slot": "arr_time_dst",
+      "value": ""
+    },
+    {
+      "intent": "inform",
+      "domain": "travel",
+      "slot": "dep_time_dst",
+      "value": ""
+    },
+    {
+      "intent": "inform",
+      "domain": "travel",
+      "slot": "arr_time_or",
+      "value": ""
+    },
+    {
+      "intent": "inform",
+      "domain": "travel",
+      "slot": "category",
+      "value": "dontcare"
+    },
+    {
+      "intent": "inform",
+      "domain": "travel",
+      "slot": "name",
+      "value": ""
+    },
+    {
+      "intent": "inform",
+      "domain": "travel",
+      "slot": "n_adults",
+      "value": "dontcare"
+    },
+    {
+      "intent": "inform",
+      "domain": "travel",
+      "slot": "max_duration",
+      "value": ""
+    },
+    {
+      "intent": "inform",
+      "domain": "travel",
+      "slot": "spa",
+      "value": "dontcare"
+    },
+    {
+      "intent": "inform",
+      "domain": "travel",
+      "slot": "amenities",
+      "value": ""
+    },
+    {
+      "intent": "inform",
+      "domain": "travel",
+      "slot": "vicinity",
+      "value": ""
+    },
+    {
+      "intent": "inform",
+      "domain": "travel",
+      "slot": "vicinity",
+      "value": "dontcare"
+    },
+    {
+      "intent": "inform",
+      "domain": "travel",
+      "slot": "vicinity",
+      "value": "false"
+    },
+    {
+      "intent": "inform",
+      "domain": "travel",
+      "slot": "amenities",
+      "value": "false"
+    },
+    {
+      "intent": "inform",
+      "domain": "travel",
+      "slot": "category",
+      "value": ""
+    },
+    {
+      "intent": "inform",
+      "domain": "travel",
+      "slot": "parking",
+      "value": "dontcare"
+    },
+    {
+      "intent": "inform",
+      "domain": "travel",
+      "slot": "price",
+      "value": ""
+    },
+    {
+      "intent": "inform",
+      "domain": "travel",
+      "slot": "duration",
+      "value": ""
+    },
+    {
+      "intent": "inform",
+      "domain": "travel",
+      "slot": "or_city",
+      "value": ""
+    },
+    {
+      "intent": "negate",
+      "domain": "travel",
+      "slot": "budget",
+      "value": ""
+    },
+    {
+      "intent": "negate",
+      "domain": "travel",
+      "slot": "dst_city",
+      "value": ""
+    },
+    {
+      "intent": "negate",
+      "domain": "travel",
+      "slot": "str_date",
+      "value": ""
+    },
+    {
+      "intent": "negate",
+      "domain": "travel",
+      "slot": "end_date",
+      "value": ""
+    },
+    {
+      "intent": "negate",
+      "domain": "travel",
+      "slot": "price",
+      "value": ""
+    },
+    {
+      "intent": "negate",
+      "domain": "travel",
+      "slot": "or_city",
+      "value": ""
+    },
+    {
+      "intent": "negate",
+      "domain": "travel",
+      "slot": "airport",
+      "value": ""
+    },
+    {
+      "intent": "negate",
+      "domain": "travel",
+      "slot": "booked",
+      "value": "true"
+    },
+    {
+      "intent": "negate",
+      "domain": "travel",
+      "slot": "n_adults",
+      "value": ""
+    },
+    {
+      "intent": "negate",
+      "domain": "travel",
+      "slot": "book",
+      "value": "true"
+    },
+    {
+      "intent": "negate",
+      "domain": "travel",
+      "slot": "shopping",
+      "value": ""
+    },
+    {
+      "intent": "negate",
+      "domain": "travel",
+      "slot": "duration",
+      "value": ""
+    },
+    {
+      "intent": "negate",
+      "domain": "travel",
+      "slot": "arr_time_or",
+      "value": ""
+    },
+    {
+      "intent": "negate",
+      "domain": "travel",
+      "slot": "or_city",
+      "value": "dontcare"
+    },
+    {
+      "intent": "negate",
+      "domain": "travel",
+      "slot": "gst_rating",
+      "value": ""
+    },
+    {
+      "intent": "negate",
+      "domain": "travel",
+      "slot": "category",
+      "value": ""
+    },
+    {
+      "intent": "no_result",
+      "domain": "travel",
+      "slot": "str_date",
+      "value": ""
+    },
+    {
+      "intent": "no_result",
+      "domain": "travel",
+      "slot": "dst_city",
+      "value": ""
+    },
+    {
+      "intent": "no_result",
+      "domain": "travel",
+      "slot": "end_date",
+      "value": ""
+    },
+    {
+      "intent": "no_result",
+      "domain": "travel",
+      "slot": "or_city",
+      "value": ""
+    },
+    {
+      "intent": "no_result",
+      "domain": "travel",
+      "slot": "dep_time_or",
+      "value": ""
+    },
+    {
+      "intent": "no_result",
+      "domain": "travel",
+      "slot": "dep_time_dst",
+      "value": ""
+    },
+    {
+      "intent": "no_result",
+      "domain": "travel",
+      "slot": "budget",
+      "value": ""
+    },
+    {
+      "intent": "no_result",
+      "domain": "travel",
+      "slot": "max_duration",
+      "value": ""
+    },
+    {
+      "intent": "no_result",
+      "domain": "travel",
+      "slot": "museum",
+      "value": ""
+    },
+    {
+      "intent": "no_result",
+      "domain": "travel",
+      "slot": "n_adults",
+      "value": ""
+    },
+    {
+      "intent": "no_result",
+      "domain": "travel",
+      "slot": "n_children",
+      "value": ""
+    },
+    {
+      "intent": "no_result",
+      "domain": "travel",
+      "slot": "downtown",
+      "value": ""
+    },
+    {
+      "intent": "offer",
+      "domain": "travel",
+      "slot": "n_adults",
+      "value": ""
+    },
+    {
+      "intent": "offer",
+      "domain": "travel",
+      "slot": "n_children",
+      "value": ""
+    },
+    {
+      "intent": "offer",
+      "domain": "travel",
+      "slot": "str_date",
+      "value": ""
+    },
+    {
+      "intent": "offer",
+      "domain": "travel",
+      "slot": "end_date",
+      "value": ""
+    },
+    {
+      "intent": "offer",
+      "domain": "travel",
+      "slot": "name",
+      "value": ""
+    },
+    {
+      "intent": "offer",
+      "domain": "travel",
+      "slot": "price",
+      "value": ""
+    },
+    {
+      "intent": "offer",
+      "domain": "travel",
+      "slot": "budget",
+      "value": ""
+    },
+    {
+      "intent": "offer",
+      "domain": "travel",
+      "slot": "dst_city",
+      "value": ""
+    },
+    {
+      "intent": "offer",
+      "domain": "travel",
+      "slot": "duration",
+      "value": ""
+    },
+    {
+      "intent": "offer",
+      "domain": "travel",
+      "slot": "amenities",
+      "value": ""
+    },
+    {
+      "intent": "offer",
+      "domain": "travel",
+      "slot": "vicinity",
+      "value": "false"
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "duration",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "price",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "amenities",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "vicinity",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "str_date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "n_adults",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "n_children",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "budget",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "or_city",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "name",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "category",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "breakfast",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "end_date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "max_duration",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "seat",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "dst_city",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "count_dst_city",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "count_name",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "dep_time_dst",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "dep_time_or",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "arr_time_dst",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "arr_time_or",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "wifi",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "gym",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "flex",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "parking",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "spa",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "gst_rating",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "count",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "beach",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "park",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "museum",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "downtown",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "airport",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "mall",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "shopping",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "budget",
+      "value": "dontcare"
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "university",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel",
+      "slot": "palace",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "travel",
+      "slot": "dst_city",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "travel",
+      "slot": "name",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "travel",
+      "slot": "or_city",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "travel",
+      "slot": "dep_time_dst",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "travel",
+      "slot": "amenities",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "travel",
+      "slot": "str_date",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "travel",
+      "slot": "end_date",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "travel",
+      "slot": "vicinity",
+      "value": ""
+    },
+    {
+      "intent": "request_compare",
+      "domain": "travel",
+      "slot": "str_date",
+      "value": ""
+    },
+    {
+      "intent": "request_compare",
+      "domain": "travel",
+      "slot": "category",
+      "value": ""
+    },
+    {
+      "intent": "request_compare",
+      "domain": "travel",
+      "slot": "amenities",
+      "value": ""
+    },
+    {
+      "intent": "request_compare",
+      "domain": "travel",
+      "slot": "vicinity",
+      "value": ""
+    },
+    {
+      "intent": "request_compare",
+      "domain": "travel",
+      "slot": "wifi",
+      "value": ""
+    },
+    {
+      "intent": "request_compare",
+      "domain": "travel",
+      "slot": "parking",
+      "value": ""
+    },
+    {
+      "intent": "request_compare",
+      "domain": "travel",
+      "slot": "breakfast",
+      "value": ""
+    },
+    {
+      "intent": "request_compare",
+      "domain": "travel",
+      "slot": "seat",
+      "value": ""
+    },
+    {
+      "intent": "request_compare",
+      "domain": "travel",
+      "slot": "price",
+      "value": ""
+    },
+    {
+      "intent": "request_compare",
+      "domain": "travel",
+      "slot": "gst_rating",
+      "value": ""
+    },
+    {
+      "intent": "request_compare",
+      "domain": "travel",
+      "slot": "dst_city",
+      "value": ""
+    },
+    {
+      "intent": "request_compare",
+      "domain": "travel",
+      "slot": "name",
+      "value": ""
+    },
+    {
+      "intent": "request_compare",
+      "domain": "travel",
+      "slot": "end_date",
+      "value": ""
+    },
+    {
+      "intent": "request_compare",
+      "domain": "travel",
+      "slot": "dep_time_or",
+      "value": ""
+    },
+    {
+      "intent": "request_compare",
+      "domain": "travel",
+      "slot": "arr_time_dst",
+      "value": ""
+    },
+    {
+      "intent": "request_compare",
+      "domain": "travel",
+      "slot": "dep_time_dst",
+      "value": ""
+    },
+    {
+      "intent": "request_compare",
+      "domain": "travel",
+      "slot": "arr_time_or",
+      "value": ""
+    },
+    {
+      "intent": "request_compare",
+      "domain": "travel",
+      "slot": "budget",
+      "value": ""
+    },
+    {
+      "intent": "request_compare",
+      "domain": "travel",
+      "slot": "duration",
+      "value": ""
+    },
+    {
+      "intent": "request_compare",
+      "domain": "travel",
+      "slot": "beach",
+      "value": ""
+    },
+    {
+      "intent": "suggest",
+      "domain": "travel",
+      "slot": "dst_city",
+      "value": ""
+    },
+    {
+      "intent": "suggest",
+      "domain": "travel",
+      "slot": "str_date",
+      "value": ""
+    },
+    {
+      "intent": "suggest",
+      "domain": "travel",
+      "slot": "end_date",
+      "value": ""
+    },
+    {
+      "intent": "suggest",
+      "domain": "travel",
+      "slot": "or_city",
+      "value": ""
+    },
+    {
+      "intent": "suggest",
+      "domain": "travel",
+      "slot": "booked",
+      "value": "true"
+    },
+    {
+      "intent": "suggest",
+      "domain": "travel",
+      "slot": "budget",
+      "value": ""
+    },
+    {
+      "intent": "suggest",
+      "domain": "travel",
+      "slot": "budget",
+      "value": "dontcare"
+    },
+    {
+      "intent": "suggest",
+      "domain": "travel",
+      "slot": "name",
+      "value": ""
+    },
+    {
+      "intent": "suggest",
+      "domain": "travel",
+      "slot": "price",
+      "value": ""
+    },
+    {
+      "intent": "suggest",
+      "domain": "travel",
+      "slot": "amenities",
+      "value": ""
+    },
+    {
+      "intent": "suggest",
+      "domain": "travel",
+      "slot": "vicinity",
+      "value": ""
+    },
+    {
+      "intent": "suggest",
+      "domain": "travel",
+      "slot": "museum",
+      "value": ""
+    },
+    {
+      "intent": "suggest",
+      "domain": "travel",
+      "slot": "park",
+      "value": ""
+    },
+    {
+      "intent": "suggest",
+      "domain": "travel",
+      "slot": "beach",
+      "value": ""
+    },
+    {
+      "intent": "suggest",
+      "domain": "travel",
+      "slot": "n_adults",
+      "value": ""
+    },
+    {
+      "intent": "suggest",
+      "domain": "travel",
+      "slot": "n_children",
+      "value": ""
+    },
+    {
+      "intent": "suggest",
+      "domain": "travel",
+      "slot": "market",
+      "value": ""
+    },
+    {
+      "intent": "suggest",
+      "domain": "travel",
+      "slot": "or_city",
+      "value": "dontcare"
+    },
+    {
+      "intent": "switch_frame",
+      "domain": "travel",
+      "slot": "museum",
+      "value": ""
+    },
+    {
+      "intent": "switch_frame",
+      "domain": "travel",
+      "slot": "str_date",
+      "value": ""
+    },
+    {
+      "intent": "switch_frame",
+      "domain": "travel",
+      "slot": "end_date",
+      "value": ""
+    },
+    {
+      "intent": "switch_frame",
+      "domain": "travel",
+      "slot": "gst_rating",
+      "value": ""
+    }
+  ],
+  "state": {}
+}
\ No newline at end of file
diff --git a/data/unified_datasets/frames/original_data.zip b/data/unified_datasets/frames/original_data.zip
new file mode 100644
index 0000000000000000000000000000000000000000..07ca92735ee93112b830e6181aa9832a091698d7
Binary files /dev/null and b/data/unified_datasets/frames/original_data.zip differ
diff --git a/data/unified_datasets/frames/preprocess.py b/data/unified_datasets/frames/preprocess.py
new file mode 100644
index 0000000000000000000000000000000000000000..64a61f0dcae4d14e5dcc4e8e377bc5d7699a7eeb
--- /dev/null
+++ b/data/unified_datasets/frames/preprocess.py
@@ -0,0 +1,341 @@
+import zipfile
+import json
+import os
+from pprint import pprint
+from copy import deepcopy
+from collections import Counter
+from tqdm import tqdm
+from convlab2.util.file_util import read_zipped_json, write_zipped_json
+import re
+self_dir = os.path.dirname(os.path.abspath(__file__))  # folder of this script; original_data.zip is read from here
+
+
+intent2des = {  # dialogue-act intent name -> natural-language description
+    "inform": "Inform a slot value",
+    "offer": "Offer a package to the user",
+    "request": "Ask for the value of a particular slot",
+    "switch_frame": "Switch to a frame",
+    "suggest": "Suggest a slot value or package that does not match the user's constraints",
+    "no_result": "Tell the user that the database returned no results",
+    "thankyou": "Thank the other speaker",
+    "sorry": "Apologize to the user",
+    "greeting": "Greet the other speaker",
+    "affirm": "Affirm something said by the other speaker",
+    "negate": "Negate something said by the other speaker",
+    "confirm": "Ask the other speaker to confirm a given slot value",
+    "moreinfo": "Ask for more information on a given set of results",
+    "goodbye": "Say goodbye to the other speaker",
+    "request_alts": "Ask for other possibilities",
+    "request_compare": "Ask the wizard to compare packages",
+    "hearmore": "Ask the user if she'd like to hear more about a given package",
+    "you_are_welcome": "Tell the user she is welcome",
+    "canthelp": "Tell the user you cannot answer her request",
+    "reject": "Tell the user you did not understand what she meant"
+}
+
+slot2des = {  # slot name -> natural-language description; normalize_da drops slots that are not listed here
+    "book": "Find a trip to book",
+    "dst_city": "Destination city",
+    "or_city": "Origin city",
+    "str_date": "Start date for the trip",
+    "n_adults": "Number of adults",
+    "budget": "The amount of money that the user has available to spend for the trip.",
+    "end_date": "End date for the trip",
+    "flex": "Boolean value indicating whether the constraints are flexible",
+    "duration": "Duration of the trip",
+    "ref_anaphora": "Words used to refer to a frame",
+    "price": "Price of the trip including flights and hotel",
+    "max_duration": "Maximum number of days for the trip",
+    "amenities": "Number of amenities",
+    "vicinity": "Vicinity of the hotel",
+    "name": "Name of the hotel",
+    "category": "Rating of the hotel (in number of stars)",
+    "wifi": "Boolean value indicating whether or not the hotel offers free wifi",
+    "booked": "Booked a trip",
+    "dep_time_or": "Time of departure from origin city",
+    "n_children": "Number of children",
+    "gst_rating": "Rating of the hotel by guests (in number of stars)",
+    "parking": "Boolean value indicating whether or not the hotel offers free parking",
+    "arr_time_or": "Time of arrival to origin city",
+    "breakfast": "Boolean value indicating whether or not the hotel offers free breakfast",
+    "count": "Number of different packages",
+    "seat": "Seat type (economy or business)",
+    "count_name": "Number of different hotels",
+    "count_dst_city": "Number of destination cities",
+    "budget_ok": "Boolean value indicating whether the package fits the budget",
+    "arr_time_dst": "Time of arrival to destination",
+    "dep_time_dst": "Time of departure from destination",
+    "gym": "Boolean value indicating whether or not the hotel offers gym",
+    "spa": "Boolean value indicating whether or not the hotel offers spa",
+    "downtown": "Boolean value indicating whether or not the hotel is in the heart of the city",
+    "min_duration": "Minimum number of days for the trip",
+    "airport": "Boolean value indicating whether or not the hotel is in the vicinity of an airport",
+    "beach": "Boolean value indicating whether or not the hotel is in the vicinity of a beach",
+    "museum": "Boolean value indicating whether or not the hotel is in the vicinity of a museum",
+    "theatre": "Boolean value indicating whether or not the hotel is in the vicinity of a theatre",
+    "park": "Boolean value indicating whether or not the hotel is in the vicinity of a park",
+    "market": "Boolean value indicating whether or not the hotel is in the vicinity of a market",
+    "shopping": "Boolean value indicating whether or not the hotel is in the vicinity of a shopping center",
+    "university": "Boolean value indicating whether or not the hotel is in the vicinity of an university",
+    "mall": "Boolean value indicating whether or not the hotel is in the vicinity of a mall",
+    "palace": "Boolean value indicating whether or not the hotel is in the vicinity of a palace",
+    "cathedral": "Boolean value indicating whether or not the hotel is in the vicinity of a cathedral",
+    "no_result": "Boolean value indicating whether there is no result match user's constraints"
+}
+
+
+def get_slot_type(slot):
+    '''Return 'binary', 'non-categorical' or 'categorical' for a known slot name, otherwise None.'''
+    slots_by_type = {
+        'binary': {'book', 'booked', 'vicinity', 'amenities'},
+        'non-categorical': {'dst_city', 'or_city', 'str_date', 'end_date', 'duration', 'min_duration',
+                            'max_duration', 'dep_time_or', 'arr_time_or', 'arr_time_dst', 'dep_time_dst',
+                            'n_adults', 'n_children', 'budget', 'price', 'ref_anaphora', 'name', 'category',
+                            'gst_rating', 'count', 'count_name', 'count_dst_city', 'seat'},
+        'categorical': {'budget_ok', 'flex', 'wifi', 'parking', 'breakfast', 'gym', 'spa', 'downtown',
+                        'airport', 'beach', 'museum', 'theatre', 'park', 'market', 'shopping', 'university',
+                        'mall', 'palace', 'cathedral'},
+    }
+    return next((kind for kind, names in slots_by_type.items() if slot in names), None)
+
+
+digit2word = {  # spelled-out form of '0'..'10'; pharse_in_sen falls back to it when the digits are not found
+    '0': 'zero', '1': 'one', '2': 'two', '3': 'three', '4': 'four', '5': 'five',
+    '6': 'six', '7': 'seven', '8': 'eight', '9': 'nine', '10': 'ten'
+}
+
+match = {  # tally of pharse_in_sen outcomes, one increment per call
+    '0': 0,  # phrase not found
+    '1': 0,  # phrase found exactly once
+    '>1': 0,  # phrase found more than once
+}
+
+
+def pharse_in_sen(phrase, sen):
+    '''
+    Locate ``phrase`` in ``sen`` as a delimited token (case-insensitive).
+
+    When a purely numeric phrase is not found as digits and it is a key of
+    ``digit2word``, its spelled-out form (e.g. '2' -> 'two') is tried as well.
+    Every call increments exactly one counter of the module-level ``match``
+    dict: '1' (one hit), '>1' (several hits) or '0' (no hit).
+
+    :param phrase: str, value to look for
+    :param sen: str, sentence to search in
+    :return: ((start, end), num) -- span of the first hit and the number of
+        hits counted by findall, or ((None, None), 0) when nothing matched
+    '''
+    assert isinstance(phrase, str)
+    # Raw strings: '\s', '\.', '\?' and '\D' are invalid escape sequences in
+    # ordinary string literals (SyntaxWarning since Python 3.12).
+    # pw: the phrase must be delimited by whitespace/punctuation or a string edge.
+    pw = r'(^|[\s,\.:\?!-])(?P<v>{})([\s,\.:\?!-]|$)'
+    # pn: additionally rejects ',', '.' or ':' boundaries that are adjacent to
+    # another digit, so '5' is not found inside '2.5' or '5,000'.
+    pn = r'(^|[\s\?!-]|\D[,\.:])(?P<v>{})($|[\s\?!-]|[,\.:]\D|[,\.:]$)'
+    # (pattern, text) pairs tried in order: the phrase itself, then its
+    # spelled-out form when the phrase is a number listed in digit2word.
+    candidates = [(pn if phrase.isdigit() else pw, phrase)]
+    if phrase.isdigit() and phrase in digit2word:
+        candidates.append((pw, digit2word[phrase]))
+    for pattern, text in candidates:
+        p = re.compile(pattern.format(re.escape(text)), re.I)
+        m = p.search(sen)
+        if m:
+            num = len(p.findall(sen))
+            match['>1' if num > 1 else '1'] += 1
+            return m.span('v'), num
+    match['0'] += 1
+    return (None, None), 0
+
+
+def iter_over_acts(acts):
+    '''Yield (intent, slot, value) for every act argument; frame references are flattened, 'id' is skipped.'''
+    frame_keys = ('ref', 'read', 'write')
+    for act in acts:
+        intent = act['name']
+        for arg in act['args']:
+            key = arg['key']
+            if key in frame_keys:
+                assert isinstance(arg['val'], list)
+                for frame in arg['val']:
+                    for kv in frame['annotations']:
+                        if kv['key'] in frame_keys:
+                            print(kv, frame)
+                            assert False
+                        yield intent, kv['key'], kv.get('val')
+            elif key != 'id':
+                yield intent, key, arg.get('val', None)
+
+
+def normalize_da(intent, slot, value, utterance):
+    '''
+    Convert one raw annotation into a dialogue act of the unified format.
+
+    The annotation keys 'intent' and 'action' are renamed to the slots 'book'
+    and 'booked'; any other key that is missing from ``slot2des`` is dropped.
+    A string value is searched for in ``utterance`` with ``pharse_in_sen`` and
+    replaced by the matching piece of the utterance and its character span.
+
+    :param intent: str, dialogue-act name, copied into the result unchanged
+    :param slot: str, annotation key
+    :param value: None, str or bool; any other type raises AssertionError
+    :param utterance: str, text in which string values are located
+    :return: (slot_type, act) where slot_type is 'binary', 'categorical' or
+        'non-categorical' and act is a dict with the keys intent, domain,
+        slot and value (plus start and end for non-categorical acts), or
+        (None, None) when the annotation is dropped
+    '''
+    if slot == 'intent':
+        slot = 'book'
+    elif slot == 'action':
+        slot = 'booked'
+    elif slot not in slot2des:
+        # ignore some rare slot
+        return None, None
+
+    def make_act(act_value, **span):
+        # all acts share one layout; only the value and the optional span vary
+        return dict(intent=intent, domain='travel', slot=slot, value=act_value, **span)
+
+    if slot in ('book', 'booked'):
+        # booking acts always get the value 'True', whatever the annotation held
+        return 'binary', make_act('True')
+    if value is None or value == '':
+        return 'binary', make_act('')
+    if value == '-1':
+        # '-1' is stored as 'dontcare'
+        return 'binary', make_act('dontcare')
+    if isinstance(value, bool):
+        slot_type = get_slot_type(slot)
+        # the message is a tuple; print() as a message would only ever yield None
+        assert slot_type in ('categorical', 'binary'), (slot, value)
+        return slot_type, make_act(str(value))
+    if not isinstance(value, str):
+        # raised explicitly so that ``python -O`` cannot turn this into a silent None
+        raise AssertionError('unsupported value {!r} for slot {}'.format(value, slot))
+
+    slot_type = get_slot_type(slot)
+    # free-text values are only expected for non-categorical slots
+    assert slot_type == 'non-categorical'
+    (start, end), num = pharse_in_sen(value, utterance)
+    if not num:
+        # retry with spellings that differ from the annotation only in spacing;
+        # each entry is (variant applies to this annotation, variant text)
+        variants = (
+            (slot == 'gst_rating', ' / '.join(value.split('/'))),
+            ('a. m' in value, value.replace('a. m', 'a.m')),
+            ('p. m' in value, value.replace('p. m', 'p.m')),
+            (slot == 'price', value.replace('USD', ' USD')),
+        )
+        for applies, variant in variants:
+            if applies and pharse_in_sen(variant, utterance)[1]:
+                value = variant
+                break
+        else:
+            # few wrong annotation
+            return None, None
+        # search once more for the accepted variant to obtain its span
+        (start, end), num = pharse_in_sen(value, utterance)
+        assert num, (value, utterance)
+        if not num:
+            # only reachable when asserts are disabled (python -O)
+            return None, None
+    return slot_type, make_act(utterance[start:end], start=start, end=end)
+
+
+def preprocess():
+    processed_dialogue = []
+    ontology = {'domains': {'travel':
+                                {"description": "Book a vacation package containing round-trip flights and a hotel.",
+                                 "slots": {}}},
+                'intents': {},
+                'binary_dialogue_act': [],
+                'state': {}}
+    original_zipped_path = os.path.join(self_dir, 'original_data.zip')
+    new_dir = os.path.join(self_dir, 'original_data')
+    if not os.path.exists(original_zipped_path):
+        raise FileNotFoundError(original_zipped_path)
+    if not os.path.exists(os.path.join(self_dir, 'data.zip')) or not os.path.exists(os.path.join(self_dir, 'ontology.json')):
+        print('unzip to', new_dir)
+        print('This may take several minutes')
+        archive = zipfile.ZipFile(original_zipped_path, 'r')
+        archive.extractall(new_dir)
+        data = json.load(open(os.path.join(new_dir, 'frames.json')))
+        # json.dump(data, open(os.path.join(new_dir, 'original_data.json'), 'w'), indent=2)
+        cnt = 1
+        for d in tqdm(data, desc='dialogue'):
+            dialogue = {
+                "dataset": 'frames',
+                "data_split": 'train',
+                "dialogue_id": 'frames_' + str(cnt),
+                "original_id": d['id'],
+                "user_id": d['user_id'],
+                "system_id": d['wizard_id'],
+                "userSurveyRating": d['labels']['userSurveyRating'],
+                "wizardSurveyTaskSuccessful": d['labels']['wizardSurveyTaskSuccessful'],
+                "domains": ['travel'],
+                "turns": []
+            }
+            # state = deepcopy(ontology['state']['travel'])
+            for utt_idx, t in enumerate(d['turns']):
+                speaker = 'system' if t['author']=='wizard' else t['author']
+                turn = {
+                    'speaker': speaker,
+                    'utterance': t['text'],
+                    'utt_idx': utt_idx,
+                    'dialogue_act': {
+                        'binary': [],
+                        'categorical': [],
+                        'non-categorical': [],
+                    },
+                }
+                for intent, slot, value in iter_over_acts(t['labels']['acts']):
+                    da_type, da = normalize_da(intent, slot, value, t['text'])
+                    if da is not None:
+                        da['value'] = da['value'].lower()
+                        turn['dialogue_act'][da_type].append(da)
+                        slot = da['slot']
+                        value = da['value']
+                        if da_type == 'binary':
+                            if da not in ontology['binary_dialogue_act']:
+                                ontology['binary_dialogue_act'].append(da)
+                        else:
+                            ontology['domains']['travel']['slots'].setdefault(slot, {
+                                "description": slot2des[slot],
+                                "is_categorical": da_type=='categorical',
+                                "possible_values": []
+                            })
+                            if da_type == 'categorical' \
+                                    and value not in ontology['domains']['travel']['slots'][slot]['possible_values']:
+                                ontology['domains']['travel']['slots'][slot]['possible_values'].append(value)
+                        ontology['intents'].setdefault(intent, {
+                            "description": intent2des[intent]
+                        })
+                # state
+                if speaker == 'user':
+                    turn['state'] = {}
+                    turn['state_update'] = {
+                        'categorical': [],
+                        'non-categorical': [],
+                    }
+                dialogue['turns'].append(deepcopy(turn))
+            cnt += 1
+            if len(dialogue['turns']) % 2 == 0:
+                dialogue['turns'] = dialogue['turns'][:-1]
+            processed_dialogue.append(deepcopy(dialogue))
+        ontology['binary_dialogue_act'] = sorted(ontology['binary_dialogue_act'], key=lambda x: x['intent'])
+        json.dump(ontology, open(os.path.join(self_dir, 'ontology.json'), 'w'), indent=2)
+        json.dump(processed_dialogue, open('data.json', 'w'), indent=2)
+        write_zipped_json(os.path.join(self_dir, 'data.zip'), 'data.json')
+        os.remove('data.json')
+    else:
+        # read from file
+        processed_dialogue = read_zipped_json(os.path.join(self_dir, 'data.zip'), 'data.json')
+        ontology = json.load(open(os.path.join(self_dir, 'ontology.json')))
+    return processed_dialogue, ontology
+
+
+# Script entry point: run the conversion, then print `match`.
+# NOTE(review): `match` is defined outside this section; the trailing comment
+# appears to record its value from a previous run - confirm.
+if __name__ == '__main__':
+    preprocess()
+    print(match) # {'0': 271, '1': 29333, '>1': 806}
diff --git a/data/unified_datasets/metalwoz/README.md b/data/unified_datasets/metalwoz/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..e49e76f5a938161fa1f39f5f762173f5841aabe6
--- /dev/null
+++ b/data/unified_datasets/metalwoz/README.md
@@ -0,0 +1,17 @@
+# README
+
+## Features
+
+No utterance-level annotations; only the domain of each dialogue is annotated.
+
+Statistics: 
+
+|       | \# dialogues | \# utterances | avg. turns | avg. tokens | \# domains |
+| ----- | ------------ | ------------- | ---------- | ----------- | ---------- |
+| train | 37884         | 362450       | 9.57    | 7.66       | -          |
+| test | 2319        | 21949         | 9.46       | 8.23       | -          |
+
+
+## Original data
+
+- https://www.microsoft.com/en-us/research/project/metalwoz/
diff --git a/data/unified_datasets/metalwoz/data.zip b/data/unified_datasets/metalwoz/data.zip
new file mode 100644
index 0000000000000000000000000000000000000000..ccd14f97ff4f8be6244d1634f33ca3b6ecda6dc8
Binary files /dev/null and b/data/unified_datasets/metalwoz/data.zip differ
diff --git a/data/unified_datasets/metalwoz/metalwoz-test-v1.zip b/data/unified_datasets/metalwoz/metalwoz-test-v1.zip
new file mode 100644
index 0000000000000000000000000000000000000000..040153c440767b113a805e68d7e9851e2e066c06
Binary files /dev/null and b/data/unified_datasets/metalwoz/metalwoz-test-v1.zip differ
diff --git a/data/unified_datasets/metalwoz/metalwoz-v1.zip b/data/unified_datasets/metalwoz/metalwoz-v1.zip
new file mode 100644
index 0000000000000000000000000000000000000000..eff7551b322eb7c3428d706fa660fce9c776aced
Binary files /dev/null and b/data/unified_datasets/metalwoz/metalwoz-v1.zip differ
diff --git a/data/unified_datasets/metalwoz/ontology.json b/data/unified_datasets/metalwoz/ontology.json
new file mode 100644
index 0000000000000000000000000000000000000000..099a53528f47e5962b053c649d3e4a56cf0819c8
--- /dev/null
+++ b/data/unified_datasets/metalwoz/ontology.json
@@ -0,0 +1,211 @@
+{
+    "domains": {
+        "AGREEMENT_BOT": {
+            "description": "",
+            "slots": {}
+        },
+        "ALARM_SET": {
+            "description": "",
+            "slots": {}
+        },
+        "APARTMENT_FINDER": {
+            "description": "",
+            "slots": {}
+        },
+        "APPOINTMENT_REMINDER": {
+            "description": "",
+            "slots": {}
+        },
+        "AUTO_SORT": {
+            "description": "",
+            "slots": {}
+        },
+        "BANK_BOT": {
+            "description": "",
+            "slots": {}
+        },
+        "BUS_SCHEDULE_BOT": {
+            "description": "",
+            "slots": {}
+        },
+        "CATALOGUE_BOT": {
+            "description": "",
+            "slots": {}
+        },
+        "CHECK_STATUS": {
+            "description": "",
+            "slots": {}
+        },
+        "CITY_INFO": {
+            "description": "",
+            "slots": {}
+        },
+        "CONTACT_MANAGER": {
+            "description": "",
+            "slots": {}
+        },
+        "DECIDER_BOT": {
+            "description": "",
+            "slots": {}
+        },
+        "EDIT_PLAYLIST": {
+            "description": "",
+            "slots": {}
+        },
+        "EVENT_RESERVE": {
+            "description": "",
+            "slots": {}
+        },
+        "GAME_RULES": {
+            "description": "",
+            "slots": {}
+        },
+        "GEOGRAPHY": {
+            "description": "",
+            "slots": {}
+        },
+        "GUINESS_CHECK": {
+            "description": "",
+            "slots": {}
+        },
+        "HOME_BOT": {
+            "description": "",
+            "slots": {}
+        },
+        "HOW_TO_BASIC": {
+            "description": "",
+            "slots": {}
+        },
+        "INSURANCE": {
+            "description": "",
+            "slots": {}
+        },
+        "LIBRARY_REQUEST": {
+            "description": "",
+            "slots": {}
+        },
+        "LOOK_UP_INFO": {
+            "description": "",
+            "slots": {}
+        },
+        "MAKE_RESTAURANT_RESERVATIONS": {
+            "description": "",
+            "slots": {}
+        },
+        "MOVIE_LISTINGS": {
+            "description": "",
+            "slots": {}
+        },
+        "MUSIC_SUGGESTER": {
+            "description": "",
+            "slots": {}
+        },
+        "NAME_SUGGESTER": {
+            "description": "",
+            "slots": {}
+        },
+        "ORDER_PIZZA": {
+            "description": "",
+            "slots": {}
+        },
+        "PET_ADVICE": {
+            "description": "",
+            "slots": {}
+        },
+        "PHONE_PLAN_BOT": {
+            "description": "",
+            "slots": {}
+        },
+        "PHONE_SETTINGS": {
+            "description": "",
+            "slots": {}
+        },
+        "PLAY_TIMES": {
+            "description": "",
+            "slots": {}
+        },
+        "POLICY_BOT": {
+            "description": "",
+            "slots": {}
+        },
+        "PRESENT_IDEAS": {
+            "description": "",
+            "slots": {}
+        },
+        "PROMPT_GENERATOR": {
+            "description": "",
+            "slots": {}
+        },
+        "QUOTE_OF_THE_DAY_BOT": {
+            "description": "",
+            "slots": {}
+        },
+        "RESTAURANT_PICKER": {
+            "description": "",
+            "slots": {}
+        },
+        "SCAM_LOOKUP": {
+            "description": "",
+            "slots": {}
+        },
+        "SHOPPING": {
+            "description": "",
+            "slots": {}
+        },
+        "SKI_BOT": {
+            "description": "",
+            "slots": {}
+        },
+        "SPORTS_INFO": {
+            "description": "",
+            "slots": {}
+        },
+        "STORE_DETAILS": {
+            "description": "",
+            "slots": {}
+        },
+        "TIME_ZONE": {
+            "description": "",
+            "slots": {}
+        },
+        "UPDATE_CALENDAR": {
+            "description": "",
+            "slots": {}
+        },
+        "UPDATE_CONTACT": {
+            "description": "",
+            "slots": {}
+        },
+        "WEATHER_CHECK": {
+            "description": "",
+            "slots": {}
+        },
+        "WEDDING_PLANNER": {
+            "description": "",
+            "slots": {}
+        },
+        "WHAT_IS_IT": {
+            "description": "",
+            "slots": {}
+        },
+        "BOOKING_FLIGHT": {
+            "description": "",
+            "slots": {}
+        },
+        "HOTEL_RESERVE": {
+            "description": "",
+            "slots": {}
+        },
+        "TOURISM": {
+            "description": "",
+            "slots": {}
+        },
+        "VACATION_IDEAS": {
+            "description": "",
+            "slots": {}
+        }
+    },
+    "intents": {},
+    "binary_dialogue_act": [],
+    "state": {}
+}
\ No newline at end of file
diff --git a/data/unified_datasets/metalwoz/preprocess.py b/data/unified_datasets/metalwoz/preprocess.py
new file mode 100644
index 0000000000000000000000000000000000000000..c075e7d4541169a7fd9c611503d6ed9d8ae69817
--- /dev/null
+++ b/data/unified_datasets/metalwoz/preprocess.py
@@ -0,0 +1,89 @@
+import json
+import os
+from zipfile import ZipFile, ZIP_DEFLATED
+
+import json_lines
+
+
+# Dataset name; used as the `dataset` field and as the `dialogue_id` prefix.
+dataset = 'metalwoz'
+# Directory containing this script; the outputs are written next to it.
+self_dir = os.path.dirname(os.path.abspath(__file__))
+# Only referenced by the commented-out alternative below.
+DATA_PATH = os.path.join(os.path.dirname(os.path.dirname(self_dir)), 'data')
+# origin_data_dir = os.path.join(DATA_PATH, dataset)
+# The original archives are read from this script's own directory.
+origin_data_dir = self_dir
+
+
+def preprocess():
+    ontology = {
+        'domains': {},
+        'intents': {},
+        'binary_dialogue_act': [],
+        'state': {}
+    }
+
+    def process_dialog(ori_dialog, split, dialog_id):
+        domain = ori_dialog['domain']
+        ontology['domains'][domain] = {
+            'description': "",
+            'slots': {}
+        }
+        dialog = {
+            "dataset": dataset,
+            "data_split": split,
+            "dialogue_id": f'{dataset}_{dialog_id}',
+            "original_id": ori_dialog['id'],
+            "domains": [domain],
+        }
+        turns = []
+        # starts with system
+        for utt_idx, utt in enumerate(ori_dialog['turns'][1:]):
+            turn = {
+                'utt_idx': utt_idx,
+                'utterance': utt,
+                'dialogue_act': {
+                    'categorical': [],
+                    'non-categorical': [],
+                    'binary': [],
+                },
+            }
+            if utt_idx % 2 == 0:
+                turn['speaker'] = 'user'
+                turn['state'] = {}
+                turn['state_update'] = {
+                    'categorical': [],
+                    'non-categorical': [],
+                }
+            else:
+                turn['speaker'] = 'system'
+            turns.append(turn)
+        if turns[-1]['speaker'] == 'system':
+            turns.pop()
+
+        dialog['turns'] = turns
+        return dialog
+
+    dialog_id = 0
+    data = []
+    with ZipFile(os.path.join(origin_data_dir, 'metalwoz-v1.zip')) as zipfile:
+        for path in zipfile.namelist():
+            if path.startswith('dialogues'):
+                for dialog in json_lines.reader(zipfile.open(path)):
+                    data.append(process_dialog(dialog, 'train', dialog_id))
+                    dialog_id += 1
+
+    ZipFile(os.path.join(origin_data_dir, 'metalwoz-test-v1.zip')).extract('dstc8_metalwoz_heldout.zip')
+    with ZipFile(os.path.join('dstc8_metalwoz_heldout.zip')) as zipfile:
+        for path in zipfile.namelist():
+            if path.startswith('dialogues'):
+                for dialog in json_lines.reader(zipfile.open(path)):
+                    data.append(process_dialog(dialog, 'test', dialog_id))
+                    dialog_id += 1
+    os.remove('dstc8_metalwoz_heldout.zip')
+
+    json.dump(ontology, open(os.path.join(self_dir, 'ontology.json'), 'w'))
+    json.dump(data, open('data.json', 'w'), indent=4)
+    ZipFile(os.path.join(self_dir, 'data.zip'), 'w', ZIP_DEFLATED).write('data.json')
+    os.remove('data.json')
+
+
+# Script entry point: regenerate ontology.json and data.zip.
+if __name__ == '__main__':
+    preprocess()
diff --git a/data/unified_datasets/multiwoz21/README.md b/data/unified_datasets/multiwoz21/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..803ebf3ad3ee6c4b6aaf710a739f9518bf5d5321
--- /dev/null
+++ b/data/unified_datasets/multiwoz21/README.md
@@ -0,0 +1,31 @@
+# README
+
+## Features
+
+- Annotations: dialogue acts, character-level spans for non-categorical slots, state, and state updates.
+
+Statistics: 
+
+|       | \# dialogues | \# utterances | avg. turns | avg. tokens | \# domains |
+| ----- | ------------ | ------------- | ---------- | ----------- | ---------- |
+| train | 8434         | 105066         | 12.46     | 17.27      | 7          |
+| dev | 999         | 13731         | 13.74      | 17.72       | 7          |
+| test | 1000         | 13744         | 13.74       | 17.67       | 7          |
+
+
+## Main changes
+
+- only keep 5 domains in state annotations and dialog acts. 
+- `pricerange`, `area`, `day`, `internet`, `parking`, `stars` are considered categorical slots.
+- punctuation marks are split from the preceding token, e.g. `I want to find a hotel.` -> `I want to find a hotel .`
+
+Run `evaluate.py`:
+
+da values match rate:    97.944
+state values match rate: 66.017
+
+### original data
+
+- from [multiwoz](https://github.com/budzianowski/multiwoz) repo.
+
diff --git a/data/unified_datasets/multiwoz21/data.zip b/data/unified_datasets/multiwoz21/data.zip
new file mode 100644
index 0000000000000000000000000000000000000000..7f60019dfc4a68728474baf9e8d3d73305b46082
Binary files /dev/null and b/data/unified_datasets/multiwoz21/data.zip differ
diff --git a/data/unified_datasets/multiwoz21/ontology.json b/data/unified_datasets/multiwoz21/ontology.json
new file mode 100644
index 0000000000000000000000000000000000000000..b976054f3b6dc47c327c75f545549dfe3563ad01
--- /dev/null
+++ b/data/unified_datasets/multiwoz21/ontology.json
@@ -0,0 +1,1895 @@
+{
+    "domains": {
+        "taxi": {
+            "description": "taxi information query system",
+            "slots": {
+                "destination": {
+                    "description": "destination of taxi",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "departure": {
+                    "description": "departure location of taxi",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "leaveAt": {
+                    "description": "leaving time of taxi",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "arriveBy": {
+                    "description": "arrival time of taxi",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "taxi_phone": {
+                    "description": "taxi phone number",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "taxi_types": {
+                    "description": "taxi type",
+                    "is_categorical": false,
+                    "possible_values": []
+                }
+            }
+        },
+        "restaurant": {
+            "description": "restaurant information query system",
+            "slots": {
+                "food": {
+                    "description": "food type for the restaurant",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "name": {
+                    "description": "name of the restaurant",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "pricerange": {
+                    "description": "price budget for the restaurant",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "not moderate",
+                        "ch",
+                        "high - end",
+                        "luxury options",
+                        "priced cheaply",
+                        "moderate",
+                        "various",
+                        "all price ranges",
+                        "cheap price range",
+                        "low cost",
+                        "assorted",
+                        "cheap priced",
+                        "moderate or expensive priced",
+                        "expensive price",
+                        "your",
+                        "most expensive",
+                        "moderate and expensive",
+                        "reasonable",
+                        "moderately expensive",
+                        "moderate|cheap",
+                        "fairly expensive",
+                        "upscale",
+                        "quite inexpensive",
+                        "boderate",
+                        "affordable",
+                        "expinsive",
+                        "they vary",
+                        "more moderate",
+                        "expensive / upscale",
+                        "high class",
+                        "moderately price",
+                        "reasonably priced",
+                        "a little expensive but worth it",
+                        "middle",
+                        "any price point",
+                        "priced moderately",
+                        "cherap",
+                        "moderately prices",
+                        "cheap to expensive",
+                        "or otherwise",
+                        "inexpensive",
+                        "cheaply priced",
+                        "adforable",
+                        "that range",
+                        "great prices",
+                        "rather expensive",
+                        "expensive side",
+                        "moderate range",
+                        "cheaply - priced",
+                        "more expensive",
+                        "varying price",
+                        "similar price range",
+                        "this price range",
+                        "expensive",
+                        "expensive or moderate",
+                        "not cheap",
+                        "the least expensive",
+                        "pretty expensive",
+                        "high end",
+                        "not expensive",
+                        "less expensive",
+                        "moderatley priced",
+                        "most affordable price range",
+                        "budget - friendly",
+                        "expensive and moderately priced",
+                        "moderatre",
+                        "cheap range",
+                        "or the moderately priced",
+                        "fairly cheap",
+                        "not too expensive",
+                        "east",
+                        "fairly inexpensive",
+                        "regardless of price",
+                        "north",
+                        "mostly expensive and moderately priced",
+                        "cheap",
+                        "budget conscious",
+                        "same price range",
+                        "very expensive",
+                        "not too pricey",
+                        "any price range",
+                        "quite expensive",
+                        "that price range",
+                        "your price range",
+                        "every price point",
+                        "expensively priced",
+                        "varying price range",
+                        "not - so - expensive",
+                        "all",
+                        "vietnamese",
+                        "any",
+                        "moderately - priced",
+                        "that price",
+                        "moderate pricing",
+                        "do nt care",
+                        "modest",
+                        "pricey",
+                        "expensive but worth every penny",
+                        "cheaper",
+                        "pretty cheap",
+                        "moderate price",
+                        "ranging from cheap to expensive",
+                        "quite low",
+                        "moderate priced",
+                        "centre",
+                        "the same",
+                        "expensive range",
+                        "relatively cheap",
+                        "cheap or expensive",
+                        "epensive",
+                        "moderate and one in the cheap range",
+                        "budget friendly",
+                        "fine",
+                        "on the cheap side",
+                        "expensive price range",
+                        "mostly expensive",
+                        "moderately priced",
+                        "relatively expensive",
+                        "moderately",
+                        "moderatly",
+                        "on the pricey side",
+                        "low priced",
+                        "expensively",
+                        "moderate price range",
+                        "moderatly priced",
+                        "do n't care"
+                    ]
+                },
+                "address": {
+                    "description": "exact location of the restaurant",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "area": {
+                    "description": "area or place of the restaurant",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "near the centre",
+                        "east area",
+                        "northern parts of town",
+                        "any part of town",
+                        "west cambridge",
+                        "town of centre",
+                        "centre of cambridge",
+                        "north or centre",
+                        "westside",
+                        "city centre",
+                        "the west end",
+                        "the south of town",
+                        "the city center",
+                        "elsewhere in the city",
+                        "town center",
+                        "the west side",
+                        "south of cambridge",
+                        "south area of town",
+                        "here",
+                        "north side of town",
+                        "on the east",
+                        "throughout the city",
+                        "south area",
+                        "centre city",
+                        "city",
+                        "in the north",
+                        "not",
+                        "east side",
+                        "not in the west",
+                        "here in the city",
+                        "the south area of town",
+                        "that part of town",
+                        "the east",
+                        "west of town",
+                        "centere",
+                        "in the city .",
+                        "south part of time",
+                        "mexican",
+                        "the east side",
+                        "south part of town",
+                        "west part of town .",
+                        "southern",
+                        "any where in the city",
+                        "west area",
+                        "north cambridge",
+                        "the south area of the city",
+                        "almost every area of town",
+                        "ely",
+                        "the north end",
+                        "south cambridge",
+                        "the are",
+                        "east side of town",
+                        "southside",
+                        "east side of the city",
+                        "by the airport",
+                        "around there",
+                        "all of cambridge",
+                        "the town centre",
+                        "nearby",
+                        "town centre",
+                        "the center part of town",
+                        "other parts of town",
+                        "not in the south of town",
+                        "chesterton",
+                        "the centre of town",
+                        "east area of town",
+                        "the east area",
+                        "southend",
+                        "ctre",
+                        "same area",
+                        "anywhere in cambridge",
+                        "north side of cambridge",
+                        "same area as the park",
+                        "south side of town",
+                        "the centre area",
+                        "the same area as the botanic gardens",
+                        "south part of the city",
+                        "the area you have chosen",
+                        "in the centre",
+                        "northern part of cambridge",
+                        "east section",
+                        "east|south",
+                        "the east part of town",
+                        "this town",
+                        "the same area",
+                        "west area of town",
+                        "town centre area",
+                        "the southern area",
+                        "northern part",
+                        "southern area",
+                        "the east side of town",
+                        "expensive",
+                        "east section of town",
+                        "same",
+                        "in cambridge",
+                        "north side of chersteron",
+                        "towns centre",
+                        "in town",
+                        "west side",
+                        "in the east",
+                        "centrally",
+                        "west side area",
+                        "all over town",
+                        "centre region",
+                        "anywhere in town",
+                        "closely located",
+                        "west part of town",
+                        "cambridge",
+                        "downtown",
+                        "south end",
+                        "close it city centre",
+                        "in the city centre",
+                        "close to the hotel",
+                        "east",
+                        "north and west",
+                        "west end of town",
+                        "the south part of town",
+                        "the north side",
+                        "any area",
+                        "the area of west",
+                        "center of the town",
+                        "the area",
+                        "the centre part of town",
+                        "north",
+                        "wet part of town",
+                        "other part of the town",
+                        "cheap",
+                        "north part of town",
+                        "centre area of the town",
+                        "cetre",
+                        "in town at all",
+                        "central",
+                        "east part of town",
+                        "near the center of town",
+                        "the city centre",
+                        "north end",
+                        "east of town",
+                        "centreof",
+                        "west",
+                        "the north side of town",
+                        "in the west",
+                        "same side of town as your hotel",
+                        "the west of town",
+                        "centre area of town",
+                        "centrem",
+                        "east of cambridge",
+                        "center of town",
+                        "anywhere",
+                        "throughout the area",
+                        "central area",
+                        "south of town",
+                        "the west area",
+                        "the center of town",
+                        "centre part of town",
+                        "the center",
+                        "north side",
+                        "the south",
+                        "the west side of town",
+                        "west part of the city",
+                        "center",
+                        "any",
+                        "the north",
+                        "east end",
+                        "the west part of town",
+                        "west end",
+                        "center cambridge",
+                        "the centre area of town",
+                        "centre area",
+                        "northside",
+                        "centre area of the city",
+                        "center area of town",
+                        "centrally located",
+                        "the south side",
+                        "around the college",
+                        "do nt care",
+                        "thai",
+                        "the west",
+                        "south side",
+                        "cambridge centre",
+                        "town",
+                        "west of cambridge",
+                        "near clare hall",
+                        "south of town .",
+                        "n the centre",
+                        "centre of the city",
+                        "the north area",
+                        "that",
+                        "this area",
+                        "central region of town",
+                        "centre",
+                        "north of town",
+                        "these areas",
+                        "the same",
+                        "south part",
+                        "that area of town",
+                        "in the city",
+                        "that area",
+                        "city center",
+                        "south",
+                        "close to your location",
+                        "north area of town",
+                        "all over cambridge",
+                        "the south side of town",
+                        "within this area",
+                        "near centre",
+                        "the centre",
+                        "centre of town .",
+                        "the north part of town",
+                        "the north side .",
+                        "same area as the hotel",
+                        "and centre",
+                        "west side of town",
+                        "westies",
+                        "around town",
+                        "east part of tow",
+                        "eastside",
+                        "close to that area",
+                        "all over the city",
+                        "south side of the town",
+                        "centre cambridge",
+                        "do n't care",
+                        "north area",
+                        "centre of town"
+                    ]
+                },
+                "postcode": {
+                    "description": "postcode of the restaurant",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "phone": {
+                    "description": "restaurant phone number",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "people": {
+                    "description": "number of people booking the restaurant",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "time": {
+                    "description": "time of the restaurant booking",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "day": {
+                    "description": "day of the restaurant booking",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "saturday",
+                        "wednesday",
+                        "tuesday",
+                        "w",
+                        "saturday|thursday",
+                        "monday",
+                        "friday",
+                        "thursday",
+                        "sunday",
+                        "sunday|thursday"
+                    ]
+                },
+                "choice": {
+                    "description": "number of restaurants meeting requests of user",
+                    "is_categorical": false,
+                    "possible_values": []
+                }
+            }
+        },
+        "attraction": {
+            "description": "an entertainment that is offered to the public",
+            "slots": {
+                "address": {
+                    "description": "details of where the attraction is",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "postcode": {
+                    "description": "postcode of the attraction",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "phone": {
+                    "description": "phone number of the attraction",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "entrance fee": {
+                    "description": "the fee charged for admission to the attraction",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "type": {
+                    "description": "type of the attraction",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "area": {
+                    "description": "area or place of the attraction",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "east area",
+                        "near the centre",
+                        "west cambridge",
+                        "centre of cambridge",
+                        "other areas",
+                        "westside",
+                        "the west - side",
+                        "city centre",
+                        "central district",
+                        "near the restaurant",
+                        "whole of cambridge",
+                        "the west end",
+                        "entre",
+                        "various",
+                        "northern area",
+                        "town center",
+                        "west of your city",
+                        "the west side",
+                        "south of cambridge",
+                        "west part of cambridge",
+                        "norwich",
+                        "throughout the city",
+                        "centre city",
+                        "south area",
+                        "city",
+                        "very close in the same area",
+                        "centre by galleria",
+                        "east side",
+                        "northern",
+                        "that part of town",
+                        "the east",
+                        "same general area",
+                        "went side of town",
+                        "southern cambridge",
+                        "the east side",
+                        "south part of town",
+                        "in that area",
+                        "to the south",
+                        "clifton way",
+                        "there",
+                        "west area",
+                        "in the south",
+                        "center are",
+                        "south cambridge",
+                        "ely",
+                        "east side of town",
+                        "central cambridge",
+                        "south park of town",
+                        "the south area",
+                        "centre area of cambridge",
+                        "west area of the city",
+                        "several different parts of town",
+                        "cambridge 's centre",
+                        "nearby",
+                        "town centre",
+                        "the center part of town",
+                        "near the hotel",
+                        "museum",
+                        "the area you 're looking for",
+                        "other parts of town",
+                        "most of them are in the center",
+                        "the centre of town",
+                        "northern area of town",
+                        "east area of town",
+                        "north of the city",
+                        "different area",
+                        "same area",
+                        "east section",
+                        "south side of town",
+                        "the centre area",
+                        "in the centre",
+                        "in the center",
+                        "western part of town",
+                        "centre area .",
+                        "south part of cambridge",
+                        "the same road",
+                        "cetnre of town",
+                        "the same area",
+                        "town centre area",
+                        "the east part of town",
+                        "center area",
+                        "west area of town",
+                        "cenre",
+                        "center of town .",
+                        "cambridge leisure park",
+                        "church area",
+                        "that area .",
+                        "near ely",
+                        "in that side",
+                        "east cambridge",
+                        "same",
+                        "in cambridge",
+                        "towns centre",
+                        "that side of town",
+                        "another area",
+                        "in town",
+                        "west side",
+                        "west end of the city",
+                        "the centry area",
+                        "in the east",
+                        "we",
+                        "all over town",
+                        "all around the city",
+                        "city centre .",
+                        "center of cambridge",
+                        "centre region",
+                        "west part of town",
+                        "cambridge",
+                        "cent",
+                        "western part of the town",
+                        "downtown",
+                        "south end",
+                        "east",
+                        "this side of town",
+                        "same area as hotel",
+                        "center of the town",
+                        "west end of town",
+                        "the area",
+                        "eat",
+                        "northern cambridge",
+                        "north",
+                        "same part of town as your restaurant",
+                        "the centre part of town",
+                        "north part of town",
+                        "central",
+                        "close to the center of town",
+                        "east part of town",
+                        "same part",
+                        "western cambridge",
+                        "the city centre",
+                        "east of town",
+                        "west",
+                        "the eastside",
+                        "north in milton",
+                        "the west of town",
+                        "centre area of town",
+                        "ce",
+                        "east of cambridge",
+                        "the east of the town",
+                        "close to the restaurant",
+                        "the west area of town",
+                        "center of town",
+                        "the south near your hotel",
+                        "south of town",
+                        "the west area",
+                        "centre part of town",
+                        "the center",
+                        "north side",
+                        "north section of cambridge",
+                        "the north of the city",
+                        "all of the other areas",
+                        "the south",
+                        "all",
+                        "the west side of town",
+                        "west part of the city",
+                        "center",
+                        "the north",
+                        "your desired location",
+                        "wet end",
+                        "center part of town",
+                        "cb30aq",
+                        "west end",
+                        "in the area",
+                        "centre area",
+                        "centrally located",
+                        "the south side",
+                        "city 's centre",
+                        "do nt care",
+                        "the west",
+                        "east end of town",
+                        "every area except the north",
+                        "south side",
+                        "centre of the city",
+                        "that",
+                        "this area",
+                        "centre",
+                        "north of town",
+                        "right in the center of town",
+                        "city cenre",
+                        "centre of town ?",
+                        "that area",
+                        "city center",
+                        "south",
+                        "same area as tandoori palace",
+                        "centre|west",
+                        "central zone",
+                        "the centre",
+                        "the center area",
+                        "west side of town",
+                        "western area of town",
+                        "the center of the park",
+                        "west side of the city",
+                        "eastside",
+                        "northend",
+                        "the area you are looking for",
+                        "all over the city",
+                        "western",
+                        "on the centre",
+                        "museums",
+                        "north area",
+                        "centre of town",
+                        "center of the city"
+                    ]
+                },
+                "name": {
+                    "description": "name of the attraction",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "pricerange": {
+                    "description": "the price range for the attraction, from cheap to expensive",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "free",
+                        "no entrance fee",
+                        "entrance fee",
+                        "unaware of what their entrance fee is",
+                        "not listed",
+                        "cheap",
+                        "we do n't have any information",
+                        "2 pounds",
+                        "do n't see a price listed",
+                        "from free to 2 pounds 50",
+                        "4 pounds",
+                        "moderately priced",
+                        "neither prices are listed",
+                        "expensive",
+                        "5 pounds",
+                        "they do n't have the entrance fee posted",
+                        "free admission",
+                        "not sure of the fee",
+                        "5 pound entrance fee",
+                        "do n't have information",
+                        "3.50 pounds"
+                    ]
+                },
+                "choice": {
+                    "description": "number of attractions matching requests of user",
+                    "is_categorical": false,
+                    "possible_values": []
+                }
+            }
+        },
+        "booking": {
+            "description": "to arrange with a taxi, restaurant, train, etc.",
+            "slots": {
+                "time": {
+                    "description": "time for an order",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "day": {
+                    "description": "day for an order, from monday to sunday",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "saturday",
+                        "wednesday",
+                        "saturday night thru monday",
+                        "vmhj6y3i",
+                        "6",
+                        "weds",
+                        "tuesday through friday",
+                        "tuesday instead of thursday",
+                        "2",
+                        "that",
+                        "tues",
+                        "sunday 18/06/2017",
+                        "tuesday 's",
+                        "today",
+                        "tonight",
+                        "that time frame",
+                        "tuesday",
+                        "at that time",
+                        "1",
+                        "this evening",
+                        "sunday and monday",
+                        "the same day",
+                        "sundar",
+                        "monday",
+                        "friday",
+                        "thursday",
+                        "sunday",
+                        "we d"
+                    ]
+                },
+                "stay": {
+                    "description": "for how long the user wish to be at a place",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "people": {
+                    "description": "how many person the order is for",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "name": {
+                    "description": "name of the ordered place",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "Ref": {
+                    "description": "reference number of the order",
+                    "is_categorical": false,
+                    "possible_values": []
+                }
+            }
+        },
+        "train": {
+            "description": "query and order a train",
+            "slots": {
+                "destination": {
+                    "description": "destination of the train",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "arriveBy": {
+                    "description": "arrival time of the train",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "departure": {
+                    "description": "departure location of the train",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "duration": {
+                    "description": "the length of time the train trip lasts",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "people": {
+                    "description": "number of people booking for train",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "day": {
+                    "description": "day of the train",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "cambridge",
+                        "fr",
+                        "saturday",
+                        "wednesday",
+                        "13:07",
+                        "everday",
+                        "weds",
+                        "sat",
+                        "sundays",
+                        "train",
+                        "that",
+                        "between friday and wednesday",
+                        "all week",
+                        "this day",
+                        "every day",
+                        "tuesday",
+                        "saturdays",
+                        "tr2519",
+                        "every",
+                        "other days are available",
+                        "mondays",
+                        "we",
+                        "monday",
+                        "friday",
+                        "thursday",
+                        "frday",
+                        "sunday",
+                        "daily",
+                        "that day",
+                        "fiday"
+                    ]
+                },
+                "Ref": {
+                    "description": "reference number of the order",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "leaveAt": {
+                    "description": "leaving time for the train",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "price": {
+                    "description": "price for the train ticket",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "choice": {
+                    "description": "number of trains that meets requests of the user",
+                    "is_categorical": false,
+                    "possible_values": []
+                }
+            }
+        },
+        "hotel": {
+            "description": "to query hotel information and place an order",
+            "slots": {
+                "internet": {
+                    "description": "internet option at the hotel",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "free internet",
+                        "free",
+                        "y",
+                        "do nt care",
+                        "yes",
+                        "none",
+                        "no",
+                        "4"
+                    ]
+                },
+                "area": {
+                    "description": "area or place of the hotel",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "near the centre",
+                        "any part of town",
+                        "east area",
+                        "different parts of the city",
+                        "north end of the city",
+                        "centre of cambridge",
+                        "near the restaurant",
+                        "city centre",
+                        "throughout cambridge",
+                        "different part of town",
+                        "eastern part of the city",
+                        "the west end",
+                        "various",
+                        "northern area",
+                        "the northern part of town",
+                        "northern part of town",
+                        "the west side",
+                        "on the west",
+                        "south area of town",
+                        "centrally - located",
+                        "north side of town",
+                        "central location",
+                        "centra",
+                        "throughout the city",
+                        "not in the south",
+                        "south area",
+                        "centre city",
+                        "in the north",
+                        "east side",
+                        "northern side of town",
+                        "different parts of town",
+                        "you are interested in",
+                        "northern",
+                        "that part of town",
+                        "west of town",
+                        "the east",
+                        "all areas",
+                        "all parts of the city",
+                        "the east side",
+                        "south part of town",
+                        "south end of the city",
+                        "there",
+                        "west area",
+                        "north cambridge",
+                        "the north end",
+                        "south cambridge",
+                        "east side of town",
+                        "southside",
+                        "the south area",
+                        "or west ?",
+                        "another area of town",
+                        "all of cambridge",
+                        "the town centre",
+                        "east near other shops and boutiques",
+                        "town centre",
+                        "on the west side",
+                        "all over",
+                        "the centre of the city",
+                        "other parts of town",
+                        "the centre of town",
+                        "east area of town",
+                        "the east area",
+                        "the westside",
+                        "southend",
+                        "west|centre",
+                        "northe part of town",
+                        "anywhere in cambridge",
+                        "different area",
+                        "same area",
+                        "south side of town",
+                        "the centre area",
+                        "on the south",
+                        "in the centre",
+                        "in the center",
+                        "wast",
+                        "all across town",
+                        "south end of town",
+                        "except in the north",
+                        "and east",
+                        "place to be a guesthouse",
+                        "the same area",
+                        "other parts of the city",
+                        "the east side of town",
+                        "somewhere else",
+                        "and in the centre",
+                        "east section of town",
+                        "east cambridge",
+                        "across cambridge",
+                        "north of cambridge",
+                        "next door",
+                        "except in the east",
+                        "in cambridge",
+                        "north part of the city",
+                        "that side of town",
+                        "south cambridge area",
+                        "in town",
+                        "west side",
+                        "west end of the city",
+                        "we",
+                        "centrally",
+                        "west side near the restaurant",
+                        "all over town",
+                        "southern part of town",
+                        "all around the city",
+                        "west part of town",
+                        "cambridge",
+                        "downtown",
+                        "south end",
+                        "east",
+                        "east and the north .",
+                        "norht",
+                        "the north side",
+                        "any area",
+                        "center of the town",
+                        "the area",
+                        "eat",
+                        "northern cambridge",
+                        "north",
+                        "cheap",
+                        "north part of town",
+                        "centre part",
+                        "east part of town",
+                        "western cambridge",
+                        "north end of town",
+                        "north end",
+                        "east of town",
+                        "several areas of town",
+                        "west",
+                        "eastern",
+                        "in the west",
+                        "everywhere but the city centre",
+                        "centre area of town",
+                        "center of town",
+                        "west part",
+                        "the town center",
+                        "south of town",
+                        "the center of town",
+                        "the west area",
+                        "centre part of town",
+                        "nborth",
+                        "north side",
+                        "the north of the city",
+                        "the south",
+                        "west part of the city",
+                        "center",
+                        "the north",
+                        "west end",
+                        "same side of town",
+                        "in the area",
+                        "any part of the city",
+                        "centre area",
+                        "not to far from the restaurant",
+                        "northside",
+                        "on the eastside",
+                        "close to where you 'll be dining",
+                        "the east end of town",
+                        "various parts of the city",
+                        "west areas of town",
+                        "centrally located",
+                        "the south side",
+                        "north part of town .",
+                        "do nt care",
+                        "the west",
+                        "same area as the restaurant",
+                        "that vicinity",
+                        "nearby the restaurant",
+                        "south side",
+                        "east part of time",
+                        "centre of the city",
+                        "south closer to the museum",
+                        "north park of town",
+                        "centre",
+                        "north of town",
+                        "north and centre",
+                        "the same",
+                        "on the north",
+                        "that area of town",
+                        "that area",
+                        "in the city",
+                        "el shaddai",
+                        "city center",
+                        "south",
+                        "the city 's south side",
+                        "north area of town",
+                        "near the museum",
+                        "eastern cambridge",
+                        "near centre",
+                        "the easy",
+                        "a different area",
+                        "the centre",
+                        "the north part of town",
+                        "west side of town",
+                        "across town",
+                        "eastside",
+                        "northend",
+                        "north par of town",
+                        "the centre of cambridge",
+                        "all over the city",
+                        "north location",
+                        "centre cambridge",
+                        "that region",
+                        "north area",
+                        "centre of town"
+                    ]
+                },
+                "stars": {
+                    "description": "star rating of the hotel",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "4|5",
+                        "several",
+                        "0 to 4",
+                        "no star rating",
+                        "do nt care",
+                        "one",
+                        "does not show",
+                        "foru",
+                        "does not have",
+                        "different star ratings",
+                        "unrated",
+                        "2",
+                        "four starts",
+                        "4-star",
+                        "four - star",
+                        "3|4",
+                        "yes",
+                        "three",
+                        "four",
+                        "lower",
+                        "four stars",
+                        "zero",
+                        "ranging from 2 - 4 stars",
+                        "two",
+                        "1",
+                        "3",
+                        "four star",
+                        "0-star",
+                        "five",
+                        "drop the star rating",
+                        "2-star",
+                        "no",
+                        "3-star",
+                        "5-star",
+                        "0",
+                        "1-star",
+                        "not rated",
+                        "5",
+                        "not as fancy",
+                        "4"
+                    ]
+                },
+                "parking": {
+                    "description": "parking facility at the hotel",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "n",
+                        "free",
+                        "do nt care",
+                        "yes",
+                        "none",
+                        "no",
+                        "free parking"
+                    ]
+                },
+                "phone": {
+                    "description": "hotel phone number",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "name": {
+                    "description": "name of the hotel",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "pricerange": {
+                    "description": "price budget of the hotel",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "cheapest",
+                        "economically priced",
+                        "free",
+                        "all different price ranges",
+                        "more moderately priced",
+                        "expensively - priced",
+                        "ca n't view that information",
+                        "moderate",
+                        "super cheap",
+                        "low cost",
+                        "cheap price range",
+                        "lower",
+                        "affordable pricing",
+                        "moderate to cheap",
+                        "cheap priced",
+                        "inexpensively - priced",
+                        "mostly in the cheap to moderate price range",
+                        "moderately pricing",
+                        "guesthouses",
+                        "moderate|cheap",
+                        "moderately priceed",
+                        "very cheap",
+                        "even cheaper",
+                        "upscale",
+                        "other ranges",
+                        "moderate or cheap",
+                        "cheaply",
+                        "affordable",
+                        "different price range",
+                        "does not say",
+                        "moderately price",
+                        "expensive or cheap",
+                        "priced moderately",
+                        "moderatly price",
+                        "moderate price point",
+                        "moderately prices",
+                        "cheap to expensive",
+                        "on the more expensive side",
+                        "inexpensive",
+                        "the cheapest",
+                        "cheaply priced",
+                        "that range",
+                        "rather expensive",
+                        "your price",
+                        "moderate range",
+                        "cheaply - priced",
+                        "more expensive",
+                        "expensive",
+                        "much cheaper",
+                        "cheap side",
+                        "slightly more expensive",
+                        "$100",
+                        "cheaper than the others",
+                        "pretty expensive",
+                        "not expensive",
+                        "cheaply prices",
+                        "moderatley priced",
+                        "modrate",
+                        "cheap range",
+                        "espensive",
+                        "chear",
+                        "fairly cheap",
+                        "oderately priced",
+                        "moderate in price",
+                        "different",
+                        "very affordable",
+                        "your chosen",
+                        "unfortunately do not have the price",
+                        "north",
+                        "less costly",
+                        "different price ranges",
+                        "cheap",
+                        "very inexpensive",
+                        "2",
+                        "moderately priced .",
+                        "moderately to expensively priced",
+                        "economical",
+                        "same price range",
+                        "moderate to cheap range",
+                        "cheap to moderate",
+                        "quite expensive",
+                        "that price range",
+                        "cheap|moderate",
+                        "your price range",
+                        "varying price ranges",
+                        "expensive to moderate",
+                        "expensively priced",
+                        "epxensive",
+                        "or expensive ?",
+                        "all",
+                        "higher price range",
+                        "any",
+                        "moderately - priced",
+                        "hotel",
+                        "moderate pricing",
+                        "do nt care",
+                        "lower end",
+                        "quite cheap",
+                        "more budget - friendly",
+                        "cheaper",
+                        "fairly cheap compared to other hotels",
+                        "pretty cheap",
+                        "moderate price",
+                        "that",
+                        "moderate priced",
+                        "centre",
+                        "the same",
+                        "expensive range",
+                        "relatively cheap",
+                        "moderate prices",
+                        "hotels",
+                        "cheap>moderate",
+                        "a little pricey",
+                        "on the cheap side",
+                        "moderately - priced or cheap",
+                        "expensive price range",
+                        "moderately priced",
+                        "same price",
+                        "moderately",
+                        "budget - priced",
+                        "moderately pried",
+                        "moderate price range",
+                        "great",
+                        "moderatly priced",
+                        "extremely reasonable",
+                        "cheap or moderate",
+                        "cheaper side"
+                    ]
+                },
+                "people": {
+                    "description": "number of people for the hotel booking",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "postcode": {
+                    "description": "postcode of the hotel",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "address": {
+                    "description": "exact location of the hotel",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "type": {
+                    "description": "what is the type of the hotel",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "stay": {
+                    "description": "length of stay at the hotel",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "day": {
+                    "description": "day of the hotel booking",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "next friday",
+                        "saturday",
+                        "wednesday",
+                        "tuesday",
+                        "saturday|tuesday",
+                        "t",
+                        "sunday>monday",
+                        "friday>tuesday",
+                        "monday",
+                        "friday",
+                        "thursday",
+                        "monday<thursday",
+                        "sunday",
+                        "wednesday|friday",
+                        "monda"
+                    ]
+                },
+                "choice": {
+                    "description": "number of hotels that meets requests of the user",
+                    "is_categorical": false,
+                    "possible_values": []
+                }
+            }
+        },
+        "police": {
+            "description": "find police stations",
+            "slots": {
+                "address": {
+                    "description": "exact location of the police station",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "postcode": {
+                    "description": "postcode of the police station",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "phone": {
+                    "description": "police station phone number",
+                    "is_categorical": false,
+                    "possible_values": []
+                }
+            }
+        }
+    },
+    "intents": {
+        "inform": {
+            "description": "inform user of value for a certain slot"
+        },
+        "request": {
+            "description": "ask for value of a slot"
+        },
+        "nobook": {
+            "description": "inform user of booking failure"
+        },
+        "reqmore": {
+            "description": "ask user for more instructions"
+        },
+        "book": {
+            "description": "place an order for user"
+        },
+        "bye": {
+            "description": "end a conversation and say goodbye to user"
+        },
+        "thank": {
+            "description": "express gratitude"
+        },
+        "welcome": {
+            "description": "welcome"
+        },
+        "recommend": {
+            "description": "recommend a choice for user request"
+        },
+        "offerbook": {
+            "description": "offer to place an order for user"
+        },
+        "offerbooked": {
+            "description": "inform user that an order is succussful"
+        },
+        "greet": {
+            "description": "express greeting"
+        },
+        "nooffer": {
+            "description": "inform user that no options matches user request"
+        },
+        "select": {
+            "description": "provide several choices for user to choose from"
+        }
+    },
+    "binary_dialogue_act": [
+        {
+            "intent": "request",
+            "domain": "hotel",
+            "slot": "area",
+            "value": ""
+        },
+        {
+            "intent": "inform",
+            "domain": "booking",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "booking",
+            "slot": "stay",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "booking",
+            "slot": "day",
+            "value": ""
+        },
+        {
+            "intent": "reqmore",
+            "domain": "",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "bye",
+            "domain": "",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "thank",
+            "domain": "",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "welcome",
+            "domain": "",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "hotel",
+            "slot": "pricerange",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "train",
+            "slot": "leaveAt",
+            "value": ""
+        },
+        {
+            "intent": "offerbook",
+            "domain": "train",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "train",
+            "slot": "duration",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "train",
+            "slot": "price",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "train",
+            "slot": "departure",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "train",
+            "slot": "day",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "taxi",
+            "slot": "leaveAt",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "taxi",
+            "slot": "arriveBy",
+            "value": ""
+        },
+        {
+            "intent": "greet",
+            "domain": "",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "hotel",
+            "slot": "address",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "train",
+            "slot": "destination",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "train",
+            "slot": "arriveBy",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "hotel",
+            "slot": "internet",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "hotel",
+            "slot": "parking",
+            "value": ""
+        },
+        {
+            "intent": "inform",
+            "domain": "attraction",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "attraction",
+            "slot": "area",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "attraction",
+            "slot": "type",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "attraction",
+            "slot": "address",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "food",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "area",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "phone",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "postcode",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "address",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "attraction",
+            "slot": "postcode",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "attraction",
+            "slot": "phone",
+            "value": ""
+        },
+        {
+            "intent": "nobook",
+            "domain": "booking",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "inform",
+            "domain": "train",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "train",
+            "slot": "people",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "booking",
+            "slot": "people",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "hotel",
+            "slot": "stars",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "hotel",
+            "slot": "type",
+            "value": ""
+        },
+        {
+            "intent": "select",
+            "domain": "hotel",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "hotel",
+            "slot": "postcode",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "attraction",
+            "slot": "entrance fee",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "pricerange",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "train",
+            "slot": "Ref",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "booking",
+            "slot": "time",
+            "value": ""
+        },
+        {
+            "intent": "book",
+            "domain": "booking",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "hotel",
+            "slot": "phone",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "attraction",
+            "slot": "pricerange",
+            "value": ""
+        },
+        {
+            "intent": "inform",
+            "domain": "hotel",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "select",
+            "domain": "train",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "select",
+            "domain": "restaurant",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "inform",
+            "domain": "restaurant",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "inform",
+            "domain": "taxi",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "taxi",
+            "slot": "departure",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "name",
+            "value": ""
+        },
+        {
+            "intent": "nooffer",
+            "domain": "attraction",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "taxi",
+            "slot": "destination",
+            "value": ""
+        },
+        {
+            "intent": "nooffer",
+            "domain": "hotel",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "select",
+            "domain": "attraction",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "taxi",
+            "slot": "taxi_phone",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "taxi",
+            "slot": "taxi_types",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "attraction",
+            "slot": "name",
+            "value": ""
+        },
+        {
+            "intent": "nooffer",
+            "domain": "restaurant",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "nooffer",
+            "domain": "train",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "hotel",
+            "slot": "name",
+            "value": ""
+        },
+        {
+            "intent": "recommend",
+            "domain": "hotel",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "recommend",
+            "domain": "attraction",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "offerbooked",
+            "domain": "train",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "recommend",
+            "domain": "restaurant",
+            "slot": "",
+            "value": ""
+        }
+    ],
+    "state": {
+        "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "pricerange": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "stay": "",
+            "day": "",
+            "people": ""
+        },
+        "train": {
+            "leaveAt": "",
+            "destination": "",
+            "day": "",
+            "arriveBy": "",
+            "departure": "",
+            "people": ""
+        },
+        "taxi": {
+            "leaveAt": "",
+            "destination": "",
+            "departure": "",
+            "arriveBy": ""
+        },
+        "restaurant": {
+            "food": "",
+            "pricerange": "",
+            "name": "",
+            "area": "",
+            "time": "",
+            "day": "",
+            "people": ""
+        },
+        "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+        }
+    }
+}
\ No newline at end of file
diff --git a/data/unified_datasets/multiwoz21/original_data.zip b/data/unified_datasets/multiwoz21/original_data.zip
new file mode 100644
index 0000000000000000000000000000000000000000..b27361772be980013a94c91898fb61e3e122ba8e
Binary files /dev/null and b/data/unified_datasets/multiwoz21/original_data.zip differ
diff --git a/data/unified_datasets/multiwoz21/preprocess.py b/data/unified_datasets/multiwoz21/preprocess.py
new file mode 100644
index 0000000000000000000000000000000000000000..5ca81e2e399d8a816e3be6d4ae33babfc2f1597e
--- /dev/null
+++ b/data/unified_datasets/multiwoz21/preprocess.py
@@ -0,0 +1,1431 @@
+import copy
+import re
+import zipfile
+import json
+import os
+from tqdm import tqdm
+import sys
+import difflib
+from fuzzywuzzy import fuzz
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from convlab2.util.file_util import read_zipped_json, write_zipped_json
+import logging
+
+
+logging.basicConfig(level=logging.INFO)  # configure root logging at INFO level
+self_dir = (os.path.abspath(os.getcwd()))  # absolute cwd, NOT this file's folder; NOTE(review): presumably the script must be run from its own directory - confirm
+
+REF_SYS_DA = {  # capitalised domain -> {short slot label -> slot name as used in slot_to_type, or None}
+    'Attraction': {
+        'Addr': "address", 'Area': "area", 'Choice': "choice",
+        'Fee': "entrance fee", 'Name': "name", 'Phone': "phone",
+        'Post': "postcode", 'Price': "pricerange", 'Type': "type",
+        'none': None, 'Open': None  # labels mapped to None have no target slot name
+    },
+    'Hospital': {  # NOTE(review): slot_to_type has no 'hospital-*' key - presumably this domain is dropped later; confirm
+        'Department': 'department', 'Addr': 'address', 'Post': 'postcode',
+        'Phone': 'phone', 'none': None
+    },
+    'Booking': {
+        'Day': 'day', 'Name': 'name', 'People': 'people',
+        'Ref': 'Ref', 'Stay': 'stay', 'Time': 'time',
+        'none': None
+    },
+    'Hotel': {
+        'Addr': "address", 'Area': "area", 'Choice': "choice",
+        'Internet': "internet", 'Name': "name", 'Parking': "parking",
+        'Phone': "phone", 'Post': "postcode", 'Price': "pricerange",
+        'Stars': "stars", 'Type': "type", 'Stay': 'stay', 'Day': 'day', 'People': 'people',
+        'none': None
+    },
+    'Restaurant': {
+        'Addr': "address", 'Area': "area", 'Choice': "choice",
+        'Name': "name", 'Food': "food", 'Phone': "phone",
+        'Post': "postcode", 'Price': "pricerange",
+        'Time': 'time', 'Day': 'day', 'People': 'people',
+        'none': None
+    },
+    'Taxi': {
+        'Arrive': "arriveBy", 'Car': "taxi_types", 'Depart': "departure",
+        'Dest': "destination", 'Leave': "leaveAt", 'Phone': "taxi_phone",  # 'Phone' -> "taxi_phone" here, not "phone" as in other domains
+        'none': None
+    },
+    'Train': {
+        'Arrive': "arriveBy", 'Choice': "choice", 'Day': "day",
+        'Depart': "departure", 'Dest': "destination",
+        'Leave': "leaveAt", 'People': "people", 'Ref': "Ref",
+        'Time': "duration", 'none': None, 'Ticket': 'price',  # for trains 'Time' becomes "duration" and 'Ticket' becomes 'price'
+    },
+    'Police': {
+        'Addr': "address", 'Post': "postcode", 'Phone': "phone", 'none': None
+    },
+}
+
+# Type of each 'domain-slot': 'cat' (presumably categorical) or 'non'; the 'cat' keys are collected into state_cat_slot_ds.
+slot_to_type = {
+    'taxi-destination': 'non',
+    'taxi-departure': 'non',
+    'taxi-leaveAt': 'non',
+    'taxi-arriveBy': 'non',
+    'restaurant-food': 'non',
+    'restaurant-name': 'non',
+    'attraction-address': 'non',
+    'attraction-postcode': 'non',
+    'restaurant-pricerange': 'cat',
+    'restaurant-address': 'non',
+    'restaurant-area': 'cat',
+    'restaurant-postcode': 'non',
+    'attraction-phone': 'non',
+    'attraction-entrance fee': 'non',
+    'booking-time': 'non',
+    'booking-day': 'cat',
+    'attraction-type': 'non',
+    'attraction-area': 'cat',
+    'train-destination': 'non',
+    'train-arriveBy': 'non',
+    'train-departure': 'non',
+    'hotel-internet': 'cat',
+    'hotel-area': 'cat',
+    'booking-stay': 'non',
+    'booking-people': 'non',
+    'train-duration': 'non',
+    'train-people': 'non',
+    'train-day': 'cat',
+    'train-Ref': 'non',
+    'hotel-stars': 'cat',
+    'train-leaveAt': 'non',
+    'train-price': 'non',
+    'hotel-parking': 'cat',
+    'hotel-phone': 'non',
+    'hotel-name': 'non',
+    'hotel-pricerange': 'cat',
+    'hotel-people': 'non',
+    'restaurant-phone': 'non',
+    'hotel-postcode': 'non',
+    'hotel-address': 'non',
+    'attraction-name': 'non',
+    'hotel-type': 'non',
+    'restaurant-people': 'non',
+    'train-choice': 'non',
+    'attraction-pricerange': 'cat',
+    'hotel-stay': 'non',
+    'booking-name': 'non',
+    'booking-Ref': 'non',
+    'restaurant-time': 'non',
+    'restaurant-day': 'cat',
+    'hotel-day': 'cat',
+    'hotel-choice': 'non',
+    'restaurant-choice': 'non',
+    'attraction-choice': 'non',
+    'taxi-taxi_phone': 'non',
+    'taxi-taxi_types': 'non',
+    'police-address': 'non',
+    'police-postcode': 'non',
+    'police-phone': 'non'
+}
+
+state_cat_slot_value_dict = {  # 'domain-slot' -> {value: int}; the ints are presumably occurrence counts in the data - confirm
+    "hotel-pricerange": {
+        "cheap": 735,
+        "moderate": 1063,
+        "expensive": 594,
+    },
+    "hotel-parking": {
+        "yes": 1809,
+        "no": 126,
+        "free": 4,
+    },
+    "hotel-day": {
+        "tuesday": 385,
+        "wednesday": 410,
+        "monday": 365,
+        "saturday": 407,
+        "friday": 393,
+        "thursday": 384,
+        "sunday": 369,
+    },
+    "train-day": {
+        "wednesday": 533,
+        "monday": 533,
+        "saturday": 543,
+        "thursday": 547,
+        "friday": 563,
+        "tuesday": 553,
+        "sunday": 613,
+    },
+    "hotel-stars": {
+        "4": 1263,
+        "2": 193,
+        "0": 201,
+        "3": 401,
+        "5": 45,
+        "1": 45,
+    },
+    "hotel-internet": {
+        "yes": 1841,
+        "no": 79,
+        "free": 2
+    },
+    "hotel-area": {
+        "east": 416,
+        "north": 717,
+        "centre": 538,
+        "south": 289,
+        "west": 316,
+    },
+    "attraction-area": {
+        "centre": 1290,
+        "west": 332,
+        "north": 155,
+        "south": 240,
+        "east": 272,
+    },
+    "restaurant-pricerange": {
+        "expensive": 1477,
+        "cheap": 758,
+        "moderate": 1028,
+    },
+    "restaurant-area": {
+        "centre": 1745,
+        "south": 398,
+        "north": 390,
+        "east": 360,
+        "west": 423,
+    },
+    "restaurant-day": {
+        "thursday": 362,
+        "wednesday": 412,
+        "friday": 395,
+        "monday": 383,
+        "sunday": 399,
+        "saturday": 421,
+        "tuesday": 350,
+    }
+}  # no entries for 'booking-day' / 'attraction-pricerange', although slot_to_type marks them 'cat'
+
+
+synonyms = [  # each inner list groups spellings presumably treated as the same value - confirm at the use site
+    ["el shaddia guesthouse", "el shaddai"],
+    [ "peterborough", "peterbourgh"],
+    ["night club", "nightclub", 'nightclubs'],
+    ["boat", "boating"],
+    ["portugese", "portuguese"],
+    ["guesthouse", "guest house"],
+    ["seafood", "sea food"],
+    ["christ 's college", "christ college"],
+    ["huntingdon marriott hotel"]  # single-entry group: lists no alternative spelling
+]
+
+state_cat_slot_ds = [k for k, v in slot_to_type.items() if v == 'cat']  # 'domain-slot' keys typed 'cat', in slot_to_type's insertion order
+
+da_cat_slot_values = {  # 'domain-slot' -> list of values; presumably the closed value set used for dialogue-act slots - confirm
+    # 'hotel-stay': ['1', '2', '3', '4', '5'],
+    'hotel-internet': ['free', 'no', 'none', 'yes'],
+    'hotel-parking': ['free', 'no', 'none', 'yes']
+}
+
+state_cat_slot_values = {}  # starts empty; presumably filled while the data is processed - confirm
+
+multiwoz_desc = {  # description strings per domain ('domain' key = the domain itself, other keys = slots), plus an 'intents' table
+    'taxi': {
+        'domain': 'taxi information query system',
+        'taxi_phone': 'taxi phone number',
+        'taxi_types': 'taxi type',
+    },
+    'restaurant': {
+        'domain': 'restaurant information query system',
+        'address': 'exact location of the restaurant',
+        'postcode': 'postcode of the restaurant',
+        'phone': 'restaurant phone number',
+        'choice': 'number of restaurants meeting requests of user',
+    },
+    'attraction': {
+        'domain': 'an entertainment that is offered to the public',
+        'address': 'details of where the attraction is',
+        'postcode': 'postcode of the attraction',
+        'phone': 'phone number of the attraction',
+        'entrance fee': 'the fee charged for admission to the attraction',
+        'pricerange': 'the price range for the attraction, from cheap to expensive',
+        'choice': 'number of attractions matching requests of user'
+    },
+    'booking': {
+        'domain': 'to arrange with a taxi, restaurant, train, etc.',
+        'time': 'time for an order',
+        'day': 'day for an order, from monday to sunday',
+        'stay': 'for how long the user wish to be at a place',
+        'people': 'how many person the order is for',
+        'name': 'name of the ordered place',
+        'Ref': 'reference number of the order'
+    },
+    'train': {
+        'domain': 'query and order a train',
+        'duration': 'the length of time the train trip lasts',
+        'Ref': 'reference number of the order',
+        'price': 'price for the train ticket',
+        'choice': 'number of trains that meets requests of the user',
+    },
+    'hotel': {
+        'domain': 'to query hotel information and place an order',
+        'address': 'exact location of the hotel',
+        'postcode': 'postcode of the hotel',
+        'phone': 'hotel phone number',
+        'choice': 'number of hotels that meets requests of the user',
+    },
+    'police': {
+        'domain': 'find police stations',
+        'address': 'exact location of the police station',
+        'postcode': 'postcode of the police station',
+        'phone': 'police station phone number',
+    },
+    'intents': {  # intent name -> description
+        'inform': 'inform user of value for a certain slot',
+        'request': 'ask for value of a slot',
+        'nobook': 'inform user of booking failure',
+        'reqmore': 'ask user for more instructions',
+        'book': 'place an order for user',
+        'bye': 'end a conversation and say goodbye to user',
+        'thank': 'express gratitude',
+        'welcome': 'welcome',
+        'offerbooked': 'inform user that an order is succussful',  # NOTE(review): "succussful" is misspelled; the same text appears in the ontology data, so change both together
+        'recommend': 'recommend a choice for user request',
+        'greet': 'express greeting',
+        'nooffer': 'inform user that no options matches user request',
+        'offerbook': 'offer to place an order for user',
+        'select': 'provide several choices for user to choose from',
+    }
+}
+
+# Spelled-out form of the numbers '0'..'10'; used to match a numeric value that the
+# utterance writes as a word (e.g. value '2' vs. "two people").
+digit2word = {
+    '0': 'zero', '1': 'one', '2': 'two', '3': 'three', '4': 'four', '5': 'five',
+    '6': 'six', '7': 'seven', '8': 'eight', '9': 'nine', '10': 'ten'
+}
+
+
+def pharse_in_sen(phrase, sen):
+    '''
+    Find `phrase` in `sen` as a delimiter-bounded, case-insensitive match.
+
+    If `phrase` is a digit string listed in `digit2word`, its spelled-out form is tried
+    first and the literal digits second. Previously the literal digits were never tried
+    in that case, because the word form overwrote `phrase` before the fallback search.
+
+    :param phrase: str, the value to look for
+    :param sen: str, the sentence to search in
+    :return: ((start, end), num) where (start, end) is the character span of the first
+             match and num is the number of non-overlapping matches of the same pattern;
+             ((None, None), 0) if nothing matched
+    '''
+    assert isinstance(phrase, str)
+    # Generic pattern: the value must be surrounded by whitespace/punctuation or the
+    # sentence boundaries.
+    pw = r'(^|[\s,\.:\?!-])(?P<v>{})([\s,\.:\?!-]|$)'
+    # Numeric pattern: additionally refuses ',', '.' or ':' that sit between two digits,
+    # so a number is not matched inside a time or a decimal such as "12:30".
+    pn = r'(^|[\s\?!-]|\D[,\.:])(?P<v>{})($|[\s\?!-]|[,\.:]\D|[,\.:]$)'
+
+    # (text to search for, pattern template), in priority order.
+    candidates = []
+    if phrase.isdigit() and phrase in digit2word:
+        candidates.append((digit2word[phrase], pw))
+    candidates.append((phrase, pn if phrase.isdigit() else pw))
+
+    for text, pattern in candidates:
+        p = re.compile(pattern.format(re.escape(text)), re.I)
+        m = p.search(sen)
+        if m:
+            return m.span('v'), len(p.findall(sen))
+    return (None, None), 0
+
+
+
+
+
+def update_state(state, update):
+    '''
+    Merge `update` into `state` in place, one service at a time.
+
+    :param state: dict mapping service -> dict of slot values; modified in place
+    :param update: dict of the same shape holding the new values
+    :return: None
+    '''
+    for service in update:
+        incoming = update[service]
+        if service in state:
+            # known service: overwrite / add its slots
+            state[service].update(incoming)
+        else:
+            # unseen service: store an independent copy of the whole entry
+            state[service] = copy.deepcopy(incoming)
+
+
+def _word_span_to_char_span(utt_list, start_w, end_w):
+    '''
+    Turn a word-index span into character offsets of the single-space-joined words.
+
+    :param utt_list: list of str, the utterance split on whitespace
+    :param start_w: index of the first word of the span
+    :param end_w: index of the last word of the span (inclusive)
+    :return: (start_ch, end_ch), or None if an index lies beyond len(utt_list)
+    '''
+    if start_w > len(utt_list) or end_w > len(utt_list):
+        return None
+    # every preceding word contributes its length plus one separating space
+    start_ch = sum(len(utt_list[i]) + 1 for i in range(start_w))
+    end_ch = start_ch + sum(len(utt_list[i]) + 1 for i in range(start_w, end_w))
+    try:
+        end_ch += len(utt_list[end_w])
+    except IndexError:
+        # end_w == len(utt_list): there is no such word. Keep the offset computed so
+        # far; the caller still compares the resulting slice against the value.
+        pass
+    return start_ch, end_ch
+
+
+def _find_value_span(utt, span_info, value):
+    '''
+    Return the first annotated span whose text in `utt` equals `value`.
+
+    :param utt: str, the utterance
+    :param span_info: dict with parallel lists 'start_word' and 'end_word'
+    :param value: str, the text the span has to cover exactly
+    :return: (start_ch, end_ch) or None if no annotated span matches
+    '''
+    utt_list = utt.split()
+    for start_w, end_w in zip(span_info['start_word'], span_info['end_word']):
+        char_span = _word_span_to_char_span(utt_list, start_w, end_w)
+        if char_span is None:
+            continue
+        start_ch, end_ch = char_span
+        if utt[start_ch: end_ch] == value:
+            return char_span
+    return None
+
+
+def _append_binary_da(converted_da, binary_ont, intent, domain, slot):
+    '''Append a binary dialogue act and register it in `binary_ont` if it is new.'''
+    converted_dict = {
+        'intent': intent,
+        'domain': domain,
+        'slot': slot,
+        'value': ''
+    }
+    converted_da['binary'].append(converted_dict)
+    if converted_dict not in binary_ont:
+        binary_ont.append(converted_dict)
+
+
+def convert_da(utt, da_dict, binary_ont, intent_ont, did, tid, da_cat_slot_values):
+    '''
+    convert multiwoz dialogue acts to required format
+
+    Acts of the 'police', 'hospital' and 'bus' domains, and acts whose slot is not in
+    REF_SYS_DA, are dropped. 'general' acts, slot-less acts and all 'request' acts become
+    binary acts; the rest are split into categorical / non-categorical according to
+    slot_to_type, with character offsets attached when a span annotation matches the
+    utterance text exactly.
+
+    A non-categorical act whose span annotation does not line up with the utterance is
+    now kept without offsets. It used to be dropped silently because the fallback that
+    handles this case could never run.
+
+    :param utt: user or system utt
+    :param da_dict: multiwoz da, dict keyed by (Domain-Act, Slot, value); each entry may
+                    carry parallel 'start_word' / 'end_word' lists
+    :param binary_ont: binary ontology, list extended in place with new binary acts
+    :param intent_ont: intent ontology, dict extended in place with new intents
+    :param did: unused
+    :param tid: unused
+    :param da_cat_slot_values: dict '<domain>-<slot>' -> list of values; every categorical
+                               value encountered is appended (duplicates included)
+    :return: dict with the lists 'categorical', 'non-categorical' and 'binary'
+    '''
+    converted_da = {
+        'categorical': [],
+        'non-categorical': [],
+        'binary': []
+    }
+
+    for Domain_Act, S, v in da_dict:
+        Domain, Act = Domain_Act.split('-')
+        domain = Domain.lower()
+        intent = Act.lower()
+        if domain in ['police', 'hospital', 'bus']:
+            continue
+
+        if intent not in intent_ont:
+            intent_ont[intent] = {}
+
+        # general domain is converted to empty domain. e.g. thank, bye
+        if Domain == 'general':
+            assert S == 'none'
+            assert v == 'none'
+            _append_binary_da(converted_da, binary_ont, intent, '', '')
+            continue
+
+        try:
+            reformated_slot = REF_SYS_DA[Domain][S]
+        except KeyError:
+            # slot not in REF_SYS_DA, drop
+            continue
+
+        # if slot is None, da should be converted into binary
+        if reformated_slot is None:
+            if not (S == 'none' and v == 'none'):
+                # mainly for `Open` slot
+                continue
+            # e.g. Booking-Inform none none, Train-OfferBook none none
+            _append_binary_da(converted_da, binary_ont, intent, domain, '')
+            continue
+
+        if intent == 'request':
+            _append_binary_da(converted_da, binary_ont, 'request', domain, reformated_slot)
+            continue
+
+        reformated_domain_slot = domain + '-' + reformated_slot
+        span_info = da_dict[(Domain_Act, S, v)]
+
+        if reformated_domain_slot in slot_to_type and slot_to_type[reformated_domain_slot] == 'cat':
+            # the span text is compared with the value as annotated, the stored value is
+            # lower-cased
+            origin_v = v
+            v = v.lower()
+            if reformated_domain_slot not in da_cat_slot_values:
+                da_cat_slot_values[reformated_domain_slot] = []
+            da_cat_slot_values[reformated_domain_slot].append(v)
+            converted_dict = {
+                'intent': intent,
+                'domain': domain,
+                'slot': reformated_slot,
+                'value': v
+            }
+            converted_da['categorical'].append(converted_dict)
+            if 'start_word' in span_info:
+                char_span = _find_value_span(utt, span_info, origin_v)
+                if char_span is not None:
+                    converted_dict.update({
+                        'start': char_span[0],
+                        'end': char_span[1]
+                    })
+        else:
+            converted_dict = {
+                'intent': intent,
+                'domain': domain,
+                'slot': reformated_slot,
+                'value': v
+            }
+            # no span annotation, or none that matches the utterance: keep the act
+            # without character offsets
+            if 'start_word' in span_info:
+                char_span = _find_value_span(utt, span_info, v)
+                if char_span is not None:
+                    converted_dict['start'] = char_span[0]
+                    converted_dict['end'] = char_span[1]
+            converted_da['non-categorical'].append(converted_dict)
+    return converted_da
+
+
+def get_state_update(prev_state, cur_state, dialog, did, tid, utt, coref_dict, slot_notfound_dict, da_cat_slot_values):
+    '''
+    Compute which slot values changed between two belief states and try to locate each
+    new value in the dialogue history.
+
+    For every changed slot a list of candidate surface forms is built (the value itself,
+    its group in the module-level `synonyms`, the outputs of get_time_variants /
+    get_genitive_variants / get_bb_variants, and the value without a trailing
+    ' restaurant' / ' hotel'). Slots named internet, parking, pricerange, day, area or
+    stars are treated as categorical and checked against state_cat_slot_value_dict.
+    Every other slot is searched for, in this order: coreference annotations, spans of
+    earlier non-categorical dialogue acts, an exact match in an earlier utterance, and a
+    fuzzy match in an earlier utterance.
+
+    Two defects of the earlier version are fixed here: the matching synonym group is
+    copied before it is extended (it used to be modified in place, so the module-level
+    `synonyms` grew from call to call), and the coreference branch records
+    `fixed_value` on the non-categorical entry it has just appended (it used to write
+    to the last categorical entry).
+
+    :param prev_state: dict domain -> {slot: value} of the previous turn, or None
+    :param cur_state: dict domain -> {slot: value} of the current turn
+    :param dialog: converted dialogue; dialog['turns'] holds the turns processed so far
+    :param did: unused
+    :param tid: unused
+    :param utt: unused
+    :param coref_dict: dict keyed by (Domain-Act, Slot, value) with the fields
+                       'coref_value', 'turn', 'pos' ('<start_word>-<end_word>') and 'utt'
+    :param slot_notfound_dict: dict (domain, slot) -> count of values that could not be
+                               located; updated in place
+    :param da_cat_slot_values: dict '<domain>-<slot>' -> list of values; updated in place
+    :return: (state_update, notfoundnum, total_value, ret_diff_state) where state_update
+             holds the 'categorical' / 'non-categorical' update entries, notfoundnum and
+             total_value count unlocated and changed slots, and ret_diff_state is the
+             state difference with values replaced by their located form or 'not found'
+    '''
+    prev_turns = dialog['turns']
+    state_update = {'categorical': [], 'non-categorical': []}
+    notfoundnum = 0
+    total_value = 0
+
+    diff_state = {}
+    if prev_state is None:
+        # first turn: every non-empty slot counts as changed
+        diff_state = {domain: {slot: value for slot, value in cur_state[domain].items() if value != ''} for domain in
+                      cur_state}
+    else:
+        # the print() only runs (as the assertion message) when the assertion fails
+        assert len(prev_state) == len(cur_state), print(prev_state, cur_state)
+        for domain, domain_state in prev_state.items():
+            if domain not in diff_state:
+                diff_state[domain] = {}
+            for slot, value in domain_state.items():
+                if value != cur_state[domain][slot]:
+                    diff_state[domain][slot] = cur_state[domain][slot]
+
+    ret_diff_state = copy.deepcopy(diff_state)
+
+    for domain in diff_state:
+        for slot in diff_state[domain]:
+
+            total_value += 1
+            # for 'a|b' style values only the first alternative is searched for
+            if '|' in diff_state[domain][slot]:
+                value = diff_state[domain][slot].split('|')[0]
+            else:
+                value = diff_state[domain][slot]
+
+            value_list = [value]
+            for _synonyms in synonyms:
+                if value in _synonyms:
+                    # copy: value_list is extended below and must not alter `synonyms`
+                    value_list = list(_synonyms)
+
+            value_list.extend(get_time_variants(value))
+            value_list.extend(get_genitive_variants(value))
+            value_list.extend(get_bb_variants(value))
+
+            if value.endswith(' restaurant'):
+                value_list.append(value.split(' restaurant')[0])
+            if value.endswith(' hotel'):
+                value_list.append(value.split(' hotel')[0])
+            found = False
+            for value in value_list:
+                # categorical slots: only the first candidate is ever examined, because
+                # this branch always ends with `break`
+                if slot in ['internet', 'parking', 'pricerange', 'day', 'area', 'stars']:
+                    reformated_domain_slot = '-'.join([domain, slot])
+                    if reformated_domain_slot in state_cat_slot_value_dict and (value in state_cat_slot_value_dict[reformated_domain_slot] or value in ['dontcare', '', 'none', 'not mentioned']):
+                        state_update['categorical'].append({
+                            'domain': domain,
+                            'slot': slot,
+                            'value': diff_state[domain][slot]
+                        })
+                        if domain + '-' + slot not in da_cat_slot_values:
+                            da_cat_slot_values[domain + '-' + slot] = [diff_state[domain][slot]]
+                        da_cat_slot_values[domain + '-' + slot].append(diff_state[domain][slot])
+                        if value != diff_state[domain][slot]:
+                            state_update['categorical'][-1].update({'fixed_value': value})
+                            ret_diff_state[domain][slot] = value
+                        else:
+                            # attach the span of the most recent categorical act that
+                            # carries the same value and has a span
+                            for _turn in prev_turns[::-1]:
+                                found = False
+                                for da in _turn['dialogue_act']['categorical']:
+                                    if da['value'] == value:
+                                        if 'start' in da:
+                                            state_update['categorical'][-1].update({
+                                                'utt_idx': _turn['utt_idx'],
+                                                'start': da['start'],
+                                                'end': da['end'],
+                                                'from': 'prev_da_span'
+                                            })
+                                            found = True
+                                            break
+                                if found:
+                                    break
+                    else:
+                        state_update['categorical'].append({
+                            'domain': domain,
+                            'slot': slot,
+                            'value': diff_state[domain][slot],
+                            'fixed_value': 'not found'
+                        })
+                        if domain + '-' + slot not in da_cat_slot_values:
+                            da_cat_slot_values[domain + '-' + slot] = [diff_state[domain][slot]]
+                        da_cat_slot_values[domain + '-' + slot].append(diff_state[domain][slot])
+                        ret_diff_state[domain][slot] = 'not found'
+                        notfoundnum += 1
+                    found = True
+                    break
+
+                # process value ---> none
+                assert value not in ['none', 'not mentioned']
+                if value in ['', 'dontcare']:
+                    state_update['non-categorical'].append({
+                        'domain': domain,
+                        'slot': slot,
+                        'value': diff_state[domain][slot]
+                    })
+                    found = True
+                    break
+
+                # first look for values in coref_dict
+                # NOTE(review): there is no `break` after a hit, so every matching
+                # coreference annotation appends its own entry -- confirm this is wanted.
+                for _Domain_Act, _Slot, _value in coref_dict:
+                    _domain, _act = _Domain_Act.lower().split('-')
+                    _slot = _Slot.lower()
+                    _coref_value = coref_dict[(_Domain_Act, _Slot, _value)]['coref_value']
+                    if _coref_value == '':
+                        continue
+                    _coref_turn = coref_dict[(_Domain_Act, _Slot, _value)]['turn']
+                    if _coref_turn == -1:
+                        continue
+                    _coref_pos = coref_dict[(_Domain_Act, _Slot, _value)]['pos']
+                    if _coref_pos == '':
+                        continue
+                    _utt = coref_dict[(_Domain_Act, _Slot, _value)]['utt']
+                    if _domain == domain and _slot == slot and value == _coref_value:
+
+                        # word span (inclusive) -> character span
+                        start_w, end_w = [int(p) for p in _coref_pos.split('-')]
+                        utt_list = _utt.split()
+                        start_ch = 0
+                        for i in range(start_w):
+                            start_ch += len(utt_list[i]) + 1
+                        end_ch = start_ch
+                        for i in range(start_w, end_w + 1):
+                            end_ch += len(utt_list[i]) + 1
+                        end_ch -= 1
+
+                        if not _utt[start_ch: end_ch] == _coref_value:
+                            continue
+
+                        state_update['non-categorical'].append({
+                            'domain': domain,
+                            'slot': slot,
+                            'value': diff_state[domain][slot],
+                            'from': 'coref',
+                            'utt_idx': _coref_turn,
+                            'start': start_ch,
+                            'end': end_ch
+                        })
+                        if value != diff_state[domain][slot]:
+                            # belongs on the non-categorical entry appended just above
+                            state_update['non-categorical'][-1].update({'fixed_value': value})
+                            ret_diff_state[domain][slot] = value
+                        found = True
+
+                if found:
+                    break
+
+                # from da annotation: any earlier non-categorical act with a similar
+                # value and a span (domain and slot of the act are not compared)
+                for _turn in prev_turns[::-1]:
+                    for da in _turn['dialogue_act']['non-categorical']:
+                        if fuzz.ratio(da['value'], value) > 85:
+
+                            if 'start' in da:
+                                found = True
+                                state_update['non-categorical'].append({
+                                    'domain': domain,
+                                    'slot': slot,
+                                    'value': diff_state[domain][slot],
+                                    'utt_idx': _turn['utt_idx'],
+                                    'start': da['start'],
+                                    'end': da['end'],
+                                    'from': 'prev_da_span'
+                                })
+                                if value != diff_state[domain][slot]:
+                                    state_update['non-categorical'][-1].update({'fixed_value': value})
+                                    ret_diff_state[domain][slot] = value
+                                if da['value'] != value:
+                                    state_update['non-categorical'][-1].update({'fixed_value':da['value']})
+                                    ret_diff_state[domain][slot] = da['value']
+
+                                break
+                    if found:
+                        break
+
+                if found:
+                    break
+
+                # from utterance: exact, delimiter-bounded match
+                for _turn in prev_turns[::-1]:
+                    _utt = _turn['utterance']
+                    (start, end), num = pharse_in_sen(str(value), _utt)
+                    if num:
+                        # the match is either the value itself or its spelled-out number
+                        assert value.lower() == _utt[start:end].lower() \
+                               or digit2word[value].lower() == _utt[start:end].lower()
+                        found = True
+                        state_update['non-categorical'].append({
+                            'domain': domain,
+                            'slot': slot,
+                            'value': diff_state[domain][slot],
+                            'from': 'prev_utt',
+                            'utt_idx': _turn['utt_idx'],
+                            'start': start,
+                            'end': end
+                        })
+                        if value != diff_state[domain][slot]:
+                            state_update['non-categorical'][-1].update({'fixed_value': value})
+                            ret_diff_state[domain][slot] = value
+                        if value != _utt[start:end].lower():
+                            state_update['non-categorical'][-1].update({'fixed_value': _utt[start:end].lower()})
+                            ret_diff_state[domain][slot] = _utt[start:end].lower()
+                        found = True
+                        break
+                if found:
+                    break
+
+                # from utterance: fuzzy match around each block difflib reports
+                if not value.isdigit():
+                    for _turn in prev_turns[::-1]:
+                        _utt = _turn['utterance']
+
+                        s = difflib.SequenceMatcher(None, _utt, value)
+                        matches = s.get_matching_blocks()
+
+                        for i, j, n in matches:
+                            possible_value = _utt[i: i+len(value)]
+
+                            # window ends inside a word: extend it to the end of that word
+                            if i+ len(value) < len(_utt) and _utt[i+len(value)] not in [ ' ', ',', '.', '?', '!', '/'] :
+                                possible_value += _utt[i+len(value):].split()[0]
+
+                                if possible_value.startswith('th '):
+                                    possible_value = possible_value[3:]
+                                    i += 3
+                            if i > 0 and _utt[i-1] not in [ ' ', ',', '.', '?', '!', '/']:
+                                # cut first incomplete word
+                                if len(possible_value.split()) > 1:
+                                    i += len(possible_value.split()[0]) + 1
+                                    possible_value = ' '.join(possible_value.split()[1:])
+
+                            if fuzz.token_sort_ratio(value, possible_value) > 92 or possible_value.startswith('ashley hotel and lovell lodge') :
+                                found = True
+
+                                state_update['non-categorical'].append({
+                                            'domain': domain,
+                                            'slot': slot,
+                                            'value': diff_state[domain][slot],
+                                            'from':'prev_utt',
+                                            'utt_idx': _turn['utt_idx'],
+                                            'start': i,
+                                            'end': i+len(possible_value)
+                                        })
+                                if value != diff_state[domain][slot]:
+                                    state_update['non-categorical'][-1].update({'fixed_value': value})
+                                    ret_diff_state[domain][slot] = value
+                                if possible_value != value:
+                                    state_update['non-categorical'][-1].update({'fixed_value': possible_value})
+                                    ret_diff_state[domain][slot] = possible_value
+                                break
+                            if found:
+                                break
+                        if found:
+                            break
+
+                if found:
+                    break
+            if not found:
+                # none of the candidates could be located anywhere in the history
+                if (domain, slot) not in slot_notfound_dict:
+                    slot_notfound_dict[(domain, slot)] = 1
+                else:
+                    slot_notfound_dict[(domain, slot)] += 1
+                state_update['non-categorical'].append({
+                    'domain': domain,
+                    'slot': slot,
+                    'value': diff_state[domain][slot],
+                    'fixed_value': 'not found'
+                })
+                ret_diff_state[domain][slot] = 'not found'
+                notfoundnum += 1
+    return state_update, notfoundnum, total_value, ret_diff_state
+
+
+def merge_data_annotation():
+    '''
+    Load the original dialogues from `original_data/data.json` under `self_dir`.
+
+    Despite the name, nothing is merged any more: an earlier version copied span,
+    dialogue-act and coreference annotations from a second data set into each turn
+    whose text was identical, and that step has been removed.
+
+    :return: the parsed content of data.json (preprocess() iterates it as a dict of
+             dialogue id -> dialogue)
+    '''
+    extract_dir = os.path.join(self_dir, 'original_data')
+    # context manager so the file handle is closed as soon as parsing is done
+    with open(os.path.join(self_dir, extract_dir, 'data.json')) as f:
+        data21 = json.load(f)
+    return data21
+
+
+def preprocess(da_cat_slot_values, state_cat_slot_values):
+    all_data = []
+    binary_ont = []
+    intent_ont = {}
+    state_ont = {}
+
+    data_splits = ['train', 'val', 'test']
+    # data_splits = ['test']
+    extract_dir = os.path.join(self_dir, 'original_data')
+    num_train_dialogue = 0
+    num_train_utt = 0
+
+    num_match_error_da_span = 0
+
+    if not os.path.exists('data.zip') or not os.path.exists('ontology.json'):
+        # for data_split in data_splits:
+        # data_zip_file = os.path.join(self_dir, 'original_data.zip')
+        # if not os.path.exists(data_zip_file):
+        #     raise FileNotFoundError(data_zip_file)
+
+        # logging.info('unzip multiwoz data to {}'.format(extract_dir))
+        # archive = zipfile.ZipFile(data_zip_file, 'r')
+        # archive.extractall(extract_dir)
+
+        data = merge_data_annotation()
+        # exit()
+        # data = json.load(open(os.path.join(self_dir, extract_dir, 'data_meta_fixed.json')))
+        train_list = open(os.path.join(self_dir, extract_dir, 'trainListFile')).read().split()
+        val_list = open(os.path.join(self_dir, extract_dir, 'valListFile')).read().split()
+        test_list = open(os.path.join(self_dir, extract_dir, 'testListFile')).read().split()
+
+        total_not_found_slot = 0
+        total_slot = 0
+        total_turn = 0
+        total_not_found_turn = 0
+        total_not_found_state = 0
+
+        slot_notfound_dict = {}
+
+        dialog_idx = 0
+        for dialog_id, dialog in tqdm(data.items()):
+
+            acc_not_found_flag = False
+
+            coref_dict = {}
+
+            data_split = None
+            for _split in data_splits:
+                if dialog_id.strip('.json') in eval(_split + '_list'):
+                    data_split = _split
+                    break
+            # assert data_split is not None
+            # if data_split != 'test':
+            #     continue
+            if data_split is None:
+                continue
+
+            if data_split == 'train':
+                num_train_dialogue += len(data)
+
+            dialog_idx += 1
+            # if dialog_idx > 10:
+            #     break
+            converted_dialogue = {
+                'dataset': 'multiwoz21',
+                'data_split': data_split,
+                'dialogue_id': 'multiwoz21_' + str(dialog_idx),
+                'original_id': dialog_id,
+                'domains': [d for d in dialog['goal'] if
+                            len(dialog['goal'][d]) != 0 and d in multiwoz_desc and d not in ['police', 'hospital', 'bus']],
+                'turns': [],
+            }
+
+            if data_split == 'train':
+                num_train_utt += len(dialog['log'])
+
+            prev_state = None
+            accum_fixed_state = {}
+            for turn_id, turn in enumerate(dialog['log']):
+
+                utt = turn['text'].lower()
+                # for several wrong words
+                utt = utt.replace('seeuni', 'see uni')
+
+                utt = ' '.join(utt.split())
+                utt = utt.replace(' im ', ' i\'m ')
+                utt = utt.replace(' dont ', ' don\'t ')
+                utt = utt.replace(' thats ', ' that\'s ')
+                utt = utt.replace('idon\'t', ' i don\'t ')
+                utt = utt.replace('wedon\'t ', 'we don\'t ')
+                utt = utt.replace('id be ', 'i\'d be ')
+                # utt = utt.replace('cambridgethat\'svery ', 'cambridge that\'s very')
+                utt = re.sub(r'^im ', 'i\'m ', utt)
+                utt = re.sub(r'^whats ', 'what\'s ', utt)
+                utt = re.sub(r'^id ', 'i\'d ', utt)
+                utt = re.sub(r'^thats ', 'that\'s ', utt)
+
+                utt = re.sub( r'([a-zA-Z0-9])([,.!\'-\?"~])', r'\1 \2', utt)
+                utt = re.sub(r'([,.!\'-\?"~])([a-zA-Z0-9])', r'\1 \2', utt)
+
+                das = turn.get('dialog_act', [])
+                role = 'user' if turn_id % 2 == 0 else 'system'
+                spans = turn.get('span_info', [])
+
+                da_dict = {}
+                for Domain_Act in das:
+                    Domain = Domain_Act.split('-')[0]
+                    if Domain.lower() not in converted_dialogue['domains'] and Domain.lower() not in ['general', 'booking']:
+                        continue
+
+                    Svs = das[Domain_Act]
+                    for S, v in Svs:
+                        v = v.lower()
+                        if v.startswith('th '):
+                            # print(v)
+                            v = v[3:]
+                        if v.startswith('he '):
+                            # print(v)
+                            v = v[3:]
+
+                        if (Domain_Act, S, v) not in da_dict:
+                            da_dict[(Domain_Act, S, v)] = {}
+
+                for span in spans:
+                    Domain_Act, S, v, start_word, end_word = span
+                    v = v.lower()
+                    if not (Domain_Act, S, v) in da_dict:
+                        # logging.info('span da annotation not found in multiwoz da label')
+                        # logging.info(dialog_id, turn_id)
+                        # logging.info((Domain_Act, S, v))
+                        # logging.info(da_dict)
+                        num_match_error_da_span += 1
+                    else:
+                        if v.startswith('th '):
+                            # print(v)
+                            v = v[3:]
+                            start_word += 3
+                        if v.startswith('he '):
+                            # print(v)
+                            v = v[3:]
+                            start_word += 3
+
+                        if 'start_word' not in da_dict[(Domain_Act, S, v)]:
+                            da_dict[(Domain_Act, S, v)]['start_word'] = []
+                            da_dict[(Domain_Act, S, v)]['end_word'] = []
+
+                        da_dict[(Domain_Act, S, v)]['start_word'].append(start_word)
+                        da_dict[(Domain_Act, S, v)]['end_word'].append(end_word)
+
+                converted_turn = {
+                    'utt_idx': turn_id,
+                    'speaker': role,
+                    'utterance': utt,
+                    'dialogue_act': convert_da(utt, da_dict, binary_ont, intent_ont, dialog_id, turn_id, da_cat_slot_values),
+                }
+
+                # for state annotations
+                if role == 'system':
+                    turn_state = turn['metadata']
+                    cur_state = {}
+                    for domain in turn_state:
+                        if domain in ['police', 'hospital', 'bus']:
+                            continue
+                        if domain not in converted_dialogue['domains']:
+                            continue
+                        cur_state[domain] = {}
+                        for subdomain in ['semi', 'book']:
+                            for slot in turn_state[domain][subdomain]:
+                                if slot == 'booked':
+                                    continue
+                                if slot == 'ticket':  # or (domain == 'train' and slot == 'people'):
+                                    # for cases where domain slot exists in REF but not in state
+                                    # because of check in evaluate.py
+                                    continue
+
+                                else:
+                                    fixed_slot = slot
+                                state_ds = domain + '-' + fixed_slot
+                                if state_ds not in slot_to_type:
+                                    logging.info('state slot not defined in da list')
+                                    logging.info(state_ds)
+                                if turn_state[domain][subdomain][slot] in ['', [], 'not mentioned', 'none']:
+                                    cur_state[domain][fixed_slot] = ""
+                                else:
+                                    if turn_state[domain][subdomain][slot].startswith('th '):
+                                        # print('state')
+                                        # print(turn_state[domain][subdomain][slot])
+                                        turn_state[domain][subdomain][slot] = turn_state[domain][subdomain][slot][3:]
+                                    if turn_state[domain][subdomain][slot].startswith('he '):
+                                        # print('state')
+                                        # print(turn_state[domain][subdomain][slot])
+                                        turn_state[domain][subdomain][slot] = turn_state[domain][subdomain][slot][3:]
+
+                                    cur_state[domain][fixed_slot] = turn_state[domain][subdomain][slot]
+
+                                if domain not in state_ont:
+                                    state_ont[domain] = []
+                                if fixed_slot not in state_ont[domain]:
+                                    state_ont[domain].append(fixed_slot)
+
+                        if domain == 'train' and 'people' not in cur_state[domain]:
+                            cur_state[domain]['people'] = ''
+                        # if len(converted_turn['state'][domain]) == 0:
+                        #     converted_turn['state'].pop(domain)
+                        if len(converted_dialogue['turns']) > 0:
+                            # move state from system side to user side
+                            converted_dialogue['turns'][-1]['state'] = copy.deepcopy(cur_state)
+
+                    # for state update annotations
+                    state_update, _notfoundslot, _totalslot, ret_diff_state = get_state_update(prev_state, cur_state, converted_dialogue,
+                                                                               dialog_id, turn_id, turn['text'], coref_dict,
+                                                                               slot_notfound_dict, da_cat_slot_values)
+
+                    update_state(accum_fixed_state, ret_diff_state)
+                    for domain in accum_fixed_state:
+                        for slot in accum_fixed_state[domain]:
+                            assert isinstance(accum_fixed_state[domain][slot], str), print(accum_fixed_state[domain][slot])
+
+                    if _notfoundslot == 0:
+                        # for slot in state_update['categorical']:
+                        #     assert 'fixed_value' not in slot
+                        for slot in state_update['non-categorical']:
+                            if slot['value'] not in ['', 'dontcare']:
+                                assert 'utt_idx' in slot
+
+                    else:
+                        flag = False
+                        for slot in state_update['categorical']:
+                            if 'fixed_value' in slot:
+                                flag = True
+                                break
+                        for slot in state_update['non-categorical']:
+                            if 'utt_idx' not in slot:
+                                flag = True
+                                break
+                        assert flag, print(flag, state_update['non-categorical'])
+
+                    total_turn += 1
+                    total_slot += _totalslot
+                    total_not_found_slot += _notfoundslot
+                    total_not_found_turn += 1 if _notfoundslot > 0 else 0
+                    if _notfoundslot > 0:
+                        acc_not_found_flag = True
+                    if acc_not_found_flag:
+                        total_not_found_state += 1
+
+                    coref_dict = {}
+                    converted_dialogue['turns'][-1]['state_update'] = copy.deepcopy(state_update)
+                    converted_dialogue['turns'][-1]['fixed_state'] = copy.deepcopy(accum_fixed_state)
+                    if 'state' not in converted_dialogue['turns'][-1]:
+                        converted_dialogue['turns'][-1]['state'] = {}
+                    prev_state = copy.deepcopy(cur_state)
+
+                converted_dialogue['turns'].append(converted_turn)
+
+                if 'coreference' in turn:
+                    for Domain_Act in turn['coreference']:
+                        for Slot, value, coref, coref_turn, coref_pos in turn['coreference'][Domain_Act]:
+                            value = value.lower()
+                            coref_dict[(Domain_Act, Slot, value)] = {'turn': coref_turn, 'pos': coref_pos,
+                                                                     'coref_value': coref,
+                                                                     'utt': converted_dialogue['turns'][coref_turn][
+                                                                         'utterance']}
+
+            check_spans(converted_dialogue)
+            # postprocess_update_spans(converted_dialogue)
+            if converted_dialogue['turns'][-1]['speaker'] == 'system':
+                converted_dialogue['turns'].pop(-1)
+            all_data.append(converted_dialogue)
+
+        print('total_turn', total_turn)
+        print('total_not_found_turn', total_not_found_turn)
+        print('total_slot', total_slot)
+        print('total_not_found_slot', total_not_found_slot)
+        print('total_not_found_state', total_not_found_state)
+        print(slot_notfound_dict)
+        from collections import Counter
+        # print({k : dict(Counter(v)) for k, v in cat_slot_values.items()})
+        json.dump({k : dict(Counter(v)) for k, v in state_cat_slot_values.items()}, open(os.path.join(self_dir, 'cat_slot_values.json'), 'w'), indent=4)
+        cat_slot_values = {k: list(set(v)) for k, v in state_cat_slot_values.items()}
+        da_cat_slot_values = {k: list(set(v)) for k, v in da_cat_slot_values.items()}
+
+        json.dump(all_data, open('data.json', 'w'), indent=4)
+        write_zipped_json(os.path.join(self_dir, './data.zip'), 'data.json')
+        os.remove('data.json')
+
+        new_ont = {
+            'domains': {},
+            'intents': {},
+            'binary_dialogue_act': {}
+        }
+
+        for d_s in slot_to_type:
+            d, s = d_s.split('-')
+            if d not in new_ont['domains']:
+                new_ont['domains'][d] = {
+                    'description': multiwoz_desc[d]['domain'],
+                    'slots': {}
+                }
+            domain_ont = new_ont['domains'][d]
+            assert s not in domain_ont
+            domain_ont['slots'][s] = {
+                'description': multiwoz_desc[d][s] if s in multiwoz_desc[d] else '',
+                'is_categorical': d_s in state_cat_slot_ds,
+                'possible_values': da_cat_slot_values[d_s] if d_s in state_cat_slot_ds else []
+            }
+            domain_ont['slots'][s]['possible_values'] = [_ for _ in domain_ont['slots'][s]['possible_values'] if _ not in ['dontcare', '']]
+
+        new_ont['state'] = {}
+        # print(state_cat_slot_value_dict)
+        print(state_ont)
+        for d in state_ont:
+            new_ont['state'][d] = {}
+            for s in state_ont[d]:
+                d_s = '-'.join([d, s])
+                new_ont['state'][d][s] = ''
+
+        new_ont['intents'] = {i: {'description': multiwoz_desc['intents'][i]} for i in intent_ont}
+        new_ont['binary_dialogue_act'] = binary_ont
+
+        slot_desc = json.load(open(os.path.join(self_dir, extract_dir, './slot_descriptions.json')))
+        for domain_slot in slot_desc:
+            _domain, _slot = domain_slot.split('-')
+            _desc = slot_desc[domain_slot][0]
+            if _slot == 'arriveby':
+                _slot = 'arriveBy'
+            elif _slot == 'leaveat':
+                _slot = 'leaveAt'
+            if 'book' in _slot:
+                _slot = _slot.replace('book ', '')
+            if not _domain in new_ont['state']:
+                # logging.info('domain {} not in state domains'.format(_domain))
+                continue
+            if _domain in new_ont['domains'] and _slot in new_ont['domains'][_domain]['slots']:
+                new_ont['domains'][_domain]['slots'][_slot]['description'] = _desc
+            if not _slot in new_ont['state'][_domain]:
+                logging.info('domain {} slot {} not in state'.format(_domain, _slot))
+                continue
+            # new_ont['state'][_domain][_slot] = ""
+            assert _domain in new_ont['domains'], print(_domain)
+            assert _slot in new_ont['domains'][_domain]['slots']
+
+        logging.info('num_match_error_da_span {}'.format(num_match_error_da_span))
+        json.dump(new_ont, open(os.path.join(self_dir, './ontology.json'), 'w'), indent=4)
+
+    else:
+        all_data = read_zipped_json(os.path.join(self_dir, './data.zip'), 'data.json')
+        new_ont = json.load(open(os.path.join(self_dir, './ontology.json'), 'r'))
+    logging.info('# dialogue: {}, # turn: {}'.format(num_train_dialogue, num_train_utt))
+    return all_data, new_ont
+
+
+# def postprocess_update_spans(dialog):
+#     changed_utt_idx_and_position = {}
+#     for turn in dialog['turns']:
+#         if turn['speaker'] != 'user':
+#             continue
+#         changed = False
+#         for _update in turn['state_update']['non-categorical']:
+#             if 'utt_idx' in _update:
+#                 utt_idx = _update['utt_idx']
+#                 start = _update['start']
+#                 end = _update['end']
+#
+#                 # assume at most one word changes for every utterance
+#                 if turn['utt_idx'] not in changed_utt_idx_and_position:
+#                     if utt_idx == turn['utt_idx'] and start-1 > -1 and turn['utterance'][start-1] not in [' ']:
+#                         changed_utt_idx_and_position[turn['utt_idx']] = start
+#                         print('=======================')
+#                         print(dialog['original_id'])
+#                         print(turn['utterance'])
+#                         print(json.dumps(_update, indent=2))
+#                         print(turn['utterance'][start: end])
+#                         turn['utterance'] = turn['utterance'][:start] + ' ' + turn['utterance'][start:]
+#                         print(turn['utterance'])
+#                         _update['start'] += 1
+#                         _update['end'] += 1
+#                         changed = True
+#                 if utt_idx not in changed_utt_idx_and_position:
+#                     continue
+#                 else:
+#                     value = _update['fixed_value'] if 'fixed_value' in _update and _update['fixed_value'] != 'not found' else _update['value']
+#                     if start >= changed_utt_idx_and_position[utt_idx]:
+#                         if dialog['turns'][utt_idx]['utterance'][_update['start']: _update['end']] != value:
+#                             assert dialog['turns'][utt_idx]['utterance'][_update['start']+1: _update['end']+1] == value, print(dialog['turns'][utt_idx]['utterance'], dialog['turns'][utt_idx]['utterance'][_update['start']+1: _update['end']+1])
+#                             _update['start'] += 1
+#                             _update['end'] += 1
+#                     elif start < changed_utt_idx_and_position[utt_idx] < end:
+#                         if dialog['turns'][utt_idx]['utterance'][_update['start']: _update['end']] != value:
+#                             assert (dialog['turns'][utt_idx]['utterance'][_update['start']: _update['end']+1]).replace(' ', '') == value.replace(' ', ''), print(dialog['turns'][utt_idx]['utterance'], dialog['turns'][utt_idx]['utterance'][_update['start']: _update['end']+1], value)
+#                             print('fix')
+#                             print(_update)
+#                             _update['end'] += 1
+#                             _update['fixed_value'] = turn['utterance'][_update['start']: _update['end'] + 1].strip()
+#                             print(_update)
+#         if changed:
+#             for _update in turn['state_update']['non-categorical']:
+#                 if 'utt_idx' in _update:
+#                     utt_idx = _update['utt_idx']
+#                     start = _update['start']
+#                     end = _update['end']
+#
+#                     if utt_idx not in changed_utt_idx_and_position:
+#                         continue
+#                     else:
+#                         value = _update['fixed_value'] if 'fixed_value' in _update and _update[
+#                             'fixed_value'] != 'not found' else _update['value']
+#                         if start >= changed_utt_idx_and_position[utt_idx]:
+#                             if dialog['turns'][utt_idx]['utterance'][_update['start']: _update['end']] != value:
+#                                 assert dialog['turns'][utt_idx]['utterance'][_update['start'] + 1: _update['end'] + 1] == value
+#                                 _update['start'] += 1
+#                                 _update['end'] += 1
+#                         elif start < changed_utt_idx_and_position[utt_idx] < end:
+#                             if dialog['turns'][utt_idx]['utterance'][_update['start']: _update['end']] != value:
+#                                 print('====================fix===================')
+#                                 print(_update)
+#                                 assert (dialog['turns'][utt_idx]['utterance'][_update['start']: _update['end']+1]).replace(' ', '') == value.replace(' ', ''), print(dialog['turns'][utt_idx]['utterance'], dialog['turns'][utt_idx]['utterance'][_update['start']+1: _update['end']+1])
+#                                 _update['end'] += 1
+#                                 _update['fixed_value'] = dialog['turns'][utt_idx]['utterance'][_update['start']: _update['end'] + 1]
+#                                 print(_update)
+#     for turn in dialog['turns']:
+#         if turn['speaker'] != 'user':
+#             continue
+#         for _update in turn['state_update']['non-categorical']:
+#             if 'utt_idx' in _update:
+#                 value = _update['fixed_value'] if 'fixed_value' in _update and _update[
+#                     'fixed_value'] != 'not found' else _update['value']
+#                 utt_idx = _update['utt_idx']
+#                 start = _update['start']
+#                 end = _update['end']
+#                 if dialog['turns'][utt_idx]['utterance'][start] == ' ':
+#                     _update['start'] += 1
+#                     _update['fixed_value'] = value[1:]
+#                     value = value[1:]
+#                     start += 1
+#                 assert dialog['turns'][utt_idx]['utterance'][start: end] == value, print(json.dumps(turn, indent=4), [c for c in dialog['turns'][utt_idx]['utterance'][start: end]], [c for c in value])
+#     return dialog
+
+
+def get_time_variants(time_text):
+    """Return alternative surface forms of the first clock time in ``time_text``.
+
+    The first ``H:MM`` / ``HH:MM`` occurrence (optionally followed by a space
+    and ``am``/``pm`` in lower or upper case) is re-rendered in other common
+    notations: with or without the ``am``/``pm`` suffix, 12-hour vs. 24-hour,
+    with or without a leading zero, and without the colon.
+
+    :param time_text: text containing a time, e.g. ``'16:15'`` or ``'4:00pm'``.
+    :return: a list whose first element is ``time_text`` itself, followed by
+        the generated variants (duplicates are possible); an empty list when
+        ``time_text`` contains no ``H:MM`` pattern.
+    """
+    match_times = re.findall(r'(\d{1,2}:\d{2})(\s)?(am|pm|AM|PM)?', time_text)
+    if not match_times:
+        return []
+    # Only the first time found in the text is expanded.
+    clock, _, meridiem = match_times[0]
+    meridiem = meridiem.lower()
+    hour_text, minute = clock.split(':')
+    hour = int(hour_text)
+
+    value_list = [time_text]
+    if meridiem == 'am':
+        # 4:00am -> 4:00
+        value_list.append(clock)
+        if len(clock) == 4:
+            # 4:00 -> 04:00
+            value_list.append('0' + clock)
+    elif meridiem == 'pm':
+        # 4:00pm -> 16:00. 12:xxpm is already the 24-hour form of noon, so it
+        # must stay 12:xx rather than become the invalid 24:xx.
+        hour_24 = hour if hour == 12 else hour + 12
+        value_list.append(str(hour_24) + ':' + minute)
+    else:
+        # No am/pm marker: read the time as 24-hour and derive 12-hour forms.
+        # Noon (12:xx) belongs to the pm half of the day.
+        if hour >= 12:
+            hour_12 = hour - 12 if hour > 12 else 12
+            suffix = 'pm'
+        else:
+            hour_12 = hour
+            suffix = 'am'
+        short_clock = str(hour_12) + ':' + minute
+        value_list.append(short_clock + suffix)
+        value_list.append(short_clock + ' ' + suffix)
+        value_list.append(short_clock)
+        if minute == '00':
+            # On the hour the minutes may be dropped: 16:00 -> 4pm / 4 pm / 4
+            value_list.append(str(hour_12) + suffix)
+            value_list.append(str(hour_12) + ' ' + suffix)
+            value_list.append(str(hour_12))
+        if len(clock) == 5 and clock[0] == '0':
+            # 09:00 -> 9:00
+            value_list.append(clock[1:])
+        # 16:15 -> 1615
+        value_list.append(clock.replace(':', ''))
+
+    return value_list
+
+
+def get_genitive_variants(value):
+    """Build genitive spellings of ``value`` (e.g. ``johns`` -> ``john's``).
+
+    Every ``s`` that is directly followed by whitespace is rewritten, once as
+    the space-separated form ``" 's"`` and once as the attached form ``"'s"``.
+    Note that the ``(?=\\w)`` lookahead in the pattern is satisfied by the
+    ``s`` itself, so it does not constrain the preceding character.
+
+    :param value: slot value text.
+    :return: list with up to two variants (space-separated form first); a
+        variant is only included when it differs from ``value``.
+    """
+    s_before_space = re.compile(r"(?=\w)s(?=\s)")
+    variants = []
+    for genitive in (" 's", "'s"):
+        candidate = s_before_space.sub(genitive, value)
+        if candidate != value:
+            variants.append(candidate)
+    return variants
+
+
+def check_spans(dialog):
+    """Check that located non-categorical state updates match their spans.
+
+    Only user turns are inspected. For every entry of
+    ``turn['state_update']['non-categorical']`` that carries an ``utt_idx``,
+    the slice ``utterance[start:end]`` of the referenced turn must equal the
+    expected value: ``fixed_value`` when it is present and not
+    ``'not found'``, otherwise ``value``.
+
+    :param dialog: converted dialogue dict with a ``'turns'`` list.
+    :raises AssertionError: if a span does not reproduce the expected value;
+        the message names the utterance, the span and both strings.
+    """
+    for turn in dialog['turns']:
+        if turn['speaker'] != 'user':
+            continue
+        for _update in turn['state_update']['non-categorical']:
+            if 'utt_idx' not in _update:
+                # Value was never located in an utterance; nothing to verify.
+                continue
+            fixed_value = _update.get('fixed_value', 'not found')
+            value = _update['value'] if fixed_value == 'not found' else fixed_value
+            utt_idx = _update['utt_idx']
+            start = _update['start']
+            end = _update['end']
+            utterance = dialog['turns'][utt_idx]['utterance']
+            found = utterance[start:end]
+            if found != value:
+                # Raised explicitly so the check also runs under ``python -O``
+                # and the error carries a usable message.
+                raise AssertionError(
+                    'span [{}:{}] of utterance {!r} (utt_idx {}) is {!r}, expected {!r}'.format(
+                        start, end, utterance, utt_idx, found, value))
+
+
+
+def get_bb_variants(value):
+    """Return the abbreviated ``b & b`` spelling of ``value``, if applicable.
+
+    :param value: slot value text.
+    :return: a one-element list in which every ``bed and breakfast`` is
+        replaced by ``b & b``, or an empty list when the phrase is absent.
+    """
+    long_form = 'bed and breakfast'
+    if long_form not in value:
+        return []
+    return [value.replace(long_form, 'b & b')]
+
+# Script entry point: call preprocess() with the module-level
+# `da_cat_slot_values` and `state_cat_slot_values` objects when this file is
+# executed directly (nothing runs on import).
+if __name__ == '__main__':
+    preprocess(da_cat_slot_values, state_cat_slot_values)
\ No newline at end of file
diff --git a/data/unified_datasets/multiwoz22/README.md b/data/unified_datasets/multiwoz22/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..52db0dd3d3e5e16946d770bc3484d17b50b0dc5c
--- /dev/null
+++ b/data/unified_datasets/multiwoz22/README.md
@@ -0,0 +1,34 @@
+# README
+
+## Features
+
+- Annotations: dialogue acts, character-level spans for non-categorical slots, states, and state updates.
+
+Statistics: 
+
+|       | \# dialogues | \# utterances | avg. turns | avg. tokens | \# domains |
+| ----- | ------------ | ------------- | ---------- | ----------- | ---------- |
+| train | 8434         | 105066         | 12.46     | 17.27      | 7          |
+| dev | 999         | 13731         | 13.74      | 17.72       | 7          |
+| test  | 1000         | 13744         | 13.74       | 17.67       | 7          |
+
+
+## Main changes
+
+- Only 5 domains are kept in the state annotations and dialogue acts.
+- `pricerange`, `area`, `day`, `internet`, `parking`, `stars` are considered categorical slots.
+- Punctuation marks are split from their preceding tokens, e.g. `I want to find a hotel. ->
+  I want to find a hotel .`
+
+Run `evaluate.py`:
+
+- da values match rate:    97.944
+- state values match rate: 66.945
+
+### original data
+
+- from [multiwoz](https://github.com/budzianowski/multiwoz) repo.
+- original multiwoz2.2 dataset gives slot value in List format. We take the first value 
+in each slot list as ground-truth value.
+
+
diff --git a/data/unified_datasets/multiwoz22/data.zip b/data/unified_datasets/multiwoz22/data.zip
new file mode 100644
index 0000000000000000000000000000000000000000..92d82a3382228455b61a4117413731d17ee7add2
Binary files /dev/null and b/data/unified_datasets/multiwoz22/data.zip differ
diff --git a/data/unified_datasets/multiwoz22/ontology.json b/data/unified_datasets/multiwoz22/ontology.json
new file mode 100644
index 0000000000000000000000000000000000000000..d3cc3c187cc961994cfb477f61a11694c6f156fd
--- /dev/null
+++ b/data/unified_datasets/multiwoz22/ontology.json
@@ -0,0 +1,1879 @@
+{
+    "domains": {
+        "taxi": {
+            "description": "taxi information query system",
+            "slots": {
+                "destination": {
+                    "description": "destination of taxi",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "departure": {
+                    "description": "departure location of taxi",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "leaveAt": {
+                    "description": "leaving time of taxi",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "arriveBy": {
+                    "description": "arrival time of taxi",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "taxi_phone": {
+                    "description": "taxi phone number",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "taxi_types": {
+                    "description": "taxi type",
+                    "is_categorical": false,
+                    "possible_values": []
+                }
+            }
+        },
+        "restaurant": {
+            "description": "restaurant information query system",
+            "slots": {
+                "food": {
+                    "description": "food type for the restaurant",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "name": {
+                    "description": "name of the restaurant",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "pricerange": {
+                    "description": "price budget for the restaurant",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "varying price",
+                        "every price point",
+                        "a little expensive but worth it",
+                        "assorted",
+                        "most affordable price range",
+                        "the least expensive",
+                        "same price range",
+                        "similar price range",
+                        "priced cheaply",
+                        "varying price range",
+                        "any price point",
+                        "cheap priced",
+                        "moderate price range",
+                        "moderate",
+                        "any",
+                        "rather expensive",
+                        "that price range",
+                        "on the pricey side",
+                        "on the cheap side",
+                        "various",
+                        "your price range",
+                        "expensively priced",
+                        "expensive or moderate",
+                        "moderatley priced",
+                        "moderatre",
+                        "more expensive",
+                        "adforable",
+                        "fine",
+                        "not moderate",
+                        "moderately expensive",
+                        "relatively cheap",
+                        "cheap or expensive",
+                        "moderately prices",
+                        "expensive side",
+                        "ranging from cheap to expensive",
+                        "very expensive",
+                        "ch",
+                        "cherap",
+                        "do nt care",
+                        "low cost",
+                        "do n't care",
+                        "most expensive",
+                        "or the moderately priced",
+                        "moderate and expensive",
+                        "that price",
+                        "expensively",
+                        "reasonable",
+                        "not cheap",
+                        "moderately priced",
+                        "all price ranges",
+                        "or otherwise",
+                        "upscale",
+                        "expensive / upscale",
+                        "fairly expensive",
+                        "great prices",
+                        "quite low",
+                        "moderatly",
+                        "moderate or expensive priced",
+                        "cheaply priced",
+                        "the same",
+                        "expensive range",
+                        "moderate priced",
+                        "this price range",
+                        "pricey",
+                        "moderately - priced",
+                        "reasonably priced",
+                        "high - end",
+                        "cheaper",
+                        "expensive and moderately priced",
+                        "pretty expensive",
+                        "modest",
+                        "they vary",
+                        "luxury options",
+                        "fairly inexpensive",
+                        "pretty cheap",
+                        "expensive but worth every penny",
+                        "moderate and one in the cheap range",
+                        "not expensive",
+                        "that range",
+                        "low priced",
+                        "relatively expensive",
+                        "cheap to expensive",
+                        "mostly expensive and moderately priced",
+                        "not - so - expensive",
+                        "quite inexpensive",
+                        "inexpensive",
+                        "your",
+                        "budget - friendly",
+                        "high class",
+                        "expinsive",
+                        "fairly cheap",
+                        "expensive price range",
+                        "any price range",
+                        "moderate pricing",
+                        "high end",
+                        "epensive",
+                        "vietnamese",
+                        "cheap price range",
+                        "mostly expensive",
+                        "less expensive",
+                        "moderate range",
+                        "more moderate",
+                        "priced moderately",
+                        "expensive price",
+                        "centre",
+                        "expensive",
+                        "middle",
+                        "boderate",
+                        "moderate price",
+                        "moderately",
+                        "moderatly priced",
+                        "cheap range",
+                        "regardless of price",
+                        "moderately price",
+                        "north",
+                        "not too expensive",
+                        "not too pricey",
+                        "affordable",
+                        "all",
+                        "east",
+                        "quite expensive",
+                        "cheaply - priced",
+                        "budget conscious",
+                        "cheap",
+                        "budget friendly"
+                    ]
+                },
+                "address": {
+                    "description": "exact location of the restaurant",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "area": {
+                    "description": "area or place of the restaurant",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "center of town",
+                        "west part of town .",
+                        "closely located",
+                        "south cambridge",
+                        "cetre",
+                        "the city centre",
+                        "on the east",
+                        "any part of town",
+                        "centre area of the city",
+                        "the north",
+                        "near centre",
+                        "center",
+                        "central region of town",
+                        "not",
+                        "east area of town",
+                        "north side of cambridge",
+                        "north of town",
+                        "centre area",
+                        "north part of town",
+                        "west part of town",
+                        "the south area of the city",
+                        "here",
+                        "south part",
+                        "west",
+                        "in town",
+                        "in the centre",
+                        "the southern area",
+                        "south side",
+                        "west area",
+                        "anywhere",
+                        "west cambridge",
+                        "north area of town",
+                        "the east side of town",
+                        "any",
+                        "south area of town",
+                        "centre cambridge",
+                        "centre of town",
+                        "this town",
+                        "the east part of town",
+                        "cambridge",
+                        "northern part",
+                        "eastside",
+                        "centrally",
+                        "around town",
+                        "close to your location",
+                        "westies",
+                        "the north area",
+                        "thai",
+                        "centre of town .",
+                        "in town at all",
+                        "in the west",
+                        "mexican",
+                        "not in the west",
+                        "the town centre",
+                        "other part of the town",
+                        "the centre",
+                        "southern area",
+                        "the west",
+                        "any area",
+                        "southend",
+                        "south of cambridge",
+                        "east section of town",
+                        "the north side",
+                        "the east",
+                        "the north side .",
+                        "elsewhere in the city",
+                        "south of town .",
+                        "the west side of town",
+                        "the centre area of town",
+                        "the center part of town",
+                        "northside",
+                        "do nt care",
+                        "that area",
+                        "the are",
+                        "do n't care",
+                        "north end",
+                        "in the city",
+                        "centrem",
+                        "the centre area",
+                        "south of town",
+                        "the east side",
+                        "centre city",
+                        "the south of town",
+                        "the north end",
+                        "centre region",
+                        "the south side of town",
+                        "east part of tow",
+                        "the center",
+                        "the same area",
+                        "east side",
+                        "town",
+                        "any where in the city",
+                        "throughout the area",
+                        "in the city .",
+                        "town centre",
+                        "same side of town as your hotel",
+                        "the south area of town",
+                        "and centre",
+                        "east area",
+                        "north area",
+                        "the west end",
+                        "north cambridge",
+                        "the area you have chosen",
+                        "west side",
+                        "east of town",
+                        "central",
+                        "east part of town",
+                        "that part of town",
+                        "city center",
+                        "the same",
+                        "the south side",
+                        "city",
+                        "by the airport",
+                        "downtown",
+                        "the center of town",
+                        "chesterton",
+                        "east section",
+                        "southern",
+                        "south part of time",
+                        "ely",
+                        "within this area",
+                        "the north part of town",
+                        "north or centre",
+                        "the west side",
+                        "all over the city",
+                        "center of the town",
+                        "cambridge centre",
+                        "wet part of town",
+                        "center area of town",
+                        "west end of town",
+                        "city centre",
+                        "the area",
+                        "all of cambridge",
+                        "towns centre",
+                        "in the north",
+                        "west part of the city",
+                        "north side of town",
+                        "the south part of town",
+                        "centre of the city",
+                        "the east area",
+                        "here in the city",
+                        "that area of town",
+                        "east side of the city",
+                        "centre of cambridge",
+                        "the city center",
+                        "almost every area of town",
+                        "around there",
+                        "north side of chersteron",
+                        "in cambridge",
+                        "west area of town",
+                        "the area of west",
+                        "town centre area",
+                        "nearby",
+                        "same area as the hotel",
+                        "south part of town",
+                        "not in the south of town",
+                        "centreof",
+                        "ctre",
+                        "anywhere in cambridge",
+                        "this area",
+                        "town of centre",
+                        "town center",
+                        "in the east",
+                        "east of cambridge",
+                        "southside",
+                        "all over town",
+                        "south side of town",
+                        "other parts of town",
+                        "northern part of cambridge",
+                        "the same area as the botanic gardens",
+                        "center cambridge",
+                        "south area",
+                        "south",
+                        "near the centre",
+                        "east end",
+                        "throughout the city",
+                        "the centre of town",
+                        "centere",
+                        "near clare hall",
+                        "close to that area",
+                        "in the city centre",
+                        "n the centre",
+                        "the centre part of town",
+                        "the west of town",
+                        "around the college",
+                        "centre area of town",
+                        "centre",
+                        "these areas",
+                        "south end",
+                        "close to the hotel",
+                        "expensive",
+                        "south part of the city",
+                        "close it city centre",
+                        "west of cambridge",
+                        "same area as the park",
+                        "east side of town",
+                        "that",
+                        "west end",
+                        "the south",
+                        "all over cambridge",
+                        "north",
+                        "anywhere in town",
+                        "centre area of the town",
+                        "the west part of town",
+                        "same area",
+                        "south side of the town",
+                        "the west area",
+                        "near the center of town",
+                        "centre part of town",
+                        "northern parts of town",
+                        "west side of town",
+                        "east",
+                        "westside",
+                        "west of town",
+                        "the north side of town",
+                        "north and west",
+                        "centrally located",
+                        "west side area",
+                        "same",
+                        "central area",
+                        "cheap",
+                        "north side"
+                    ]
+                },
+                "postcode": {
+                    "description": "postcode of the restaurant",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "phone": {
+                    "description": "restaurant phone number",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "people": {
+                    "description": "number of people booking the restaurant",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "time": {
+                    "description": "time of the restaurant booking",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "day": {
+                    "description": "day of the restaurant booking",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "w",
+                        "thursday",
+                        "monday",
+                        "friday",
+                        "saturday",
+                        "tuesday",
+                        "wednesday",
+                        "sunday"
+                    ]
+                },
+                "choice": {
+                    "description": "number of restaurants meeting requests of user",
+                    "is_categorical": false,
+                    "possible_values": []
+                }
+            }
+        },
+        "attraction": {
+            "description": "an entertainment that is offered to the public",
+            "slots": {
+                "address": {
+                    "description": "details of where the attraction is",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "postcode": {
+                    "description": "postcode of the attraction",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "phone": {
+                    "description": "phone number of the attraction",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "entrance fee": {
+                    "description": "the fee charged for admission to the attraction",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "type": {
+                    "description": "type of the attraction",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "area": {
+                    "description": "area or place of the attraction",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "center of town",
+                        "that side of town",
+                        "south cambridge",
+                        "the city centre",
+                        "north in milton",
+                        "western part of the town",
+                        "western area of town",
+                        "whole of cambridge",
+                        "the south near your hotel",
+                        "the north",
+                        "your desired location",
+                        "center",
+                        "cenre",
+                        "east area of town",
+                        "north part of town",
+                        "centre area",
+                        "north of town",
+                        "west part of town",
+                        "near ely",
+                        "center part of town",
+                        "western",
+                        "west",
+                        "in town",
+                        "in the centre",
+                        "south side",
+                        "west cambridge",
+                        "northern area",
+                        "west area",
+                        "the north of the city",
+                        "centre area of cambridge",
+                        "western cambridge",
+                        "western part of town",
+                        "centre of town",
+                        "the east part of town",
+                        "cambridge",
+                        "various",
+                        "west area of the city",
+                        "eastside",
+                        "eat",
+                        "in the south",
+                        "cetnre of town",
+                        "right in the center of town",
+                        "northern",
+                        "different area",
+                        "another area",
+                        "centre area .",
+                        "centre by galleria",
+                        "same general area",
+                        "the centre",
+                        "clifton way",
+                        "the west",
+                        "east cambridge",
+                        "close to the restaurant",
+                        "this side of town",
+                        "the east of the town",
+                        "south of cambridge",
+                        "most of them are in the center",
+                        "the east",
+                        "same area as tandoori palace",
+                        "went side of town",
+                        "the west side of town",
+                        "do nt care",
+                        "close to the center of town",
+                        "that area",
+                        "central zone",
+                        "the centre area",
+                        "south of town",
+                        "near the restaurant",
+                        "the east side",
+                        "centre city",
+                        "centre region",
+                        "center of town .",
+                        "center area",
+                        "all of the other areas",
+                        "the same area",
+                        "the center",
+                        "east side",
+                        "central cambridge",
+                        "town centre",
+                        "center are",
+                        "south park of town",
+                        "center of cambridge",
+                        "east area",
+                        "northern area of town",
+                        "north area",
+                        "the west end",
+                        "east of town",
+                        "to the south",
+                        "west side",
+                        "central",
+                        "that part of town",
+                        "east part of town",
+                        "city center",
+                        "west side of the city",
+                        "cambridge 's centre",
+                        "the south side",
+                        "city cenre",
+                        "city",
+                        "downtown",
+                        "in that area",
+                        "cb30aq",
+                        "east section",
+                        "ely",
+                        "the area you 're looking for",
+                        "the west side",
+                        "all over the city",
+                        "center of the town",
+                        "the eastside",
+                        "in the area",
+                        "other areas",
+                        "the west area of town",
+                        "in the center",
+                        "west end of town",
+                        "city centre",
+                        "city centre .",
+                        "entre",
+                        "the area",
+                        "towns centre",
+                        "northend",
+                        "west part of the city",
+                        "the center area",
+                        "near the hotel",
+                        "centre of town ?",
+                        "south part of cambridge",
+                        "that area .",
+                        "centre of the city",
+                        "southern cambridge",
+                        "museum",
+                        "centre of cambridge",
+                        "all around the city",
+                        "cambridge leisure park",
+                        "several different parts of town",
+                        "city 's centre",
+                        "center of the city",
+                        "in cambridge",
+                        "church area",
+                        "museums",
+                        "west end of the city",
+                        "west area of town",
+                        "same part",
+                        "town centre area",
+                        "nearby",
+                        "south part of town",
+                        "same part of town as your restaurant",
+                        "the same road",
+                        "this area",
+                        "in the east",
+                        "town center",
+                        "there",
+                        "same area as hotel",
+                        "ce",
+                        "east of cambridge",
+                        "south side of town",
+                        "other parts of town",
+                        "all over town",
+                        "central district",
+                        "the west - side",
+                        "the center of the park",
+                        "south area",
+                        "south",
+                        "near the centre",
+                        "throughout the city",
+                        "the centre of town",
+                        "the south area",
+                        "in that side",
+                        "very close in the same area",
+                        "cent",
+                        "wet end",
+                        "the centre part of town",
+                        "west of your city",
+                        "the west of town",
+                        "we",
+                        "north section of cambridge",
+                        "centre area of town",
+                        "centre",
+                        "norwich",
+                        "south end",
+                        "every area except the north",
+                        "the centry area",
+                        "east side of town",
+                        "that",
+                        "northern cambridge",
+                        "west end",
+                        "the south",
+                        "north of the city",
+                        "north",
+                        "east end of town",
+                        "the center part of town",
+                        "on the centre",
+                        "same area",
+                        "the west area",
+                        "centre part of town",
+                        "west side of town",
+                        "all",
+                        "east",
+                        "westside",
+                        "centrally located",
+                        "same",
+                        "the area you are looking for",
+                        "west part of cambridge",
+                        "north side"
+                    ]
+                },
+                "name": {
+                    "description": "name of the attraction",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "pricerange": {
+                    "description": "the price range for the attraction, from cheap to expensive",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "neither prices are listed",
+                        "from free to 2 pounds 50",
+                        "entrance fee",
+                        "5 pounds",
+                        "4 pounds",
+                        "unaware of what their entrance fee is",
+                        "do n't see a price listed",
+                        "expensive",
+                        "they do n't have the entrance fee posted",
+                        "no entrance fee",
+                        "free",
+                        "5 pound entrance fee",
+                        "2 pounds",
+                        "not sure of the fee",
+                        "we do n't have any information",
+                        "free admission",
+                        "do n't have information",
+                        "3.50 pounds",
+                        "moderately priced",
+                        "cheap",
+                        "not listed"
+                    ]
+                },
+                "choice": {
+                    "description": "number of attractions matching requests of user",
+                    "is_categorical": false,
+                    "possible_values": []
+                }
+            }
+        },
+        "booking": {
+            "description": "to arrange with a taxi, restaurant, train, etc.",
+            "slots": {
+                "time": {
+                    "description": "time for an order",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "day": {
+                    "description": "day for an order, from monday to sunday",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "tues",
+                        "monday",
+                        "saturday night thru monday",
+                        "tuesday",
+                        "tuesday through friday",
+                        "tuesday instead of thursday",
+                        "tuesday 's",
+                        "sunday",
+                        "sundar",
+                        "sunday 18/06/2017",
+                        "tonight",
+                        "this evening",
+                        "sunday and monday",
+                        "vmhj6y3i",
+                        "at that time",
+                        "today",
+                        "friday",
+                        "saturday",
+                        "that",
+                        "thursday",
+                        "we d",
+                        "weds",
+                        "2",
+                        "the same day",
+                        "6",
+                        "1",
+                        "wednesday",
+                        "that time frame"
+                    ]
+                },
+                "stay": {
+                    "description": "for how long the user wishes to be at a place",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "people": {
+                    "description": "how many people the order is for",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "name": {
+                    "description": "name of the ordered place",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "Ref": {
+                    "description": "reference number of the order",
+                    "is_categorical": false,
+                    "possible_values": []
+                }
+            }
+        },
+        "train": {
+            "description": "query and order a train",
+            "slots": {
+                "destination": {
+                    "description": "destination of the train",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "arriveBy": {
+                    "description": "arrival time of the train",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "departure": {
+                    "description": "departure location of the train",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "duration": {
+                    "description": "the length of time the train trip lasts",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "people": {
+                    "description": "number of people booking for train",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "day": {
+                    "description": "day of the train",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "monday",
+                        "tuesday",
+                        "tr2519",
+                        "sunday",
+                        "between friday and wednesday",
+                        "sundays",
+                        "every",
+                        "daily",
+                        "mondays",
+                        "sat",
+                        "we",
+                        "all week",
+                        "every day",
+                        "friday",
+                        "saturday",
+                        "that",
+                        "train",
+                        "other days are available",
+                        "this day",
+                        "frday",
+                        "cambridge",
+                        "thursday",
+                        "weds",
+                        "saturdays",
+                        "that day",
+                        "13:07",
+                        "fiday",
+                        "everday",
+                        "wednesday",
+                        "fr"
+                    ]
+                },
+                "Ref": {
+                    "description": "reference number of the order",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "leaveAt": {
+                    "description": "leaving time for the train",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "price": {
+                    "description": "price for the train ticket",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "choice": {
+                    "description": "number of trains that meet requests of the user",
+                    "is_categorical": false,
+                    "possible_values": []
+                }
+            }
+        },
+        "hotel": {
+            "description": "to query hotel information and place an order",
+            "slots": {
+                "internet": {
+                    "description": "internet option at the hotel",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "yes",
+                        "4",
+                        "do nt care",
+                        "no",
+                        "free",
+                        "y",
+                        "free internet",
+                        "none"
+                    ]
+                },
+                "area": {
+                    "description": "area or place of the hotel",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "center of town",
+                        "that side of town",
+                        "south cambridge",
+                        "several areas of town",
+                        "any part of the city",
+                        "any part of town",
+                        "the east end of town",
+                        "a different area",
+                        "the north",
+                        "different part of town",
+                        "north end of the city",
+                        "center",
+                        "and east",
+                        "near centre",
+                        "norht",
+                        "northern side of town",
+                        "east area of town",
+                        "somewhere else",
+                        "north of town",
+                        "centre area",
+                        "north part of town",
+                        "west part of town",
+                        "east and the north .",
+                        "next door",
+                        "all over",
+                        "west",
+                        "in town",
+                        "in the centre",
+                        "the town center",
+                        "west part",
+                        "south side",
+                        "west area",
+                        "northern area",
+                        "the north of the city",
+                        "north area of town",
+                        "western cambridge",
+                        "the east side of town",
+                        "different parts of town",
+                        "south area of town",
+                        "centre cambridge",
+                        "centre of town",
+                        "cambridge",
+                        "various",
+                        "on the west side",
+                        "north end of town",
+                        "eastside",
+                        "centrally",
+                        "near the museum",
+                        "eat",
+                        "el shaddai",
+                        "on the south",
+                        "close to where you 'll be dining",
+                        "in the west",
+                        "northern",
+                        "the town centre",
+                        "different area",
+                        "the northern part of town",
+                        "the centre",
+                        "the west",
+                        "east cambridge",
+                        "any area",
+                        "southend",
+                        "east section of town",
+                        "the north side",
+                        "southern part of town",
+                        "the east",
+                        "south end of town",
+                        "west side near the restaurant",
+                        "northside",
+                        "do nt care",
+                        "that area",
+                        "north end",
+                        "in the city",
+                        "the centre area",
+                        "south of town",
+                        "near the restaurant",
+                        "the east side",
+                        "the city 's south side",
+                        "centre city",
+                        "the north end",
+                        "north park of town",
+                        "all areas",
+                        "all parts of the city",
+                        "south cambridge area",
+                        "other parts of the city",
+                        "that region",
+                        "the same area",
+                        "centra",
+                        "east side",
+                        "town centre",
+                        "and in the centre",
+                        "same area as the restaurant",
+                        "north part of the city",
+                        "across cambridge",
+                        "east area",
+                        "north area",
+                        "the west end",
+                        "north cambridge",
+                        "east of town",
+                        "west side",
+                        "everywhere but the city centre",
+                        "east part of town",
+                        "that part of town",
+                        "except in the north",
+                        "city center",
+                        "across town",
+                        "west areas of town",
+                        "the south side",
+                        "the same",
+                        "north part of town .",
+                        "north location",
+                        "different parts of the city",
+                        "on the west",
+                        "north of cambridge",
+                        "downtown",
+                        "the center of town",
+                        "the westside",
+                        "the north part of town",
+                        "all over the city",
+                        "the west side",
+                        "center of the town",
+                        "that vicinity",
+                        "on the north",
+                        "in the area",
+                        "in the center",
+                        "city centre",
+                        "the easy",
+                        "northe part of town",
+                        "the area",
+                        "all of cambridge",
+                        "in the north",
+                        "northend",
+                        "or west ?",
+                        "west part of the city",
+                        "north side of town",
+                        "same side of town",
+                        "centre of the city",
+                        "the east area",
+                        "except in the east",
+                        "place to be a guesthouse",
+                        "that area of town",
+                        "centre of cambridge",
+                        "all around the city",
+                        "in cambridge",
+                        "the centre of cambridge",
+                        "west end of the city",
+                        "centre part",
+                        "north par of town",
+                        "central location",
+                        "on the eastside",
+                        "south part of town",
+                        "eastern part of the city",
+                        "the centre of the city",
+                        "east part of time",
+                        "anywhere in cambridge",
+                        "there",
+                        "north and centre",
+                        "south side of town",
+                        "all over town",
+                        "other parts of town",
+                        "southside",
+                        "nborth",
+                        "you are interested in",
+                        "south area",
+                        "south",
+                        "near the centre",
+                        "throughout the city",
+                        "the centre of town",
+                        "the south area",
+                        "south closer to the museum",
+                        "we",
+                        "not in the south",
+                        "centre area of town",
+                        "centre",
+                        "wast",
+                        "south end",
+                        "eastern cambridge",
+                        "various parts of the city",
+                        "east side of town",
+                        "northern cambridge",
+                        "eastern",
+                        "west end",
+                        "the south",
+                        "north",
+                        "east near other shops and boutiques",
+                        "same area",
+                        "the west area",
+                        "centre part of town",
+                        "all across town",
+                        "west side of town",
+                        "throughout cambridge",
+                        "another area of town",
+                        "south end of the city",
+                        "east",
+                        "northern part of town",
+                        "nearby the restaurant",
+                        "west of town",
+                        "centrally located",
+                        "centrally - located",
+                        "cheap",
+                        "not to far from the restaurant",
+                        "north side"
+                    ]
+                },
+                "stars": {
+                    "description": "star rating of the hotel",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "3-star",
+                        "3",
+                        "does not have",
+                        "foru",
+                        "1-star",
+                        "not as fancy",
+                        "four star",
+                        "yes",
+                        "four starts",
+                        "do nt care",
+                        "five",
+                        "one",
+                        "no star rating",
+                        "two",
+                        "5-star",
+                        "several",
+                        "0 to 4",
+                        "four - star",
+                        "four stars",
+                        "no",
+                        "0",
+                        "0-star",
+                        "2-star",
+                        "5",
+                        "drop the star rating",
+                        "different star ratings",
+                        "ranging from 2 - 4 stars",
+                        "2",
+                        "lower",
+                        "4-star",
+                        "4",
+                        "three",
+                        "zero",
+                        "1",
+                        "does not show",
+                        "unrated",
+                        "not rated",
+                        "four"
+                    ]
+                },
+                "parking": {
+                    "description": "parking facility at the hotel",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "yes",
+                        "free parking",
+                        "do nt care",
+                        "no",
+                        "free",
+                        "n",
+                        "none"
+                    ]
+                },
+                "phone": {
+                    "description": "hotel phone number",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "name": {
+                    "description": "name of the hotel",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "pricerange": {
+                    "description": "price budget of the hotel",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "different",
+                        "different price range",
+                        "cheapest",
+                        "budget - priced",
+                        "cheaper than the others",
+                        "same price range",
+                        "cheap priced",
+                        "moderate price range",
+                        "extremely reasonable",
+                        "slightly more expensive",
+                        "other ranges",
+                        "same price",
+                        "moderate",
+                        "any",
+                        "moderatly price",
+                        "rather expensive",
+                        "that price range",
+                        "on the cheap side",
+                        "your price range",
+                        "lower",
+                        "expensively priced",
+                        "does not say",
+                        "moderatley priced",
+                        "cheaply",
+                        "more expensive",
+                        "expensive to moderate",
+                        "moderately pried",
+                        "great",
+                        "relatively cheap",
+                        "moderately prices",
+                        "moderately priceed",
+                        "unfortunately do not have the price",
+                        "very cheap",
+                        "do nt care",
+                        "moderately priced .",
+                        "low cost",
+                        "all different price ranges",
+                        "varying price ranges",
+                        "or expensive ?",
+                        "very inexpensive",
+                        "moderate to cheap",
+                        "even cheaper",
+                        "moderate in price",
+                        "less costly",
+                        "super cheap",
+                        "moderately priced",
+                        "cheap side",
+                        "higher price range",
+                        "upscale",
+                        "espensive",
+                        "more moderately priced",
+                        "cheaper side",
+                        "cheaply priced",
+                        "the same",
+                        "expensive range",
+                        "moderate priced",
+                        "your chosen",
+                        "economical",
+                        "moderately - priced",
+                        "different price ranges",
+                        "modrate",
+                        "your price",
+                        "more budget - friendly",
+                        "quite cheap",
+                        "mostly in the cheap to moderate price range",
+                        "cheaper",
+                        "economically priced",
+                        "lower end",
+                        "affordable pricing",
+                        "pretty expensive",
+                        "cheap to moderate",
+                        "oderately priced",
+                        "epxensive",
+                        "cheap or moderate",
+                        "pretty cheap",
+                        "very affordable",
+                        "free",
+                        "not expensive",
+                        "expensive or cheap",
+                        "that range",
+                        "much cheaper",
+                        "cheap to expensive",
+                        "2",
+                        "unknown",
+                        "inexpensive",
+                        "fairly cheap",
+                        "ca n't view that information",
+                        "expensive price range",
+                        "a little pricey",
+                        "chear",
+                        "moderately to expensively priced",
+                        "moderate pricing",
+                        "moderately pricing",
+                        "moderate prices",
+                        "on the more expensive side",
+                        "cheap price range",
+                        "the cheapest",
+                        "moderate range",
+                        "priced moderately",
+                        "centre",
+                        "expensive",
+                        "inexpensively - priced",
+                        "guesthouses",
+                        "moderate price",
+                        "moderately",
+                        "moderate price point",
+                        "that",
+                        "moderatly priced",
+                        "expensively - priced",
+                        "cheap range",
+                        "moderate to cheap range",
+                        "moderately price",
+                        "north",
+                        "affordable",
+                        "moderate or cheap",
+                        "fairly cheap compared to other hotels",
+                        "all",
+                        "quite expensive",
+                        "cheaply - priced",
+                        "cheaply prices",
+                        "moderately - priced or cheap",
+                        "cheap",
+                        "hotel",
+                        "hotels"
+                    ]
+                },
+                "people": {
+                    "description": "number of people for the hotel booking",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "postcode": {
+                    "description": "postcode of the hotel",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "address": {
+                    "description": "exact location of the hotel",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "type": {
+                    "description": "what is the type of the hotel",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "stay": {
+                    "description": "length of stay at the hotel",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "day": {
+                    "description": "day of the hotel booking",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "thursday",
+                        "monday",
+                        "t",
+                        "friday",
+                        "next friday",
+                        "saturday",
+                        "tuesday",
+                        "monda",
+                        "wednesday",
+                        "sunday"
+                    ]
+                },
+                "choice": {
+                    "description": "number of hotels that meet requests of the user",
+                    "is_categorical": false,
+                    "possible_values": []
+                }
+            }
+        },
+        "police": {
+            "description": "find police stations",
+            "slots": {
+                "address": {
+                    "description": "exact location of the police station",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "postcode": {
+                    "description": "postcode of the police station",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "phone": {
+                    "description": "police station phone number",
+                    "is_categorical": false,
+                    "possible_values": []
+                }
+            }
+        }
+    },
+    "intents": {
+        "inform": {
+            "description": "inform user of value for a certain slot"
+        },
+        "request": {
+            "description": "ask for value of a slot"
+        },
+        "recommend": {
+            "description": "recommend a choice for user request"
+        },
+        "book": {
+            "description": "place an order for user"
+        },
+        "nobook": {
+            "description": "inform user of booking failure"
+        },
+        "thank": {
+            "description": "express gratitude"
+        },
+        "welcome": {
+            "description": "welcome"
+        },
+        "bye": {
+            "description": "end a conversation and say goodbye to user"
+        },
+        "reqmore": {
+            "description": "ask user for more instructions"
+        },
+        "select": {
+            "description": "provide several choices for user to choose from"
+        },
+        "nooffer": {
+            "description": "inform user that no option matches the user request"
+        },
+        "greet": {
+            "description": "express greeting"
+        },
+        "offerbook": {
+            "description": "offer to place an order for user"
+        },
+        "offerbooked": {
+            "description": "inform user that an order is successful"
+        }
+    },
+    "binary_dialogue_act": [
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "area",
+            "value": ""
+        },
+        {
+            "intent": "inform",
+            "domain": "booking",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "booking",
+            "slot": "people",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "booking",
+            "slot": "stay",
+            "value": ""
+        },
+        {
+            "intent": "nobook",
+            "domain": "booking",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "hotel",
+            "slot": "area",
+            "value": ""
+        },
+        {
+            "intent": "thank",
+            "domain": "",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "welcome",
+            "domain": "",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "food",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "pricerange",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "booking",
+            "slot": "day",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "hotel",
+            "slot": "stars",
+            "value": ""
+        },
+        {
+            "intent": "bye",
+            "domain": "",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "hotel",
+            "slot": "pricerange",
+            "value": ""
+        },
+        {
+            "intent": "inform",
+            "domain": "restaurant",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "reqmore",
+            "domain": "",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "hotel",
+            "slot": "phone",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "hotel",
+            "slot": "parking",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "hotel",
+            "slot": "address",
+            "value": ""
+        },
+        {
+            "intent": "select",
+            "domain": "hotel",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "nooffer",
+            "domain": "restaurant",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "inform",
+            "domain": "hotel",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "inform",
+            "domain": "taxi",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "taxi",
+            "slot": "leaveAt",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "hotel",
+            "slot": "postcode",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "taxi",
+            "slot": "taxi_phone",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "booking",
+            "slot": "time",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "taxi",
+            "slot": "arriveBy",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "name",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "phone",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "postcode",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "address",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "taxi",
+            "slot": "taxi_types",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "taxi",
+            "slot": "departure",
+            "value": ""
+        },
+        {
+            "intent": "book",
+            "domain": "booking",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "greet",
+            "domain": "",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "hotel",
+            "slot": "internet",
+            "value": ""
+        },
+        {
+            "intent": "nooffer",
+            "domain": "hotel",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "hotel",
+            "slot": "name",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "taxi",
+            "slot": "destination",
+            "value": ""
+        },
+        {
+            "intent": "select",
+            "domain": "restaurant",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "hotel",
+            "slot": "type",
+            "value": ""
+        },
+        {
+            "intent": "recommend",
+            "domain": "hotel",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "train",
+            "slot": "day",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "train",
+            "slot": "departure",
+            "value": ""
+        },
+        {
+            "intent": "offerbook",
+            "domain": "train",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "train",
+            "slot": "destination",
+            "value": ""
+        },
+        {
+            "intent": "select",
+            "domain": "train",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "train",
+            "slot": "people",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "train",
+            "slot": "leaveAt",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "train",
+            "slot": "duration",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "train",
+            "slot": "price",
+            "value": ""
+        },
+        {
+            "intent": "nooffer",
+            "domain": "train",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "train",
+            "slot": "arriveBy",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "train",
+            "slot": "Ref",
+            "value": ""
+        },
+        {
+            "intent": "inform",
+            "domain": "train",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "offerbooked",
+            "domain": "train",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "recommend",
+            "domain": "restaurant",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "inform",
+            "domain": "attraction",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "attraction",
+            "slot": "type",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "attraction",
+            "slot": "phone",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "attraction",
+            "slot": "address",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "attraction",
+            "slot": "entrance fee",
+            "value": ""
+        },
+        {
+            "intent": "select",
+            "domain": "attraction",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "attraction",
+            "slot": "area",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "attraction",
+            "slot": "postcode",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "attraction",
+            "slot": "name",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "attraction",
+            "slot": "pricerange",
+            "value": ""
+        },
+        {
+            "intent": "nooffer",
+            "domain": "attraction",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "recommend",
+            "domain": "attraction",
+            "slot": "",
+            "value": ""
+        }
+    ],
+    "state": {
+        "hotel": {
+            "area": "",
+            "internet": "",
+            "name": "",
+            "parking": "",
+            "pricerange": "",
+            "stars": "",
+            "type": "",
+            "day": "",
+            "people": "",
+            "stay": ""
+        },
+        "restaurant": {
+            "area": "",
+            "food": "",
+            "name": "",
+            "pricerange": "",
+            "day": "",
+            "people": "",
+            "time": ""
+        },
+        "taxi": {
+            "arriveBy": "",
+            "departure": "",
+            "destination": "",
+            "leaveAt": ""
+        },
+        "train": {
+            "arriveBy": "",
+            "day": "",
+            "departure": "",
+            "destination": "",
+            "leaveAt": "",
+            "people": ""
+        },
+        "attraction": {
+            "area": "",
+            "name": "",
+            "type": ""
+        }
+    }
+}
\ No newline at end of file
diff --git a/data/unified_datasets/multiwoz22/original_data.zip b/data/unified_datasets/multiwoz22/original_data.zip
new file mode 100644
index 0000000000000000000000000000000000000000..6fdaeb71ece04f47b6d1b0aa61e1b1ef6a1940fc
Binary files /dev/null and b/data/unified_datasets/multiwoz22/original_data.zip differ
diff --git a/data/unified_datasets/multiwoz22/preprocess.py b/data/unified_datasets/multiwoz22/preprocess.py
new file mode 100644
index 0000000000000000000000000000000000000000..521facf953fd4a4ed4222e4ce9084a0a90a1b518
--- /dev/null
+++ b/data/unified_datasets/multiwoz22/preprocess.py
@@ -0,0 +1,1431 @@
+import copy
+import re
+import zipfile
+import json
+import os
+from tqdm import tqdm
+import sys
+import difflib
+from fuzzywuzzy import fuzz
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from convlab2.util.file_util import read_zipped_json, write_zipped_json
+import logging
+
+
+# Emit INFO-level (and above) log records through the root logger.
+logging.basicConfig(level=logging.INFO)
+# Absolute path of the current working directory at import time.
+# NOTE(review): this is the CWD, not the directory that contains this file, so
+# paths built from it presumably assume the script is started from its own
+# folder (`python preprocess.py`) -- confirm against the code that uses it.
+self_dir = (os.path.abspath(os.getcwd()))
+
+# Maps an original MultiWOZ domain name to a table that translates the
+# domain's dialogue-act slot names into unified-format slot names.
+# A value of None marks a slot without a unified counterpart: `convert_da`
+# turns such an act into a binary act when the original slot and value are
+# both 'none', and drops it otherwise (e.g. the attraction `Open` slot).
+REF_SYS_DA = {
+    'Attraction': {
+        'Addr': "address", 'Area': "area", 'Choice': "choice",
+        'Fee': "entrance fee", 'Name': "name", 'Phone': "phone",
+        'Post': "postcode", 'Price': "pricerange", 'Type': "type",
+        'none': None, 'Open': None
+    },
+    'Hospital': {
+        'Department': 'department', 'Addr': 'address', 'Post': 'postcode',
+        'Phone': 'phone', 'none': None
+    },
+    'Booking': {
+        'Day': 'day', 'Name': 'name', 'People': 'people',
+        'Ref': 'Ref', 'Stay': 'stay', 'Time': 'time',
+        'none': None
+    },
+    'Hotel': {
+        'Addr': "address", 'Area': "area", 'Choice': "choice",
+        'Internet': "internet", 'Name': "name", 'Parking': "parking",
+        'Phone': "phone", 'Post': "postcode", 'Price': "pricerange",
+        'Stars': "stars", 'Type': "type", 'Stay': 'stay', 'Day': 'day', 'People': 'people',
+        'none': None
+    },
+    'Restaurant': {
+        'Addr': "address", 'Area': "area", 'Choice': "choice",
+        'Name': "name", 'Food': "food", 'Phone': "phone",
+        'Post': "postcode", 'Price': "pricerange",
+        'Time': 'time', 'Day': 'day', 'People': 'people',
+        'none': None
+    },
+    'Taxi': {
+        'Arrive': "arriveBy", 'Car': "taxi_types", 'Depart': "departure",
+        'Dest': "destination", 'Leave': "leaveAt", 'Phone': "taxi_phone",
+        'none': None
+    },
+    'Train': {
+        'Arrive': "arriveBy", 'Choice': "choice", 'Day': "day",
+        'Depart': "departure", 'Dest': "destination",
+        'Leave': "leaveAt", 'People': "people", 'Ref': "Ref",
+        'Time': "duration", 'none': None, 'Ticket': 'price',
+    },
+    'Police': {
+        'Addr': "address", 'Post': "postcode", 'Phone': "phone", 'none': None
+    },
+}
+
+# Type of every unified "domain-slot" name: 'cat' marks a categorical slot,
+# 'non' a non-categorical one.  `convert_da` files a dialogue act under
+# 'categorical' only when its "domain-slot" is listed here as 'cat'.
+slot_to_type = {
+    'taxi-destination': 'non',
+    'taxi-departure': 'non',
+    'taxi-leaveAt': 'non',
+    'taxi-arriveBy': 'non',
+    'restaurant-food': 'non',
+    'restaurant-name': 'non',
+    'attraction-address': 'non',
+    'attraction-postcode': 'non',
+    'restaurant-pricerange': 'cat',
+    'restaurant-address': 'non',
+    'restaurant-area': 'cat',
+    'restaurant-postcode': 'non',
+    'attraction-phone': 'non',
+    'attraction-entrance fee': 'non',
+    'booking-time': 'non',
+    'booking-day': 'cat',
+    'attraction-type': 'non',
+    'attraction-area': 'cat',
+    'train-destination': 'non',
+    'train-arriveBy': 'non',
+    'train-departure': 'non',
+    'hotel-internet': 'cat',
+    'hotel-area': 'cat',
+    'booking-stay': 'non',
+    'booking-people': 'non',
+    'train-duration': 'non',
+    'train-people': 'non',
+    'train-day': 'cat',
+    'train-Ref': 'non',
+    'hotel-stars': 'cat',
+    'train-leaveAt': 'non',
+    'train-price': 'non',
+    'hotel-parking': 'cat',
+    'hotel-phone': 'non',
+    'hotel-name': 'non',
+    'hotel-pricerange': 'cat',
+    'hotel-people': 'non',
+    'restaurant-phone': 'non',
+    'hotel-postcode': 'non',
+    'hotel-address': 'non',
+    'attraction-name': 'non',
+    'hotel-type': 'non',
+    'restaurant-people': 'non',
+    'train-choice': 'non',
+    'attraction-pricerange': 'cat',
+    'hotel-stay': 'non',
+    'booking-name': 'non',
+    'booking-Ref': 'non',
+    'restaurant-time': 'non',
+    'restaurant-day': 'cat',
+    'hotel-day': 'cat',
+    'hotel-choice': 'non',
+    'restaurant-choice': 'non',
+    'attraction-choice': 'non',
+    'taxi-taxi_phone': 'non',
+    'taxi-taxi_types': 'non',
+    'police-address': 'non',
+    'police-postcode': 'non',
+    'police-phone': 'non'
+}
+
+# Known values of each categorical state slot, keyed by "domain-slot".
+# `get_state_update` only tests whether a value is a key of the inner dict.
+# NOTE(review): the integers look like occurrence counts of each value in the
+# data -- confirm; the code visible in this part of the file never reads them.
+state_cat_slot_value_dict = {
+    "hotel-pricerange": {
+        "cheap": 735,
+        "moderate": 1063,
+        "expensive": 594,
+    },
+    "hotel-parking": {
+        "yes": 1809,
+        "no": 126,
+        "free": 4,
+    },
+    "hotel-day": {
+        "tuesday": 385,
+        "wednesday": 410,
+        "monday": 365,
+        "saturday": 407,
+        "friday": 393,
+        "thursday": 384,
+        "sunday": 369,
+    },
+    "train-day": {
+        "wednesday": 533,
+        "monday": 533,
+        "saturday": 543,
+        "thursday": 547,
+        "friday": 563,
+        "tuesday": 553,
+        "sunday": 613,
+    },
+    "hotel-stars": {
+        "4": 1263,
+        "2": 193,
+        "0": 201,
+        "3": 401,
+        "5": 45,
+        "1": 45,
+    },
+    "hotel-internet": {
+        "yes": 1841,
+        "no": 79,
+        "free": 2
+    },
+    "hotel-area": {
+        "east": 416,
+        "north": 717,
+        "centre": 538,
+        "south": 289,
+        "west": 316,
+    },
+    "attraction-area": {
+        "centre": 1290,
+        "west": 332,
+        "north": 155,
+        "south": 240,
+        "east": 272,
+    },
+    "restaurant-pricerange": {
+        "expensive": 1477,
+        "cheap": 758,
+        "moderate": 1028,
+    },
+    "restaurant-area": {
+        "centre": 1745,
+        "south": 398,
+        "north": 390,
+        "east": 360,
+        "west": 423,
+    },
+    "restaurant-day": {
+        "thursday": 362,
+        "wednesday": 412,
+        "friday": 395,
+        "monday": 383,
+        "sunday": 399,
+        "saturday": 421,
+        "tuesday": 350,
+    }
+}
+
+
+# Groups of surface forms treated as interchangeable when `get_state_update`
+# looks for a state value: if the value belongs to a group, the whole group
+# becomes the list of candidate values.
+# NOTE(review): `get_state_update` binds the group itself as `value_list` and
+# then extends that list, which appears to grow these module-level groups in
+# place across calls -- confirm.
+synonyms = [
+    ["el shaddia guesthouse", "el shaddai"],
+    [ "peterborough", "peterbourgh"],
+    ["night club", "nightclub", 'nightclubs'],
+    ["boat", "boating"],
+    ["portugese", "portuguese"],
+    ["guesthouse", "guest house"],
+    ["seafood", "sea food"],
+    ["christ 's college", "christ college"],
+    ["huntingdon marriott hotel"]
+]
+
+# "domain-slot" names of all slots marked 'cat' in `slot_to_type`.
+state_cat_slot_ds = [k for k, v in slot_to_type.items() if v == 'cat']
+
+# Initial value lists for categorical dialogue-act slots, keyed by
+# "domain-slot".  `convert_da` and `get_state_update` append to the dict they
+# receive as their `da_cat_slot_values` argument -- presumably this one;
+# verify against the caller.
+da_cat_slot_values = {
+    # 'hotel-stay': ['1', '2', '3', '4', '5'],
+    'hotel-internet': ['free', 'no', 'none', 'yes'],
+    'hotel-parking': ['free', 'no', 'none', 'yes']
+}
+
+# Starts empty.  NOTE(review): in the part of the file visible here only
+# commented-out code refers to it -- check whether it is still needed.
+state_cat_slot_values = {}
+
+# Human-readable descriptions.  Every domain entry holds the description of
+# the domain itself under the key 'domain' plus descriptions of some of its
+# slots; the 'intents' entry maps an intent name to its description.
+# NOTE(review): presumably the source of the description fields written to
+# ontology.json -- confirm against the code that reads this table.
+multiwoz_desc = {
+    'taxi': {
+        'domain': 'taxi information query system',
+        'taxi_phone': 'taxi phone number',
+        'taxi_types': 'taxi type',
+    },
+    'restaurant': {
+        'domain': 'restaurant information query system',
+        'address': 'exact location of the restaurant',
+        'postcode': 'postcode of the restaurant',
+        'phone': 'restaurant phone number',
+        'choice': 'number of restaurants meeting requests of user',
+    },
+    'attraction': {
+        'domain': 'an entertainment that is offered to the public',
+        'address': 'details of where the attraction is',
+        'postcode': 'postcode of the attraction',
+        'phone': 'phone number of the attraction',
+        'entrance fee': 'the fee charged for admission to the attraction',
+        'pricerange': 'the price range for the attraction, from cheap to expensive',
+        'choice': 'number of attractions matching requests of user'
+    },
+    'booking': {
+        'domain': 'to arrange with a taxi, restaurant, train, etc.',
+        'time': 'time for an order',
+        'day': 'day for an order, from monday to sunday',
+        'stay': 'for how long the user wish to be at a place',
+        'people': 'how many person the order is for',
+        'name': 'name of the ordered place',
+        'Ref': 'reference number of the order'
+    },
+    'train': {
+        'domain': 'query and order a train',
+        'duration': 'the length of time the train trip lasts',
+        'Ref': 'reference number of the order',
+        'price': 'price for the train ticket',
+        'choice': 'number of trains that meets requests of the user',
+    },
+    'hotel': {
+        'domain': 'to query hotel information and place an order',
+        'address': 'exact location of the hotel',
+        'postcode': 'postcode of the hotel',
+        'phone': 'hotel phone number',
+        'choice': 'number of hotels that meets requests of the user',
+    },
+    'police': {
+        'domain': 'find police stations',
+        'address': 'exact location of the police station',
+        'postcode': 'postcode of the police station',
+        'phone': 'police station phone number',
+    },
+    'intents': {
+        'inform': 'inform user of value for a certain slot',
+        'request': 'ask for value of a slot',
+        'nobook': 'inform user of booking failure',
+        'reqmore': 'ask user for more instructions',
+        'book': 'place an order for user',
+        'bye': 'end a conversation and say goodbye to user',
+        'thank': 'express gratitude',
+        'welcome': 'welcome',
+        'offerbooked': 'inform user that an order is successful',
+        'recommend': 'recommend a choice for user request',
+        'greet': 'express greeting',
+        'nooffer': 'inform user that no options matches user request',
+        'offerbook': 'offer to place an order for user',
+        'select': 'provide several choices for user to choose from',
+    }
+}
+
+# Spelled-out English word for each of the digit strings '0' .. '10';
+# `pharse_in_sen` uses it to look for a number written as a word.
+digit2word = {
+    '0': 'zero', '1': 'one', '2': 'two', '3': 'three', '4': 'four', '5': 'five',
+    '6': 'six', '7': 'seven', '8': 'eight', '9': 'nine', '10': 'ten'
+}
+
+
+def _search_phrase(template, phrase, sen):
+    '''
+    search `sen` for `phrase` wrapped in the boundary regex `template`
+    :param template: str, regex with a `{}` placeholder inside a group named `v`
+    :param phrase: str, literal text to look for (escaped before insertion)
+    :param sen: str
+    :return: ((start, end), num) for the first match, None if there is no match
+    '''
+    p = re.compile(template.format(re.escape(phrase)), re.I)
+    m = p.search(sen)
+    if m is None:
+        return None
+    return m.span('v'), len(p.findall(sen))
+
+
+def pharse_in_sen(phrase, sen):
+    '''
+    match value in the sentence, ignoring case
+
+    A digit string listed in `digit2word` is first looked up in its spelled-out
+    form (e.g. 'two' for '2'); if that is absent, the digits themselves are tried.
+    :param phrase: str
+    :param sen: str
+    :return: ((start, end), num) where (start, end) is the character span of
+        the first match and num is the number of non-overlapping matches;
+        ((None, None), 0) if nothing matched
+    '''
+    assert isinstance(phrase, str)
+    # raw strings: the patterns are full of regex escapes such as \s and \D
+    # word pattern: phrase delimited by whitespace, punctuation or a string edge
+    pw = r'(^|[\s,\.:\?!-])(?P<v>{})([\s,\.:\?!-]|$)'
+    # number pattern: `,` `.` `:` only delimit the number when the character on
+    # their other side is not a digit, so '5' is not found inside '1.5' or '5:30'
+    pn = r'(^|[\s\?!-]|\D[,\.:])(?P<v>{})($|[\s\?!-]|[,\.:]\D|[,\.:]$)'
+
+    candidates = []
+    if phrase.isdigit() and phrase in digit2word:
+        # the spelled-out form takes priority over the digits
+        candidates.append((pw, digit2word[phrase]))
+    # `phrase` itself is left untouched so that a digit string still falls back
+    # to the numeric pattern when its spelled-out form is not in the sentence
+    candidates.append((pn if phrase.isdigit() else pw, phrase))
+
+    for template, text in candidates:
+        result = _search_phrase(template, text, sen)
+        if result is not None:
+            return result
+    return (None, None), 0
+
+
+
+
+
+def update_state(state, update):
+    '''
+    merge a per-service state update into `state` in place
+    :param state: dict, service -> dict of slot values; modified in place
+    :param update: dict, service -> dict of slot values to apply
+    :return: None
+    '''
+    for name in update:
+        changes = update[name]
+        if name in state:
+            # known service: overwrite / add the updated slots
+            state[name].update(changes)
+            continue
+        # unseen service: store a private copy so `update` is not shared
+        state[name] = copy.deepcopy(changes)
+
+
+def _word_span_to_char_span(utt_list, start_w, end_w):
+    '''
+    turn an inclusive word-index span into character offsets
+
+    Offsets are computed as if consecutive words were one character apart.
+    :param utt_list: list of str, the whitespace-split utterance
+    :param start_w: int, index of the first word of the span
+    :param end_w: int, index of the last word of the span
+    :return: (start_ch, end_ch), with end_ch exclusive
+    '''
+    start_ch = sum(len(utt_list[i]) + 1 for i in range(start_w))
+    end_ch = start_ch + sum(len(utt_list[i]) + 1 for i in range(start_w, end_w))
+    try:
+        end_ch += len(utt_list[end_w])
+    except IndexError:
+        # end_w == len(utt_list) passes the caller's range check; report the
+        # annotation and keep the offsets computed so far
+        print(utt_list, start_w, end_w)
+    return start_ch, end_ch
+
+
+def _find_annotated_span(utt, annotation, surface):
+    '''
+    locate the first annotated word span whose text equals `surface`
+    :param utt: str, the utterance
+    :param annotation: dict with parallel lists `start_word` and `end_word`
+    :param surface: str, exact text the span has to cover
+    :return: (start_ch, end_ch) of the first matching span, None if none matches
+    '''
+    utt_list = utt.split()
+    for start_w, end_w in zip(annotation['start_word'], annotation['end_word']):
+        if start_w > len(utt_list) or end_w > len(utt_list):
+            continue
+        start_ch, end_ch = _word_span_to_char_span(utt_list, start_w, end_w)
+        if utt[start_ch: end_ch] == surface:
+            return start_ch, end_ch
+    return None
+
+
+def convert_da(utt, da_dict, binary_ont, intent_ont, did, tid, da_cat_slot_values):
+    '''
+    convert multiwoz dialogue acts to required format
+
+    Acts of the police, hospital and bus domains and acts whose domain or slot
+    is missing from REF_SYS_DA are dropped.  General-domain acts, acts without
+    a slot and requests become binary acts; the remaining acts are filed as
+    categorical or non-categorical according to slot_to_type and get
+    `start` / `end` character offsets when an annotated span matches the value.
+    :param utt: user or system utt
+    :param da_dict: multiwoz da, dict keyed by (Domain-Act, slot, value) tuples
+        whose values may carry `start_word` / `end_word` lists
+    :param binary_ont: binary ontology, list extended in place with unseen binary acts
+    :param intent_ont: intent ontology, dict given an empty entry for each unseen intent
+    :param did: unused (presumably the dialogue id)
+    :param tid: unused (presumably the turn id)
+    :param da_cat_slot_values: dict "domain-slot" -> list; every categorical
+        value is appended in place
+    :return: dict with the lists 'categorical', 'non-categorical' and 'binary'
+    '''
+    converted_da = {
+        'categorical': [],
+        'non-categorical': [],
+        'binary': []
+    }
+
+    def add_binary(intent, domain, slot):
+        # binary acts carry no value; each distinct one also goes to the ontology
+        converted_dict = {
+            'intent': intent,
+            'domain': domain,
+            'slot': slot,
+            'value': ''
+        }
+        converted_da['binary'].append(converted_dict)
+        if converted_dict not in binary_ont:
+            binary_ont.append(converted_dict)
+
+    for Domain_Act, S, v in da_dict:
+        Domain, Act = Domain_Act.split('-')
+        domain, intent = Domain.lower(), Act.lower()
+        if domain in ['police', 'hospital', 'bus']:
+            continue
+
+        if intent not in intent_ont:
+            intent_ont[intent] = {}
+
+        # general domain is converted to empty domain. e.g. thank, bye
+        if Domain == 'general':
+            assert S == 'none'
+            assert v == 'none'
+            add_binary(intent, '', '')
+            continue
+
+        try:
+            reformated_slot = REF_SYS_DA[Domain][S]
+        except KeyError:
+            # domain or slot not in REF_SYS_DA, drop
+            continue
+
+        # if slot is None, da should be converted into binary
+        if reformated_slot is None:
+            # a None slot that still has a slot name or value (mainly `Open`) is dropped
+            if S == 'none' and v == 'none':
+                # e.g. Booking-Inform none none, Train-OfferBook none none
+                add_binary(intent, domain, '')
+            continue
+
+        if intent == 'request':
+            add_binary('request', domain, reformated_slot)
+            continue
+
+        annotation = da_dict[(Domain_Act, S, v)]
+        reformated_domain_slot = domain + '-' + reformated_slot
+
+        if slot_to_type.get(reformated_domain_slot) == 'cat':
+            # categorical values are stored lower-cased
+            value = v.lower()
+            da_cat_slot_values.setdefault(reformated_domain_slot, []).append(value)
+            target = converted_da['categorical']
+        else:
+            value = v
+            target = converted_da['non-categorical']
+
+        converted_dict = {
+            'intent': intent,
+            'domain': domain,
+            'slot': reformated_slot,
+            'value': value
+        }
+        target.append(converted_dict)
+
+        # The span text is compared with the value as annotated (original
+        # case).  An act without span annotation, or whose annotated spans do
+        # not reproduce the value, is kept without `start` / `end`.
+        if 'start_word' in annotation:
+            span = _find_annotated_span(utt, annotation, v)
+            if span is not None:
+                converted_dict.update({
+                    'start': span[0],
+                    'end': span[1]
+                })
+    return converted_da
+
+
+def get_state_update(prev_state, cur_state, dialog, did, tid, utt, coref_dict, slot_notfound_dict, da_cat_slot_values):
+    prev_turns = dialog['turns']
+    state_update = {'categorical': [], 'non-categorical': []}
+    notfoundnum = 0
+    total_value = 0
+
+    diff_state = {}
+    if prev_state is None:
+        diff_state = {domain: {slot: value for slot, value in cur_state[domain].items() if value != ''} for domain in
+                      cur_state}
+    else:
+        assert len(prev_state) == len(cur_state), print(prev_state, cur_state)
+        for domain, domain_state in prev_state.items():
+            if domain not in diff_state:
+                diff_state[domain] = {}
+            for slot, value in domain_state.items():
+                if value != cur_state[domain][slot]:
+                    # assert len(cur_state[domain][slot]) > 0, print(did, tid, domain, slot, utt)
+                    diff_state[domain][slot] = cur_state[domain][slot]
+
+    ret_diff_state = copy.deepcopy(diff_state)
+
+
+
+    for domain in diff_state:
+        for slot in diff_state[domain]:
+
+            total_value += 1
+            fix_or = False
+            if '|' in diff_state[domain][slot]:
+                value = diff_state[domain][slot].split('|')[0]
+            else:
+                value = diff_state[domain][slot]
+
+            # if dialog['original_id'] == 'PMUL2512' and tid == 17 and value == '02:45':
+            #     value = '2:45'
+
+            value_list = [value]
+            for _synonyms in synonyms:
+                if value in _synonyms:
+                    value_list = _synonyms
+
+            value_list.extend(get_time_variants(value))
+            value_list.extend(get_genitive_variants(value))
+            value_list.extend(get_bb_variants(value))
+
+            if value.endswith(' restaurant'):
+                value_list.append(value.split(' restaurant')[0])
+            if value.endswith(' hotel'):
+                value_list.append(value.split(' hotel')[0])
+            found = False
+            for value in value_list:
+                # categorical slots
+                if slot in ['internet', 'parking', 'pricerange', 'day', 'area', 'stars']:
+                    reformated_domain_slot = '-'.join([domain, slot])
+                    if reformated_domain_slot in state_cat_slot_value_dict and (value in state_cat_slot_value_dict[reformated_domain_slot] or value in ['dontcare', '', 'none', 'not mentioned']):
+                        state_update['categorical'].append({
+                            'domain': domain,
+                            'slot': slot,
+                            'value': diff_state[domain][slot]
+                        })
+                        if domain + '-' + slot not in da_cat_slot_values:
+                            da_cat_slot_values[domain + '-' + slot] = [diff_state[domain][slot]]
+                        da_cat_slot_values[domain + '-' + slot].append(diff_state[domain][slot])
+                        if value != diff_state[domain][slot]:
+                            state_update['categorical'][-1].update({'fixed_value': value})
+                            ret_diff_state[domain][slot] = value
+                        else :
+                            for _turn in prev_turns[::-1]:
+                                found = False
+                                for da in _turn['dialogue_act']['categorical']:
+                                    if da['value'] == value:
+                                        if 'start' in da:
+                                            state_update['categorical'][-1].update({
+                                                'utt_idx': _turn['utt_idx'],
+                                                'start': da['start'],
+                                                'end': da['end'],
+                                                'from': 'prev_da_span'
+                                            })
+                                            found = True
+                                            break
+                                if found:
+                                    break
+                    else:
+                        state_update['categorical'].append({
+                            'domain': domain,
+                            'slot': slot,
+                            'value': diff_state[domain][slot],
+                            'fixed_value': 'not found'
+                        })
+                        if domain + '-' + slot not in da_cat_slot_values:
+                            da_cat_slot_values[domain + '-' + slot] = [diff_state[domain][slot]]
+                        da_cat_slot_values[domain + '-' + slot].append(diff_state[domain][slot])
+                        ret_diff_state[domain][slot] = 'not found'
+                        notfoundnum += 1
+                    # reformated_domain_slot = '-'.join([domain, slot]
+                    found = True
+                    break
+
+                # process value ---> none
+                assert value not in ['none', 'not mentioned']
+                if value in ['', 'dontcare']:
+                    # if reformated_domain_slot not in state_cat_slot_values:
+                    #     state_cat_slot_values[reformated_domain_slot] = []
+                    # # if v not in cat_slot_values[reformated_domain_slot]:
+                    # state_cat_slot_values[reformated_domain_slot].append(value)
+                    state_update['non-categorical'].append({
+                        'domain': domain,
+                        'slot': slot,
+                        'value': diff_state[domain][slot]
+                    })
+                    found = True
+                    break
+
+                # first look for values in coref_dict
+                for _Domain_Act, _Slot, _value in coref_dict:
+                    _domain, _act = _Domain_Act.lower().split('-')
+                    _slot = _Slot.lower()
+                    _coref_value = coref_dict[(_Domain_Act, _Slot, _value)]['coref_value']
+                    if _coref_value == '':
+                        continue
+                    _coref_turn = coref_dict[(_Domain_Act, _Slot, _value)]['turn']
+                    if _coref_turn == -1:
+                        continue
+                    _coref_pos = coref_dict[(_Domain_Act, _Slot, _value)]['pos']
+                    if _coref_pos == '':
+                        continue
+                    _utt = coref_dict[(_Domain_Act, _Slot, _value)]['utt']
+                    if _domain == domain and _slot == slot and value == _coref_value:
+
+                        start_w, end_w = [int(p) for p in _coref_pos.split('-')]
+                        utt_list = _utt.split()
+                        start_ch = 0
+                        for i in range(start_w):
+                            start_ch += len(utt_list[i]) + 1
+                        end_ch = start_ch
+                        for i in range(start_w, end_w + 1):
+                            end_ch += len(utt_list[i]) + 1
+                        end_ch -= 1
+
+                        if not _utt[start_ch: end_ch] == _coref_value:
+                            # print(111111111111111111111111111111111)
+                            # print(_utt[start_ch: end_ch], _coref_value)
+                            continue
+
+                        state_update['non-categorical'].append({
+                            'domain': domain,
+                            'slot': slot,
+                            'value': diff_state[domain][slot],
+                            'from': 'coref',
+                            'utt_idx': _coref_turn,
+                            'start': start_ch,
+                            'end': end_ch
+                        })
+                        if value != diff_state[domain][slot]:
+                            state_update['categorical'][-1].update({'fixed_value': value})
+                            ret_diff_state[domain][slot] = value
+                        found = True
+
+                if found:
+                    break
+
+                # from da annotation
+                for _turn in prev_turns[::-1]:
+                    for da in _turn['dialogue_act']['non-categorical']:
+                        # if da['domain'] == domain and da['slot'] == slot and fuzz.ratio(da['value'], value) > 85:
+                            # if not da['value'] == value:
+                            #     print(1111111111111111)
+                            #     print(value, da['value'])
+
+                        if fuzz.ratio(da['value'], value) > 85:
+
+                            if 'start' in da:
+                                found = True
+                                state_update['non-categorical'].append({
+                                    'domain': domain,
+                                    'slot': slot,
+                                    # 'value': da['value'],
+                                    'value': diff_state[domain][slot],
+                                    'utt_idx': _turn['utt_idx'],
+                                    'start': da['start'],
+                                    'end': da['end'],
+                                    'from': 'prev_da_span'
+                                })
+                                if value != diff_state[domain][slot]:
+                                    state_update['non-categorical'][-1].update({'fixed_value': value})
+                                    ret_diff_state[domain][slot] = value
+                                if da['value'] != value:
+                                    state_update['non-categorical'][-1].update({'fixed_value':da['value']})
+                                    ret_diff_state[domain][slot] = da['value']
+
+                                break
+                    if found:
+                        break
+
+                if found:
+                    break
+
+                # from utterance
+                for _turn in prev_turns[::-1]:
+                    _utt = _turn['utterance']
+                    (start, end), num = pharse_in_sen(str(value), _utt)
+                    if num:
+                        assert value.lower() == _utt[start:end].lower() \
+                               or digit2word[value].lower() == _utt[start:end].lower()
+                        found = True
+                        state_update['non-categorical'].append({
+                            'domain': domain,
+                            'slot': slot,
+                            'value': diff_state[domain][slot],
+                            # 'value': _utt[start:end].lower(),
+                            # 'fixed_value': _utt[start:end].lower(),
+                            'from': 'prev_utt',
+                            'utt_idx': _turn['utt_idx'],
+                            'start': start,
+                            'end': end
+                        })
+                        if value != diff_state[domain][slot]:
+                            state_update['non-categorical'][-1].update({'fixed_value': value})
+                            ret_diff_state[domain][slot] = value
+                        if value != _utt[start:end].lower():
+                            state_update['non-categorical'][-1].update({'fixed_value': _utt[start:end].lower()})
+                            ret_diff_state[domain][slot] = _utt[start:end].lower()
+                        found = True
+                        break
+                if found:
+                    break
+
+                # from utterance
+                if not value.isdigit():
+                    for _turn in prev_turns[::-1]:
+                        _utt = _turn['utterance']
+
+                        s = difflib.SequenceMatcher(None, _utt, value)
+                        matches = s.get_matching_blocks()
+
+                        for i, j, n in matches:
+                            possible_value = _utt[i: i+len(value)]
+
+                            if i+ len(value) < len(_utt) and _utt[i+len(value)] not in [ ' ', ',', '.', '?', '!', '/'] :
+                                possible_value += _utt[i+len(value):].split()[0]
+
+                                if possible_value.startswith('th '):
+                                    possible_value = possible_value[3:]
+                                    i += 3
+                            if i > 0 and _utt[i-1] not in [ ' ', ',', '.', '?', '!', '/']:
+                                # cut first incomplete word
+                                if len(possible_value.split()) > 1:
+                                    i += len(possible_value.split()[0]) + 1
+                                    possible_value = ' '.join(possible_value.split()[1:])
+
+
+                                # prepend first incomplete word
+                                # possible_value = _utt[:i].split()[-1] + possible_value
+                                # i -= len(_utt[:i].split()[-1])
+
+
+                            if fuzz.token_sort_ratio(value, possible_value) > 92 or possible_value.startswith('ashley hotel and lovell lodge') :
+                                found = True
+
+                                state_update['non-categorical'].append({
+                                            'domain': domain,
+                                            'slot': slot,
+                                            'value': diff_state[domain][slot],
+                                            # 'value': possible_value,
+                                            # 'fixed_value': possible_value,
+                                            'from':'prev_utt',
+                                            'utt_idx': _turn['utt_idx'],
+                                            'start': i,
+                                            'end': i+len(possible_value)
+                                        })
+                                if value != diff_state[domain][slot]:
+                                    state_update['non-categorical'][-1].update({'fixed_value': value})
+                                    ret_diff_state[domain][slot] = value
+                                if possible_value != value:
+                                    state_update['non-categorical'][-1].update({'fixed_value': possible_value})
+                                    ret_diff_state[domain][slot] = possible_value
+                                break
+                    #             assert _utt[i:i+len(possible_value)] == possible_value, print(_utt, _utt[i:i+len(possible_value)], possible_value)
+                    #             break
+                                # if not possible_value == value:
+                                #             print(3333333333333333)
+                                #             print(value)
+                                #             print(possible_value)
+                            if found:
+                                break
+                        if found:
+                            break
+
+                if found:
+                    break
+            if not found:
+                #                 print('3333333333333333333')
+                #                 print(did, tid)
+                #                 print(domain, slot, value)
+                #                 print([_t['utterance'] for _t in prev_turns])
+                # assert slot not in ['internet', 'parking', 'pricerange', 'day', 'area', 'stars']
+
+                if (domain, slot) not in slot_notfound_dict:
+                    slot_notfound_dict[(domain, slot)] = 1
+                else:
+                    slot_notfound_dict[(domain, slot)] += 1
+                state_update['non-categorical'].append({
+                    'domain': domain,
+                    'slot': slot,
+                    'value': diff_state[domain][slot],
+                    'fixed_value': 'not found'
+                })
+                ret_diff_state[domain][slot] = 'not found'
+                notfoundnum += 1
+    return state_update, notfoundnum, total_value, ret_diff_state
+
+
+def merge_data_annotation():
+    """Load the original MultiWOZ annotations from ``original_data/data.json``.
+
+    Despite the name, the active code merges nothing: it parses the JSON file
+    and returns the resulting object unchanged. The disabled sketch kept below
+    appears to be the merge the name refers to (copying span, dialogue-act and
+    coreference annotations from a ``data25`` source into matching turns).
+
+    Returns:
+        The parsed content of ``data.json``; ``preprocess`` iterates it with
+        ``.items()`` as a mapping from dialogue id to dialogue.
+    """
+    extract_dir = os.path.join(self_dir, 'original_data')
+    # extract_dir already starts with self_dir, so it is not joined a second
+    # time; the context manager closes the handle that was previously leaked.
+    with open(os.path.join(extract_dir, 'data.json')) as data_file:
+        data21 = json.load(data_file)
+
+    # Disabled: copy annotations from ``data25`` (not defined in this function)
+    # into every turn of ``data21`` whose text is identical.
+    # update_from_25_cnt = 0
+    # total_turn = 0
+    # for dial_id, dialog in data21.items():
+    #     dial_id = dial_id + '.json'
+    #     assert dial_id in data25
+    #     for i, _turn in enumerate(dialog['log']):
+    #         total_turn += 1
+    #         if _turn['text'] == data25[dial_id]['log'][i]['text']:
+    #             _turn['span_info'].extend(copy.deepcopy(data25[dial_id]['log'][i]['span_info']))
+    #             for Domain_Intent in data25[dial_id]['log'][i]['dialog_act']:
+    #                 if Domain_Intent in _turn['dialog_act']:
+    #                     _turn['dialog_act'][Domain_Intent].extend(data25[dial_id]['log'][i]['dialog_act'][Domain_Intent])
+    #                 else:
+    #                     _turn['dialog_act'][Domain_Intent] = copy.deepcopy(data25[dial_id]['log'][i]['dialog_act'][Domain_Intent])
+    #             if 'coreference' in data25[dial_id]['log'][i]:
+    #                 _turn['coreference'] = copy.deepcopy(data25[dial_id]['log'][i]['coreference'])
+    #             update_from_25_cnt += 1
+    #
+    # print('{}/{} turns update from multiwoz25 data'.format(update_from_25_cnt, total_turn))
+    return data21
+
+
+def preprocess(da_cat_slot_values, state_cat_slot_values):
+    """Convert the original MultiWOZ dialogues into the unified format.
+
+    When ``data.zip`` or ``ontology.json`` is missing, every dialogue returned
+    by ``merge_data_annotation`` that is listed in a train/val/test list file
+    is converted turn by turn (utterance, dialogue acts, state, state update)
+    and ``data.zip``, ``ontology.json`` and ``cat_slot_values.json`` are
+    written. Otherwise the two cached files are loaded and returned as is.
+
+    Args:
+        da_cat_slot_values: mapping keyed by ``domain-slot``; handed to
+            ``convert_da`` and ``get_state_update`` and, once de-duplicated,
+            used as ``possible_values`` of categorical slots in the ontology.
+        state_cat_slot_values: mapping whose per-key value counts are dumped
+            to ``cat_slot_values.json``.
+
+    Returns:
+        ``(all_data, new_ont)``: the converted dialogues and the ontology.
+    """
+    all_data = []
+    binary_ont = []
+    intent_ont = {}
+    state_ont = {}
+
+    data_splits = ['train', 'val', 'test']
+    extract_dir = os.path.join(self_dir, 'original_data')
+    num_train_dialogue = 0
+    num_train_utt = 0
+
+    num_match_error_da_span = 0
+
+    # NOTE(review): this cache check is relative to the working directory while
+    # the cached files are read from self_dir below; they agree only if equal.
+    if not os.path.exists('data.zip') or not os.path.exists('ontology.json'):
+        data = merge_data_annotation()
+        # One list file per split; sets give constant-time membership tests.
+        split_ids = {}
+        for _split in data_splits:
+            with open(os.path.join(extract_dir, _split + 'ListFile')) as list_file:
+                split_ids[_split] = set(list_file.read().split())
+
+        total_not_found_slot = 0
+        total_slot = 0
+        total_turn = 0
+        total_not_found_turn = 0
+        total_not_found_state = 0
+
+        slot_notfound_dict = {}
+
+        dialog_idx = 0
+        for dialog_id, dialog in tqdm(data.items()):
+
+            acc_not_found_flag = False
+
+            coref_dict = {}
+
+            # Remove only a literal '.json' suffix: str.strip('.json') strips
+            # any of the characters '.', 'j', 's', 'o', 'n' from both ends.
+            bare_id = dialog_id[:-len('.json')] if dialog_id.endswith('.json') else dialog_id
+            data_split = None
+            for _split in data_splits:
+                if bare_id in split_ids[_split]:
+                    data_split = _split
+                    break
+            if data_split is None:
+                continue
+
+            if data_split == 'train':
+                # Count this dialogue once (was += len(data), i.e. all dialogues).
+                num_train_dialogue += 1
+
+            dialog_idx += 1
+            converted_dialogue = {
+                'dataset': 'multiwoz22',
+                'data_split': data_split,
+                'dialogue_id': 'multiwoz22_' + str(dialog_idx),
+                'original_id': dialog_id,
+                'domains': [d for d in dialog['goal'] if
+                            len(dialog['goal'][d]) != 0 and d in multiwoz_desc and d not in ['police', 'hospital', 'bus']],
+                'turns': [],
+            }
+
+            if data_split == 'train':
+                num_train_utt += len(dialog['log'])
+
+            prev_state = None
+            accum_fixed_state = {}
+            for turn_id, turn in enumerate(dialog['log']):
+
+                utt = turn['text'].lower()
+                # for several wrong words
+                utt = utt.replace('seeuni', 'see uni')
+
+                utt = ' '.join(utt.split())
+                utt = utt.replace(' im ', ' i\'m ')
+                utt = utt.replace(' dont ', ' don\'t ')
+                utt = utt.replace(' thats ', ' that\'s ')
+                utt = utt.replace('idon\'t', ' i don\'t ')
+                utt = utt.replace('wedon\'t ', 'we don\'t ')
+                utt = utt.replace('id be ', 'i\'d be ')
+                utt = re.sub(r'^im ', 'i\'m ', utt)
+                utt = re.sub(r'^whats ', 'what\'s ', utt)
+                utt = re.sub(r'^id ', 'i\'d ', utt)
+                utt = re.sub(r'^thats ', 'that\'s ', utt)
+
+                # NOTE(review): inside the class, \'-\? is a range from "'" to "?"
+                # and therefore also matches digits, ':' and '/'; confirm intent.
+                utt = re.sub( r'([a-zA-Z0-9])([,.!\'-\?"~])', r'\1 \2', utt)
+                utt = re.sub(r'([,.!\'-\?"~])([a-zA-Z0-9])', r'\1 \2', utt)
+
+                das = turn.get('dialog_act', [])
+                role = 'user' if turn_id % 2 == 0 else 'system'
+                spans = turn.get('span_info', [])
+
+                da_dict = {}
+                for Domain_Act in das:
+                    Domain = Domain_Act.split('-')[0]
+                    if Domain.lower() not in converted_dialogue['domains'] and Domain.lower() not in ['general', 'booking']:
+                        continue
+
+                    Svs = das[Domain_Act]
+                    for S, v in Svs:
+                        v = v.lower()
+                        # drop a stray 'th '/'he ' prefix (presumably a truncated 'the ')
+                        if v.startswith('th '):
+                            v = v[3:]
+                        if v.startswith('he '):
+                            v = v[3:]
+
+                        if (Domain_Act, S, v) not in da_dict:
+                            da_dict[(Domain_Act, S, v)] = {}
+
+                for span in spans:
+                    Domain_Act, S, v, start_word, end_word = span
+                    v = v.lower()
+                    if not (Domain_Act, S, v) in da_dict:
+                        # span annotation without a matching dialogue act: count only
+                        num_match_error_da_span += 1
+                    else:
+                        # NOTE(review): start_word is presumably a word index, yet it
+                        # is shifted by the 3 removed characters below; confirm.
+                        if v.startswith('th '):
+                            v = v[3:]
+                            start_word += 3
+                        if v.startswith('he '):
+                            v = v[3:]
+                            start_word += 3
+
+                        if 'start_word' not in da_dict[(Domain_Act, S, v)]:
+                            da_dict[(Domain_Act, S, v)]['start_word'] = []
+                            da_dict[(Domain_Act, S, v)]['end_word'] = []
+
+                        da_dict[(Domain_Act, S, v)]['start_word'].append(start_word)
+                        da_dict[(Domain_Act, S, v)]['end_word'].append(end_word)
+
+                converted_turn = {
+                    'utt_idx': turn_id,
+                    'speaker': role,
+                    'utterance': utt,
+                    'dialogue_act': convert_da(utt, da_dict, binary_ont, intent_ont, dialog_id, turn_id, da_cat_slot_values),
+                }
+
+                # for state annotations
+                if role == 'system':
+                    turn_state = turn['metadata']
+                    cur_state = {}
+                    for domain in turn_state:
+                        if domain in ['police', 'hospital', 'bus']:
+                            continue
+                        if domain not in converted_dialogue['domains']:
+                            continue
+                        cur_state[domain] = {}
+                        for subdomain in ['semi', 'book']:
+                            for slot in turn_state[domain][subdomain]:
+                                if slot == 'booked':
+                                    continue
+                                if slot == 'ticket':  # or (domain == 'train' and slot == 'people'):
+                                    # for cases where domain slot exists in REF but not in state
+                                    # because of check in evaluate.py
+                                    continue
+
+                                else:
+                                    fixed_slot = slot
+                                state_ds = domain + '-' + fixed_slot
+                                if state_ds not in slot_to_type:
+                                    logging.info('state slot not defined in da list')
+                                    logging.info(state_ds)
+                                if turn_state[domain][subdomain][slot] in ['', [], 'not mentioned', 'none']:
+                                    cur_state[domain][fixed_slot] = ""
+                                else:
+                                    if turn_state[domain][subdomain][slot].startswith('th '):
+                                        turn_state[domain][subdomain][slot] = turn_state[domain][subdomain][slot][3:]
+                                    if turn_state[domain][subdomain][slot].startswith('he '):
+                                        turn_state[domain][subdomain][slot] = turn_state[domain][subdomain][slot][3:]
+
+                                    cur_state[domain][fixed_slot] = turn_state[domain][subdomain][slot]
+
+                                if domain not in state_ont:
+                                    state_ont[domain] = []
+                                if fixed_slot not in state_ont[domain]:
+                                    state_ont[domain].append(fixed_slot)
+
+                        if domain == 'train' and 'people' not in cur_state[domain]:
+                            cur_state[domain]['people'] = ''
+                        if len(converted_dialogue['turns']) > 0:
+                            # move state from system side to user side
+                            converted_dialogue['turns'][-1]['state'] = copy.deepcopy(cur_state)
+
+                    # for state update annotations
+                    state_update, _notfoundslot, _totalslot, ret_diff_state = get_state_update(
+                        prev_state, cur_state, converted_dialogue, dialog_id, turn_id, turn['text'],
+                        coref_dict, slot_notfound_dict, da_cat_slot_values)
+
+                    update_state(accum_fixed_state, ret_diff_state)
+                    for domain in accum_fixed_state:
+                        for slot in accum_fixed_state[domain]:
+                            assert isinstance(accum_fixed_state[domain][slot], str), print(accum_fixed_state[domain][slot])
+
+                    if _notfoundslot == 0:
+                        for slot in state_update['non-categorical']:
+                            if slot['value'] not in ['', 'dontcare']:
+                                assert 'utt_idx' in slot
+
+                    else:
+                        flag = False
+                        for slot in state_update['categorical']:
+                            if 'fixed_value' in slot:
+                                flag = True
+                                break
+                        for slot in state_update['non-categorical']:
+                            if 'utt_idx' not in slot:
+                                flag = True
+                                break
+                        assert flag, print(flag, state_update['non-categorical'])
+
+                    total_turn += 1
+                    total_slot += _totalslot
+                    total_not_found_slot += _notfoundslot
+                    total_not_found_turn += 1 if _notfoundslot > 0 else 0
+                    if _notfoundslot > 0:
+                        acc_not_found_flag = True
+                    if acc_not_found_flag:
+                        total_not_found_state += 1
+
+                    # reset after use; entries added from here on feed the next system turn
+                    coref_dict = {}
+                    converted_dialogue['turns'][-1]['state_update'] = copy.deepcopy(state_update)
+                    converted_dialogue['turns'][-1]['fixed_state'] = copy.deepcopy(accum_fixed_state)
+                    if 'state' not in converted_dialogue['turns'][-1]:
+                        converted_dialogue['turns'][-1]['state'] = {}
+                    prev_state = copy.deepcopy(cur_state)
+
+                converted_dialogue['turns'].append(converted_turn)
+
+                if 'coreference' in turn:
+                    for Domain_Act in turn['coreference']:
+                        for Slot, value, coref, coref_turn, coref_pos in turn['coreference'][Domain_Act]:
+                            value = value.lower()
+                            coref_dict[(Domain_Act, Slot, value)] = {'turn': coref_turn, 'pos': coref_pos,
+                                                                     'coref_value': coref,
+                                                                     'utt': converted_dialogue['turns'][coref_turn][
+                                                                         'utterance']}
+
+            check_spans(converted_dialogue)
+            # drop a trailing system turn (state annotations live on user turns)
+            if converted_dialogue['turns'][-1]['speaker'] == 'system':
+                converted_dialogue['turns'].pop(-1)
+            all_data.append(converted_dialogue)
+
+        print('total_turn', total_turn)
+        print('total_not_found_turn', total_not_found_turn)
+        print('total_slot', total_slot)
+        print('total_not_found_slot', total_not_found_slot)
+        print('total_not_found_state', total_not_found_state)
+        print(slot_notfound_dict)
+        from collections import Counter
+        with open(os.path.join(self_dir, 'cat_slot_values.json'), 'w') as counts_file:
+            json.dump({k: dict(Counter(v)) for k, v in state_cat_slot_values.items()}, counts_file, indent=4)
+        # de-duplicate the values before they are used as ``possible_values``
+        da_cat_slot_values = {k: list(set(v)) for k, v in da_cat_slot_values.items()}
+
+        with open('data.json', 'w') as data_file:
+            json.dump(all_data, data_file, indent=4)
+        write_zipped_json(os.path.join(self_dir, './data.zip'), 'data.json')
+        os.remove('data.json')
+
+        new_ont = {
+            'domains': {},
+            'intents': {},
+            'binary_dialogue_act': {}
+        }
+
+        for d_s in slot_to_type:
+            d, s = d_s.split('-')
+            if d not in new_ont['domains']:
+                new_ont['domains'][d] = {
+                    'description': multiwoz_desc[d]['domain'],
+                    'slots': {}
+                }
+            domain_ont = new_ont['domains'][d]
+            assert s not in domain_ont['slots']
+            domain_ont['slots'][s] = {
+                'description': multiwoz_desc[d][s] if s in multiwoz_desc[d] else '',
+                'is_categorical': d_s in state_cat_slot_ds,
+                'possible_values': da_cat_slot_values[d_s] if d_s in state_cat_slot_ds else []
+            }
+            domain_ont['slots'][s]['possible_values'] = [_ for _ in domain_ont['slots'][s]['possible_values'] if _ not in ['dontcare', '']]
+
+        new_ont['state'] = {}
+        print(state_ont)
+        for d in state_ont:
+            new_ont['state'][d] = {}
+            for s in state_ont[d]:
+                new_ont['state'][d][s] = ''
+
+        new_ont['intents'] = {i: {'description': multiwoz_desc['intents'][i]} for i in intent_ont}
+        new_ont['binary_dialogue_act'] = binary_ont
+
+        with open(os.path.join(extract_dir, 'slot_descriptions.json')) as desc_file:
+            slot_desc = json.load(desc_file)
+        for domain_slot in slot_desc:
+            _domain, _slot = domain_slot.split('-')
+            _desc = slot_desc[domain_slot][0]
+            if _slot == 'arriveby':
+                _slot = 'arriveBy'
+            elif _slot == 'leaveat':
+                _slot = 'leaveAt'
+            if 'book' in _slot:
+                _slot = _slot.replace('book ', '')
+            if not _domain in new_ont['state']:
+                continue
+            if _domain in new_ont['domains'] and _slot in new_ont['domains'][_domain]['slots']:
+                new_ont['domains'][_domain]['slots'][_slot]['description'] = _desc
+            if not _slot in new_ont['state'][_domain]:
+                logging.info('domain {} slot {} not in state'.format(_domain, _slot))
+                continue
+            assert _domain in new_ont['domains'], print(_domain)
+            assert _slot in new_ont['domains'][_domain]['slots']
+
+        logging.info('num_match_error_da_span {}'.format(num_match_error_da_span))
+        with open(os.path.join(self_dir, './ontology.json'), 'w') as ontology_file:
+            json.dump(new_ont, ontology_file, indent=4)
+
+    else:
+        # Cached outputs found: load them instead of converting again.
+        all_data = read_zipped_json(os.path.join(self_dir, './data.zip'), 'data.json')
+        with open(os.path.join(self_dir, './ontology.json'), 'r') as ontology_file:
+            new_ont = json.load(ontology_file)
+    logging.info('# dialogue: {}, # turn: {}'.format(num_train_dialogue, num_train_utt))
+    return all_data, new_ont
+
+
+# def postprocess_update_spans(dialog):
+#     changed_utt_idx_and_position = {}
+#     for turn in dialog['turns']:
+#         if turn['speaker'] != 'user':
+#             continue
+#         changed = False
+#         for _update in turn['state_update']['non-categorical']:
+#             if 'utt_idx' in _update:
+#                 utt_idx = _update['utt_idx']
+#                 start = _update['start']
+#                 end = _update['end']
+#
+#                 # assume at most one word changes for every utterance
+#                 if turn['utt_idx'] not in changed_utt_idx_and_position:
+#                     if utt_idx == turn['utt_idx'] and start-1 > -1 and turn['utterance'][start-1] not in [' ']:
+#                         changed_utt_idx_and_position[turn['utt_idx']] = start
+#                         print('=======================')
+#                         print(dialog['original_id'])
+#                         print(turn['utterance'])
+#                         print(json.dumps(_update, indent=2))
+#                         print(turn['utterance'][start: end])
+#                         turn['utterance'] = turn['utterance'][:start] + ' ' + turn['utterance'][start:]
+#                         print(turn['utterance'])
+#                         _update['start'] += 1
+#                         _update['end'] += 1
+#                         changed = True
+#                 if utt_idx not in changed_utt_idx_and_position:
+#                     continue
+#                 else:
+#                     value = _update['fixed_value'] if 'fixed_value' in _update and _update['fixed_value'] != 'not found' else _update['value']
+#                     if start >= changed_utt_idx_and_position[utt_idx]:
+#                         if dialog['turns'][utt_idx]['utterance'][_update['start']: _update['end']] != value:
+#                             assert dialog['turns'][utt_idx]['utterance'][_update['start']+1: _update['end']+1] == value, print(dialog['turns'][utt_idx]['utterance'], dialog['turns'][utt_idx]['utterance'][_update['start']+1: _update['end']+1])
+#                             _update['start'] += 1
+#                             _update['end'] += 1
+#                     elif start < changed_utt_idx_and_position[utt_idx] < end:
+#                         if dialog['turns'][utt_idx]['utterance'][_update['start']: _update['end']] != value:
+#                             assert (dialog['turns'][utt_idx]['utterance'][_update['start']: _update['end']+1]).replace(' ', '') == value.replace(' ', ''), print(dialog['turns'][utt_idx]['utterance'], dialog['turns'][utt_idx]['utterance'][_update['start']: _update['end']+1], value)
+#                             print('fix')
+#                             print(_update)
+#                             _update['end'] += 1
+#                             _update['fixed_value'] = turn['utterance'][_update['start']: _update['end'] + 1].strip()
+#                             print(_update)
+#         if changed:
+#             for _update in turn['state_update']['non-categorical']:
+#                 if 'utt_idx' in _update:
+#                     utt_idx = _update['utt_idx']
+#                     start = _update['start']
+#                     end = _update['end']
+#
+#                     if utt_idx not in changed_utt_idx_and_position:
+#                         continue
+#                     else:
+#                         value = _update['fixed_value'] if 'fixed_value' in _update and _update[
+#                             'fixed_value'] != 'not found' else _update['value']
+#                         if start >= changed_utt_idx_and_position[utt_idx]:
+#                             if dialog['turns'][utt_idx]['utterance'][_update['start']: _update['end']] != value:
+#                                 assert dialog['turns'][utt_idx]['utterance'][_update['start'] + 1: _update['end'] + 1] == value
+#                                 _update['start'] += 1
+#                                 _update['end'] += 1
+#                         elif start < changed_utt_idx_and_position[utt_idx] < end:
+#                             if dialog['turns'][utt_idx]['utterance'][_update['start']: _update['end']] != value:
+#                                 print('====================fix===================')
+#                                 print(_update)
+#                                 assert (dialog['turns'][utt_idx]['utterance'][_update['start']: _update['end']+1]).replace(' ', '') == value.replace(' ', ''), print(dialog['turns'][utt_idx]['utterance'], dialog['turns'][utt_idx]['utterance'][_update['start']+1: _update['end']+1])
+#                                 _update['end'] += 1
+#                                 _update['fixed_value'] = dialog['turns'][utt_idx]['utterance'][_update['start']: _update['end'] + 1]
+#                                 print(_update)
+#     for turn in dialog['turns']:
+#         if turn['speaker'] != 'user':
+#             continue
+#         for _update in turn['state_update']['non-categorical']:
+#             if 'utt_idx' in _update:
+#                 value = _update['fixed_value'] if 'fixed_value' in _update and _update[
+#                     'fixed_value'] != 'not found' else _update['value']
+#                 utt_idx = _update['utt_idx']
+#                 start = _update['start']
+#                 end = _update['end']
+#                 if dialog['turns'][utt_idx]['utterance'][start] == ' ':
+#                     _update['start'] += 1
+#                     _update['fixed_value'] = value[1:]
+#                     value = value[1:]
+#                     start += 1
+#                 assert dialog['turns'][utt_idx]['utterance'][start: end] == value, print(json.dumps(turn, indent=4), [c for c in dialog['turns'][utt_idx]['utterance'][start: end]], [c for c in value])
+#     return dialog
+
+
+def get_time_variants(time_text):
+    """Return alternative surface forms of the first clock time in ``time_text``.
+
+    The first ``H:MM`` / ``HH:MM`` occurrence (optionally followed by an
+    ``am``/``pm`` suffix) is rewritten between 12-hour and 24-hour notation.
+
+    Args:
+        time_text: string that may contain a time such as ``4:00pm`` or ``16:15``.
+
+    Returns:
+        A list that starts with ``time_text`` itself, followed by the generated
+        variants in a fixed order (duplicates are possible). An empty list is
+        returned when no ``H:MM`` time is found.
+    """
+    match = re.search(r'(\d{1,2}:\d{2})(\s)?(am|pm|AM|PM)?', time_text)
+    if match is None:
+        return []
+
+    clock = match.group(1)
+    # The suffix group is None when the time is not followed by am/pm.
+    meridiem = (match.group(3) or '').lower()
+    hour_text, minute = clock.split(':')
+    hour = int(hour_text)
+
+    value_list = [time_text]
+    if meridiem == 'am':
+        # 4:00am -> 4:00
+        value_list.append(clock)
+        if len(clock) == 4:
+            # 4:00 -> 04:00
+            value_list.append('0' + clock)
+    elif meridiem == 'pm':
+        # 4:00pm -> 16:00. Hours of 12 and above are left untouched so that
+        # 12:30pm stays 12:30 instead of becoming the invalid 24:30.
+        hour_24 = hour + 12 if hour < 12 else hour
+        value_list.append(str(hour_24) + ':' + minute)
+    else:
+        # No suffix: treat the time as 24-hour notation and derive the
+        # 12-hour spellings. Noon (12:xx) belongs to "pm" and keeps hour 12.
+        if hour >= 12:
+            suffix = 'pm'
+            hour_12 = str(hour - 12) if hour > 12 else '12'
+        else:
+            suffix = 'am'
+            hour_12 = str(hour)
+        short_clock = hour_12 + ':' + minute
+        value_list.extend([short_clock + suffix, short_clock + ' ' + suffix, short_clock])
+        if minute == '00':
+            # Full hours can also be written without minutes: 16:00 -> 4pm, 4 pm, 4
+            value_list.extend([hour_12 + suffix, hour_12 + ' ' + suffix, hour_12])
+        if len(clock) == 5 and clock[0] == '0':
+            # 09:00 -> 9:00
+            value_list.append(clock[1:])
+        # 16:15 -> 1615
+        value_list.append(hour_text + minute)
+
+    return value_list
+
+
+def get_genitive_variants(value):
+    """Return genitive spellings of ``value`` (``rosas`` -> ``rosa 's`` / ``rosa's``).
+
+    Every ``s`` that ends a word (preceded by a word character and followed by
+    whitespace) is rewritten, once with a detached ``'s`` and once with an
+    attached ``'s``.
+
+    Args:
+        value: slot value text.
+
+    Returns:
+        A list with zero, or two variants (detached form first); empty when
+        ``value`` contains no such word-final ``s``.
+    """
+    # A lookbehind is required here: a lookahead ``(?=\w)`` placed before the
+    # literal ``s`` is always satisfied by the ``s`` itself, which would also
+    # rewrite a stand-alone "s" token.
+    value_pattern = re.compile(r"(?<=\w)s(?=\s)")
+
+    ret_list = []
+    for replacement in (" 's", "'s"):
+        span_genitive_value = value_pattern.sub(replacement, value)
+        if span_genitive_value != value:
+            ret_list.append(span_genitive_value)
+    return ret_list
+
+
+def check_spans(dialog):
+    """Check that every span annotation in user turns matches its slot value.
+
+    For each non-categorical state update of a user turn that carries an
+    ``utt_idx``, the text ``utterance[start:end]`` of the referenced turn must
+    equal the update's ``fixed_value`` (when present and not ``'not found'``)
+    or otherwise its ``value``.
+
+    Args:
+        dialog: dict with a ``turns`` list; user turns provide
+            ``state_update['non-categorical']`` entries.
+
+    Raises:
+        AssertionError: if a span does not match its value; the message names
+            the span text, the expected value and the utterance.
+    """
+    turns = dialog['turns']
+    for turn in turns:
+        if turn['speaker'] != 'user':
+            continue
+        for _update in turn['state_update']['non-categorical']:
+            if 'utt_idx' not in _update:
+                continue
+            fixed_value = _update.get('fixed_value', 'not found')
+            value = _update['value'] if fixed_value == 'not found' else fixed_value
+            utterance = turns[_update['utt_idx']]['utterance']
+            span_text = utterance[_update['start']:_update['end']]
+            # Build a real message: passing print(...) as the assert message
+            # would leave the AssertionError without any detail.
+            assert span_text == value, (
+                'span {!r} does not match value {!r} in utterance {!r}'.format(
+                    span_text, value, utterance))
+
+
+
+def get_bb_variants(value):
+    """Return the ``b & b`` spelling of a value containing ``bed and breakfast``.
+
+    Args:
+        value: slot value text.
+
+    Returns:
+        A one-element list with every ``bed and breakfast`` replaced by
+        ``b & b``, or an empty list when the phrase does not occur.
+    """
+    long_form = 'bed and breakfast'
+    if long_form not in value:
+        return []
+    return [value.replace(long_form, 'b & b')]
+
+# Script entry point: when the file is executed directly, run preprocess()
+# with the da_cat_slot_values and state_cat_slot_values objects.
+# NOTE(review): presumably these hold the categorical slot values for dialogue
+# acts and for the state — confirm against their definitions.
+if __name__ == '__main__':
+    preprocess(da_cat_slot_values, state_cat_slot_values)
\ No newline at end of file
diff --git a/data/unified_datasets/multiwoz23/README.md b/data/unified_datasets/multiwoz23/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..57b5a077f7dc3c467764a664b6d01d08414c6e23
--- /dev/null
+++ b/data/unified_datasets/multiwoz23/README.md
@@ -0,0 +1,33 @@
+# README
+
+## Features
+
+- Annotations: dialogue acts, character-level spans for non-categorical slots, state and state updates.
+
+Statistics: 
+
+|       | \# dialogues | \# utterances | avg. turns | avg. tokens | \# domains |
+| ----- | ------------ | ------------- | ---------- | ----------- | ---------- |
+| train | 8434         | 105066         | 12.46     | 15.75      | 7          |
+| dev | 999         | 13731         | 13.74      | 16.1       | 7          |
+| test | 1000         | 13744         | 13.74       | 16.08       | 7          |
+
+
+## Main changes
+
+- only keep 5 domains in state annotations and dialog acts.
+- `pricerange`, `area`, `day`, `internet`, `parking`, `stars` are considered categorical slots.
+- replace special tokens by space. e.g. `I want@to find a hotel.  ->  I want to find a hotel.`
+
+Run `evaluate.py`:
+
+- da values match rate:    98.798
+- state values match rate: 89.185
+
+### original data
+
+- from [multiwoz-coref](https://github.com/lexmen318/MultiWOZ-coref) repo.
+- slot description by multiwoz2.2
+- some hand-written descriptions. 
+
+
diff --git a/data/unified_datasets/multiwoz23/data.zip b/data/unified_datasets/multiwoz23/data.zip
new file mode 100644
index 0000000000000000000000000000000000000000..54d08b07f6ca6f70fdfe39f9bdfda3d4564c1e16
Binary files /dev/null and b/data/unified_datasets/multiwoz23/data.zip differ
diff --git a/data/unified_datasets/multiwoz23/ontology.json b/data/unified_datasets/multiwoz23/ontology.json
new file mode 100644
index 0000000000000000000000000000000000000000..3d2c0a8dcc659e287ee88c95cf65ee9f7b3b3e8c
--- /dev/null
+++ b/data/unified_datasets/multiwoz23/ontology.json
@@ -0,0 +1,1983 @@
+{
+    "domains": {
+        "taxi": {
+            "description": "taxi information query system",
+            "slots": {
+                "destination": {
+                    "description": "destination of taxi",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "departure": {
+                    "description": "departure location of taxi",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "leaveAt": {
+                    "description": "leaving time of taxi",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "arriveBy": {
+                    "description": "arrival time of taxi",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "taxi_phone": {
+                    "description": "taxi phone number",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "taxi_types": {
+                    "description": "taxi type",
+                    "is_categorical": false,
+                    "possible_values": []
+                }
+            }
+        },
+        "restaurant": {
+            "description": "restaurant information query system",
+            "slots": {
+                "food": {
+                    "description": "food type for the restaurant",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "name": {
+                    "description": "name of the restaurant",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "pricerange": {
+                    "description": "price budget for the restaurant",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "budget conscious",
+                        "same area and price range",
+                        "expensive side",
+                        "mostly expensive",
+                        "fairly expensive",
+                        "all price ranges",
+                        "that range",
+                        "vietnamese",
+                        "same area and price range as the hotel",
+                        "middle",
+                        "or otherwise",
+                        "inexpensive",
+                        "on the pricey side",
+                        "ranging from cheap to expensive",
+                        "moderatly priced",
+                        "cherap",
+                        "centre",
+                        "more moderate",
+                        "north",
+                        "quite low",
+                        "this price range",
+                        "same price range",
+                        "moderatley priced",
+                        "varying price range",
+                        "great prices",
+                        "fine",
+                        "expensive price",
+                        "more expensive",
+                        "upscale",
+                        "any price range",
+                        "cheapish",
+                        "moderatre",
+                        "modest",
+                        "pricey",
+                        "same price",
+                        "fairly cheap",
+                        "relatively cheap",
+                        "cheap priced",
+                        "moderate pricing",
+                        "expensive price range",
+                        "cheaply priced",
+                        "east",
+                        "expensive range",
+                        "quite inexpensive",
+                        "same area and price",
+                        "moderately prices",
+                        "or the moderately priced",
+                        "expensive but worth every penny",
+                        "assorted",
+                        "any",
+                        "any price",
+                        "on the cheap side",
+                        "do n't care",
+                        "budget friendly",
+                        "most affordable price range",
+                        "low cost",
+                        "moderate|cheap",
+                        "pretty cheap",
+                        "a little expensive but worth it",
+                        "epensive",
+                        "not too pricey",
+                        "moderate priced",
+                        "not - so - expensive",
+                        "no particular",
+                        "your",
+                        "cheaper",
+                        "priced cheaply",
+                        "expensive priced",
+                        "fairly inexpensive",
+                        "moderate and one in the cheap range",
+                        "reasonable",
+                        "expensive(no",
+                        "cheap price range",
+                        "expinsive",
+                        "mostly expensive and moderately priced",
+                        "quite expensive",
+                        "very expensive",
+                        "that price",
+                        "cheap range",
+                        "boderate",
+                        "reasonably priced",
+                        "rather expensive",
+                        "any price point",
+                        "moderate or expensive priced",
+                        "all",
+                        "luxury options",
+                        "regardless of price",
+                        "expensive / upscale",
+                        "moderate price",
+                        "similar price range",
+                        "pretty expensive",
+                        "same area and pricerange",
+                        "that price range",
+                        "moderate range",
+                        "moderately expensive",
+                        "moderately",
+                        "not",
+                        "moderatly",
+                        "cheaply",
+                        "does n't matter",
+                        "budget - friendly",
+                        "cheapt",
+                        "cheapest",
+                        "various",
+                        "not moderate",
+                        "expensive or moderate",
+                        "not cheap",
+                        "expensively priced",
+                        "expensive and moderately priced",
+                        "not really",
+                        "high class",
+                        "low priced",
+                        "your price range",
+                        "varying price",
+                        "do not care",
+                        "not too expensive",
+                        "adforable",
+                        "they vary",
+                        "none",
+                        "moderate price range",
+                        "high - end",
+                        "moderately price",
+                        "not expensive",
+                        "relatively expensive",
+                        "cheap to expensive",
+                        "affordable",
+                        "cheaply - priced",
+                        "expensive",
+                        "moderate",
+                        "moderately - priced",
+                        "priced moderately",
+                        "same area and price range as my hotel",
+                        "moderate and expensive",
+                        "less expensive",
+                        "expensively",
+                        "high end",
+                        "most expensive",
+                        "same pricerange",
+                        "does not matter",
+                        "do not have a preference",
+                        "cheap or expensive",
+                        "the least expensive",
+                        "and",
+                        "moderately priced",
+                        "cheap",
+                        "every price point",
+                        "the same"
+                    ]
+                },
+                "address": {
+                    "description": "exact location of the restaurant",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "area": {
+                    "description": "area or place of the restaurant",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "east area of town",
+                        "the city centre",
+                        "that part of town",
+                        "centre city",
+                        "west of town",
+                        "center",
+                        "close to the hotel",
+                        "in cambridge",
+                        "ctre",
+                        "south side of the town",
+                        "close to your location",
+                        "town centre area",
+                        "any area",
+                        "east side of the city",
+                        "that area of town",
+                        "west part of town .",
+                        "on the east",
+                        "north of town",
+                        "n the centre",
+                        "that area",
+                        "east part of town",
+                        "the west of town",
+                        "centre",
+                        "west of cambridge",
+                        "north",
+                        "south area",
+                        "all of cambridge",
+                        "centre of town",
+                        "same area as the park",
+                        "centrally located",
+                        "the north",
+                        "these areas",
+                        "all of",
+                        "north or centre",
+                        "downtown",
+                        "centre area",
+                        "westside",
+                        "the east part of town",
+                        "throughout the area",
+                        "near clare hall",
+                        "the south",
+                        "the east",
+                        "the north area",
+                        "the south side",
+                        "east section of town",
+                        "by the airport",
+                        "the south area of the city",
+                        "the west side of town",
+                        "the south part of town",
+                        "centre region",
+                        "centre area of the city",
+                        "close it city centre",
+                        "mexican",
+                        "centre of the city",
+                        "anywhere in the city",
+                        "central",
+                        "north side of town",
+                        "near the center of town",
+                        "north and west",
+                        "east",
+                        "the south area of town",
+                        "here",
+                        "west area",
+                        "in town at all",
+                        "the east side",
+                        "centrally",
+                        "the centre area of town",
+                        "around town",
+                        "east side",
+                        "centre of cambridge",
+                        "in the north",
+                        "centreof",
+                        "central region of town",
+                        "any",
+                        "the west area",
+                        "the west part of town",
+                        "west part of the city",
+                        "same side",
+                        "ely",
+                        "other part of the town",
+                        "the north part of town",
+                        "do n't care",
+                        "same area",
+                        "anywhere in town",
+                        "same price range and area",
+                        "north end",
+                        "the north end",
+                        "here in the city",
+                        "same side of town as your hotel",
+                        "the area you have chosen",
+                        "center of the town",
+                        "the north side of town",
+                        "near the centre",
+                        "the same area as the botanic gardens",
+                        "southern",
+                        "north part of town",
+                        "this town",
+                        "central area",
+                        "east of cambridge",
+                        "east part of tow",
+                        "do n't have a preference",
+                        "northern parts of town",
+                        "south side of town",
+                        "centre cambridge",
+                        "anywhere",
+                        "south part of time",
+                        "north area of town",
+                        "the center",
+                        "all over the city",
+                        "any where in the city",
+                        "the town centre",
+                        "southern area",
+                        "the north side .",
+                        "north area",
+                        "in the city .",
+                        "in the city centre",
+                        "the city center",
+                        "south side",
+                        "east end",
+                        "near centre",
+                        "same part of town",
+                        "anywhere in cambridge",
+                        "town center",
+                        "the center of town",
+                        "south of cambridge",
+                        "west side of town",
+                        "that",
+                        "thai",
+                        "in the west",
+                        "south area of town",
+                        "northern part",
+                        "does",
+                        "the east side of town",
+                        "northern part of cambridge",
+                        "same location",
+                        "centre area of the town",
+                        "same",
+                        "southend",
+                        "center of town",
+                        "the south of town",
+                        "north side",
+                        "the centre area",
+                        "south of town .",
+                        "west cambridge",
+                        "the are",
+                        "in town",
+                        "do nt care",
+                        "cambridge centre",
+                        "not",
+                        "in the city",
+                        "the east area",
+                        "city",
+                        "the centre of town",
+                        "does n't matter",
+                        "the centre",
+                        "the area",
+                        "centre of town .",
+                        "cetre",
+                        "towns centre",
+                        "west end of town",
+                        "around the college",
+                        "the west side",
+                        "town",
+                        "west",
+                        "wet part of town",
+                        "eastside",
+                        "centrem",
+                        "the southern area",
+                        "north cambridge",
+                        "in the east",
+                        "east area",
+                        "the south side of town",
+                        "westies",
+                        "chesterton",
+                        "elsewhere in the city",
+                        "not in the west",
+                        "any part of town",
+                        "other parts of town",
+                        "the same area",
+                        "town of centre",
+                        "centere",
+                        "same areas",
+                        "town centre",
+                        "east side of town",
+                        "close to that area",
+                        "none",
+                        "center cambridge",
+                        "city center",
+                        "southside",
+                        "same side of town",
+                        "not in the south of town",
+                        "north side of chersteron",
+                        "around there",
+                        "west part of town",
+                        "the north side",
+                        "south end",
+                        "the area of west",
+                        "northern",
+                        "centre part of town",
+                        "center area of town",
+                        "city centre",
+                        "restaurants in the city",
+                        "west area of town",
+                        "expensive",
+                        "all over cambridge",
+                        "south part of the city",
+                        "east section",
+                        "this area",
+                        "within this area",
+                        "eastern",
+                        "nearby",
+                        "west side area",
+                        "centre-",
+                        "the west",
+                        "south part",
+                        "throughout the city",
+                        "south of town",
+                        "same area as the hotel",
+                        "closely located",
+                        "south cambridge",
+                        "and centre",
+                        "the center part of town",
+                        "west side",
+                        "does not matter",
+                        "east of town",
+                        "almost every area of town",
+                        "north side of cambridge",
+                        "south part of town",
+                        "all over town",
+                        "same area of town",
+                        "northside",
+                        "cambridge",
+                        "south",
+                        "the west end",
+                        "cheap",
+                        "the centre part of town",
+                        "in the centre",
+                        "centre area of town",
+                        "east|south",
+                        "the same",
+                        "west end"
+                    ]
+                },
+                "postcode": {
+                    "description": "postcode of the restaurant",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "phone": {
+                    "description": "restaurant phone number",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "people": {
+                    "description": "number of people booking the restaurant",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "time": {
+                    "description": "time of the restaurant booking",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "day": {
+                    "description": "day of the restaurant booking",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "sunday|thursday",
+                        "sunday",
+                        "friday",
+                        "saturday",
+                        "saturday|thursday",
+                        "wednesday",
+                        "monday",
+                        "thursday",
+                        "same day",
+                        "tuesday",
+                        "same group and day"
+                    ]
+                },
+                "choice": {
+                    "description": "number of restaurants meeting requests of user",
+                    "is_categorical": false,
+                    "possible_values": []
+                }
+            }
+        },
+        "attraction": {
+            "description": "an entertainment that is offered to the public",
+            "slots": {
+                "address": {
+                    "description": "details of where the attraction is",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "postcode": {
+                    "description": "postcode of the attraction",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "phone": {
+                    "description": "phone number of the attraction",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "entrance fee": {
+                    "description": "the fee charged for admission to the attraction",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "type": {
+                    "description": "type of the attraction",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "area": {
+                    "description": "area or place of the attraction",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "east area of town",
+                        "close to the center of town",
+                        "the city centre",
+                        "every area except the north",
+                        "that part of town",
+                        "centre city",
+                        "center",
+                        "center area",
+                        "in cambridge",
+                        "near ely",
+                        "all of the other areas",
+                        "town centre area",
+                        "south part of cambridge",
+                        "the east of the town",
+                        "the center of the park",
+                        "center of the city",
+                        "west of your city",
+                        "the area you 're looking for",
+                        "north of town",
+                        "cenre",
+                        "that area",
+                        "east part of town",
+                        "the west of town",
+                        "centre area of cambridge",
+                        "centre",
+                        "north section of cambridge",
+                        "north",
+                        "the south area",
+                        "south area",
+                        "near the hotel",
+                        "west area of the city",
+                        "centre of town",
+                        "the north of the city",
+                        "centrally located",
+                        "the north",
+                        "central district",
+                        "northern cambridge",
+                        "city 's centre",
+                        "the center area",
+                        "southern cambridge",
+                        "northend",
+                        "downtown",
+                        "centre area",
+                        "westside",
+                        "the east part of town",
+                        "north of the city",
+                        "the south",
+                        "the east",
+                        "museums",
+                        "the south side",
+                        "the west side of town",
+                        "centre region",
+                        "centre of the city",
+                        "east end of town",
+                        "church area",
+                        "central",
+                        "east",
+                        "west area",
+                        "the west area of town",
+                        "the east side",
+                        "same general area",
+                        "city cenre",
+                        "east side",
+                        "centre of cambridge",
+                        "centre|west",
+                        "any",
+                        "west part of the city",
+                        "the west area",
+                        "same side",
+                        "the eastside",
+                        "all around the city",
+                        "do n't care",
+                        "same area",
+                        "in the south",
+                        "there",
+                        "the area you are looking for",
+                        "close to the restaurant",
+                        "another area",
+                        "center of the town",
+                        "very close in the same area",
+                        "near the centre",
+                        "central zone",
+                        "southern",
+                        "north part of town",
+                        "does n't really matter",
+                        "east of cambridge",
+                        "do n't have a preference",
+                        "that side of town",
+                        "western part of the town",
+                        "the south near your hotel",
+                        "north in milton",
+                        "south side of town",
+                        "eat",
+                        "cetnre of town",
+                        "the center",
+                        "all over the city",
+                        "cambridge leisure park",
+                        "north area",
+                        "most of them are in the center",
+                        "different area",
+                        "south side",
+                        "that area .",
+                        "museum",
+                        "town center",
+                        "same part of town",
+                        "western cambridge",
+                        "south of cambridge",
+                        "west side of town",
+                        "that",
+                        "in that side",
+                        "center of cambridge",
+                        "does",
+                        "whole of cambridge",
+                        "all",
+                        "to the south",
+                        "same location",
+                        "same",
+                        "center part of town",
+                        "center of town",
+                        "norwich",
+                        "city centre .",
+                        "north side",
+                        "the centre area",
+                        "the centry area",
+                        "west cambridge",
+                        "west end of the city",
+                        "your desired location",
+                        "in town",
+                        "entre",
+                        "in the center",
+                        "not",
+                        "centre of town ?",
+                        "city",
+                        "on the centre",
+                        "the centre of town",
+                        "does n't matter",
+                        "went side of town",
+                        "the centre",
+                        "various",
+                        "the area",
+                        "towns centre",
+                        "right in the center of town",
+                        "in the area",
+                        "west end of town",
+                        "central cambridge",
+                        "the west side",
+                        "eastside",
+                        "west",
+                        "this side of town",
+                        "west side of the city",
+                        "near the restaurant",
+                        "cb30aq",
+                        "center of town .",
+                        "not really",
+                        "in the east",
+                        "same area as tandoori palace",
+                        "west part of cambridge",
+                        "centre by galleria",
+                        "east area",
+                        "wet end",
+                        "do not care",
+                        "western",
+                        "northern area of town",
+                        "western area of town",
+                        "other parts of town",
+                        "the same area",
+                        "town centre",
+                        "the same road",
+                        "east side of town",
+                        "center are",
+                        "none",
+                        "city center",
+                        "centre area .",
+                        "in that area",
+                        "clifton way",
+                        "west part of town",
+                        "south end",
+                        "western part of town",
+                        "several different parts of town",
+                        "northern",
+                        "centre part of town",
+                        "city centre",
+                        "west area of town",
+                        "east section",
+                        "this area",
+                        "nearby",
+                        "the west",
+                        "same part of town as your restaurant",
+                        "throughout the city",
+                        "south park of town",
+                        "south of town",
+                        "other areas",
+                        "south cambridge",
+                        "the west - side",
+                        "the center part of town",
+                        "west side",
+                        "does not matter",
+                        "east of town",
+                        "do not have a preference",
+                        "same area of town",
+                        "south part of town",
+                        "all over town",
+                        "cambridge",
+                        "south",
+                        "the west end",
+                        "same part",
+                        "the centre part of town",
+                        "in the centre",
+                        "centre area of town",
+                        "east cambridge",
+                        "northern area",
+                        "cambridge 's centre",
+                        "west end"
+                    ]
+                },
+                "name": {
+                    "description": "name of the attraction",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "pricerange": {
+                    "description": "the price range for the attraction, from cheap to expensive",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "5 pound entrance fee",
+                        "unaware of what their entrance fee is",
+                        "2 pounds",
+                        "we do n't have any information",
+                        "4 pounds",
+                        "no entrance fee",
+                        "do n't have information",
+                        "5 pounds",
+                        "do n't see a price listed",
+                        "expensive",
+                        "neither prices are listed",
+                        "not listed",
+                        "entrance fee",
+                        "free",
+                        "free admission",
+                        "3.50 pounds",
+                        "not sure of the fee",
+                        "moderately priced",
+                        "cheap",
+                        "from free to 2 pounds 50",
+                        "they do n't have the entrance fee posted"
+                    ]
+                },
+                "choice": {
+                    "description": "number of attractions matching requests of user",
+                    "is_categorical": false,
+                    "possible_values": []
+                }
+            }
+        },
+        "booking": {
+            "description": "to arrange with a taxi, restaurant, train, etc.",
+            "slots": {
+                "time": {
+                    "description": "time for an order",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "day": {
+                    "description": "day for an order, from monday to sunday",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "one",
+                        "sundar",
+                        "2",
+                        "6",
+                        "tonight",
+                        "that time frame",
+                        "that",
+                        "this evening",
+                        "saturday night thru monday",
+                        "we d",
+                        "sunday",
+                        "wednesday",
+                        "same day",
+                        "at that time",
+                        "tuesday",
+                        "sunday and monday",
+                        "friday",
+                        "vmhj6y3i",
+                        "1",
+                        "monday",
+                        "tues",
+                        "the same day",
+                        "sunday 18/06/2017",
+                        "tuesday instead of thursday",
+                        "vmhj6y3i.",
+                        "weds",
+                        "tuesday 's",
+                        "saturday",
+                        "today",
+                        "thursday",
+                        "tuesday through friday"
+                    ]
+                },
+                "stay": {
+                    "description": "for how long the user wish to be at a place",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "people": {
+                    "description": "how many person the order is for",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "name": {
+                    "description": "name of the ordered place",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "Ref": {
+                    "description": "reference number of the order",
+                    "is_categorical": false,
+                    "possible_values": []
+                }
+            }
+        },
+        "train": {
+            "description": "query and order a train",
+            "slots": {
+                "destination": {
+                    "description": "destination of the train",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "arriveBy": {
+                    "description": "arrival time of the train",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "departure": {
+                    "description": "departure location of the train",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "duration": {
+                    "description": "the length of time the train trip lasts",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "people": {
+                    "description": "number of people booking for train",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "day": {
+                    "description": "day of the train",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "everday",
+                        "this day",
+                        "every day",
+                        "13:07",
+                        "sundays",
+                        "none",
+                        "that",
+                        "thursdays",
+                        "sunday",
+                        "frday",
+                        "wednesday",
+                        "same day",
+                        "between friday and wednesday",
+                        "tuesday",
+                        "tr2519",
+                        "sat",
+                        "all week",
+                        "friday",
+                        "saturdays",
+                        "monday",
+                        "weds",
+                        "that day",
+                        "every",
+                        "saturday",
+                        "other days are available",
+                        "thursday",
+                        "fiday",
+                        "cambridge",
+                        "daily",
+                        "saturday-",
+                        "mondays"
+                    ]
+                },
+                "Ref": {
+                    "description": "reference number of the order",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "leaveAt": {
+                    "description": "leaving time for the train",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "price": {
+                    "description": "price for the train ticket",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "choice": {
+                    "description": "number of trains that meets requests of the user",
+                    "is_categorical": false,
+                    "possible_values": []
+                }
+            }
+        },
+        "hotel": {
+            "description": "to query hotel information and place an order",
+            "slots": {
+                "internet": {
+                    "description": "internet option at the hotel",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "no",
+                        "yes",
+                        "none",
+                        "free"
+                    ]
+                },
+                "area": {
+                    "description": "area or place of the hotel",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "east area of town",
+                        "north location",
+                        "that part of town",
+                        "centre city",
+                        "west of town",
+                        "center",
+                        "east and the north .",
+                        "in cambridge",
+                        "southern part of town",
+                        "the city 's south side",
+                        "other parts of the city",
+                        "close to where you 'll be dining",
+                        "east near other shops and boutiques",
+                        "any area",
+                        "that area of town",
+                        "same area as the restaurant",
+                        "north end of the city",
+                        "centrally - located",
+                        "north of town",
+                        "northern side of town",
+                        "that area",
+                        "east part of town",
+                        "centre",
+                        "on the west",
+                        "north",
+                        "the south area",
+                        "south area",
+                        "all of cambridge",
+                        "eastern part of the city",
+                        "centre of town",
+                        "east part of time",
+                        "the north of the city",
+                        "centrally located",
+                        "on the south",
+                        "the north",
+                        "northern cambridge",
+                        "all over",
+                        "northend",
+                        "downtown",
+                        "centre area",
+                        "westside",
+                        "northern part of town",
+                        "least",
+                        "the east",
+                        "the south side",
+                        "east section of town",
+                        "the south",
+                        "south end of the city",
+                        "across town",
+                        "centre of the city",
+                        "or west ?",
+                        "centra",
+                        "south closer to the museum",
+                        "north side of town",
+                        "east",
+                        "el shaddai",
+                        "different part of town",
+                        "west area",
+                        "all areas",
+                        "on the west side",
+                        "nearby the restaurant",
+                        "the east side",
+                        "same general area",
+                        "you are interested in",
+                        "centrally",
+                        "north park of town",
+                        "east side",
+                        "in the north",
+                        "centre of cambridge",
+                        "not to far from the restaurant",
+                        "any",
+                        "west part of the city",
+                        "the west area",
+                        "centre area of town",
+                        "centre part",
+                        "that vicinity",
+                        "the north part of town",
+                        "all around the city",
+                        "do n't care",
+                        "same area",
+                        "the north end",
+                        "north part of the city",
+                        "north end",
+                        "there",
+                        "and in the centre",
+                        "north and centre",
+                        "center of the town",
+                        "near the centre",
+                        "west side near the restaurant",
+                        "that region",
+                        "southern",
+                        "north part of town",
+                        "except in the east",
+                        "another area of town",
+                        "near the museum",
+                        "nborth",
+                        "do n't have a preference",
+                        "no particular",
+                        "that side of town",
+                        "south side of town",
+                        "centre cambridge",
+                        "eat",
+                        "north area of town",
+                        "the",
+                        "all over the city",
+                        "north par of town",
+                        "north part of town .",
+                        "next door",
+                        "the town centre",
+                        "different area",
+                        "north area",
+                        "south side",
+                        "any part of the city",
+                        "near centre",
+                        "west part",
+                        "everywhere but the city centre",
+                        "same part of town",
+                        "anywhere in cambridge",
+                        "western cambridge",
+                        "the center of town",
+                        "north of cambridge",
+                        "west side of town",
+                        "in the west",
+                        "south area of town",
+                        "the east side of town",
+                        "wast",
+                        "southend",
+                        "center of town",
+                        "west areas of town",
+                        "north side",
+                        "on the eastside",
+                        "the centre area",
+                        "west end of the city",
+                        "central location",
+                        "in town",
+                        "in the center",
+                        "not",
+                        "the centre of the city",
+                        "in the city",
+                        "the east area",
+                        "the easy",
+                        "the centre of cambridge",
+                        "the centre of town",
+                        "does n't matter",
+                        "various parts of the city",
+                        "several areas of town",
+                        "norht",
+                        "the centre",
+                        "various",
+                        "the area",
+                        "not in the south",
+                        "west|centre",
+                        "in the area",
+                        "on the north",
+                        "the west side",
+                        "eastside",
+                        "west",
+                        "near the restaurant",
+                        "north cambridge",
+                        "not really",
+                        "south cambridge area",
+                        "throughout cambridge",
+                        "any part of town",
+                        "western",
+                        "other parts of town",
+                        "the same area",
+                        "town centre",
+                        "east side of town",
+                        "none",
+                        "do n't really care",
+                        "city center",
+                        "southside",
+                        "and east",
+                        "all across town",
+                        "south end of town",
+                        "different parts of the city",
+                        "same side of town",
+                        "west part of town",
+                        "the north side",
+                        "south end",
+                        "except in the north",
+                        "northern",
+                        "centre part of town",
+                        "city centre",
+                        "eastern cambridge",
+                        "the town center",
+                        "eastern",
+                        "the east end of town",
+                        "all parts of the city",
+                        "different parts of town",
+                        "the west",
+                        "throughout the city",
+                        "north end of town",
+                        "south of town",
+                        "a different area",
+                        "south cambridge",
+                        "the northern part of town",
+                        "west side",
+                        "across cambridge",
+                        "does not matter",
+                        "east of town",
+                        "do not have a preference",
+                        "all over town",
+                        "south part of town",
+                        "same area of town",
+                        "somewhere else",
+                        "northside",
+                        "the westside",
+                        "cambridge",
+                        "south",
+                        "the west end",
+                        "northe part of town",
+                        "cheap",
+                        "in the centre",
+                        "east area",
+                        "east cambridge",
+                        "northern area",
+                        "the same",
+                        "west end"
+                    ]
+                },
+                "stars": {
+                    "description": "star rating of the hotel",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "4|5",
+                        "one",
+                        "2",
+                        "four - star",
+                        "unrated",
+                        "does not show",
+                        "3|4",
+                        "none",
+                        "different star ratings",
+                        "four",
+                        "4",
+                        "not as fancy",
+                        "3",
+                        "yes",
+                        "drop the star rating",
+                        "four starts",
+                        "no star rating",
+                        "3-star",
+                        "0 to 4",
+                        "5",
+                        "2-stars",
+                        "does not have",
+                        "1",
+                        "two",
+                        "lower",
+                        "2-star",
+                        "1-star",
+                        "4-stars",
+                        "ranging from 2 - 4 stars",
+                        "three",
+                        "2-starred",
+                        "foru",
+                        "0-star",
+                        "3-stars",
+                        "0",
+                        "4-star",
+                        "no",
+                        "several",
+                        "zero",
+                        "five",
+                        "5-star",
+                        "not rated"
+                    ]
+                },
+                "parking": {
+                    "description": "parking facility at the hotel",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "no",
+                        "yes",
+                        "none",
+                        "free"
+                    ]
+                },
+                "phone": {
+                    "description": "hotel phone number",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "name": {
+                    "description": "name of the hotel",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "pricerange": {
+                    "description": "price budget of the hotel",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "moderately pricing",
+                        "same area and price range",
+                        "different price ranges",
+                        "that range",
+                        "moderate to cheap range",
+                        "cheaply prices",
+                        "or expensive ?",
+                        "all different price ranges",
+                        "inexpensive",
+                        "cheap|moderate",
+                        "affordable pricing",
+                        "is moderately priced",
+                        "your chosen",
+                        "moderatly priced",
+                        "different",
+                        "the cheapest",
+                        "centre",
+                        "moderate prices",
+                        "same part of town and price range as the restaurant",
+                        "cheaper side",
+                        "north",
+                        "moderately priced .",
+                        "more budget - friendly",
+                        "lower",
+                        "cheap > moderate",
+                        "same price range",
+                        "moderatley priced",
+                        "cheap>moderate",
+                        "more expensive",
+                        "upscale",
+                        "free",
+                        "different price range",
+                        "very affordable",
+                        "moderate to cheap",
+                        "moderately to expensively priced",
+                        "modrate",
+                        "same price",
+                        "fairly cheap",
+                        "super cheap",
+                        "relatively cheap",
+                        "cheap priced",
+                        "expensive price range",
+                        "moderate pricing",
+                        "your price",
+                        "cheaply priced",
+                        "expensive range",
+                        "same area and price range as the restaurant",
+                        "same area and price range as my restaurant",
+                        "moderately prices",
+                        "even cheaper",
+                        "moderate or cheap",
+                        "moderately - priced or cheap",
+                        "higher price range",
+                        "any",
+                        "oderately priced",
+                        "other ranges",
+                        "lower end",
+                        "great",
+                        "extremely reasonable",
+                        "on the cheap side",
+                        "budget - priced",
+                        "do n't care",
+                        "low cost",
+                        "varying price ranges",
+                        "moderate|cheap",
+                        "pretty cheap",
+                        "quite cheap",
+                        "moderate priced",
+                        "cheap to moderate",
+                        "no particular",
+                        "cheaper",
+                        "very inexpensive",
+                        "does not say",
+                        "epxensive",
+                        "expensively - priced",
+                        "cheap price range",
+                        "expensive or cheap",
+                        "on the more expensive side",
+                        "more moderately priced",
+                        "hotel",
+                        "moderate in price",
+                        "quite expensive",
+                        "cheap range",
+                        "that",
+                        "slightly more expensive",
+                        "moderately priceed",
+                        "rather expensive",
+                        "all",
+                        "does",
+                        "moderate price",
+                        "chear",
+                        "pretty expensive",
+                        "moderatly price",
+                        "that price range",
+                        "moderate range",
+                        "much cheaper",
+                        "a little pricey",
+                        "cheap side",
+                        "moderately",
+                        "not",
+                        "mostly in the cheap to moderate price range",
+                        "economically priced",
+                        "cheaply",
+                        "does n't matter",
+                        "cheapest",
+                        "range",
+                        "fairly cheap compared to other hotels",
+                        "expensively priced",
+                        "very cheap",
+                        "not really",
+                        "your price range",
+                        "do not care",
+                        "none",
+                        "less costly",
+                        "moderate price range",
+                        "inexpensively - priced",
+                        "moderately price",
+                        "$ 100",
+                        "espensive",
+                        "not expensive",
+                        "cheap to expensive",
+                        "cheap or moderate",
+                        "cheaply - priced",
+                        "affordable",
+                        "moderate price point",
+                        "guesthouses",
+                        "expensive",
+                        "priced moderately",
+                        "moderate",
+                        "moderately - priced",
+                        "same general price",
+                        "cheaper than the others",
+                        "moderately pried",
+                        "expensive to moderate",
+                        "unfortunately do not have the price",
+                        "ca n't view that information",
+                        "$100",
+                        "hotels",
+                        "same pricerange",
+                        "does not matter",
+                        "moderately priced",
+                        "economical",
+                        "cheap",
+                        "the same"
+                    ]
+                },
+                "people": {
+                    "description": "number of people for the hotel booking",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "postcode": {
+                    "description": "postcode of the hotel",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "address": {
+                    "description": "exact location of the hotel",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "type": {
+                    "description": "what is the type of the hotel",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "stay": {
+                    "description": "length of stay at the hotel",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "day": {
+                    "description": "day of the hotel booking",
+                    "is_categorical": true,
+                    "possible_values": [
+                        "thursday--",
+                        "wednesday|friday",
+                        "none",
+                        "friday > tuesday",
+                        "sunday",
+                        "same",
+                        "wednesday",
+                        "same day",
+                        "tuesday",
+                        "monday < thursday",
+                        "friday",
+                        "saturday|tuesday",
+                        "friday>tuesday",
+                        "monday",
+                        "monday<thursday",
+                        "tuesday/",
+                        "saturday",
+                        "thursday",
+                        "sunday>monday",
+                        "sunday > monday"
+                    ]
+                },
+                "choice": {
+                    "description": "number of hotels that meets requests of the user",
+                    "is_categorical": false,
+                    "possible_values": []
+                }
+            }
+        },
+        "police": {
+            "description": "find police stations",
+            "slots": {
+                "address": {
+                    "description": "exact location of the police station",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "postcode": {
+                    "description": "postcode of the police station",
+                    "is_categorical": false,
+                    "possible_values": []
+                },
+                "phone": {
+                    "description": "police station phone number",
+                    "is_categorical": false,
+                    "possible_values": []
+                }
+            }
+        }
+    },
+    "intents": {
+        "inform": {
+            "description": "inform user of value for a certain slot"
+        },
+        "request": {
+            "description": "ask for value of a slot"
+        },
+        "nobook": {
+            "description": "inform user of booking failure"
+        },
+        "reqmore": {
+            "description": "ask user for more instructions"
+        },
+        "book": {
+            "description": "place an order for user"
+        },
+        "bye": {
+            "description": "end a conversation and say goodbye to user"
+        },
+        "thank": {
+            "description": "express gratitude"
+        },
+        "welcome": {
+            "description": "welcome"
+        },
+        "offerbook": {
+            "description": "offer to place an order for user"
+        },
+        "offerbooked": {
+            "description": "inform user that an order is succussful"
+        },
+        "recommend": {
+            "description": "recommend a choice for user request"
+        },
+        "greet": {
+            "description": "express greeting"
+        },
+        "nooffer": {
+            "description": "inform user that no options matches user request"
+        },
+        "select": {
+            "description": "provide several choices for user to choose from"
+        }
+    },
+    "binary_dialogue_act": [
+        {
+            "intent": "request",
+            "domain": "hotel",
+            "slot": "area",
+            "value": ""
+        },
+        {
+            "intent": "inform",
+            "domain": "booking",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "booking",
+            "slot": "stay",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "booking",
+            "slot": "day",
+            "value": ""
+        },
+        {
+            "intent": "reqmore",
+            "domain": "",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "bye",
+            "domain": "",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "thank",
+            "domain": "",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "welcome",
+            "domain": "",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "train",
+            "slot": "departure",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "train",
+            "slot": "day",
+            "value": ""
+        },
+        {
+            "intent": "offerbook",
+            "domain": "train",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "greet",
+            "domain": "",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "hotel",
+            "slot": "pricerange",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "hotel",
+            "slot": "address",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "train",
+            "slot": "destination",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "train",
+            "slot": "leaveAt",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "train",
+            "slot": "arriveBy",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "train",
+            "slot": "duration",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "train",
+            "slot": "price",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "hotel",
+            "slot": "internet",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "hotel",
+            "slot": "parking",
+            "value": ""
+        },
+        {
+            "intent": "inform",
+            "domain": "attraction",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "attraction",
+            "slot": "area",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "attraction",
+            "slot": "type",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "attraction",
+            "slot": "address",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "food",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "area",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "phone",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "postcode",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "address",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "train",
+            "slot": "Ref",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "attraction",
+            "slot": "postcode",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "attraction",
+            "slot": "phone",
+            "value": ""
+        },
+        {
+            "intent": "nobook",
+            "domain": "booking",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "inform",
+            "domain": "train",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "train",
+            "slot": "people",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "booking",
+            "slot": "people",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "hotel",
+            "slot": "stars",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "hotel",
+            "slot": "type",
+            "value": ""
+        },
+        {
+            "intent": "select",
+            "domain": "hotel",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "hotel",
+            "slot": "postcode",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "attraction",
+            "slot": "entrance fee",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "pricerange",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "booking",
+            "slot": "time",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "hotel",
+            "slot": "phone",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "attraction",
+            "slot": "pricerange",
+            "value": ""
+        },
+        {
+            "intent": "inform",
+            "domain": "hotel",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "attraction",
+            "slot": "name",
+            "value": ""
+        },
+        {
+            "intent": "nooffer",
+            "domain": "attraction",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "select",
+            "domain": "train",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "select",
+            "domain": "restaurant",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "select",
+            "domain": "attraction",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "inform",
+            "domain": "restaurant",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "inform",
+            "domain": "taxi",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "taxi",
+            "slot": "departure",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "taxi",
+            "slot": "destination",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "taxi",
+            "slot": "taxi_types",
+            "value": ""
+        },
+        {
+            "intent": "book",
+            "domain": "booking",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "taxi",
+            "slot": "taxi_phone",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "taxi",
+            "slot": "arriveBy",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "taxi",
+            "slot": "leaveAt",
+            "value": ""
+        },
+        {
+            "intent": "nooffer",
+            "domain": "restaurant",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "nooffer",
+            "domain": "train",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "nooffer",
+            "domain": "hotel",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "hotel",
+            "slot": "name",
+            "value": ""
+        },
+        {
+            "intent": "recommend",
+            "domain": "hotel",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "recommend",
+            "domain": "attraction",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "name",
+            "value": ""
+        },
+        {
+            "intent": "offerbooked",
+            "domain": "train",
+            "slot": "",
+            "value": ""
+        },
+        {
+            "intent": "recommend",
+            "domain": "restaurant",
+            "slot": "",
+            "value": ""
+        }
+    ],
+    "state": {
+        "hotel": {
+            "name": "",
+            "area": "",
+            "parking": "",
+            "pricerange": "",
+            "stars": "",
+            "internet": "",
+            "type": "",
+            "stay": "",
+            "day": "",
+            "people": ""
+        },
+        "train": {
+            "leaveAt": "",
+            "destination": "",
+            "day": "",
+            "arriveBy": "",
+            "departure": "",
+            "people": ""
+        },
+        "restaurant": {
+            "food": "",
+            "pricerange": "",
+            "name": "",
+            "area": "",
+            "time": "",
+            "day": "",
+            "people": ""
+        },
+        "attraction": {
+            "type": "",
+            "name": "",
+            "area": ""
+        },
+        "taxi": {
+            "leaveAt": "",
+            "destination": "",
+            "departure": "",
+            "arriveBy": ""
+        }
+    }
+}
\ No newline at end of file
diff --git a/data/unified_datasets/multiwoz23/original_data.zip b/data/unified_datasets/multiwoz23/original_data.zip
new file mode 100644
index 0000000000000000000000000000000000000000..68bffe4bfedf2ecc860c5e204323a91e8a6882c8
Binary files /dev/null and b/data/unified_datasets/multiwoz23/original_data.zip differ
diff --git a/data/unified_datasets/multiwoz23/preprocess.py b/data/unified_datasets/multiwoz23/preprocess.py
new file mode 100644
index 0000000000000000000000000000000000000000..d293e7f9c0f2c78f60750f858a34e76d2100ecb4
--- /dev/null
+++ b/data/unified_datasets/multiwoz23/preprocess.py
@@ -0,0 +1,1421 @@
+import copy
+import re
+import zipfile
+import json
+import os
+from tqdm import tqdm
+import sys
+import difflib
+from fuzzywuzzy import fuzz
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from convlab2.util.file_util import read_zipped_json, write_zipped_json
+import logging
+
+
+logging.basicConfig(level=logging.INFO)  # configure the root logger to emit INFO and above
+self_dir = (os.path.abspath(os.getcwd()))  # NOTE(review): this is the current working directory, not this file's folder - confirm the script is always launched from its own directory
+
+REF_SYS_DA = {  # original dialogue-act domain -> {original slot tag: unified slot name}
+    'Attraction': {
+        'Addr': "address", 'Area': "area", 'Choice': "choice",
+        'Fee': "entrance fee", 'Name': "name", 'Phone': "phone",
+        'Post': "postcode", 'Price': "pricerange", 'Type': "type",
+        'none': None, 'Open': None  # None = no unified slot: convert_da turns 'none'/'none' acts into binary acts and drops the rest (e.g. 'Open')
+    },
+    'Hospital': {  # convert_da skips hospital and police acts before it consults this table
+        'Department': 'department', 'Addr': 'address', 'Post': 'postcode',
+        'Phone': 'phone', 'none': None
+    },
+    'Booking': {
+        'Day': 'day', 'Name': 'name', 'People': 'people',
+        'Ref': 'Ref', 'Stay': 'stay', 'Time': 'time',
+        'none': None
+    },
+    'Hotel': {
+        'Addr': "address", 'Area': "area", 'Choice': "choice",
+        'Internet': "internet", 'Name': "name", 'Parking': "parking",
+        'Phone': "phone", 'Post': "postcode", 'Price': "pricerange",
+        'Stars': "stars", 'Type': "type", 'Stay': 'stay', 'Day': 'day', 'People': 'people',
+        'none': None
+    },
+    'Restaurant': {
+        'Addr': "address", 'Area': "area", 'Choice': "choice",
+        'Name': "name", 'Food': "food", 'Phone': "phone",
+        'Post': "postcode", 'Price': "pricerange",
+        'Time': 'time', 'Day': 'day', 'People': 'people',
+        'none': None
+    },
+    'Taxi': {
+        'Arrive': "arriveBy", 'Car': "taxi_types", 'Depart': "departure",
+        'Dest': "destination", 'Leave': "leaveAt", 'Phone': "taxi_phone",
+        'none': None
+    },
+    'Train': {
+        'Arrive': "arriveBy", 'Choice': "choice", 'Day': "day",
+        'Depart': "departure", 'Dest': "destination",
+        'Leave': "leaveAt", 'People': "people", 'Ref': "Ref",
+        'Time': "duration", 'none': None, 'Ticket': 'price',  # note: for trains the 'Time' tag maps to "duration", not "time"
+    },
+    'Police': {
+        'Addr': "address", 'Post': "postcode", 'Phone': "phone", 'none': None
+    },
+}
+
+# '<domain>-<slot>' -> 'cat' (categorical) or 'non' (non-categorical); convert_da and state_cat_slot_ds test for 'cat'
+slot_to_type = {
+    'taxi-destination': 'non',
+    'taxi-departure': 'non',
+    'taxi-leaveAt': 'non',
+    'taxi-arriveBy': 'non',
+    'restaurant-food': 'non',
+    'restaurant-name': 'non',
+    'attraction-address': 'non',
+    'attraction-postcode': 'non',
+    'restaurant-pricerange': 'cat',
+    'restaurant-address': 'non',
+    'restaurant-area': 'cat',
+    'restaurant-postcode': 'non',
+    'attraction-phone': 'non',
+    'attraction-entrance fee': 'non',
+    'booking-time': 'non',
+    'booking-day': 'cat',
+    'attraction-type': 'non',
+    'attraction-area': 'cat',
+    'train-destination': 'non',
+    'train-arriveBy': 'non',
+    'train-departure': 'non',
+    'hotel-internet': 'cat',
+    'hotel-area': 'cat',
+    'booking-stay': 'non',
+    'booking-people': 'non',
+    'train-duration': 'non',
+    'train-people': 'non',
+    'train-day': 'cat',
+    'train-Ref': 'non',
+    'hotel-stars': 'cat',
+    'train-leaveAt': 'non',
+    'train-price': 'non',
+    'hotel-parking': 'cat',
+    'hotel-phone': 'non',
+    'hotel-name': 'non',
+    'hotel-pricerange': 'cat',
+    'hotel-people': 'non',
+    'restaurant-phone': 'non',
+    'hotel-postcode': 'non',
+    'hotel-address': 'non',
+    'attraction-name': 'non',
+    'hotel-type': 'non',
+    'restaurant-people': 'non',
+    'train-choice': 'non',
+    'attraction-pricerange': 'cat',
+    'hotel-stay': 'non',
+    'booking-name': 'non',
+    'booking-Ref': 'non',
+    'restaurant-time': 'non',
+    'restaurant-day': 'cat',
+    'hotel-day': 'cat',
+    'hotel-choice': 'non',
+    'restaurant-choice': 'non',
+    'attraction-choice': 'non',
+    'taxi-taxi_phone': 'non',
+    'taxi-taxi_types': 'non',
+    'police-address': 'non',
+    'police-postcode': 'non',
+    'police-phone': 'non'
+}
+
+state_cat_slot_value_dict = {  # '<domain>-<slot>' -> {value: int}; the ints look like occurrence counts - TODO confirm against the code that reads this table
+    "hotel-pricerange": {
+        "cheap": 735,
+        "moderate": 1063,
+        "expensive": 594,
+    },
+    "hotel-parking": {
+        "yes": 1809,
+        "no": 126,
+        "free": 4,
+    },
+    "hotel-day": {
+        "tuesday": 385,
+        "wednesday": 410,
+        "monday": 365,
+        "saturday": 407,
+        "friday": 393,
+        "thursday": 384,
+        "sunday": 369,
+    },
+    "train-day": {
+        "wednesday": 533,
+        "monday": 533,
+        "saturday": 543,
+        "thursday": 547,
+        "friday": 563,
+        "tuesday": 553,
+        "sunday": 613,
+    },
+    "hotel-stars": {
+        "4": 1263,
+        "2": 193,
+        "0": 201,
+        "3": 401,
+        "5": 45,
+        "1": 45,
+    },
+    "hotel-internet": {
+        "yes": 1841,
+        "no": 79,
+        "free": 2
+    },
+    "hotel-area": {
+        "east": 416,
+        "north": 717,
+        "centre": 538,
+        "south": 289,
+        "west": 316,
+    },
+    "attraction-area": {
+        "centre": 1290,
+        "west": 332,
+        "north": 155,
+        "south": 240,
+        "east": 272,
+    },
+    "restaurant-pricerange": {
+        "expensive": 1477,
+        "cheap": 758,
+        "moderate": 1028,
+    },
+    "restaurant-area": {
+        "centre": 1745,
+        "south": 398,
+        "north": 390,
+        "east": 360,
+        "west": 423,
+    },
+    "restaurant-day": {
+        "thursday": 362,
+        "wednesday": 412,
+        "friday": 395,
+        "monday": 383,
+        "sunday": 399,
+        "saturday": 421,
+        "tuesday": 350,
+    }
+}
+
+
+synonyms = [  # each inner list groups alternative spellings; presumably treated as equivalent values - usage is outside this view, verify
+    ["el shaddia guesthouse", "el shaddai"],
+    [ "peterborough", "peterbourgh"],
+    ["night club", "nightclub", 'nightclubs'],
+    ["boat", "boating"],
+    ["portugese", "portuguese"],
+    ["guesthouse", "guest house"],
+    ["seafood", "sea food"],
+    ["christ 's college", "christ college"],
+    ["huntingdon marriott hotel"]  # NOTE(review): single-entry group has no alternative spelling - confirm it is intentional
+]
+
+state_cat_slot_ds = [k for k, v in slot_to_type.items() if v == 'cat']  # '<domain>-<slot>' keys marked categorical in slot_to_type
+
+da_cat_slot_values = {  # seed values for categorical dialogue-act slots; convert_da appends to a mapping of this name passed as an argument
+    # 'hotel-stay': ['1', '2', '3', '4', '5'],
+    'hotel-internet': ['free', 'no', 'none', 'yes'],
+    'hotel-parking': ['free', 'no', 'none', 'yes'],
+}
+
+state_cat_slot_values = {}  # starts empty; presumably filled later in the file - not visible here
+
+multiwoz_desc = {  # human-readable descriptions: per domain, 'domain' describes the domain itself and every other key describes a slot
+    'taxi': {
+        'domain': 'taxi information query system',
+        'taxi_phone': 'taxi phone number',
+        'taxi_types': 'taxi type',
+    },
+    'restaurant': {
+        'domain': 'restaurant information query system',
+        'address': 'exact location of the restaurant',
+        'postcode': 'postcode of the restaurant',
+        'phone': 'restaurant phone number',
+        'choice': 'number of restaurants meeting requests of user',
+    },
+    'attraction': {
+        'domain': 'an entertainment that is offered to the public',
+        'address': 'details of where the attraction is',
+        'postcode': 'postcode of the attraction',
+        'phone': 'phone number of the attraction',
+        'entrance fee': 'the fee charged for admission to the attraction',
+        'pricerange': 'the price range for the attraction, from cheap to expensive',
+        'choice': 'number of attractions matching requests of user'
+    },
+    'booking': {
+        'domain': 'to arrange with a taxi, restaurant, train, etc.',
+        'time': 'time for an order',
+        'day': 'day for an order, from monday to sunday',
+        'stay': 'for how long the user wish to be at a place',
+        'people': 'how many person the order is for',
+        'name': 'name of the ordered place',
+        'Ref': 'reference number of the order'
+    },
+    'train': {
+        'domain': 'query and order a train',
+        'duration': 'the length of time the train trip lasts',
+        'Ref': 'reference number of the order',
+        'price': 'price for the train ticket',
+        'choice': 'number of trains that meets requests of the user',
+    },
+    'hotel': {
+        'domain': 'to query hotel information and place an order',
+        'address': 'exact location of the hotel',
+        'postcode': 'postcode of the hotel',
+        'phone': 'hotel phone number',
+        'choice': 'number of hotels that meets requests of the user',
+    },
+    'police': {
+        'domain': 'find police stations',
+        'address': 'exact location of the police station',
+        'postcode': 'postcode of the police station',
+        'phone': 'police station phone number',
+    },
+    'intents': {  # not a domain: intent name -> description
+        'inform': 'inform user of value for a certain slot',
+        'request': 'ask for value of a slot',
+        'nobook': 'inform user of booking failure',
+        'reqmore': 'ask user for more instructions',
+        'book': 'place an order for user',
+        'bye': 'end a conversation and say goodbye to user',
+        'thank': 'express gratitude',
+        'welcome': 'welcome',
+        'offerbooked': 'inform user that an order is succussful',
+        'recommend': 'recommend a choice for user request',
+        'greet': 'express greeting',
+        'nooffer': 'inform user that no options matches user request',
+        'offerbook': 'offer to place an order for user',
+        'select': 'provide several choices for user to choose from',
+    }
+}
+
+digit2word = {  # digit string -> English number word; pharse_in_sen uses it to look for spelled-out numbers
+    '0': 'zero', '1': 'one', '2': 'two', '3': 'three', '4': 'four', '5': 'five',
+    '6': 'six', '7': 'seven', '8': 'eight', '9': 'nine', '10': 'ten'
+}
+
+
+def pharse_in_sen(phrase, sen):
+    '''
+    Locate a slot value inside a sentence, ignoring case.
+
+    A digit string listed in `digit2word` is searched in its spelled-out
+    form first (e.g. "2" -> "two") and then as the literal digits.
+    Example: pharse_in_sen('2', 'book 2 seats') returns ((5, 6), 1).
+
+    :param phrase: str, the value to look for
+    :param sen: str, the sentence to search
+    :return: ((start, end), num): character span of the first match and the
+        number of non-overlapping matches, or ((None, None), 0) if absent
+    :raises AssertionError: if phrase is not a str
+    '''
+    assert isinstance(phrase, str)
+    # raw strings: '\s', '\.', '\?' and '\D' are regex escapes, not string escapes
+    # words must be delimited by whitespace, punctuation or a string boundary
+    pw = r'(^|[\s,\.:\?!-])(?P<v>{})([\s,\.:\?!-]|$)'
+    # numbers: ',', '.' and ':' only delimit when the far side is not a digit,
+    # so "5" is not matched inside "2.5" nor "30" inside "12:30"
+    pn = r'(^|[\s\?!-]|\D[,\.:])(?P<v>{})($|[\s\?!-]|[,\.:]\D|[,\.:]$)'
+
+    candidates = []
+    if phrase.isdigit() and phrase in digit2word:
+        # do not overwrite `phrase`: if the spelled-out form is absent the
+        # digit form must still be tried (reassigning `phrase` would skip it)
+        candidates.append((pw, digit2word[phrase]))
+    candidates.append((pn if phrase.isdigit() else pw, phrase))
+
+    # the first candidate that occurs in the sentence wins
+    for pattern, text in candidates:
+        p = re.compile(pattern.format(re.escape(text)), re.I)
+        m = p.search(sen)
+        if m:
+            return m.span('v'), len(p.findall(sen))
+    # neither the spelled-out nor the literal form occurs in the sentence
+    return (None, None), 0
+
+
+
+
+
+def update_state(state, update):
+    """Merge per-service updates from `update` into `state`, in place.
+
+    A service absent from `state` gets a deep copy of its update dict; a
+    present one has its dict updated with the new key/value pairs."""
+    for name in update:
+        changes = update[name]
+        if name in state:
+            state[name].update(changes)
+            continue
+        state[name] = copy.deepcopy(changes)
+
+
+def convert_da(utt, da_dict, binary_ont, intent_ont, did, tid, da_cat_slot_values):
+    '''
+     convert multiwoz dialogue acts to required format
+    :param utt: user or system utt
+    :param da_dict: multiwoz da
+    :param binary_ont: binary ontology
+    :param intent_ont: intent ontology
+    :return:
+    '''
+    converted_da = {
+        'categorical': [],
+        'non-categorical': [],
+        'binary': []
+    }
+
+    for Domain_Act, S, v in da_dict:
+        Domain, Act = Domain_Act.split('-')
+        if Domain.lower() in ['police', 'hospital', 'bus']:
+            continue
+
+        if Act.lower() not in intent_ont:
+            intent_ont[Act.lower()] = {}
+
+        # general domain is converted to empty domain. e.g. thank, bye
+        if Domain == 'general':
+            assert S == 'none'
+            assert v == 'none'
+            converted_dict = {
+                'intent': Act.lower(),
+                'domain': '',
+                'slot': '',
+                'value': ''
+            }
+            converted_da['binary'].append(converted_dict)
+
+            if converted_dict not in binary_ont:
+                binary_ont.append(converted_dict)
+            continue
+
+
+
+        try:
+            reformated_slot = REF_SYS_DA[Domain][S]
+        except:
+            # print('44444444444444444444444444444444')
+            # print(Domain, S)
+            # logging.info('slot not in REF_SYS_DA, drop')
+            continue
+
+        # if slot is None, da should be converted into binary
+        if reformated_slot is None:
+            if not (S == 'none' and v == 'none'):
+                # mainly for `Open` slot
+                # print('11111111111111111111')
+                # print(Domain_Act, S, v)
+                continue
+            # Booking-Inform none none
+            # Police-Inform none none
+            # Train-OfferBook none none
+            converted_dict = {
+                'intent': Act.lower(),
+                'domain': Domain.lower(),
+                'slot': '',
+                'value': ''
+            }
+            converted_da['binary'].append(converted_dict)
+            if converted_dict not in binary_ont:
+                binary_ont.append(converted_dict)
+            continue
+
+        reformated_domain_slot = Domain.lower() + '-' + reformated_slot
+
+        if Act.lower() == 'request':
+            converted_dict = {
+                'intent': 'request',
+                'domain': Domain.lower(),
+                'slot': reformated_slot,
+                'value': ''
+            }
+            converted_da['binary'].append(converted_dict)
+
+            if converted_dict not in binary_ont:
+                binary_ont.append(converted_dict)
+            continue
+
+        # vs = da_dict[(Domain_Act, S)]['values']
+
+        if reformated_domain_slot in slot_to_type and slot_to_type[reformated_domain_slot] == 'cat':
+            origin_v = v
+            v = v.lower()
+            # if reformated_domain_slot in cat_slot_proj:
+            #     v = cat_slot_proj[reformated_domain_slot][v]
+            if reformated_domain_slot not in da_cat_slot_values:
+                da_cat_slot_values[reformated_domain_slot] = []
+            # if v not in cat_slot_values[reformated_domain_slot]:
+            da_cat_slot_values[reformated_domain_slot].append(v)
+            converted_da['categorical'].append({
+                'intent': Act.lower(),
+                'domain': Domain.lower(),
+                'slot': reformated_slot,
+                'value': v
+            })
+            if 'start_word' in da_dict[(Domain_Act, S, origin_v)]:
+                start_ws = da_dict[(Domain_Act, S, origin_v)]['start_word']
+                end_ws = da_dict[(Domain_Act, S, origin_v)]['end_word']
+                utt_list = utt.split()
+                for start_w, end_w in zip(start_ws, end_ws):
+                    if start_w > len(utt_list) or end_w > len(utt_list):
+                        continue
+                    start_ch = 0
+                    for i in range(start_w):
+                        start_ch += len(utt_list[i]) + 1
+                    end_ch = start_ch
+                    for i in range(start_w, end_w):
+                        end_ch += len(utt_list[i]) + 1
+                    try:
+                        end_ch += len(utt_list[end_w])
+                    except:
+                        print(utt_list, start_w, end_w)
+                    if not utt[start_ch: end_ch] == origin_v:
+                        # print('2222222222222222222222222')
+                        # print('\n'.join([v, utt[start_ch: end_ch - 1]]))
+                        continue
+
+                    else:
+                        converted_da['categorical'][-1].update({
+                            'start': start_ch,
+                            'end': end_ch
+                        })
+                        break;
+
+        else:
+            if 'start_word' not in da_dict[(Domain_Act, S, v)]:
+                # todo no span annotation
+                converted_da['non-categorical'].append({
+                    'intent': Act.lower(),
+                    'domain': Domain.lower(),
+                    'slot': reformated_slot,
+                    'value': v
+                })
+                continue
+
+            start_ws = da_dict[(Domain_Act, S, v)]['start_word']
+            end_ws = da_dict[(Domain_Act, S, v)]['end_word']
+            utt_list = utt.split()
+            found = True
+            for start_w, end_w in zip(start_ws, end_ws):
+                if start_w > len(utt_list) or end_w > len(utt_list):
+                    continue
+                start_ch = 0
+                for i in range(start_w):
+                    start_ch += len(utt_list[i]) + 1
+                end_ch = start_ch
+                for i in range(start_w, end_w):
+                    end_ch += len(utt_list[i]) + 1
+                try:
+                    end_ch += len(utt_list[end_w])
+                except:
+                    print(utt_list, start_w, end_w)
+                if not utt[start_ch: end_ch] == v:
+                    # print('2222222222222222222222222')
+                    # print('\n'.join([v, utt[start_ch: end_ch - 1]]))
+                    continue
+
+                else:
+                    found = True
+                    converted_da['non-categorical'].append({
+                        'intent': Act.lower(),
+                        'domain': Domain.lower(),
+                        'slot': reformated_slot,
+                        'value': v,
+                        'start': start_ch,
+                        'end': end_ch
+                    })
+                    break
+
+            if not found:
+                converted_da['non-categorical'].append({
+                    'intent': Act.lower(),
+                    'domain': Domain.lower(),
+                    'slot': reformated_slot,
+                    'value': v
+                })
+    return converted_da
+
+
+def get_state_update(prev_state, cur_state, dialog, did, tid, utt, coref_dict, slot_notfound_dict, da_cat_slot_values):
+    prev_turns = dialog['turns']
+    state_update = {'categorical': [], 'non-categorical': []}
+    notfoundnum = 0
+    total_value = 0
+
+    diff_state = {}
+    if prev_state is None:
+        diff_state = {domain: {slot: value for slot, value in cur_state[domain].items() if value != ''} for domain in
+                      cur_state}
+    else:
+        assert len(prev_state) == len(cur_state), print(prev_state, cur_state)
+        for domain, domain_state in prev_state.items():
+            if domain not in diff_state:
+                diff_state[domain] = {}
+            for slot, value in domain_state.items():
+                if value != cur_state[domain][slot]:
+                    # assert len(cur_state[domain][slot]) > 0, print(did, tid, domain, slot, utt)
+                    diff_state[domain][slot] = cur_state[domain][slot]
+
+    ret_diff_state = copy.deepcopy(diff_state)
+
+
+
+    for domain in diff_state:
+        for slot in diff_state[domain]:
+
+            total_value += 1
+            fix_or = False
+            if '|' in diff_state[domain][slot]:
+                value = diff_state[domain][slot].split('|')[0]
+            else:
+                value = diff_state[domain][slot]
+
+            # if dialog['original_id'] == 'PMUL2512' and tid == 17 and value == '02:45':
+            #     value = '2:45'
+
+            value_list = [value]
+            for _synonyms in synonyms:
+                if value in _synonyms:
+                    value_list = _synonyms
+
+            value_list.extend(get_time_variants(value))
+            value_list.extend(get_genitive_variants(value))
+            value_list.extend(get_bb_variants(value))
+
+            if value.endswith(' restaurant'):
+                value_list.append(value.split(' restaurant')[0])
+            if value.endswith(' hotel'):
+                value_list.append(value.split(' hotel')[0])
+            found = False
+            for value in value_list:
+                # categorical slots
+                if slot in ['internet', 'parking', 'pricerange', 'day', 'area', 'stars']:
+                    reformated_domain_slot = '-'.join([domain, slot])
+                    if reformated_domain_slot in state_cat_slot_value_dict and (value in state_cat_slot_value_dict[reformated_domain_slot] or value in ['dontcare', '', 'none', 'not mentioned']):
+                        state_update['categorical'].append({
+                            'domain': domain,
+                            'slot': slot,
+                            'value': diff_state[domain][slot]
+                        })
+                        if domain + '-' + slot not in da_cat_slot_values:
+                            da_cat_slot_values[domain + '-' + slot] = [diff_state[domain][slot]]
+                        da_cat_slot_values[domain + '-' + slot].append(diff_state[domain][slot])
+                        if value != diff_state[domain][slot]:
+                            state_update['categorical'][-1].update({'fixed_value': value})
+                            ret_diff_state[domain][slot] = value
+                        else :
+                            for _turn in prev_turns[::-1]:
+                                found = False
+                                for da in _turn['dialogue_act']['categorical']:
+                                    if da['value'] == value:
+                                        if 'start' in da:
+                                            state_update['categorical'][-1].update({
+                                                'utt_idx': _turn['utt_idx'],
+                                                'start': da['start'],
+                                                'end': da['end'],
+                                                'from': 'prev_da_span'
+                                            })
+                                            found = True
+                                            break
+                                if found:
+                                    break
+                    else:
+                        state_update['categorical'].append({
+                            'domain': domain,
+                            'slot': slot,
+                            'value': diff_state[domain][slot],
+                            'fixed_value': 'not found'
+                        })
+                        if domain + '-' + slot not in da_cat_slot_values:
+                            da_cat_slot_values[domain + '-' + slot] = []
+                        da_cat_slot_values[domain + '-' + slot].append(diff_state[domain][slot])
+                        ret_diff_state[domain][slot] = 'not found'
+                        notfoundnum += 1
+                    # reformated_domain_slot = '-'.join([domain, slot]
+                    found = True
+                    break
+
+                # process value ---> none
+                assert value not in ['none', 'not mentioned']
+                if value in ['', 'dontcare']:
+                    # if reformated_domain_slot not in state_cat_slot_values:
+                    #     state_cat_slot_values[reformated_domain_slot] = []
+                    # # if v not in cat_slot_values[reformated_domain_slot]:
+                    # state_cat_slot_values[reformated_domain_slot].append(value)
+                    state_update['non-categorical'].append({
+                        'domain': domain,
+                        'slot': slot,
+                        'value': diff_state[domain][slot]
+                    })
+                    found = True
+                    break
+
+                # first look for values in coref_dict
+                for _Domain_Act, _Slot, _value in coref_dict:
+                    _domain, _act = _Domain_Act.lower().split('-')
+                    _slot = _Slot.lower()
+                    _coref_value = coref_dict[(_Domain_Act, _Slot, _value)]['coref_value']
+                    if _coref_value == '':
+                        continue
+                    _coref_turn = coref_dict[(_Domain_Act, _Slot, _value)]['turn']
+                    if _coref_turn == -1:
+                        continue
+                    _coref_pos = coref_dict[(_Domain_Act, _Slot, _value)]['pos']
+                    if _coref_pos == '':
+                        continue
+                    _utt = coref_dict[(_Domain_Act, _Slot, _value)]['utt']
+                    if _domain == domain and _slot == slot and value == _coref_value:
+
+                        start_w, end_w = [int(p) for p in _coref_pos.split('-')]
+                        utt_list = _utt.split()
+                        start_ch = 0
+                        for i in range(start_w):
+                            start_ch += len(utt_list[i]) + 1
+                        end_ch = start_ch
+                        for i in range(start_w, end_w + 1):
+                            end_ch += len(utt_list[i]) + 1
+                        end_ch -= 1
+
+                        if not _utt[start_ch: end_ch] == _coref_value:
+                            # print(111111111111111111111111111111111)
+                            # print(_utt[start_ch: end_ch], _coref_value)
+                            continue
+
+                        state_update['non-categorical'].append({
+                            'domain': domain,
+                            'slot': slot,
+                            'value': diff_state[domain][slot],
+                            'from': 'coref',
+                            'utt_idx': _coref_turn,
+                            'start': start_ch,
+                            'end': end_ch
+                        })
+                        if value != diff_state[domain][slot]:
+                            state_update['categorical'][-1].update({'fixed_value': value})
+                            ret_diff_state[domain][slot] = value
+                        found = True
+
+                if found:
+                    break
+
+                # from da annotation
+                for _turn in prev_turns[::-1]:
+                    for da in _turn['dialogue_act']['non-categorical']:
+                        # if da['domain'] == domain and da['slot'] == slot and fuzz.ratio(da['value'], value) > 85:
+                            # if not da['value'] == value:
+                            #     print(1111111111111111)
+                            #     print(value, da['value'])
+
+                        if fuzz.ratio(da['value'], value) > 85:
+
+                            if 'start' in da:
+                                found = True
+                                state_update['non-categorical'].append({
+                                    'domain': domain,
+                                    'slot': slot,
+                                    # 'value': da['value'],
+                                    'value': diff_state[domain][slot],
+                                    'utt_idx': _turn['utt_idx'],
+                                    'start': da['start'],
+                                    'end': da['end'],
+                                    'from': 'prev_da_span'
+                                })
+                                if value != diff_state[domain][slot]:
+                                    state_update['non-categorical'][-1].update({'fixed_value': value})
+                                    ret_diff_state[domain][slot] = value
+                                if da['value'] != value:
+                                    state_update['non-categorical'][-1].update({'fixed_value':da['value']})
+                                    ret_diff_state[domain][slot] = da['value']
+
+                                break
+                    if found:
+                        break
+
+                if found:
+                    break
+
+                # from utterance
+                for _turn in prev_turns[::-1]:
+                    _utt = _turn['utterance']
+                    (start, end), num = pharse_in_sen(str(value), _utt)
+                    if num:
+                        assert value.lower() == _utt[start:end].lower() \
+                               or digit2word[value].lower() == _utt[start:end].lower()
+                        found = True
+                        state_update['non-categorical'].append({
+                            'domain': domain,
+                            'slot': slot,
+                            'value': diff_state[domain][slot],
+                            # 'value': _utt[start:end].lower(),
+                            # 'fixed_value': _utt[start:end].lower(),
+                            'from': 'prev_utt',
+                            'utt_idx': _turn['utt_idx'],
+                            'start': start,
+                            'end': end
+                        })
+                        if value != diff_state[domain][slot]:
+                            state_update['non-categorical'][-1].update({'fixed_value': value})
+                            ret_diff_state[domain][slot] = value
+                        if value != _utt[start:end].lower():
+                            state_update['non-categorical'][-1].update({'fixed_value': _utt[start:end].lower()})
+                            ret_diff_state[domain][slot] = _utt[start:end].lower()
+                        found = True
+                        break
+                if found:
+                    break
+
+                # from utterance
+                if not value.isdigit():
+                    for _turn in prev_turns[::-1]:
+                        _utt = _turn['utterance']
+
+                        s = difflib.SequenceMatcher(None, _utt, value)
+                        matches = s.get_matching_blocks()
+
+                        for i, j, n in matches:
+                            possible_value = _utt[i: i+len(value)]
+
+                            if i+ len(value) < len(_utt) and _utt[i+len(value)] not in [ ' ', ',', '.', '?', '!', '/'] :
+                                possible_value += _utt[i+len(value):].split()[0]
+
+                                if possible_value.startswith('th '):
+                                    possible_value = possible_value[3:]
+                                    i += 3
+                            if i > 0 and _utt[i-1] not in [ ' ', ',', '.', '?', '!', '/']:
+                                # cut first incomplete word
+                                if len(possible_value.split()) > 1:
+                                    i += len(possible_value.split()[0]) + 1
+                                    possible_value = ' '.join(possible_value.split()[1:])
+
+
+                                # prepend first incomplete word
+                                # possible_value = _utt[:i].split()[-1] + possible_value
+                                # i -= len(_utt[:i].split()[-1])
+
+
+                            if fuzz.token_sort_ratio(value, possible_value) > 92 or possible_value.startswith('ashley hotel and lovell lodge') :
+                                found = True
+
+                                state_update['non-categorical'].append({
+                                            'domain': domain,
+                                            'slot': slot,
+                                            'value': diff_state[domain][slot],
+                                            # 'value': possible_value,
+                                            # 'fixed_value': possible_value,
+                                            'from':'prev_utt',
+                                            'utt_idx': _turn['utt_idx'],
+                                            'start': i,
+                                            'end': i+len(possible_value)
+                                        })
+                                if value != diff_state[domain][slot]:
+                                    state_update['non-categorical'][-1].update({'fixed_value': value})
+                                    ret_diff_state[domain][slot] = value
+                                if possible_value != value:
+                                    state_update['non-categorical'][-1].update({'fixed_value': possible_value})
+                                    ret_diff_state[domain][slot] = possible_value
+                                break
+                    #             assert _utt[i:i+len(possible_value)] == possible_value, print(_utt, _utt[i:i+len(possible_value)], possible_value)
+                    #             break
+                                # if not possible_value == value:
+                                #             print(3333333333333333)
+                                #             print(value)
+                                #             print(possible_value)
+                            if found:
+                                break
+                        if found:
+                            break
+
+                if found:
+                    break
+            if not found:
+                #                 print('3333333333333333333')
+                #                 print(did, tid)
+                #                 print(domain, slot, value)
+                #                 print([_t['utterance'] for _t in prev_turns])
+                # assert slot not in ['internet', 'parking', 'pricerange', 'day', 'area', 'stars']
+
+                if (domain, slot) not in slot_notfound_dict:
+                    slot_notfound_dict[(domain, slot)] = 1
+                else:
+                    slot_notfound_dict[(domain, slot)] += 1
+                state_update['non-categorical'].append({
+                    'domain': domain,
+                    'slot': slot,
+                    'value': diff_state[domain][slot],
+                    'fixed_value': 'not found'
+                })
+                ret_diff_state[domain][slot] = 'not found'
+                notfoundnum += 1
+    return state_update, notfoundnum, total_value, ret_diff_state
+
+
+def merge_data_annotation():
+    extract_dir = os.path.join(self_dir, 'original_data')
+    data25 = json.load(open(os.path.join(self_dir, extract_dir, 'data_meta_fixed.json')))
+    # data21_train = json.load(open(os.path.join(self_dir, extract_dir, 'train.json')))
+    # data21_val = json.load(open(os.path.join(self_dir, extract_dir, 'val.json')))
+    # data21_test = json.load(open(os.path.join(self_dir, extract_dir, 'test.json')))
+    # data21 = {}
+    # data21.update(data21_train)
+    # data21.update(data21_val)
+    # data21.update(data21_test)
+
+    # update_from_25_cnt = 0
+    # total_turn = 0
+    # for dial_id, dialog in data21.items():
+    #     dial_id = dial_id + '.json'
+    #     assert dial_id in data25
+    #     for i, _turn in enumerate(dialog['log']):
+    #         total_turn += 1
+    #         if _turn['text'] == data25[dial_id]['log'][i]['text']:
+    #             _turn['span_info'].extend(copy.deepcopy(data25[dial_id]['log'][i]['span_info']))
+    #             # _turn['span_info'] = list(set(_turn['span_info']))
+    #             # _turn['dialog_act'].update(copy.deepcopy(data25[dial_id]['log'][i]['dialog_act']))
+    #             for Domain_Intent in data25[dial_id]['log'][i]['dialog_act']:
+    #                 if Domain_Intent in _turn['dialog_act']:
+    #                     _turn['dialog_act'][Domain_Intent].extend(data25[dial_id]['log'][i]['dialog_act'][Domain_Intent])
+    #                 else:
+    #                     _turn['dialog_act'][Domain_Intent] = copy.deepcopy(data25[dial_id]['log'][i]['dialog_act'][Domain_Intent])
+    #                 # _turn['dialog_act'][Domain_Intent] = list(set(_turn['dialog_act'][Domain_Intent]))
+    #             if 'coreference' in data25[dial_id]['log'][i]:
+    #                 _turn['coreference'] = copy.deepcopy(data25[dial_id]['log'][i]['coreference'])
+    #             update_from_25_cnt += 1
+    #         else:
+    #             # print('==============multiwoz21=================')
+    #             # print(_turn['text'])
+    #             # print('==============multiwoz25=================')
+    #             # print(data25[dial_id]['log'][i]['text'])
+    #             continue
+
+    # print('{}/{} turns update from multiwoz25 data'.format(update_from_25_cnt, total_turn))
+    return data25
+
+
+def preprocess(da_cat_slot_values, state_cat_slot_values):
+    all_data = []
+    binary_ont = []
+    intent_ont = {}
+    state_ont = {}
+
+    data_splits = ['train', 'val', 'test']
+    # data_splits = ['test']
+    extract_dir = os.path.join(self_dir, 'original_data')
+    num_train_dialogue = 0
+    num_train_utt = 0
+
+    num_match_error_da_span = 0
+
+    if not os.path.exists('data.zip') or not os.path.exists('ontology.json'):
+        # for data_split in data_splits:
+        data_zip_file = os.path.join(self_dir, 'original_data.zip')
+        if not os.path.exists(data_zip_file):
+            raise FileNotFoundError(data_zip_file)
+
+        logging.info('unzip multiwoz data to {}'.format(extract_dir))
+        archive = zipfile.ZipFile(data_zip_file, 'r')
+        archive.extractall(extract_dir)
+
+        data = merge_data_annotation()
+        # exit()
+        # data = json.load(open(os.path.join(self_dir, extract_dir, 'data_meta_fixed.json')))
+        train_list = open(os.path.join(self_dir, extract_dir, 'trainListFile')).read().split()
+        val_list = open(os.path.join(self_dir, extract_dir, 'valListFile')).read().split()
+        test_list = open(os.path.join(self_dir, extract_dir, 'testListFile')).read().split()
+
+        total_not_found_slot = 0
+        total_slot = 0
+        total_turn = 0
+        total_not_found_turn = 0
+        total_not_found_state = 0
+
+        slot_notfound_dict = {}
+
+        dialog_idx = 0
+        for dialog_id, dialog in tqdm(data.items()):
+
+            acc_not_found_flag = False
+
+            coref_dict = {}
+
+            data_split = None
+            for _split in data_splits:
+                if dialog_id.strip('.json') in eval(_split + '_list'):
+                    data_split = _split
+                    break
+            # assert data_split is not None
+            # if data_split != 'test':
+            #     continue
+
+            if data_split == 'train':
+                num_train_dialogue += len(data)
+
+            dialog_idx += 1
+            # if dialog_idx > 10:
+            #     break
+            converted_dialogue = {
+                'dataset': 'multiwoz23',
+                'data_split': data_split,
+                'dialogue_id': 'multiwoz23_' + str(dialog_idx),
+                'original_id': dialog_id,
+                'domains': [d for d in dialog['goal'] if
+                            len(dialog['goal'][d]) != 0 and d in multiwoz_desc and d not in ['police', 'hospital', 'bus']],
+                'turns': [],
+            }
+
+            if data_split == 'train':
+                num_train_utt += len(dialog['log'])
+
+            prev_state = None
+            accum_fixed_state = {}
+            for turn_id, turn in enumerate(dialog['log']):
+
+                utt = turn['text'].lower()
+                # for several wrong words
+                utt = utt.replace('seeuni', 'see uni')
+
+                utt = ' '.join(utt.split())
+                das = turn['dialog_act']
+                role = 'user' if turn_id % 2 == 0 else 'system'
+                spans = turn['span_info']
+
+                da_dict = {}
+                for Domain_Act in das:
+                    Domain = Domain_Act.split('-')[0]
+                    if Domain.lower() not in converted_dialogue['domains'] and Domain.lower() not in ['general', 'booking']:
+                        continue
+
+                    Svs = das[Domain_Act]
+                    for S, v in Svs:
+                        v = v.lower()
+                        if v.startswith('th '):
+                            # print(v)
+                            v = v[3:]
+                        if v.startswith('he '):
+                            # print(v)
+                            v = v[3:]
+
+                        if (Domain_Act, S, v) not in da_dict:
+                            da_dict[(Domain_Act, S, v)] = {}
+
+                for span in spans:
+                    Domain_Act, S, v, start_word, end_word = span
+                    v = v.lower()
+                    if not (Domain_Act, S, v) in da_dict:
+                        # logging.info('span da annotation not found in multiwoz da label')
+                        # logging.info(dialog_id, turn_id)
+                        # logging.info((Domain_Act, S, v))
+                        # logging.info(da_dict)
+                        num_match_error_da_span += 1
+                    else:
+                        if v.startswith('th '):
+                            # print(v)
+                            v = v[3:]
+                            start_word += 3
+                        if v.startswith('he '):
+                            # print(v)
+                            v = v[3:]
+                            start_word += 3
+
+                        if 'start_word' not in da_dict[(Domain_Act, S, v)]:
+                            da_dict[(Domain_Act, S, v)]['start_word'] = []
+                            da_dict[(Domain_Act, S, v)]['end_word'] = []
+
+                        da_dict[(Domain_Act, S, v)]['start_word'].append(start_word)
+                        da_dict[(Domain_Act, S, v)]['end_word'].append(end_word)
+
+                converted_turn = {
+                    'utt_idx': turn_id,
+                    'speaker': role,
+                    'utterance': utt,
+                    'dialogue_act': convert_da(utt, da_dict, binary_ont, intent_ont, dialog_id, turn_id, da_cat_slot_values),
+                }
+
+                # for state annotations
+                if role == 'system':
+                    turn_state = turn['metadata']
+                    cur_state = {}
+                    for domain in turn_state:
+                        if domain in ['police', 'hospital', 'bus']:
+                            continue
+                        if domain not in converted_dialogue['domains']:
+                            continue
+                        cur_state[domain] = {}
+                        for subdomain in ['semi', 'book']:
+                            for slot in turn_state[domain][subdomain]:
+                                if slot == 'booked':
+                                    continue
+                                if slot == 'ticket':  # or (domain == 'train' and slot == 'people'):
+                                    # for cases where domain slot exists in REF but not in state
+                                    # because of check in evaluate.py
+                                    continue
+
+                                else:
+                                    fixed_slot = slot
+                                state_ds = domain + '-' + fixed_slot
+                                if state_ds not in slot_to_type:
+                                    logging.info('state slot not defined in da list')
+                                    logging.info(state_ds)
+                                if turn_state[domain][subdomain][slot] in ['', [], 'not mentioned', 'none']:
+                                    cur_state[domain][fixed_slot] = ""
+                                else:
+                                    if turn_state[domain][subdomain][slot].startswith('th '):
+                                        # print('state')
+                                        # print(turn_state[domain][subdomain][slot])
+                                        turn_state[domain][subdomain][slot] = turn_state[domain][subdomain][slot][3:]
+                                    if turn_state[domain][subdomain][slot].startswith('he '):
+                                        # print('state')
+                                        # print(turn_state[domain][subdomain][slot])
+                                        turn_state[domain][subdomain][slot] = turn_state[domain][subdomain][slot][3:]
+
+                                    cur_state[domain][fixed_slot] = turn_state[domain][subdomain][slot]
+
+                                if domain not in state_ont:
+                                    state_ont[domain] = []
+                                if fixed_slot not in state_ont[domain]:
+                                    state_ont[domain].append(fixed_slot)
+
+                        if domain == 'train' and 'people' not in cur_state[domain]:
+                            cur_state[domain]['people'] = ''
+                        # if len(converted_turn['state'][domain]) == 0:
+                        #     converted_turn['state'].pop(domain)
+                        if len(converted_dialogue['turns']) > 0:
+                            # move state from system side to user side
+                            converted_dialogue['turns'][-1]['state'] = copy.deepcopy(cur_state)
+
+                    # for state update annotations
+                    state_update, _notfoundslot, _totalslot, ret_diff_state = get_state_update(prev_state, cur_state, converted_dialogue,
+                                                                               dialog_id, turn_id, turn['text'], coref_dict,
+                                                                               slot_notfound_dict, da_cat_slot_values)
+
+                    update_state(accum_fixed_state, ret_diff_state)
+                    for domain in accum_fixed_state:
+                        for slot in accum_fixed_state[domain]:
+                            assert isinstance(accum_fixed_state[domain][slot], str), print(accum_fixed_state[domain][slot])
+
+                    if _notfoundslot == 0:
+                        # for slot in state_update['categorical']:
+                        #     assert 'fixed_value' not in slot
+                        for slot in state_update['non-categorical']:
+                            if slot['value'] not in ['', 'dontcare']:
+                                assert 'utt_idx' in slot
+
+                    else:
+                        flag = False
+                        for slot in state_update['categorical']:
+                            if 'fixed_value' in slot:
+                                flag = True
+                                break
+                        for slot in state_update['non-categorical']:
+                            if 'utt_idx' not in slot:
+                                flag = True
+                                break
+                        assert flag, print(flag, state_update['non-categorical'])
+
+                    total_turn += 1
+                    total_slot += _totalslot
+                    total_not_found_slot += _notfoundslot
+                    total_not_found_turn += 1 if _notfoundslot > 0 else 0
+                    if _notfoundslot > 0:
+                        acc_not_found_flag = True
+                    if acc_not_found_flag:
+                        total_not_found_state += 1
+
+                    coref_dict = {}
+                    converted_dialogue['turns'][-1]['state_update'] = copy.deepcopy(state_update)
+                    converted_dialogue['turns'][-1]['fixed_state'] = copy.deepcopy(accum_fixed_state)
+                    if 'state' not in converted_dialogue['turns'][-1]:
+                        converted_dialogue['turns'][-1]['state'] = {}
+                    prev_state = copy.deepcopy(cur_state)
+
+                converted_dialogue['turns'].append(converted_turn)
+
+                if 'coreference' in turn:
+                    for Domain_Act in turn['coreference']:
+                        for Slot, value, coref, coref_turn, coref_pos in turn['coreference'][Domain_Act]:
+                            value = value.lower()
+                            coref_dict[(Domain_Act, Slot, value)] = {'turn': coref_turn, 'pos': coref_pos,
+                                                                     'coref_value': coref,
+                                                                     'utt': converted_dialogue['turns'][coref_turn][
+                                                                         'utterance']}
+
+            check_spans(converted_dialogue)
+            postprocess_update_spans(converted_dialogue)
+            if converted_dialogue['turns'][-1]['speaker'] == 'system':
+                converted_dialogue['turns'].pop(-1)
+            all_data.append(converted_dialogue)
+
+        print('total_turn', total_turn)
+        print('total_not_found_turn', total_not_found_turn)
+        print('total_slot', total_slot)
+        print('total_not_found_slot', total_not_found_slot)
+        print('total_not_found_state', total_not_found_state)
+        print(slot_notfound_dict)
+        from collections import Counter
+        # print({k : dict(Counter(v)) for k, v in cat_slot_values.items()})
+        json.dump({k : dict(Counter(v)) for k, v in state_cat_slot_values.items()}, open(os.path.join(self_dir, 'cat_slot_values.json'), 'w'), indent=4)
+        cat_slot_values = {k: list(set(v)) for k, v in state_cat_slot_values.items()}
+        da_cat_slot_values = {k: list(set(v)) for k, v in da_cat_slot_values.items()}
+
+        json.dump(all_data, open('data.json', 'w'), indent=4)
+        write_zipped_json(os.path.join(self_dir, './data.zip'), 'data.json')
+        os.remove('data.json')
+
+        new_ont = {
+            'domains': {},
+            'intents': {},
+            'binary_dialogue_act': {}
+        }
+
+        for d_s in slot_to_type:
+            d, s = d_s.split('-')
+            if d not in new_ont['domains']:
+                new_ont['domains'][d] = {
+                    'description': multiwoz_desc[d]['domain'],
+                    'slots': {}
+                }
+            domain_ont = new_ont['domains'][d]
+            assert s not in domain_ont
+            domain_ont['slots'][s] = {
+                'description': multiwoz_desc[d][s] if s in multiwoz_desc[d] else '',
+                'is_categorical': d_s in state_cat_slot_ds,
+                'possible_values': da_cat_slot_values[d_s] if d_s in state_cat_slot_ds else []
+            }
+            domain_ont['slots'][s]['possible_values'] = [_ for _ in domain_ont['slots'][s]['possible_values'] if _ not in ['dontcare', '']]
+
+        new_ont['state'] = {}
+        # print(state_cat_slot_value_dict)
+        print(state_ont)
+        for d in state_ont:
+            new_ont['state'][d] = {}
+            for s in state_ont[d]:
+                d_s = '-'.join([d, s])
+                new_ont['state'][d][s] = ''
+                # new_ont['state'][d][s] = {
+                #     'description': multiwoz_desc[d][s] if s in multiwoz_desc[d] else '',
+                #     'is_categorical': d_s in state_cat_slot_value_dict,
+                #     'possible_values': list(state_cat_slot_value_dict[d_s].keys()) if d_s in state_cat_slot_value_dict else []
+                # }
+                # new_ont['state'][d][s]['possible_values'] = [_ for _ in new_ont['state'][d][s]['possible_values'] if
+                #                                              _ != 'dontcare']
+
+        new_ont['intents'] = {i: {'description': multiwoz_desc['intents'][i]} for i in intent_ont}
+        new_ont['binary_dialogue_act'] = binary_ont
+
+        slot_desc = json.load(open(os.path.join(self_dir, extract_dir, './slot_descriptions.json')))
+        for domain_slot in slot_desc:
+            _domain, _slot = domain_slot.split('-')
+            _desc = slot_desc[domain_slot][0]
+            if _slot == 'arriveby':
+                _slot = 'arriveBy'
+            elif _slot == 'leaveat':
+                _slot = 'leaveAt'
+            if 'book' in _slot:
+                _slot = _slot.replace('book ', '')
+            if not _domain in new_ont['state']:
+                # logging.info('domain {} not in state domains'.format(_domain))
+                continue
+            if _domain in new_ont['domains'] and _slot in new_ont['domains'][_domain]['slots']:
+                new_ont['domains'][_domain]['slots'][_slot]['description'] = _desc
+            if not _slot in new_ont['state'][_domain]:
+                logging.info('domain {} slot {} not in state'.format(_domain, _slot))
+                continue
+            # new_ont['state'][_domain][_slot] = ""
+            assert _domain in new_ont['domains'], print(_domain)
+            assert _slot in new_ont['domains'][_domain]['slots']
+
+        logging.info('num_match_error_da_span {}'.format(num_match_error_da_span))
+        json.dump(new_ont, open(os.path.join(self_dir, './ontology.json'), 'w'), indent=4)
+
+    else:
+        all_data = read_zipped_json(os.path.join(self_dir, './data.zip'), 'data.json')
+        new_ont = json.load(open(os.path.join(self_dir, './ontology.json'), 'r'))
+    logging.info('# dialogue: {}, # turn: {}'.format(num_train_dialogue, num_train_utt))
+    return all_data, new_ont
+
+
+def postprocess_update_spans(dialog):  # Insert a space before span values glued to the preceding character, re-align the affected spans, then verify every span; mutates and returns dialog.
+    changed_utt_idx_and_position = {}  # utt_idx -> offset at which a space was inserted into that utterance
+    for turn in dialog['turns']:
+        if turn['speaker'] != 'user':
+            continue
+        changed = False  # becomes True once this turn's own utterance has been edited
+        for _update in turn['state_update']['non-categorical']:
+            if 'utt_idx' in _update:  # only updates that carry a span are handled
+                utt_idx = _update['utt_idx']
+                start = _update['start']
+                end = _update['end']
+                # A span in this turn's own utterance whose preceding character is not a space gets a space inserted in front of it.
+                # Assumes at most one such insertion per utterance: only the first one is applied and recorded.
+                if turn['utt_idx'] not in changed_utt_idx_and_position:
+                    if utt_idx == turn['utt_idx'] and start-1 > -1 and turn['utterance'][start-1] not in [' ']:
+                        changed_utt_idx_and_position[turn['utt_idx']] = start
+                        print('=======================')
+                        print(dialog['original_id'])
+                        print(turn['utterance'])
+                        print(json.dumps(_update, indent=2))
+                        print(turn['utterance'][start: end])
+                        turn['utterance'] = turn['utterance'][:start] + ' ' + turn['utterance'][start:]
+                        print(turn['utterance'])
+                        _update['start'] += 1
+                        _update['end'] += 1
+                        changed = True
+                if utt_idx not in changed_utt_idx_and_position:  # referenced utterance was never edited
+                    continue
+                else:
+                    value = _update['fixed_value'] if 'fixed_value' in _update and _update['fixed_value'] != 'not found' else _update['value']
+                    if start >= changed_utt_idx_and_position[utt_idx]:  # span begins at or after the inserted space
+                        if dialog['turns'][utt_idx]['utterance'][_update['start']: _update['end']] != value:
+                            assert dialog['turns'][utt_idx]['utterance'][_update['start']+1: _update['end']+1] == value, print(dialog['turns'][utt_idx]['utterance'], dialog['turns'][utt_idx]['utterance'][_update['start']+1: _update['end']+1])
+                            _update['start'] += 1
+                            _update['end'] += 1
+                    elif start < changed_utt_idx_and_position[utt_idx] < end:  # inserted space falls inside the span
+                        if dialog['turns'][utt_idx]['utterance'][_update['start']: _update['end']] != value:
+                            assert (dialog['turns'][utt_idx]['utterance'][_update['start']: _update['end']+1]).replace(' ', '') == value.replace(' ', ''), print(dialog['turns'][utt_idx]['utterance'], dialog['turns'][utt_idx]['utterance'][_update['start']: _update['end']+1], value)
+                            print('fix')
+                            print(_update)
+                            _update['end'] += 1
+                            _update['fixed_value'] = turn['utterance'][_update['start']: _update['end'] + 1].strip()  # NOTE(review): slices turn['utterance'] rather than turns[utt_idx], and up to end + 1 after end was already incremented - confirm intent
+                            print(_update)
+        if changed:  # second pass over this turn's updates, now that the insertion offset is recorded
+            for _update in turn['state_update']['non-categorical']:
+                if 'utt_idx' in _update:
+                    utt_idx = _update['utt_idx']
+                    start = _update['start']
+                    end = _update['end']
+                    # Same re-alignment as in the first pass; spans that already match their value are left alone.
+                    if utt_idx not in changed_utt_idx_and_position:
+                        continue
+                    else:
+                        value = _update['fixed_value'] if 'fixed_value' in _update and _update[
+                            'fixed_value'] != 'not found' else _update['value']
+                        if start >= changed_utt_idx_and_position[utt_idx]:
+                            if dialog['turns'][utt_idx]['utterance'][_update['start']: _update['end']] != value:
+                                assert dialog['turns'][utt_idx]['utterance'][_update['start'] + 1: _update['end'] + 1] == value
+                                _update['start'] += 1
+                                _update['end'] += 1
+                        elif start < changed_utt_idx_and_position[utt_idx] < end:
+                            if dialog['turns'][utt_idx]['utterance'][_update['start']: _update['end']] != value:
+                                print('====================fix===================')
+                                print(_update)
+                                assert (dialog['turns'][utt_idx]['utterance'][_update['start']: _update['end']+1]).replace(' ', '') == value.replace(' ', ''), print(dialog['turns'][utt_idx]['utterance'], dialog['turns'][utt_idx]['utterance'][_update['start']+1: _update['end']+1])
+                                _update['end'] += 1
+                                _update['fixed_value'] = dialog['turns'][utt_idx]['utterance'][_update['start']: _update['end'] + 1]
+                                print(_update)
+    for turn in dialog['turns']:  # final pass: trim a leading space from each span, then verify it
+        if turn['speaker'] != 'user':
+            continue
+        for _update in turn['state_update']['non-categorical']:
+            if 'utt_idx' in _update:
+                value = _update['fixed_value'] if 'fixed_value' in _update and _update[
+                    'fixed_value'] != 'not found' else _update['value']
+                utt_idx = _update['utt_idx']
+                start = _update['start']
+                end = _update['end']
+                if dialog['turns'][utt_idx]['utterance'][start] == ' ':  # span begins on a space: move its start one character right
+                    _update['start'] += 1
+                    _update['fixed_value'] = value[1:]
+                    value = value[1:]
+                    start += 1
+                assert dialog['turns'][utt_idx]['utterance'][start: end] == value, print(json.dumps(turn, indent=4), [c for c in dialog['turns'][utt_idx]['utterance'][start: end]], [c for c in value])
+    return dialog
+
+
+def get_time_variants(time_text):
+    value_list = [time_text]
+    pattern_time = r'(\d{1,2}:\d{2})(\s)?(am|pm|AM|PM)?'
+    match_times = re.findall(pattern_time, time_text)
+    if len(match_times) < 1:
+        return []
+    match_time = match_times[0]
+
+    am_flag = match_time[2] in ['am', 'AM']
+    pm_flag = match_time[2] in ['pm', 'PM']
+    no_am_pm_flag = match_time[2] == ''
+    if am_flag:
+        # 4:00am -> 4:00
+        value_list.append(match_time[0])
+        if len(match_time[0]) == 4:
+            # 4:00 -> 04:00
+            value_list.append('0' + match_time[0])
+    if pm_flag:
+        # 4:00pm -> 16:00
+        hour, min = match_time[0].split(':')
+        hour = int(hour)
+        new_hour = 12 + hour
+        value_list.append(str(new_hour)+':'+min)
+    if no_am_pm_flag:
+        hour, min = match_time[0].split(':')
+        hour = int(hour)
+        if hour > 12:
+            new_hour = hour - 12
+            value_list.append(str(new_hour) + ':' + min + 'pm')
+            value_list.append(str(new_hour) + ':' + min + ' pm')
+            value_list.append(str(new_hour) + ':' + min)
+            if min == '00':
+                value_list.append(str(new_hour) + 'pm')
+                value_list.append(str(new_hour) + ' pm')
+                value_list.append(str(new_hour))
+        else:
+            value_list.append(str(hour) + ':' + min + 'am')
+            value_list.append(str(hour) + ':' + min + ' am')
+            value_list.append(str(hour) + ':' + min)
+            if min == '00':
+                value_list.append(str(hour) + 'am')
+                value_list.append(str(hour) + ' am')
+                value_list.append(str(hour))
+        if len(match_time[0]) == 5 and match_time[0][0] == '0':
+            value_list.append(match_time[0][1:])
+        value_list.append(''.join(match_time[0].split(':')))
+
+    return value_list
+
+
+def get_genitive_variants(value):
+    ret_list = []
+    value_genitive_format = r"(?=\w)s(?=\s)"
+    value_pattern = re.compile(value_genitive_format)
+
+    span_genitive_value = re.sub(value_pattern, " 's", value)
+    if span_genitive_value != value:
+        ret_list.append(span_genitive_value)
+    span_genitive_value = re.sub(value_pattern, "'s", value)
+    if span_genitive_value != value:
+        ret_list.append(span_genitive_value)
+    # if len(ret_list) > 0:
+    #     print('=============================')
+    #     print(value)
+    #     print(re.findall(value_pattern, value))
+    #     print(ret_list)
+    return ret_list
+
+
+def check_spans(dialog):
+    for turn in dialog['turns']:
+        if turn['speaker'] != 'user':
+            continue
+        for _update in turn['state_update']['non-categorical']:
+            if 'utt_idx' in _update:
+                value = _update['fixed_value'] if 'fixed_value' in _update and _update[
+                    'fixed_value'] != 'not found' else _update['value']
+                utt_idx = _update['utt_idx']
+                start = _update['start']
+                end = _update['end']
+                assert dialog['turns'][utt_idx]['utterance'][start:end] == value, print(dialog['turns'][utt_idx]['utterance'], dialog['turns'][utt_idx]['utterance'][start:end])
+
+
+
+def get_bb_variants(value):
+    ret_list = []
+    if 'bed and breakfast' in value:
+        ret_list.append(value.replace('bed and breakfast', 'b & b'))
+    return ret_list
+
+if __name__ == '__main__':  # run the conversion only when this file is executed as a script
+    preprocess(da_cat_slot_values, state_cat_slot_values)  # both arguments are module-level names defined outside this excerpt
\ No newline at end of file
diff --git a/data/unified_datasets/schema/README.md b/data/unified_datasets/schema/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..32483a533a248fda59a92dfeceab436d741659de
--- /dev/null
+++ b/data/unified_datasets/schema/README.md
@@ -0,0 +1,689 @@
+# README
+
+## Features
+
+- Annotations: dialogue act, belief state, character-level span for non-categorical slots.
+- Unseen domains and slots in the test set to quantify the performance in zero-shot or few shot settings.
+
+Statistics: 
+
+|       | \# dialogues | \# utterances | avg. turns | avg. tokens | \# domains |
+| ----- | ------------ | ------------- | ---------- | ----------- | ---------- |
+| train | 16142        | 313822        | 19.44      | 10.02       | 16         |
+| val   | 2482         | 46244         | 18.63      | 9.94        | 16         |
+| test  | 4201         | 80393         | 19.14      | 10.7        | 18         |
+
+## Main changes
+
+1. download the original data as `original_data.zip`
+
+2. run `python preprocess.py` to unzip `original_data.zip` and produce the processed `data.zip` and `ontology.json`.
+
+Main changes:
+
+- extract intent from domains.
+- ~~numerical slot => non-categorical, use string match to get the span.~~
+- add binary_dialogue_act for those binary intents such as 'goodbye', 'request'.
+- add a **count** slot (non-categorical, numerical) to each domain; it does not appear in the belief state.
+- system-turn states are updated from the previous user turn's frame['state'].
+- calculate the state update according to prev state and slot spans in current turn slot_vals and all previous dialogue acts. 99.6% of non-categorical state updates have spans; the remaining ones are cases like "Could you help me search for songs from **two years back** too?"
+- values in possible values, dialogue act, state, and state_update are in **lowercase**. 
+
+Notice:
+
+- for a categorical slot, the value may be **dontcare**, which is not present in **possible_values**.
+
+## Original data
+
+The Schema-Guided Dialogue (SGD) dataset consists of over 20k annotated
+multi-domain, task-oriented conversations between a human and a virtual
+assistant. These conversations involve interactions with services and APIs
+spanning 20 domains, ranging from banks and events to media, calendar, travel,
+and weather. For most of these domains, the dataset contains multiple different
+APIs, many of which have overlapping functionalities but different interfaces,
+which reflects common real-world scenarios. The wide range of available
+annotations can be used for intent prediction, slot filling, dialogue state
+tracking, policy imitation learning, language generation, user simulation
+learning, among other tasks in large-scale virtual assistants. Besides these,
+the dataset has unseen domains and services in the evaluation set to quantify
+the performance in zero-shot or few shot settings.
+
+[[paper]](https://arxiv.org/abs/1909.05855) [[download link]](https://github.com/google-research-datasets/dstc8-schema-guided-dialogue)
+
+### Schema Representation
+
+A service or API is essentially a set of functions (called intents), each taking
+a set of parameters (called slots). A schema is a normalized representation of
+the interface exposed by a service/API. In addition, the schema also includes
+natural language description of the included functions and their parameters to
+outline the semantics of each element. The schemas have been manually generated
+by the dataset creators. The schema for a service contains the following fields:
+
+*   **service_name** - A unique name for the service.
+*   **description** - A natural language description of the tasks supported by
+    the service.
+*   **slots** - A list of slots/attributes corresponding to the entities present
+    in the service. Each slot contains the following fields:
+    *   **name** - The name of the slot.
+    *   **description** - A natural language description of the slot.
+    *   **is_categorical** - A boolean value. If it is true, the slot has a
+        fixed set of possible values.
+    *   **possible_values** - List of possible values the slot can take. If the
+        slot is a categorical slot, it is a complete list of all the possible
+        values. If the slot is a non categorical slot, it is either an empty
+        list or a small sample of all the values taken by the slot.
+*   **intents** - The list of intents/tasks supported by the service. Each
+    method contains the following fields:
+    *   **name** - The name of the intent.
+    *   **description** - A natural language description of the intent.
+    *   **is_transactional** - A boolean value. If true, indicates that the
+        underlying API call is transactional (e.g, a booking or a purchase), as
+        opposed to a search call.
+    *   **required_slots** - A list of slot names whose values must be provided
+        before making a call to the service.
+    *   **optional_slots** - A dictionary mapping slot names to the default
+        value taken by the slot. These slots may be optionally specified by the
+        user and the user may override the default value. An empty default value
+        allows that slot to take any value by default, but the user may override
+        it.
+    *   **result_slots** - A list of slot names which are present in the results
+        returned by a call to the service or API.
+
+### Dialogue Representation
+
+The dialogue is represented as a list of turns, where each turn contains either
+a user or a system utterance. The annotations for a turn are grouped into
+frames, where each frame corresponds to a single service. Each turn in the
+single domain dataset contains exactly one frame. In multi-domain datasets, some
+turns may have multiple frames.
+
+Each dialogue is represented as a json object with the following fields:
+
+*   **dialogue_id** - A unique identifier for a dialogue.
+*   **services** - A list of services present in the dialogue.
+*   **turns** - A list of annotated system or user utterances.
+
+Each turn consists of the following fields:
+
+*   **speaker** - The speaker for the turn. Possible values are "USER" or
+    "SYSTEM".
+*   **utterance** - A string containing the natural language utterance.
+*   **frames** - A list of frames, each frame containing annotations for a
+    single service.
+
+Each frame consists of the fields listed below. The fields marked with * will
+be excluded from all user turns in the test data released to the participants.
+
+*   **service** - The name of the service corresponding to the frame. The slots
+    and intents used in the following fields are taken from the schema of this
+    service.
+*   **slots** - A list of slot spans in the utterance, only provided for
+    non-categorical slots. Each slot span contains the following fields:
+    *   **slot** - The name of the slot.
+    *   **start** - The index of the starting character in the utterance
+        corresponding to the slot value.
+    *   **exclusive_end** - The index of the character just after the last
+        character corresponding to the slot value in the utterance. In python,
+        `utterance[start:exclusive_end]` gives the slot value.
+*   **actions** - A list of actions corresponding to the system. Each action has
+    the following fields:
+    *   **act** - The type of action. The list of all possible system acts is
+        given below.
+    *   **slot** (optional) - A slot argument for some of the actions.
+    *   **values** (optional) - A list of values assigned to the slot. If the
+        values list is non-empty, then the slot must be present.
+    *   **canonical_values** (optional) - The values in their canonicalized form
+        as used by the service. It is a list of strings of the same length as
+        values.
+*   **service_call** (system turns only, optional) - The request sent to the
+    service. It consists of the following fields:
+    *   **method** - The name of the intent or function of the service or API
+        being executed.
+    *   **parameters** - A dictionary mapping slot name (all required slots and
+        possibly some optional slots) to a value in its canonicalized form.
+*   **service_results** (system turns only, optional) - A list of entities
+    containing the results obtained from the service. It is only available for
+    turns in which a service call is made. Each entity is represented as a
+    dictionary mapping a slot name to a string containing its canonical value.
+*   **state** (user turns only) - The dialogue state corresponding to the
+    service. It consists of the following fields:
+    *   **active_intent** - The intent corresponding to the service of the frame
+        which is currently being fulfilled by the system. It takes the value
+        "NONE" if none of the intents are active.
+    *   **requested_slots** - A list of slots requested by the user in the
+        current turn.
+    *   **slot_values** - A dictionary mapping slot name to a list of strings.
+        For categorical slots, this list contains a single value assigned to the
+        slot. For non-categorical slots, all the values in this list are spoken
+        variations of each other and are equivalent (e.g, "6 pm", "six in the
+        evening", "evening at 6" etc.).
+
+List of possible system acts:
+
+*   **INFORM** - Inform the value for a slot to the user. The slot and values
+    fields in the corresponding action are always non-empty.
+*   **REQUEST** - Request the value of a slot from the user. The corresponding
+    action always contains a slot, but values are optional. When values are
+    present, they are used as examples for the user e.g, "Would you like to eat
+    indian or chinese food or something else?"
+*   **CONFIRM** - Confirm the value of a slot before making a transactional
+    service call.
+*   **OFFER** - Offer a certain value for a slot to the user. The corresponding
+    action always contains a slot and a list of values for that slot offered to
+    the user.
+*   **NOTIFY_SUCCESS** - Inform the user that their request was successful. Slot
+    and values are always empty in the corresponding action.
+*   **NOTIFY_FAILURE** - Inform the user that their request failed. Slot and
+    values are always empty in the corresponding action.
+*   **INFORM_COUNT** - Inform the number of items found that satisfy the user's
+    request. The corresponding action always has "count" as the slot, and a
+    single element in values for the number of results obtained by the system.
+*   **OFFER_INTENT** - Offer a new intent to the user. Eg, "Would you like to
+    reserve a table?". The corresponding action always has "intent" as the slot,
+    and a single value containing the intent being offered. The offered intent
+    belongs to the service corresponding to the frame.
+*   **REQ_MORE** - Asking the user if they need anything else. Slot and values
+    are always empty in the corresponding action.
+*   **GOODBYE** - End the dialogue. Slot and values are always empty in the
+    corresponding action.
+
+List of possible user acts:
+
+*   **INFORM_INTENT** - Express the desire to perform a certain task to the
+    system. The action always has "intent" as the slot and a single value
+    containing the intent being informed.
+*   **NEGATE_INTENT** - Negate the intent which has been offered by the system.
+*   **AFFIRM_INTENT** - Agree to the intent which has been offered by the
+    system.
+*   **INFORM** - Inform the value of a slot to the system. The slot and values
+    fields in the corresponding action are always non-empty.
+*   **REQUEST** - Request the value of a slot from the system. The corresponding
+    action always contains a slot parameter. It may optionally contain a value,
+    in which case, the user asks the system if the slot has the specified value.
+*   **AFFIRM** - Agree to the system's proposition. Slot and values are always
+    empty.
+*   **NEGATE** - Deny the system's proposal. Slot and values are always empty.
+*   **SELECT** - Select a result being offered by the system. The corresponding
+    action may either contain no parameters, in which case all the values
+    proposed by the system are being accepted, or it may contain a slot and
+    value parameters, in which case the specified slot and value are being
+    accepted.
+*   **REQUEST_ALTS** - Ask for more results besides the ones offered by the
+    system. Slot and values are always empty.
+*   **THANK_YOU** - Thank the system. Slot and values are always empty.
+*   **GOODBYE** - End the dialogue. Slot and values are always empty.
+
+### Dataset Statistics
+
+The dataset consists of two kinds of dialogues.
+
+| Type of Dialogue |                 Train files                  |                  Dev files                   |                  Test Files                  |
+| ---------------- | :------------------------------------------: | :------------------------------------------: | :------------------------------------------: |
+| Single Domain    | `dialogues_001.json` to `dialogues_043.json` | `dialogues_001.json` to `dialogues_007.json` | `dialogues_001.json` to `dialogues_011.json` |
+| Multi Domain     | `dialogues_044.json` to `dialogues_127.json` | `dialogues_008.json` to `dialogues_020.json` | `dialogues_012.json` to `dialogues_034.json` |
+
+The single domain dialogues involve interactions with a single service, possibly
+over multiple intents. The multi-domain dialogues have interactions involving
+intents belonging to two or more different services. The multi-domain dialogues
+also involve transfer of dialogue state values from one service to the other
+wherever such a transfer is deemed natural. Eg, if a user finds a restaurant and
+searches for a movie next, the dialogue state for movie service is already
+initialized with the location from the dialogue state for restaurant service.
+
+The overall statistics of the train, dev and test sets are given below. The term
+*informable slots* refers to the slots over which the user can specify a
+constraint. For example, slots like *phone_number* are not informable.
+
+<table>
+    <tr>
+        <th rowspan="2"></th>
+        <th colspan="3">Train</th><th colspan="3">Dev</th><th colspan="3">Test</th>
+    </tr>
+    <tr>
+        <td>Single-domain</td>
+        <td>Multi-domain</td>
+        <td>Combined</td>
+        <td>Single-domain</td>
+        <td>Multi-domain</td>
+        <td>Combined</td>
+        <td>Single-domain</td>
+        <td>Multi-domain</td>
+        <td>Combined</td>
+    </tr>
+    <tr>
+        <td>No. of dialogues</td>
+        <td align="center">5,403</td>
+        <td align="center">10,739</td>
+        <td align="center">16,142</td>
+        <td align="center">836</td>
+        <td align="center">1,646</td>
+        <td align="center">2,482</td>
+        <td align="center">1,331</td>
+        <td align="center">2,870</td>
+        <td align="center">4,201</td>
+    </tr>
+    <tr>
+        <td>No. of turns</td>
+        <td align="center">82,588</td>
+        <td align="center">247,376</td>
+        <td align="center">329,964</td>
+        <td align="center">11,928</td>
+        <td align="center">36,798</td>
+        <td align="center">48,726</td>
+        <td align="center">16,850</td>
+        <td align="center">67,744</td>
+        <td align="center">84,594</td>
+    </tr>
+    <tr>
+        <td>No. of tokens (lower-cased)</td>
+        <td align="center">807,562</td>
+        <td align="center">2,409,857</td>
+        <td align="center">3,217,419</td>
+        <td align="center">117,492</td>
+        <td align="center">353,381</td>
+        <td align="center">470,873</td>
+        <td align="center">166,329</td>
+        <td align="center">713,731</td>
+        <td align="center">880,060</td>
+    </tr>
+     <tr>
+        <td>Average turns per dialogue</td>
+        <td align="center">15.286</td>
+        <td align="center">23.035</td>
+        <td align="center">20.441</td>
+        <td align="center">14.268</td>
+        <td align="center">22.356</td>
+        <td align="center">19.632</td>
+        <td align="center">12.660</td>
+        <td align="center">23.604</td>
+        <td align="center">20.137</td>
+    </tr>
+    <tr>
+        <td>Average tokens per turn</td>
+        <td align="center">9.778</td>
+        <td align="center">9.742</td>
+        <td align="center">9.751</td>
+        <td align="center">9.850</td>
+        <td align="center">9.603</td>
+        <td align="center">9.664</td>
+        <td align="center">9.871</td>
+        <td align="center">10.536</td>
+        <td align="center">10.403</td>
+    </tr>
+    <tr>
+        <td>Total unique tokens (lower-cased)</td>
+        <td align="center">16,350</td>
+        <td align="center">25,459</td>
+        <td align="center">30,349</td>
+        <td align="center">6,803</td>
+        <td align="center">10,533</td>
+        <td align="center">12,719</td>
+        <td align="center">7,213</td>
+        <td align="center">14,888</td>
+        <td align="center">16,382</td>
+    </tr>
+    <tr>
+        <td>Total no. of slots</td>
+        <td align="center">201</td>
+        <td align="center">214</td>
+        <td align="center">214</td>
+        <td align="center">134</td>
+        <td align="center">132</td>
+        <td align="center">136</td>
+        <td align="center">157</td>
+        <td align="center">158</td>
+        <td align="center">159</td>
+    </tr>
+    <tr>
+        <td>Total no. of informable slots</td>
+        <td align="center">138</td>
+        <td align="center">144</td>
+        <td align="center">144</td>
+        <td align="center">89</td>
+        <td align="center">87</td>
+        <td align="center">89</td>
+        <td align="center">109</td>
+        <td align="center">110</td>
+        <td align="center">111</td>
+    </tr>
+    <tr>
+        <td>Total unique slot values (lower-cased)</td>
+        <td align="center">7,070</td>
+        <td align="center">11,635</td>
+        <td align="center">14,139</td>
+        <td align="center">2,418</td>
+        <td align="center">4,182</td>
+        <td align="center">5,101</td>
+        <td align="center">2,492</td>
+        <td align="center">5,847</td>
+        <td align="center">6,533</td>
+    </tr>
+    <tr>
+        <td>Total unique informable slot values (lower-cased)</td>
+        <td align="center">3,742</td>
+        <td align="center">6,348</td>
+        <td align="center">7,661</td>
+        <td align="center">1,137</td>
+        <td align="center">2,118</td>
+        <td align="center">2,524</td>
+        <td align="center">1,387</td>
+        <td align="center">3,323</td>
+        <td align="center">3,727</td>
+    </tr>
+    <tr>
+        <td>Total domains</td>
+        <td align="center">14</td>
+        <td align="center">16</td>
+        <td align="center">16</td>
+        <td align="center">16</td>
+        <td align="center">15</td>
+        <td align="center">16</td>
+        <td align="center">17</td>
+        <td align="center">18</td>
+        <td align="center">18</td>
+    </tr>
+    <tr>
+        <td>Total services</td>
+        <td align="center">24</td>
+        <td align="center">26</td>
+        <td align="center">26</td>
+        <td align="center">17</td>
+        <td align="center">16</td>
+        <td align="center">17</td>
+        <td align="center">20</td>
+        <td align="center">21</td>
+        <td align="center">21</td>
+    </tr>
+    <tr>
+        <td>Total intents</td>
+        <td align="center">35</td>
+        <td align="center">37</td>
+        <td align="center">37</td>
+        <td align="center">28</td>
+        <td align="center">26</td>
+        <td align="center">28</td>
+        <td align="center">33</td>
+        <td align="center">34</td>
+        <td align="center">35</td>
+    </tr>
+</table>
+
+
+The following table shows how the dialogues and services are distributed among
+different domains for the train, dev and test sets. In this table, each
+multi-domain dialogue contributes to the count of every service present in the
+dialogue.
+Please note that a few domains like *Travel* and *Weather* are only present in
+the dev set. This is to test the generalization of models on unseen domains. The
+test set will similarly have some unseen domains which are neither present in
+the training nor in the dev set. Also, the number in parentheses represents the
+number of unique services belonging to the corresponding domain.
+
+* In the first column, it indicates the number of unique services for the
+  domain in Train, Dev and Test datasets combined.
+* In the fourth column, it indicates the number of such unique services in the
+  Train dataset only.
+* In the seventh column, it indicates the number of such unique services in
+  the Dev dataset only.
+* In the last column, it indicates the number of such unique services in the
+  Test dataset only.
+
+<table>
+    <tr>
+        <th rowspan="2"></th>
+        <th colspan="3"># Dialogues <br> Train</th>
+        <th colspan="3"># Dialogues <br> Dev</th>
+        <th colspan="3"># Dialogues <br> Test</th>
+    </tr>
+    <tr>
+        <td>Single-domain</td>
+        <td>Multi-domain</td>
+        <td>Combined</td>
+        <td>Single-domain</td>
+        <td>Multi-domain</td>
+        <td>Combined</td>
+        <td>Single-domain</td>
+        <td>Multi-domain</td>
+        <td>Combined</td>
+    </tr>
+    <tr>
+        <td>Alarm (1)</td>
+        <td align="center">NA</td>
+        <td align="center">NA</td>
+        <td align="center">NA</td>
+        <td align="center">37</td>
+        <td align="center">NA</td>
+        <td align="center">37 (1)</td>
+        <td align="center">47</td>
+        <td align="center">240</td>
+        <td align="center">287 (1)</td>
+    </tr>
+    <tr>
+        <td>Banks (2)</td>
+        <td align="center">207</td>
+        <td align="center">520</td>
+        <td align="center">727 (1)</td>
+        <td align="center">42</td>
+        <td align="center">252</td>
+        <td align="center">294 (1)</td>
+        <td align="center">NA</td>
+        <td align="center">NA</td>
+        <td align="center">NA</td>
+    </tr>
+    <tr>
+        <td>Buses (3)</td>
+        <td align="center">310</td>
+        <td align="center">1,970</td>
+        <td align="center">2,280 (2)</td>
+        <td align="center">44</td>
+        <td align="center">285</td>
+        <td align="center">329 (1)</td>
+        <td align="center">88</td>
+        <td align="center">438</td>
+        <td align="center">526 (1)</td>
+    </tr>
+    <tr>
+        <td>Calendar (1)</td>
+        <td align="center">169</td>
+        <td align="center">1,433</td>
+        <td align="center">1,602 (1)</td>
+        <td align="center">NA</td>
+        <td align="center">NA</td>
+        <td align="center">NA</td>
+        <td align="center">NA</td>
+        <td align="center">NA</td>
+        <td align="center">NA</td>
+    </tr>
+    <tr>
+        <td>Events (3)</td>
+        <td align="center">788</td>
+        <td align="center">2,721</td>
+        <td align="center">3,509 (1)</td>
+        <td align="center">73</td>
+        <td align="center">345</td>
+        <td align="center">418 (1)</td>
+        <td align="center">76</td>
+        <td align="center">516</td>
+        <td align="center">592 (1)</td>
+    </tr>
+    <tr>
+        <td>Flights (4)</td>
+        <td align="center">985</td>
+        <td align="center">1,762</td>
+        <td align="center">2,747 (2)</td>
+        <td align="center">94</td>
+        <td align="center">297</td>
+        <td align="center">391 (1)</td>
+        <td align="center">87</td>
+        <td align="center">419</td>
+        <td align="center">506 (1)</td>
+    </tr>
+        <tr>
+        <td>Homes (2)</td>
+        <td align="center">268</td>
+        <td align="center">579</td>
+        <td align="center">847 (1)</td>
+        <td align="center">81</td>
+        <td align="center">99</td>
+        <td align="center">180 (1)</td>
+        <td align="center">89</td>
+        <td align="center">157</td>
+        <td align="center">246 (1)</td>
+    </tr>
+        <tr>
+        <td>Hotels (4)</td>
+        <td align="center">457</td>
+        <td align="center">2,896</td>
+        <td align="center">3,353 (3)</td>
+        <td align="center">56</td>
+        <td align="center">521</td>
+        <td align="center">577 (2)</td>
+        <td align="center">177</td>
+        <td align="center">885</td>
+        <td align="center">1,062 (2)</td>
+    </tr>
+        <tr>
+        <td>Media (3)</td>
+        <td align="center">281</td>
+        <td align="center">832</td>
+        <td align="center">1,113 (1)</td>
+        <td align="center">46</td>
+        <td align="center">133</td>
+        <td align="center">179 (1)</td>
+        <td align="center">80</td>
+        <td align="center">284</td>
+        <td align="center">364 (1)</td>
+    </tr>
+        <tr>
+        <td>Messaging (1)</td>
+        <td align="center">NA</td>
+        <td align="center">NA</td>
+        <td align="center">NA</td>
+        <td align="center">NA</td>
+        <td align="center">NA</td>
+        <td align="center">NA</td>
+        <td align="center">NA</td>
+        <td align="center">298</td>
+        <td align="center">298 (1)</td>
+    </tr>
+        <tr>
+        <td>Movies (2)</td>
+        <td align="center">292</td>
+        <td align="center">1,325</td>
+        <td align="center">1,617 (1)</td>
+        <td align="center">47</td>
+        <td align="center">94</td>
+        <td align="center">141 (1)</td>
+        <td align="center">132</td>
+        <td align="center">449</td>
+        <td align="center">581</td>
+    </tr>
+        <tr>
+        <td>Music (3)</td>
+        <td align="center">394</td>
+        <td align="center">896</td>
+        <td align="center">1,290 (2)</td>
+        <td align="center">35</td>
+        <td align="center">161</td>
+        <td align="center">196 (1)</td>
+        <td align="center">25</td>
+        <td align="center">322</td>
+        <td align="center">347 (2)</td>
+    </tr>
+        <tr>
+        <td>Payment (1)</td>
+        <td align="center">NA</td>
+        <td align="center">NA</td>
+        <td align="center">NA</td>
+        <td align="center">NA</td>
+        <td align="center">NA</td>
+        <td align="center">NA</td>
+        <td align="center">36</td>
+        <td align="center">186</td>
+        <td align="center">222 (1)</td>
+    </tr>
+        <tr>
+        <td>RentalCars (3)</td>
+        <td align="center">215</td>
+        <td align="center">1,370</td>
+        <td align="center">1,585 (2)</td>
+        <td align="center">39</td>
+        <td align="center">342</td>
+        <td align="center">381 (1)</td>
+        <td align="center">64</td>
+        <td align="center">480</td>
+        <td align="center">544 (1)</td>
+    </tr>
+        <tr>
+        <td>Restaurants (2)</td>
+        <td align="center">367</td>
+        <td align="center">2,052</td>
+        <td align="center">2,419 (1)</td>
+        <td align="center">73</td>
+        <td align="center">263</td>
+        <td align="center">336 (1)</td>
+        <td align="center">73</td>
+        <td align="center">390</td>
+        <td align="center">463 (1)</td>
+    </tr>
+        <tr>
+        <td>RideSharing (2)</td>
+        <td align="center">119</td>
+        <td align="center">1,584</td>
+        <td align="center">1,703 (2)</td>
+        <td align="center">45</td>
+        <td align="center">225</td>
+        <td align="center">270 (1)</td>
+        <td align="center">34</td>
+        <td align="center">216</td>
+        <td align="center">250 (1)</td>
+    </tr>
+        <tr>
+        <td>Services (4)</td>
+        <td align="center">551</td>
+        <td align="center">1,338</td>
+        <td align="center">1,889 (3)</td>
+        <td align="center">44</td>
+        <td align="center">157</td>
+        <td align="center">201 (1)</td>
+        <td align="center">167</td>
+        <td align="center">489</td>
+        <td align="center">656 (2)</td>
+    </tr>
+        <tr>
+        <td>Trains (1)</td>
+        <td align="center">NA</td>
+        <td align="center">NA</td>
+        <td align="center">NA</td>
+        <td align="center">NA</td>
+        <td align="center">NA</td>
+        <td align="center">NA</td>
+        <td align="center">84</td>
+        <td align="center">266</td>
+        <td align="center">350 (1)</td>
+    </tr>
+        <tr>
+        <td>Travel (1)</td>
+        <td align="center">NA</td>
+        <td align="center">1,871</td>
+        <td align="center">1,871 (1)</td>
+        <td align="center">45</td>
+        <td align="center">238</td>
+        <td align="center">283 (1)</td>
+        <td align="center">24</td>
+        <td align="center">630</td>
+        <td align="center">654 (1)</td>
+    </tr>
+        <tr>
+        <td>Weather (1)</td>
+        <td align="center">NA</td>
+        <td align="center">951</td>
+        <td align="center">951 (1)</td>
+        <td align="center">35</td>
+        <td align="center">322</td>
+        <td align="center">357 (1)</td>
+        <td align="center">48</td>
+        <td align="center">427</td>
+        <td align="center">475 (1)</td>
+    </tr>
+</table>
+
diff --git a/data/unified_datasets/schema/data.zip b/data/unified_datasets/schema/data.zip
new file mode 100644
index 0000000000000000000000000000000000000000..d9cecdf51bc1793bed81920c5953bbb7bc001699
Binary files /dev/null and b/data/unified_datasets/schema/data.zip differ
diff --git a/data/unified_datasets/schema/ontology.json b/data/unified_datasets/schema/ontology.json
new file mode 100644
index 0000000000000000000000000000000000000000..56b66390614089b661d772d6fb70f75f9ce10ad1
--- /dev/null
+++ b/data/unified_datasets/schema/ontology.json
@@ -0,0 +1,7163 @@
+{
+  "domains": {
+    "bank_1": {
+      "description": "Manage bank accounts and transfer money",
+      "slots": {
+        "account_type": {
+          "description": "The account type of the user",
+          "is_categorical": true,
+          "possible_values": [
+            "checking",
+            "savings"
+          ]
+        },
+        "recipient_account_type": {
+          "description": "The account type of the recipient whom the user is transfering money to",
+          "is_categorical": true,
+          "possible_values": [
+            "checking",
+            "savings"
+          ]
+        },
+        "balance": {
+          "description": "The amount of money held in a bank account at a given time",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "amount": {
+          "description": "The amount of money to transfer",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "recipient_account_name": {
+          "description": "The account name of the recipient who is to receive the transfered money",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "bus_1": {
+      "description": "Book bus journeys from the biggest bus network in the country",
+      "slots": {
+        "from_location": {
+          "description": "City where bus is leaving from",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "to_location": {
+          "description": "City where bus is going to",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "from_station": {
+          "description": "Station where bus is leaving from",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "to_station": {
+          "description": "Station where bus is going to",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "leaving_date": {
+          "description": "Date of bus leaving for journey",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "leaving_time": {
+          "description": "Time of bus leaving for journey",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "fare": {
+          "description": "Fare per ticket for journey",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "travelers": {
+          "description": "Number of travelers for journey",
+          "is_categorical": true,
+          "possible_values": [
+            "1",
+            "2",
+            "3",
+            "4",
+            "5"
+          ]
+        },
+        "transfers": {
+          "description": "Number of transfers in journey",
+          "is_categorical": true,
+          "possible_values": [
+            "0",
+            "1"
+          ]
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "bus_2": {
+      "description": "Find a bus to take you to the city you want",
+      "slots": {
+        "origin": {
+          "description": "Origin city for journey",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "destination": {
+          "description": "Destination city for journey",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "origin_station_name": {
+          "description": "Name of the bus terminus at the origin",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "destination_station_name": {
+          "description": "Name of the bus terminus at the destination",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "departure_date": {
+          "description": "Date of bus departure",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "price": {
+          "description": "Price per ticket of the itinerary",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "departure_time": {
+          "description": "Time of bus departure",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "group_size": {
+          "description": "Size of group for the booking",
+          "is_categorical": true,
+          "possible_values": [
+            "1",
+            "2",
+            "3",
+            "4",
+            "5"
+          ]
+        },
+        "fare_type": {
+          "description": "Type of fare for the booking",
+          "is_categorical": true,
+          "possible_values": [
+            "economy",
+            "economy extra",
+            "flexible"
+          ]
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "calendar_1": {
+      "description": "Calendar service to manage personal events and reservations",
+      "slots": {
+        "event_date": {
+          "description": "Date of event or for checking availability",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "event_time": {
+          "description": "Start time of event",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "event_location": {
+          "description": "Location of event",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "event_name": {
+          "description": "Title of event",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "available_start_time": {
+          "description": "Starting time of available time slot",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "available_end_time": {
+          "description": "Ending time of available time slot",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "event_1": {
+      "description": "The comprehensive portal to find and reserve seats at events near you",
+      "slots": {
+        "category": {
+          "description": "Type of event",
+          "is_categorical": true,
+          "possible_values": [
+            "music",
+            "sports"
+          ]
+        },
+        "subcategory": {
+          "description": "Subcategory of event, either a music genre or sport name",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "event_name": {
+          "description": "Name of event",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "date": {
+          "description": "Date of occurrence of event",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "time": {
+          "description": "Time when the event is scheduled to start",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "number_of_seats": {
+          "description": "Number of seats to find event tickets for",
+          "is_categorical": true,
+          "possible_values": [
+            "1",
+            "2",
+            "3",
+            "4",
+            "5",
+            "6",
+            "7",
+            "8",
+            "9"
+          ]
+        },
+        "city_of_event": {
+          "description": "City where event is happening",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "event_location": {
+          "description": "Name of event location",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "address_of_location": {
+          "description": "Street address of event location",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "event_2": {
+      "description": "Get tickets for the coolest concerts and sports in your area",
+      "slots": {
+        "event_type": {
+          "description": "Type of event",
+          "is_categorical": true,
+          "possible_values": [
+            "music",
+            "sports"
+          ]
+        },
+        "category": {
+          "description": "The sport or music subcategory",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "event_name": {
+          "description": "Name of match or artist for event",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "date": {
+          "description": "Date of event",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "time": {
+          "description": "Starting time for event",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "number_of_tickets": {
+          "description": "Number of tickets to buy for event",
+          "is_categorical": true,
+          "possible_values": [
+            "1",
+            "2",
+            "3",
+            "4",
+            "5",
+            "6",
+            "7",
+            "8",
+            "9"
+          ]
+        },
+        "city": {
+          "description": "City where the event is taking place",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "venue": {
+          "description": "Exact venue of event",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "venue_address": {
+          "description": "Address of event venue",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "flight_1": {
+      "description": "Find your next flight",
+      "slots": {
+        "passengers": {
+          "description": "Number of passengers in the booking",
+          "is_categorical": true,
+          "possible_values": [
+            "1",
+            "2",
+            "3",
+            "4"
+          ]
+        },
+        "seating_class": {
+          "description": "Seating class for the booking",
+          "is_categorical": true,
+          "possible_values": [
+            "economy",
+            "premium economy",
+            "business",
+            "first class"
+          ]
+        },
+        "origin_city": {
+          "description": "Starting city for the trip",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "destination_city": {
+          "description": "Ending city for the trip",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "origin_airport": {
+          "description": "Airport at the starting city for the trip",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "destination_airport": {
+          "description": "Airport at the ending city for the trip",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "departure_date": {
+          "description": "Start date for the trip",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "return_date": {
+          "description": "Date of the return flight",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "number_stops": {
+          "description": "Number of layovers in the flight",
+          "is_categorical": true,
+          "possible_values": [
+            "0",
+            "1"
+          ]
+        },
+        "outbound_departure_time": {
+          "description": "Departure time for the outbound leg flight",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "outbound_arrival_time": {
+          "description": "Arrival time for the outbound leg flight",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "inbound_arrival_time": {
+          "description": "Arrival time for the return leg flight",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "inbound_departure_time": {
+          "description": "Departure time for the return leg flight",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "price": {
+          "description": "Price of the booking per passenger",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "refundable": {
+          "description": "Whether the booking is refundable or not",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "airlines": {
+          "description": "Name of airline",
+          "is_categorical": true,
+          "possible_values": [
+            "united airlines",
+            "american airlines",
+            "delta airlines",
+            "southwest airlines",
+            "alaska airlines",
+            "british airways",
+            "air canada",
+            "air france"
+          ]
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "flight_2": {
+      "description": "Search for cheap flights across multiple providers",
+      "slots": {
+        "passengers": {
+          "description": "Number of passengers to book tickets for",
+          "is_categorical": true,
+          "possible_values": [
+            "1",
+            "2",
+            "3",
+            "4",
+            "5"
+          ]
+        },
+        "seating_class": {
+          "description": "Seating class for the flight tickets",
+          "is_categorical": true,
+          "possible_values": [
+            "economy",
+            "premium economy",
+            "business",
+            "first class"
+          ]
+        },
+        "origin": {
+          "description": "City of origin for the flight",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "destination": {
+          "description": "City of destination for the flight",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "origin_airport": {
+          "description": "Airport where the flight is departing from",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "destination_airport": {
+          "description": "Airport where the flight is arriving to",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "departure_date": {
+          "description": "Date of departure flight on the ticket",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "return_date": {
+          "description": "Date of return flight on the ticket",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "number_stops": {
+          "description": "Number of stopovers made by the flight",
+          "is_categorical": true,
+          "possible_values": [
+            "0",
+            "1"
+          ]
+        },
+        "outbound_departure_time": {
+          "description": "Departure time of the origin-destination flight",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "outbound_arrival_time": {
+          "description": "Arrival time of the origin-destination flight",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "inbound_arrival_time": {
+          "description": "Arrival time of the destination-origin flight",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "inbound_departure_time": {
+          "description": "Departure time of the destination-origin flight",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "fare": {
+          "description": "Ticket fare for each passenger",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "is_redeye": {
+          "description": "Boolean flag indicating whether the flight is a red-eye flight",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "airlines": {
+          "description": "Name of the airline to book with",
+          "is_categorical": true,
+          "possible_values": [
+            "united airlines",
+            "american airlines",
+            "delta airlines",
+            "southwest airlines",
+            "alaska airlines",
+            "british airways",
+            "air canada",
+            "air france"
+          ]
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "home_1": {
+      "description": "A widely used service for finding apartments and scheduling visits",
+      "slots": {
+        "area": {
+          "description": "City where the apartment is located",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "address": {
+          "description": "Address of the apartment",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "property_name": {
+          "description": "Name of the apartment",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "phone_number": {
+          "description": "Phone number of the apartment",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "furnished": {
+          "description": "Boolean flag indicating if the property is furnished",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "pets_allowed": {
+          "description": "Boolean flag indicating if pets are allowed",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "rent": {
+          "description": "Rent per month of the apartment",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "visit_date": {
+          "description": "Date for the visit to the apartment",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "number_of_beds": {
+          "description": "Number of bedrooms",
+          "is_categorical": true,
+          "possible_values": [
+            "1",
+            "2",
+            "3",
+            "4"
+          ]
+        },
+        "number_of_baths": {
+          "description": "Number of baths in the apartment",
+          "is_categorical": true,
+          "possible_values": [
+            "1",
+            "2",
+            "3",
+            "4"
+          ]
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "hotel_1": {
+      "description": "A popular service for searching and reserving rooms in hotels",
+      "slots": {
+        "destination": {
+          "description": "Location of the hotel",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "number_of_rooms": {
+          "description": "Number of rooms in the reservation",
+          "is_categorical": true,
+          "possible_values": [
+            "1",
+            "2",
+            "3"
+          ]
+        },
+        "check_in_date": {
+          "description": "Start date for the reservation",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "number_of_days": {
+          "description": "Number of days in the reservation",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "star_rating": {
+          "description": "Star rating of the hotel",
+          "is_categorical": true,
+          "possible_values": [
+            "1",
+            "2",
+            "3",
+            "4",
+            "5"
+          ]
+        },
+        "hotel_name": {
+          "description": "Name of the hotel",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "street_address": {
+          "description": "Address of the hotel",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "phone_number": {
+          "description": "Phone number of the hotel",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "price_per_night": {
+          "description": "Price per night for the reservation",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "has_wifi": {
+          "description": "Boolean flag indicating if the hotel has wifi",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "hotel_2": {
+      "description": "A popular service for searching and booking houses for short term stay",
+      "slots": {
+        "where_to": {
+          "description": "Location of the house",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "number_of_adults": {
+          "description": "Number of people for the reservation",
+          "is_categorical": true,
+          "possible_values": [
+            "1",
+            "2",
+            "3",
+            "4",
+            "5"
+          ]
+        },
+        "check_in_date": {
+          "description": "Start date for the reservation or to find the house",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "check_out_date": {
+          "description": "End date for the reservation or to find the house",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "rating": {
+          "description": "Review rating of the house",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "address": {
+          "description": "Address of the house",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "phone_number": {
+          "description": "Phone number of the house",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "total_price": {
+          "description": "Price per night of the house",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "has_laundry_service": {
+          "description": "Boolean flag indicating if the house has laundry service",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "hotel_3": {
+      "description": "A leading provider for searching and booking hotel rooms",
+      "slots": {
+        "location": {
+          "description": "Location of the hotel",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "number_of_rooms": {
+          "description": "Number of rooms to book",
+          "is_categorical": true,
+          "possible_values": [
+            "1",
+            "2",
+            "3"
+          ]
+        },
+        "check_in_date": {
+          "description": "Start date for the hotel reservation",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "check_out_date": {
+          "description": "End date for the hotel reservation",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "average_rating": {
+          "description": "Average review rating for the hotel",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "hotel_name": {
+          "description": "Name of the hotel",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "street_address": {
+          "description": "Address of the hotel",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "phone_number": {
+          "description": "Phone number of the hotel",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "price": {
+          "description": "Total price for the stay",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "pets_welcome": {
+          "description": "Boolean flag indicating if pets are allowed in the hotel",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "media_1": {
+      "description": "A leading provider of movies for searching and watching on-demand",
+      "slots": {
+        "title": {
+          "description": "Title of the movie",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "genre": {
+          "description": "Genre of the movie",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "subtitles": {
+          "description": "Boolean flag indicating if subtitles are desired for this movie",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "directed_by": {
+          "description": "Name of the director of the movie",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "movie_1": {
+      "description": "A go-to provider for finding movies, searching for show times and booking tickets",
+      "slots": {
+        "price": {
+          "description": "Price per ticket",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "number_of_tickets": {
+          "description": "Number of the movie tickets to be purchased",
+          "is_categorical": true,
+          "possible_values": [
+            "1",
+            "2",
+            "3",
+            "4",
+            "5",
+            "6",
+            "7",
+            "8",
+            "9"
+          ]
+        },
+        "show_type": {
+          "description": "Type of show",
+          "is_categorical": true,
+          "possible_values": [
+            "regular",
+            "3d",
+            "imax"
+          ]
+        },
+        "theater_name": {
+          "description": "Name of the theatre",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "show_time": {
+          "description": "Time of the show",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "show_date": {
+          "description": "Date of the show",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "genre": {
+          "description": "Genre of the movie",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "street_address": {
+          "description": "Address of the theatre",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "location": {
+          "description": "City where the theatre is located",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "movie_name": {
+          "description": "Name of the movie",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "music_1": {
+      "description": "A popular provider of a wide range of music content for searching and listening",
+      "slots": {
+        "song_name": {
+          "description": "Name of the song",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "artist": {
+          "description": "Artist who performed the song",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "album": {
+          "description": "Album the song belongs to",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "genre": {
+          "description": "Genre of the song",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "year": {
+          "description": "Year in which the song was released",
+          "is_categorical": true,
+          "possible_values": [
+            "2010",
+            "2011",
+            "2012",
+            "2013",
+            "2014",
+            "2015",
+            "2016",
+            "2017",
+            "2018",
+            "2019"
+          ]
+        },
+        "playback_device": {
+          "description": "Playback device on which the song is to be played",
+          "is_categorical": true,
+          "possible_values": [
+            "tv",
+            "kitchen speaker",
+            "bedroom speaker"
+          ]
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "music_2": {
+      "description": "A widely used service for finding and playing music from a variety of genres and artists",
+      "slots": {
+        "song_name": {
+          "description": "Name of the song",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "artist": {
+          "description": "Name of the artist the song is performed by",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "album": {
+          "description": "Album the song belongs to",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "genre": {
+          "description": "Genre of the song",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "playback_device": {
+          "description": "Playback device on which the song is to be played",
+          "is_categorical": true,
+          "possible_values": [
+            "tv",
+            "kitchen speaker",
+            "bedroom speaker"
+          ]
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "rentalcar_1": {
+      "description": "Car rental service with extensive coverage of locations and cars",
+      "slots": {
+        "type": {
+          "description": "Category to which rental car belongs",
+          "is_categorical": true,
+          "possible_values": [
+            "compact",
+            "standard",
+            "full-size"
+          ]
+        },
+        "car_name": {
+          "description": "Model name of rental car",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "pickup_location": {
+          "description": "Location of rental car pickup",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "pickup_date": {
+          "description": "Date of rental car pickup",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "pickup_time": {
+          "description": "Time of rental car pickup",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "pickup_city": {
+          "description": "City to pick up the rental car",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "dropoff_date": {
+          "description": "Date of rental car drop-off",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "total_price": {
+          "description": "Total price of car rental",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "rentalcar_2": {
+      "description": "Car rental service, available worldwide",
+      "slots": {
+        "car_type": {
+          "description": "Type of car for rental",
+          "is_categorical": true,
+          "possible_values": [
+            "compact",
+            "standard",
+            "full-size"
+          ]
+        },
+        "car_name": {
+          "description": "Name of car model",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "pickup_location": {
+          "description": "Pickup location for car rental",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "pickup_date": {
+          "description": "Date of pickup for car rental",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "pickup_time": {
+          "description": "Time of pickup for car rental",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "pickup_city": {
+          "description": "City in which to pick up rental car",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "dropoff_date": {
+          "description": "End date of car rental reservation",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "total_price": {
+          "description": "Total price of rental reservation",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "restaurant_1": {
+      "description": "A leading provider for restaurant search and reservations",
+      "slots": {
+        "restaurant_name": {
+          "description": "Name of the restaurant",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "date": {
+          "description": "Date for the reservation or to find availability",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "time": {
+          "description": "Time for the reservation or to find availability",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "serves_alcohol": {
+          "description": "Boolean flag indicating if the restaurant serves alcohol",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "has_live_music": {
+          "description": "Boolean flag indicating if the restaurant has live music",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "phone_number": {
+          "description": "Phone number of the restaurant",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "street_address": {
+          "description": "Address of the restaurant",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "party_size": {
+          "description": "Party size for a reservation",
+          "is_categorical": true,
+          "possible_values": [
+            "1",
+            "2",
+            "3",
+            "4",
+            "5",
+            "6"
+          ]
+        },
+        "price_range": {
+          "description": "Price range for the restaurant",
+          "is_categorical": true,
+          "possible_values": [
+            "inexpensive",
+            "moderate",
+            "expensive",
+            "very expensive"
+          ]
+        },
+        "city": {
+          "description": "City in which the restaurant is located",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "cuisine": {
+          "description": "Cuisine of food served in the restaurant",
+          "is_categorical": false,
+          "possible_values": [
+            "mexican",
+            "chinese",
+            "indian",
+            "american",
+            "italian"
+          ]
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "ridesharing_1": {
+      "description": "On-demand taxi calling service",
+      "slots": {
+        "destination": {
+          "description": "Destination for taxi ride",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "shared_ride": {
+          "description": "Boolean flag whether ride is shared with other passengers",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "ride_fare": {
+          "description": "Total fare for taxi ride",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "approximate_ride_duration": {
+          "description": "Approximate duration of ride to the destination",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "number_of_riders": {
+          "description": "Number of riders to call taxi for",
+          "is_categorical": true,
+          "possible_values": [
+            "1",
+            "2",
+            "3",
+            "4"
+          ]
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "ridesharing_2": {
+      "description": "App to book a cab to any destination",
+      "slots": {
+        "destination": {
+          "description": "Destination address or location for cab",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "ride_type": {
+          "description": "Type of cab ride",
+          "is_categorical": true,
+          "possible_values": [
+            "pool",
+            "regular",
+            "luxury"
+          ]
+        },
+        "ride_fare": {
+          "description": "Total fare for cab ride",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "wait_time": {
+          "description": "Expected waiting time for pick-up by cab",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "number_of_seats": {
+          "description": "Number of seats to reserve in the cab",
+          "is_categorical": true,
+          "possible_values": [
+            "1",
+            "2",
+            "3",
+            "4"
+          ]
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "services_1": {
+      "description": "A widely used service for finding and reserving the hair stylist of your choice",
+      "slots": {
+        "stylist_name": {
+          "description": "Name of the hair stylist/salon",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "phone_number": {
+          "description": "Phone number of the stylist/salon",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "average_rating": {
+          "description": "Average review rating for the stylist/salon",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "is_unisex": {
+          "description": "Boolean flag indicating if the salon is unisex",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "street_address": {
+          "description": "Address of the stylist/salon",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "city": {
+          "description": "City where the salon is located",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "appointment_date": {
+          "description": "Date for the appointment",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "appointment_time": {
+          "description": "Time of the appointment",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "services_2": {
+      "description": "The go-to service for finding and booking appointments with top rated dentists",
+      "slots": {
+        "dentist_name": {
+          "description": "Name of the dentist",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "phone_number": {
+          "description": "Phone number of the dentist",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "address": {
+          "description": "Address of the dentist",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "city": {
+          "description": "City where the dentist is located",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "appointment_date": {
+          "description": "Date for the appointment",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "appointment_time": {
+          "description": "Time for the appointment",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "offers_cosmetic_services": {
+          "description": "Boolean flag indicating if the dentist offers cosmetic services",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "services_3": {
+      "description": "A popular provider for finding the right doctor for your needs. Also allows you to schedule your visit to the doctor",
+      "slots": {
+        "doctor_name": {
+          "description": "Name of the doctor or the medical practice",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "phone_number": {
+          "description": "Contact number for the doctor or the medical practice",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "average_rating": {
+          "description": "Average review rating of the doctor",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "street_address": {
+          "description": "Address of the doctor",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "city": {
+          "description": "City where the doctor is located",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "appointment_date": {
+          "description": "Date for scheduling the appointment with the doctor",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "appointment_time": {
+          "description": "Time for the appointment with the doctor",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "type": {
+          "description": "Speciality of the doctor",
+          "is_categorical": true,
+          "possible_values": [
+            "gynecologist",
+            "ent specialist",
+            "ophthalmologist",
+            "general practitioner",
+            "dermatologist"
+          ]
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "travel_1": {
+      "description": "The biggest database of tourist attractions and points of interest",
+      "slots": {
+        "location": {
+          "description": "City or town where the attraction is located",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "attraction_name": {
+          "description": "Common name of the attraction",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "category": {
+          "description": "Category to which the attraction belongs",
+          "is_categorical": true,
+          "possible_values": [
+            "place of worship",
+            "theme park",
+            "museum",
+            "historical landmark",
+            "park",
+            "tourist attraction",
+            "sports venue",
+            "shopping area",
+            "performing arts venue",
+            "nature preserve"
+          ]
+        },
+        "phone_number": {
+          "description": "Phone number to contact the attraction",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "free_entry": {
+          "description": "Boolean flag indicating whether entrance to attraction is free",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "good_for_kids": {
+          "description": "Boolean flag indicating whether attraction is good to take kids to",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "weather_1": {
+      "description": "Check the weather for any place and any date",
+      "slots": {
+        "precipitation": {
+          "description": "The possibility of rain or snow in percentage",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "humidity": {
+          "description": "Percentage humidity",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "wind": {
+          "description": "Wind speed in miles per hour",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "temperature": {
+          "description": "Temperature in Fahrenheit",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "city": {
+          "description": "Name of the city",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "date": {
+          "description": "Date for the weather",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "alarm_1": {
+      "description": "Manage alarms by getting and setting them easily",
+      "slots": {
+        "alarm_time": {
+          "description": "Time of the alarm",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "alarm_name": {
+          "description": "Name of the alarm",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "new_alarm_time": {
+          "description": "Time to set for the new alarm",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "new_alarm_name": {
+          "description": "Name to use for the new alarm",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "bank_2": {
+      "description": "Service to manage your bank accounts and finances",
+      "slots": {
+        "account_type": {
+          "description": "The user's account type",
+          "is_categorical": true,
+          "possible_values": [
+            "checking",
+            "savings"
+          ]
+        },
+        "recipient_account_type": {
+          "description": "The account type of the recipient to transfer the money to",
+          "is_categorical": true,
+          "possible_values": [
+            "checking",
+            "savings"
+          ]
+        },
+        "account_balance": {
+          "description": "The balance in the specified account",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "transfer_amount": {
+          "description": "The amount of money to transfer",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "recipient_name": {
+          "description": "The name of the recipient to transfer the money to",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "transfer_time": {
+          "description": "Number of days for the transfer to go through",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "flight_3": {
+      "description": "Find one way and round trip flights to your favorite city",
+      "slots": {
+        "passengers": {
+          "description": "Number of passengers to find flight seats for",
+          "is_categorical": true,
+          "possible_values": [
+            "1",
+            "2",
+            "3",
+            "4"
+          ]
+        },
+        "flight_class": {
+          "description": "Fare class of flight booking",
+          "is_categorical": true,
+          "possible_values": [
+            "economy",
+            "premium economy",
+            "business",
+            "first class"
+          ]
+        },
+        "origin_city": {
+          "description": "City in which the journey originates",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "destination_city": {
+          "description": "City in which the journey ends",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "origin_airport_name": {
+          "description": "Name of the airport flying out from",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "destination_airport_name": {
+          "description": "Name of the airport flying to",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "departure_date": {
+          "description": "Date of departure flight",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "return_date": {
+          "description": "Date of return flight",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "number_stops": {
+          "description": "Number of stops in the itinerary",
+          "is_categorical": true,
+          "possible_values": [
+            "0",
+            "1"
+          ]
+        },
+        "outbound_departure_time": {
+          "description": "Local time of departure of flight from origin to destination",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "outbound_arrival_time": {
+          "description": "Local time of arrival of flight from origin to destination",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "inbound_arrival_time": {
+          "description": "Local time of arrival of flight from destination to origin",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "inbound_departure_time": {
+          "description": "Local time of departure of flight from destination to origin",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "price": {
+          "description": "Price per passenger of the itinerary",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "number_checked_bags": {
+          "description": "Number of bags to check in",
+          "is_categorical": true,
+          "possible_values": [
+            "0",
+            "1",
+            "2"
+          ]
+        },
+        "airlines": {
+          "description": "Name of airline operating the flight",
+          "is_categorical": true,
+          "possible_values": [
+            "united airlines",
+            "american airlines",
+            "delta airlines",
+            "southwest airlines",
+            "alaska airlines",
+            "british airways",
+            "air canada",
+            "air france"
+          ]
+        },
+        "arrives_next_day": {
+          "description": "Whether the flight arrives the next day",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "hotel_4": {
+      "description": "Accommodation searching and booking portal",
+      "slots": {
+        "location": {
+          "description": "City or town where the accommodation is located",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "number_of_rooms": {
+          "description": "Number of rooms to reserve",
+          "is_categorical": true,
+          "possible_values": [
+            "1",
+            "2",
+            "3"
+          ]
+        },
+        "check_in_date": {
+          "description": "Check in date for reservation",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "stay_length": {
+          "description": "Length of stay in days",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "star_rating": {
+          "description": "Star rating of the accommodation",
+          "is_categorical": true,
+          "possible_values": [
+            "1",
+            "2",
+            "3",
+            "4",
+            "5"
+          ]
+        },
+        "place_name": {
+          "description": "Name of the accommodation",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "street_address": {
+          "description": "Street address of the accommodation",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "phone_number": {
+          "description": "Contact phone number of the accommodation",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "price_per_night": {
+          "description": "Price per night for the stay",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "smoking_allowed": {
+          "description": "Whether or not smoking is allowed inside the place",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "media_2": {
+      "description": "The widest selection and lowest prices for movie rentals",
+      "slots": {
+        "movie_name": {
+          "description": "Name of the movie",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "genre": {
+          "description": "Main genre of the movie",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "subtitle_language": {
+          "description": "Language to use for subtitles (or None for no subtitles)",
+          "is_categorical": true,
+          "possible_values": [
+            "none",
+            "english",
+            "mandarin",
+            "spanish"
+          ]
+        },
+        "director": {
+          "description": "Name of the director of the movie",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "actors": {
+          "description": "Name of an actor starring in the movie",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "price": {
+          "description": "Cost of renting movie",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "movie_2": {
+      "description": "The definitive database to discover new movies to watch",
+      "slots": {
+        "title": {
+          "description": "Title for movie",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "genre": {
+          "description": "Genre of the movie",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "aggregate_rating": {
+          "description": "Aggregate user rating for movie, scale of 10",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "starring": {
+          "description": "Name of actor starring in movie",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "director": {
+          "description": "Name of director of the movie",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "restaurant_2": {
+      "description": "A popular restaurant search and reservation service",
+      "slots": {
+        "restaurant_name": {
+          "description": "Name of the restaurant",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "date": {
+          "description": "Tentative date of restaurant reservation",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "time": {
+          "description": "Tentative time of restaurant reservation",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "has_seating_outdoors": {
+          "description": "Whether the restaurant has outdoor seating available",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "has_vegetarian_options": {
+          "description": "Whether the restaurant has adequate vegetarian options",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "phone_number": {
+          "description": "Phone number to contact restaurant",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "rating": {
+          "description": "Average user rating for restaurant on a scale of 5",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "address": {
+          "description": "Address of restaurant",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "number_of_seats": {
+          "description": "Number of seats to reserve at the restaurant",
+          "is_categorical": true,
+          "possible_values": [
+            "1",
+            "2",
+            "3",
+            "4",
+            "5",
+            "6"
+          ]
+        },
+        "price_range": {
+          "description": "Price range for the restaurant",
+          "is_categorical": true,
+          "possible_values": [
+            "cheap",
+            "moderate",
+            "pricey",
+            "ultra high-end"
+          ]
+        },
+        "location": {
+          "description": "City where the restaurant is located",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "category": {
+          "description": "The category of food offered by the restaurant",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "services_4": {
+      "description": "Discover the right therapist for you and make reservations easily",
+      "slots": {
+        "therapist_name": {
+          "description": "Name of the therapist",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "phone_number": {
+          "description": "Contact number of the therapist",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "address": {
+          "description": "Address of the therapist",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "city": {
+          "description": "Area where user wants to search for a therapist",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "appointment_date": {
+          "description": "Date of the appointment",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "appointment_time": {
+          "description": "Time of the appointment",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "type": {
+          "description": "Type of the therapist",
+          "is_categorical": true,
+          "possible_values": [
+            "psychologist",
+            "family counselor",
+            "psychiatrist"
+          ]
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "bus_3": {
+      "description": "Affordable and comfortable bus travel across the country",
+      "slots": {
+        "from_city": {
+          "description": "The city to depart from",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "to_city": {
+          "description": "The destination city of the trip",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "from_station": {
+          "description": "Name of station of departure",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "to_station": {
+          "description": "Name of station of arrival",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "departure_date": {
+          "description": "The date of departure",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "departure_time": {
+          "description": "The time of departure",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "price": {
+          "description": "Ticket price per passenger",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "additional_luggage": {
+          "description": "Whether to carry excess baggage in the bus",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "num_passengers": {
+          "description": "The number of tickets for the trip",
+          "is_categorical": true,
+          "possible_values": [
+            "1",
+            "2",
+            "3",
+            "4",
+            "5"
+          ]
+        },
+        "category": {
+          "description": "How many stops the route has",
+          "is_categorical": true,
+          "possible_values": [
+            "direct",
+            "one-stop"
+          ]
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "event_3": {
+      "description": "Find and book tickets to any cultural events in your area",
+      "slots": {
+        "event_type": {
+          "description": "Type of cultural event",
+          "is_categorical": true,
+          "possible_values": [
+            "music",
+            "theater"
+          ]
+        },
+        "event_name": {
+          "description": "Name of artist or play",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "date": {
+          "description": "Date of event",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "time": {
+          "description": "Start time of event",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "number_of_tickets": {
+          "description": "Number of tickets to reserve for the event",
+          "is_categorical": true,
+          "possible_values": [
+            "1",
+            "2",
+            "3",
+            "4",
+            "5",
+            "6",
+            "7",
+            "8",
+            "9"
+          ]
+        },
+        "price_per_ticket": {
+          "description": "Price of each ticket",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "city": {
+          "description": "City where the event is taking place",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "venue": {
+          "description": "Exact venue of event",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "venue_address": {
+          "description": "Street address of event venue",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "flight_4": {
+      "description": "Find cheap flights in seconds and book flights",
+      "slots": {
+        "number_of_tickets": {
+          "description": "The number of flight tickets for the trip",
+          "is_categorical": true,
+          "possible_values": [
+            "1",
+            "2",
+            "3",
+            "4"
+          ]
+        },
+        "seating_class": {
+          "description": "The cabin seat option",
+          "is_categorical": true,
+          "possible_values": [
+            "economy",
+            "premium economy",
+            "business"
+          ]
+        },
+        "origin_airport": {
+          "description": "The name of the airport or city to depart from",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "destination_airport": {
+          "description": "The name of the airport or city to arrive at",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "departure_date": {
+          "description": "Start date of the trip",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "return_date": {
+          "description": "End date of the trip",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "is_nonstop": {
+          "description": "Whether the flight is a direct one",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "outbound_departure_time": {
+          "description": "Departure time of the flight flying to the destination",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "outbound_arrival_time": {
+          "description": "Arrival time of the flight flying to the destination",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "inbound_arrival_time": {
+          "description": "Arrival time of the flight coming back from the trip",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "inbound_departure_time": {
+          "description": "Departure time of the flight coming back from the trip",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "price": {
+          "description": "The total cost of the flight tickets",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "airlines": {
+          "description": "The company that provides air transport services",
+          "is_categorical": true,
+          "possible_values": [
+            "united airlines",
+            "american airlines",
+            "delta airlines",
+            "southwest airlines",
+            "alaska airlines",
+            "british airways",
+            "air canada",
+            "air france",
+            "south african airways",
+            "lot polish airlines",
+            "latam brasil"
+          ]
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "home_2": {
+      "description": "Service for finding properties to buy and rent",
+      "slots": {
+        "intent": {
+          "description": "Whether to buy or rent a property",
+          "is_categorical": true,
+          "possible_values": [
+            "rent",
+            "buy"
+          ]
+        },
+        "area": {
+          "description": "City where the property is located",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "address": {
+          "description": "Street address of property",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "property_name": {
+          "description": "Name of property or apartment complex",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "phone_number": {
+          "description": "Contact number of property or apartment complex",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "has_garage": {
+          "description": "Whether the property has a garage",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "in_unit_laundry": {
+          "description": "Whether the property has in-unit laundry facilities",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "price": {
+          "description": "Sale price or per-month rent of property",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "visit_date": {
+          "description": "Date for visit to the property",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "number_of_beds": {
+          "description": "Number of bedrooms in the property",
+          "is_categorical": true,
+          "possible_values": [
+            "1",
+            "2",
+            "3",
+            "4"
+          ]
+        },
+        "number_of_baths": {
+          "description": "Number of bathrooms in the property",
+          "is_categorical": true,
+          "possible_values": [
+            "1",
+            "2",
+            "3",
+            "4"
+          ]
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "media_3": {
+      "description": "Enjoy instant and unlimited access to best shows, movies, comedy, sports, documentaries and more.",
+      "slots": {
+        "title": {
+          "description": "Title of the movie",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "genre": {
+          "description": "Category of the content",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "subtitle_language": {
+          "description": "Language of the subtitles",
+          "is_categorical": true,
+          "possible_values": [
+            "english",
+            "spanish",
+            "hindi",
+            "french"
+          ]
+        },
+        "starring": {
+          "description": "Celebs acting in the movie",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "messaging_1": {
+      "description": "Connect and share locations with your contacts",
+      "slots": {
+        "location": {
+          "description": "Location to share with contact",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "contact_name": {
+          "description": "Name of contact to send to",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "movie_3": {
+      "description": "A review-aggregation website for movies and television",
+      "slots": {
+        "movie_title": {
+          "description": "Name of the movie",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "genre": {
+          "description": "Type of the movie",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "percent_rating": {
+          "description": "Average critic percentage rating",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "cast": {
+          "description": "Actors in the movie",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "directed_by": {
+          "description": "Director of the movie",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "music_3": {
+      "description": "A free, personalized platform that plays music you'll love. Discover new music and enjoy old favorites.",
+      "slots": {
+        "track": {
+          "description": "Name of the song",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "artist": {
+          "description": "Performer's name",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "album": {
+          "description": "Collection of the song",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "genre": {
+          "description": "Type of the music",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "year": {
+          "description": "Year when the song was first released",
+          "is_categorical": true,
+          "possible_values": [
+            "2010",
+            "2011",
+            "2012",
+            "2013",
+            "2014",
+            "2015",
+            "2016",
+            "2017",
+            "2018",
+            "2019"
+          ]
+        },
+        "device": {
+          "description": "Place or name of the media player to play the song selected",
+          "is_categorical": true,
+          "possible_values": [
+            "living room",
+            "kitchen",
+            "patio"
+          ]
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "payment_1": {
+      "description": "The fast, simple way to pay in apps, on the web, and in millions of stores",
+      "slots": {
+        "payment_method": {
+          "description": "The source of money used for making the payment",
+          "is_categorical": true,
+          "possible_values": [
+            "app balance",
+            "debit card",
+            "credit card"
+          ]
+        },
+        "amount": {
+          "description": "The amount of money to send or request",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "receiver": {
+          "description": "Name of the contact or account to make the transaction with",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "private_visibility": {
+          "description": "Whether the transaction is private or not",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "rentalcar_3": {
+      "description": "A leading global provider of car rental solutions",
+      "slots": {
+        "car_type": {
+          "description": "Type of the car",
+          "is_categorical": true,
+          "possible_values": [
+            "hatchback",
+            "sedan",
+            "suv"
+          ]
+        },
+        "car_name": {
+          "description": "Car model",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "pickup_location": {
+          "description": "Place to pick up the car",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "start_date": {
+          "description": "The first date to start using the rental car",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "pickup_time": {
+          "description": "Time for the pick-up",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "city": {
+          "description": "City where you want to rent the car",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "end_date": {
+          "description": "The date to return the car",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "price_per_day": {
+          "description": "The cost for renting the car per day",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "add_insurance": {
+          "description": "Whether to purchase insurance",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    },
+    "train_1": {
+      "description": "Service to find and reserve train journeys between cities",
+      "slots": {
+        "from": {
+          "description": "Starting city for train journey",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "to": {
+          "description": "Ending city for train journey",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "from_station": {
+          "description": "Name of station at starting city",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "to_station": {
+          "description": "Name of station at ending city",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "date_of_journey": {
+          "description": "Date of train journey",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "journey_start_time": {
+          "description": "Time of start of train journey",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "total": {
+          "description": "Total price of train reservation",
+          "is_categorical": false,
+          "possible_values": []
+        },
+        "number_of_adults": {
+          "description": "Number of adults to reserve train tickets for",
+          "is_categorical": true,
+          "possible_values": [
+            "1",
+            "2",
+            "3",
+            "4",
+            "5"
+          ]
+        },
+        "class": {
+          "description": "Fare class for train reservation",
+          "is_categorical": true,
+          "possible_values": [
+            "value",
+            "flexible",
+            "business"
+          ]
+        },
+        "trip_protection": {
+          "description": "Whether to add trip protection to reservation, for a fee",
+          "is_categorical": true,
+          "possible_values": [
+            "true",
+            "false"
+          ]
+        },
+        "count": {
+          "description": "the number of items found that satisfy the user's request.",
+          "is_categorical": false,
+          "possible_values": []
+        }
+      }
+    }
+  },
+  "intents": {
+    "inform": {
+      "description": "Inform the value for a slot."
+    },
+    "request": {
+      "description": "Request the value of a slot."
+    },
+    "confirm": {
+      "description": "Confirm the value of a slot before making a transactional service call."
+    },
+    "offer": {
+      "description": "Offer a certain value for a slot to the user."
+    },
+    "notify_success": {
+      "description": "Inform the user that their request was successful."
+    },
+    "notify_failure": {
+      "description": "Inform the user that their request failed."
+    },
+    "inform_count": {
+      "description": "Inform the number of items found that satisfy the user's request."
+    },
+    "offer_intent": {
+      "description": "Offer a new intent to the user."
+    },
+    "req_more": {
+      "description": "Asking the user if they need anything else."
+    },
+    "goodbye": {
+      "description": "End the dialogue."
+    },
+    "inform_intent": {
+      "description": "Express the desire to perform a certain task to the system."
+    },
+    "negate_intent": {
+      "description": "Negate the intent which has been offered by the system."
+    },
+    "affirm_intent": {
+      "description": "Agree to the intent which has been offered by the system."
+    },
+    "affirm": {
+      "description": "Agree to the system's proposition. "
+    },
+    "negate": {
+      "description": "Deny the system's proposal."
+    },
+    "select": {
+      "description": "Select a result being offered by the system."
+    },
+    "request_alts": {
+      "description": "Ask for more results besides the ones offered by the system."
+    },
+    "thank_you": {
+      "description": "Thank the system."
+    }
+  },
+  "binary_dialogue_act": [
+    {
+      "intent": "affirm",
+      "domain": "",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "affirm_intent",
+      "domain": "restaurant_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "affirm_intent",
+      "domain": "event_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "affirm_intent",
+      "domain": "music_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "affirm_intent",
+      "domain": "music_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "affirm_intent",
+      "domain": "event_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "affirm_intent",
+      "domain": "flight_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "affirm_intent",
+      "domain": "media_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "affirm_intent",
+      "domain": "rentalcar_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "affirm_intent",
+      "domain": "rentalcar_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "affirm_intent",
+      "domain": "bus_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "affirm_intent",
+      "domain": "bus_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "affirm_intent",
+      "domain": "services_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "affirm_intent",
+      "domain": "services_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "affirm_intent",
+      "domain": "services_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "affirm_intent",
+      "domain": "home_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "affirm_intent",
+      "domain": "bank_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "affirm_intent",
+      "domain": "hotel_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "affirm_intent",
+      "domain": "calendar_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "affirm_intent",
+      "domain": "hotel_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "affirm_intent",
+      "domain": "hotel_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "affirm_intent",
+      "domain": "alarm_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "affirm_intent",
+      "domain": "services_4",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "affirm_intent",
+      "domain": "restaurant_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "affirm_intent",
+      "domain": "bank_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "affirm_intent",
+      "domain": "media_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "affirm_intent",
+      "domain": "hotel_4",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "affirm_intent",
+      "domain": "music_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "affirm_intent",
+      "domain": "event_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "affirm_intent",
+      "domain": "rentalcar_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "affirm_intent",
+      "domain": "bus_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "affirm_intent",
+      "domain": "home_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "affirm_intent",
+      "domain": "train_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "affirm_intent",
+      "domain": "movie_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "affirm_intent",
+      "domain": "media_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "goodbye",
+      "domain": "",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "restaurant_1",
+      "slot": "intent",
+      "value": "findrestaurants"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "restaurant_1",
+      "slot": "intent",
+      "value": "reserverestaurant"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "media_1",
+      "slot": "intent",
+      "value": "playmovie"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "event_2",
+      "slot": "intent",
+      "value": "geteventdates"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "event_2",
+      "slot": "intent",
+      "value": "buyeventtickets"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "music_2",
+      "slot": "intent",
+      "value": "lookupmusic"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "music_2",
+      "slot": "intent",
+      "value": "playmedia"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "music_1",
+      "slot": "intent",
+      "value": "lookupsong"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "music_1",
+      "slot": "intent",
+      "value": "playsong"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "event_1",
+      "slot": "intent",
+      "value": "findevents"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "event_1",
+      "slot": "intent",
+      "value": "buyeventtickets"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "event_2",
+      "slot": "intent",
+      "value": "findevents"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "movie_1",
+      "slot": "intent",
+      "value": "findmovies"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "movie_1",
+      "slot": "intent",
+      "value": "gettimesformovie"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "flight_1",
+      "slot": "intent",
+      "value": "searchonewayflight"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "flight_2",
+      "slot": "intent",
+      "value": "searchonewayflight"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "flight_1",
+      "slot": "intent",
+      "value": "reserveonewayflight"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "flight_1",
+      "slot": "intent",
+      "value": "searchroundtripflights"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "flight_1",
+      "slot": "intent",
+      "value": "reserveroundtripflights"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "flight_2",
+      "slot": "intent",
+      "value": "searchroundtripflights"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "media_1",
+      "slot": "intent",
+      "value": "findmovies"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "ridesharing_2",
+      "slot": "intent",
+      "value": "getride"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "ridesharing_1",
+      "slot": "intent",
+      "value": "getride"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "rentalcar_1",
+      "slot": "intent",
+      "value": "getcarsavailable"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "rentalcar_1",
+      "slot": "intent",
+      "value": "reservecar"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "rentalcar_2",
+      "slot": "intent",
+      "value": "getcarsavailable"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "rentalcar_2",
+      "slot": "intent",
+      "value": "reservecar"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "bus_2",
+      "slot": "intent",
+      "value": "findbus"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "bus_1",
+      "slot": "intent",
+      "value": "findbus"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "hotel_2",
+      "slot": "intent",
+      "value": "bookhouse"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "bus_2",
+      "slot": "intent",
+      "value": "buybusticket"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "bus_1",
+      "slot": "intent",
+      "value": "buybusticket"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "services_2",
+      "slot": "intent",
+      "value": "findprovider"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "services_2",
+      "slot": "intent",
+      "value": "bookappointment"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "services_1",
+      "slot": "intent",
+      "value": "findprovider"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "services_1",
+      "slot": "intent",
+      "value": "bookappointment"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "services_3",
+      "slot": "intent",
+      "value": "findprovider"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "services_3",
+      "slot": "intent",
+      "value": "bookappointment"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "home_1",
+      "slot": "intent",
+      "value": "findapartment"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "home_1",
+      "slot": "intent",
+      "value": "schedulevisit"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "bank_1",
+      "slot": "intent",
+      "value": "checkbalance"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "bank_1",
+      "slot": "intent",
+      "value": "transfermoney"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "hotel_2",
+      "slot": "intent",
+      "value": "searchhouse"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "calendar_1",
+      "slot": "intent",
+      "value": "getevents"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "calendar_1",
+      "slot": "intent",
+      "value": "getavailabletime"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "calendar_1",
+      "slot": "intent",
+      "value": "addevent"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "hotel_3",
+      "slot": "intent",
+      "value": "reservehotel"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "hotel_1",
+      "slot": "intent",
+      "value": "reservehotel"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "hotel_3",
+      "slot": "intent",
+      "value": "searchhotel"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "hotel_1",
+      "slot": "intent",
+      "value": "searchhotel"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "weather_1",
+      "slot": "intent",
+      "value": "getweather"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "travel_1",
+      "slot": "intent",
+      "value": "findattractions"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "restaurant_2",
+      "slot": "intent",
+      "value": "reserverestaurant"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "flight_3",
+      "slot": "intent",
+      "value": "searchonewayflight"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "flight_3",
+      "slot": "intent",
+      "value": "searchroundtripflights"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "alarm_1",
+      "slot": "intent",
+      "value": "getalarms"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "alarm_1",
+      "slot": "intent",
+      "value": "addalarm"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "services_4",
+      "slot": "intent",
+      "value": "findprovider"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "services_4",
+      "slot": "intent",
+      "value": "bookappointment"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "restaurant_2",
+      "slot": "intent",
+      "value": "findrestaurants"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "bank_2",
+      "slot": "intent",
+      "value": "checkbalance"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "bank_2",
+      "slot": "intent",
+      "value": "transfermoney"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "movie_2",
+      "slot": "intent",
+      "value": "findmovies"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "media_2",
+      "slot": "intent",
+      "value": "findmovies"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "media_2",
+      "slot": "intent",
+      "value": "rentmovie"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "hotel_4",
+      "slot": "intent",
+      "value": "searchhotel"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "hotel_4",
+      "slot": "intent",
+      "value": "reservehotel"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "music_3",
+      "slot": "intent",
+      "value": "lookupmusic"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "music_3",
+      "slot": "intent",
+      "value": "playmedia"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "event_3",
+      "slot": "intent",
+      "value": "findevents"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "event_3",
+      "slot": "intent",
+      "value": "buyeventtickets"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "flight_4",
+      "slot": "intent",
+      "value": "searchonewayflight"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "flight_4",
+      "slot": "intent",
+      "value": "searchroundtripflights"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "rentalcar_3",
+      "slot": "intent",
+      "value": "getcarsavailable"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "rentalcar_3",
+      "slot": "intent",
+      "value": "reservecar"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "bus_3",
+      "slot": "intent",
+      "value": "findbus"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "bus_3",
+      "slot": "intent",
+      "value": "buybusticket"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "home_2",
+      "slot": "intent",
+      "value": "findhomebyarea"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "home_2",
+      "slot": "intent",
+      "value": "schedulevisit"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "movie_1",
+      "slot": "intent",
+      "value": "buymovietickets"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "payment_1",
+      "slot": "intent",
+      "value": "makepayment"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "payment_1",
+      "slot": "intent",
+      "value": "requestpayment"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "train_1",
+      "slot": "intent",
+      "value": "findtrains"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "train_1",
+      "slot": "intent",
+      "value": "gettraintickets"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "movie_3",
+      "slot": "intent",
+      "value": "findmovies"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "media_3",
+      "slot": "intent",
+      "value": "playmovie"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "media_3",
+      "slot": "intent",
+      "value": "findmovies"
+    },
+    {
+      "intent": "inform_intent",
+      "domain": "messaging_1",
+      "slot": "intent",
+      "value": "sharelocation"
+    },
+    {
+      "intent": "negate",
+      "domain": "",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "negate_intent",
+      "domain": "event_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "negate_intent",
+      "domain": "event_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "negate_intent",
+      "domain": "movie_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "negate_intent",
+      "domain": "flight_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "negate_intent",
+      "domain": "media_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "negate_intent",
+      "domain": "rentalcar_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "negate_intent",
+      "domain": "rentalcar_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "negate_intent",
+      "domain": "bus_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "negate_intent",
+      "domain": "bus_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "negate_intent",
+      "domain": "bank_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "negate_intent",
+      "domain": "services_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "negate_intent",
+      "domain": "services_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "negate_intent",
+      "domain": "services_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "negate_intent",
+      "domain": "home_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "negate_intent",
+      "domain": "hotel_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "negate_intent",
+      "domain": "hotel_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "negate_intent",
+      "domain": "hotel_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "negate_intent",
+      "domain": "restaurant_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "negate_intent",
+      "domain": "calendar_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "negate_intent",
+      "domain": "hotel_4",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "negate_intent",
+      "domain": "bank_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "negate_intent",
+      "domain": "media_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "negate_intent",
+      "domain": "services_4",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "negate_intent",
+      "domain": "restaurant_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "negate_intent",
+      "domain": "event_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "negate_intent",
+      "domain": "rentalcar_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "negate_intent",
+      "domain": "bus_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "negate_intent",
+      "domain": "train_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "negate_intent",
+      "domain": "music_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "negate_intent",
+      "domain": "media_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "negate_intent",
+      "domain": "alarm_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "restaurant_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "media_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "music_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "event_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "movie_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "event_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "flight_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "flight_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "rentalcar_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "services_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "services_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "services_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "home_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "hotel_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "calendar_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "hotel_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "hotel_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "bus_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "weather_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "music_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "rentalcar_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "travel_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "bus_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "bank_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "ridesharing_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "ridesharing_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "restaurant_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "alarm_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "services_4",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "media_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "hotel_4",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "movie_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "flight_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "bank_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "event_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "movie_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "media_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "bus_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "music_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "flight_4",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_failure",
+      "domain": "rentalcar_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "restaurant_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "media_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "event_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "music_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "music_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "event_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "flight_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "ridesharing_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "ridesharing_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "rentalcar_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "rentalcar_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "hotel_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "bus_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "bus_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "services_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "services_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "services_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "home_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "bank_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "calendar_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "hotel_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "hotel_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "restaurant_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "alarm_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "services_4",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "bank_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "media_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "hotel_4",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "music_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "event_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "rentalcar_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "bus_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "home_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "movie_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "payment_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "train_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "media_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "notify_success",
+      "domain": "messaging_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "restaurant_1",
+      "slot": "intent",
+      "value": "reserverestaurant"
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "event_2",
+      "slot": "intent",
+      "value": "buyeventtickets"
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "music_2",
+      "slot": "intent",
+      "value": "playmedia"
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "music_1",
+      "slot": "intent",
+      "value": "playsong"
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "event_1",
+      "slot": "intent",
+      "value": "buyeventtickets"
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "movie_1",
+      "slot": "intent",
+      "value": "buymovietickets"
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "flight_1",
+      "slot": "intent",
+      "value": "reserveonewayflight"
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "flight_1",
+      "slot": "intent",
+      "value": "reserveroundtripflights"
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "media_1",
+      "slot": "intent",
+      "value": "playmovie"
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "rentalcar_1",
+      "slot": "intent",
+      "value": "reservecar"
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "rentalcar_2",
+      "slot": "intent",
+      "value": "reservecar"
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "bus_2",
+      "slot": "intent",
+      "value": "buybusticket"
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "bus_1",
+      "slot": "intent",
+      "value": "buybusticket"
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "services_2",
+      "slot": "intent",
+      "value": "bookappointment"
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "services_1",
+      "slot": "intent",
+      "value": "bookappointment"
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "services_3",
+      "slot": "intent",
+      "value": "bookappointment"
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "home_1",
+      "slot": "intent",
+      "value": "schedulevisit"
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "bank_1",
+      "slot": "intent",
+      "value": "transfermoney"
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "hotel_2",
+      "slot": "intent",
+      "value": "bookhouse"
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "calendar_1",
+      "slot": "intent",
+      "value": "addevent"
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "hotel_3",
+      "slot": "intent",
+      "value": "reservehotel"
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "hotel_1",
+      "slot": "intent",
+      "value": "reservehotel"
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "alarm_1",
+      "slot": "intent",
+      "value": "addalarm"
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "services_4",
+      "slot": "intent",
+      "value": "bookappointment"
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "restaurant_2",
+      "slot": "intent",
+      "value": "reserverestaurant"
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "bank_2",
+      "slot": "intent",
+      "value": "transfermoney"
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "media_2",
+      "slot": "intent",
+      "value": "rentmovie"
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "hotel_4",
+      "slot": "intent",
+      "value": "reservehotel"
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "music_3",
+      "slot": "intent",
+      "value": "playmedia"
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "event_3",
+      "slot": "intent",
+      "value": "buyeventtickets"
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "rentalcar_3",
+      "slot": "intent",
+      "value": "reservecar"
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "bus_3",
+      "slot": "intent",
+      "value": "buybusticket"
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "home_2",
+      "slot": "intent",
+      "value": "schedulevisit"
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "train_1",
+      "slot": "intent",
+      "value": "gettraintickets"
+    },
+    {
+      "intent": "offer_intent",
+      "domain": "media_3",
+      "slot": "intent",
+      "value": "playmovie"
+    },
+    {
+      "intent": "req_more",
+      "domain": "",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "restaurant_1",
+      "slot": "city",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "restaurant_1",
+      "slot": "street_address",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "restaurant_1",
+      "slot": "phone_number",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "restaurant_1",
+      "slot": "time",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "restaurant_1",
+      "slot": "has_live_music",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "restaurant_1",
+      "slot": "serves_alcohol",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "restaurant_1",
+      "slot": "cuisine",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "restaurant_1",
+      "slot": "price_range",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "media_1",
+      "slot": "title",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "media_1",
+      "slot": "directed_by",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "media_1",
+      "slot": "genre",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "restaurant_1",
+      "slot": "restaurant_name",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "event_2",
+      "slot": "venue_address",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "event_2",
+      "slot": "city",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "event_2",
+      "slot": "time",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "event_2",
+      "slot": "event_name",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "event_2",
+      "slot": "number_of_tickets",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "music_2",
+      "slot": "genre",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "music_1",
+      "slot": "genre",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "music_1",
+      "slot": "year",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "event_1",
+      "slot": "city_of_event",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "event_1",
+      "slot": "subcategory",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "event_1",
+      "slot": "address_of_location",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "event_1",
+      "slot": "number_of_seats",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "event_1",
+      "slot": "category",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "event_2",
+      "slot": "event_type",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "event_2",
+      "slot": "date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "event_2",
+      "slot": "category",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "movie_1",
+      "slot": "location",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "movie_1",
+      "slot": "show_date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "movie_1",
+      "slot": "street_address",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "movie_1",
+      "slot": "price",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "movie_1",
+      "slot": "genre",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "event_1",
+      "slot": "date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "event_1",
+      "slot": "event_name",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "event_1",
+      "slot": "event_location",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "event_1",
+      "slot": "time",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "event_2",
+      "slot": "venue",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_1",
+      "slot": "origin_city",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_1",
+      "slot": "destination_city",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_1",
+      "slot": "departure_date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_1",
+      "slot": "origin_airport",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_1",
+      "slot": "refundable",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_1",
+      "slot": "destination_airport",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_1",
+      "slot": "outbound_arrival_time",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_2",
+      "slot": "origin",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_2",
+      "slot": "destination",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_2",
+      "slot": "departure_date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_2",
+      "slot": "destination_airport",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_2",
+      "slot": "outbound_arrival_time",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_2",
+      "slot": "seating_class",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_2",
+      "slot": "is_redeye",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_2",
+      "slot": "origin_airport",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_2",
+      "slot": "passengers",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_1",
+      "slot": "price",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_1",
+      "slot": "airlines",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_1",
+      "slot": "outbound_departure_time",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_1",
+      "slot": "number_stops",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_1",
+      "slot": "return_date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_1",
+      "slot": "inbound_arrival_time",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_2",
+      "slot": "return_date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_2",
+      "slot": "number_stops",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_1",
+      "slot": "inbound_departure_time",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_2",
+      "slot": "inbound_arrival_time",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "ridesharing_2",
+      "slot": "number_of_seats",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "ridesharing_2",
+      "slot": "wait_time",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "ridesharing_2",
+      "slot": "ride_fare",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "ridesharing_2",
+      "slot": "destination",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "ridesharing_2",
+      "slot": "ride_type",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "ridesharing_1",
+      "slot": "number_of_riders",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "ridesharing_1",
+      "slot": "destination",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "ridesharing_1",
+      "slot": "ride_fare",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "ridesharing_1",
+      "slot": "approximate_ride_duration",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "ridesharing_1",
+      "slot": "shared_ride",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "rentalcar_1",
+      "slot": "pickup_time",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "rentalcar_1",
+      "slot": "dropoff_date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "rentalcar_1",
+      "slot": "pickup_city",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "rentalcar_1",
+      "slot": "pickup_date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "rentalcar_1",
+      "slot": "total_price",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "rentalcar_2",
+      "slot": "dropoff_date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "rentalcar_2",
+      "slot": "pickup_city",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "rentalcar_2",
+      "slot": "pickup_date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "rentalcar_2",
+      "slot": "total_price",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "rentalcar_2",
+      "slot": "pickup_time",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "bus_2",
+      "slot": "origin",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "bus_2",
+      "slot": "departure_date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "bus_2",
+      "slot": "destination",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "bus_2",
+      "slot": "origin_station_name",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "bus_2",
+      "slot": "destination_station_name",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "bus_1",
+      "slot": "from_location",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "bus_1",
+      "slot": "leaving_date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "bus_1",
+      "slot": "to_station",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "bus_1",
+      "slot": "from_station",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "bus_1",
+      "slot": "to_location",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "hotel_2",
+      "slot": "where_to",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "hotel_2",
+      "slot": "number_of_adults",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "hotel_2",
+      "slot": "check_in_date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "hotel_2",
+      "slot": "check_out_date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "hotel_2",
+      "slot": "rating",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "hotel_2",
+      "slot": "has_laundry_service",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "hotel_2",
+      "slot": "phone_number",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "hotel_2",
+      "slot": "address",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "hotel_2",
+      "slot": "total_price",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "bus_2",
+      "slot": "group_size",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "bus_1",
+      "slot": "travelers",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "services_2",
+      "slot": "city",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "services_2",
+      "slot": "address",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "services_2",
+      "slot": "offers_cosmetic_services",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "services_2",
+      "slot": "phone_number",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "services_2",
+      "slot": "appointment_date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "services_2",
+      "slot": "appointment_time",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "services_1",
+      "slot": "city",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "services_1",
+      "slot": "average_rating",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "services_1",
+      "slot": "phone_number",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "services_1",
+      "slot": "street_address",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "services_1",
+      "slot": "appointment_date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "services_1",
+      "slot": "is_unisex",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "services_1",
+      "slot": "appointment_time",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "services_3",
+      "slot": "city",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "services_3",
+      "slot": "type",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "services_3",
+      "slot": "phone_number",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "services_3",
+      "slot": "street_address",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "services_3",
+      "slot": "appointment_time",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "services_3",
+      "slot": "average_rating",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "services_3",
+      "slot": "appointment_date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "home_1",
+      "slot": "number_of_beds",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "home_1",
+      "slot": "phone_number",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "home_1",
+      "slot": "pets_allowed",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "home_1",
+      "slot": "area",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "home_1",
+      "slot": "furnished",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "home_1",
+      "slot": "visit_date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "bank_1",
+      "slot": "recipient_account_name",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "bank_1",
+      "slot": "amount",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "bank_1",
+      "slot": "account_type",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "services_2",
+      "slot": "dentist_name",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "services_1",
+      "slot": "stylist_name",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "services_3",
+      "slot": "doctor_name",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "calendar_1",
+      "slot": "event_date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "calendar_1",
+      "slot": "event_time",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "calendar_1",
+      "slot": "event_location",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "calendar_1",
+      "slot": "event_name",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "hotel_3",
+      "slot": "check_in_date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "hotel_3",
+      "slot": "location",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "hotel_3",
+      "slot": "check_out_date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "hotel_3",
+      "slot": "average_rating",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "hotel_3",
+      "slot": "price",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "hotel_3",
+      "slot": "hotel_name",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "hotel_3",
+      "slot": "pets_welcome",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "hotel_3",
+      "slot": "phone_number",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "hotel_3",
+      "slot": "street_address",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "hotel_1",
+      "slot": "destination",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "hotel_1",
+      "slot": "check_in_date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "hotel_1",
+      "slot": "hotel_name",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "hotel_1",
+      "slot": "number_of_days",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "hotel_1",
+      "slot": "has_wifi",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "hotel_1",
+      "slot": "phone_number",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "hotel_1",
+      "slot": "price_per_night",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "hotel_1",
+      "slot": "star_rating",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "hotel_1",
+      "slot": "street_address",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "bus_2",
+      "slot": "departure_time",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "bus_2",
+      "slot": "price",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "movie_1",
+      "slot": "movie_name",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "weather_1",
+      "slot": "city",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "weather_1",
+      "slot": "humidity",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "weather_1",
+      "slot": "wind",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "weather_1",
+      "slot": "date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "music_2",
+      "slot": "song_name",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "music_2",
+      "slot": "artist",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "music_2",
+      "slot": "album",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "music_1",
+      "slot": "album",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "music_1",
+      "slot": "song_name",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "music_1",
+      "slot": "artist",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "rentalcar_1",
+      "slot": "pickup_location",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "rentalcar_1",
+      "slot": "car_name",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "rentalcar_1",
+      "slot": "type",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "rentalcar_2",
+      "slot": "pickup_location",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "rentalcar_2",
+      "slot": "car_name",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "rentalcar_2",
+      "slot": "car_type",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel_1",
+      "slot": "phone_number",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel_1",
+      "slot": "good_for_kids",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel_1",
+      "slot": "free_entry",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "travel_1",
+      "slot": "location",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "bus_1",
+      "slot": "fare",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "bus_1",
+      "slot": "leaving_time",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "bus_1",
+      "slot": "transfers",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "restaurant_2",
+      "slot": "restaurant_name",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "restaurant_2",
+      "slot": "location",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "restaurant_2",
+      "slot": "phone_number",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "restaurant_2",
+      "slot": "has_vegetarian_options",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "restaurant_2",
+      "slot": "address",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "restaurant_2",
+      "slot": "time",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "restaurant_2",
+      "slot": "price_range",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "restaurant_2",
+      "slot": "category",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "restaurant_2",
+      "slot": "has_seating_outdoors",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "restaurant_2",
+      "slot": "rating",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_3",
+      "slot": "origin_city",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_3",
+      "slot": "departure_date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_3",
+      "slot": "destination_city",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_3",
+      "slot": "outbound_arrival_time",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_3",
+      "slot": "arrives_next_day",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_3",
+      "slot": "destination_airport_name",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_3",
+      "slot": "origin_airport_name",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_3",
+      "slot": "number_checked_bags",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_3",
+      "slot": "passengers",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_3",
+      "slot": "return_date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_3",
+      "slot": "flight_class",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_3",
+      "slot": "inbound_arrival_time",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "alarm_1",
+      "slot": "new_alarm_time",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "services_4",
+      "slot": "phone_number",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "services_4",
+      "slot": "address",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "services_4",
+      "slot": "appointment_date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "services_4",
+      "slot": "appointment_time",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "services_4",
+      "slot": "type",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "services_4",
+      "slot": "city",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "bank_2",
+      "slot": "recipient_name",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "bank_2",
+      "slot": "transfer_amount",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "bank_2",
+      "slot": "transfer_time",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "bank_2",
+      "slot": "account_type",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "movie_2",
+      "slot": "director",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "movie_2",
+      "slot": "genre",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "movie_2",
+      "slot": "starring",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "media_2",
+      "slot": "genre",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "media_2",
+      "slot": "price",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "media_2",
+      "slot": "actors",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "media_2",
+      "slot": "director",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "hotel_4",
+      "slot": "location",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "hotel_4",
+      "slot": "smoking_allowed",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "hotel_4",
+      "slot": "phone_number",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "hotel_4",
+      "slot": "price_per_night",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "hotel_4",
+      "slot": "stay_length",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "hotel_4",
+      "slot": "check_in_date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "hotel_4",
+      "slot": "street_address",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_3",
+      "slot": "number_stops",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "music_3",
+      "slot": "genre",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "music_3",
+      "slot": "year",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "event_3",
+      "slot": "price_per_ticket",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "event_3",
+      "slot": "city",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "event_3",
+      "slot": "venue_address",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "event_3",
+      "slot": "event_type",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "event_3",
+      "slot": "number_of_tickets",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_4",
+      "slot": "origin_airport",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_4",
+      "slot": "departure_date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_4",
+      "slot": "destination_airport",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_4",
+      "slot": "outbound_arrival_time",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_4",
+      "slot": "number_of_tickets",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_4",
+      "slot": "seating_class",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_4",
+      "slot": "return_date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "flight_4",
+      "slot": "inbound_arrival_time",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "rentalcar_3",
+      "slot": "pickup_time",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "rentalcar_3",
+      "slot": "end_date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "rentalcar_3",
+      "slot": "start_date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "rentalcar_3",
+      "slot": "city",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "rentalcar_3",
+      "slot": "price_per_day",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "rentalcar_3",
+      "slot": "add_insurance",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "bus_3",
+      "slot": "to_city",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "bus_3",
+      "slot": "departure_date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "bus_3",
+      "slot": "from_city",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "bus_3",
+      "slot": "category",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "bus_3",
+      "slot": "to_station",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "bus_3",
+      "slot": "from_station",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "bus_3",
+      "slot": "num_passengers",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "home_2",
+      "slot": "number_of_beds",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "home_2",
+      "slot": "area",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "home_2",
+      "slot": "in_unit_laundry",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "home_2",
+      "slot": "visit_date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "home_2",
+      "slot": "intent",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "home_2",
+      "slot": "has_garage",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "home_2",
+      "slot": "phone_number",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "home_2",
+      "slot": "number_of_baths",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "movie_1",
+      "slot": "number_of_tickets",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "movie_1",
+      "slot": "show_type",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "movie_1",
+      "slot": "show_time",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "movie_1",
+      "slot": "theater_name",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "home_2",
+      "slot": "property_name",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "home_2",
+      "slot": "price",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "home_2",
+      "slot": "address",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "payment_1",
+      "slot": "amount",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "payment_1",
+      "slot": "receiver",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "payment_1",
+      "slot": "payment_method",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "train_1",
+      "slot": "date_of_journey",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "train_1",
+      "slot": "from",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "train_1",
+      "slot": "to",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "train_1",
+      "slot": "from_station",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "train_1",
+      "slot": "to_station",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "train_1",
+      "slot": "number_of_adults",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "train_1",
+      "slot": "trip_protection",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "movie_3",
+      "slot": "cast",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "movie_3",
+      "slot": "directed_by",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "movie_3",
+      "slot": "genre",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "media_3",
+      "slot": "genre",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "media_3",
+      "slot": "starring",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "media_3",
+      "slot": "title",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "bus_3",
+      "slot": "departure_time",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "bus_3",
+      "slot": "price",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "messaging_1",
+      "slot": "contact_name",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "event_3",
+      "slot": "date",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "event_3",
+      "slot": "event_name",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "event_3",
+      "slot": "venue",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "event_3",
+      "slot": "time",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "rentalcar_3",
+      "slot": "car_name",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "rentalcar_3",
+      "slot": "pickup_location",
+      "value": ""
+    },
+    {
+      "intent": "request",
+      "domain": "rentalcar_3",
+      "slot": "car_type",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "restaurant_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "event_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "music_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "music_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "event_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "movie_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "flight_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "flight_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "media_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "rentalcar_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "rentalcar_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "bus_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "bus_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "services_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "services_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "services_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "home_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "bank_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "hotel_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "calendar_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "hotel_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "hotel_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "weather_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "travel_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "flight_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "alarm_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "services_4",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "restaurant_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "bank_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "movie_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "media_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "hotel_4",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "music_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "event_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "flight_4",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "rentalcar_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "bus_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "home_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "train_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "movie_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "request_alts",
+      "domain": "media_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "restaurant_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "event_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "music_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "music_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "event_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "movie_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "flight_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "flight_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "rentalcar_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "rentalcar_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "bus_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "bus_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "services_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "services_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "services_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "home_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "bank_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "hotel_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "calendar_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "hotel_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "hotel_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "weather_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "travel_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "flight_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "alarm_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "services_4",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "restaurant_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "bank_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "movie_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "hotel_4",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "music_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "event_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "flight_4",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "rentalcar_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "bus_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "home_2",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "train_1",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "select",
+      "domain": "movie_3",
+      "slot": "",
+      "value": ""
+    },
+    {
+      "intent": "thank_you",
+      "domain": "",
+      "slot": "",
+      "value": ""
+    }
+  ],
+  "state": {
+    "bank_1": {
+      "account_type": "",
+      "recipient_account_type": "",
+      "balance": "",
+      "amount": "",
+      "recipient_account_name": ""
+    },
+    "bus_1": {
+      "from_location": "",
+      "to_location": "",
+      "from_station": "",
+      "to_station": "",
+      "leaving_date": "",
+      "leaving_time": "",
+      "fare": "",
+      "travelers": "",
+      "transfers": ""
+    },
+    "bus_2": {
+      "origin": "",
+      "destination": "",
+      "origin_station_name": "",
+      "destination_station_name": "",
+      "departure_date": "",
+      "price": "",
+      "departure_time": "",
+      "group_size": "",
+      "fare_type": ""
+    },
+    "calendar_1": {
+      "event_date": "",
+      "event_time": "",
+      "event_location": "",
+      "event_name": "",
+      "available_start_time": "",
+      "available_end_time": ""
+    },
+    "event_1": {
+      "category": "",
+      "subcategory": "",
+      "event_name": "",
+      "date": "",
+      "time": "",
+      "number_of_seats": "",
+      "city_of_event": "",
+      "event_location": "",
+      "address_of_location": ""
+    },
+    "event_2": {
+      "event_type": "",
+      "category": "",
+      "event_name": "",
+      "date": "",
+      "time": "",
+      "number_of_tickets": "",
+      "city": "",
+      "venue": "",
+      "venue_address": ""
+    },
+    "flight_1": {
+      "passengers": "",
+      "seating_class": "",
+      "origin_city": "",
+      "destination_city": "",
+      "origin_airport": "",
+      "destination_airport": "",
+      "departure_date": "",
+      "return_date": "",
+      "number_stops": "",
+      "outbound_departure_time": "",
+      "outbound_arrival_time": "",
+      "inbound_arrival_time": "",
+      "inbound_departure_time": "",
+      "price": "",
+      "refundable": "",
+      "airlines": ""
+    },
+    "flight_2": {
+      "passengers": "",
+      "seating_class": "",
+      "origin": "",
+      "destination": "",
+      "origin_airport": "",
+      "destination_airport": "",
+      "departure_date": "",
+      "return_date": "",
+      "number_stops": "",
+      "outbound_departure_time": "",
+      "outbound_arrival_time": "",
+      "inbound_arrival_time": "",
+      "inbound_departure_time": "",
+      "fare": "",
+      "is_redeye": "",
+      "airlines": ""
+    },
+    "home_1": {
+      "area": "",
+      "address": "",
+      "property_name": "",
+      "phone_number": "",
+      "furnished": "",
+      "pets_allowed": "",
+      "rent": "",
+      "visit_date": "",
+      "number_of_beds": "",
+      "number_of_baths": ""
+    },
+    "hotel_1": {
+      "destination": "",
+      "number_of_rooms": "",
+      "check_in_date": "",
+      "number_of_days": "",
+      "star_rating": "",
+      "hotel_name": "",
+      "street_address": "",
+      "phone_number": "",
+      "price_per_night": "",
+      "has_wifi": ""
+    },
+    "hotel_2": {
+      "where_to": "",
+      "number_of_adults": "",
+      "check_in_date": "",
+      "check_out_date": "",
+      "rating": "",
+      "address": "",
+      "phone_number": "",
+      "total_price": "",
+      "has_laundry_service": ""
+    },
+    "hotel_3": {
+      "location": "",
+      "number_of_rooms": "",
+      "check_in_date": "",
+      "check_out_date": "",
+      "average_rating": "",
+      "hotel_name": "",
+      "street_address": "",
+      "phone_number": "",
+      "price": "",
+      "pets_welcome": ""
+    },
+    "media_1": {
+      "title": "",
+      "genre": "",
+      "subtitles": "",
+      "directed_by": ""
+    },
+    "movie_1": {
+      "price": "",
+      "number_of_tickets": "",
+      "show_type": "",
+      "theater_name": "",
+      "show_time": "",
+      "show_date": "",
+      "genre": "",
+      "street_address": "",
+      "location": "",
+      "movie_name": ""
+    },
+    "music_1": {
+      "song_name": "",
+      "artist": "",
+      "album": "",
+      "genre": "",
+      "year": "",
+      "playback_device": ""
+    },
+    "music_2": {
+      "song_name": "",
+      "artist": "",
+      "album": "",
+      "genre": "",
+      "playback_device": ""
+    },
+    "rentalcar_1": {
+      "type": "",
+      "car_name": "",
+      "pickup_location": "",
+      "pickup_date": "",
+      "pickup_time": "",
+      "pickup_city": "",
+      "dropoff_date": "",
+      "total_price": ""
+    },
+    "rentalcar_2": {
+      "car_type": "",
+      "car_name": "",
+      "pickup_location": "",
+      "pickup_date": "",
+      "pickup_time": "",
+      "pickup_city": "",
+      "dropoff_date": "",
+      "total_price": ""
+    },
+    "restaurant_1": {
+      "restaurant_name": "",
+      "date": "",
+      "time": "",
+      "serves_alcohol": "",
+      "has_live_music": "",
+      "phone_number": "",
+      "street_address": "",
+      "party_size": "",
+      "price_range": "",
+      "city": "",
+      "cuisine": ""
+    },
+    "ridesharing_1": {
+      "destination": "",
+      "shared_ride": "",
+      "ride_fare": "",
+      "approximate_ride_duration": "",
+      "number_of_riders": ""
+    },
+    "ridesharing_2": {
+      "destination": "",
+      "ride_type": "",
+      "ride_fare": "",
+      "wait_time": "",
+      "number_of_seats": ""
+    },
+    "services_1": {
+      "stylist_name": "",
+      "phone_number": "",
+      "average_rating": "",
+      "is_unisex": "",
+      "street_address": "",
+      "city": "",
+      "appointment_date": "",
+      "appointment_time": ""
+    },
+    "services_2": {
+      "dentist_name": "",
+      "phone_number": "",
+      "address": "",
+      "city": "",
+      "appointment_date": "",
+      "appointment_time": "",
+      "offers_cosmetic_services": ""
+    },
+    "services_3": {
+      "doctor_name": "",
+      "phone_number": "",
+      "average_rating": "",
+      "street_address": "",
+      "city": "",
+      "appointment_date": "",
+      "appointment_time": "",
+      "type": ""
+    },
+    "travel_1": {
+      "location": "",
+      "attraction_name": "",
+      "category": "",
+      "phone_number": "",
+      "free_entry": "",
+      "good_for_kids": ""
+    },
+    "weather_1": {
+      "precipitation": "",
+      "humidity": "",
+      "wind": "",
+      "temperature": "",
+      "city": "",
+      "date": ""
+    },
+    "alarm_1": {
+      "alarm_time": "",
+      "alarm_name": "",
+      "new_alarm_time": "",
+      "new_alarm_name": ""
+    },
+    "bank_2": {
+      "account_type": "",
+      "recipient_account_type": "",
+      "account_balance": "",
+      "transfer_amount": "",
+      "recipient_name": "",
+      "transfer_time": ""
+    },
+    "flight_3": {
+      "passengers": "",
+      "flight_class": "",
+      "origin_city": "",
+      "destination_city": "",
+      "origin_airport_name": "",
+      "destination_airport_name": "",
+      "departure_date": "",
+      "return_date": "",
+      "number_stops": "",
+      "outbound_departure_time": "",
+      "outbound_arrival_time": "",
+      "inbound_arrival_time": "",
+      "inbound_departure_time": "",
+      "price": "",
+      "number_checked_bags": "",
+      "airlines": "",
+      "arrives_next_day": ""
+    },
+    "hotel_4": {
+      "location": "",
+      "number_of_rooms": "",
+      "check_in_date": "",
+      "stay_length": "",
+      "star_rating": "",
+      "place_name": "",
+      "street_address": "",
+      "phone_number": "",
+      "price_per_night": "",
+      "smoking_allowed": ""
+    },
+    "media_2": {
+      "movie_name": "",
+      "genre": "",
+      "subtitle_language": "",
+      "director": "",
+      "actors": "",
+      "price": ""
+    },
+    "movie_2": {
+      "title": "",
+      "genre": "",
+      "aggregate_rating": "",
+      "starring": "",
+      "director": ""
+    },
+    "restaurant_2": {
+      "restaurant_name": "",
+      "date": "",
+      "time": "",
+      "has_seating_outdoors": "",
+      "has_vegetarian_options": "",
+      "phone_number": "",
+      "rating": "",
+      "address": "",
+      "number_of_seats": "",
+      "price_range": "",
+      "location": "",
+      "category": ""
+    },
+    "services_4": {
+      "therapist_name": "",
+      "phone_number": "",
+      "address": "",
+      "city": "",
+      "appointment_date": "",
+      "appointment_time": "",
+      "type": ""
+    },
+    "bus_3": {
+      "from_city": "",
+      "to_city": "",
+      "from_station": "",
+      "to_station": "",
+      "departure_date": "",
+      "departure_time": "",
+      "price": "",
+      "additional_luggage": "",
+      "num_passengers": "",
+      "category": ""
+    },
+    "event_3": {
+      "event_type": "",
+      "event_name": "",
+      "date": "",
+      "time": "",
+      "number_of_tickets": "",
+      "price_per_ticket": "",
+      "city": "",
+      "venue": "",
+      "venue_address": ""
+    },
+    "flight_4": {
+      "number_of_tickets": "",
+      "seating_class": "",
+      "origin_airport": "",
+      "destination_airport": "",
+      "departure_date": "",
+      "return_date": "",
+      "is_nonstop": "",
+      "outbound_departure_time": "",
+      "outbound_arrival_time": "",
+      "inbound_arrival_time": "",
+      "inbound_departure_time": "",
+      "price": "",
+      "airlines": ""
+    },
+    "home_2": {
+      "intent": "",
+      "area": "",
+      "address": "",
+      "property_name": "",
+      "phone_number": "",
+      "has_garage": "",
+      "in_unit_laundry": "",
+      "price": "",
+      "visit_date": "",
+      "number_of_beds": "",
+      "number_of_baths": ""
+    },
+    "media_3": {
+      "title": "",
+      "genre": "",
+      "subtitle_language": "",
+      "starring": ""
+    },
+    "messaging_1": {
+      "location": "",
+      "contact_name": ""
+    },
+    "movie_3": {
+      "movie_title": "",
+      "genre": "",
+      "percent_rating": "",
+      "cast": "",
+      "directed_by": ""
+    },
+    "music_3": {
+      "track": "",
+      "artist": "",
+      "album": "",
+      "genre": "",
+      "year": "",
+      "device": ""
+    },
+    "payment_1": {
+      "payment_method": "",
+      "amount": "",
+      "receiver": "",
+      "private_visibility": ""
+    },
+    "rentalcar_3": {
+      "car_type": "",
+      "car_name": "",
+      "pickup_location": "",
+      "start_date": "",
+      "pickup_time": "",
+      "city": "",
+      "end_date": "",
+      "price_per_day": "",
+      "add_insurance": ""
+    },
+    "train_1": {
+      "from": "",
+      "to": "",
+      "from_station": "",
+      "to_station": "",
+      "date_of_journey": "",
+      "journey_start_time": "",
+      "total": "",
+      "number_of_adults": "",
+      "class": "",
+      "trip_protection": ""
+    }
+  }
+}
\ No newline at end of file
diff --git a/data/unified_datasets/schema/original_data.zip b/data/unified_datasets/schema/original_data.zip
new file mode 100644
index 0000000000000000000000000000000000000000..110e958b8b1f73f0c57f8660f9201a2dfb340f71
Binary files /dev/null and b/data/unified_datasets/schema/original_data.zip differ
diff --git a/data/unified_datasets/schema/preprocess.py b/data/unified_datasets/schema/preprocess.py
new file mode 100644
index 0000000000000000000000000000000000000000..46913390c7c89978965b0c1ae65067b4c14fe9a9
--- /dev/null
+++ b/data/unified_datasets/schema/preprocess.py
@@ -0,0 +1,549 @@
+import zipfile
+import json
+import os
+from pprint import pprint
+from copy import deepcopy
+from collections import Counter
+from tqdm import tqdm
+import numpy as np
+from convlab2.util.file_util import read_zipped_json, write_zipped_json
+import re
+self_dir = os.path.dirname(os.path.abspath(__file__))  # folder of this script; original_data.zip, data.zip and ontology.json are resolved against it
+
+
+norm_service2domain = {  # lower-cased service-name prefix (the part before '_') -> domain name, see service2domain
+    'alarm': 'alarm',
+    'banks': 'bank',
+    'buses': 'bus',
+    'calendar': 'calendar',
+    'events': 'event',
+    'flights': 'flight',
+    'homes': 'home',
+    'hotels': 'hotel',
+    'media': 'media',
+    'messaging': 'messaging',
+    'movies': 'movie',
+    'music': 'music',
+    'payment': 'payment',
+    'rentalcars': 'rentalcar',
+    'restaurants': 'restaurant',
+    'ridesharing': 'ridesharing',
+    'services': 'services',
+    'trains': 'train',
+    'travel': 'travel',
+    'weather': 'weather'
+}
+
+digit2word = {  # digit string -> English number word, used to find spelled-out numbers in utterances
+    '0': 'zero', '1': 'one', '2': 'two', '3': 'three', '4': 'four', '5': 'five',
+    '6': 'six', '7': 'seven', '8': 'eight', '9': 'nine', '10': 'ten'
+}
+
+match = {  # running tally updated by pharse_in_sen: lookups that found 0, exactly 1, or more than 1 occurrence
+    '0': 0,
+    '1': 0,
+    '>1': 0,
+}
+
+
+def service2domain(service):
+    prefix, index = service.split('_')  # e.g. 'Hotels_1' -> ('Hotels', '1'); unpacking requires exactly one '_'
+    return '{}_{}'.format(norm_service2domain[prefix.lower()], index)  # normalized name + original index, e.g. 'hotel_1'
+
+
+def slot_normalize(service, slot):
+    return None  # placeholder: no slot normalization is implemented, both arguments are ignored
+
+
+def pharse_in_sen(phrase, sen):
+    '''
+    Find a slot value in an utterance, case-insensitively and on token boundaries.
+
+    An all-digit phrase uses a stricter pattern so that it is not matched inside a
+    longer number such as "1,000" or "3.5". When the digits are not found and the
+    phrase is a key of digit2word, its spelled-out form (e.g. "two") is tried next.
+    Each call increments one bucket ('0', '1' or '>1') of the module-level `match` tally.
+
+    :param phrase: str, the value to look for
+    :param sen: str, the utterance to search
+    :return: ((start, end), num) - character span of the first occurrence and the
+        number of occurrences found; ((None, None), 0) if there is no occurrence
+    '''
+    assert isinstance(phrase, str)
+    # Raw strings: \s, \., \? and \D are regex escapes, not Python string escapes;
+    # in plain literals they are invalid escape sequences that Python warns about.
+    pw = r'(^|[\s,\.:\?!-])(?P<v>{})([\s,\.:\?!-]|$)'
+    pn = r'(^|[\s\?!-]|\D[,\.:])(?P<v>{})($|[\s\?!-]|[,\.:]\D|[,\.:]$)'
+    # Forms to try, in order: the phrase itself, then (digits only) its number word.
+    attempts = [(pn if phrase.isdigit() else pw, phrase)]
+    if phrase.isdigit() and phrase in digit2word:
+        attempts.append((pw, digit2word[phrase]))
+    for template, text in attempts:
+        p = re.compile(template.format(re.escape(text)), re.I)
+        m = p.search(sen)
+        if m is None:
+            continue
+        # Count all occurrences so callers can tell a unique match from an ambiguous one
+        # (findall yields one entry per non-overlapping match of the whole pattern).
+        num = len(p.findall(sen))
+        match['>1' if num > 1 else '1'] += 1
+        return m.span('v'), num
+    # Neither form occurs in the utterance.
+    match['0'] += 1
+    return (None, None), 0
+
+
+def number_in_sen(word, sen):
+    '''
+    Return the (start, end) character span of `word` inside `sen`.
+
+    A token-like occurrence (after a space or at the start of the sentence, and followed
+    by a space, '.' or ',') is preferred. For digit strings found in digit2word the
+    spelled-out form is then tried on the lower-cased sentence. The last resort is a
+    plain substring search, which raises ValueError when `word` does not occur at all.
+    '''
+    def locate(token, text):
+        for tail in (' ', '.', ','):
+            pos = text.find(' ' + token + tail)
+            if pos != -1:
+                return pos + 1, pos + 1 + len(token)
+        if text.startswith((token + ' ', token + '.', token + ',')):
+            return 0, len(token)
+        return None
+    span = locate(word, sen)
+    if span is None and word.isdigit() and word in digit2word:
+        span = locate(digit2word[word], sen.lower())
+    if span is None:
+        # This fallback used to return a bare int; return a span like every other branch.
+        start = sen.index(word)
+        span = (start, start + len(word))
+    return span
+
+
+def sys_intent():
+    return {act: {"description": text} for act, text in (  # system-side intents, each mapped to its description
+        ("inform", "Inform the value for a slot to the user."),
+        ("request", "Request the value of a slot from the user."),
+        ("confirm", "Confirm the value of a slot before making a transactional service call."),
+        ("offer", "Offer a certain value for a slot to the user."),
+        ("notify_success", "Inform the user that their request was successful."),
+        ("notify_failure", "Inform the user that their request failed."),
+        ("inform_count", "Inform the number of items found that satisfy the user's request."),
+        ("offer_intent", "Offer a new intent to the user."),
+        ("req_more", "Asking the user if they need anything else."),
+        ("goodbye", "End the dialogue."),
+    )}
+
+
+def usr_intent():
+    return {act: {"description": text} for act, text in (  # user-side intents, each mapped to its description
+        ("inform_intent", "Express the desire to perform a certain task to the system."),
+        ("negate_intent", "Negate the intent which has been offered by the system."),
+        ("affirm_intent", "Agree to the intent which has been offered by the system."),
+        ("inform", "Inform the value of a slot to the system."),
+        ("request", "Request the value of a slot from the system."),
+        ("affirm", "Agree to the system's proposition. "),
+        ("negate", "Deny the system's proposal."),
+        ("select", "Select a result being offered by the system."),
+        ("request_alts", "Ask for more results besides the ones offered by the system."),
+        ("thank_you", "Thank the system."),
+        ("goodbye", "End the dialogue."),
+    )}
+
+
+def get_intent():
+    """Return every intent (system and user side merged) mapped to its description."""
+    return {act: {"description": text} for act, text in (
+        ("inform", "Inform the value for a slot."),
+        ("request", "Request the value of a slot."),
+        ("confirm", "Confirm the value of a slot before making a transactional service call."),
+        ("offer", "Offer a certain value for a slot to the user."),
+        ("notify_success", "Inform the user that their request was successful."),
+        ("notify_failure", "Inform the user that their request failed."),
+        ("inform_count", "Inform the number of items found that satisfy the user's request."),
+        ("offer_intent", "Offer a new intent to the user."),
+        ("req_more", "Asking the user if they need anything else."),
+        ("goodbye", "End the dialogue."),
+        ("inform_intent", "Express the desire to perform a certain task to the system."),
+        ("negate_intent", "Negate the intent which has been offered by the system."),
+        ("affirm_intent", "Agree to the intent which has been offered by the system."),
+        ("affirm", "Agree to the system's proposition. "),
+        ("negate", "Deny the system's proposal."),
+        ("select", "Select a result being offered by the system."),
+        ("request_alts", "Ask for more results besides the ones offered by the system."),
+        ("thank_you", "Thank the system."),
+    )}
+
+
+def preprocess():
+    """Build the unified-format dialogues and ontology from original_data.zip, or load cached ones.
+
+    When data.zip or ontology.json is missing next to this script, both are regenerated from the
+    train/dev/test splits of the archive; otherwise they are read back. Returns (dialogues, ontology).
+    """
+    processed_dialogue = []
+    ontology = {'domains': {},
+                'intents': {},
+                'binary_dialogue_act': [],
+                'state': {}}
+    ontology['intents'].update(get_intent())
+    numerical_slots = {}  # never filled (the code that did so is commented out below), so the numerical-slot branches never run
+    original_zipped_path = os.path.join(self_dir, 'original_data.zip')
+    new_dir = os.path.join(self_dir, 'original_data')
+    if not os.path.exists(original_zipped_path):
+        raise FileNotFoundError(original_zipped_path)
+    if not os.path.exists(os.path.join(self_dir, 'data.zip')) or not os.path.exists(os.path.join(self_dir, 'ontology.json')):
+        print('unzip to', new_dir)
+        print('This may take several minutes')
+        archive = zipfile.ZipFile(original_zipped_path, 'r')
+        archive.extractall(self_dir)  # presumably the archive holds an original_data/ folder (new_dir) - TODO confirm
+        cnt = 1
+        non_cate_slot_update_cnt = 0
+        non_cate_slot_update_fail_cnt = 0
+        state_cnt = {}
+        num_train_dialog = 0
+        num_train_utt = 0
+        for data_split in ['train', 'dev', 'test']:
+            dataset_name = 'schema'
+            data_dir = os.path.join(new_dir, data_split)
+            # schema.json of this split => ontology domains, slots and the empty state template
+            f = open(os.path.join(data_dir, 'schema.json'))  # NOTE(review): this handle is never closed explicitly
+            data = json.load(f)
+            for schema in data:
+                domain = service2domain(schema['service_name'])
+                ontology['domains'].setdefault(domain, {})
+                ontology['domains'][domain]['description'] = schema['description']
+                ontology['domains'][domain].setdefault('slots', {})
+                ontology['state'].setdefault(domain, {})
+                for slot in schema['slots']:
+                    # numerical => non-categorical: not use
+                    # is_numerical = slot['is_categorical']
+                    # for value in slot['possible_values']:
+                    #     if not value.isdigit():
+                    #         is_numerical = False
+                    #         break
+                    # if is_numerical:
+                    #     numerical_slots.setdefault(slot['name'].lower(), 1)
+                    lower_values = [x.lower() for x in slot['possible_values']]
+                    ontology['domains'][domain]['slots'][slot['name'].lower()] = {
+                        "description": slot['description'],
+                        "is_categorical": slot['is_categorical'],
+                        "possible_values": lower_values
+                    }
+                    ontology['state'][domain][slot['name'].lower()] = ''
+                # add 'count' slot (used by INFORM_COUNT acts); it is not added to the state template
+                ontology['domains'][domain]['slots']['count'] = {
+                    "description": "the number of items found that satisfy the user's request.",
+                    "is_categorical": False,
+                    "possible_values": []
+                }
+                # ontology['state'][domain]['count'] = ''
+            # dialogue files of this split => unified format
+            for root, dirs, files in os.walk(data_dir):
+                fs = sorted([x for x in files if 'dialogues' in x])
+                for f in tqdm(fs, desc='processing schema-guided-{}'.format(data_split)):
+                    data = json.load(open(os.path.join(data_dir, f)))
+                    if data_split == 'train':
+                        num_train_dialog += len(data)
+                    for d in data:
+                        dialogue = {
+                            "dataset": dataset_name,
+                            "data_split": data_split if data_split!='dev' else 'val',
+                            "dialogue_id": dataset_name+'_'+str(cnt),
+                            "original_id": d['dialogue_id'],
+                            "domains": [service2domain(s) for s in d['services']],
+                            "turns": []
+                        }
+                        # per-dialogue trackers: running dialogue counter, frames of the latest system / user turn,
+                        # spans collected from non-categorical acts (newest first) and the accumulated state
+                        cnt += 1
+                        prev_sys_frames = []
+                        prev_user_frames = []
+                        all_slot_spans_from_da = []
+                        state = {}
+                        for domain in dialogue['domains']:
+                            state.setdefault(domain, deepcopy(ontology['state'][domain]))
+                        if data_split == 'train':
+                            num_train_utt += len(d['turns'])
+                        for utt_idx, t in enumerate(d['turns']):
+                            speaker = t['speaker'].lower()
+                            turn = {
+                                'speaker': speaker,
+                                'utterance': t['utterance'],
+                                'utt_idx': utt_idx,
+                                'dialogue_act': {
+                                    'binary': [],
+                                    'categorical': [],
+                                    'non-categorical': [],
+                                },
+                            }
+                            for i, frame in enumerate(t['frames']):
+                                domain = service2domain(frame['service'])
+                                for action in frame['actions']:
+                                    intent = action['act'].lower()
+                                    assert intent in ontology['intents'], [intent]
+                                    slot = action['slot'].lower()
+                                    value_list = action['values']
+                                    if action['act'] in ['REQ_MORE', 'AFFIRM', 'NEGATE', 'THANK_YOU', 'GOODBYE']:
+                                        turn['dialogue_act']['binary'].append({
+                                            "intent": intent,
+                                            "domain": '',
+                                            "slot": '',
+                                            "value": '',
+                                        })
+                                    elif action['act'] in ['NOTIFY_SUCCESS', 'NOTIFY_FAILURE', 'REQUEST_ALTS', 'AFFIRM_INTENT', 'NEGATE_INTENT']:
+                                        # Slot and values are always empty
+                                        turn['dialogue_act']['binary'].append({
+                                            "intent": intent,
+                                            "domain": domain,
+                                            "slot": '',
+                                            "value": '',
+                                        })
+                                    elif action['act'] in ['OFFER_INTENT', 'INFORM_INTENT']:
+                                        # always has "intent" as the slot, and a single value containing the intent being offered.
+                                        assert slot == 'intent'
+                                        turn['dialogue_act']['binary'].append({
+                                            "intent": intent,
+                                            "domain": domain,
+                                            "slot": slot,
+                                            "value": value_list[0].lower(),
+                                        })
+                                    elif action['act'] in ['REQUEST', 'SELECT'] and not value_list:
+                                        # always contains a slot, but values are optional.
+                                        # assert slot in ontology['domains'][domain]['slots']
+                                        turn['dialogue_act']['binary'].append({
+                                            "intent": intent,
+                                            "domain": domain,
+                                            "slot": slot,
+                                            "value": '',
+                                        })
+                                    elif action['act'] in ['INFORM_COUNT']:
+                                        # always has "count" as the slot, and a single element in values for the number of results obtained by the system.
+                                        value = value_list[0]
+                                        assert slot in ontology['domains'][domain]['slots']
+                                        (start, end), num = pharse_in_sen(value, t['utterance'])
+                                        if num:  # an act whose value cannot be located in the utterance is dropped
+                                            assert value.lower() == t['utterance'][start:end].lower() \
+                                                   or digit2word[value].lower() == t['utterance'][start:end].lower()
+                                            turn['dialogue_act']['non-categorical'].append({
+                                                "intent": intent,
+                                                "domain": domain,
+                                                "slot": slot.lower(),
+                                                "value": t['utterance'][start:end].lower(),
+                                                "start": start,
+                                                "end": end
+                                            })
+                                    else:
+                                        # have slot & value
+                                        if ontology['domains'][domain]['slots'][slot]['is_categorical']:
+                                            for value in value_list:
+                                                value = value.lower()
+                                                if value not in ontology['domains'][domain]['slots'][slot]['possible_values'] and value != 'dontcare':
+                                                    ontology['domains'][domain]['slots'][slot]['possible_values'].append(value)
+                                                    print('add value to ontology', domain, slot, value)
+                                                assert value in ontology['domains'][domain]['slots'][slot][
+                                                    'possible_values'] or value == 'dontcare'
+                                                turn['dialogue_act']['categorical'].append({
+                                                    "intent": intent,
+                                                    "domain": domain,
+                                                    "slot": slot,
+                                                    "value": value,
+                                                })
+                                        elif slot in numerical_slots:
+                                            value = value_list[-1]
+                                            (start, end), num = pharse_in_sen(value, t['utterance'])
+                                            if num:
+                                                assert value.lower() == t['utterance'][start:end].lower() \
+                                                       or digit2word[value].lower() == t['utterance'][start:end].lower()
+                                                turn['dialogue_act']['non-categorical'].append({
+                                                    "intent": intent,
+                                                    "domain": domain,
+                                                    "slot": slot.lower(),
+                                                    "value": t['utterance'][start:end].lower(),
+                                                    "start": start,
+                                                    "end": end
+                                                })
+                                        else:
+                                            # span info in frame['slots']
+                                            for value in value_list:
+                                                for slot_info in frame['slots']:
+                                                    start = slot_info['start']
+                                                    end = slot_info['exclusive_end']
+                                                    if slot_info['slot'] == slot and t['utterance'][start:end] == value:
+                                                        turn['dialogue_act']['non-categorical'].append({
+                                                            "intent": intent,
+                                                            "domain": domain,
+                                                            "slot": slot,
+                                                            "value": value.lower(),
+                                                            "start": start,
+                                                            "end": end
+                                                        })
+                                                        break
+                            # remember this turn's span acts (newest first) as fallback evidence for later state updates
+                            for ele in turn['dialogue_act']['non-categorical']:
+                                all_slot_spans_from_da.insert(0, {
+                                    "domain": ele["domain"],
+                                    "slot": ele["slot"],
+                                    "value": ele["value"].lower(),
+                                    "utt_idx": utt_idx,
+                                    "start": ele["start"],
+                                    "end": ele["end"]
+                                })
+                            if speaker == 'user':
+                                # DONE: record state update, may come from sys acts
+                                # prev_state: state. update the state using current frames.
+                                # candidate span info from prev frames and current frames
+                                slot_spans = []
+                                for frame in t['frames']:
+                                    for ele in frame['slots']:
+                                        slot, start, end = ele['slot'].lower(), ele['start'], ele['exclusive_end']
+                                        slot_spans.append({
+                                            "domain": service2domain(frame['service']),
+                                            "slot": slot,
+                                            "value": t['utterance'][start:end].lower(),
+                                            "utt_idx": utt_idx,
+                                            "start": start,
+                                            "end": end
+                                        })
+                                for frame in prev_sys_frames:
+                                    for ele in frame['slots']:
+                                        slot, start, end = ele['slot'].lower(), ele['start'], ele['exclusive_end']
+                                        slot_spans.append({
+                                            "domain": service2domain(frame['service']),
+                                            "slot": slot,
+                                            "value": d['turns'][utt_idx-1]['utterance'][start:end].lower(),
+                                            "utt_idx": utt_idx-1,
+                                            "start": start,
+                                            "end": end
+                                        })
+                                # turn['slot_spans'] = slot_spans
+                                # turn['all_slot_span'] = deepcopy(all_slot_spans_from_da)
+                                state_update = {"categorical": [], "non-categorical": []}
+                                for frame in t['frames']:
+                                    domain = service2domain(frame['service'])
+                                    for slot, value_list in frame['state']['slot_values'].items():
+                                        # For categorical slots, this list contains a single value assigned to the slot.
+                                        # For non-categorical slots, all the values in this list are spoken variations
+                                        # of each other and are equivalent (e.g, "6 pm", "six in the evening",
+                                        # "evening at 6" etc.).
+                                        numerical_equal_values = []
+                                        if slot in numerical_slots:
+                                            for value in value_list:
+                                                if value in digit2word:
+                                                    numerical_equal_values.append(digit2word[value])
+                                        value_list += numerical_equal_values
+                                        assert len(value_list) > 0, print(slot, value_list)  # NOTE(review): print() returns None, so the assertion message is always None
+                                        assert slot in state[domain]
+                                        value_list = list(set([x.lower() for x in value_list]))
+                                        if state[domain][slot] in value_list:
+                                            continue  # the tracked value is still among the accepted variants: no update
+                                        # new value: keep only variants that the previous user turn did not already list
+                                        candidate_values = value_list
+                                        for prev_user_frame in prev_user_frames:
+                                            prev_domain = service2domain(prev_user_frame['service'])
+                                            if prev_domain == domain and slot in prev_user_frame['state']['slot_values']:
+                                                prev_value_list = [x.lower() for x in prev_user_frame['state']['slot_values'][slot]]
+                                                candidate_values = list(set(value_list) - set(prev_value_list))
+                                        assert state[domain][slot] not in candidate_values
+                                        assert candidate_values
+
+                                        if ontology['domains'][domain]['slots'][slot]['is_categorical']:
+                                            state_cnt.setdefault('cate_slot_update', 0)
+                                            state_cnt['cate_slot_update'] += 1
+                                            value = candidate_values[0]
+                                            state_update['categorical'].append(
+                                                {"domain": domain, "slot": slot, "value": value}
+                                            )
+                                            state[domain][slot] = value
+                                        else:
+                                            state_cnt.setdefault('non_cate_slot_update', 0)
+                                            state_cnt['non_cate_slot_update'] += 1
+                                            span_priority = []  # score per span: +100 value match, +10 same slot, +1 same domain, +0.5 if taken from slot_spans
+                                            slot_spans_len = len(slot_spans)
+                                            all_slot_spans = slot_spans+all_slot_spans_from_da
+                                            for span_idx, slot_span in enumerate(all_slot_spans):
+                                                priority = 0
+                                                span_domain = slot_span['domain']
+                                                span_slot = slot_span['slot']
+                                                span_value = slot_span['value']
+                                                if domain == span_domain:
+                                                    priority += 1
+                                                if slot == span_slot:
+                                                    priority += 10
+                                                if span_value in candidate_values:
+                                                    priority += 100
+                                                if span_idx + 1 <= slot_spans_len:
+                                                    priority += 0.5
+                                                span_priority.append(priority)
+                                                if span_idx + 1 <= slot_spans_len:
+                                                    # slot_spans not run out
+                                                    if max(span_priority) >= 111.5:
+                                                        break
+                                                else:
+                                                    # search in previous da
+                                                    if max(span_priority) >= 111:
+                                                        break
+                                            if span_priority and max(span_priority) >= 100:  # at least one span carries a candidate value
+                                                # {111.5: 114255,
+                                                #  111: 29591,
+                                                #  100: 15208,
+                                                #  110: 2159,
+                                                #  100.5: 642,
+                                                #  110.5: 125,
+                                                #  101: 24}
+                                                max_priority = max(span_priority)
+                                                state_cnt.setdefault('max_priority', Counter())
+                                                state_cnt['max_priority'][max_priority] += 1
+                                                span_idx = np.argmax(span_priority)
+                                                ele = all_slot_spans[span_idx]
+                                                state_update['non-categorical'].append({
+                                                    "domain": domain,
+                                                    "slot": slot,
+                                                    "value": ele['value'],
+                                                    "utt_idx": ele["utt_idx"],
+                                                    "start": ele["start"],
+                                                    "end": ele["end"]
+                                                })
+                                                state[domain][slot] = ele['value']
+                                            else:
+                                                # not found: record the value without any span information
+                                                value = candidate_values[0]
+                                                state_update['non-categorical'].append(
+                                                    {"domain": domain, "slot": slot, "value": value}
+                                                )
+                                                state[domain][slot] = value
+                                                non_cate_slot_update_fail_cnt += 1
+                                            non_cate_slot_update_cnt += 1
+                                turn['state'] = deepcopy(state)
+                                turn['state_update'] = state_update
+                                prev_user_frames = deepcopy(t['frames'])
+                            else:
+                                prev_sys_frames = deepcopy(t['frames'])
+
+                            for da in turn['dialogue_act']['binary']:
+                                if da not in ontology['binary_dialogue_act']:
+                                    ontology['binary_dialogue_act'].append(deepcopy(da))
+                            dialogue['turns'].append(deepcopy(turn))
+                        assert len(dialogue['turns']) % 2 == 0
+                        dialogue['turns'].pop()  # drop the final turn, leaving an odd number of turns per dialogue
+                        processed_dialogue.append(dialogue)
+        # sort ontology binary
+        pprint(state_cnt)
+        ontology['binary_dialogue_act'] = sorted(ontology['binary_dialogue_act'], key=lambda x:x['intent'])
+        json.dump(ontology, open(os.path.join(self_dir, 'ontology.json'), 'w'), indent=2)
+        json.dump(processed_dialogue, open('data.json', 'w'), indent=2)  # NOTE(review): written to the current working directory, not self_dir
+        write_zipped_json(os.path.join(self_dir, 'data.zip'), 'data.json')
+        os.remove('data.json')
+        print('# train dialog: {}, # train utterance: {}'.format(num_train_dialog, num_train_utt))
+        print(non_cate_slot_update_fail_cnt, non_cate_slot_update_cnt) # 395 162399
+
+    else:
+        # read from file
+        processed_dialogue = read_zipped_json(os.path.join(self_dir, 'data.zip'), 'data.json')
+        ontology = json.load(open(os.path.join(self_dir, 'ontology.json')))
+    return processed_dialogue, ontology
+
+
+if __name__ == '__main__':
+    preprocess()  # regenerates data.zip / ontology.json when missing, otherwise just loads them
+    print(match) # {'0': 4146, '1': 53626, '>1': 2904} =>(after user act released) {'0': 487, '1': 63886, '>1': 3097}
diff --git a/data/unified_datasets/taskmaster/README.md b/data/unified_datasets/taskmaster/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..303a82317a50d1aa4f40bd625ad17ccf97da392a
--- /dev/null
+++ b/data/unified_datasets/taskmaster/README.md
@@ -0,0 +1,26 @@
+# README
+
+## Features
+
+- Annotations: character-level span for non-categorical slots. No slot descriptions.
+
+Statistics: 
+
+|       | \# dialogues | \# utterances | avg. turns | avg. tokens | \# domains |
+| ----- | ------------ | ------------- | ---------- | ----------- | ---------- |
+| train | 30483        | 540311        | 17.72      | 9.18        | 13         |
+
+## Main changes
+
+- each speaker for one turn
+- intent is set to **inform**
+- state and state update are not annotated
+- span info is provided by original data
+
+## Original data
+
+https://github.com/google-research-datasets/Taskmaster
+
+TM-1: https://github.com/google-research-datasets/Taskmaster/tree/master/TM-1-2019
+
+TM-2: https://github.com/google-research-datasets/Taskmaster/tree/master/TM-2-2020
\ No newline at end of file
diff --git a/data/unified_datasets/taskmaster/data.zip b/data/unified_datasets/taskmaster/data.zip
new file mode 100644
index 0000000000000000000000000000000000000000..f52a3808df8a413962c71f305c64d437eb196e00
Binary files /dev/null and b/data/unified_datasets/taskmaster/data.zip differ
diff --git a/data/unified_datasets/taskmaster/ontology.json b/data/unified_datasets/taskmaster/ontology.json
new file mode 100644
index 0000000000000000000000000000000000000000..9b5532e2bbcc36039486954e5b7bee1205a33cb5
--- /dev/null
+++ b/data/unified_datasets/taskmaster/ontology.json
@@ -0,0 +1,1168 @@
+{
+  "domains": {
+    "uber_lyft": {
+      "description": "order a car for a ride inside a city",
+      "slots": {
+        "location.from": {
+          "description": "pickup location",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 5764,
+          "in original ontology": true
+        },
+        "location.to": {
+          "description": "destination of the ride",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 6026,
+          "in original ontology": true
+        },
+        "type.ride": {
+          "description": "type of ride",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 13317,
+          "in original ontology": true
+        },
+        "num.people": {
+          "description": "number of people",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 2636,
+          "in original ontology": true
+        },
+        "price.estimate": {
+          "description": "estimated cost of the ride",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 4996,
+          "in original ontology": true
+        },
+        "duration.estimate": {
+          "description": "estimated duration of the ride",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 1152,
+          "in original ontology": true
+        },
+        "time.pickup": {
+          "description": "time of pickup",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 4303,
+          "in original ontology": true
+        },
+        "time.dropoff": {
+          "description": "time of dropoff",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 111,
+          "in original ontology": true
+        }
+      }
+    },
+    "movie_ticket": {
+      "description": "book movie tickets for a film",
+      "slots": {
+        "name.movie": {
+          "description": "name of the movie",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 8959,
+          "in original ontology": true
+        },
+        "name.theater": {
+          "description": "name of the theater",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 6842,
+          "in original ontology": true
+        },
+        "num.tickets": {
+          "description": "number of tickets",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 7368,
+          "in original ontology": true
+        },
+        "time.start": {
+          "description": "start time of the movie",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 14820,
+          "in original ontology": true
+        },
+        "location.theater": {
+          "description": "location of the theater",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 5295,
+          "in original ontology": true
+        },
+        "price.ticket": {
+          "description": "price of the ticket",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 2418,
+          "in original ontology": true
+        },
+        "type.screening": {
+          "description": "type of the screening",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 4579,
+          "in original ontology": true
+        },
+        "time.end": {
+          "description": "end time of the movie",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 10,
+          "in original ontology": true
+        },
+        "time.duration": {
+          "description": "duration of the movie",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 250,
+          "in original ontology": true
+        }
+      }
+    },
+    "restaurant_reservation": {
+      "description": "search for a restaurant and make a reservation",
+      "slots": {
+        "name.restaurant": {
+          "description": "name of the restaurant",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 14544,
+          "in original ontology": true
+        },
+        "name.reservation": {
+          "description": "name of the person who makes the reservation",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 609,
+          "in original ontology": true
+        },
+        "num.guests": {
+          "description": "number of guests",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 6962,
+          "in original ontology": true
+        },
+        "time.reservation": {
+          "description": "time of the reservation",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 12073,
+          "in original ontology": true
+        },
+        "type.seating": {
+          "description": "type of the seating",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 8983,
+          "in original ontology": true
+        },
+        "location.restaurant": {
+          "description": "location of the restaurant",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 4311,
+          "in original ontology": true
+        }
+      }
+    },
+    "coffee_ordering": {
+      "description": "order a coffee drink from either Starbucks or Peets for pick up",
+      "slots": {
+        "location.store": {
+          "description": "location of the coffee store",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 5510,
+          "in original ontology": true
+        },
+        "name.drink": {
+          "description": "name of the drink",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 9182,
+          "in original ontology": true
+        },
+        "size.drink": {
+          "description": "size of the drink",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 7804,
+          "in original ontology": true
+        },
+        "num.drink": {
+          "description": "number of drinks",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 848,
+          "in original ontology": true
+        },
+        "type.milk": {
+          "description": "type of the milk",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 8433,
+          "in original ontology": true
+        },
+        "preference": {
+          "description": "user preference of the drink",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 11266,
+          "in original ontology": true
+        }
+      }
+    },
+    "pizza_ordering": {
+      "description": "order a pizza",
+      "slots": {
+        "name.store": {
+          "description": "name of the pizza store",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 5127,
+          "in original ontology": true
+        },
+        "name.pizza": {
+          "description": "name of the pizza",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 9208,
+          "in original ontology": true
+        },
+        "size.pizza": {
+          "description": "size of the pizza",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 9661,
+          "in original ontology": true
+        },
+        "type.topping": {
+          "description": "type of the topping",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 20639,
+          "in original ontology": true
+        },
+        "type.crust": {
+          "description": "type of the crust",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 5099,
+          "in original ontology": true
+        },
+        "preference": {
+          "description": "user preference of the pizza",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 4998,
+          "in original ontology": true
+        },
+        "location.store": {
+          "description": "location of the pizza store",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 1487,
+          "in original ontology": true
+        }
+      }
+    },
+    "auto_repair": {
+      "description": "set up an auto repair appointment with a repair shop",
+      "slots": {
+        "name.store": {
+          "description": "name of the repair store",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 4005,
+          "in original ontology": true
+        },
+        "name.customer": {
+          "description": "name of the customer",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 4547,
+          "in original ontology": true
+        },
+        "date.appt": {
+          "description": "date of the appointment",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 7650,
+          "in original ontology": true
+        },
+        "time.appt": {
+          "description": "time of the appointment",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 9827,
+          "in original ontology": true
+        },
+        "reason.appt": {
+          "description": "reason of the appointment",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 6509,
+          "in original ontology": true
+        },
+        "name.vehicle": {
+          "description": "name of the vehicle",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 5262,
+          "in original ontology": true
+        },
+        "year.vehicle": {
+          "description": "year of the vehicle",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 4561,
+          "in original ontology": true
+        },
+        "location.store": {
+          "description": "location of the repair store",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 709,
+          "in original ontology": true
+        }
+      }
+    },
+    "flights": {
+      "description": "find a round trip or multi-city flights",
+      "slots": {
+        "type": {
+          "description": "type of the flight",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 1999,
+          "in original ontology": true
+        },
+        "destination1": {
+          "description": "the first destination city of the trip",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 3993,
+          "in original ontology": true
+        },
+        "destination2": {
+          "description": "the second destination city of the trip",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 128,
+          "in original ontology": true
+        },
+        "origin": {
+          "description": "the origin city of the trip",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 2595,
+          "in original ontology": true
+        },
+        "date.depart_origin": {
+          "description": "date of departure from origin",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 3177,
+          "in original ontology": true
+        },
+        "date.depart_intermediate": {
+          "description": "date of departure from intermediate",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 48,
+          "in original ontology": true
+        },
+        "date.return": {
+          "description": "date of return",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 2822,
+          "in original ontology": true
+        },
+        "time_of_day": {
+          "description": "time of the flight",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 3840,
+          "in original ontology": true
+        },
+        "seating_class": {
+          "description": "seat type (first class, business class, economy class, etc.)",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 3626,
+          "in original ontology": true
+        },
+        "seat_location": {
+          "description": "location of the seat",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 313,
+          "in original ontology": true
+        },
+        "stops": {
+          "description": "non-stop, layovers, etc.",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 6174,
+          "in original ontology": true
+        },
+        "price_range": {
+          "description": "price range of the flight",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 2646,
+          "in original ontology": true
+        },
+        "num.pax": {
+          "description": "number of people",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 437,
+          "in original ontology": true
+        },
+        "luggage": {
+          "description": "luggage information",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 17,
+          "in original ontology": true
+        },
+        "total_fare": {
+          "description": "total cost of the trip",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 1238,
+          "in original ontology": true
+        },
+        "other_description": {
+          "description": "other description of the flight",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 2620,
+          "in original ontology": true
+        },
+        "from": {
+          "description": "departure of the flight",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 1293,
+          "in original ontology": true
+        },
+        "to": {
+          "description": "destination of the flight",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 1979,
+          "in original ontology": true
+        },
+        "airline": {
+          "description": "airline of the flight",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 3981,
+          "in original ontology": true
+        },
+        "flight_number": {
+          "description": "the number of the flight",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 42,
+          "in original ontology": true
+        },
+        "date": {
+          "description": "date of the flight",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 756,
+          "in original ontology": true
+        },
+        "from.time": {
+          "description": "departure time of the flight",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 6440,
+          "in original ontology": true
+        },
+        "to.time": {
+          "description": "arrival time of the flight",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 2571,
+          "in original ontology": true
+        },
+        "stops.location": {
+          "description": "location of the stop",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 1097,
+          "in original ontology": true
+        },
+        "fare": {
+          "description": "cost of the flight",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 1475,
+          "in original ontology": true
+        }
+      }
+    },
+    "food_order": {
+      "description": "order take-out for a particular cuisine choice",
+      "slots": {
+        "name.item": {
+          "description": "name of the item",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 6080,
+          "in original ontology": true
+        },
+        "other_description.item": {
+          "description": "other description of the item",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 1474,
+          "in original ontology": true
+        },
+        "type.retrieval": {
+          "description": "type of the retrieval method",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 1868,
+          "in original ontology": true
+        },
+        "total_price": {
+          "description": "total price",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 5,
+          "in original ontology": true
+        },
+        "time.pickup": {
+          "description": "pick up time",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 981,
+          "in original ontology": true
+        },
+        "num.people": {
+          "description": "number of people",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 880,
+          "in original ontology": true
+        },
+        "name.restaurant": {
+          "description": "name of the restaurant",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 106,
+          "in original ontology": true
+        },
+        "type.food": {
+          "description": "type of food",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 1247,
+          "in original ontology": true
+        },
+        "type.meal": {
+          "description": "type of meal",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 64,
+          "in original ontology": true
+        },
+        "location.restaurant": {
+          "description": "location of the restaurant",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 8,
+          "in original ontology": true
+        },
+        "rating.restaurant": {
+          "description": "rating of the restaurant",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 5,
+          "in original ontology": true
+        },
+        "price_range": {
+          "description": "price range of the food",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 5,
+          "in original ontology": true
+        }
+      }
+    },
+    "hotel": {
+      "description": "find a hotel using typical preferences",
+      "slots": {
+        "name.hotel": {
+          "description": "name of the hotel",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 5241,
+          "in original ontology": true
+        },
+        "location.hotel": {
+          "description": "location of the hotel",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 2940,
+          "in original ontology": true
+        },
+        "sub_location.hotel": {
+          "description": "rough location of the hotel",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 1869,
+          "in original ontology": true
+        },
+        "star_rating": {
+          "description": "star rating of the hotel",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 2049,
+          "in original ontology": true
+        },
+        "customer_rating": {
+          "description": "customer rating of the hotel",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 1239,
+          "in original ontology": true
+        },
+        "price_range": {
+          "description": "price range of the hotel",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 2357,
+          "in original ontology": true
+        },
+        "amenity": {
+          "description": "amenity of the hotel",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 19030,
+          "in original ontology": true
+        },
+        "num.beds": {
+          "description": "number of beds to book",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 733,
+          "in original ontology": true
+        },
+        "type.bed": {
+          "description": "type of the bed",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 1423,
+          "in original ontology": true
+        },
+        "num.rooms": {
+          "description": "number of rooms to book",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 256,
+          "in original ontology": true
+        },
+        "check-in_date": {
+          "description": "check-in date",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 608,
+          "in original ontology": true
+        },
+        "check-out_date": {
+          "description": "check-out date",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 428,
+          "in original ontology": true
+        },
+        "date_range": {
+          "description": "date range of the reservation",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 2458,
+          "in original ontology": true
+        },
+        "num.guests": {
+          "description": "number of guests",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 1323,
+          "in original ontology": true
+        },
+        "type.room": {
+          "description": "type of the room",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 1840,
+          "in original ontology": true
+        },
+        "price_per_night": {
+          "description": "price per night",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 2357,
+          "in original ontology": true
+        },
+        "total_fare": {
+          "description": "total fare",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 28,
+          "in original ontology": true
+        },
+        "location": {
+          "description": "location of the hotel",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 352,
+          "in original ontology": true
+        }
+      }
+    },
+    "movie": {
+      "description": "find a movie to watch in theaters or using a streaming service at home",
+      "slots": {
+        "name.movie": {
+          "description": "name of the movie",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 13413,
+          "in original ontology": true
+        },
+        "genre": {
+          "description": "genre of the movie",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 4982,
+          "in original ontology": true
+        },
+        "name.theater": {
+          "description": "name of the theater",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 2371,
+          "in original ontology": true
+        },
+        "location.theater": {
+          "description": "location of the theater",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 2894,
+          "in original ontology": true
+        },
+        "time.start": {
+          "description": "start time of the movie",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 6455,
+          "in original ontology": true
+        },
+        "time.end": {
+          "description": "end time of the movie",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 3,
+          "in original ontology": true
+        },
+        "price.ticket": {
+          "description": "price of the ticket",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 989,
+          "in original ontology": true
+        },
+        "price.streaming": {
+          "description": "price of the streaming",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 397,
+          "in original ontology": true
+        },
+        "type.screening": {
+          "description": "type of the screening",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 1419,
+          "in original ontology": true
+        },
+        "audience_rating": {
+          "description": "audience rating",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 1506,
+          "in original ontology": true
+        },
+        "movie_rating": {
+          "description": "film rating",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 273,
+          "in original ontology": true
+        },
+        "release_date": {
+          "description": "release date of the movie",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 386,
+          "in original ontology": true
+        },
+        "runtime": {
+          "description": "running time of the movie",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 262,
+          "in original ontology": true
+        },
+        "real_person": {
+          "description": "name of actors, directors, etc.",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 3406,
+          "in original ontology": true
+        },
+        "character": {
+          "description": "name of character in the movie",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 1676,
+          "in original ontology": true
+        },
+        "streaming_service": {
+          "description": "streaming service that provides the movie",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 2729,
+          "in original ontology": true
+        },
+        "num.tickets": {
+          "description": "number of tickets",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 1045,
+          "in original ontology": true
+        },
+        "seating": {
+          "description": "type of seating",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 13,
+          "in original ontology": true
+        }
+      }
+    },
+    "music": {
+      "description": "find several tracks to play and then comment on each one",
+      "slots": {
+        "name.track": {
+          "description": "name of the track",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 4916,
+          "in original ontology": true
+        },
+        "name.artist": {
+          "description": "name of the artist",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 9287,
+          "in original ontology": true
+        },
+        "name.album": {
+          "description": "name of the album",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 1106,
+          "in original ontology": true
+        },
+        "name.genre": {
+          "description": "music genre",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 452,
+          "in original ontology": true
+        },
+        "type.music": {
+          "description": "rough type of the music",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 603,
+          "in original ontology": true
+        },
+        "describes_track": {
+          "description": "description of a track to find",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 2969,
+          "in original ontology": true
+        },
+        "describes_artist": {
+          "description": "description of an artist to find",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 612,
+          "in original ontology": true
+        },
+        "describes_album": {
+          "description": "description of an album to find",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 189,
+          "in original ontology": true
+        },
+        "describes_genre": {
+          "description": "description of a genre to find",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 26,
+          "in original ontology": true
+        },
+        "describes_type.music": {
+          "description": "description of the music type",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 375,
+          "in original ontology": true
+        }
+      }
+    },
+    "restaurant": {
+      "description": "ask for recommendations for a particular type of cuisine",
+      "slots": {
+        "name.restaurant": {
+          "description": "name of the restaurant",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 8676,
+          "in original ontology": true
+        },
+        "location": {
+          "description": "location of the restaurant",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 5165,
+          "in original ontology": true
+        },
+        "sub-location": {
+          "description": "rough location of the restaurant",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 1210,
+          "in original ontology": true
+        },
+        "type.food": {
+          "description": "the cuisine of the restaurant",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 12412,
+          "in original ontology": true
+        },
+        "menu_item": {
+          "description": "item in the menu",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 1499,
+          "in original ontology": true
+        },
+        "type.meal": {
+          "description": "type of meal",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 2677,
+          "in original ontology": true
+        },
+        "rating": {
+          "description": "rating of the restaurant",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 2951,
+          "in original ontology": true
+        },
+        "price_range": {
+          "description": "price range of the restaurant",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 1930,
+          "in original ontology": true
+        },
+        "business_hours": {
+          "description": "business hours of the restaurant",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 2024,
+          "in original ontology": true
+        },
+        "name.reservation": {
+          "description": "name of the person who makes the reservation",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 16,
+          "in original ontology": true
+        },
+        "num.guests": {
+          "description": "number of guests",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 179,
+          "in original ontology": true
+        },
+        "time.reservation": {
+          "description": "time of the reservation",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 216,
+          "in original ontology": true
+        },
+        "date.reservation": {
+          "description": "date of the reservation",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 130,
+          "in original ontology": true
+        },
+        "type.seating": {
+          "description": "type of the seating",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 11,
+          "in original ontology": true
+        }
+      }
+    },
+    "sport": {
+      "description": "discuss facts and stats about players, teams, games, etc. in EPL, MLB, MLS, NBA, NFL",
+      "slots": {
+        "name.team": {
+          "description": "name of the team",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 19651,
+          "in original ontology": true
+        },
+        "record.team": {
+          "description": "record of the team (number of wins and losses)",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 3338,
+          "in original ontology": true
+        },
+        "record.games_ahead": {
+          "description": "number of games ahead",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 33,
+          "in original ontology": true
+        },
+        "record.games_back": {
+          "description": "number of games behind",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 361,
+          "in original ontology": true
+        },
+        "place.team": {
+          "description": "ranking of the team",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 4075,
+          "in original ontology": true
+        },
+        "result.match": {
+          "description": "result of the match",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 3245,
+          "in original ontology": true
+        },
+        "score.match": {
+          "description": "score of the match",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 3241,
+          "in original ontology": true
+        },
+        "date.match": {
+          "description": "date of the match",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 2660,
+          "in original ontology": true
+        },
+        "day.match": {
+          "description": "day of the match",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 4743,
+          "in original ontology": true
+        },
+        "time.match": {
+          "description": "time of the match",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 1283,
+          "in original ontology": true
+        },
+        "name.player": {
+          "description": "name of the player",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 2365,
+          "in original ontology": true
+        },
+        "position.player": {
+          "description": "position of the player",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 2746,
+          "in original ontology": true
+        },
+        "record.player": {
+          "description": "record of the player",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 80,
+          "in original ontology": true
+        },
+        "name.non_player": {
+          "description": "name of non-player such as the manager, coach",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 843,
+          "in original ontology": true
+        },
+        "venue": {
+          "description": "venue where the match takes place",
+          "is_categorical": false,
+          "possible_values": [],
+          "count": 328,
+          "in original ontology": true
+        }
+      }
+    }
+  },
+  "intents": {
+    "inform": {
+      "description": ""
+    }
+  },
+  "binary_dialogue_act": [],
+  "state": {}
+}
\ No newline at end of file
diff --git a/data/unified_datasets/taskmaster/original_data.zip b/data/unified_datasets/taskmaster/original_data.zip
new file mode 100644
index 0000000000000000000000000000000000000000..8a36e95bf829063c7bff98404eb795107baf7b87
Binary files /dev/null and b/data/unified_datasets/taskmaster/original_data.zip differ
diff --git a/data/unified_datasets/taskmaster/preprocess.py b/data/unified_datasets/taskmaster/preprocess.py
new file mode 100644
index 0000000000000000000000000000000000000000..298ac2562432d5cf93dad8f6dec672e626eca249
--- /dev/null
+++ b/data/unified_datasets/taskmaster/preprocess.py
@@ -0,0 +1,445 @@
+import json
+import os
+import copy
+import zipfile
+from tqdm import tqdm
+import re
+from convlab2.util.file_util import read_zipped_json, write_zipped_json
+from pprint import pprint
+
+descriptions = {  # domain -> {name: description}; the entry keyed by the domain name describes the domain itself
+    "uber_lyft": {
+        "uber_lyft": "order a car for a ride inside a city",
+        "location.from": "pickup location",
+        "location.to": "destination of the ride",
+        "type.ride": "type of ride",
+        "num.people": "number of people",
+        "price.estimate": "estimated cost of the ride",
+        "duration.estimate": "estimated duration of the ride",
+        "time.pickup": "time of pickup",
+        "time.dropoff": "time of dropoff",
+    },
+    "movie_ticket": {
+        "movie_ticket": "book movie tickets for a film",
+        "name.movie": "name of the movie",
+        "name.theater": "name of the theater",
+        "num.tickets": "number of tickets",
+        "time.start": "start time of the movie",
+        "location.theater": "location of the theater",
+        "price.ticket": "price of the ticket",
+        "type.screening": "type of the screening",
+        "time.end": "end time of the movie",
+        "time.duration": "duration of the movie",
+    },
+    "restaurant_reservation": {
+        "restaurant_reservation": "searching for a restaurant and make reservation",
+        "name.restaurant": "name of the restaurant",
+        "name.reservation": "name of the person who makes the reservation",
+        "num.guests": "number of guests",
+        "time.reservation": "time of the reservation",
+        "type.seating": "type of the seating",
+        "location.restaurant": "location of the restaurant",
+    },
+    "coffee_ordering": {
+        "coffee_ordering": "order a coffee drink from either Starbucks or Peets for pick up",
+        "location.store": "location of the coffee store",
+        "name.drink": "name of the drink",
+        "size.drink": "size of the drink",
+        "num.drink": "number of drinks",
+        "type.milk": "type of the milk",
+        "preference": "user preference of the drink",
+    },
+    "pizza_ordering": {
+        "pizza_ordering": "order a pizza",
+        "name.store": "name of the pizza store",
+        "name.pizza": "name of the pizza",
+        "size.pizza": "size of the pizza",
+        "type.topping": "type of the topping",
+        "type.crust": "type of the crust",
+        "preference": "user preference of the pizza",
+        "location.store": "location of the pizza store",
+    },
+    "auto_repair": {
+        "auto_repair": "set up an auto repair appointment with a repair shop",
+        "name.store": "name of the repair store",
+        "name.customer": "name of the customer",
+        "date.appt": "date of the appointment",
+        "time.appt": "time of the appointment",
+        "reason.appt": "reason of the appointment",
+        "name.vehicle": "name of the vehicle",
+        "year.vehicle": "year of the vehicle",
+        "location.store": "location of the repair store",
+    },
+    "flights": {
+        "flights": "find a round trip or multi-city flights",
+        "type": "type of the flight",
+        "destination1": "the first destination city of the trip",
+        "destination2": "the second destination city of the trip",
+        "origin": "the origin city of the trip",
+        "date.depart_origin": "date of departure from origin",
+        "date.depart_intermediate": "date of departure from intermediate",
+        "date.return": "date of return",
+        "time_of_day": "time of the flight",
+        "seating_class": "seat type (first class, business class, economy class, etc.)",
+        "seat_location": "location of the seat",
+        "stops": "non-stop, layovers, etc.",
+        "price_range": "price range of the flight",
+        "num.pax": "number of people",
+        "luggage": "luggage information",
+        "total_fare": "total cost of the trip",
+        "other_description": "other description of the flight",
+        "from": "departure of the flight",
+        "to": "destination of the flight",
+        "airline": "airline of the flight",
+        "flight_number": "the number of the flight",
+        "date": "date of the flight",
+        "from.time": "departure time of the flight",
+        "to.time": "arrival time of the flight",
+        "stops.location": "location of the stop",
+        "fare": "cost of the flight",
+    },
+    "food_order": {
+        "food_order": "order take-out for a particular cuisine choice",
+        "name.item": "name of the item",
+        "other_description.item": "other description of the item",
+        "type.retrieval": "type of the retrieval method",
+        "total_price": "total price",
+        "time.pickup": "pick up time",
+        "num.people": "number of people",
+        "name.restaurant": "name of the restaurant",
+        "type.food": "type of food",
+        "type.meal": "type of meal",
+        "location.restaurant": "location of the restaurant",
+        "rating.restaurant": "rating of the restaurant",
+        "price_range": "price range of the food",
+    },
+    "hotel": {
+        "hotel": "find a hotel using typical preferences",
+        "name.hotel": "name of the hotel",
+        "location.hotel": "location of the hotel",
+        "sub_location.hotel": "rough location of the hotel",
+        "star_rating": "star rating of the hotel",
+        "customer_rating": "customer rating of the hotel",
+        "price_range": "price range of the hotel",
+        "amenity": "amenity of the hotel",
+        "num.beds": "number of beds to book",
+        "type.bed": "type of the bed",
+        "num.rooms": "number of rooms to book",
+        "check-in_date": "check-in date",
+        "check-out_date": "check-out date",
+        "date_range": "date range of the reservation",
+        "num.guests": "number of guests",
+        "type.room": "type of the room",
+        "price_per_night": "price per night",
+        "total_fare": "total fare",
+        "location": "location of the hotel",
+    },
+    "movie": {
+        "movie": "find a movie to watch in theaters or using a streaming service at home",
+        "name.movie": "name of the movie",
+        "genre": "genre of the movie",
+        "name.theater": "name of the theater",
+        "location.theater": "location of the theater",
+        "time.start": "start time of the movie",
+        "time.end": "end time of the movie",
+        "price.ticket": "price of the ticket",
+        "price.streaming": "price of the streaming",
+        "type.screening": "type of the screening",
+        "audience_rating": "audience rating",
+        "movie_rating": "film rating",
+        "release_date": "release date of the movie",
+        "runtime": "running time of the movie",
+        "real_person": "name of actors, directors, etc.",
+        "character": "name of character in the movie",
+        "streaming_service": "streaming service that provides the movie",
+        "num.tickets": "number of tickets",
+        "seating": "type of seating",
+    },
+    "music": {
+        "music": "find several tracks to play and then comment on each one",
+        "name.track": "name of the track",
+        "name.artist": "name of the artist",
+        "name.album": "name of the album",
+        "name.genre": "music genre",
+        "type.music": "rough type of the music",
+        "describes_track": "description of a track to find",
+        "describes_artist": "description of an artist to find",
+        "describes_album": "description of an album to find",
+        "describes_genre": "description of a genre to find",
+        "describes_type.music": "description of the music type",
+    },
+    "restaurant": {
+        "restaurant": "ask for recommendations for a particular type of cuisine",
+        "name.restaurant": "name of the restaurant",
+        "location": "location of the restaurant",
+        "sub-location": "rough location of the restaurant",
+        "type.food": "the cuisine of the restaurant",
+        "menu_item": "item in the menu",
+        "type.meal": "type of meal",
+        "rating": "rating of the restaurant",
+        "price_range": "price range of the restaurant",
+        "business_hours": "business hours of the restaurant",
+        "name.reservation": "name of the person who makes the reservation",
+        "num.guests": "number of guests",
+        "time.reservation": "time of the reservation",
+        "date.reservation": "date of the reservation",
+        "type.seating": "type of the seating",
+    },
+    "sport": {
+        "sport": "discuss facts and stats about players, teams, games, etc. in EPL, MLB, MLS, NBA, NFL",
+        "name.team": "name of the team",
+        "record.team": "record of the team (number of wins and losses)",
+        "record.games_ahead": "number of games ahead",
+        "record.games_back": "number of games behind",
+        "place.team": "ranking of the team",
+        "result.match": "result of the match",
+        "score.match": "score of the match",
+        "date.match": "date of the match",
+        "day.match": "day of the match",
+        "time.match": "time of the match",
+        "name.player": "name of the player",
+        "position.player": "position of the player",
+        "record.player": "record of the player",
+        "name.non_player": "name of non-player such as the manager, coach",
+        "venue": "venue where the match takes place",
+    }
+}
+
+
+def normalize_domain_name(domain):
+    """Map a raw Taskmaster domain identifier to its unified domain name.
+
+    In this script `domain` is either the prefix of a TM-1 `instruction_id`
+    (e.g. 'auto') or the basename of a TM-2 data file (e.g. 'food-ordering').
+
+    Raises:
+        AssertionError: if `domain` is not a known identifier. It is raised
+            explicitly, not via `assert 0`, so it still fires under `python -O`.
+    """
+    unified_names = {
+        'auto': 'auto_repair',
+        'pizza': 'pizza_ordering',
+        'coffee': 'coffee_ordering',
+        'uber': 'uber_lyft',
+        'restaurant': 'restaurant_reservation',
+        'movie': 'movie_ticket',
+        'flights': 'flights',
+        'food-ordering': 'food_order',
+        'hotels': 'hotel',
+        'movies': 'movie',
+        'music': 'music',
+        'restaurant-search': 'restaurant',
+        'sports': 'sport',
+    }
+    if domain not in unified_names:
+        raise AssertionError('unknown domain: {!r}'.format(domain))
+    return unified_names[domain]
+
+
+def format_turns(ori_turns):
+    """Merge raw utterances into alternating turns without mutating `ori_turns`.
+
+    Drops leading system turns, '(deleted)' turns and a trailing system turn;
+    consecutive utterances of one speaker are space-joined, segments shifted.
+    """
+    new_turns = []
+    previous_speaker = None
+    for i, turn in enumerate(ori_turns):
+        speaker = 'system' if turn['speaker'] == 'ASSISTANT' else 'user'
+        if not new_turns and speaker == 'system':
+            continue
+        if turn['text'] == '(deleted)':
+            continue
+        if speaker != previous_speaker:
+            previous_speaker = speaker
+            new_turn = copy.deepcopy(turn)
+            new_turn['speaker'] = speaker
+            new_turns.append(new_turn)
+            continue
+        # Same speaker again: fold this utterance into the previous turn.
+        last_turn = new_turns[-1]
+        if turn['text'] in ori_turns[i-1]['text']:
+            continue  # repeats (part of) the preceding raw utterance
+        index_shift = len(last_turn['text']) + 1  # +1 for the joining space
+        last_turn['text'] += ' '+turn['text']
+        if 'segments' in turn:
+            # Shift copies so the caller's segment dicts keep their offsets.
+            shifted = copy.deepcopy(turn['segments'])
+            for segment in shifted:
+                segment['start_index'] += index_shift
+                segment['end_index'] += index_shift
+            last_turn.setdefault('segments', []).extend(shifted)
+    if new_turns and new_turns[-1]['speaker'] == 'system':
+        new_turns = new_turns[:-1]
+    return new_turns
+
+
+def log_ontology(acts, ontology, ori_ontology):
+    """Record each act's domain, slot and intent in `ontology`, in place.
+
+    Slots get a usage `count` and an 'in original ontology' flag that tells
+    whether `ori_ontology[domain]` declares the slot.
+    """
+    for act in acts:
+        intent, domain, slot, _ = (act[key] for key in ('intent', 'domain', 'slot', 'value'))
+        domain_entry = ontology['domains'].setdefault(domain, {'description': "", 'slots': {}})
+        slot_table = domain_entry['slots']
+        if slot in slot_table:
+            slot_table[slot]['count'] += 1
+        else:
+            slot_table[slot] = {'description': '', 'is_categorical': False,
+                                'possible_values': [], 'count': 1}
+        # Refreshed on every occurrence, also for slots registered earlier.
+        slot_table[slot]['in original ontology'] = slot in ori_ontology[domain]
+        if not (intent is None or intent in ontology['intents']):
+            ontology['intents'][intent] = {"description": ''}
+
+
+def preprocess():
+    """Convert the raw Taskmaster-1/2 dumps into the unified dialogue format.
+
+    If `data.zip` or `ontology.json` is missing next to this script, extract
+    `original_data.zip`, rebuild both files and return the rebuilt objects;
+    otherwise just load and return the cached files.
+
+    Returns:
+        (processed_dialogue, ontology): list of unified dialogues and the
+        ontology dict (`domains`, `intents`, `binary_dialogue_act`, `state`).
+    """
+    self_dir = os.path.dirname(os.path.abspath(__file__))
+    processed_dialogue = []
+    ontology = {'domains': {},
+                'intents': {},
+                'binary_dialogue_act': [],
+                'state': {}}
+    original_zipped_path = os.path.join(self_dir, 'original_data.zip')
+    new_dir = os.path.join(self_dir, 'original_data')
+    if not os.path.exists(os.path.join(self_dir, 'data.zip')) or not os.path.exists(os.path.join(self_dir, 'ontology.json')):
+        print('unzip to', new_dir)
+        print('This may take several minutes')
+        with zipfile.ZipFile(original_zipped_path, 'r') as archive:
+            archive.extractall(self_dir)
+        files = [
+            ('TM-1-2019/woz-dialogs.json', 'TM-1-2019/ontology.json'),
+            ('TM-1-2019/self-dialogs.json', 'TM-1-2019/ontology.json'),
+            ('TM-2-2020/data/flights.json', 'TM-2-2020/ontology/flights.json'),
+            ('TM-2-2020/data/food-ordering.json', 'TM-2-2020/ontology/food-ordering.json'),
+            ('TM-2-2020/data/hotels.json', 'TM-2-2020/ontology/hotels.json'),
+            ('TM-2-2020/data/movies.json', 'TM-2-2020/ontology/movies.json'),
+            ('TM-2-2020/data/music.json', 'TM-2-2020/ontology/music.json'),
+            ('TM-2-2020/data/restaurant-search.json', 'TM-2-2020/ontology/restaurant-search.json'),
+            ('TM-2-2020/data/sports.json', 'TM-2-2020/ontology/sports.json')
+        ]
+        idx_count = 1
+
+        for filename, ontology_filename in files:
+            with open(os.path.join(new_dir, filename)) as data_file:
+                data = json.load(data_file)
+            with open(os.path.join(new_dir, ontology_filename)) as ontology_file:
+                raw_ontology = json.load(ontology_file)
+            ori_ontology = {}
+            if 'TM-1' in filename:
+                for item in raw_ontology.values():
+                    ori_ontology[item["id"]] = {}
+                    for slot in item["required"] + item["optional"]:
+                        ori_ontology[item["id"]][slot] = 0
+            else:
+                domain = normalize_domain_name(filename.split('/')[-1].split('.')[0])
+                ori_ontology[domain] = {}
+                for item in raw_ontology.values():
+                    for group in item:
+                        for anno in group["annotations"]:
+                            ori_ontology[domain][anno] = 0
+            # Register every declared domain/slot up front (count 0) so unused slots still appear in the ontology.
+            for d in ori_ontology:
+                if d not in ontology['domains']:
+                    ontology['domains'][d] = {'description': descriptions[d][d], 'slots': {}}
+                for s in ori_ontology[d]:
+                    if s not in ontology['domains'][d]['slots']:
+                        ontology['domains'][d]['slots'][s] = {
+                            'description': descriptions[d][s],
+                            'is_categorical': False,
+                            'possible_values': [],
+                            'count': 0,
+                            'in original ontology': True
+                        }
+            for ori_sess in tqdm(data, desc='processing taskmaster-{}'.format(filename)):
+                turns = format_turns(ori_sess['utterances'])
+                if not turns:
+                    continue
+                if 'TM-2' in filename:
+                    dial_domain = normalize_domain_name(filename.split('/')[-1].split('.')[0])
+                else:
+                    dial_domain = normalize_domain_name(ori_sess['instruction_id'].split('-', 1)[0])
+                dialogue = {
+                    "dataset": "taskmaster",
+                    "data_split": "train",
+                    "dialogue_id": 'taskmaster_' + str(idx_count),
+                    "original_id": ori_sess['conversation_id'],
+                    "instruction_id": ori_sess['instruction_id'],
+                    "domains": [
+                        dial_domain
+                    ],
+                    "turns": []
+                }
+                idx_count += 1
+                assert turns[0]['speaker'] == 'user' and turns[-1]['speaker'] == 'user', turns
+                for utt_idx, uttr in enumerate(turns):
+                    speaker = uttr['speaker']
+                    turn = {
+                        'speaker': speaker,
+                        'utterance': uttr['text'],
+                        'utt_idx': utt_idx,
+                        'dialogue_act': {
+                            'binary': [],
+                            'categorical': [],
+                            'non-categorical': [],
+                        },
+                    }
+                    if speaker == 'user':
+                        turn['state'] = {}
+                        turn['state_update'] = {'categorical': [], 'non-categorical': []}
+                    if 'segments' in uttr:
+                        for segment in uttr['segments']:
+                            for item in segment['annotations']:
+                                # Every annotation is attributed to the dialogue's own domain.
+                                domain = dial_domain
+                                # Slot = annotation name minus its first dot-separated prefix and any '.accept'/'.reject' suffix.
+                                slot = item['name'].split('.', 1)[-1]
+                                if slot.endswith('.accept') or slot.endswith('.reject'):
+                                    slot = slot[:-7]
+                                if slot not in ori_ontology[domain]:
+                                    # Skip slots the original ontology does not declare for this domain.
+                                    continue
+                                if not segment['text']:
+                                    print(slot)
+                                    print(segment)
+                                    print()
+                                assert turn['utterance'][segment['start_index']:segment['end_index']] == segment['text']
+                                turn['dialogue_act']['non-categorical'].append({
+                                    'intent': 'inform',
+                                    'domain': domain,
+                                    'slot': slot,
+                                    'value': segment['text'].lower(),
+                                    'start': segment['start_index'],
+                                    'end': segment['end_index']
+                                })
+                        log_ontology(turn['dialogue_act']['non-categorical'], ontology, ori_ontology)
+                    dialogue['turns'].append(turn)
+                processed_dialogue.append(dialogue)
+        with open(os.path.join(self_dir, 'ontology.json'), 'w') as ontology_file:
+            json.dump(ontology, ontology_file, indent=2)
+        # data.json is a temporary file in the current working directory, removed after zipping.
+        with open('data.json', 'w') as data_file:
+            json.dump(processed_dialogue, data_file, indent=2)
+        write_zipped_json(os.path.join(self_dir, 'data.zip'), 'data.json')
+        os.remove('data.json')
+    else:
+        # read from file
+        processed_dialogue = read_zipped_json(os.path.join(self_dir, 'data.zip'), 'data.json')
+        with open(os.path.join(self_dir, 'ontology.json')) as ontology_file:
+            ontology = json.load(ontology_file)
+    return processed_dialogue, ontology
+
+if __name__ == '__main__':  # script entry point: build (or load) the unified data and ontology
+    preprocess()
diff --git a/data/unified_datasets/woz/README.md b/data/unified_datasets/woz/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..9b883e678a20fba4da78f37545acedb50945c767
--- /dev/null
+++ b/data/unified_datasets/woz/README.md
@@ -0,0 +1,31 @@
+# README
+
+## Features
+
+- Annotations: dialogue act, character-level span for non-categorical slots.
+
+Statistics: 
+
+|       | \# dialogues | \# utterances | avg. turns | avg. tokens | \# domains |
+| ----- | ------------ | ------------- | ---------- | ----------- | ---------- |
+| train | 406         | 2936         | 7.23     | 11.36       | 1          |
+| dev | 135         | 941         | 6.97      | 11.99       | 1          |
+| test | 135         | 935         | 6.93       | 11.87       | 1          |
+
+
+## Main changes
+
+- domain is set to **restaurant**
+- apply some rule-based fixes to categorical values so that they appear in the `possible_values` lists
+- `belief_states` in WOZ dataset contains `request` intents, which are ignored in processing
+- some state annotations are not consistent with dialogue_act annotations. For example, in `woz_train_en.json`, first dialogue, 2nd turn:
+ 
+    `user: "How about Chinese food?"`
+   
+    `chinese food` is included in the `dialogue_act` annotation as an `inform` intent, but is not updated in the `belief_state` annotation.
+    
+    
+
+## Original data
+
+https://github.com/nmrksic/neural-belief-tracker/tree/master/data/woz
\ No newline at end of file
diff --git a/data/unified_datasets/woz/data.zip b/data/unified_datasets/woz/data.zip
new file mode 100644
index 0000000000000000000000000000000000000000..14b3cc709f5d6c3c8361542a1ccdfef7696a436c
Binary files /dev/null and b/data/unified_datasets/woz/data.zip differ
diff --git a/data/unified_datasets/woz/ontology.json b/data/unified_datasets/woz/ontology.json
new file mode 100644
index 0000000000000000000000000000000000000000..8f863f679941e2bdf4347a4d6c992a4881b8ef60
--- /dev/null
+++ b/data/unified_datasets/woz/ontology.json
@@ -0,0 +1,117 @@
+{
+    "domains": {
+        "restaurant": {
+            "description": "search for a restaurant to dine",
+            "slots": {
+                "food": {
+                    "is_categorical": false,
+                    "possible_values": [],
+                    "description": "food type of the restaurant"
+                },
+                "area": {
+                    "is_categorical": true,
+                    "possible_values": [
+                        "east",
+                        "west",
+                        "center",
+                        "north",
+                        "south"
+                    ],
+                    "description": "area of the restaurant"
+                },
+                "postcode": {
+                    "is_categorical": false,
+                    "possible_values": [],
+                    "description": "postal code of the restaurant"
+                },
+                "phone": {
+                    "is_categorical": false,
+                    "possible_values": [],
+                    "description": "phone number of the restaurant"
+                },
+                "address": {
+                    "is_categorical": false,
+                    "possible_values": [],
+                    "description": "address of the restaurant"
+                },
+                "price range": {
+                    "is_categorical": true,
+                    "possible_values": [
+                        "expensive",
+                        "moderate",
+                        "cheap"
+                    ],
+                    "description": "price range of the restaurant"
+                },
+                "name": {
+                    "is_categorical": false,
+                    "possible_values": [],
+                    "description": "name of the restaurant"
+                }
+            }
+        }
+    },
+    "intents": {
+        "inform": {
+            "description": "system informs user the value of a slot"
+        },
+        "request": {
+            "description": "system asks the user to provide value of a slot"
+        }
+    },
+    "binary_dialogue_act": [
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "postcode",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "phone",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "address",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "area",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "price range",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "food",
+            "value": ""
+        },
+        {
+            "intent": "request",
+            "domain": "restaurant",
+            "slot": "name",
+            "value": ""
+        }
+    ],
+    "state": {
+        "restaurant": {
+            "food": "",
+            "area": "",
+            "postcode": "",
+            "phone": "",
+            "address": "",
+            "price range": "",
+            "name": ""
+        }
+    }
+}
\ No newline at end of file
diff --git a/data/unified_datasets/woz/original_data.zip b/data/unified_datasets/woz/original_data.zip
new file mode 100644
index 0000000000000000000000000000000000000000..52f0a9c6d72e88082d0d52f64d3298516136c26a
Binary files /dev/null and b/data/unified_datasets/woz/original_data.zip differ
diff --git a/data/unified_datasets/woz/preprocess.py b/data/unified_datasets/woz/preprocess.py
new file mode 100644
index 0000000000000000000000000000000000000000..a99394b42479e927f8c2cf8194a0233bc5fe8129
--- /dev/null
+++ b/data/unified_datasets/woz/preprocess.py
@@ -0,0 +1,324 @@
+import copy
+import zipfile
+import json
+import os
+from collections import Counter
+from tqdm import tqdm
+import logging
+import sys
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+logging.basicConfig(level=logging.INFO)
+from convlab2.util.file_util import read_zipped_json, write_zipped_json
+
+# Absolute path of the directory that contains this script.
+self_dir = os.path.dirname(os.path.abspath(__file__))
+
+# Slots treated as categorical; every other slot is non-categorical.
+cat_slots = ['price range', 'area']
+
+# Value lists for the categorical slots (not referenced elsewhere in this script).
+cat_slot_values = {
+    'area': [
+        "east", "west", "center", "north", "south",
+    ],
+    'price range': [
+        "expensive", "moderate", "dontcare", "cheap",
+    ],
+}
+
+# Descriptions copied into the generated ontology: the 'domain' key describes the
+# restaurant domain itself, the remaining keys of 'restaurant' describe its slots.
+# The key order of 'restaurant' also fixes the slot order of every dialogue state.
+woz_desc = {
+    'restaurant': {
+        'domain': 'search for a restaurant to dine',
+        'food': 'food type of the restaurant',
+        'area': 'area of the restaurant',
+        'postcode': 'postal code of the restaurant',
+        'phone': 'phone number of the restaurant',
+        'address': 'address of the restaurant',
+        'price range': 'price range of the restaurant',
+        'name': 'name of the restaurant',
+    },
+    'intents': {
+        'inform': 'system informs user the value of a slot',
+        'request': 'system asks the user to provide value of a slot',
+    },
+}
+
+
+def _normalize_value(slot, value):
+    """Return the canonical spelling of a categorical slot value.
+
+    Values of non-categorical slots are returned unchanged, so free-form values
+    such as the food type 'middle eastern' are not rewritten to 'east'.
+    """
+    if slot not in cat_slots:
+        return value
+    if 'expensive' in value:
+        return 'expensive'
+    if value == 'centre':
+        return 'center'
+    return 'east' if 'east' in value else value
+
+
+def convert_da(da, utt, all_binary):
+    """Convert the user acts of one turn into the unified dialogue-act format.
+
+    :param da: list of (slot, value) pairs; for slot 'request' the value is
+        the name of the requested slot.
+    :param utt: user utterance, searched for spans of non-categorical values.
+    :param all_binary: list of distinct binary acts seen so far (extended in place).
+    :return: dict with 'binary', 'categorical' and 'non-categorical' act lists.
+    """
+    converted = {'binary': [], 'categorical': [], 'non-categorical': []}
+    for slot, value in da:
+        if slot == 'request':
+            act = {'intent': 'request', 'domain': 'restaurant', 'slot': value, 'value': ''}
+            converted['binary'].append(act)
+            if act not in all_binary:
+                all_binary.append(act)
+            continue
+
+        value = _normalize_value(slot, value)
+        act = {'intent': 'inform', 'domain': 'restaurant', 'slot': slot, 'value': value}
+        if slot in cat_slots:
+            converted['categorical'].append(act)
+            continue
+
+        # Attach the character span only when the value occurs verbatim.
+        start = utt.find(value)
+        if start != -1:
+            act['start'] = start
+            act['end'] = start + len(value)
+        converted['non-categorical'].append(act)
+
+    return converted
+
+
+def convert_state(state):
+    """Build the full restaurant state from one belief-state annotation.
+
+    :param state: list of dicts with an 'act' ('request' or 'inform') and a
+        'slots' list of (slot, value) pairs.
+    :return: {'restaurant': {slot: value}} covering every slot of the domain;
+        slots without an 'inform' entry map to ''.
+    :raises ValueError: if an entry carries any other act.
+    """
+    ret = {'restaurant': {s: '' for s in woz_desc['restaurant'] if s != 'domain'}}
+    for entry in state:
+        if entry['act'] not in ('request', 'inform'):
+            raise ValueError('unexpected act: {}'.format(entry['act']))
+        if entry['act'] != 'inform':
+            # 'request' entries are not part of the state.
+            continue
+        for slot, value in entry['slots']:
+            ret['restaurant'][slot] = _normalize_value(slot, value)
+
+    return ret
+
+
+def get_state_update(prev_state, cur_state, usr_da, turn_idx, dialog_idx):
+    """Collect the slots whose value changed between two consecutive states.
+
+    A changed categorical slot is reported with its new value only. A changed
+    non-categorical slot additionally gets the character span of the value when
+    a matching user act of the current turn carries one.
+
+    :param prev_state: state after the previous user turn, {'restaurant': {...}}.
+    :param cur_state: state after the current user turn, same layout.
+    :param usr_da: converted user dialogue acts of the current turn; only its
+        'non-categorical' list is read.
+    :param turn_idx: index of the turn in the original dialogue; spans are
+        tagged with ``utt_idx = turn_idx * 2``.
+    :param dialog_idx: id of the dialogue (unused).
+    :return: dict with 'categorical' and 'non-categorical' lists of updates.
+    """
+    ret = {
+        'categorical': [],
+        'non-categorical': []
+    }
+    for slot, prev_value in prev_state['restaurant'].items():
+        cur_value = cur_state['restaurant'][slot]
+        if cur_value == prev_value:
+            continue
+
+        update = {
+            'domain': 'restaurant',
+            'slot': slot,
+            'value': cur_value
+        }
+        if slot in cat_slots:
+            ret['categorical'].append(update)
+            continue
+
+        ret['non-categorical'].append(update)
+        # 'dontcare' is a special value rather than a span of the utterance, so the
+        # span lookup depends on the new value (not on the previous one).
+        if cur_value == 'dontcare':
+            continue
+
+        for act in usr_da['non-categorical']:
+            if act['slot'] != slot or act['value'] != cur_value:
+                continue
+            if 'start' in act:
+                update.update({
+                    'utt_idx': turn_idx * 2,
+                    'start': act['start'],
+                    'end': act['end']
+                })
+            # Report each slot once, even if the same act is annotated repeatedly.
+            break
+
+    return ret
+
+
+
+def preprocess():
+    """Convert the original WOZ data into the unified format.
+
+    If ``data.zip`` and ``ontology.json`` already exist next to this script they
+    are loaded. Otherwise ``original_data.zip`` is extracted, every dialogue is
+    converted and both files are written.
+
+    :return: tuple ``(all_dialogues, new_ont)``: the list of converted dialogues
+        and the ontology dict.
+    :raises FileNotFoundError: if the data must be processed but
+        ``original_data.zip`` is missing.
+    """
+    data_splits = ['train', 'validate', 'test']
+    all_dialogues = []
+    all_binary_intents = []
+    all_slot_value = {}
+    extract_dir = os.path.join(self_dir, 'original_data')
+    data_path = os.path.join(self_dir, 'data.zip')
+    ontology_path = os.path.join(self_dir, 'ontology.json')
+
+    # Check for the outputs next to this script (where they are written and read),
+    # not in the current working directory.
+    if not os.path.exists(data_path) or not os.path.exists(ontology_path):
+        # data not processed
+        data_zip_file = os.path.join(self_dir, 'original_data.zip')
+        if not os.path.exists(data_zip_file):
+            raise FileNotFoundError(data_zip_file)
+
+        logging.info('unzip woz data to {}'.format(extract_dir))
+        with zipfile.ZipFile(data_zip_file, 'r') as archive:
+            archive.extractall(extract_dir)
+
+        dialog_id = 1
+        for split in data_splits:
+            split_file = os.path.join(extract_dir, 'original_data/woz_{}_en.json'.format(split))
+            with open(split_file) as f:
+                data = json.load(f)
+            # 'validate' is stored as 'val' in the converted data.
+            split_name = 'val' if split == 'validate' else split
+
+            for dialogue in data:
+                ret = {}
+                ret['dataset'] = "woz"
+                ret['data_split'] = split_name
+                ret['dialogue_id'] = 'woz_' + str(dialog_id)
+                ret['original_id'] = split_name + str(dialogue['dialogue_idx'])
+                ret['domains'] = ['restaurant']
+                ret['turns'] = []
+
+                prev_state = {'restaurant': {k: '' for k in woz_desc['restaurant'] if k != 'domain'}}
+                for i, turn in enumerate(dialogue['dialogue']):
+                    sys_utt = turn['system_transcript'].lower()
+                    usr_utt = turn['transcript'].lower()
+                    usr_da = turn['turn_label']
+                    bs = turn['belief_state']
+
+                    # Collect every slot, and every value informed for it, for the ontology.
+                    for s, v in usr_da:
+                        if s == 'request':
+                            if v not in all_slot_value and v != 'dontcare':
+                                all_slot_value[v] = []
+                        elif v != 'dontcare':
+                            values = all_slot_value.setdefault(s, [])
+                            if v not in values:
+                                values.append(v)
+
+                    # The system utterance of the first turn is skipped.
+                    if i != 0:
+                        ret['turns'].append({
+                            'utt_idx': len(ret['turns']),
+                            'speaker': 'system',
+                            'utterance': sys_utt,
+                            'dialogue_act': {'binary': [], 'categorical': [], 'non-categorical': []},
+                        })
+
+                    cur_state = convert_state(bs)
+                    cur_usr_da = convert_da(usr_da, usr_utt, all_binary_intents)
+                    ret['turns'].append({
+                        'utt_idx': len(ret['turns']),
+                        'speaker': 'user',
+                        'utterance': usr_utt,
+                        'state': cur_state,
+                        'dialogue_act': cur_usr_da,
+                        'state_update': get_state_update(prev_state, cur_state, cur_usr_da, i, ret['dialogue_id'])
+                    })
+
+                    prev_state = copy.deepcopy(cur_state)
+
+                all_dialogues.append(ret)
+                dialog_id += 1
+
+        # data.json is written to the current working directory, handed to
+        # write_zipped_json by its bare name and removed afterwards.
+        save_file = 'data.json'
+        with open(save_file, 'w') as f:
+            json.dump(all_dialogues, f, indent=4)
+        write_zipped_json(data_path, save_file)
+        os.remove(save_file)
+
+        new_ont = {
+            'domains': {
+                'restaurant': {
+                    'description': woz_desc['restaurant']['domain'],
+                    'slots': {}
+                }
+            },
+            'intents': {
+                'inform': {
+                    'description': woz_desc['intents']['inform'],
+                },
+                'request': {
+                    'description': woz_desc['intents']['request'],
+                },
+            },
+            'binary_dialogue_act': list(all_binary_intents),
+        }
+
+        slots = new_ont['domains']['restaurant']['slots']
+        for slot, values in all_slot_value.items():
+            possible_values = []
+            if slot in cat_slots:
+                # Same rule-based fixes as applied to the dialogue acts and states.
+                for v in values:
+                    v = 'expensive' if 'expensive' in v else v
+                    v = 'center' if v == 'centre' else v
+                    v = 'east' if 'east' in v else v
+                    if v not in possible_values:
+                        possible_values.append(v)
+            slots[slot] = {
+                'is_categorical': slot in cat_slots,
+                'possible_values': possible_values,
+                'description': woz_desc['restaurant'][slot]
+            }
+
+        new_ont['state'] = {'restaurant': {k: '' for k in all_slot_value}}
+        with open(ontology_path, 'w') as f:
+            json.dump(new_ont, f, indent=4)
+    else:
+        # read from file
+        all_dialogues = read_zipped_json(data_path, 'data.json')
+        with open(ontology_path) as f:
+            new_ont = json.load(f)
+
+    return all_dialogues, new_ont
+
+
+if __name__ == '__main__':  # executed as a script: build (or load) the converted data
+    preprocess()