diff --git a/convlab/util/__init__.py b/convlab/util/__init__.py
index 6a84b7db276389d9bbcd6ba097a0b7bb00440a48..1688e21b80c08562a3ca1e5ca45fd181b57cbc98 100755
--- a/convlab/util/__init__.py
+++ b/convlab/util/__init__.py
@@ -1,3 +1 @@
-from convlab.util.unified_datasets_util import load_dataset, load_ontology, load_database, \
-    load_unified_data, load_nlu_data, load_dst_data, load_policy_data, load_nlg_data, load_e2e_data, load_rg_data, \
-    download_unified_datasets, relative_import_module_from_unified_datasets
\ No newline at end of file
+from convlab.util.unified_datasets_util import *
\ No newline at end of file
diff --git a/convlab/util/unified_datasets_util.py b/convlab/util/unified_datasets_util.py
index 1e3b0c20bd959ea3098b07b813ed98189aac840f..e24658410738b290da97149382c8c89030936679 100644
--- a/convlab/util/unified_datasets_util.py
+++ b/convlab/util/unified_datasets_util.py
@@ -65,12 +65,14 @@ def relative_import_module_from_unified_datasets(dataset_name, filename, names2i
             variables.append(eval(f'module.{name}'))
         return variables
 
-def load_dataset(dataset_name:str, dial_ids_order=None) -> Dict:
+def load_dataset(dataset_name:str, dial_ids_order=None, split2ratio={}) -> Dict:
     """load unified dataset from `data/unified_datasets/$dataset_name`
 
     Args:
         dataset_name (str): unique dataset name in `data/unified_datasets`
         dial_ids_order (int): idx of shuffled dial order in `data/unified_datasets/$dataset_name/shuffled_dial_ids.json`
+        split2ratio (dict): a dictionary that maps a data split to the ratio of its dialogues to keep; the first round(len * ratio) dialogues of the split are used.
+            For example, if you want to use only the first half of the training data, you can set split2ratio = {'train': 0.5}
 
     Returns:
         dataset (dict): keys are data splits and the values are lists of dialogues
@@ -86,13 +88,17 @@ def load_dataset(dataset_name:str, dial_ids_order=None) -> Dict:
         data_path = download_unified_datasets(dataset_name, 'shuffled_dial_ids.json', data_dir)
         dial_ids = json.load(open(data_path))[dial_ids_order]
         for data_split in dial_ids:
-            dataset[data_split] = [dialogues[i] for i in dial_ids[data_split]]
+            ratio = split2ratio.get(data_split, 1)
+            dataset[data_split] = [dialogues[i] for i in dial_ids[data_split][:round(len(dial_ids[data_split])*ratio)]]
     else:
         for dialogue in dialogues:
             if dialogue['data_split'] not in dataset:
                 dataset[dialogue['data_split']] = [dialogue]
             else:
                 dataset[dialogue['data_split']].append(dialogue)
+        for data_split in dataset:
+            if data_split in split2ratio:
+                dataset[data_split] = dataset[data_split][:round(len(dataset[data_split])*split2ratio[data_split])]
     return dataset
 
 def load_ontology(dataset_name:str) -> Dict:
diff --git a/data/unified_datasets/camrest/database.py b/data/unified_datasets/camrest/database.py
new file mode 100644
index 0000000000000000000000000000000000000000..f6245754ca07923b8b9375c4c42c91be3ce7c16c
--- /dev/null
+++ b/data/unified_datasets/camrest/database.py
@@ -0,0 +1,67 @@
+import json
+import os
+import random
+from fuzzywuzzy import fuzz
+from itertools import chain
+from zipfile import ZipFile
+from copy import deepcopy
+from convlab.util.unified_datasets_util import BaseDatabase, download_unified_datasets
+
+
+class Database(BaseDatabase):
+    """restaurant database of CamRest, read from `data/CamRestDB.json` inside data.zip."""
+
+    def __init__(self):
+        """extract data.zip and load the database; the archive is closed as soon as the database is read."""
+        data_path = download_unified_datasets('camrest', 'data.zip', os.path.dirname(os.path.abspath(__file__)))
+        self.dbs = {}
+        with ZipFile(data_path) as archive, archive.open('data/CamRestDB.json') as f:
+            self.dbs['restaurant'] = json.loads(f.read())
+        # slot name used in the dialogue state -> attribute name used in the database records
+        self.slot2dbattr = {
+            'price range': 'pricerange',
+        }
+
+    def query(self, domain: str, state: dict, topk: int, ignore_open=False, soft_contraints=(), fuzzy_match_ratio=60) -> list:
+        """return a list of topk entities (dict containing slot-value pairs) for a given domain based on the dialogue state.
+
+        `state` is a sequence of (slot, value) pairs compared case-insensitively; `soft_contraints` pairs only need
+        `fuzz.partial_ratio` >= `fuzzy_match_ratio`. Results are record copies plus a 'Ref' key; `ignore_open` is unused.
+        """
+        assert domain == 'restaurant'
+        # rename slots to database attributes; the area value 'center' is rewritten to 'centre'
+        state = [('area', 'centre') if ele[0] == 'area' and ele[1] == 'center' else (self.slot2dbattr.get(ele[0], ele[0]), ele[1]) for ele in state]
+
+        found = []
+        for i, record in enumerate(self.dbs[domain]):
+            constraints_iterator = zip(state, [False] * len(state))
+            soft_contraints_iterator = zip(soft_contraints, [True] * len(soft_contraints))
+            for (key, val), fuzzy_match in chain(constraints_iterator, soft_contraints_iterator):
+                if val in ["", "dont care", 'not mentioned', "don't care", "dontcare", "do n't care"]:
+                    continue
+                try:
+                    if key not in record or record[key].strip() == '?':  # '?' matches any constraint
+                        continue
+                    wanted, actual = val.strip().lower(), record[key].strip().lower()
+                    matched = fuzz.partial_ratio(wanted, actual) >= fuzzy_match_ratio if fuzzy_match else wanted == actual
+                    if not matched:
+                        break
+                except Exception:
+                    # best effort: skip constraints that cannot be compared, without swallowing KeyboardInterrupt/SystemExit
+                    continue
+            else:
+                res = deepcopy(record)
+                res['Ref'] = '{0:08d}'.format(i)
+                found.append(res)
+                if len(found) == topk:
+                    return found
+        return found
+
+
+if __name__ == '__main__':
+    # smoke test: check the BaseDatabase interface, then print up to three restaurants with an expensive price range
+    assert issubclass(Database, BaseDatabase)
+    db = Database()
+    assert isinstance(db, BaseDatabase)
+    top_entities = db.query("restaurant", [['price range', 'expensive']], topk=3)
+    print(top_entities, len(top_entities))
diff --git a/data/unified_datasets/camrest/ontology.json b/data/unified_datasets/camrest/ontology.json
deleted file mode 100644
index b5617f7ef4d441674a361a280acdf3123468d53a..0000000000000000000000000000000000000000
--- a/data/unified_datasets/camrest/ontology.json
+++ /dev/null
@@ -1,122 +0,0 @@
-{
-    "domains": {
-        "restaurant": {
-            "description": "find a restaurant to eat",
-            "slots": {
-                "food": {
-                    "description": "food type the restaurant serves",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "area": {
-                    "description": "area where the restaurant is located",
-                    "is_categorical": true,
-                    "possible_values": [
-                        "north",
-                        "east",
-                        "west",
-                        "south",
-                        "centre"
-                    ]
-                },
-                "name": {
-                    "description": "name of the restaurant",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "pricerange": {
-                    "description": "price range of the restaurant",
-                    "is_categorical": true,
-                    "possible_values": [
-                        "cheap",
-                        "moderate",
-                        "expensive"
-                    ]
-                },
-                "phone": {
-                    "description": "phone number of the restaurant",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "address": {
-                    "description": "exact location of the restaurant",
-                    "is_categorical": false,
-                    "possible_values": []
-                },
-                "postcode": {
-                    "description": "postal code of the restaurant",
-                    "is_categorical": false,
-                    "possible_values": []
-                }
-            }
-        }
-    },
-    "intents": {
-        "inform": {
-            "description": "inform user of value of a slot"
-        },
-        "request": {
-            "description": "ask for value of a slot"
-        },
-        "nooffer": {
-            "description": "inform user that no restaurant matches his request"
-        }
-    },
-    "binary_dialogue_act": [
-        {
-            "intent": "request",
-            "domain": "restaurant",
-            "slot": "food",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "restaurant",
-            "slot": "address",
-            "value": ""
-        },
-        {
-            "intent": "nooffer",
-            "domain": "restaurant",
-            "slot": "",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "restaurant",
-            "slot": "area",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "restaurant",
-            "slot": "phone",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "restaurant",
-            "slot": "pricerange",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "restaurant",
-            "slot": "postcode",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "restaurant",
-            "slot": "name",
-            "value": ""
-        }
-    ],
-    "state": {
-        "restaurant": {
-            "pricerange": "",
-            "area": "",
-            "food": ""
-        }
-    }
-}
\ No newline at end of file
diff --git a/data/unified_datasets/taskmaster/README.md b/data/unified_datasets/taskmaster/README.md
deleted file mode 100644
index 303a82317a50d1aa4f40bd625ad17ccf97da392a..0000000000000000000000000000000000000000
--- a/data/unified_datasets/taskmaster/README.md
+++ /dev/null
@@ -1,26 +0,0 @@
-# README
-
-## Features
-
-- Annotations: character-level span for non-categorical slots. No slot descriptions.
-
-Statistics: 
-
-|       | \# dialogues | \# utterances | avg. turns | avg. tokens | \# domains |
-| ----- | ------------ | ------------- | ---------- | ----------- | ---------- |
-| train | 30483        | 540311        | 17.72      | 9.18        | 13         |
-
-## Main changes
-
-- each speaker for one turn
-- intent is set to **inform**
-- not annotate state and state upadte
-- span info is provided by original data
-
-## Original data
-
-https://github.com/google-research-datasets/Taskmaster
-
-TM-1: https://github.com/google-research-datasets/Taskmaster/tree/master/TM-1-2019
-
-TM-2: https://github.com/google-research-datasets/Taskmaster/tree/master/TM-2-2020
\ No newline at end of file
diff --git a/data/unified_datasets/taskmaster/data.zip b/data/unified_datasets/taskmaster/data.zip
deleted file mode 100644
index f52a3808df8a413962c71f305c64d437eb196e00..0000000000000000000000000000000000000000
Binary files a/data/unified_datasets/taskmaster/data.zip and /dev/null differ
diff --git a/data/unified_datasets/taskmaster/ontology.json b/data/unified_datasets/taskmaster/ontology.json
deleted file mode 100644
index 9b5532e2bbcc36039486954e5b7bee1205a33cb5..0000000000000000000000000000000000000000
--- a/data/unified_datasets/taskmaster/ontology.json
+++ /dev/null
@@ -1,1168 +0,0 @@
-{
-  "domains": {
-    "uber_lyft": {
-      "description": "order a car for a ride inside a city",
-      "slots": {
-        "location.from": {
-          "description": "pickup location",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 5764,
-          "in original ontology": true
-        },
-        "location.to": {
-          "description": "destination of the ride",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 6026,
-          "in original ontology": true
-        },
-        "type.ride": {
-          "description": "type of ride",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 13317,
-          "in original ontology": true
-        },
-        "num.people": {
-          "description": "number of people",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 2636,
-          "in original ontology": true
-        },
-        "price.estimate": {
-          "description": "estimated cost of the ride",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 4996,
-          "in original ontology": true
-        },
-        "duration.estimate": {
-          "description": "estimated duration of the ride",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 1152,
-          "in original ontology": true
-        },
-        "time.pickup": {
-          "description": "time of pickup",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 4303,
-          "in original ontology": true
-        },
-        "time.dropoff": {
-          "description": "time of dropoff",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 111,
-          "in original ontology": true
-        }
-      }
-    },
-    "movie_ticket": {
-      "description": "book movie tickets for a film",
-      "slots": {
-        "name.movie": {
-          "description": "name of the movie",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 8959,
-          "in original ontology": true
-        },
-        "name.theater": {
-          "description": "name of the theater",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 6842,
-          "in original ontology": true
-        },
-        "num.tickets": {
-          "description": "number of tickets",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 7368,
-          "in original ontology": true
-        },
-        "time.start": {
-          "description": "start time of the movie",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 14820,
-          "in original ontology": true
-        },
-        "location.theater": {
-          "description": "location of the theater",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 5295,
-          "in original ontology": true
-        },
-        "price.ticket": {
-          "description": "price of the ticket",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 2418,
-          "in original ontology": true
-        },
-        "type.screening": {
-          "description": "type of the screening",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 4579,
-          "in original ontology": true
-        },
-        "time.end": {
-          "description": "end time of the movie",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 10,
-          "in original ontology": true
-        },
-        "time.duration": {
-          "description": "duration of the movie",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 250,
-          "in original ontology": true
-        }
-      }
-    },
-    "restaurant_reservation": {
-      "description": "searching for a restaurant and make reservation",
-      "slots": {
-        "name.restaurant": {
-          "description": "name of the restaurant",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 14544,
-          "in original ontology": true
-        },
-        "name.reservation": {
-          "description": "name of the person who make the reservation",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 609,
-          "in original ontology": true
-        },
-        "num.guests": {
-          "description": "number of guests",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 6962,
-          "in original ontology": true
-        },
-        "time.reservation": {
-          "description": "time of the reservation",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 12073,
-          "in original ontology": true
-        },
-        "type.seating": {
-          "description": "type of the seating",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 8983,
-          "in original ontology": true
-        },
-        "location.restaurant": {
-          "description": "location of the restaurant",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 4311,
-          "in original ontology": true
-        }
-      }
-    },
-    "coffee_ordering": {
-      "description": "order a coffee drink from either Starbucks or Peets for pick up",
-      "slots": {
-        "location.store": {
-          "description": "location of the coffee store",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 5510,
-          "in original ontology": true
-        },
-        "name.drink": {
-          "description": "name of the drink",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 9182,
-          "in original ontology": true
-        },
-        "size.drink": {
-          "description": "size of the drink",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 7804,
-          "in original ontology": true
-        },
-        "num.drink": {
-          "description": "number of drinks",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 848,
-          "in original ontology": true
-        },
-        "type.milk": {
-          "description": "type of the milk",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 8433,
-          "in original ontology": true
-        },
-        "preference": {
-          "description": "user preference of the drink",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 11266,
-          "in original ontology": true
-        }
-      }
-    },
-    "pizza_ordering": {
-      "description": "order a pizza",
-      "slots": {
-        "name.store": {
-          "description": "name of the pizza store",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 5127,
-          "in original ontology": true
-        },
-        "name.pizza": {
-          "description": "name of the pizza",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 9208,
-          "in original ontology": true
-        },
-        "size.pizza": {
-          "description": "size of the pizza",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 9661,
-          "in original ontology": true
-        },
-        "type.topping": {
-          "description": "type of the topping",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 20639,
-          "in original ontology": true
-        },
-        "type.crust": {
-          "description": "type of the crust",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 5099,
-          "in original ontology": true
-        },
-        "preference": {
-          "description": "user preference of the pizza",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 4998,
-          "in original ontology": true
-        },
-        "location.store": {
-          "description": "location of the pizza store",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 1487,
-          "in original ontology": true
-        }
-      }
-    },
-    "auto_repair": {
-      "description": "set up an auto repair appointment with a repair shop",
-      "slots": {
-        "name.store": {
-          "description": "name of the repair store",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 4005,
-          "in original ontology": true
-        },
-        "name.customer": {
-          "description": "name of the customer",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 4547,
-          "in original ontology": true
-        },
-        "date.appt": {
-          "description": "date of the appointment",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 7650,
-          "in original ontology": true
-        },
-        "time.appt": {
-          "description": "time of the appointment",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 9827,
-          "in original ontology": true
-        },
-        "reason.appt": {
-          "description": "reason of the appointment",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 6509,
-          "in original ontology": true
-        },
-        "name.vehicle": {
-          "description": "name of the vehicle",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 5262,
-          "in original ontology": true
-        },
-        "year.vehicle": {
-          "description": "year of the vehicle",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 4561,
-          "in original ontology": true
-        },
-        "location.store": {
-          "description": "location of the repair store",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 709,
-          "in original ontology": true
-        }
-      }
-    },
-    "flights": {
-      "description": "find a round trip or multi-city flights",
-      "slots": {
-        "type": {
-          "description": "type of the flight",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 1999,
-          "in original ontology": true
-        },
-        "destination1": {
-          "description": "the first destination city of the trip",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 3993,
-          "in original ontology": true
-        },
-        "destination2": {
-          "description": "the second destination city of the trip",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 128,
-          "in original ontology": true
-        },
-        "origin": {
-          "description": "the origin city of the trip",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 2595,
-          "in original ontology": true
-        },
-        "date.depart_origin": {
-          "description": "date of departure from origin",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 3177,
-          "in original ontology": true
-        },
-        "date.depart_intermediate": {
-          "description": "date of departure from intermediate",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 48,
-          "in original ontology": true
-        },
-        "date.return": {
-          "description": "date of return",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 2822,
-          "in original ontology": true
-        },
-        "time_of_day": {
-          "description": "time of the flight",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 3840,
-          "in original ontology": true
-        },
-        "seating_class": {
-          "description": "seat type (first class, business class, economy class, etc.",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 3626,
-          "in original ontology": true
-        },
-        "seat_location": {
-          "description": "location of the seat",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 313,
-          "in original ontology": true
-        },
-        "stops": {
-          "description": "non-stop, layovers, etc.",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 6174,
-          "in original ontology": true
-        },
-        "price_range": {
-          "description": "price range of the flight",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 2646,
-          "in original ontology": true
-        },
-        "num.pax": {
-          "description": "number of people",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 437,
-          "in original ontology": true
-        },
-        "luggage": {
-          "description": "luggage information",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 17,
-          "in original ontology": true
-        },
-        "total_fare": {
-          "description": "total cost of the trip",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 1238,
-          "in original ontology": true
-        },
-        "other_description": {
-          "description": "other description of the flight",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 2620,
-          "in original ontology": true
-        },
-        "from": {
-          "description": "departure of the flight",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 1293,
-          "in original ontology": true
-        },
-        "to": {
-          "description": "destination of the flight",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 1979,
-          "in original ontology": true
-        },
-        "airline": {
-          "description": "airline of the flight",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 3981,
-          "in original ontology": true
-        },
-        "flight_number": {
-          "description": "the number of the flight",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 42,
-          "in original ontology": true
-        },
-        "date": {
-          "description": "date of the flight",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 756,
-          "in original ontology": true
-        },
-        "from.time": {
-          "description": "departure time of the flight",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 6440,
-          "in original ontology": true
-        },
-        "to.time": {
-          "description": "arrival time of the flight",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 2571,
-          "in original ontology": true
-        },
-        "stops.location": {
-          "description": "location of the stop",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 1097,
-          "in original ontology": true
-        },
-        "fare": {
-          "description": "cost of the flight",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 1475,
-          "in original ontology": true
-        }
-      }
-    },
-    "food_order": {
-      "description": "order take-out for a particular cuisine choice",
-      "slots": {
-        "name.item": {
-          "description": "name of the item",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 6080,
-          "in original ontology": true
-        },
-        "other_description.item": {
-          "description": "other description of the item",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 1474,
-          "in original ontology": true
-        },
-        "type.retrieval": {
-          "description": "type of the retrieval method",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 1868,
-          "in original ontology": true
-        },
-        "total_price": {
-          "description": "total price",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 5,
-          "in original ontology": true
-        },
-        "time.pickup": {
-          "description": "pick up time",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 981,
-          "in original ontology": true
-        },
-        "num.people": {
-          "description": "number of people",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 880,
-          "in original ontology": true
-        },
-        "name.restaurant": {
-          "description": "name of the restaurant",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 106,
-          "in original ontology": true
-        },
-        "type.food": {
-          "description": "type of food",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 1247,
-          "in original ontology": true
-        },
-        "type.meal": {
-          "description": "type of meal",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 64,
-          "in original ontology": true
-        },
-        "location.restaurant": {
-          "description": "location of the restaurant",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 8,
-          "in original ontology": true
-        },
-        "rating.restaurant": {
-          "description": "rating of the restaurant",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 5,
-          "in original ontology": true
-        },
-        "price_range": {
-          "description": "price range of the food",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 5,
-          "in original ontology": true
-        }
-      }
-    },
-    "hotel": {
-      "description": "find a hotel using typical preferences",
-      "slots": {
-        "name.hotel": {
-          "description": "name of the hotel",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 5241,
-          "in original ontology": true
-        },
-        "location.hotel": {
-          "description": "location of the hotel",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 2940,
-          "in original ontology": true
-        },
-        "sub_location.hotel": {
-          "description": "rough location of the hotel",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 1869,
-          "in original ontology": true
-        },
-        "star_rating": {
-          "description": "star rating of the hotel",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 2049,
-          "in original ontology": true
-        },
-        "customer_rating": {
-          "description": "customer rating of the hotel",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 1239,
-          "in original ontology": true
-        },
-        "price_range": {
-          "description": "price range of the hotel",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 2357,
-          "in original ontology": true
-        },
-        "amenity": {
-          "description": "amenity of the hotel",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 19030,
-          "in original ontology": true
-        },
-        "num.beds": {
-          "description": "number of beds to book",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 733,
-          "in original ontology": true
-        },
-        "type.bed": {
-          "description": "type of the bed",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 1423,
-          "in original ontology": true
-        },
-        "num.rooms": {
-          "description": "number of rooms to book",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 256,
-          "in original ontology": true
-        },
-        "check-in_date": {
-          "description": "check-in date",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 608,
-          "in original ontology": true
-        },
-        "check-out_date": {
-          "description": "check-out date",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 428,
-          "in original ontology": true
-        },
-        "date_range": {
-          "description": "date range of the reservation",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 2458,
-          "in original ontology": true
-        },
-        "num.guests": {
-          "description": "number of guests",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 1323,
-          "in original ontology": true
-        },
-        "type.room": {
-          "description": "type of the room",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 1840,
-          "in original ontology": true
-        },
-        "price_per_night": {
-          "description": "price per night",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 2357,
-          "in original ontology": true
-        },
-        "total_fare": {
-          "description": "total fare",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 28,
-          "in original ontology": true
-        },
-        "location": {
-          "description": "location of the hotel",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 352,
-          "in original ontology": true
-        }
-      }
-    },
-    "movie": {
-      "description": "find a movie to watch in theaters or using a streaming service at home",
-      "slots": {
-        "name.movie": {
-          "description": "name of the movie",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 13413,
-          "in original ontology": true
-        },
-        "genre": {
-          "description": "genre of the movie",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 4982,
-          "in original ontology": true
-        },
-        "name.theater": {
-          "description": "name of the theater",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 2371,
-          "in original ontology": true
-        },
-        "location.theater": {
-          "description": "location of the theater",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 2894,
-          "in original ontology": true
-        },
-        "time.start": {
-          "description": "start time of the movie",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 6455,
-          "in original ontology": true
-        },
-        "time.end": {
-          "description": "end time of the movie",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 3,
-          "in original ontology": true
-        },
-        "price.ticket": {
-          "description": "price of the ticket",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 989,
-          "in original ontology": true
-        },
-        "price.streaming": {
-          "description": "price of the streaming",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 397,
-          "in original ontology": true
-        },
-        "type.screening": {
-          "description": "type of the screening",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 1419,
-          "in original ontology": true
-        },
-        "audience_rating": {
-          "description": "audience rating",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 1506,
-          "in original ontology": true
-        },
-        "movie_rating": {
-          "description": "film rating",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 273,
-          "in original ontology": true
-        },
-        "release_date": {
-          "description": "release date of the movie",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 386,
-          "in original ontology": true
-        },
-        "runtime": {
-          "description": "running time of the movie",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 262,
-          "in original ontology": true
-        },
-        "real_person": {
-          "description": "name of actors, directors, etc.",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 3406,
-          "in original ontology": true
-        },
-        "character": {
-          "description": "name of character in the movie",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 1676,
-          "in original ontology": true
-        },
-        "streaming_service": {
-          "description": "streaming service that provide the movie",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 2729,
-          "in original ontology": true
-        },
-        "num.tickets": {
-          "description": "number of tickets",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 1045,
-          "in original ontology": true
-        },
-        "seating": {
-          "description": "type of seating",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 13,
-          "in original ontology": true
-        }
-      }
-    },
-    "music": {
-      "description": "find several tracks to play and then comment on each one",
-      "slots": {
-        "name.track": {
-          "description": "name of the track",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 4916,
-          "in original ontology": true
-        },
-        "name.artist": {
-          "description": "name of the artist",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 9287,
-          "in original ontology": true
-        },
-        "name.album": {
-          "description": "name of the album",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 1106,
-          "in original ontology": true
-        },
-        "name.genre": {
-          "description": "music genre",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 452,
-          "in original ontology": true
-        },
-        "type.music": {
-          "description": "rough type of the music",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 603,
-          "in original ontology": true
-        },
-        "describes_track": {
-          "description": "description of a track to find",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 2969,
-          "in original ontology": true
-        },
-        "describes_artist": {
-          "description": "description of a artist to find",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 612,
-          "in original ontology": true
-        },
-        "describes_album": {
-          "description": "description of an album to find",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 189,
-          "in original ontology": true
-        },
-        "describes_genre": {
-          "description": "description of a genre to find",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 26,
-          "in original ontology": true
-        },
-        "describes_type.music": {
-          "description": "description of the music type",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 375,
-          "in original ontology": true
-        }
-      }
-    },
-    "restaurant": {
-      "description": "ask for recommendations for a particular type of cuisine",
-      "slots": {
-        "name.restaurant": {
-          "description": "name of the restaurant",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 8676,
-          "in original ontology": true
-        },
-        "location": {
-          "description": "location of the restaurant",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 5165,
-          "in original ontology": true
-        },
-        "sub-location": {
-          "description": "rough location of the restaurant",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 1210,
-          "in original ontology": true
-        },
-        "type.food": {
-          "description": "the cuisine of the restaurant",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 12412,
-          "in original ontology": true
-        },
-        "menu_item": {
-          "description": "item in the menu",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 1499,
-          "in original ontology": true
-        },
-        "type.meal": {
-          "description": "type of meal",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 2677,
-          "in original ontology": true
-        },
-        "rating": {
-          "description": "rating of the restaurant",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 2951,
-          "in original ontology": true
-        },
-        "price_range": {
-          "description": "price range of the restaurant",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 1930,
-          "in original ontology": true
-        },
-        "business_hours": {
-          "description": "business hours of the restaurant",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 2024,
-          "in original ontology": true
-        },
-        "name.reservation": {
-          "description": "name of the person who make the reservation",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 16,
-          "in original ontology": true
-        },
-        "num.guests": {
-          "description": "number of guests",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 179,
-          "in original ontology": true
-        },
-        "time.reservation": {
-          "description": "time of the reservation",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 216,
-          "in original ontology": true
-        },
-        "date.reservation": {
-          "description": "date of the reservation",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 130,
-          "in original ontology": true
-        },
-        "type.seating": {
-          "description": "type of the seating",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 11,
-          "in original ontology": true
-        }
-      }
-    },
-    "sport": {
-      "description": "discuss facts and stats about players, teams, games, etc. in EPL, MLB, MLS, NBA, NFL",
-      "slots": {
-        "name.team": {
-          "description": "name of the team",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 19651,
-          "in original ontology": true
-        },
-        "record.team": {
-          "description": "record of the team (number of wins and losses)",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 3338,
-          "in original ontology": true
-        },
-        "record.games_ahead": {
-          "description": "number of games ahead",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 33,
-          "in original ontology": true
-        },
-        "record.games_back": {
-          "description": "number of games behind",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 361,
-          "in original ontology": true
-        },
-        "place.team": {
-          "description": "ranking of the team",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 4075,
-          "in original ontology": true
-        },
-        "result.match": {
-          "description": "result of the match",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 3245,
-          "in original ontology": true
-        },
-        "score.match": {
-          "description": "score of the match",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 3241,
-          "in original ontology": true
-        },
-        "date.match": {
-          "description": "date of the match",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 2660,
-          "in original ontology": true
-        },
-        "day.match": {
-          "description": "day of the match",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 4743,
-          "in original ontology": true
-        },
-        "time.match": {
-          "description": "time of the match",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 1283,
-          "in original ontology": true
-        },
-        "name.player": {
-          "description": "name of the player",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 2365,
-          "in original ontology": true
-        },
-        "position.player": {
-          "description": "position of the player",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 2746,
-          "in original ontology": true
-        },
-        "record.player": {
-          "description": "record of the player",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 80,
-          "in original ontology": true
-        },
-        "name.non_player": {
-          "description": "name of non-palyer such as the manager, coach",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 843,
-          "in original ontology": true
-        },
-        "venue": {
-          "description": "venue of the match take place",
-          "is_categorical": false,
-          "possible_values": [],
-          "count": 328,
-          "in original ontology": true
-        }
-      }
-    }
-  },
-  "intents": {
-    "inform": {
-      "description": ""
-    }
-  },
-  "binary_dialogue_act": [],
-  "state": {}
-}
\ No newline at end of file
diff --git a/data/unified_datasets/taskmaster/original_data.zip b/data/unified_datasets/taskmaster/original_data.zip
deleted file mode 100644
index 8a36e95bf829063c7bff98404eb795107baf7b87..0000000000000000000000000000000000000000
Binary files a/data/unified_datasets/taskmaster/original_data.zip and /dev/null differ
diff --git a/data/unified_datasets/taskmaster/preprocess.py b/data/unified_datasets/taskmaster/preprocess.py
deleted file mode 100644
index 3f82931ab1781fb59ff5fb4d6757c6f5b9bfaeea..0000000000000000000000000000000000000000
--- a/data/unified_datasets/taskmaster/preprocess.py
+++ /dev/null
@@ -1,445 +0,0 @@
-import json
-import os
-import copy
-import zipfile
-from tqdm import tqdm
-import re
-from convlab.util.file_util import read_zipped_json, write_zipped_json
-from pprint import pprint
-
-descriptions = {
-    "uber_lyft": {
-        "uber_lyft": "order a car for a ride inside a city",
-        "location.from": "pickup location",
-        "location.to": "destination of the ride",
-        "type.ride": "type of ride",
-        "num.people": "number of people",
-        "price.estimate": "estimated cost of the ride",
-        "duration.estimate": "estimated duration of the ride",
-        "time.pickup": "time of pickup",
-        "time.dropoff": "time of dropoff",
-    },
-    "movie_ticket": {
-        "movie_ticket": "book movie tickets for a film",
-        "name.movie": "name of the movie",
-        "name.theater": "name of the theater",
-        "num.tickets": "number of tickets",
-        "time.start": "start time of the movie",
-        "location.theater": "location of the theater",
-        "price.ticket": "price of the ticket",
-        "type.screening": "type of the screening",
-        "time.end": "end time of the movie",
-        "time.duration": "duration of the movie",
-    },
-    "restaurant_reservation": {
-        "restaurant_reservation": "searching for a restaurant and make reservation",
-        "name.restaurant": "name of the restaurant",
-        "name.reservation": "name of the person who make the reservation",
-        "num.guests": "number of guests",
-        "time.reservation": "time of the reservation",
-        "type.seating": "type of the seating",
-        "location.restaurant": "location of the restaurant",
-    },
-    "coffee_ordering": {
-        "coffee_ordering": "order a coffee drink from either Starbucks or Peets for pick up",
-        "location.store": "location of the coffee store",
-        "name.drink": "name of the drink",
-        "size.drink": "size of the drink",
-        "num.drink": "number of drinks",
-        "type.milk": "type of the milk",
-        "preference": "user preference of the drink",
-    },
-    "pizza_ordering": {
-        "pizza_ordering": "order a pizza",
-        "name.store": "name of the pizza store",
-        "name.pizza": "name of the pizza",
-        "size.pizza": "size of the pizza",
-        "type.topping": "type of the topping",
-        "type.crust": "type of the crust",
-        "preference": "user preference of the pizza",
-        "location.store": "location of the pizza store",
-    },
-    "auto_repair": {
-        "auto_repair": "set up an auto repair appointment with a repair shop",
-        "name.store": "name of the repair store",
-        "name.customer": "name of the customer",
-        "date.appt": "date of the appointment",
-        "time.appt": "time of the appointment",
-        "reason.appt": "reason of the appointment",
-        "name.vehicle": "name of the vehicle",
-        "year.vehicle": "year of the vehicle",
-        "location.store": "location of the repair store",
-    },
-    "flights": {
-        "flights": "find a round trip or multi-city flights",
-        "type": "type of the flight",
-        "destination1": "the first destination city of the trip",
-        "destination2": "the second destination city of the trip",
-        "origin": "the origin city of the trip",
-        "date.depart_origin": "date of departure from origin",
-        "date.depart_intermediate": "date of departure from intermediate",
-        "date.return": "date of return",
-        "time_of_day": "time of the flight",
-        "seating_class": "seat type (first class, business class, economy class, etc.",
-        "seat_location": "location of the seat",
-        "stops": "non-stop, layovers, etc.",
-        "price_range": "price range of the flight",
-        "num.pax": "number of people",
-        "luggage": "luggage information",
-        "total_fare": "total cost of the trip",
-        "other_description": "other description of the flight",
-        "from": "departure of the flight",
-        "to": "destination of the flight",
-        "airline": "airline of the flight",
-        "flight_number": "the number of the flight",
-        "date": "date of the flight",
-        "from.time": "departure time of the flight",
-        "to.time": "arrival time of the flight",
-        "stops.location": "location of the stop",
-        "fare": "cost of the flight",
-    },
-    "food_order": {
-        "food_order": "order take-out for a particular cuisine choice",
-        "name.item": "name of the item",
-        "other_description.item": "other description of the item",
-        "type.retrieval": "type of the retrieval method",
-        "total_price": "total price",
-        "time.pickup": "pick up time",
-        "num.people": "number of people",
-        "name.restaurant": "name of the restaurant",
-        "type.food": "type of food",
-        "type.meal": "type of meal",
-        "location.restaurant": "location of the restaurant",
-        "rating.restaurant": "rating of the restaurant",
-        "price_range": "price range of the food",
-    },
-    "hotel": {
-        "hotel": "find a hotel using typical preferences",
-        "name.hotel": "name of the hotel",
-        "location.hotel": "location of the hotel",
-        "sub_location.hotel": "rough location of the hotel",
-        "star_rating": "star rating of the hotel",
-        "customer_rating": "customer rating of the hotel",
-        "price_range": "price range of the hotel",
-        "amenity": "amenity of the hotel",
-        "num.beds": "number of beds to book",
-        "type.bed": "type of the bed",
-        "num.rooms": "number of rooms to book",
-        "check-in_date": "check-in date",
-        "check-out_date": "check-out date",
-        "date_range": "date range of the reservation",
-        "num.guests": "number of guests",
-        "type.room": "type of the room",
-        "price_per_night": "price per night",
-        "total_fare": "total fare",
-        "location": "location of the hotel",
-    },
-    "movie": {
-        "movie": "find a movie to watch in theaters or using a streaming service at home",
-        "name.movie": "name of the movie",
-        "genre": "genre of the movie",
-        "name.theater": "name of the theater",
-        "location.theater": "location of the theater",
-        "time.start": "start time of the movie",
-        "time.end": "end time of the movie",
-        "price.ticket": "price of the ticket",
-        "price.streaming": "price of the streaming",
-        "type.screening": "type of the screening",
-        "audience_rating": "audience rating",
-        "movie_rating": "film rating",
-        "release_date": "release date of the movie",
-        "runtime": "running time of the movie",
-        "real_person": "name of actors, directors, etc.",
-        "character": "name of character in the movie",
-        "streaming_service": "streaming service that provide the movie",
-        "num.tickets": "number of tickets",
-        "seating": "type of seating",
-    },
-    "music": {
-        "music": "find several tracks to play and then comment on each one",
-        "name.track": "name of the track",
-        "name.artist": "name of the artist",
-        "name.album": "name of the album",
-        "name.genre": "music genre",
-        "type.music": "rough type of the music",
-        "describes_track": "description of a track to find",
-        "describes_artist": "description of a artist to find",
-        "describes_album": "description of an album to find",
-        "describes_genre": "description of a genre to find",
-        "describes_type.music": "description of the music type",
-    },
-    "restaurant": {
-        "restaurant": "ask for recommendations for a particular type of cuisine",
-        "name.restaurant": "name of the restaurant",
-        "location": "location of the restaurant",
-        "sub-location": "rough location of the restaurant",
-        "type.food": "the cuisine of the restaurant",
-        "menu_item": "item in the menu",
-        "type.meal": "type of meal",
-        "rating": "rating of the restaurant",
-        "price_range": "price range of the restaurant",
-        "business_hours": "business hours of the restaurant",
-        "name.reservation": "name of the person who make the reservation",
-        "num.guests": "number of guests",
-        "time.reservation": "time of the reservation",
-        "date.reservation": "date of the reservation",
-        "type.seating": "type of the seating",
-    },
-    "sport": {
-        "sport": "discuss facts and stats about players, teams, games, etc. in EPL, MLB, MLS, NBA, NFL",
-        "name.team": "name of the team",
-        "record.team": "record of the team (number of wins and losses)",
-        "record.games_ahead": "number of games ahead",
-        "record.games_back": "number of games behind",
-        "place.team": "ranking of the team",
-        "result.match": "result of the match",
-        "score.match": "score of the match",
-        "date.match": "date of the match",
-        "day.match": "day of the match",
-        "time.match": "time of the match",
-        "name.player": "name of the player",
-        "position.player": "position of the player",
-        "record.player": "record of the player",
-        "name.non_player": "name of non-palyer such as the manager, coach",
-        "venue": "venue of the match take place",
-    }
-}
-
-
-def normalize_domain_name(domain):
-    if domain == 'auto':
-        return 'auto_repair'
-    elif domain == 'pizza':
-        return 'pizza_ordering'
-    elif domain == 'coffee':
-        return 'coffee_ordering'
-    elif domain == 'uber':
-        return 'uber_lyft'
-    elif domain == 'restaurant':
-        return 'restaurant_reservation'
-    elif domain == 'movie':
-        return 'movie_ticket'
-    elif domain == 'flights':
-        return 'flights'
-    elif domain == 'food-ordering':
-        return 'food_order'
-    elif domain == 'hotels':
-        return 'hotel'
-    elif domain == 'movies':
-        return 'movie'
-    elif domain == 'music':
-        return 'music'
-    elif domain == 'restaurant-search':
-        return 'restaurant'
-    elif domain == 'sports':
-        return 'sport'
-    assert 0
-
-
-def format_turns(ori_turns):
-    new_turns = []
-    previous_speaker = None
-    utt_idx = 0
-    for i, turn in enumerate(ori_turns):
-        speaker = 'system' if turn['speaker'] == 'ASSISTANT' else 'user'
-        turn['speaker'] = speaker
-        if utt_idx == 0 and speaker == 'system':
-            continue
-        if turn['text'] == '(deleted)':
-            continue
-        if not previous_speaker:
-            assert speaker != previous_speaker
-        if speaker != previous_speaker:
-            previous_speaker = speaker
-            new_turns.append(copy.deepcopy(turn))
-            utt_idx += 1
-        else:
-            # continuous speaking
-            last_turn = new_turns[-1]
-            # if ori_turns[i-1]['text'] == turn['text']:
-            #     # skip repeat turn
-            #     continue
-            if turn['text'] in ori_turns[i-1]['text']:
-                continue
-            index_shift = len(last_turn['text']) + 1
-            last_turn['text'] += ' '+turn['text']
-            if 'segments' in turn:
-                last_turn.setdefault('segments', [])
-                for segment in turn['segments']:
-                    segment['start_index'] += index_shift
-                    segment['end_index'] += index_shift
-                last_turn['segments'] += turn['segments']
-    if new_turns and new_turns[-1]['speaker'] == 'system':
-        new_turns = new_turns[:-1]
-    return new_turns
-
-
-def log_ontology(acts, ontology, ori_ontology):
-    for item in acts:
-        intent, domain, slot, value = item['intent'], item['domain'], item['slot'], item['value']
-        if domain not in ontology['domains']:
-            ontology['domains'][domain] = {'description': "", 'slots': {}}
-        if slot not in ontology['domains'][domain]['slots']:
-            ontology['domains'][domain]['slots'][slot] = {
-                'description': '',
-                'is_categorical': False,
-                'possible_values': [],
-                'count': 1
-            }
-        else:
-            ontology['domains'][domain]['slots'][slot]['count'] += 1
-        ontology['domains'][domain]['slots'][slot]['in original ontology'] = slot in ori_ontology[domain]
-        if intent is not None and intent not in ontology['intents']:
-            ontology['intents'][intent] = {
-                "description": ''
-            }
-
-
-def preprocess():
-    self_dir = os.path.dirname(os.path.abspath(__file__))
-    processed_dialogue = []
-    ontology = {'domains': {},
-                'intents': {},
-                'binary_dialogue_act': [],
-                'state': {}}
-    original_zipped_path = os.path.join(self_dir, 'original_data.zip')
-    new_dir = os.path.join(self_dir, 'original_data')
-    if not os.path.exists(os.path.join(self_dir, 'data.zip')) or not os.path.exists(os.path.join(self_dir, 'ontology.json')):
-        print('unzip to', new_dir)
-        print('This may take several minutes')
-        archive = zipfile.ZipFile(original_zipped_path, 'r')
-        archive.extractall(self_dir)
-        files = [
-            ('TM-1-2019/woz-dialogs.json', 'TM-1-2019/ontology.json'),
-            ('TM-1-2019/self-dialogs.json', 'TM-1-2019/ontology.json'),
-            ('TM-2-2020/data/flights.json', 'TM-2-2020/ontology/flights.json'),
-            ('TM-2-2020/data/food-ordering.json', 'TM-2-2020/ontology/food-ordering.json'),
-            ('TM-2-2020/data/hotels.json', 'TM-2-2020/ontology/hotels.json'),
-            ('TM-2-2020/data/movies.json', 'TM-2-2020/ontology/movies.json'),
-            ('TM-2-2020/data/music.json', 'TM-2-2020/ontology/music.json'),
-            ('TM-2-2020/data/restaurant-search.json', 'TM-2-2020/ontology/restaurant-search.json'),
-            ('TM-2-2020/data/sports.json', 'TM-2-2020/ontology/sports.json')
-        ]
-        idx_count = 1
-        total = 0
-
-        for filename, ontology_filename in files:
-            data = json.load(open(os.path.join(new_dir, filename)))
-            ori_ontology = {}
-            if 'TM-1' in filename:
-                for domain, item in json.load(open(os.path.join(new_dir, ontology_filename))).items():
-                    ori_ontology[item["id"]] = {}
-                    for slot in item["required"] + item["optional"]:
-                        ori_ontology[item["id"]][slot] = 0
-            else:
-                domain = normalize_domain_name(filename.split('/')[-1].split('.')[0])
-                ori_ontology[domain] = {}
-                for _, item in json.load(open(os.path.join(new_dir, ontology_filename))).items():
-                    for group in item:
-                        for anno in group["annotations"]:
-                            ori_ontology[domain][anno] = 0
-            for d in ori_ontology:
-                if d not in ontology['domains']:
-                    ontology['domains'][d] = {'description': descriptions[d][d], 'slots': {}}
-                for s in ori_ontology[d]:
-                    if s not in ontology['domains'][d]['slots']:
-                        ontology['domains'][d]['slots'][s] = {
-                            'description': descriptions[d][s],
-                            'is_categorical': False,
-                            'possible_values': [],
-                            'count': 0,
-                            'in original ontology': True
-                        }
-            # pprint(ori_ontology)
-            for ori_sess in tqdm(data, desc='processing taskmaster-{}'.format(filename)):
-                total += 1
-                turns = format_turns(ori_sess['utterances'])
-                if not turns:
-                    continue
-                if 'TM-2' in filename:
-                    dial_domain = normalize_domain_name(filename.split('/')[-1].split('.')[0])
-                else:
-                    dial_domain = normalize_domain_name(ori_sess['instruction_id'].split('-', 1)[0])
-                dialogue = {
-                    "dataset": "taskmaster",
-                    "data_split": "train",
-                    "dialogue_id": 'taskmaster_' + str(idx_count),
-                    "original_id": ori_sess['conversation_id'],
-                    "instruction_id": ori_sess['instruction_id'],
-                    "domains": [
-                        dial_domain
-                    ],
-                    "turns": []
-                }
-                idx_count += 1
-                assert turns[0]['speaker'] == 'user' and turns[-1]['speaker'] == 'user', print(turns)
-                for utt_idx, uttr in enumerate(turns):
-                    speaker = uttr['speaker']
-                    turn = {
-                        'speaker': speaker,
-                        'utterance': uttr['text'],
-                        'utt_idx': utt_idx,
-                        'dialogue_act': {
-                            'binary': [],
-                            'categorical': [],
-                            'non-categorical': [],
-                        },
-                    }
-                    if speaker == 'user':
-                        turn['state'] = {}
-                        turn['state_update'] = {'categorical': [], 'non-categorical': []}
-
-                    if 'segments' in uttr:
-                        for segment in uttr['segments']:
-                            for item in segment['annotations']:
-                                # domain = item['name'].split('.', 1)[0]
-                                domain = dial_domain
-
-                                # if domain != item['name'].split('.', 1)[0]:
-                                #     print(domain, item['name'].split('.', 1), dialogue["original_id"])
-                                #     assert domain in item['name'].split('.', 1)[0]
-
-                                # if item['name'].split('.', 1)[0] != domain:
-                                #     print(domain, item['name'].split('.', 1), dialogue["original_id"])
-                                slot = item['name'].split('.', 1)[-1]
-                                if slot.endswith('.accept') or slot.endswith('.reject'):
-                                    slot = slot[:-7]
-                                if slot not in ori_ontology[domain]:
-                                    # print(domain, item['name'].split('.', 1), dialogue["original_id"])
-                                    continue
-                                # if domain in ori_ontology:
-                                #     ori_ontology[domain][slot] += 1
-                                # else:
-                                #     print(domain, item['name'].split('.', 1), dialogue["original_id"])
-                                # assert domain in ori_ontology, print(domain, item['name'].split('.', 1), dialogue["original_id"])
-
-                                if not segment['text']:
-                                    print(slot)
-                                    print(segment)
-                                    print()
-                                assert turn['utterance'][segment['start_index']:segment['end_index']] == segment['text']
-                                turn['dialogue_act']['non-categorical'].append({
-                                    'intent': 'inform',
-                                    'domain': domain,
-                                    'slot': slot,
-                                    'value': segment['text'].lower(),
-                                    'start': segment['start_index'],
-                                    'end': segment['end_index']
-                                })
-                        log_ontology(turn['dialogue_act']['non-categorical'], ontology, ori_ontology)
-                    dialogue['turns'].append(turn)
-                processed_dialogue.append(dialogue)
-            # pprint(ori_ontology)
-        # save ontology json
-        json.dump(ontology, open(os.path.join(self_dir, 'ontology.json'), 'w'), indent=2)
-        json.dump(processed_dialogue, open('data.json', 'w'), indent=2)
-        write_zipped_json(os.path.join(self_dir, 'data.zip'), 'data.json')
-        os.remove('data.json')
-    else:
-        # read from file
-        processed_dialogue = read_zipped_json(os.path.join(self_dir, 'data.zip'), 'data.json')
-        ontology = json.load(open(os.path.join(self_dir, 'ontology.json')))
-    return processed_dialogue, ontology
-
-if __name__ == '__main__':
-    preprocess()
diff --git a/data/unified_datasets/woz/ontology.json b/data/unified_datasets/woz/ontology.json
deleted file mode 100644
index 8f863f679941e2bdf4347a4d6c992a4881b8ef60..0000000000000000000000000000000000000000
--- a/data/unified_datasets/woz/ontology.json
+++ /dev/null
@@ -1,117 +0,0 @@
-{
-    "domains": {
-        "restaurant": {
-            "description": "search for a restaurant to dine",
-            "slots": {
-                "food": {
-                    "is_categorical": false,
-                    "possible_values": [],
-                    "description": "food type of the restaurant"
-                },
-                "area": {
-                    "is_categorical": true,
-                    "possible_values": [
-                        "east",
-                        "west",
-                        "center",
-                        "north",
-                        "south"
-                    ],
-                    "description": "area of the restaurant"
-                },
-                "postcode": {
-                    "is_categorical": false,
-                    "possible_values": [],
-                    "description": "postal code of the restaurant"
-                },
-                "phone": {
-                    "is_categorical": false,
-                    "possible_values": [],
-                    "description": "phone number of the restaurant"
-                },
-                "address": {
-                    "is_categorical": false,
-                    "possible_values": [],
-                    "description": "address of the restaurant"
-                },
-                "price range": {
-                    "is_categorical": true,
-                    "possible_values": [
-                        "expensive",
-                        "moderate",
-                        "cheap"
-                    ],
-                    "description": "price range of the restaurant"
-                },
-                "name": {
-                    "is_categorical": false,
-                    "possible_values": [],
-                    "description": "name of the restaurant"
-                }
-            }
-        }
-    },
-    "intents": {
-        "inform": {
-            "description": "system informs user the value of a slot"
-        },
-        "request": {
-            "description": "system asks the user to provide value of a slot"
-        }
-    },
-    "binary_dialogue_act": [
-        {
-            "intent": "request",
-            "domain": "restaurant",
-            "slot": "postcode",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "restaurant",
-            "slot": "phone",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "restaurant",
-            "slot": "address",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "restaurant",
-            "slot": "area",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "restaurant",
-            "slot": "price range",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "restaurant",
-            "slot": "food",
-            "value": ""
-        },
-        {
-            "intent": "request",
-            "domain": "restaurant",
-            "slot": "name",
-            "value": ""
-        }
-    ],
-    "state": {
-        "restaurant": {
-            "food": "",
-            "area": "",
-            "postcode": "",
-            "phone": "",
-            "address": "",
-            "price range": "",
-            "name": ""
-        }
-    }
-}
\ No newline at end of file