diff --git a/convlab2/nlu/evaluate_unified_datasets.py b/convlab2/nlu/evaluate_unified_datasets.py
index bb244e34918c6aeaed468d6a67683c8e4e5306b4..86e91747702a1dedd727b5945bf6cbcaa08540d9 100644
--- a/convlab2/nlu/evaluate_unified_datasets.py
+++ b/convlab2/nlu/evaluate_unified_datasets.py
@@ -6,8 +6,10 @@ def evaluate(predict_result):
     predict_result = json.load(open(predict_result))
 
     metrics = {x: {'TP':0, 'FP':0, 'FN':0} for x in ['overall', 'binary', 'categorical', 'non-categorical']}
+    acc = []
 
     for sample in predict_result:
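+        # exact-match flag for this sample, ANDed over all three DA types below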
+        flag = True
         for da_type in ['binary', 'categorical', 'non-categorical']:
             if da_type == 'binary':
                 predicts = [(x['intent'], x['domain'], x['slot']) for x in sample['predictions']['dialogue_acts'][da_type]]
@@ -26,6 +28,8 @@ def evaluate(predict_result):
                 if ele not in predicts:
                     metrics['overall']['FN'] += 1
                     metrics[da_type]['FN'] += 1
+            flag &= (sorted(predicts) == sorted(labels))
+        acc.append(flag)
     
     for metric in metrics:
         TP = metrics[metric].pop('TP')
@@ -37,6 +41,7 @@ def evaluate(predict_result):
         metrics[metric]['precision'] = precision
         metrics[metric]['recall'] = recall
         metrics[metric]['f1'] = f1
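+    # sample-level accuracy: fraction of samples whose dialogue acts are all predicted exactly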
+    metrics['accuracy'] = sum(acc)/len(acc)
 
     return metrics
 
diff --git a/convlab2/nlu/jointBERT/README.md b/convlab2/nlu/jointBERT/README.md
index c9756d3c1ebdd42e975bb86d32a532b066a29048..c87e83551b21b7778c026916795c11a737831ce1 100755
--- a/convlab2/nlu/jointBERT/README.md
+++ b/convlab2/nlu/jointBERT/README.md
@@ -31,6 +31,50 @@ $ python test.py --config_path path_to_a_config_file
 The result (`output.json`) will be saved under the `output_dir` of the config file. Also, it will be zipped as `zipped_model_path` in the config file.
 
 
+## Performance on unified format datasets
+
+To illustrate that the model can be easily applied to any dataset in our unified format, we report its performance on several such datasets. We follow `README.md` and the config files in `unified_datasets/` to generate `predictions.json`, then evaluate it using `../evaluate_unified_datasets.py`. Note that we use almost the same hyper-parameters across datasets, which may not be optimal.
+
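+For example, once `predictions.json` is generated, the metrics can also be computed programmatically (a minimal sketch; the predictions path is an assumption, adjust it to your `output_dir`):
+
+```python
+from convlab2.nlu.evaluate_unified_datasets import evaluate
+
+metrics = evaluate('predictions.json')  # predictions file produced as described above
+print(metrics['accuracy'], metrics['overall']['f1'])
+```
+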
+<table>
+<thead>
+  <tr>
+    <th></th>
+    <th colspan=2>MultiWOZ 2.1</th>
+    <th colspan=2>Taskmaster-1</th>
+    <th colspan=2>Taskmaster-2</th>
+    <th colspan=2>Taskmaster-3</th>
+  </tr>
+</thead>
+<thead>
+  <tr>
+    <th>Model</th>
+    <th>Acc</th><th>F1</th>
+    <th>Acc</th><th>F1</th>
+    <th>Acc</th><th>F1</th>
+    <th>Acc</th><th>F1</th>
+  </tr>
+</thead>
+<tbody>
+  <tr>
+    <td>BERTNLU</td>
+    <td>74.5</td><td>85.9</td>
+    <td>72.8</td><td>50.6</td>
+    <td>79.2</td><td>70.6</td>
+    <td>86.1</td><td>81.9</td>
+  </tr>
+  <tr>
+    <td>BERTNLU (context=3)</td>
+    <td>80.6</td><td>90.3</td>
+    <td>74.2</td><td>52.7</td>
+    <td>80.9</td><td>73.3</td>
+    <td>87.8</td><td>83.8</td>
+  </tr>
+</tbody>
+</table>
+
+- Acc: percentage of utterances whose dialogue acts are all predicted correctly.
+- F1: F1 measure of the dialogue act predictions pooled over the corpus.
+
 ## References
 
 ```
diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user.json b/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user.json
index d6be45577a3662065d36d112ea15de938705e224..3ed7c76788a3d307f4648625e62ecd97d3962379 100755
--- a/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user.json
+++ b/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user.json
@@ -2,7 +2,7 @@
   "dataset_name": "multiwoz21",
   "data_dir": "unified_datasets/data/multiwoz21/user/context_window_size_0",
   "output_dir": "unified_datasets/output/multiwoz21/user/context_window_size_0",
-  "zipped_model_path": "unified_datasets/output/multiwoz21/user/context_window_size_0/bertnlu_unified_multiwoz_user_context0.zip",
+  "zipped_model_path": "unified_datasets/output/multiwoz21/user/context_window_size_0/bertnlu_unified_multiwoz21_user_context0.zip",
   "log_dir": "unified_datasets/output/multiwoz21/user/context_window_size_0/log",
   "DEVICE": "cuda:0",
   "seed": 2019,
diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user_context3.json b/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user_context3.json
index d46f4db6096028e2582bea546b847be028faf184..de131ead7e71451fc24a2dafd28b2d3b5a19d863 100755
--- a/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user_context3.json
+++ b/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user_context3.json
@@ -2,7 +2,7 @@
   "dataset_name": "multiwoz21",
   "data_dir": "unified_datasets/data/multiwoz21/user/context_window_size_3",
   "output_dir": "unified_datasets/output/multiwoz21/user/context_window_size_3",
-  "zipped_model_path": "unified_datasets/output/multiwoz21/user/context_window_size_3/bertnlu_unified_multiwoz_user_context3.zip",
+  "zipped_model_path": "unified_datasets/output/multiwoz21/user/context_window_size_3/bertnlu_unified_multiwoz21_user_context3.zip",
   "log_dir": "unified_datasets/output/multiwoz21/user/context_window_size_3/log",
   "DEVICE": "cuda:0",
   "seed": 2019,
diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user.json b/convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user.json
new file mode 100755
index 0000000000000000000000000000000000000000..74b998bc53433c6814dc2c1b403913db4717d452
--- /dev/null
+++ b/convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user.json
@@ -0,0 +1,27 @@
+{
+  "dataset_name": "sgd",
+  "data_dir": "unified_datasets/data/sgd/user/context_window_size_0",
+  "output_dir": "unified_datasets/output/sgd/user/context_window_size_0",
+  "zipped_model_path": "unified_datasets/output/sgd/user/context_window_size_0/bertnlu_unified_sgd_user_context0.zip",
+  "log_dir": "unified_datasets/output/sgd/user/context_window_size_0/log",
+  "DEVICE": "cuda:0",
+  "seed": 2019,
+  "cut_sen_len": 40,
+  "use_bert_tokenizer": true,
+  "context_window_size": 0,
+  "model": {
+    "finetune": true,
+    "context": false,
+    "context_grad": false,
+    "pretrained_weights": "bert-base-uncased",
+    "check_step": 1000,
+    "max_step": 10000,
+    "batch_size": 128,
+    "learning_rate": 1e-4,
+    "adam_epsilon": 1e-8,
+    "warmup_steps": 0,
+    "weight_decay": 0.0,
+    "dropout": 0.1,
+    "hidden_units": 768
+  }
+}
\ No newline at end of file
diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user_context3.json b/convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user_context3.json
new file mode 100755
index 0000000000000000000000000000000000000000..bf713685cac1477bb453863eec3dd072168b9a63
--- /dev/null
+++ b/convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user_context3.json
@@ -0,0 +1,27 @@
+{
+  "dataset_name": "sgd",
+  "data_dir": "unified_datasets/data/sgd/user/context_window_size_3",
+  "output_dir": "unified_datasets/output/sgd/user/context_window_size_3",
+  "zipped_model_path": "unified_datasets/output/sgd/user/context_window_size_3/bertnlu_unified_sgd_user_context3.zip",
+  "log_dir": "unified_datasets/output/sgd/user/context_window_size_3/log",
+  "DEVICE": "cuda:0",
+  "seed": 2019,
+  "cut_sen_len": 40,
+  "use_bert_tokenizer": true,
+  "context_window_size": 3,
+  "model": {
+    "finetune": true,
+    "context": true,
+    "context_grad": true,
+    "pretrained_weights": "bert-base-uncased",
+    "check_step": 1000,
+    "max_step": 10000,
+    "batch_size": 128,
+    "learning_rate": 1e-4,
+    "adam_epsilon": 1e-8,
+    "warmup_steps": 0,
+    "weight_decay": 0.0,
+    "dropout": 0.1,
+    "hidden_units": 1536
+  }
+}
\ No newline at end of file
diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user.json b/convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user.json
new file mode 100755
index 0000000000000000000000000000000000000000..47569c62a13832c4c089a4dc949baee5c2983312
--- /dev/null
+++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user.json
@@ -0,0 +1,27 @@
+{
+  "dataset_name": "tm1",
+  "data_dir": "unified_datasets/data/tm1/user/context_window_size_0",
+  "output_dir": "unified_datasets/output/tm1/user/context_window_size_0",
+  "zipped_model_path": "unified_datasets/output/tm1/user/context_window_size_0/bertnlu_unified_tm1_user_context0.zip",
+  "log_dir": "unified_datasets/output/tm1/user/context_window_size_0/log",
+  "DEVICE": "cuda:0",
+  "seed": 2019,
+  "cut_sen_len": 40,
+  "use_bert_tokenizer": true,
+  "context_window_size": 0,
+  "model": {
+    "finetune": true,
+    "context": false,
+    "context_grad": false,
+    "pretrained_weights": "bert-base-uncased",
+    "check_step": 1000,
+    "max_step": 10000,
+    "batch_size": 128,
+    "learning_rate": 1e-4,
+    "adam_epsilon": 1e-8,
+    "warmup_steps": 0,
+    "weight_decay": 0.0,
+    "dropout": 0.1,
+    "hidden_units": 768
+  }
+}
\ No newline at end of file
diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user_context3.json b/convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user_context3.json
new file mode 100755
index 0000000000000000000000000000000000000000..a64f7bd573f6d4b7ce16b54da6fff2cda1931e21
--- /dev/null
+++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user_context3.json
@@ -0,0 +1,27 @@
+{
+  "dataset_name": "tm1",
+  "data_dir": "unified_datasets/data/tm1/user/context_window_size_3",
+  "output_dir": "unified_datasets/output/tm1/user/context_window_size_3",
+  "zipped_model_path": "unified_datasets/output/tm1/user/context_window_size_3/bertnlu_unified_tm1_user_context3.zip",
+  "log_dir": "unified_datasets/output/tm1/user/context_window_size_3/log",
+  "DEVICE": "cuda:0",
+  "seed": 2019,
+  "cut_sen_len": 40,
+  "use_bert_tokenizer": true,
+  "context_window_size": 3,
+  "model": {
+    "finetune": true,
+    "context": true,
+    "context_grad": true,
+    "pretrained_weights": "bert-base-uncased",
+    "check_step": 1000,
+    "max_step": 10000,
+    "batch_size": 128,
+    "learning_rate": 1e-4,
+    "adam_epsilon": 1e-8,
+    "warmup_steps": 0,
+    "weight_decay": 0.0,
+    "dropout": 0.1,
+    "hidden_units": 1536
+  }
+}
\ No newline at end of file
diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user.json b/convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user.json
new file mode 100755
index 0000000000000000000000000000000000000000..6ca5e52144d56399e0b42723caa70f6cc7d403ba
--- /dev/null
+++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user.json
@@ -0,0 +1,27 @@
+{
+  "dataset_name": "tm2",
+  "data_dir": "unified_datasets/data/tm2/user/context_window_size_0",
+  "output_dir": "unified_datasets/output/tm2/user/context_window_size_0",
+  "zipped_model_path": "unified_datasets/output/tm2/user/context_window_size_0/bertnlu_unified_tm2_user_context0.zip",
+  "log_dir": "unified_datasets/output/tm2/user/context_window_size_0/log",
+  "DEVICE": "cuda:0",
+  "seed": 2019,
+  "cut_sen_len": 40,
+  "use_bert_tokenizer": true,
+  "context_window_size": 0,
+  "model": {
+    "finetune": true,
+    "context": false,
+    "context_grad": false,
+    "pretrained_weights": "bert-base-uncased",
+    "check_step": 1000,
+    "max_step": 10000,
+    "batch_size": 128,
+    "learning_rate": 1e-4,
+    "adam_epsilon": 1e-8,
+    "warmup_steps": 0,
+    "weight_decay": 0.0,
+    "dropout": 0.1,
+    "hidden_units": 768
+  }
+}
\ No newline at end of file
diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user_context3.json b/convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user_context3.json
new file mode 100755
index 0000000000000000000000000000000000000000..3faf7a37cb0c007d5bda72272786138044c06793
--- /dev/null
+++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user_context3.json
@@ -0,0 +1,27 @@
+{
+  "dataset_name": "tm2",
+  "data_dir": "unified_datasets/data/tm2/user/context_window_size_3",
+  "output_dir": "unified_datasets/output/tm2/user/context_window_size_3",
+  "zipped_model_path": "unified_datasets/output/tm2/user/context_window_size_3/bertnlu_unified_tm2_user_context3.zip",
+  "log_dir": "unified_datasets/output/tm2/user/context_window_size_3/log",
+  "DEVICE": "cuda:0",
+  "seed": 2019,
+  "cut_sen_len": 40,
+  "use_bert_tokenizer": true,
+  "context_window_size": 3,
+  "model": {
+    "finetune": true,
+    "context": true,
+    "context_grad": true,
+    "pretrained_weights": "bert-base-uncased",
+    "check_step": 1000,
+    "max_step": 10000,
+    "batch_size": 128,
+    "learning_rate": 1e-4,
+    "adam_epsilon": 1e-8,
+    "warmup_steps": 0,
+    "weight_decay": 0.0,
+    "dropout": 0.1,
+    "hidden_units": 1536
+  }
+}
\ No newline at end of file
diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user.json b/convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user.json
new file mode 100755
index 0000000000000000000000000000000000000000..73a73aef2516eefeca54add7e39ae1125d779be2
--- /dev/null
+++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user.json
@@ -0,0 +1,27 @@
+{
+  "dataset_name": "tm3",
+  "data_dir": "unified_datasets/data/tm3/user/context_window_size_0",
+  "output_dir": "unified_datasets/output/tm3/user/context_window_size_0",
+  "zipped_model_path": "unified_datasets/output/tm3/user/context_window_size_0/bertnlu_unified_tm3_user_context0.zip",
+  "log_dir": "unified_datasets/output/tm3/user/context_window_size_0/log",
+  "DEVICE": "cuda:0",
+  "seed": 2019,
+  "cut_sen_len": 40,
+  "use_bert_tokenizer": true,
+  "context_window_size": 0,
+  "model": {
+    "finetune": true,
+    "context": false,
+    "context_grad": false,
+    "pretrained_weights": "bert-base-uncased",
+    "check_step": 1000,
+    "max_step": 10000,
+    "batch_size": 128,
+    "learning_rate": 1e-4,
+    "adam_epsilon": 1e-8,
+    "warmup_steps": 0,
+    "weight_decay": 0.0,
+    "dropout": 0.1,
+    "hidden_units": 768
+  }
+}
\ No newline at end of file
diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user_context3.json b/convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user_context3.json
new file mode 100755
index 0000000000000000000000000000000000000000..97cfad4aa7065a8e15213a52af6670a3a83f2784
--- /dev/null
+++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user_context3.json
@@ -0,0 +1,27 @@
+{
+  "dataset_name": "tm3",
+  "data_dir": "unified_datasets/data/tm3/user/context_window_size_3",
+  "output_dir": "unified_datasets/output/tm3/user/context_window_size_3",
+  "zipped_model_path": "unified_datasets/output/tm3/user/context_window_size_3/bertnlu_unified_tm3_user_context3.zip",
+  "log_dir": "unified_datasets/output/tm3/user/context_window_size_3/log",
+  "DEVICE": "cuda:0",
+  "seed": 2019,
+  "cut_sen_len": 40,
+  "use_bert_tokenizer": true,
+  "context_window_size": 3,
+  "model": {
+    "finetune": true,
+    "context": true,
+    "context_grad": true,
+    "pretrained_weights": "bert-base-uncased",
+    "check_step": 1000,
+    "max_step": 20000,
+    "batch_size": 64,
+    "learning_rate": 1e-4,
+    "adam_epsilon": 1e-8,
+    "warmup_steps": 0,
+    "weight_decay": 0.0,
+    "dropout": 0.1,
+    "hidden_units": 1536
+  }
+}
\ No newline at end of file
diff --git a/convlab2/nlu/milu/README.md b/convlab2/nlu/milu/README.md
index bbd54d671900f26373ef369c3c6f049dcf74ac23..2213475f87ac77c1a010d0a406a49c6976810442 100755
--- a/convlab2/nlu/milu/README.md
+++ b/convlab2/nlu/milu/README.md
@@ -43,6 +43,50 @@ Note that the config file is different from the above. You should set:
 ## Predict
 See `nlu.py` under `multiwoz` and `unified_datasets` directories.
 
+## Performance on unified format datasets
+
+To illustrate that the model can be easily applied to any dataset in our unified format, we report its performance on several such datasets. We follow `README.md` and the config files in `unified_datasets/` to generate `predictions.json`, then evaluate it using `../evaluate_unified_datasets.py`. Note that we use almost the same hyper-parameters across datasets, which may not be optimal.
+
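+Once `predictions.json` is generated, it can be scored with the shared evaluation script, e.g. programmatically (a minimal sketch; the path is an assumption, adjust it to where your predictions were written):
+
+```python
+from convlab2.nlu.evaluate_unified_datasets import evaluate
+
+metrics = evaluate('predictions.json')  # predictions file produced as described above
+print(metrics['accuracy'], metrics['overall']['f1'])
+```
+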
+<table>
+<thead>
+  <tr>
+    <th></th>
+    <th colspan=2>MultiWOZ 2.1</th>
+    <th colspan=2>Taskmaster-1</th>
+    <th colspan=2>Taskmaster-2</th>
+    <th colspan=2>Taskmaster-3</th>
+  </tr>
+</thead>
+<thead>
+  <tr>
+    <th>Model</th>
+    <th>Acc</th><th>F1</th>
+    <th>Acc</th><th>F1</th>
+    <th>Acc</th><th>F1</th>
+    <th>Acc</th><th>F1</th>
+  </tr>
+</thead>
+<tbody>
+  <tr>
+    <td>MILU</td>
+    <td>72.9</td><td>85.2</td>
+    <td>72.9</td><td>49.2</td>
+    <td>79.1</td><td>68.7</td>
+    <td>85.4</td><td>80.3</td>
+  </tr>
+  <tr>
+    <td>MILU (context=3)</td>
+    <td>76.6</td><td>87.9</td>
+    <td>72.4</td><td>48.5</td>
+    <td>78.9</td><td>68.4</td>
+    <td>85.1</td><td>80.1</td>
+  </tr>
+</tbody>
+</table>
+
+- Acc: percentage of utterances whose dialogue acts are all predicted correctly.
+- F1: F1 measure of the dialogue act predictions pooled over the corpus.
+
 ## References
 ```
 @inproceedings{lee2019convlab,
diff --git a/convlab2/nlu/milu/dataset_reader.py b/convlab2/nlu/milu/dataset_reader.py
index 35f71903ab4a269b0f9e5d3cd208d78e48278349..86732b7d0aa46d94ca813a6dd27c64b754a76492 100755
--- a/convlab2/nlu/milu/dataset_reader.py
+++ b/convlab2/nlu/milu/dataset_reader.py
@@ -76,6 +76,8 @@ class MILUDatasetReader(DatasetReader):
 
             for sample in data:
                 utterance = sample['utterance']
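+                # skip empty utterances: they yield no tokens and no spans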
+                if len(utterance) == 0:
+                    continue
                 sentences = self._sent_tokenizer.tokenize(utterance)
                 sent_spans = self._sent_tokenizer.span_tokenize(utterance)
                 tokens = [token for sent in sentences for token in self._word_tokenizer.tokenize(sent)]
diff --git a/convlab2/nlu/milu/unified_datasets/configs/multiwoz21_user.jsonnet b/convlab2/nlu/milu/unified_datasets/configs/multiwoz21_user.jsonnet
index 858a57a19ecbe2acd59f02465d79c1d852341f60..94ccc02647ec9aabacb2b545202e35161addacff 100755
--- a/convlab2/nlu/milu/unified_datasets/configs/multiwoz21_user.jsonnet
+++ b/convlab2/nlu/milu/unified_datasets/configs/multiwoz21_user.jsonnet
@@ -98,7 +98,7 @@
     "num_epochs": 40,
     "grad_norm": 5.0,
     "patience": 75,
-    "cuda_device": 4
+    "cuda_device": 0
   },
   "evaluate_on_test": true
 }
diff --git a/convlab2/nlu/milu/unified_datasets/configs/sgd_user.jsonnet b/convlab2/nlu/milu/unified_datasets/configs/sgd_user.jsonnet
new file mode 100755
index 0000000000000000000000000000000000000000..db02aed0900e628bf7ffff3bf4c20dea73d9baf8
--- /dev/null
+++ b/convlab2/nlu/milu/unified_datasets/configs/sgd_user.jsonnet
@@ -0,0 +1,104 @@
+{
+  "dataset_reader": {
+    "type": "milu",
+    "token_indexers": {
+      "tokens": {
+        "type": "single_id",
+        "lowercase_tokens": true
+      },
+      "token_characters": {
+        "type": "characters",
+        "min_padding_length": 3
+      },
+    },
+    "context_size": 0,
+    "agent": "user",
+    "use_unified_datasets": true,
+    "dataset_name": "sgd",
+    "random_context_size": false
+  },
+  "train_data_path": "train",
+  "validation_data_path": "validation",
+  "test_data_path": "test",
+  "model": {
+    "type": "milu",
+    "label_encoding": "BIO",
+    "use_unified_datasets": true,
+    "dropout": 0.3,
+    "include_start_end_transitions": false,
+    "text_field_embedder": {
+      "token_embedders": {
+        "tokens": {
+            "type": "embedding",
+            "embedding_dim": 50,
+            "pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.50d.txt.gz",
+            "trainable": true
+        },
+        "token_characters": {
+            "type": "character_encoding",
+            "embedding": {
+            "embedding_dim": 16
+            },
+            "encoder": {
+            "type": "cnn",
+            "embedding_dim": 16,
+            "num_filters": 128,
+            "ngram_filter_sizes": [3],
+            "conv_layer_activation": "relu"
+            }
+        }
+      }
+    },
+    "encoder": {
+      "type": "lstm",
+      "input_size": 178,
+      "hidden_size": 200,
+      "num_layers": 1,
+      "dropout": 0.5,
+      "bidirectional": true
+    },
+    "intent_encoder": {
+      "type": "lstm",
+      "input_size": 400,
+      "hidden_size": 200,
+      "num_layers": 1,
+      "dropout": 0.5,
+      "bidirectional": true
+    },
+    "attention": {
+      "type": "bilinear",
+      "vector_dim": 400,
+      "matrix_dim": 400
+    },    
+    "context_for_intent": true,
+    "context_for_tag": false,
+    "attention_for_intent": false,
+    "attention_for_tag": false,
+    "regularizer": [
+      [
+        "scalar_parameters",
+        {
+          "type": "l2",
+          "alpha": 0.1
+        }
+      ]
+    ]
+  },
+  "iterator": {
+    "type": "basic",
+    "batch_size": 64
+  },
+  "trainer": {
+    "optimizer": {
+        "type": "adam",
+        "lr": 0.001
+    },
+    "validation_metric": "+f1-measure",
+    "num_serialized_models_to_keep": 3,
+    "num_epochs": 40,
+    "grad_norm": 5.0,
+    "patience": 75,
+    "cuda_device": 0
+  },
+  "evaluate_on_test": true
+}
diff --git a/convlab2/nlu/milu/unified_datasets/configs/sgd_user_context3.jsonnet b/convlab2/nlu/milu/unified_datasets/configs/sgd_user_context3.jsonnet
new file mode 100755
index 0000000000000000000000000000000000000000..2feea6d4b866ecda20c8398abab4ef34337cff6c
--- /dev/null
+++ b/convlab2/nlu/milu/unified_datasets/configs/sgd_user_context3.jsonnet
@@ -0,0 +1,104 @@
+{
+  "dataset_reader": {
+    "type": "milu",
+    "token_indexers": {
+      "tokens": {
+        "type": "single_id",
+        "lowercase_tokens": true
+      },
+      "token_characters": {
+        "type": "characters",
+        "min_padding_length": 3
+      },
+    },
+    "context_size": 3,
+    "agent": "user",
+    "use_unified_datasets": true,
+    "dataset_name": "sgd",
+    "random_context_size": false
+  },
+  "train_data_path": "train",
+  "validation_data_path": "validation",
+  "test_data_path": "test",
+  "model": {
+    "type": "milu",
+    "label_encoding": "BIO",
+    "use_unified_datasets": true,
+    "dropout": 0.3,
+    "include_start_end_transitions": false,
+    "text_field_embedder": {
+      "token_embedders": {
+        "tokens": {
+            "type": "embedding",
+            "embedding_dim": 50,
+            "pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.50d.txt.gz",
+            "trainable": true
+        },
+        "token_characters": {
+            "type": "character_encoding",
+            "embedding": {
+            "embedding_dim": 16
+            },
+            "encoder": {
+            "type": "cnn",
+            "embedding_dim": 16,
+            "num_filters": 128,
+            "ngram_filter_sizes": [3],
+            "conv_layer_activation": "relu"
+            }
+        }
+      }
+    },
+    "encoder": {
+      "type": "lstm",
+      "input_size": 178,
+      "hidden_size": 200,
+      "num_layers": 1,
+      "dropout": 0.5,
+      "bidirectional": true
+    },
+    "intent_encoder": {
+      "type": "lstm",
+      "input_size": 400,
+      "hidden_size": 200,
+      "num_layers": 1,
+      "dropout": 0.5,
+      "bidirectional": true
+    },
+    "attention": {
+      "type": "bilinear",
+      "vector_dim": 400,
+      "matrix_dim": 400
+    },    
+    "context_for_intent": true,
+    "context_for_tag": false,
+    "attention_for_intent": false,
+    "attention_for_tag": false,
+    "regularizer": [
+      [
+        "scalar_parameters",
+        {
+          "type": "l2",
+          "alpha": 0.1
+        }
+      ]
+    ]
+  },
+  "iterator": {
+    "type": "basic",
+    "batch_size": 64
+  },
+  "trainer": {
+    "optimizer": {
+        "type": "adam",
+        "lr": 0.001
+    },
+    "validation_metric": "+f1-measure",
+    "num_serialized_models_to_keep": 3,
+    "num_epochs": 40,
+    "grad_norm": 5.0,
+    "patience": 75,
+    "cuda_device": 0
+  },
+  "evaluate_on_test": true
+}
diff --git a/convlab2/nlu/milu/unified_datasets/configs/tm1_user.jsonnet b/convlab2/nlu/milu/unified_datasets/configs/tm1_user.jsonnet
new file mode 100755
index 0000000000000000000000000000000000000000..7f7138f8f66560e288deff667f052c3d60c93554
--- /dev/null
+++ b/convlab2/nlu/milu/unified_datasets/configs/tm1_user.jsonnet
@@ -0,0 +1,104 @@
+{
+  "dataset_reader": {
+    "type": "milu",
+    "token_indexers": {
+      "tokens": {
+        "type": "single_id",
+        "lowercase_tokens": true
+      },
+      "token_characters": {
+        "type": "characters",
+        "min_padding_length": 3
+      },
+    },
+    "context_size": 0,
+    "agent": "user",
+    "use_unified_datasets": true,
+    "dataset_name": "tm1",
+    "random_context_size": false
+  },
+  "train_data_path": "train",
+  "validation_data_path": "validation",
+  "test_data_path": "test",
+  "model": {
+    "type": "milu",
+    "label_encoding": "BIO",
+    "use_unified_datasets": true,
+    "dropout": 0.3,
+    "include_start_end_transitions": false,
+    "text_field_embedder": {
+      "token_embedders": {
+        "tokens": {
+            "type": "embedding",
+            "embedding_dim": 50,
+            "pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.50d.txt.gz",
+            "trainable": true
+        },
+        "token_characters": {
+            "type": "character_encoding",
+            "embedding": {
+            "embedding_dim": 16
+            },
+            "encoder": {
+            "type": "cnn",
+            "embedding_dim": 16,
+            "num_filters": 128,
+            "ngram_filter_sizes": [3],
+            "conv_layer_activation": "relu"
+            }
+        }
+      }
+    },
+    "encoder": {
+      "type": "lstm",
+      "input_size": 178,
+      "hidden_size": 200,
+      "num_layers": 1,
+      "dropout": 0.5,
+      "bidirectional": true
+    },
+    "intent_encoder": {
+      "type": "lstm",
+      "input_size": 400,
+      "hidden_size": 200,
+      "num_layers": 1,
+      "dropout": 0.5,
+      "bidirectional": true
+    },
+    "attention": {
+      "type": "bilinear",
+      "vector_dim": 400,
+      "matrix_dim": 400
+    },    
+    "context_for_intent": true,
+    "context_for_tag": false,
+    "attention_for_intent": false,
+    "attention_for_tag": false,
+    "regularizer": [
+      [
+        "scalar_parameters",
+        {
+          "type": "l2",
+          "alpha": 0.1
+        }
+      ]
+    ]
+  },
+  "iterator": {
+    "type": "basic",
+    "batch_size": 64
+  },
+  "trainer": {
+    "optimizer": {
+        "type": "adam",
+        "lr": 0.001
+    },
+    "validation_metric": "+f1-measure",
+    "num_serialized_models_to_keep": 3,
+    "num_epochs": 40,
+    "grad_norm": 5.0,
+    "patience": 75,
+    "cuda_device": 0
+  },
+  "evaluate_on_test": true
+}
diff --git a/convlab2/nlu/milu/unified_datasets/configs/tm1_user_context3.jsonnet b/convlab2/nlu/milu/unified_datasets/configs/tm1_user_context3.jsonnet
new file mode 100755
index 0000000000000000000000000000000000000000..64f5933f16e5773dcd4cad49dd8895d9d1e13898
--- /dev/null
+++ b/convlab2/nlu/milu/unified_datasets/configs/tm1_user_context3.jsonnet
@@ -0,0 +1,104 @@
+{
+  "dataset_reader": {
+    "type": "milu",
+    "token_indexers": {
+      "tokens": {
+        "type": "single_id",
+        "lowercase_tokens": true
+      },
+      "token_characters": {
+        "type": "characters",
+        "min_padding_length": 3
+      },
+    },
+    "context_size": 3,
+    "agent": "user",
+    "use_unified_datasets": true,
+    "dataset_name": "tm1",
+    "random_context_size": false
+  },
+  "train_data_path": "train",
+  "validation_data_path": "validation",
+  "test_data_path": "test",
+  "model": {
+    "type": "milu",
+    "label_encoding": "BIO",
+    "use_unified_datasets": true,
+    "dropout": 0.3,
+    "include_start_end_transitions": false,
+    "text_field_embedder": {
+      "token_embedders": {
+        "tokens": {
+            "type": "embedding",
+            "embedding_dim": 50,
+            "pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.50d.txt.gz",
+            "trainable": true
+        },
+        "token_characters": {
+            "type": "character_encoding",
+            "embedding": {
+            "embedding_dim": 16
+            },
+            "encoder": {
+            "type": "cnn",
+            "embedding_dim": 16,
+            "num_filters": 128,
+            "ngram_filter_sizes": [3],
+            "conv_layer_activation": "relu"
+            }
+        }
+      }
+    },
+    "encoder": {
+      "type": "lstm",
+      "input_size": 178,
+      "hidden_size": 200,
+      "num_layers": 1,
+      "dropout": 0.5,
+      "bidirectional": true
+    },
+    "intent_encoder": {
+      "type": "lstm",
+      "input_size": 400,
+      "hidden_size": 200,
+      "num_layers": 1,
+      "dropout": 0.5,
+      "bidirectional": true
+    },
+    "attention": {
+      "type": "bilinear",
+      "vector_dim": 400,
+      "matrix_dim": 400
+    },    
+    "context_for_intent": true,
+    "context_for_tag": false,
+    "attention_for_intent": false,
+    "attention_for_tag": false,
+    "regularizer": [
+      [
+        "scalar_parameters",
+        {
+          "type": "l2",
+          "alpha": 0.1
+        }
+      ]
+    ]
+  },
+  "iterator": {
+    "type": "basic",
+    "batch_size": 64
+  },
+  "trainer": {
+    "optimizer": {
+        "type": "adam",
+        "lr": 0.001
+    },
+    "validation_metric": "+f1-measure",
+    "num_serialized_models_to_keep": 3,
+    "num_epochs": 40,
+    "grad_norm": 5.0,
+    "patience": 75,
+    "cuda_device": 0
+  },
+  "evaluate_on_test": true
+}
diff --git a/convlab2/nlu/milu/unified_datasets/configs/tm2_user.jsonnet b/convlab2/nlu/milu/unified_datasets/configs/tm2_user.jsonnet
new file mode 100755
index 0000000000000000000000000000000000000000..a3b89448e4a280660b5a8de98500d3641c8a1512
--- /dev/null
+++ b/convlab2/nlu/milu/unified_datasets/configs/tm2_user.jsonnet
@@ -0,0 +1,104 @@
+{
+  "dataset_reader": {
+    "type": "milu",
+    "token_indexers": {
+      "tokens": {
+        "type": "single_id",
+        "lowercase_tokens": true
+      },
+      "token_characters": {
+        "type": "characters",
+        "min_padding_length": 3
+      },
+    },
+    "context_size": 0,
+    "agent": "user",
+    "use_unified_datasets": true,
+    "dataset_name": "tm2",
+    "random_context_size": false
+  },
+  "train_data_path": "train",
+  "validation_data_path": "validation",
+  "test_data_path": "test",
+  "model": {
+    "type": "milu",
+    "label_encoding": "BIO",
+    "use_unified_datasets": true,
+    "dropout": 0.3,
+    "include_start_end_transitions": false,
+    "text_field_embedder": {
+      "token_embedders": {
+        "tokens": {
+            "type": "embedding",
+            "embedding_dim": 50,
+            "pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.50d.txt.gz",
+            "trainable": true
+        },
+        "token_characters": {
+            "type": "character_encoding",
+            "embedding": {
+            "embedding_dim": 16
+            },
+            "encoder": {
+            "type": "cnn",
+            "embedding_dim": 16,
+            "num_filters": 128,
+            "ngram_filter_sizes": [3],
+            "conv_layer_activation": "relu"
+            }
+        }
+      }
+    },
+    "encoder": {
+      "type": "lstm",
+      "input_size": 178,
+      "hidden_size": 200,
+      "num_layers": 1,
+      "dropout": 0.5,
+      "bidirectional": true
+    },
+    "intent_encoder": {
+      "type": "lstm",
+      "input_size": 400,
+      "hidden_size": 200,
+      "num_layers": 1,
+      "dropout": 0.5,
+      "bidirectional": true
+    },
+    "attention": {
+      "type": "bilinear",
+      "vector_dim": 400,
+      "matrix_dim": 400
+    },    
+    "context_for_intent": true,
+    "context_for_tag": false,
+    "attention_for_intent": false,
+    "attention_for_tag": false,
+    "regularizer": [
+      [
+        "scalar_parameters",
+        {
+          "type": "l2",
+          "alpha": 0.1
+        }
+      ]
+    ]
+  },
+  "iterator": {
+    "type": "basic",
+    "batch_size": 64
+  },
+  "trainer": {
+    "optimizer": {
+        "type": "adam",
+        "lr": 0.001
+    },
+    "validation_metric": "+f1-measure",
+    "num_serialized_models_to_keep": 3,
+    "num_epochs": 40,
+    "grad_norm": 5.0,
+    "patience": 75,
+    "cuda_device": 0
+  },
+  "evaluate_on_test": true
+}
diff --git a/convlab2/nlu/milu/unified_datasets/configs/tm2_user_context3.jsonnet b/convlab2/nlu/milu/unified_datasets/configs/tm2_user_context3.jsonnet
new file mode 100755
index 0000000000000000000000000000000000000000..f1bf68efa1faf8e3269d3c69ee75a243abe3d730
--- /dev/null
+++ b/convlab2/nlu/milu/unified_datasets/configs/tm2_user_context3.jsonnet
@@ -0,0 +1,104 @@
+{
+  "dataset_reader": {
+    "type": "milu",
+    "token_indexers": {
+      "tokens": {
+        "type": "single_id",
+        "lowercase_tokens": true
+      },
+      "token_characters": {
+        "type": "characters",
+        "min_padding_length": 3
+      },
+    },
+    "context_size": 3,
+    "agent": "user",
+    "use_unified_datasets": true,
+    "dataset_name": "tm2",
+    "random_context_size": false
+  },
+  "train_data_path": "train",
+  "validation_data_path": "validation",
+  "test_data_path": "test",
+  "model": {
+    "type": "milu",
+    "label_encoding": "BIO",
+    "use_unified_datasets": true,
+    "dropout": 0.3,
+    "include_start_end_transitions": false,
+    "text_field_embedder": {
+      "token_embedders": {
+        "tokens": {
+            "type": "embedding",
+            "embedding_dim": 50,
+            "pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.50d.txt.gz",
+            "trainable": true
+        },
+        "token_characters": {
+            "type": "character_encoding",
+            "embedding": {
+            "embedding_dim": 16
+            },
+            "encoder": {
+            "type": "cnn",
+            "embedding_dim": 16,
+            "num_filters": 128,
+            "ngram_filter_sizes": [3],
+            "conv_layer_activation": "relu"
+            }
+        }
+      }
+    },
+    "encoder": {
+      "type": "lstm",
+      "input_size": 178,
+      "hidden_size": 200,
+      "num_layers": 1,
+      "dropout": 0.5,
+      "bidirectional": true
+    },
+    "intent_encoder": {
+      "type": "lstm",
+      "input_size": 400,
+      "hidden_size": 200,
+      "num_layers": 1,
+      "dropout": 0.5,
+      "bidirectional": true
+    },
+    "attention": {
+      "type": "bilinear",
+      "vector_dim": 400,
+      "matrix_dim": 400
+    },    
+    "context_for_intent": true,
+    "context_for_tag": false,
+    "attention_for_intent": false,
+    "attention_for_tag": false,
+    "regularizer": [
+      [
+        "scalar_parameters",
+        {
+          "type": "l2",
+          "alpha": 0.1
+        }
+      ]
+    ]
+  },
+  "iterator": {
+    "type": "basic",
+    "batch_size": 64
+  },
+  "trainer": {
+    "optimizer": {
+        "type": "adam",
+        "lr": 0.001
+    },
+    "validation_metric": "+f1-measure",
+    "num_serialized_models_to_keep": 3,
+    "num_epochs": 40,
+    "grad_norm": 5.0,
+    "patience": 75,
+    "cuda_device": 0
+  },
+  "evaluate_on_test": true
+}
diff --git a/convlab2/nlu/milu/unified_datasets/configs/tm3_user.jsonnet b/convlab2/nlu/milu/unified_datasets/configs/tm3_user.jsonnet
new file mode 100755
index 0000000000000000000000000000000000000000..c554d83e99ae63c5af288ca49a76cb8d8d5bc1e0
--- /dev/null
+++ b/convlab2/nlu/milu/unified_datasets/configs/tm3_user.jsonnet
@@ -0,0 +1,104 @@
+{
+  "dataset_reader": {
+    "type": "milu",
+    "token_indexers": {
+      "tokens": {
+        "type": "single_id",
+        "lowercase_tokens": true
+      },
+      "token_characters": {
+        "type": "characters",
+        "min_padding_length": 3
+      },
+    },
+    "context_size": 0,
+    "agent": "user",
+    "use_unified_datasets": true,
+    "dataset_name": "tm3",
+    "random_context_size": false
+  },
+  "train_data_path": "train",
+  "validation_data_path": "validation",
+  "test_data_path": "test",
+  "model": {
+    "type": "milu",
+    "label_encoding": "BIO",
+    "use_unified_datasets": true,
+    "dropout": 0.3,
+    "include_start_end_transitions": false,
+    "text_field_embedder": {
+      "token_embedders": {
+        "tokens": {
+            "type": "embedding",
+            "embedding_dim": 50,
+            "pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.50d.txt.gz",
+            "trainable": true
+        },
+        "token_characters": {
+            "type": "character_encoding",
+            "embedding": {
+            "embedding_dim": 16
+            },
+            "encoder": {
+            "type": "cnn",
+            "embedding_dim": 16,
+            "num_filters": 128,
+            "ngram_filter_sizes": [3],
+            "conv_layer_activation": "relu"
+            }
+        }
+      }
+    },
+    "encoder": {
+      "type": "lstm",
+      "input_size": 178,
+      "hidden_size": 200,
+      "num_layers": 1,
+      "dropout": 0.5,
+      "bidirectional": true
+    },
+    "intent_encoder": {
+      "type": "lstm",
+      "input_size": 400,
+      "hidden_size": 200,
+      "num_layers": 1,
+      "dropout": 0.5,
+      "bidirectional": true
+    },
+    "attention": {
+      "type": "bilinear",
+      "vector_dim": 400,
+      "matrix_dim": 400
+    },    
+    "context_for_intent": true,
+    "context_for_tag": false,
+    "attention_for_intent": false,
+    "attention_for_tag": false,
+    "regularizer": [
+      [
+        "scalar_parameters",
+        {
+          "type": "l2",
+          "alpha": 0.1
+        }
+      ]
+    ]
+  },
+  "iterator": {
+    "type": "basic",
+    "batch_size": 64
+  },
+  "trainer": {
+    "optimizer": {
+        "type": "adam",
+        "lr": 0.001
+    },
+    "validation_metric": "+f1-measure",
+    "num_serialized_models_to_keep": 3,
+    "num_epochs": 40,
+    "grad_norm": 5.0,
+    "patience": 75,
+    "cuda_device": 0
+  },
+  "evaluate_on_test": true
+}
diff --git a/convlab2/nlu/milu/unified_datasets/configs/tm3_user_context3.jsonnet b/convlab2/nlu/milu/unified_datasets/configs/tm3_user_context3.jsonnet
new file mode 100755
index 0000000000000000000000000000000000000000..879b1070b246833a76a69b2c49435febbb07d8cb
--- /dev/null
+++ b/convlab2/nlu/milu/unified_datasets/configs/tm3_user_context3.jsonnet
@@ -0,0 +1,104 @@
+{
+  "dataset_reader": {
+    "type": "milu",
+    "token_indexers": {
+      "tokens": {
+        "type": "single_id",
+        "lowercase_tokens": true
+      },
+      "token_characters": {
+        "type": "characters",
+        "min_padding_length": 3
+      },
+    },
+    "context_size": 3,
+    "agent": "user",
+    "use_unified_datasets": true,
+    "dataset_name": "tm3",
+    "random_context_size": false
+  },
+  "train_data_path": "train",
+  "validation_data_path": "validation",
+  "test_data_path": "test",
+  "model": {
+    "type": "milu",
+    "label_encoding": "BIO",
+    "use_unified_datasets": true,
+    "dropout": 0.3,
+    "include_start_end_transitions": false,
+    "text_field_embedder": {
+      "token_embedders": {
+        "tokens": {
+            "type": "embedding",
+            "embedding_dim": 50,
+            "pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.50d.txt.gz",
+            "trainable": true
+        },
+        "token_characters": {
+            "type": "character_encoding",
+            "embedding": {
+            "embedding_dim": 16
+            },
+            "encoder": {
+            "type": "cnn",
+            "embedding_dim": 16,
+            "num_filters": 128,
+            "ngram_filter_sizes": [3],
+            "conv_layer_activation": "relu"
+            }
+        }
+      }
+    },
+    "encoder": {
+      "type": "lstm",
+      "input_size": 178,
+      "hidden_size": 200,
+      "num_layers": 1,
+      "dropout": 0.5,
+      "bidirectional": true
+    },
+    "intent_encoder": {
+      "type": "lstm",
+      "input_size": 400,
+      "hidden_size": 200,
+      "num_layers": 1,
+      "dropout": 0.5,
+      "bidirectional": true
+    },
+    "attention": {
+      "type": "bilinear",
+      "vector_dim": 400,
+      "matrix_dim": 400
+    },    
+    "context_for_intent": true,
+    "context_for_tag": false,
+    "attention_for_intent": false,
+    "attention_for_tag": false,
+    "regularizer": [
+      [
+        "scalar_parameters",
+        {
+          "type": "l2",
+          "alpha": 0.1
+        }
+      ]
+    ]
+  },
+  "iterator": {
+    "type": "basic",
+    "batch_size": 64
+  },
+  "trainer": {
+    "optimizer": {
+        "type": "adam",
+        "lr": 0.001
+    },
+    "validation_metric": "+f1-measure",
+    "num_serialized_models_to_keep": 3,
+    "num_epochs": 40,
+    "grad_norm": 5.0,
+    "patience": 75,
+    "cuda_device": 0
+  },
+  "evaluate_on_test": true
+}
diff --git a/convlab2/util/unified_datasets_util.py b/convlab2/util/unified_datasets_util.py
index e4344bd838785dda7a3736c37d7577a2887fd9d7..ed9c211f9ae1276df257376056df0677057eaca0 100644
--- a/convlab2/util/unified_datasets_util.py
+++ b/convlab2/util/unified_datasets_util.py
@@ -3,6 +3,7 @@ from typing import Dict, List, Tuple
 from zipfile import ZipFile
 import json
 import os
+import re
 import importlib
 from abc import ABC, abstractmethod
 from pprint import pprint
@@ -180,6 +181,81 @@ def load_rg_data(dataset, data_split='all', speaker='system', context_window_siz
     kwargs.setdefault('utterance', True)
     return load_unified_data(dataset, **kwargs)
 
+
+def create_delex_data(dataset, delex_format='[({domain})-({slot})]', ignore_values=['yes', 'no']):
+    # add delex_utterance to the dataset according to dialogue acts and belief_state
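+    # returns (dataset, delex_vocab): each turn gains a 'delex_utterance' field, e.g.
+    # "leaving at 8:15" -> "leaving at [(train)-(leave at)]" (slot names depend on the ontology),
+    # and delex_vocab is the sorted list of placeholders that were actually used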
+
+    def delex_inplace(texts_placeholders, value_pattern):
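+        # texts_placeholders: list of (substring, is_placeholder) pieces of the utterance.
+        # Replace the value (group 2 of value_pattern) only if it occurs in exactly one
+        # non-placeholder piece; note this reads `placeholder` from the enclosing loop.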
+        res = []
+        for substring, is_placeholder in texts_placeholders:
+            if not is_placeholder:
+                matches = value_pattern.findall(substring)
+                res.append(len(matches) == 1)
+            else:
+                res.append(False)
+        if sum(res) == 1:
+            # only one piece matches
+            idx = res.index(True)
+            substring = texts_placeholders[idx][0]
+            searchObj = re.search(value_pattern, substring)
+            assert searchObj
+            start, end = searchObj.span(2)
+            texts_placeholders[idx:idx+1] = [(substring[0:start], False), (placeholder, True), (substring[end:], False)]
+            return True
+        return False
+
+    delex_vocab = set()
+    for data_split in dataset:
+        for dialog in dataset[data_split]:
+            state = {}
+            for turn in dialog['turns']:
+                utt = turn['utterance']
+                delex_utt = []
+                last_end = 0
+                # ignore the non-categorical das that do not have span annotation
+                spans = [x for x in turn['dialogue_acts']['non-categorical'] if 'start' in x]
+                for da in sorted(spans, key=lambda x: x['start']):
+                    # from left to right
+                    start, end = da['start'], da['end']
+                    domain, slot, value = da['domain'], da['slot'], da['value']
+                    assert utt[start:end] == value
+                    # make sure there are no words/number prepend & append and no overlap with other spans
+                    if start >= last_end and (start == 0 or re.match(r'\W', utt[start-1])) and (end == len(utt) or re.match(r'\W', utt[end])):
+                        placeholder = delex_format.format(domain=domain, slot=slot, value=value)
+                        delex_vocab.add(placeholder)
+                        delex_utt.append((utt[last_end:start], False))
+                        delex_utt.append((placeholder, True))
+                        last_end = end
+                delex_utt.append((utt[last_end:], False))
+
+                # search for value in categorical dialogue acts and belief state
+                for da in sorted(turn['dialogue_acts']['categorical'], key=lambda x: len(x['value'])):
+                    domain, slot, value = da['domain'], da['slot'], da['value']
+                    if value.lower() not in ignore_values:
+                        placeholder = delex_format.format(domain=domain, slot=slot, value=value)
+                        pattern = re.compile(r'(\W|^)(' + re.escape(value) + r')(\W|$)', flags=re.I)
+                        if delex_inplace(delex_utt, pattern):
+                            delex_vocab.add(placeholder)
+
+                # keep the most recent state annotation (turns without 'state' reuse the previous one)
+                if 'state' in turn:
+                    state = turn['state']
+                for domain in state:
+                    for slot, values in state[domain].items():
+                        if len(values) > 0:
+                            # has value
+                            for value in values.split('|'):
+                                if value.lower() not in ignore_values:
+                                    placeholder = delex_format.format(domain=domain, slot=slot, value=value)
+                                    pattern = re.compile(r'(\W|^)(' + re.escape(value) + r')(\W|$)', flags=re.I)
+                                    if delex_inplace(delex_utt, pattern):
+                                        delex_vocab.add(placeholder)
+
+                turn['delex_utterance'] = ''.join([x[0] for x in delex_utt])
+    
+    return dataset, sorted(list(delex_vocab))
+
+
 if __name__ == "__main__":
     dataset = load_dataset('multiwoz21')
     print(dataset.keys())
@@ -192,3 +268,13 @@ if __name__ == "__main__":
     
     data_by_split = load_nlu_data(dataset, data_split='test', speaker='user')
     pprint(data_by_split['test'][0])
+
+    dataset, delex_vocab = create_delex_data(dataset)
+    json.dump(dataset['test'], open('delex_multiwoz21_test.json', 'w', encoding='utf-8'), indent=2, ensure_ascii=False)
+    json.dump(delex_vocab, open('delex_vocab.json', 'w', encoding='utf-8'), indent=2, ensure_ascii=False)
+    with open('delex_cmp.txt', 'w') as f:
+        for dialog in dataset['test']:
+            for turn in dialog['turns']:
+                f.write(turn['utterance']+'\n')
+                f.write(turn['delex_utterance']+'\n')
+                f.write('\n')
diff --git a/setup.py b/setup.py
index 900b92f3912b72831bb027da851da556a9d8ad3d..ef798d8ece195266dcda23b9ae8a1b3612822b8c 100755
--- a/setup.py
+++ b/setup.py
@@ -41,6 +41,7 @@ setup(
         'numpy',
         'nltk',
         'scipy',
+        'tensorboard',
         'torch>=1.6',
         'transformers>=4.0',
         'datasets>=1.8',