Merge pull request #35 from ConvLab/nlu

Add delex function; NLU benchmark results on multiwoz21, tm1, tm2, tm3

Merge pull request #35 from ConvLab/nlu
b1fd6890 · zhuqi · GitHub · 9f5d85a3 · 4f7f7ed8 · b1fd6890
Unverified Commit b1fd6890 authored Mar 3, 2022 by zhuqi Committed by GitHub Mar 3, 2022
--- a/convlab2/nlu/evaluate_unified_datasets.py
+++ b/convlab2/nlu/evaluate_unified_datasets.py
@@ -6,8 +6,10 @@ def evaluate(predict_result):
    predict_result = json.load(open(predict_result))

    metrics = {x: {'TP':0, 'FP':0, 'FN':0} for x in ['overall', 'binary', 'categorical', 'non-categorical']}
+    acc = []

    for sample in predict_result:
+        flag = True
        for da_type in ['binary', 'categorical', 'non-categorical']:
            if da_type == 'binary':
                predicts = [(x['intent'], x['domain'], x['slot']) for x in sample['predictions']['dialogue_acts'][da_type]]
@@ -26,6 +28,8 @@ def evaluate(predict_result):
                if ele not in predicts:
                    metrics['overall']['FN'] += 1
                    metrics[da_type]['FN'] += 1
+            flag &= (sorted(predicts)==sorted(labels))
+        acc.append(flag)
    
    for metric in metrics:
        TP = metrics[metric].pop('TP')
@@ -37,6 +41,7 @@ def evaluate(predict_result):
        metrics[metric]['precision'] = precision
        metrics[metric]['recall'] = recall
        metrics[metric]['f1'] = f1
+    metrics['accuracy'] = sum(acc)/len(acc)

    return metrics


--- a/convlab2/nlu/jointBERT/README.md
+++ b/convlab2/nlu/jointBERT/README.md
@@ -31,6 +31,50 @@ $ python test.py --config_path path_to_a_config_file
 The result (`output.json`) will be saved under the `output_dir` of the config file. Also, it will be zipped as `zipped_model_path` in the config file.


+## Performance on unified format datasets
+
+To illustrate that it is easy to use the model for any dataset that is in our unified format, we report the performance on several datasets in our unified format. We follow `README.md` and config files in `unified_datasets/` to generate `predictions.json`, then evaluate it using `../evaluate_unified_datasets.py`. Note that we use almost the same hyper-parameters for different datasets, which may not be optimal.
+
+<table>
+<thead>
+  <tr>
+    <th></th>
+    <th colspan=2>MultiWOZ 2.1</th>
+    <th colspan=2>Taskmaster-1</th>
+    <th colspan=2>Taskmaster-2</th>
+    <th colspan=2>Taskmaster-3</th>
+  </tr>
+</thead>
+<thead>
+  <tr>
+    <th>Model</th>
+    <th>Acc</th><th>F1</th>
+    <th>Acc</th><th>F1</th>
+    <th>Acc</th><th>F1</th>
+    <th>Acc</th><th>F1</th>
+  </tr>
+</thead>
+<tbody>
+  <tr>
+    <td>BERTNLU</td>
+    <td>74.5</td><td>85.9</td>
+    <td>72.8</td><td>50.6</td>
+    <td>79.2</td><td>70.6</td>
+    <td>86.1</td><td>81.9</td>
+  </tr>
+  <tr>
+    <td>BERTNLU (context=3)</td>
+    <td>80.6</td><td>90.3</td>
+    <td>74.2</td><td>52.7</td>
+    <td>80.9</td><td>73.3</td>
+    <td>87.8</td><td>83.8</td>
+  </tr>
+</tbody>
+</table>
+
+- Acc: percentage of utterances for which all dialogue acts are predicted correctly (exact match between predictions and labels).
+- F1: F1 measure of the dialogue act predictions over the corpus.
+
 ## References

 ```

--- a/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user.json
+++ b/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user.json
@@ -2,7 +2,7 @@
  "dataset_name": "multiwoz21",
  "data_dir": "unified_datasets/data/multiwoz21/user/context_window_size_0",
  "output_dir": "unified_datasets/output/multiwoz21/user/context_window_size_0",
-  "zipped_model_path": "unified_datasets/output/multiwoz21/user/context_window_size_0/bertnlu_unified_multiwoz_user_context0.zip",
+  "zipped_model_path": "unified_datasets/output/multiwoz21/user/context_window_size_0/bertnlu_unified_multiwoz21_user_context0.zip",
  "log_dir": "unified_datasets/output/multiwoz21/user/context_window_size_0/log",
  "DEVICE": "cuda:0",
  "seed": 2019,

--- a/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user_context3.json
+++ b/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user_context3.json
@@ -2,7 +2,7 @@
  "dataset_name": "multiwoz21",
  "data_dir": "unified_datasets/data/multiwoz21/user/context_window_size_3",
  "output_dir": "unified_datasets/output/multiwoz21/user/context_window_size_3",
-  "zipped_model_path": "unified_datasets/output/multiwoz21/user/context_window_size_3/bertnlu_unified_multiwoz_user_context3.zip",
+  "zipped_model_path": "unified_datasets/output/multiwoz21/user/context_window_size_3/bertnlu_unified_multiwoz21_user_context3.zip",
  "log_dir": "unified_datasets/output/multiwoz21/user/context_window_size_3/log",
  "DEVICE": "cuda:0",
  "seed": 2019,

--- a/convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user.json
+++ b/convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user.json
+{
+  "dataset_name": "sgd",
+  "data_dir": "unified_datasets/data/sgd/user/context_window_size_0",
+  "output_dir": "unified_datasets/output/sgd/user/context_window_size_0",
+  "zipped_model_path": "unified_datasets/output/sgd/user/context_window_size_0/bertnlu_unified_sgd_user_context0.zip",
+  "log_dir": "unified_datasets/output/sgd/user/context_window_size_0/log",
+  "DEVICE": "cuda:0",
+  "seed": 2019,
+  "cut_sen_len": 40,
+  "use_bert_tokenizer": true,
+  "context_window_size": 0,
+  "model": {
+    "finetune": true,
+    "context": false,
+    "context_grad": false,
+    "pretrained_weights": "bert-base-uncased",
+    "check_step": 1000,
+    "max_step": 10000,
+    "batch_size": 128,
+    "learning_rate": 1e-4,
+    "adam_epsilon": 1e-8,
+    "warmup_steps": 0,
+    "weight_decay": 0.0,
+    "dropout": 0.1,
+    "hidden_units": 768
+  }
+}
\ No newline at end of file
--- a/convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user_context3.json
+++ b/convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user_context3.json
+{
+  "dataset_name": "sgd",
+  "data_dir": "unified_datasets/data/sgd/user/context_window_size_3",
+  "output_dir": "unified_datasets/output/sgd/user/context_window_size_3",
+  "zipped_model_path": "unified_datasets/output/sgd/user/context_window_size_3/bertnlu_unified_sgd_user_context3.zip",
+  "log_dir": "unified_datasets/output/sgd/user/context_window_size_3/log",
+  "DEVICE": "cuda:0",
+  "seed": 2019,
+  "cut_sen_len": 40,
+  "use_bert_tokenizer": true,
+  "context_window_size": 3,
+  "model": {
+    "finetune": true,
+    "context": true,
+    "context_grad": true,
+    "pretrained_weights": "bert-base-uncased",
+    "check_step": 1000,
+    "max_step": 10000,
+    "batch_size": 128,
+    "learning_rate": 1e-4,
+    "adam_epsilon": 1e-8,
+    "warmup_steps": 0,
+    "weight_decay": 0.0,
+    "dropout": 0.1,
+    "hidden_units": 1536
+  }
+}
\ No newline at end of file
--- a/convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user.json
+++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user.json
+{
+  "dataset_name": "tm1",
+  "data_dir": "unified_datasets/data/tm1/user/context_window_size_0",
+  "output_dir": "unified_datasets/output/tm1/user/context_window_size_0",
+  "zipped_model_path": "unified_datasets/output/tm1/user/context_window_size_0/bertnlu_unified_tm1_user_context0.zip",
+  "log_dir": "unified_datasets/output/tm1/user/context_window_size_0/log",
+  "DEVICE": "cuda:0",
+  "seed": 2019,
+  "cut_sen_len": 40,
+  "use_bert_tokenizer": true,
+  "context_window_size": 0,
+  "model": {
+    "finetune": true,
+    "context": false,
+    "context_grad": false,
+    "pretrained_weights": "bert-base-uncased",
+    "check_step": 1000,
+    "max_step": 10000,
+    "batch_size": 128,
+    "learning_rate": 1e-4,
+    "adam_epsilon": 1e-8,
+    "warmup_steps": 0,
+    "weight_decay": 0.0,
+    "dropout": 0.1,
+    "hidden_units": 768
+  }
+}
\ No newline at end of file
--- a/convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user_context3.json
+++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user_context3.json
+{
+  "dataset_name": "tm1",
+  "data_dir": "unified_datasets/data/tm1/user/context_window_size_3",
+  "output_dir": "unified_datasets/output/tm1/user/context_window_size_3",
+  "zipped_model_path": "unified_datasets/output/tm1/user/context_window_size_3/bertnlu_unified_tm1_user_context3.zip",
+  "log_dir": "unified_datasets/output/tm1/user/context_window_size_3/log",
+  "DEVICE": "cuda:0",
+  "seed": 2019,
+  "cut_sen_len": 40,
+  "use_bert_tokenizer": true,
+  "context_window_size": 3,
+  "model": {
+    "finetune": true,
+    "context": true,
+    "context_grad": true,
+    "pretrained_weights": "bert-base-uncased",
+    "check_step": 1000,
+    "max_step": 10000,
+    "batch_size": 128,
+    "learning_rate": 1e-4,
+    "adam_epsilon": 1e-8,
+    "warmup_steps": 0,
+    "weight_decay": 0.0,
+    "dropout": 0.1,
+    "hidden_units": 1536
+  }
+}
\ No newline at end of file
--- a/convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user.json
+++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user.json
+{
+  "dataset_name": "tm2",
+  "data_dir": "unified_datasets/data/tm2/user/context_window_size_0",
+  "output_dir": "unified_datasets/output/tm2/user/context_window_size_0",
+  "zipped_model_path": "unified_datasets/output/tm2/user/context_window_size_0/bertnlu_unified_tm2_user_context0.zip",
+  "log_dir": "unified_datasets/output/tm2/user/context_window_size_0/log",
+  "DEVICE": "cuda:0",
+  "seed": 2019,
+  "cut_sen_len": 40,
+  "use_bert_tokenizer": true,
+  "context_window_size": 0,
+  "model": {
+    "finetune": true,
+    "context": false,
+    "context_grad": false,
+    "pretrained_weights": "bert-base-uncased",
+    "check_step": 1000,
+    "max_step": 10000,
+    "batch_size": 128,
+    "learning_rate": 1e-4,
+    "adam_epsilon": 1e-8,
+    "warmup_steps": 0,
+    "weight_decay": 0.0,
+    "dropout": 0.1,
+    "hidden_units": 768
+  }
+}
\ No newline at end of file
--- a/convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user_context3.json
+++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user_context3.json
+{
+  "dataset_name": "tm2",
+  "data_dir": "unified_datasets/data/tm2/user/context_window_size_3",
+  "output_dir": "unified_datasets/output/tm2/user/context_window_size_3",
+  "zipped_model_path": "unified_datasets/output/tm2/user/context_window_size_3/bertnlu_unified_tm2_user_context3.zip",
+  "log_dir": "unified_datasets/output/tm2/user/context_window_size_3/log",
+  "DEVICE": "cuda:0",
+  "seed": 2019,
+  "cut_sen_len": 40,
+  "use_bert_tokenizer": true,
+  "context_window_size": 3,
+  "model": {
+    "finetune": true,
+    "context": true,
+    "context_grad": true,
+    "pretrained_weights": "bert-base-uncased",
+    "check_step": 1000,
+    "max_step": 10000,
+    "batch_size": 128,
+    "learning_rate": 1e-4,
+    "adam_epsilon": 1e-8,
+    "warmup_steps": 0,
+    "weight_decay": 0.0,
+    "dropout": 0.1,
+    "hidden_units": 1536
+  }
+}
\ No newline at end of file
--- a/convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user.json
+++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user.json
+{
+  "dataset_name": "tm3",
+  "data_dir": "unified_datasets/data/tm3/user/context_window_size_0",
+  "output_dir": "unified_datasets/output/tm3/user/context_window_size_0",
+  "zipped_model_path": "unified_datasets/output/tm3/user/context_window_size_0/bertnlu_unified_tm3_user_context0.zip",
+  "log_dir": "unified_datasets/output/tm3/user/context_window_size_0/log",
+  "DEVICE": "cuda:0",
+  "seed": 2019,
+  "cut_sen_len": 40,
+  "use_bert_tokenizer": true,
+  "context_window_size": 0,
+  "model": {
+    "finetune": true,
+    "context": false,
+    "context_grad": false,
+    "pretrained_weights": "bert-base-uncased",
+    "check_step": 1000,
+    "max_step": 10000,
+    "batch_size": 128,
+    "learning_rate": 1e-4,
+    "adam_epsilon": 1e-8,
+    "warmup_steps": 0,
+    "weight_decay": 0.0,
+    "dropout": 0.1,
+    "hidden_units": 768
+  }
+}
\ No newline at end of file
--- a/convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user_context3.json
+++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user_context3.json
+{
+  "dataset_name": "tm3",
+  "data_dir": "unified_datasets/data/tm3/user/context_window_size_3",
+  "output_dir": "unified_datasets/output/tm3/user/context_window_size_3",
+  "zipped_model_path": "unified_datasets/output/tm3/user/context_window_size_3/bertnlu_unified_tm3_user_context3.zip",
+  "log_dir": "unified_datasets/output/tm3/user/context_window_size_3/log",
+  "DEVICE": "cuda:0",
+  "seed": 2019,
+  "cut_sen_len": 40,
+  "use_bert_tokenizer": true,
+  "context_window_size": 3,
+  "model": {
+    "finetune": true,
+    "context": true,
+    "context_grad": true,
+    "pretrained_weights": "bert-base-uncased",
+    "check_step": 1000,
+    "max_step": 20000,
+    "batch_size": 64,
+    "learning_rate": 1e-4,
+    "adam_epsilon": 1e-8,
+    "warmup_steps": 0,
+    "weight_decay": 0.0,
+    "dropout": 0.1,
+    "hidden_units": 1536
+  }
+}
\ No newline at end of file
--- a/convlab2/nlu/milu/README.md
+++ b/convlab2/nlu/milu/README.md
@@ -43,6 +43,50 @@ Note that the config file is different from the above. You should set:
 ## Predict
 See `nlu.py` under `multiwoz` and `unified_datasets` directories.

+## Performance on unified format datasets
+
+To illustrate that it is easy to use the model for any dataset that is in our unified format, we report the performance on several datasets in our unified format. We follow `README.md` and config files in `unified_datasets/` to generate `predictions.json`, then evaluate it using `../evaluate_unified_datasets.py`. Note that we use almost the same hyper-parameters for different datasets, which may not be optimal.
+
+<table>
+<thead>
+  <tr>
+    <th></th>
+    <th colspan=2>MultiWOZ 2.1</th>
+    <th colspan=2>Taskmaster-1</th>
+    <th colspan=2>Taskmaster-2</th>
+    <th colspan=2>Taskmaster-3</th>
+  </tr>
+</thead>
+<thead>
+  <tr>
+    <th>Model</th>
+    <th>Acc</th><th>F1</th>
+    <th>Acc</th><th>F1</th>
+    <th>Acc</th><th>F1</th>
+    <th>Acc</th><th>F1</th>
+  </tr>
+</thead>
+<tbody>
+  <tr>
+    <td>MILU</td>
+    <td>72.9</td><td>85.2</td>
+    <td>72.9</td><td>49.2</td>
+    <td>79.1</td><td>68.7</td>
+    <td>85.4</td><td>80.3</td>
+  </tr>
+  <tr>
+    <td>MILU (context=3)</td>
+    <td>76.6</td><td>87.9</td>
+    <td>72.4</td><td>48.5</td>
+    <td>78.9</td><td>68.4</td>
+    <td>85.1</td><td>80.1</td>
+  </tr>
+</tbody>
+</table>
+
+- Acc: percentage of utterances for which all dialogue acts are predicted correctly (exact match between predictions and labels).
+- F1: F1 measure of the dialogue act predictions over the corpus.
+
 ## References
 ```
 @inproceedings{lee2019convlab,

--- a/convlab2/nlu/milu/dataset_reader.py
+++ b/convlab2/nlu/milu/dataset_reader.py
@@ -76,6 +76,8 @@ class MILUDatasetReader(DatasetReader):

            for sample in data:
                utterance = sample['utterance']
+                if len(utterance) == 0:
+                    continue
                sentences = self._sent_tokenizer.tokenize(utterance)
                sent_spans = self._sent_tokenizer.span_tokenize(utterance)
                tokens = [token for sent in sentences for token in self._word_tokenizer.tokenize(sent)]

--- a/convlab2/nlu/milu/unified_datasets/configs/multiwoz21_user.jsonnet
+++ b/convlab2/nlu/milu/unified_datasets/configs/multiwoz21_user.jsonnet
@@ -98,7 +98,7 @@
    "num_epochs": 40,
    "grad_norm": 5.0,
    "patience": 75,
-    "cuda_device": 4
+    "cuda_device": 0
  },
  "evaluate_on_test": true
 }
--- a/convlab2/nlu/milu/unified_datasets/configs/sgd_user.jsonnet
+++ b/convlab2/nlu/milu/unified_datasets/configs/sgd_user.jsonnet
+{
+  "dataset_reader": {
+    "type": "milu",
+    "token_indexers": {
+      "tokens": {
+        "type": "single_id",
+        "lowercase_tokens": true
+      },
+      "token_characters": {
+        "type": "characters",
+        "min_padding_length": 3
+      },
+    },
+    "context_size": 0,
+    "agent": "user",
+    "use_unified_datasets": true,
+    "dataset_name": "sgd",
+    "random_context_size": false
+  },
+  "train_data_path": "train",
+  "validation_data_path": "validation",
+  "test_data_path": "test",
+  "model": {
+    "type": "milu",
+    "label_encoding": "BIO",
+    "use_unified_datasets": true,
+    "dropout": 0.3,
+    "include_start_end_transitions": false,
+    "text_field_embedder": {
+      "token_embedders": {
+        "tokens": {
+            "type": "embedding",
+            "embedding_dim": 50,
+            "pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.50d.txt.gz",
+            "trainable": true
+        },
+        "token_characters": {
+            "type": "character_encoding",
+            "embedding": {
+            "embedding_dim": 16
+            },
+            "encoder": {
+            "type": "cnn",
+            "embedding_dim": 16,
+            "num_filters": 128,
+            "ngram_filter_sizes": [3],
+            "conv_layer_activation": "relu"
+            }
+        }
+      }
+    },
+    "encoder": {
+      "type": "lstm",
+      "input_size": 178,
+      "hidden_size": 200,
+      "num_layers": 1,
+      "dropout": 0.5,
+      "bidirectional": true
+    },
+    "intent_encoder": {
+      "type": "lstm",
+      "input_size": 400,
+      "hidden_size": 200,
+      "num_layers": 1,
+      "dropout": 0.5,
+      "bidirectional": true
+    },
+    "attention": {
+      "type": "bilinear",
+      "vector_dim": 400,
+      "matrix_dim": 400
+    },    
+    "context_for_intent": true,
+    "context_for_tag": false,
+    "attention_for_intent": false,
+    "attention_for_tag": false,
+    "regularizer": [
+      [
+        "scalar_parameters",
+        {
+          "type": "l2",
+          "alpha": 0.1
+        }
+      ]
+    ]
+  },
+  "iterator": {
+    "type": "basic",
+    "batch_size": 64
+  },
+  "trainer": {
+    "optimizer": {
+        "type": "adam",
+        "lr": 0.001
+    },
+    "validation_metric": "+f1-measure",
+    "num_serialized_models_to_keep": 3,
+    "num_epochs": 40,
+    "grad_norm": 5.0,
+    "patience": 75,
+    "cuda_device": 0
+  },
+  "evaluate_on_test": true
+}
--- a/convlab2/nlu/milu/unified_datasets/configs/sgd_user_context3.jsonnet
+++ b/convlab2/nlu/milu/unified_datasets/configs/sgd_user_context3.jsonnet
+{
+  "dataset_reader": {
+    "type": "milu",
+    "token_indexers": {
+      "tokens": {
+        "type": "single_id",
+        "lowercase_tokens": true
+      },
+      "token_characters": {
+        "type": "characters",
+        "min_padding_length": 3
+      },
+    },
+    "context_size": 3,
+    "agent": "user",
+    "use_unified_datasets": true,
+    "dataset_name": "sgd",
+    "random_context_size": false
+  },
+  "train_data_path": "train",
+  "validation_data_path": "validation",
+  "test_data_path": "test",
+  "model": {
+    "type": "milu",
+    "label_encoding": "BIO",
+    "use_unified_datasets": true,
+    "dropout": 0.3,
+    "include_start_end_transitions": false,
+    "text_field_embedder": {
+      "token_embedders": {
+        "tokens": {
+            "type": "embedding",
+            "embedding_dim": 50,
+            "pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.50d.txt.gz",
+            "trainable": true
+        },
+        "token_characters": {
+            "type": "character_encoding",
+            "embedding": {
+            "embedding_dim": 16
+            },
+            "encoder": {
+            "type": "cnn",
+            "embedding_dim": 16,
+            "num_filters": 128,
+            "ngram_filter_sizes": [3],
+            "conv_layer_activation": "relu"
+            }
+        }
+      }
+    },
+    "encoder": {
+      "type": "lstm",
+      "input_size": 178,
+      "hidden_size": 200,
+      "num_layers": 1,
+      "dropout": 0.5,
+      "bidirectional": true
+    },
+    "intent_encoder": {
+      "type": "lstm",
+      "input_size": 400,
+      "hidden_size": 200,
+      "num_layers": 1,
+      "dropout": 0.5,
+      "bidirectional": true
+    },
+    "attention": {
+      "type": "bilinear",
+      "vector_dim": 400,
+      "matrix_dim": 400
+    },    
+    "context_for_intent": true,
+    "context_for_tag": false,
+    "attention_for_intent": false,
+    "attention_for_tag": false,
+    "regularizer": [
+      [
+        "scalar_parameters",
+        {
+          "type": "l2",
+          "alpha": 0.1
+        }
+      ]
+    ]
+  },
+  "iterator": {
+    "type": "basic",
+    "batch_size": 64
+  },
+  "trainer": {
+    "optimizer": {
+        "type": "adam",
+        "lr": 0.001
+    },
+    "validation_metric": "+f1-measure",
+    "num_serialized_models_to_keep": 3,
+    "num_epochs": 40,
+    "grad_norm": 5.0,
+    "patience": 75,
+    "cuda_device": 0
+  },
+  "evaluate_on_test": true
+}
--- a/convlab2/nlu/milu/unified_datasets/configs/tm1_user.jsonnet
+++ b/convlab2/nlu/milu/unified_datasets/configs/tm1_user.jsonnet
+{
+  "dataset_reader": {
+    "type": "milu",
+    "token_indexers": {
+      "tokens": {
+        "type": "single_id",
+        "lowercase_tokens": true
+      },
+      "token_characters": {
+        "type": "characters",
+        "min_padding_length": 3
+      },
+    },
+    "context_size": 0,
+    "agent": "user",
+    "use_unified_datasets": true,
+    "dataset_name": "tm1",
+    "random_context_size": false
+  },
+  "train_data_path": "train",
+  "validation_data_path": "validation",
+  "test_data_path": "test",
+  "model": {
+    "type": "milu",
+    "label_encoding": "BIO",
+    "use_unified_datasets": true,
+    "dropout": 0.3,
+    "include_start_end_transitions": false,
+    "text_field_embedder": {
+      "token_embedders": {
+        "tokens": {
+            "type": "embedding",
+            "embedding_dim": 50,
+            "pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.50d.txt.gz",
+            "trainable": true
+        },
+        "token_characters": {
+            "type": "character_encoding",
+            "embedding": {
+            "embedding_dim": 16
+            },
+            "encoder": {
+            "type": "cnn",
+            "embedding_dim": 16,
+            "num_filters": 128,
+            "ngram_filter_sizes": [3],
+            "conv_layer_activation": "relu"
+            }
+        }
+      }
+    },
+    "encoder": {
+      "type": "lstm",
+      "input_size": 178,
+      "hidden_size": 200,
+      "num_layers": 1,
+      "dropout": 0.5,
+      "bidirectional": true
+    },
+    "intent_encoder": {
+      "type": "lstm",
+      "input_size": 400,
+      "hidden_size": 200,
+      "num_layers": 1,
+      "dropout": 0.5,
+      "bidirectional": true
+    },
+    "attention": {
+      "type": "bilinear",
+      "vector_dim": 400,
+      "matrix_dim": 400
+    },    
+    "context_for_intent": true,
+    "context_for_tag": false,
+    "attention_for_intent": false,
+    "attention_for_tag": false,
+    "regularizer": [
+      [
+        "scalar_parameters",
+        {
+          "type": "l2",
+          "alpha": 0.1
+        }
+      ]
+    ]
+  },
+  "iterator": {
+    "type": "basic",
+    "batch_size": 64
+  },
+  "trainer": {
+    "optimizer": {
+        "type": "adam",
+        "lr": 0.001
+    },
+    "validation_metric": "+f1-measure",
+    "num_serialized_models_to_keep": 3,
+    "num_epochs": 40,
+    "grad_norm": 5.0,
+    "patience": 75,
+    "cuda_device": 0
+  },
+  "evaluate_on_test": true
+}
--- a/convlab2/nlu/milu/unified_datasets/configs/tm1_user_context3.jsonnet
+++ b/convlab2/nlu/milu/unified_datasets/configs/tm1_user_context3.jsonnet
+{
+  "dataset_reader": {
+    "type": "milu",
+    "token_indexers": {
+      "tokens": {
+        "type": "single_id",
+        "lowercase_tokens": true
+      },
+      "token_characters": {
+        "type": "characters",
+        "min_padding_length": 3
+      },
+    },
+    "context_size": 3,
+    "agent": "user",
+    "use_unified_datasets": true,
+    "dataset_name": "tm1",
+    "random_context_size": false
+  },
+  "train_data_path": "train",
+  "validation_data_path": "validation",
+  "test_data_path": "test",
+  "model": {
+    "type": "milu",
+    "label_encoding": "BIO",
+    "use_unified_datasets": true,
+    "dropout": 0.3,
+    "include_start_end_transitions": false,
+    "text_field_embedder": {
+      "token_embedders": {
+        "tokens": {
+            "type": "embedding",
+            "embedding_dim": 50,
+            "pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.50d.txt.gz",
+            "trainable": true
+        },
+        "token_characters": {
+            "type": "character_encoding",
+            "embedding": {
+            "embedding_dim": 16
+            },
+            "encoder": {
+            "type": "cnn",
+            "embedding_dim": 16,
+            "num_filters": 128,
+            "ngram_filter_sizes": [3],
+            "conv_layer_activation": "relu"
+            }
+        }
+      }
+    },
+    "encoder": {
+      "type": "lstm",
+      "input_size": 178,
+      "hidden_size": 200,
+      "num_layers": 1,
+      "dropout": 0.5,
+      "bidirectional": true
+    },
+    "intent_encoder": {
+      "type": "lstm",
+      "input_size": 400,
+      "hidden_size": 200,
+      "num_layers": 1,
+      "dropout": 0.5,
+      "bidirectional": true
+    },
+    "attention": {
+      "type": "bilinear",
+      "vector_dim": 400,
+      "matrix_dim": 400
+    },    
+    "context_for_intent": true,
+    "context_for_tag": false,
+    "attention_for_intent": false,
+    "attention_for_tag": false,
+    "regularizer": [
+      [
+        "scalar_parameters",
+        {
+          "type": "l2",
+          "alpha": 0.1
+        }
+      ]
+    ]
+  },
+  "iterator": {
+    "type": "basic",
+    "batch_size": 64
+  },
+  "trainer": {
+    "optimizer": {
+        "type": "adam",
+        "lr": 0.001
+    },
+    "validation_metric": "+f1-measure",
+    "num_serialized_models_to_keep": 3,
+    "num_epochs": 40,
+    "grad_norm": 5.0,
+    "patience": 75,
+    "cuda_device": 0
+  },
+  "evaluate_on_test": true
+}
--- a/convlab2/nlu/milu/unified_datasets/configs/tm2_user.jsonnet
+++ b/convlab2/nlu/milu/unified_datasets/configs/tm2_user.jsonnet
+{
+  "dataset_reader": {
+    "type": "milu",
+    "token_indexers": {
+      "tokens": {
+        "type": "single_id",
+        "lowercase_tokens": true
+      },
+      "token_characters": {
+        "type": "characters",
+        "min_padding_length": 3
+      },
+    },
+    "context_size": 0,
+    "agent": "user",
+    "use_unified_datasets": true,
+    "dataset_name": "tm2",
+    "random_context_size": false
+  },
+  "train_data_path": "train",
+  "validation_data_path": "validation",
+  "test_data_path": "test",
+  "model": {
+    "type": "milu",
+    "label_encoding": "BIO",
+    "use_unified_datasets": true,
+    "dropout": 0.3,
+    "include_start_end_transitions": false,
+    "text_field_embedder": {
+      "token_embedders": {
+        "tokens": {
+            "type": "embedding",
+            "embedding_dim": 50,
+            "pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.50d.txt.gz",
+            "trainable": true
+        },
+        "token_characters": {
+            "type": "character_encoding",
+            "embedding": {
+            "embedding_dim": 16
+            },
+            "encoder": {
+            "type": "cnn",
+            "embedding_dim": 16,
+            "num_filters": 128,
+            "ngram_filter_sizes": [3],
+            "conv_layer_activation": "relu"
+            }
+        }
+      }
+    },
+    "encoder": {
+      "type": "lstm",
+      "input_size": 178,
+      "hidden_size": 200,
+      "num_layers": 1,
+      "dropout": 0.5,
+      "bidirectional": true
+    },
+    "intent_encoder": {
+      "type": "lstm",
+      "input_size": 400,
+      "hidden_size": 200,
+      "num_layers": 1,
+      "dropout": 0.5,
+      "bidirectional": true
+    },
+    "attention": {
+      "type": "bilinear",
+      "vector_dim": 400,
+      "matrix_dim": 400
+    },    
+    "context_for_intent": true,
+    "context_for_tag": false,
+    "attention_for_intent": false,
+    "attention_for_tag": false,
+    "regularizer": [
+      [
+        "scalar_parameters",
+        {
+          "type": "l2",
+          "alpha": 0.1
+        }
+      ]
+    ]
+  },
+  "iterator": {
+    "type": "basic",
+    "batch_size": 64
+  },
+  "trainer": {
+    "optimizer": {
+        "type": "adam",
+        "lr": 0.001
+    },
+    "validation_metric": "+f1-measure",
+    "num_serialized_models_to_keep": 3,
+    "num_epochs": 40,
+    "grad_norm": 5.0,
+    "patience": 75,
+    "cuda_device": 0
+  },
+  "evaluate_on_test": true
+}