From 196e2ade02947f65e632a7dd9f42b22a52abdc1d Mon Sep 17 00:00:00 2001
From: zqwerty <zhuq96@hotmail.com>
Date: Wed, 2 Mar 2022 18:05:36 +0800
Subject: [PATCH] add benchmark results for BERTNLU

---
 convlab2/nlu/evaluate_unified_datasets.py     |  5 +++
 convlab2/nlu/jointBERT/README.md              | 44 +++++++++++++++++++
 .../configs/multiwoz21_user.json              |  2 +-
 .../configs/multiwoz21_user_context3.json     |  2 +-
 .../unified_datasets/configs/sgd_user.json    | 27 ++++++++++++
 .../configs/sgd_user_context3.json            | 27 ++++++++++++
 .../unified_datasets/configs/tm1_user.json    | 27 ++++++++++++
 .../configs/tm1_user_context3.json            | 27 ++++++++++++
 .../unified_datasets/configs/tm2_user.json    | 27 ++++++++++++
 .../configs/tm2_user_context3.json            | 27 ++++++++++++
 .../unified_datasets/configs/tm3_user.json    | 27 ++++++++++++
 .../configs/tm3_user_context3.json            | 27 ++++++++++++
 setup.py                                      |  1 +
 13 files changed, 268 insertions(+), 2 deletions(-)
 create mode 100755 convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user.json
 create mode 100755 convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user_context3.json
 create mode 100755 convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user.json
 create mode 100755 convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user_context3.json
 create mode 100755 convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user.json
 create mode 100755 convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user_context3.json
 create mode 100755 convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user.json
 create mode 100755 convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user_context3.json

diff --git a/convlab2/nlu/evaluate_unified_datasets.py b/convlab2/nlu/evaluate_unified_datasets.py
index bb244e34..86e91747 100644
--- a/convlab2/nlu/evaluate_unified_datasets.py
+++ b/convlab2/nlu/evaluate_unified_datasets.py
@@ -6,8 +6,10 @@ def evaluate(predict_result):
     predict_result = json.load(open(predict_result))
 
     metrics = {x: {'TP':0, 'FP':0, 'FN':0} for x in ['overall', 'binary', 'categorical', 'non-categorical']}
+    acc = []  # per-sample exact-match flags
 
     for sample in predict_result:
+        flag = True  # whether all dialogue acts of this sample are predicted correctly
         for da_type in ['binary', 'categorical', 'non-categorical']:
             if da_type == 'binary':
                 predicts = [(x['intent'], x['domain'], x['slot']) for x in sample['predictions']['dialogue_acts'][da_type]]
@@ -26,6 +28,8 @@ def evaluate(predict_result):
                 if ele not in predicts:
                     metrics['overall']['FN'] += 1
                     metrics[da_type]['FN'] += 1
+            flag &= (sorted(predicts)==sorted(labels))
+        acc.append(flag)
     
     for metric in metrics:
         TP = metrics[metric].pop('TP')
@@ -37,6 +41,7 @@ def evaluate(predict_result):
         metrics[metric]['precision'] = precision
         metrics[metric]['recall'] = recall
         metrics[metric]['f1'] = f1
+    metrics['accuracy'] = sum(acc)/len(acc)  # utterance-level exact-match accuracy
 
     return metrics
 
diff --git a/convlab2/nlu/jointBERT/README.md b/convlab2/nlu/jointBERT/README.md
index c9756d3c..c87e8355 100755
--- a/convlab2/nlu/jointBERT/README.md
+++ b/convlab2/nlu/jointBERT/README.md
@@ -31,6 +31,50 @@ $ python test.py --config_path path_to_a_config_file
 The result (`output.json`) will be saved under the `output_dir` of the config file. Also, it will be zipped as `zipped_model_path` in the config file.
 
 
+## Performance on unified format datasets
+
+To show that the model can easily be applied to any dataset in our unified format, we report its performance on several such datasets. We follow `README.md` and the config files in `unified_datasets/` to generate `predictions.json`, then evaluate it using `../evaluate_unified_datasets.py`. Note that we use almost the same hyper-parameters for all datasets, which may not be optimal.
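+
+For example, a rough sketch for MultiWOZ 2.1 without context (paths are illustrative; the step that turns the test output into `predictions.json` and the exact arguments of the evaluation script follow `unified_datasets/README.md` and may differ):
+
+```sh
+$ python train.py --config_path unified_datasets/configs/multiwoz21_user.json
+$ python test.py --config_path unified_datasets/configs/multiwoz21_user.json
+$ # convert the test output into predictions.json as described in unified_datasets/README.md, then:
+$ python ../evaluate_unified_datasets.py --predict_result path_to_predictions.json
+```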
+
+<table>
+<thead>
+  <tr>
+    <th></th>
+    <th colspan=2>MultiWOZ 2.1</th>
+    <th colspan=2>Taskmaster-1</th>
+    <th colspan=2>Taskmaster-2</th>
+    <th colspan=2>Taskmaster-3</th>
+  </tr>
+</thead>
+<thead>
+  <tr>
+    <th>Model</th>
+    <th>Acc</th><th>F1</th>
+    <th>Acc</th><th>F1</th>
+    <th>Acc</th><th>F1</th>
+    <th>Acc</th><th>F1</th>
+  </tr>
+</thead>
+<tbody>
+  <tr>
+    <td>BERTNLU</td>
+    <td>74.5</td><td>85.9</td>
+    <td>72.8</td><td>50.6</td>
+    <td>79.2</td><td>70.6</td>
+    <td>86.1</td><td>81.9</td>
+  </tr>
+  <tr>
+    <td>BERTNLU (context=3)</td>
+    <td>80.6</td><td>90.3</td>
+    <td>74.2</td><td>52.7</td>
+    <td>80.9</td><td>73.3</td>
+    <td>87.8</td><td>83.8</td>
+  </tr>
+</tbody>
+</table>
+
+- Acc: the percentage of utterances whose dialogue acts are all predicted correctly.
+- F1: the F1 measure of dialogue act predictions over the corpus (TP/FP/FN pooled over all dialogue acts).
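+
+A toy illustration of how these two numbers are computed (simplified from `../evaluate_unified_datasets.py`, which additionally groups acts by type; the values below are made up):
+
+```python
+# Each utterance is represented as a set of (intent, domain, slot, value) tuples.
+gold = [{("inform", "hotel", "area", "north")}, {("request", "taxi", "phone", "")}]
+pred = [{("inform", "hotel", "area", "north")}, set()]
+
+# Acc: an utterance counts as correct only if all of its dialogue acts match exactly.
+acc = sum(p == g for p, g in zip(pred, gold)) / len(gold)   # 0.5
+
+# F1: TP/FP/FN are pooled over all dialogue acts in the corpus.
+TP = sum(len(p & g) for p, g in zip(pred, gold))            # 1
+FP = sum(len(p - g) for p, g in zip(pred, gold))            # 0
+FN = sum(len(g - p) for p, g in zip(pred, gold))            # 1
+precision = TP / (TP + FP) if TP + FP else 0
+recall = TP / (TP + FN) if TP + FN else 0
+f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0
+print(acc, f1)  # 0.5 0.666...
+```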
+
 ## References
 
 ```
diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user.json b/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user.json
index d6be4557..3ed7c767 100755
--- a/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user.json
+++ b/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user.json
@@ -2,7 +2,7 @@
   "dataset_name": "multiwoz21",
   "data_dir": "unified_datasets/data/multiwoz21/user/context_window_size_0",
   "output_dir": "unified_datasets/output/multiwoz21/user/context_window_size_0",
-  "zipped_model_path": "unified_datasets/output/multiwoz21/user/context_window_size_0/bertnlu_unified_multiwoz_user_context0.zip",
+  "zipped_model_path": "unified_datasets/output/multiwoz21/user/context_window_size_0/bertnlu_unified_multiwoz21_user_context0.zip",
   "log_dir": "unified_datasets/output/multiwoz21/user/context_window_size_0/log",
   "DEVICE": "cuda:0",
   "seed": 2019,
diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user_context3.json b/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user_context3.json
index d46f4db6..de131ead 100755
--- a/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user_context3.json
+++ b/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user_context3.json
@@ -2,7 +2,7 @@
   "dataset_name": "multiwoz21",
   "data_dir": "unified_datasets/data/multiwoz21/user/context_window_size_3",
   "output_dir": "unified_datasets/output/multiwoz21/user/context_window_size_3",
-  "zipped_model_path": "unified_datasets/output/multiwoz21/user/context_window_size_3/bertnlu_unified_multiwoz_user_context3.zip",
+  "zipped_model_path": "unified_datasets/output/multiwoz21/user/context_window_size_3/bertnlu_unified_multiwoz21_user_context3.zip",
   "log_dir": "unified_datasets/output/multiwoz21/user/context_window_size_3/log",
   "DEVICE": "cuda:0",
   "seed": 2019,
diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user.json b/convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user.json
new file mode 100755
index 00000000..74b998bc
--- /dev/null
+++ b/convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user.json
@@ -0,0 +1,27 @@
+{
+  "dataset_name": "sgd",
+  "data_dir": "unified_datasets/data/sgd/user/context_window_size_0",
+  "output_dir": "unified_datasets/output/sgd/user/context_window_size_0",
+  "zipped_model_path": "unified_datasets/output/sgd/user/context_window_size_0/bertnlu_unified_sgd_user_context0.zip",
+  "log_dir": "unified_datasets/output/sgd/user/context_window_size_0/log",
+  "DEVICE": "cuda:0",
+  "seed": 2019,
+  "cut_sen_len": 40,
+  "use_bert_tokenizer": true,
+  "context_window_size": 0,
+  "model": {
+    "finetune": true,
+    "context": false,
+    "context_grad": false,
+    "pretrained_weights": "bert-base-uncased",
+    "check_step": 1000,
+    "max_step": 10000,
+    "batch_size": 128,
+    "learning_rate": 1e-4,
+    "adam_epsilon": 1e-8,
+    "warmup_steps": 0,
+    "weight_decay": 0.0,
+    "dropout": 0.1,
+    "hidden_units": 768
+  }
+}
\ No newline at end of file
diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user_context3.json b/convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user_context3.json
new file mode 100755
index 00000000..bf713685
--- /dev/null
+++ b/convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user_context3.json
@@ -0,0 +1,27 @@
+{
+  "dataset_name": "sgd",
+  "data_dir": "unified_datasets/data/sgd/user/context_window_size_3",
+  "output_dir": "unified_datasets/output/sgd/user/context_window_size_3",
+  "zipped_model_path": "unified_datasets/output/sgd/user/context_window_size_3/bertnlu_unified_sgd_user_context3.zip",
+  "log_dir": "unified_datasets/output/sgd/user/context_window_size_3/log",
+  "DEVICE": "cuda:0",
+  "seed": 2019,
+  "cut_sen_len": 40,
+  "use_bert_tokenizer": true,
+  "context_window_size": 3,
+  "model": {
+    "finetune": true,
+    "context": true,
+    "context_grad": true,
+    "pretrained_weights": "bert-base-uncased",
+    "check_step": 1000,
+    "max_step": 10000,
+    "batch_size": 128,
+    "learning_rate": 1e-4,
+    "adam_epsilon": 1e-8,
+    "warmup_steps": 0,
+    "weight_decay": 0.0,
+    "dropout": 0.1,
+    "hidden_units": 1536
+  }
+}
\ No newline at end of file
diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user.json b/convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user.json
new file mode 100755
index 00000000..47569c62
--- /dev/null
+++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user.json
@@ -0,0 +1,27 @@
+{
+  "dataset_name": "tm1",
+  "data_dir": "unified_datasets/data/tm1/user/context_window_size_0",
+  "output_dir": "unified_datasets/output/tm1/user/context_window_size_0",
+  "zipped_model_path": "unified_datasets/output/tm1/user/context_window_size_0/bertnlu_unified_tm1_user_context0.zip",
+  "log_dir": "unified_datasets/output/tm1/user/context_window_size_0/log",
+  "DEVICE": "cuda:0",
+  "seed": 2019,
+  "cut_sen_len": 40,
+  "use_bert_tokenizer": true,
+  "context_window_size": 0,
+  "model": {
+    "finetune": true,
+    "context": false,
+    "context_grad": false,
+    "pretrained_weights": "bert-base-uncased",
+    "check_step": 1000,
+    "max_step": 10000,
+    "batch_size": 128,
+    "learning_rate": 1e-4,
+    "adam_epsilon": 1e-8,
+    "warmup_steps": 0,
+    "weight_decay": 0.0,
+    "dropout": 0.1,
+    "hidden_units": 768
+  }
+}
\ No newline at end of file
diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user_context3.json b/convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user_context3.json
new file mode 100755
index 00000000..a64f7bd5
--- /dev/null
+++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user_context3.json
@@ -0,0 +1,27 @@
+{
+  "dataset_name": "tm1",
+  "data_dir": "unified_datasets/data/tm1/user/context_window_size_3",
+  "output_dir": "unified_datasets/output/tm1/user/context_window_size_3",
+  "zipped_model_path": "unified_datasets/output/tm1/user/context_window_size_3/bertnlu_unified_tm1_user_context3.zip",
+  "log_dir": "unified_datasets/output/tm1/user/context_window_size_3/log",
+  "DEVICE": "cuda:0",
+  "seed": 2019,
+  "cut_sen_len": 40,
+  "use_bert_tokenizer": true,
+  "context_window_size": 3,
+  "model": {
+    "finetune": true,
+    "context": true,
+    "context_grad": true,
+    "pretrained_weights": "bert-base-uncased",
+    "check_step": 1000,
+    "max_step": 10000,
+    "batch_size": 128,
+    "learning_rate": 1e-4,
+    "adam_epsilon": 1e-8,
+    "warmup_steps": 0,
+    "weight_decay": 0.0,
+    "dropout": 0.1,
+    "hidden_units": 1536
+  }
+}
\ No newline at end of file
diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user.json b/convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user.json
new file mode 100755
index 00000000..6ca5e521
--- /dev/null
+++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user.json
@@ -0,0 +1,27 @@
+{
+  "dataset_name": "tm2",
+  "data_dir": "unified_datasets/data/tm2/user/context_window_size_0",
+  "output_dir": "unified_datasets/output/tm2/user/context_window_size_0",
+  "zipped_model_path": "unified_datasets/output/tm2/user/context_window_size_0/bertnlu_unified_tm2_user_context0.zip",
+  "log_dir": "unified_datasets/output/tm2/user/context_window_size_0/log",
+  "DEVICE": "cuda:0",
+  "seed": 2019,
+  "cut_sen_len": 40,
+  "use_bert_tokenizer": true,
+  "context_window_size": 0,
+  "model": {
+    "finetune": true,
+    "context": false,
+    "context_grad": false,
+    "pretrained_weights": "bert-base-uncased",
+    "check_step": 1000,
+    "max_step": 10000,
+    "batch_size": 128,
+    "learning_rate": 1e-4,
+    "adam_epsilon": 1e-8,
+    "warmup_steps": 0,
+    "weight_decay": 0.0,
+    "dropout": 0.1,
+    "hidden_units": 768
+  }
+}
\ No newline at end of file
diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user_context3.json b/convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user_context3.json
new file mode 100755
index 00000000..3faf7a37
--- /dev/null
+++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user_context3.json
@@ -0,0 +1,27 @@
+{
+  "dataset_name": "tm2",
+  "data_dir": "unified_datasets/data/tm2/user/context_window_size_3",
+  "output_dir": "unified_datasets/output/tm2/user/context_window_size_3",
+  "zipped_model_path": "unified_datasets/output/tm2/user/context_window_size_3/bertnlu_unified_tm2_user_context3.zip",
+  "log_dir": "unified_datasets/output/tm2/user/context_window_size_3/log",
+  "DEVICE": "cuda:0",
+  "seed": 2019,
+  "cut_sen_len": 40,
+  "use_bert_tokenizer": true,
+  "context_window_size": 3,
+  "model": {
+    "finetune": true,
+    "context": true,
+    "context_grad": true,
+    "pretrained_weights": "bert-base-uncased",
+    "check_step": 1000,
+    "max_step": 10000,
+    "batch_size": 128,
+    "learning_rate": 1e-4,
+    "adam_epsilon": 1e-8,
+    "warmup_steps": 0,
+    "weight_decay": 0.0,
+    "dropout": 0.1,
+    "hidden_units": 1536
+  }
+}
\ No newline at end of file
diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user.json b/convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user.json
new file mode 100755
index 00000000..73a73aef
--- /dev/null
+++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user.json
@@ -0,0 +1,27 @@
+{
+  "dataset_name": "tm3",
+  "data_dir": "unified_datasets/data/tm3/user/context_window_size_0",
+  "output_dir": "unified_datasets/output/tm3/user/context_window_size_0",
+  "zipped_model_path": "unified_datasets/output/tm3/user/context_window_size_0/bertnlu_unified_tm3_user_context0.zip",
+  "log_dir": "unified_datasets/output/tm3/user/context_window_size_0/log",
+  "DEVICE": "cuda:0",
+  "seed": 2019,
+  "cut_sen_len": 40,
+  "use_bert_tokenizer": true,
+  "context_window_size": 0,
+  "model": {
+    "finetune": true,
+    "context": false,
+    "context_grad": false,
+    "pretrained_weights": "bert-base-uncased",
+    "check_step": 1000,
+    "max_step": 10000,
+    "batch_size": 128,
+    "learning_rate": 1e-4,
+    "adam_epsilon": 1e-8,
+    "warmup_steps": 0,
+    "weight_decay": 0.0,
+    "dropout": 0.1,
+    "hidden_units": 768
+  }
+}
\ No newline at end of file
diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user_context3.json b/convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user_context3.json
new file mode 100755
index 00000000..97cfad4a
--- /dev/null
+++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user_context3.json
@@ -0,0 +1,27 @@
+{
+  "dataset_name": "tm3",
+  "data_dir": "unified_datasets/data/tm3/user/context_window_size_3",
+  "output_dir": "unified_datasets/output/tm3/user/context_window_size_3",
+  "zipped_model_path": "unified_datasets/output/tm3/user/context_window_size_3/bertnlu_unified_tm3_user_context3.zip",
+  "log_dir": "unified_datasets/output/tm3/user/context_window_size_3/log",
+  "DEVICE": "cuda:0",
+  "seed": 2019,
+  "cut_sen_len": 40,
+  "use_bert_tokenizer": true,
+  "context_window_size": 3,
+  "model": {
+    "finetune": true,
+    "context": true,
+    "context_grad": true,
+    "pretrained_weights": "bert-base-uncased",
+    "check_step": 1000,
+    "max_step": 20000,
+    "batch_size": 64,
+    "learning_rate": 1e-4,
+    "adam_epsilon": 1e-8,
+    "warmup_steps": 0,
+    "weight_decay": 0.0,
+    "dropout": 0.1,
+    "hidden_units": 1536
+  }
+}
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 900b92f3..ef798d8e 100755
--- a/setup.py
+++ b/setup.py
@@ -41,6 +41,7 @@ setup(
         'numpy',
         'nltk',
         'scipy',
+        'tensorboard',
         'torch>=1.6',
         'transformers>=4.0',
         'datasets>=1.8',
-- 
GitLab