From 196e2ade02947f65e632a7dd9f42b22a52abdc1d Mon Sep 17 00:00:00 2001 From: zqwerty <zhuq96@hotmail.com> Date: Wed, 2 Mar 2022 18:05:36 +0800 Subject: [PATCH] add benchmark res for bertnlu --- convlab2/nlu/evaluate_unified_datasets.py | 5 +++ convlab2/nlu/jointBERT/README.md | 44 +++++++++++++++++++ .../configs/multiwoz21_user.json | 2 +- .../configs/multiwoz21_user_context3.json | 2 +- .../unified_datasets/configs/sgd_user.json | 27 ++++++++++++ .../configs/sgd_user_context3.json | 27 ++++++++++++ .../unified_datasets/configs/tm1_user.json | 27 ++++++++++++ .../configs/tm1_user_context3.json | 27 ++++++++++++ .../unified_datasets/configs/tm2_user.json | 27 ++++++++++++ .../configs/tm2_user_context3.json | 27 ++++++++++++ .../unified_datasets/configs/tm3_user.json | 27 ++++++++++++ .../configs/tm3_user_context3.json | 27 ++++++++++++ setup.py | 1 + 13 files changed, 268 insertions(+), 2 deletions(-) create mode 100755 convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user.json create mode 100755 convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user_context3.json create mode 100755 convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user.json create mode 100755 convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user_context3.json create mode 100755 convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user.json create mode 100755 convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user_context3.json create mode 100755 convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user.json create mode 100755 convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user_context3.json diff --git a/convlab2/nlu/evaluate_unified_datasets.py b/convlab2/nlu/evaluate_unified_datasets.py index bb244e34..86e91747 100644 --- a/convlab2/nlu/evaluate_unified_datasets.py +++ b/convlab2/nlu/evaluate_unified_datasets.py @@ -6,8 +6,10 @@ def evaluate(predict_result): predict_result = json.load(open(predict_result)) metrics = {x: {'TP':0, 'FP':0, 'FN':0} for x in ['overall', 'binary', 
'categorical', 'non-categorical']} + acc = [] for sample in predict_result: + flag = True for da_type in ['binary', 'categorical', 'non-categorical']: if da_type == 'binary': predicts = [(x['intent'], x['domain'], x['slot']) for x in sample['predictions']['dialogue_acts'][da_type]] @@ -26,6 +28,8 @@ def evaluate(predict_result): if ele not in predicts: metrics['overall']['FN'] += 1 metrics[da_type]['FN'] += 1 + flag &= (sorted(predicts)==sorted(labels)) + acc.append(flag) for metric in metrics: TP = metrics[metric].pop('TP') @@ -37,6 +41,7 @@ def evaluate(predict_result): metrics[metric]['precision'] = precision metrics[metric]['recall'] = recall metrics[metric]['f1'] = f1 + metrics['accuracy'] = sum(acc)/len(acc) return metrics diff --git a/convlab2/nlu/jointBERT/README.md b/convlab2/nlu/jointBERT/README.md index c9756d3c..c87e8355 100755 --- a/convlab2/nlu/jointBERT/README.md +++ b/convlab2/nlu/jointBERT/README.md @@ -31,6 +31,50 @@ $ python test.py --config_path path_to_a_config_file The result (`output.json`) will be saved under the `output_dir` of the config file. Also, it will be zipped as `zipped_model_path` in the config file. +## Performance on unified format datasets + +To illustrate that it is easy to use the model for any dataset that is in our unified format, we report the performance on several datasets in our unified format. We follow `README.md` and config files in `unified_datasets/` to generate `predictions.json`, then evaluate it using `../evaluate_unified_datasets.py`. Note that we use almost the same hyper-parameters for different datasets, which may not be optimal. 
+ +<table> +<thead> + <tr> + <th></th> + <th colspan=2>MultiWOZ 2.1</th> + <th colspan=2>Taskmaster-1</th> + <th colspan=2>Taskmaster-2</th> + <th colspan=2>Taskmaster-3</th> + </tr> +</thead> +<thead> + <tr> + <th>Model</th> + <th>Acc</th><th>F1</th> + <th>Acc</th><th>F1</th> + <th>Acc</th><th>F1</th> + <th>Acc</th><th>F1</th> + </tr> +</thead> +<tbody> + <tr> + <td>BERTNLU</td> + <td>74.5</td><td>85.9</td> + <td>72.8</td><td>50.6</td> + <td>79.2</td><td>70.6</td> + <td>86.1</td><td>81.9</td> + </tr> + <tr> + <td>BERTNLU (context=3)</td> + <td>80.6</td><td>90.3</td> + <td>74.2</td><td>52.7</td> + <td>80.9</td><td>73.3</td> + <td>87.8</td><td>83.8</td> + </tr> +</tbody> +</table> + +- Acc: whether all dialogue acts of an utterance are correctly predicted +- F1: F1 measure of the dialogue act predictions over the corpus. + ## References ``` diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user.json b/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user.json index d6be4557..3ed7c767 100755 --- a/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user.json +++ b/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user.json @@ -2,7 +2,7 @@ "dataset_name": "multiwoz21", "data_dir": "unified_datasets/data/multiwoz21/user/context_window_size_0", "output_dir": "unified_datasets/output/multiwoz21/user/context_window_size_0", - "zipped_model_path": "unified_datasets/output/multiwoz21/user/context_window_size_0/bertnlu_unified_multiwoz_user_context0.zip", + "zipped_model_path": "unified_datasets/output/multiwoz21/user/context_window_size_0/bertnlu_unified_multiwoz21_user_context0.zip", "log_dir": "unified_datasets/output/multiwoz21/user/context_window_size_0/log", "DEVICE": "cuda:0", "seed": 2019, diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user_context3.json b/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user_context3.json index d46f4db6..de131ead 100755 --- 
a/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user_context3.json +++ b/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user_context3.json @@ -2,7 +2,7 @@ "dataset_name": "multiwoz21", "data_dir": "unified_datasets/data/multiwoz21/user/context_window_size_3", "output_dir": "unified_datasets/output/multiwoz21/user/context_window_size_3", - "zipped_model_path": "unified_datasets/output/multiwoz21/user/context_window_size_3/bertnlu_unified_multiwoz_user_context3.zip", + "zipped_model_path": "unified_datasets/output/multiwoz21/user/context_window_size_3/bertnlu_unified_multiwoz21_user_context3.zip", "log_dir": "unified_datasets/output/multiwoz21/user/context_window_size_3/log", "DEVICE": "cuda:0", "seed": 2019, diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user.json b/convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user.json new file mode 100755 index 00000000..74b998bc --- /dev/null +++ b/convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user.json @@ -0,0 +1,27 @@ +{ + "dataset_name": "sgd", + "data_dir": "unified_datasets/data/sgd/user/context_window_size_0", + "output_dir": "unified_datasets/output/sgd/user/context_window_size_0", + "zipped_model_path": "unified_datasets/output/sgd/user/context_window_size_0/bertnlu_unified_sgd_user_context0.zip", + "log_dir": "unified_datasets/output/sgd/user/context_window_size_0/log", + "DEVICE": "cuda:0", + "seed": 2019, + "cut_sen_len": 40, + "use_bert_tokenizer": true, + "context_window_size": 0, + "model": { + "finetune": true, + "context": false, + "context_grad": false, + "pretrained_weights": "bert-base-uncased", + "check_step": 1000, + "max_step": 10000, + "batch_size": 128, + "learning_rate": 1e-4, + "adam_epsilon": 1e-8, + "warmup_steps": 0, + "weight_decay": 0.0, + "dropout": 0.1, + "hidden_units": 768 + } +} \ No newline at end of file diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user_context3.json 
b/convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user_context3.json new file mode 100755 index 00000000..bf713685 --- /dev/null +++ b/convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user_context3.json @@ -0,0 +1,27 @@ +{ + "dataset_name": "sgd", + "data_dir": "unified_datasets/data/sgd/user/context_window_size_3", + "output_dir": "unified_datasets/output/sgd/user/context_window_size_3", + "zipped_model_path": "unified_datasets/output/sgd/user/context_window_size_3/bertnlu_unified_sgd_user_context3.zip", + "log_dir": "unified_datasets/output/sgd/user/context_window_size_3/log", + "DEVICE": "cuda:0", + "seed": 2019, + "cut_sen_len": 40, + "use_bert_tokenizer": true, + "context_window_size": 3, + "model": { + "finetune": true, + "context": true, + "context_grad": true, + "pretrained_weights": "bert-base-uncased", + "check_step": 1000, + "max_step": 10000, + "batch_size": 128, + "learning_rate": 1e-4, + "adam_epsilon": 1e-8, + "warmup_steps": 0, + "weight_decay": 0.0, + "dropout": 0.1, + "hidden_units": 1536 + } +} \ No newline at end of file diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user.json b/convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user.json new file mode 100755 index 00000000..47569c62 --- /dev/null +++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user.json @@ -0,0 +1,27 @@ +{ + "dataset_name": "tm1", + "data_dir": "unified_datasets/data/tm1/user/context_window_size_0", + "output_dir": "unified_datasets/output/tm1/user/context_window_size_0", + "zipped_model_path": "unified_datasets/output/tm1/user/context_window_size_0/bertnlu_unified_tm1_user_context0.zip", + "log_dir": "unified_datasets/output/tm1/user/context_window_size_0/log", + "DEVICE": "cuda:0", + "seed": 2019, + "cut_sen_len": 40, + "use_bert_tokenizer": true, + "context_window_size": 0, + "model": { + "finetune": true, + "context": false, + "context_grad": false, + "pretrained_weights": "bert-base-uncased", + "check_step": 1000, + "max_step": 10000, + 
"batch_size": 128, + "learning_rate": 1e-4, + "adam_epsilon": 1e-8, + "warmup_steps": 0, + "weight_decay": 0.0, + "dropout": 0.1, + "hidden_units": 768 + } +} \ No newline at end of file diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user_context3.json b/convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user_context3.json new file mode 100755 index 00000000..a64f7bd5 --- /dev/null +++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user_context3.json @@ -0,0 +1,27 @@ +{ + "dataset_name": "tm1", + "data_dir": "unified_datasets/data/tm1/user/context_window_size_3", + "output_dir": "unified_datasets/output/tm1/user/context_window_size_3", + "zipped_model_path": "unified_datasets/output/tm1/user/context_window_size_3/bertnlu_unified_tm1_user_context3.zip", + "log_dir": "unified_datasets/output/tm1/user/context_window_size_3/log", + "DEVICE": "cuda:0", + "seed": 2019, + "cut_sen_len": 40, + "use_bert_tokenizer": true, + "context_window_size": 3, + "model": { + "finetune": true, + "context": true, + "context_grad": true, + "pretrained_weights": "bert-base-uncased", + "check_step": 1000, + "max_step": 10000, + "batch_size": 128, + "learning_rate": 1e-4, + "adam_epsilon": 1e-8, + "warmup_steps": 0, + "weight_decay": 0.0, + "dropout": 0.1, + "hidden_units": 1536 + } +} \ No newline at end of file diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user.json b/convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user.json new file mode 100755 index 00000000..6ca5e521 --- /dev/null +++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user.json @@ -0,0 +1,27 @@ +{ + "dataset_name": "tm2", + "data_dir": "unified_datasets/data/tm2/user/context_window_size_0", + "output_dir": "unified_datasets/output/tm2/user/context_window_size_0", + "zipped_model_path": "unified_datasets/output/tm2/user/context_window_size_0/bertnlu_unified_tm2_user_context0.zip", + "log_dir": "unified_datasets/output/tm2/user/context_window_size_0/log", + "DEVICE": 
"cuda:0", + "seed": 2019, + "cut_sen_len": 40, + "use_bert_tokenizer": true, + "context_window_size": 0, + "model": { + "finetune": true, + "context": false, + "context_grad": false, + "pretrained_weights": "bert-base-uncased", + "check_step": 1000, + "max_step": 10000, + "batch_size": 128, + "learning_rate": 1e-4, + "adam_epsilon": 1e-8, + "warmup_steps": 0, + "weight_decay": 0.0, + "dropout": 0.1, + "hidden_units": 768 + } +} \ No newline at end of file diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user_context3.json b/convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user_context3.json new file mode 100755 index 00000000..3faf7a37 --- /dev/null +++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user_context3.json @@ -0,0 +1,27 @@ +{ + "dataset_name": "tm2", + "data_dir": "unified_datasets/data/tm2/user/context_window_size_3", + "output_dir": "unified_datasets/output/tm2/user/context_window_size_3", + "zipped_model_path": "unified_datasets/output/tm2/user/context_window_size_3/bertnlu_unified_tm2_user_context3.zip", + "log_dir": "unified_datasets/output/tm2/user/context_window_size_3/log", + "DEVICE": "cuda:0", + "seed": 2019, + "cut_sen_len": 40, + "use_bert_tokenizer": true, + "context_window_size": 3, + "model": { + "finetune": true, + "context": true, + "context_grad": true, + "pretrained_weights": "bert-base-uncased", + "check_step": 1000, + "max_step": 10000, + "batch_size": 128, + "learning_rate": 1e-4, + "adam_epsilon": 1e-8, + "warmup_steps": 0, + "weight_decay": 0.0, + "dropout": 0.1, + "hidden_units": 1536 + } +} \ No newline at end of file diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user.json b/convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user.json new file mode 100755 index 00000000..73a73aef --- /dev/null +++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user.json @@ -0,0 +1,27 @@ +{ + "dataset_name": "tm3", + "data_dir": "unified_datasets/data/tm3/user/context_window_size_0", + 
"output_dir": "unified_datasets/output/tm3/user/context_window_size_0", + "zipped_model_path": "unified_datasets/output/tm3/user/context_window_size_0/bertnlu_unified_tm3_user_context0.zip", + "log_dir": "unified_datasets/output/tm3/user/context_window_size_0/log", + "DEVICE": "cuda:0", + "seed": 2019, + "cut_sen_len": 40, + "use_bert_tokenizer": true, + "context_window_size": 0, + "model": { + "finetune": true, + "context": false, + "context_grad": false, + "pretrained_weights": "bert-base-uncased", + "check_step": 1000, + "max_step": 10000, + "batch_size": 128, + "learning_rate": 1e-4, + "adam_epsilon": 1e-8, + "warmup_steps": 0, + "weight_decay": 0.0, + "dropout": 0.1, + "hidden_units": 768 + } +} \ No newline at end of file diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user_context3.json b/convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user_context3.json new file mode 100755 index 00000000..97cfad4a --- /dev/null +++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user_context3.json @@ -0,0 +1,27 @@ +{ + "dataset_name": "tm3", + "data_dir": "unified_datasets/data/tm3/user/context_window_size_3", + "output_dir": "unified_datasets/output/tm3/user/context_window_size_3", + "zipped_model_path": "unified_datasets/output/tm3/user/context_window_size_3/bertnlu_unified_tm3_user_context3.zip", + "log_dir": "unified_datasets/output/tm3/user/context_window_size_3/log", + "DEVICE": "cuda:0", + "seed": 2019, + "cut_sen_len": 40, + "use_bert_tokenizer": true, + "context_window_size": 3, + "model": { + "finetune": true, + "context": true, + "context_grad": true, + "pretrained_weights": "bert-base-uncased", + "check_step": 1000, + "max_step": 20000, + "batch_size": 64, + "learning_rate": 1e-4, + "adam_epsilon": 1e-8, + "warmup_steps": 0, + "weight_decay": 0.0, + "dropout": 0.1, + "hidden_units": 1536 + } +} \ No newline at end of file diff --git a/setup.py b/setup.py index 900b92f3..ef798d8e 100755 --- a/setup.py +++ b/setup.py @@ -41,6 +41,7 @@ 
setup( 'numpy', 'nltk', 'scipy', + 'tensorboard', 'torch>=1.6', 'transformers>=4.0', 'datasets>=1.8', -- GitLab