diff --git a/convlab2/nlu/evaluate_unified_datasets.py b/convlab2/nlu/evaluate_unified_datasets.py index bb244e34918c6aeaed468d6a67683c8e4e5306b4..86e91747702a1dedd727b5945bf6cbcaa08540d9 100644 --- a/convlab2/nlu/evaluate_unified_datasets.py +++ b/convlab2/nlu/evaluate_unified_datasets.py @@ -6,8 +6,10 @@ def evaluate(predict_result): predict_result = json.load(open(predict_result)) metrics = {x: {'TP':0, 'FP':0, 'FN':0} for x in ['overall', 'binary', 'categorical', 'non-categorical']} + acc = [] for sample in predict_result: + flag = True for da_type in ['binary', 'categorical', 'non-categorical']: if da_type == 'binary': predicts = [(x['intent'], x['domain'], x['slot']) for x in sample['predictions']['dialogue_acts'][da_type]] @@ -26,6 +28,8 @@ def evaluate(predict_result): if ele not in predicts: metrics['overall']['FN'] += 1 metrics[da_type]['FN'] += 1 + flag &= (sorted(predicts)==sorted(labels)) + acc.append(flag) for metric in metrics: TP = metrics[metric].pop('TP') @@ -37,6 +41,7 @@ def evaluate(predict_result): metrics[metric]['precision'] = precision metrics[metric]['recall'] = recall metrics[metric]['f1'] = f1 + metrics['accuracy'] = sum(acc)/len(acc) return metrics diff --git a/convlab2/nlu/jointBERT/README.md b/convlab2/nlu/jointBERT/README.md index c9756d3c1ebdd42e975bb86d32a532b066a29048..c87e83551b21b7778c026916795c11a737831ce1 100755 --- a/convlab2/nlu/jointBERT/README.md +++ b/convlab2/nlu/jointBERT/README.md @@ -31,6 +31,50 @@ $ python test.py --config_path path_to_a_config_file The result (`output.json`) will be saved under the `output_dir` of the config file. Also, it will be zipped as `zipped_model_path` in the config file. +## Performance on unified format datasets + +To illustrate that it is easy to use the model for any dataset that is in our unified format, we report the performance on several datasets in our unified format. 
We follow `README.md` and config files in `unified_datasets/` to generate `predictions.json`, then evaluate it using `../evaluate_unified_datasets.py`. Note that we use almost the same hyper-parameters for different datasets, which may not be optimal. + +<table> +<thead> + <tr> + <th></th> + <th colspan=2>MultiWOZ 2.1</th> + <th colspan=2>Taskmaster-1</th> + <th colspan=2>Taskmaster-2</th> + <th colspan=2>Taskmaster-3</th> + </tr> +</thead> +<thead> + <tr> + <th>Model</th> + <th>Acc</th><th>F1</th> + <th>Acc</th><th>F1</th> + <th>Acc</th><th>F1</th> + <th>Acc</th><th>F1</th> + </tr> +</thead> +<tbody> + <tr> + <td>BERTNLU</td> + <td>74.5</td><td>85.9</td> + <td>72.8</td><td>50.6</td> + <td>79.2</td><td>70.6</td> + <td>86.1</td><td>81.9</td> + </tr> + <tr> + <td>BERTNLU (context=3)</td> + <td>80.6</td><td>90.3</td> + <td>74.2</td><td>52.7</td> + <td>80.9</td><td>73.3</td> + <td>87.8</td><td>83.8</td> + </tr> +</tbody> +</table> + +- Acc: utterance-level accuracy, i.e. the percentage of utterances whose dialogue acts are all correctly predicted. +- F1: F1 measure of the dialogue act predictions over the corpus. 
+ ## References ``` diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user.json b/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user.json index d6be45577a3662065d36d112ea15de938705e224..3ed7c76788a3d307f4648625e62ecd97d3962379 100755 --- a/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user.json +++ b/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user.json @@ -2,7 +2,7 @@ "dataset_name": "multiwoz21", "data_dir": "unified_datasets/data/multiwoz21/user/context_window_size_0", "output_dir": "unified_datasets/output/multiwoz21/user/context_window_size_0", - "zipped_model_path": "unified_datasets/output/multiwoz21/user/context_window_size_0/bertnlu_unified_multiwoz_user_context0.zip", + "zipped_model_path": "unified_datasets/output/multiwoz21/user/context_window_size_0/bertnlu_unified_multiwoz21_user_context0.zip", "log_dir": "unified_datasets/output/multiwoz21/user/context_window_size_0/log", "DEVICE": "cuda:0", "seed": 2019, diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user_context3.json b/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user_context3.json index d46f4db6096028e2582bea546b847be028faf184..de131ead7e71451fc24a2dafd28b2d3b5a19d863 100755 --- a/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user_context3.json +++ b/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user_context3.json @@ -2,7 +2,7 @@ "dataset_name": "multiwoz21", "data_dir": "unified_datasets/data/multiwoz21/user/context_window_size_3", "output_dir": "unified_datasets/output/multiwoz21/user/context_window_size_3", - "zipped_model_path": "unified_datasets/output/multiwoz21/user/context_window_size_3/bertnlu_unified_multiwoz_user_context3.zip", + "zipped_model_path": "unified_datasets/output/multiwoz21/user/context_window_size_3/bertnlu_unified_multiwoz21_user_context3.zip", "log_dir": "unified_datasets/output/multiwoz21/user/context_window_size_3/log", "DEVICE": "cuda:0", 
"seed": 2019, diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user.json b/convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user.json new file mode 100755 index 0000000000000000000000000000000000000000..74b998bc53433c6814dc2c1b403913db4717d452 --- /dev/null +++ b/convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user.json @@ -0,0 +1,27 @@ +{ + "dataset_name": "sgd", + "data_dir": "unified_datasets/data/sgd/user/context_window_size_0", + "output_dir": "unified_datasets/output/sgd/user/context_window_size_0", + "zipped_model_path": "unified_datasets/output/sgd/user/context_window_size_0/bertnlu_unified_sgd_user_context0.zip", + "log_dir": "unified_datasets/output/sgd/user/context_window_size_0/log", + "DEVICE": "cuda:0", + "seed": 2019, + "cut_sen_len": 40, + "use_bert_tokenizer": true, + "context_window_size": 0, + "model": { + "finetune": true, + "context": false, + "context_grad": false, + "pretrained_weights": "bert-base-uncased", + "check_step": 1000, + "max_step": 10000, + "batch_size": 128, + "learning_rate": 1e-4, + "adam_epsilon": 1e-8, + "warmup_steps": 0, + "weight_decay": 0.0, + "dropout": 0.1, + "hidden_units": 768 + } +} \ No newline at end of file diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user_context3.json b/convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user_context3.json new file mode 100755 index 0000000000000000000000000000000000000000..bf713685cac1477bb453863eec3dd072168b9a63 --- /dev/null +++ b/convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user_context3.json @@ -0,0 +1,27 @@ +{ + "dataset_name": "sgd", + "data_dir": "unified_datasets/data/sgd/user/context_window_size_3", + "output_dir": "unified_datasets/output/sgd/user/context_window_size_3", + "zipped_model_path": "unified_datasets/output/sgd/user/context_window_size_3/bertnlu_unified_sgd_user_context3.zip", + "log_dir": "unified_datasets/output/sgd/user/context_window_size_3/log", + "DEVICE": "cuda:0", + "seed": 2019, + "cut_sen_len": 40, 
+ "use_bert_tokenizer": true, + "context_window_size": 3, + "model": { + "finetune": true, + "context": true, + "context_grad": true, + "pretrained_weights": "bert-base-uncased", + "check_step": 1000, + "max_step": 10000, + "batch_size": 128, + "learning_rate": 1e-4, + "adam_epsilon": 1e-8, + "warmup_steps": 0, + "weight_decay": 0.0, + "dropout": 0.1, + "hidden_units": 1536 + } +} \ No newline at end of file diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user.json b/convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user.json new file mode 100755 index 0000000000000000000000000000000000000000..47569c62a13832c4c089a4dc949baee5c2983312 --- /dev/null +++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user.json @@ -0,0 +1,27 @@ +{ + "dataset_name": "tm1", + "data_dir": "unified_datasets/data/tm1/user/context_window_size_0", + "output_dir": "unified_datasets/output/tm1/user/context_window_size_0", + "zipped_model_path": "unified_datasets/output/tm1/user/context_window_size_0/bertnlu_unified_tm1_user_context0.zip", + "log_dir": "unified_datasets/output/tm1/user/context_window_size_0/log", + "DEVICE": "cuda:0", + "seed": 2019, + "cut_sen_len": 40, + "use_bert_tokenizer": true, + "context_window_size": 0, + "model": { + "finetune": true, + "context": false, + "context_grad": false, + "pretrained_weights": "bert-base-uncased", + "check_step": 1000, + "max_step": 10000, + "batch_size": 128, + "learning_rate": 1e-4, + "adam_epsilon": 1e-8, + "warmup_steps": 0, + "weight_decay": 0.0, + "dropout": 0.1, + "hidden_units": 768 + } +} \ No newline at end of file diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user_context3.json b/convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user_context3.json new file mode 100755 index 0000000000000000000000000000000000000000..a64f7bd573f6d4b7ce16b54da6fff2cda1931e21 --- /dev/null +++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user_context3.json @@ -0,0 +1,27 @@ +{ + "dataset_name": 
"tm1", + "data_dir": "unified_datasets/data/tm1/user/context_window_size_3", + "output_dir": "unified_datasets/output/tm1/user/context_window_size_3", + "zipped_model_path": "unified_datasets/output/tm1/user/context_window_size_3/bertnlu_unified_tm1_user_context3.zip", + "log_dir": "unified_datasets/output/tm1/user/context_window_size_3/log", + "DEVICE": "cuda:0", + "seed": 2019, + "cut_sen_len": 40, + "use_bert_tokenizer": true, + "context_window_size": 3, + "model": { + "finetune": true, + "context": true, + "context_grad": true, + "pretrained_weights": "bert-base-uncased", + "check_step": 1000, + "max_step": 10000, + "batch_size": 128, + "learning_rate": 1e-4, + "adam_epsilon": 1e-8, + "warmup_steps": 0, + "weight_decay": 0.0, + "dropout": 0.1, + "hidden_units": 1536 + } +} \ No newline at end of file diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user.json b/convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user.json new file mode 100755 index 0000000000000000000000000000000000000000..6ca5e52144d56399e0b42723caa70f6cc7d403ba --- /dev/null +++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user.json @@ -0,0 +1,27 @@ +{ + "dataset_name": "tm2", + "data_dir": "unified_datasets/data/tm2/user/context_window_size_0", + "output_dir": "unified_datasets/output/tm2/user/context_window_size_0", + "zipped_model_path": "unified_datasets/output/tm2/user/context_window_size_0/bertnlu_unified_tm2_user_context0.zip", + "log_dir": "unified_datasets/output/tm2/user/context_window_size_0/log", + "DEVICE": "cuda:0", + "seed": 2019, + "cut_sen_len": 40, + "use_bert_tokenizer": true, + "context_window_size": 0, + "model": { + "finetune": true, + "context": false, + "context_grad": false, + "pretrained_weights": "bert-base-uncased", + "check_step": 1000, + "max_step": 10000, + "batch_size": 128, + "learning_rate": 1e-4, + "adam_epsilon": 1e-8, + "warmup_steps": 0, + "weight_decay": 0.0, + "dropout": 0.1, + "hidden_units": 768 + } +} \ No newline at end of 
file diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user_context3.json b/convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user_context3.json new file mode 100755 index 0000000000000000000000000000000000000000..3faf7a37cb0c007d5bda72272786138044c06793 --- /dev/null +++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user_context3.json @@ -0,0 +1,27 @@ +{ + "dataset_name": "tm2", + "data_dir": "unified_datasets/data/tm2/user/context_window_size_3", + "output_dir": "unified_datasets/output/tm2/user/context_window_size_3", + "zipped_model_path": "unified_datasets/output/tm2/user/context_window_size_3/bertnlu_unified_tm2_user_context3.zip", + "log_dir": "unified_datasets/output/tm2/user/context_window_size_3/log", + "DEVICE": "cuda:0", + "seed": 2019, + "cut_sen_len": 40, + "use_bert_tokenizer": true, + "context_window_size": 3, + "model": { + "finetune": true, + "context": true, + "context_grad": true, + "pretrained_weights": "bert-base-uncased", + "check_step": 1000, + "max_step": 10000, + "batch_size": 128, + "learning_rate": 1e-4, + "adam_epsilon": 1e-8, + "warmup_steps": 0, + "weight_decay": 0.0, + "dropout": 0.1, + "hidden_units": 1536 + } +} \ No newline at end of file diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user.json b/convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user.json new file mode 100755 index 0000000000000000000000000000000000000000..73a73aef2516eefeca54add7e39ae1125d779be2 --- /dev/null +++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user.json @@ -0,0 +1,27 @@ +{ + "dataset_name": "tm3", + "data_dir": "unified_datasets/data/tm3/user/context_window_size_0", + "output_dir": "unified_datasets/output/tm3/user/context_window_size_0", + "zipped_model_path": "unified_datasets/output/tm3/user/context_window_size_0/bertnlu_unified_tm3_user_context0.zip", + "log_dir": "unified_datasets/output/tm3/user/context_window_size_0/log", + "DEVICE": "cuda:0", + "seed": 2019, + "cut_sen_len": 40, + 
"use_bert_tokenizer": true, + "context_window_size": 0, + "model": { + "finetune": true, + "context": false, + "context_grad": false, + "pretrained_weights": "bert-base-uncased", + "check_step": 1000, + "max_step": 10000, + "batch_size": 128, + "learning_rate": 1e-4, + "adam_epsilon": 1e-8, + "warmup_steps": 0, + "weight_decay": 0.0, + "dropout": 0.1, + "hidden_units": 768 + } +} \ No newline at end of file diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user_context3.json b/convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user_context3.json new file mode 100755 index 0000000000000000000000000000000000000000..97cfad4aa7065a8e15213a52af6670a3a83f2784 --- /dev/null +++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user_context3.json @@ -0,0 +1,27 @@ +{ + "dataset_name": "tm3", + "data_dir": "unified_datasets/data/tm3/user/context_window_size_3", + "output_dir": "unified_datasets/output/tm3/user/context_window_size_3", + "zipped_model_path": "unified_datasets/output/tm3/user/context_window_size_3/bertnlu_unified_tm3_user_context3.zip", + "log_dir": "unified_datasets/output/tm3/user/context_window_size_3/log", + "DEVICE": "cuda:0", + "seed": 2019, + "cut_sen_len": 40, + "use_bert_tokenizer": true, + "context_window_size": 3, + "model": { + "finetune": true, + "context": true, + "context_grad": true, + "pretrained_weights": "bert-base-uncased", + "check_step": 1000, + "max_step": 20000, + "batch_size": 64, + "learning_rate": 1e-4, + "adam_epsilon": 1e-8, + "warmup_steps": 0, + "weight_decay": 0.0, + "dropout": 0.1, + "hidden_units": 1536 + } +} \ No newline at end of file diff --git a/setup.py b/setup.py index 900b92f3912b72831bb027da851da556a9d8ad3d..ef798d8ece195266dcda23b9ae8a1b3612822b8c 100755 --- a/setup.py +++ b/setup.py @@ -41,6 +41,7 @@ setup( 'numpy', 'nltk', 'scipy', + 'tensorboard', 'torch>=1.6', 'transformers>=4.0', 'datasets>=1.8',