diff --git a/convlab2/nlu/evaluate_unified_datasets.py b/convlab2/nlu/evaluate_unified_datasets.py index bb244e34918c6aeaed468d6a67683c8e4e5306b4..86e91747702a1dedd727b5945bf6cbcaa08540d9 100644 --- a/convlab2/nlu/evaluate_unified_datasets.py +++ b/convlab2/nlu/evaluate_unified_datasets.py @@ -6,8 +6,10 @@ def evaluate(predict_result): predict_result = json.load(open(predict_result)) metrics = {x: {'TP':0, 'FP':0, 'FN':0} for x in ['overall', 'binary', 'categorical', 'non-categorical']} + acc = [] for sample in predict_result: + flag = True for da_type in ['binary', 'categorical', 'non-categorical']: if da_type == 'binary': predicts = [(x['intent'], x['domain'], x['slot']) for x in sample['predictions']['dialogue_acts'][da_type]] @@ -26,6 +28,8 @@ def evaluate(predict_result): if ele not in predicts: metrics['overall']['FN'] += 1 metrics[da_type]['FN'] += 1 + flag &= (sorted(predicts)==sorted(labels)) + acc.append(flag) for metric in metrics: TP = metrics[metric].pop('TP') @@ -37,6 +41,7 @@ def evaluate(predict_result): metrics[metric]['precision'] = precision metrics[metric]['recall'] = recall metrics[metric]['f1'] = f1 + metrics['accuracy'] = sum(acc)/len(acc) return metrics diff --git a/convlab2/nlu/jointBERT/README.md b/convlab2/nlu/jointBERT/README.md index c9756d3c1ebdd42e975bb86d32a532b066a29048..c87e83551b21b7778c026916795c11a737831ce1 100755 --- a/convlab2/nlu/jointBERT/README.md +++ b/convlab2/nlu/jointBERT/README.md @@ -31,6 +31,50 @@ $ python test.py --config_path path_to_a_config_file The result (`output.json`) will be saved under the `output_dir` of the config file. Also, it will be zipped as `zipped_model_path` in the config file. +## Performance on unified format datasets + +To illustrate that it is easy to use the model for any dataset that is in our unified format, we report the performance on several datasets in our unified format. 
We follow `README.md` and config files in `unified_datasets/` to generate `predictions.json`, then evaluate it using `../evaluate_unified_datasets.py`. Note that we use almost the same hyper-parameters for different datasets, which may not be optimal. + +<table> +<thead> + <tr> + <th></th> + <th colspan=2>MultiWOZ 2.1</th> + <th colspan=2>Taskmaster-1</th> + <th colspan=2>Taskmaster-2</th> + <th colspan=2>Taskmaster-3</th> + </tr> +</thead> +<thead> + <tr> + <th>Model</th> + <th>Acc</th><th>F1</th> + <th>Acc</th><th>F1</th> + <th>Acc</th><th>F1</th> + <th>Acc</th><th>F1</th> + </tr> +</thead> +<tbody> + <tr> + <td>BERTNLU</td> + <td>74.5</td><td>85.9</td> + <td>72.8</td><td>50.6</td> + <td>79.2</td><td>70.6</td> + <td>86.1</td><td>81.9</td> + </tr> + <tr> + <td>BERTNLU (context=3)</td> + <td>80.6</td><td>90.3</td> + <td>74.2</td><td>52.7</td> + <td>80.9</td><td>73.3</td> + <td>87.8</td><td>83.8</td> + </tr> +</tbody> +</table> + +- Acc: whether all dialogue acts of an utterance are correctly predicted +- F1: F1 measure of the dialogue act predictions over the corpus. 
+ ## References ``` diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user.json b/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user.json index d6be45577a3662065d36d112ea15de938705e224..3ed7c76788a3d307f4648625e62ecd97d3962379 100755 --- a/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user.json +++ b/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user.json @@ -2,7 +2,7 @@ "dataset_name": "multiwoz21", "data_dir": "unified_datasets/data/multiwoz21/user/context_window_size_0", "output_dir": "unified_datasets/output/multiwoz21/user/context_window_size_0", - "zipped_model_path": "unified_datasets/output/multiwoz21/user/context_window_size_0/bertnlu_unified_multiwoz_user_context0.zip", + "zipped_model_path": "unified_datasets/output/multiwoz21/user/context_window_size_0/bertnlu_unified_multiwoz21_user_context0.zip", "log_dir": "unified_datasets/output/multiwoz21/user/context_window_size_0/log", "DEVICE": "cuda:0", "seed": 2019, diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user_context3.json b/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user_context3.json index d46f4db6096028e2582bea546b847be028faf184..de131ead7e71451fc24a2dafd28b2d3b5a19d863 100755 --- a/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user_context3.json +++ b/convlab2/nlu/jointBERT/unified_datasets/configs/multiwoz21_user_context3.json @@ -2,7 +2,7 @@ "dataset_name": "multiwoz21", "data_dir": "unified_datasets/data/multiwoz21/user/context_window_size_3", "output_dir": "unified_datasets/output/multiwoz21/user/context_window_size_3", - "zipped_model_path": "unified_datasets/output/multiwoz21/user/context_window_size_3/bertnlu_unified_multiwoz_user_context3.zip", + "zipped_model_path": "unified_datasets/output/multiwoz21/user/context_window_size_3/bertnlu_unified_multiwoz21_user_context3.zip", "log_dir": "unified_datasets/output/multiwoz21/user/context_window_size_3/log", "DEVICE": "cuda:0", 
"seed": 2019, diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user.json b/convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user.json new file mode 100755 index 0000000000000000000000000000000000000000..74b998bc53433c6814dc2c1b403913db4717d452 --- /dev/null +++ b/convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user.json @@ -0,0 +1,27 @@ +{ + "dataset_name": "sgd", + "data_dir": "unified_datasets/data/sgd/user/context_window_size_0", + "output_dir": "unified_datasets/output/sgd/user/context_window_size_0", + "zipped_model_path": "unified_datasets/output/sgd/user/context_window_size_0/bertnlu_unified_sgd_user_context0.zip", + "log_dir": "unified_datasets/output/sgd/user/context_window_size_0/log", + "DEVICE": "cuda:0", + "seed": 2019, + "cut_sen_len": 40, + "use_bert_tokenizer": true, + "context_window_size": 0, + "model": { + "finetune": true, + "context": false, + "context_grad": false, + "pretrained_weights": "bert-base-uncased", + "check_step": 1000, + "max_step": 10000, + "batch_size": 128, + "learning_rate": 1e-4, + "adam_epsilon": 1e-8, + "warmup_steps": 0, + "weight_decay": 0.0, + "dropout": 0.1, + "hidden_units": 768 + } +} \ No newline at end of file diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user_context3.json b/convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user_context3.json new file mode 100755 index 0000000000000000000000000000000000000000..bf713685cac1477bb453863eec3dd072168b9a63 --- /dev/null +++ b/convlab2/nlu/jointBERT/unified_datasets/configs/sgd_user_context3.json @@ -0,0 +1,27 @@ +{ + "dataset_name": "sgd", + "data_dir": "unified_datasets/data/sgd/user/context_window_size_3", + "output_dir": "unified_datasets/output/sgd/user/context_window_size_3", + "zipped_model_path": "unified_datasets/output/sgd/user/context_window_size_3/bertnlu_unified_sgd_user_context3.zip", + "log_dir": "unified_datasets/output/sgd/user/context_window_size_3/log", + "DEVICE": "cuda:0", + "seed": 2019, + "cut_sen_len": 40, 
+ "use_bert_tokenizer": true, + "context_window_size": 3, + "model": { + "finetune": true, + "context": true, + "context_grad": true, + "pretrained_weights": "bert-base-uncased", + "check_step": 1000, + "max_step": 10000, + "batch_size": 128, + "learning_rate": 1e-4, + "adam_epsilon": 1e-8, + "warmup_steps": 0, + "weight_decay": 0.0, + "dropout": 0.1, + "hidden_units": 1536 + } +} \ No newline at end of file diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user.json b/convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user.json new file mode 100755 index 0000000000000000000000000000000000000000..47569c62a13832c4c089a4dc949baee5c2983312 --- /dev/null +++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user.json @@ -0,0 +1,27 @@ +{ + "dataset_name": "tm1", + "data_dir": "unified_datasets/data/tm1/user/context_window_size_0", + "output_dir": "unified_datasets/output/tm1/user/context_window_size_0", + "zipped_model_path": "unified_datasets/output/tm1/user/context_window_size_0/bertnlu_unified_tm1_user_context0.zip", + "log_dir": "unified_datasets/output/tm1/user/context_window_size_0/log", + "DEVICE": "cuda:0", + "seed": 2019, + "cut_sen_len": 40, + "use_bert_tokenizer": true, + "context_window_size": 0, + "model": { + "finetune": true, + "context": false, + "context_grad": false, + "pretrained_weights": "bert-base-uncased", + "check_step": 1000, + "max_step": 10000, + "batch_size": 128, + "learning_rate": 1e-4, + "adam_epsilon": 1e-8, + "warmup_steps": 0, + "weight_decay": 0.0, + "dropout": 0.1, + "hidden_units": 768 + } +} \ No newline at end of file diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user_context3.json b/convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user_context3.json new file mode 100755 index 0000000000000000000000000000000000000000..a64f7bd573f6d4b7ce16b54da6fff2cda1931e21 --- /dev/null +++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm1_user_context3.json @@ -0,0 +1,27 @@ +{ + "dataset_name": 
"tm1", + "data_dir": "unified_datasets/data/tm1/user/context_window_size_3", + "output_dir": "unified_datasets/output/tm1/user/context_window_size_3", + "zipped_model_path": "unified_datasets/output/tm1/user/context_window_size_3/bertnlu_unified_tm1_user_context3.zip", + "log_dir": "unified_datasets/output/tm1/user/context_window_size_3/log", + "DEVICE": "cuda:0", + "seed": 2019, + "cut_sen_len": 40, + "use_bert_tokenizer": true, + "context_window_size": 3, + "model": { + "finetune": true, + "context": true, + "context_grad": true, + "pretrained_weights": "bert-base-uncased", + "check_step": 1000, + "max_step": 10000, + "batch_size": 128, + "learning_rate": 1e-4, + "adam_epsilon": 1e-8, + "warmup_steps": 0, + "weight_decay": 0.0, + "dropout": 0.1, + "hidden_units": 1536 + } +} \ No newline at end of file diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user.json b/convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user.json new file mode 100755 index 0000000000000000000000000000000000000000..6ca5e52144d56399e0b42723caa70f6cc7d403ba --- /dev/null +++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user.json @@ -0,0 +1,27 @@ +{ + "dataset_name": "tm2", + "data_dir": "unified_datasets/data/tm2/user/context_window_size_0", + "output_dir": "unified_datasets/output/tm2/user/context_window_size_0", + "zipped_model_path": "unified_datasets/output/tm2/user/context_window_size_0/bertnlu_unified_tm2_user_context0.zip", + "log_dir": "unified_datasets/output/tm2/user/context_window_size_0/log", + "DEVICE": "cuda:0", + "seed": 2019, + "cut_sen_len": 40, + "use_bert_tokenizer": true, + "context_window_size": 0, + "model": { + "finetune": true, + "context": false, + "context_grad": false, + "pretrained_weights": "bert-base-uncased", + "check_step": 1000, + "max_step": 10000, + "batch_size": 128, + "learning_rate": 1e-4, + "adam_epsilon": 1e-8, + "warmup_steps": 0, + "weight_decay": 0.0, + "dropout": 0.1, + "hidden_units": 768 + } +} \ No newline at end of 
file diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user_context3.json b/convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user_context3.json new file mode 100755 index 0000000000000000000000000000000000000000..3faf7a37cb0c007d5bda72272786138044c06793 --- /dev/null +++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm2_user_context3.json @@ -0,0 +1,27 @@ +{ + "dataset_name": "tm2", + "data_dir": "unified_datasets/data/tm2/user/context_window_size_3", + "output_dir": "unified_datasets/output/tm2/user/context_window_size_3", + "zipped_model_path": "unified_datasets/output/tm2/user/context_window_size_3/bertnlu_unified_tm2_user_context3.zip", + "log_dir": "unified_datasets/output/tm2/user/context_window_size_3/log", + "DEVICE": "cuda:0", + "seed": 2019, + "cut_sen_len": 40, + "use_bert_tokenizer": true, + "context_window_size": 3, + "model": { + "finetune": true, + "context": true, + "context_grad": true, + "pretrained_weights": "bert-base-uncased", + "check_step": 1000, + "max_step": 10000, + "batch_size": 128, + "learning_rate": 1e-4, + "adam_epsilon": 1e-8, + "warmup_steps": 0, + "weight_decay": 0.0, + "dropout": 0.1, + "hidden_units": 1536 + } +} \ No newline at end of file diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user.json b/convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user.json new file mode 100755 index 0000000000000000000000000000000000000000..73a73aef2516eefeca54add7e39ae1125d779be2 --- /dev/null +++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user.json @@ -0,0 +1,27 @@ +{ + "dataset_name": "tm3", + "data_dir": "unified_datasets/data/tm3/user/context_window_size_0", + "output_dir": "unified_datasets/output/tm3/user/context_window_size_0", + "zipped_model_path": "unified_datasets/output/tm3/user/context_window_size_0/bertnlu_unified_tm3_user_context0.zip", + "log_dir": "unified_datasets/output/tm3/user/context_window_size_0/log", + "DEVICE": "cuda:0", + "seed": 2019, + "cut_sen_len": 40, + 
"use_bert_tokenizer": true, + "context_window_size": 0, + "model": { + "finetune": true, + "context": false, + "context_grad": false, + "pretrained_weights": "bert-base-uncased", + "check_step": 1000, + "max_step": 10000, + "batch_size": 128, + "learning_rate": 1e-4, + "adam_epsilon": 1e-8, + "warmup_steps": 0, + "weight_decay": 0.0, + "dropout": 0.1, + "hidden_units": 768 + } +} \ No newline at end of file diff --git a/convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user_context3.json b/convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user_context3.json new file mode 100755 index 0000000000000000000000000000000000000000..97cfad4aa7065a8e15213a52af6670a3a83f2784 --- /dev/null +++ b/convlab2/nlu/jointBERT/unified_datasets/configs/tm3_user_context3.json @@ -0,0 +1,27 @@ +{ + "dataset_name": "tm3", + "data_dir": "unified_datasets/data/tm3/user/context_window_size_3", + "output_dir": "unified_datasets/output/tm3/user/context_window_size_3", + "zipped_model_path": "unified_datasets/output/tm3/user/context_window_size_3/bertnlu_unified_tm3_user_context3.zip", + "log_dir": "unified_datasets/output/tm3/user/context_window_size_3/log", + "DEVICE": "cuda:0", + "seed": 2019, + "cut_sen_len": 40, + "use_bert_tokenizer": true, + "context_window_size": 3, + "model": { + "finetune": true, + "context": true, + "context_grad": true, + "pretrained_weights": "bert-base-uncased", + "check_step": 1000, + "max_step": 20000, + "batch_size": 64, + "learning_rate": 1e-4, + "adam_epsilon": 1e-8, + "warmup_steps": 0, + "weight_decay": 0.0, + "dropout": 0.1, + "hidden_units": 1536 + } +} \ No newline at end of file diff --git a/convlab2/nlu/milu/README.md b/convlab2/nlu/milu/README.md index bbd54d671900f26373ef369c3c6f049dcf74ac23..2213475f87ac77c1a010d0a406a49c6976810442 100755 --- a/convlab2/nlu/milu/README.md +++ b/convlab2/nlu/milu/README.md @@ -43,6 +43,50 @@ Note that the config file is different from the above. 
You should set: ## Predict See `nlu.py` under `multiwoz` and `unified_datasets` directories. +## Performance on unified format datasets + +To illustrate that it is easy to use the model for any dataset that is in our unified format, we report the performance on several datasets in our unified format. We follow `README.md` and config files in `unified_datasets/` to generate `predictions.json`, then evaluate it using `../evaluate_unified_datasets.py`. Note that we use almost the same hyper-parameters for different datasets, which may not be optimal. + +<table> +<thead> + <tr> + <th></th> + <th colspan=2>MultiWOZ 2.1</th> + <th colspan=2>Taskmaster-1</th> + <th colspan=2>Taskmaster-2</th> + <th colspan=2>Taskmaster-3</th> + </tr> +</thead> +<thead> + <tr> + <th>Model</th> + <th>Acc</th><th>F1</th> + <th>Acc</th><th>F1</th> + <th>Acc</th><th>F1</th> + <th>Acc</th><th>F1</th> + </tr> +</thead> +<tbody> + <tr> + <td>MILU</td> + <td>72.9</td><td>85.2</td> + <td>72.9</td><td>49.2</td> + <td>79.1</td><td>68.7</td> + <td>85.4</td><td>80.3</td> + </tr> + <tr> + <td>MILU (context=3)</td> + <td>76.6</td><td>87.9</td> + <td>72.4</td><td>48.5</td> + <td>78.9</td><td>68.4</td> + <td>85.1</td><td>80.1</td> + </tr> +</tbody> +</table> + +- Acc: whether all dialogue acts of an utterance are correctly predicted +- F1: F1 measure of the dialogue act predictions over the corpus. 
+ ## References ``` @inproceedings{lee2019convlab, diff --git a/convlab2/nlu/milu/dataset_reader.py b/convlab2/nlu/milu/dataset_reader.py index 35f71903ab4a269b0f9e5d3cd208d78e48278349..86732b7d0aa46d94ca813a6dd27c64b754a76492 100755 --- a/convlab2/nlu/milu/dataset_reader.py +++ b/convlab2/nlu/milu/dataset_reader.py @@ -76,6 +76,8 @@ class MILUDatasetReader(DatasetReader): for sample in data: utterance = sample['utterance'] + if len(utterance) == 0: + continue sentences = self._sent_tokenizer.tokenize(utterance) sent_spans = self._sent_tokenizer.span_tokenize(utterance) tokens = [token for sent in sentences for token in self._word_tokenizer.tokenize(sent)] diff --git a/convlab2/nlu/milu/unified_datasets/configs/multiwoz21_user.jsonnet b/convlab2/nlu/milu/unified_datasets/configs/multiwoz21_user.jsonnet index 858a57a19ecbe2acd59f02465d79c1d852341f60..94ccc02647ec9aabacb2b545202e35161addacff 100755 --- a/convlab2/nlu/milu/unified_datasets/configs/multiwoz21_user.jsonnet +++ b/convlab2/nlu/milu/unified_datasets/configs/multiwoz21_user.jsonnet @@ -98,7 +98,7 @@ "num_epochs": 40, "grad_norm": 5.0, "patience": 75, - "cuda_device": 4 + "cuda_device": 0 }, "evaluate_on_test": true } diff --git a/convlab2/nlu/milu/unified_datasets/configs/sgd_user.jsonnet b/convlab2/nlu/milu/unified_datasets/configs/sgd_user.jsonnet new file mode 100755 index 0000000000000000000000000000000000000000..db02aed0900e628bf7ffff3bf4c20dea73d9baf8 --- /dev/null +++ b/convlab2/nlu/milu/unified_datasets/configs/sgd_user.jsonnet @@ -0,0 +1,104 @@ +{ + "dataset_reader": { + "type": "milu", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + }, + "token_characters": { + "type": "characters", + "min_padding_length": 3 + }, + }, + "context_size": 0, + "agent": "user", + "use_unified_datasets": true, + "dataset_name": "sgd", + "random_context_size": false + }, + "train_data_path": "train", + "validation_data_path": "validation", + "test_data_path": "test", + "model": { 
+ "type": "milu", + "label_encoding": "BIO", + "use_unified_datasets": true, + "dropout": 0.3, + "include_start_end_transitions": false, + "text_field_embedder": { + "token_embedders": { + "tokens": { + "type": "embedding", + "embedding_dim": 50, + "pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.50d.txt.gz", + "trainable": true + }, + "token_characters": { + "type": "character_encoding", + "embedding": { + "embedding_dim": 16 + }, + "encoder": { + "type": "cnn", + "embedding_dim": 16, + "num_filters": 128, + "ngram_filter_sizes": [3], + "conv_layer_activation": "relu" + } + } + } + }, + "encoder": { + "type": "lstm", + "input_size": 178, + "hidden_size": 200, + "num_layers": 1, + "dropout": 0.5, + "bidirectional": true + }, + "intent_encoder": { + "type": "lstm", + "input_size": 400, + "hidden_size": 200, + "num_layers": 1, + "dropout": 0.5, + "bidirectional": true + }, + "attention": { + "type": "bilinear", + "vector_dim": 400, + "matrix_dim": 400 + }, + "context_for_intent": true, + "context_for_tag": false, + "attention_for_intent": false, + "attention_for_tag": false, + "regularizer": [ + [ + "scalar_parameters", + { + "type": "l2", + "alpha": 0.1 + } + ] + ] + }, + "iterator": { + "type": "basic", + "batch_size": 64 + }, + "trainer": { + "optimizer": { + "type": "adam", + "lr": 0.001 + }, + "validation_metric": "+f1-measure", + "num_serialized_models_to_keep": 3, + "num_epochs": 40, + "grad_norm": 5.0, + "patience": 75, + "cuda_device": 0 + }, + "evaluate_on_test": true +} diff --git a/convlab2/nlu/milu/unified_datasets/configs/sgd_user_context3.jsonnet b/convlab2/nlu/milu/unified_datasets/configs/sgd_user_context3.jsonnet new file mode 100755 index 0000000000000000000000000000000000000000..2feea6d4b866ecda20c8398abab4ef34337cff6c --- /dev/null +++ b/convlab2/nlu/milu/unified_datasets/configs/sgd_user_context3.jsonnet @@ -0,0 +1,104 @@ +{ + "dataset_reader": { + "type": "milu", + "token_indexers": { + "tokens": { + 
"type": "single_id", + "lowercase_tokens": true + }, + "token_characters": { + "type": "characters", + "min_padding_length": 3 + }, + }, + "context_size": 3, + "agent": "user", + "use_unified_datasets": true, + "dataset_name": "sgd", + "random_context_size": false + }, + "train_data_path": "train", + "validation_data_path": "validation", + "test_data_path": "test", + "model": { + "type": "milu", + "label_encoding": "BIO", + "use_unified_datasets": true, + "dropout": 0.3, + "include_start_end_transitions": false, + "text_field_embedder": { + "token_embedders": { + "tokens": { + "type": "embedding", + "embedding_dim": 50, + "pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.50d.txt.gz", + "trainable": true + }, + "token_characters": { + "type": "character_encoding", + "embedding": { + "embedding_dim": 16 + }, + "encoder": { + "type": "cnn", + "embedding_dim": 16, + "num_filters": 128, + "ngram_filter_sizes": [3], + "conv_layer_activation": "relu" + } + } + } + }, + "encoder": { + "type": "lstm", + "input_size": 178, + "hidden_size": 200, + "num_layers": 1, + "dropout": 0.5, + "bidirectional": true + }, + "intent_encoder": { + "type": "lstm", + "input_size": 400, + "hidden_size": 200, + "num_layers": 1, + "dropout": 0.5, + "bidirectional": true + }, + "attention": { + "type": "bilinear", + "vector_dim": 400, + "matrix_dim": 400 + }, + "context_for_intent": true, + "context_for_tag": false, + "attention_for_intent": false, + "attention_for_tag": false, + "regularizer": [ + [ + "scalar_parameters", + { + "type": "l2", + "alpha": 0.1 + } + ] + ] + }, + "iterator": { + "type": "basic", + "batch_size": 64 + }, + "trainer": { + "optimizer": { + "type": "adam", + "lr": 0.001 + }, + "validation_metric": "+f1-measure", + "num_serialized_models_to_keep": 3, + "num_epochs": 40, + "grad_norm": 5.0, + "patience": 75, + "cuda_device": 0 + }, + "evaluate_on_test": true +} diff --git a/convlab2/nlu/milu/unified_datasets/configs/tm1_user.jsonnet 
b/convlab2/nlu/milu/unified_datasets/configs/tm1_user.jsonnet new file mode 100755 index 0000000000000000000000000000000000000000..7f7138f8f66560e288deff667f052c3d60c93554 --- /dev/null +++ b/convlab2/nlu/milu/unified_datasets/configs/tm1_user.jsonnet @@ -0,0 +1,104 @@ +{ + "dataset_reader": { + "type": "milu", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + }, + "token_characters": { + "type": "characters", + "min_padding_length": 3 + }, + }, + "context_size": 0, + "agent": "user", + "use_unified_datasets": true, + "dataset_name": "tm1", + "random_context_size": false + }, + "train_data_path": "train", + "validation_data_path": "validation", + "test_data_path": "test", + "model": { + "type": "milu", + "label_encoding": "BIO", + "use_unified_datasets": true, + "dropout": 0.3, + "include_start_end_transitions": false, + "text_field_embedder": { + "token_embedders": { + "tokens": { + "type": "embedding", + "embedding_dim": 50, + "pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.50d.txt.gz", + "trainable": true + }, + "token_characters": { + "type": "character_encoding", + "embedding": { + "embedding_dim": 16 + }, + "encoder": { + "type": "cnn", + "embedding_dim": 16, + "num_filters": 128, + "ngram_filter_sizes": [3], + "conv_layer_activation": "relu" + } + } + } + }, + "encoder": { + "type": "lstm", + "input_size": 178, + "hidden_size": 200, + "num_layers": 1, + "dropout": 0.5, + "bidirectional": true + }, + "intent_encoder": { + "type": "lstm", + "input_size": 400, + "hidden_size": 200, + "num_layers": 1, + "dropout": 0.5, + "bidirectional": true + }, + "attention": { + "type": "bilinear", + "vector_dim": 400, + "matrix_dim": 400 + }, + "context_for_intent": true, + "context_for_tag": false, + "attention_for_intent": false, + "attention_for_tag": false, + "regularizer": [ + [ + "scalar_parameters", + { + "type": "l2", + "alpha": 0.1 + } + ] + ] + }, + "iterator": { + "type": "basic", + 
"batch_size": 64 + }, + "trainer": { + "optimizer": { + "type": "adam", + "lr": 0.001 + }, + "validation_metric": "+f1-measure", + "num_serialized_models_to_keep": 3, + "num_epochs": 40, + "grad_norm": 5.0, + "patience": 75, + "cuda_device": 0 + }, + "evaluate_on_test": true +} diff --git a/convlab2/nlu/milu/unified_datasets/configs/tm1_user_context3.jsonnet b/convlab2/nlu/milu/unified_datasets/configs/tm1_user_context3.jsonnet new file mode 100755 index 0000000000000000000000000000000000000000..64f5933f16e5773dcd4cad49dd8895d9d1e13898 --- /dev/null +++ b/convlab2/nlu/milu/unified_datasets/configs/tm1_user_context3.jsonnet @@ -0,0 +1,104 @@ +{ + "dataset_reader": { + "type": "milu", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + }, + "token_characters": { + "type": "characters", + "min_padding_length": 3 + }, + }, + "context_size": 3, + "agent": "user", + "use_unified_datasets": true, + "dataset_name": "tm1", + "random_context_size": false + }, + "train_data_path": "train", + "validation_data_path": "validation", + "test_data_path": "test", + "model": { + "type": "milu", + "label_encoding": "BIO", + "use_unified_datasets": true, + "dropout": 0.3, + "include_start_end_transitions": false, + "text_field_embedder": { + "token_embedders": { + "tokens": { + "type": "embedding", + "embedding_dim": 50, + "pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.50d.txt.gz", + "trainable": true + }, + "token_characters": { + "type": "character_encoding", + "embedding": { + "embedding_dim": 16 + }, + "encoder": { + "type": "cnn", + "embedding_dim": 16, + "num_filters": 128, + "ngram_filter_sizes": [3], + "conv_layer_activation": "relu" + } + } + } + }, + "encoder": { + "type": "lstm", + "input_size": 178, + "hidden_size": 200, + "num_layers": 1, + "dropout": 0.5, + "bidirectional": true + }, + "intent_encoder": { + "type": "lstm", + "input_size": 400, + "hidden_size": 200, + "num_layers": 1, + "dropout": 
0.5, + "bidirectional": true + }, + "attention": { + "type": "bilinear", + "vector_dim": 400, + "matrix_dim": 400 + }, + "context_for_intent": true, + "context_for_tag": false, + "attention_for_intent": false, + "attention_for_tag": false, + "regularizer": [ + [ + "scalar_parameters", + { + "type": "l2", + "alpha": 0.1 + } + ] + ] + }, + "iterator": { + "type": "basic", + "batch_size": 64 + }, + "trainer": { + "optimizer": { + "type": "adam", + "lr": 0.001 + }, + "validation_metric": "+f1-measure", + "num_serialized_models_to_keep": 3, + "num_epochs": 40, + "grad_norm": 5.0, + "patience": 75, + "cuda_device": 0 + }, + "evaluate_on_test": true +} diff --git a/convlab2/nlu/milu/unified_datasets/configs/tm2_user.jsonnet b/convlab2/nlu/milu/unified_datasets/configs/tm2_user.jsonnet new file mode 100755 index 0000000000000000000000000000000000000000..a3b89448e4a280660b5a8de98500d3641c8a1512 --- /dev/null +++ b/convlab2/nlu/milu/unified_datasets/configs/tm2_user.jsonnet @@ -0,0 +1,104 @@ +{ + "dataset_reader": { + "type": "milu", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + }, + "token_characters": { + "type": "characters", + "min_padding_length": 3 + }, + }, + "context_size": 0, + "agent": "user", + "use_unified_datasets": true, + "dataset_name": "tm2", + "random_context_size": false + }, + "train_data_path": "train", + "validation_data_path": "validation", + "test_data_path": "test", + "model": { + "type": "milu", + "label_encoding": "BIO", + "use_unified_datasets": true, + "dropout": 0.3, + "include_start_end_transitions": false, + "text_field_embedder": { + "token_embedders": { + "tokens": { + "type": "embedding", + "embedding_dim": 50, + "pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.50d.txt.gz", + "trainable": true + }, + "token_characters": { + "type": "character_encoding", + "embedding": { + "embedding_dim": 16 + }, + "encoder": { + "type": "cnn", + "embedding_dim": 16, + 
"num_filters": 128, + "ngram_filter_sizes": [3], + "conv_layer_activation": "relu" + } + } + } + }, + "encoder": { + "type": "lstm", + "input_size": 178, + "hidden_size": 200, + "num_layers": 1, + "dropout": 0.5, + "bidirectional": true + }, + "intent_encoder": { + "type": "lstm", + "input_size": 400, + "hidden_size": 200, + "num_layers": 1, + "dropout": 0.5, + "bidirectional": true + }, + "attention": { + "type": "bilinear", + "vector_dim": 400, + "matrix_dim": 400 + }, + "context_for_intent": true, + "context_for_tag": false, + "attention_for_intent": false, + "attention_for_tag": false, + "regularizer": [ + [ + "scalar_parameters", + { + "type": "l2", + "alpha": 0.1 + } + ] + ] + }, + "iterator": { + "type": "basic", + "batch_size": 64 + }, + "trainer": { + "optimizer": { + "type": "adam", + "lr": 0.001 + }, + "validation_metric": "+f1-measure", + "num_serialized_models_to_keep": 3, + "num_epochs": 40, + "grad_norm": 5.0, + "patience": 75, + "cuda_device": 0 + }, + "evaluate_on_test": true +} diff --git a/convlab2/nlu/milu/unified_datasets/configs/tm2_user_context3.jsonnet b/convlab2/nlu/milu/unified_datasets/configs/tm2_user_context3.jsonnet new file mode 100755 index 0000000000000000000000000000000000000000..f1bf68efa1faf8e3269d3c69ee75a243abe3d730 --- /dev/null +++ b/convlab2/nlu/milu/unified_datasets/configs/tm2_user_context3.jsonnet @@ -0,0 +1,104 @@ +{ + "dataset_reader": { + "type": "milu", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + }, + "token_characters": { + "type": "characters", + "min_padding_length": 3 + }, + }, + "context_size": 3, + "agent": "user", + "use_unified_datasets": true, + "dataset_name": "tm2", + "random_context_size": false + }, + "train_data_path": "train", + "validation_data_path": "validation", + "test_data_path": "test", + "model": { + "type": "milu", + "label_encoding": "BIO", + "use_unified_datasets": true, + "dropout": 0.3, + "include_start_end_transitions": false, + 
"text_field_embedder": { + "token_embedders": { + "tokens": { + "type": "embedding", + "embedding_dim": 50, + "pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.50d.txt.gz", + "trainable": true + }, + "token_characters": { + "type": "character_encoding", + "embedding": { + "embedding_dim": 16 + }, + "encoder": { + "type": "cnn", + "embedding_dim": 16, + "num_filters": 128, + "ngram_filter_sizes": [3], + "conv_layer_activation": "relu" + } + } + } + }, + "encoder": { + "type": "lstm", + "input_size": 178, + "hidden_size": 200, + "num_layers": 1, + "dropout": 0.5, + "bidirectional": true + }, + "intent_encoder": { + "type": "lstm", + "input_size": 400, + "hidden_size": 200, + "num_layers": 1, + "dropout": 0.5, + "bidirectional": true + }, + "attention": { + "type": "bilinear", + "vector_dim": 400, + "matrix_dim": 400 + }, + "context_for_intent": true, + "context_for_tag": false, + "attention_for_intent": false, + "attention_for_tag": false, + "regularizer": [ + [ + "scalar_parameters", + { + "type": "l2", + "alpha": 0.1 + } + ] + ] + }, + "iterator": { + "type": "basic", + "batch_size": 64 + }, + "trainer": { + "optimizer": { + "type": "adam", + "lr": 0.001 + }, + "validation_metric": "+f1-measure", + "num_serialized_models_to_keep": 3, + "num_epochs": 40, + "grad_norm": 5.0, + "patience": 75, + "cuda_device": 0 + }, + "evaluate_on_test": true +} diff --git a/convlab2/nlu/milu/unified_datasets/configs/tm3_user.jsonnet b/convlab2/nlu/milu/unified_datasets/configs/tm3_user.jsonnet new file mode 100755 index 0000000000000000000000000000000000000000..c554d83e99ae63c5af288ca49a76cb8d8d5bc1e0 --- /dev/null +++ b/convlab2/nlu/milu/unified_datasets/configs/tm3_user.jsonnet @@ -0,0 +1,104 @@ +{ + "dataset_reader": { + "type": "milu", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + }, + "token_characters": { + "type": "characters", + "min_padding_length": 3 + }, + }, + "context_size": 0, + "agent": 
"user", + "use_unified_datasets": true, + "dataset_name": "tm3", + "random_context_size": false + }, + "train_data_path": "train", + "validation_data_path": "validation", + "test_data_path": "test", + "model": { + "type": "milu", + "label_encoding": "BIO", + "use_unified_datasets": true, + "dropout": 0.3, + "include_start_end_transitions": false, + "text_field_embedder": { + "token_embedders": { + "tokens": { + "type": "embedding", + "embedding_dim": 50, + "pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.50d.txt.gz", + "trainable": true + }, + "token_characters": { + "type": "character_encoding", + "embedding": { + "embedding_dim": 16 + }, + "encoder": { + "type": "cnn", + "embedding_dim": 16, + "num_filters": 128, + "ngram_filter_sizes": [3], + "conv_layer_activation": "relu" + } + } + } + }, + "encoder": { + "type": "lstm", + "input_size": 178, + "hidden_size": 200, + "num_layers": 1, + "dropout": 0.5, + "bidirectional": true + }, + "intent_encoder": { + "type": "lstm", + "input_size": 400, + "hidden_size": 200, + "num_layers": 1, + "dropout": 0.5, + "bidirectional": true + }, + "attention": { + "type": "bilinear", + "vector_dim": 400, + "matrix_dim": 400 + }, + "context_for_intent": true, + "context_for_tag": false, + "attention_for_intent": false, + "attention_for_tag": false, + "regularizer": [ + [ + "scalar_parameters", + { + "type": "l2", + "alpha": 0.1 + } + ] + ] + }, + "iterator": { + "type": "basic", + "batch_size": 64 + }, + "trainer": { + "optimizer": { + "type": "adam", + "lr": 0.001 + }, + "validation_metric": "+f1-measure", + "num_serialized_models_to_keep": 3, + "num_epochs": 40, + "grad_norm": 5.0, + "patience": 75, + "cuda_device": 0 + }, + "evaluate_on_test": true +} diff --git a/convlab2/nlu/milu/unified_datasets/configs/tm3_user_context3.jsonnet b/convlab2/nlu/milu/unified_datasets/configs/tm3_user_context3.jsonnet new file mode 100755 index 
0000000000000000000000000000000000000000..879b1070b246833a76a69b2c49435febbb07d8cb --- /dev/null +++ b/convlab2/nlu/milu/unified_datasets/configs/tm3_user_context3.jsonnet @@ -0,0 +1,104 @@ +{ + "dataset_reader": { + "type": "milu", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + }, + "token_characters": { + "type": "characters", + "min_padding_length": 3 + }, + }, + "context_size": 3, + "agent": "user", + "use_unified_datasets": true, + "dataset_name": "tm3", + "random_context_size": false + }, + "train_data_path": "train", + "validation_data_path": "validation", + "test_data_path": "test", + "model": { + "type": "milu", + "label_encoding": "BIO", + "use_unified_datasets": true, + "dropout": 0.3, + "include_start_end_transitions": false, + "text_field_embedder": { + "token_embedders": { + "tokens": { + "type": "embedding", + "embedding_dim": 50, + "pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.50d.txt.gz", + "trainable": true + }, + "token_characters": { + "type": "character_encoding", + "embedding": { + "embedding_dim": 16 + }, + "encoder": { + "type": "cnn", + "embedding_dim": 16, + "num_filters": 128, + "ngram_filter_sizes": [3], + "conv_layer_activation": "relu" + } + } + } + }, + "encoder": { + "type": "lstm", + "input_size": 178, + "hidden_size": 200, + "num_layers": 1, + "dropout": 0.5, + "bidirectional": true + }, + "intent_encoder": { + "type": "lstm", + "input_size": 400, + "hidden_size": 200, + "num_layers": 1, + "dropout": 0.5, + "bidirectional": true + }, + "attention": { + "type": "bilinear", + "vector_dim": 400, + "matrix_dim": 400 + }, + "context_for_intent": true, + "context_for_tag": false, + "attention_for_intent": false, + "attention_for_tag": false, + "regularizer": [ + [ + "scalar_parameters", + { + "type": "l2", + "alpha": 0.1 + } + ] + ] + }, + "iterator": { + "type": "basic", + "batch_size": 64 + }, + "trainer": { + "optimizer": { + "type": "adam", + "lr": 
def create_delex_data(dataset, delex_format='[({domain})-({slot})]', ignore_values=('yes', 'no')):
    """Add a delexicalized ``delex_utterance`` to every turn of ``dataset``.

    Values are replaced by placeholders in three passes per turn:

    1. Non-categorical dialogue acts that carry a ``start``/``end`` span are
       replaced left to right, provided the span does not overlap a previous
       one and is not glued to a neighbouring word character.
    2. Categorical dialogue acts are searched for in the remaining text,
       shortest value first.
    3. Values of the belief state are searched for in the remaining text.
       A turn without a ``state`` key reuses the most recent state seen
       earlier in the same dialog.

    For passes 2 and 3 a value is only replaced when it occurs exactly once
    (case-insensitively, delimited by non-word characters) in exactly one
    not-yet-delexicalized piece of the utterance; ambiguous values are left
    untouched.

    Args:
        dataset: mapping ``split -> list of dialogs``. Each dialog has a
            ``'turns'`` list; each turn has ``'utterance'`` and
            ``'dialogue_acts'`` (with ``'non-categorical'`` and
            ``'categorical'`` lists) and optionally ``'state'``
            (``domain -> slot -> '|'``-separated values). Modified in place.
        delex_format: ``str.format`` template for a placeholder; it is
            formatted with ``domain``, ``slot`` and ``value``.
        ignore_values: lower-cased values that are never delexicalized in
            passes 2 and 3.

    Returns:
        A tuple ``(dataset, delex_vocab)`` where ``dataset`` is the same
        (mutated) object and ``delex_vocab`` is the sorted list of the
        placeholders that were actually inserted.

    Raises:
        AssertionError: if a span annotation does not match its value.
    """
    non_word = re.compile(r'\W')
    delex_vocab = set()

    def delex_inplace(texts_placeholders, value, placeholder):
        """Replace the unique occurrence of ``value`` by ``placeholder``.

        ``texts_placeholders`` is a list of ``(text, is_placeholder)`` pieces
        and is edited in place. Returns True when a replacement was made.
        """
        # The value is escaped so that regex metacharacters inside it
        # ("c++", "4.5", "(a)") are matched literally.
        value_pattern = re.compile(
            r'(\W|^)(' + re.escape(value) + r')(\W|$)', flags=re.I)
        candidates = [
            idx
            for idx, (text, is_placeholder) in enumerate(texts_placeholders)
            if not is_placeholder and len(value_pattern.findall(text)) == 1
        ]
        if len(candidates) != 1:
            # Either absent or ambiguous: leave the utterance untouched.
            return False
        idx = candidates[0]
        text = texts_placeholders[idx][0]
        # Group 2 is the value itself, without the boundary characters.
        start, end = value_pattern.search(text).span(2)
        texts_placeholders[idx:idx + 1] = [
            (text[:start], False),
            (placeholder, True),
            (text[end:], False),
        ]
        return True

    def delex_value(texts_placeholders, domain, slot, value):
        """Try to delexicalize one (domain, slot, value) by text search."""
        # An empty value would match at arbitrary punctuation boundaries.
        if not value or value.lower() in ignore_values:
            return
        placeholder = delex_format.format(domain=domain, slot=slot, value=value)
        if delex_inplace(texts_placeholders, value, placeholder):
            delex_vocab.add(placeholder)

    for data_split in dataset:
        for dialog in dataset[data_split]:
            # The state persists across turns that do not carry their own.
            state = {}
            for turn in dialog['turns']:
                utt = turn['utterance']
                delex_utt = []
                last_end = 0

                # Pass 1: span-annotated non-categorical acts, left to right.
                # Acts without span annotation are ignored.
                spans = [x for x in turn['dialogue_acts']['non-categorical'] if 'start' in x]
                for da in sorted(spans, key=lambda x: x['start']):
                    start, end = da['start'], da['end']
                    domain, slot, value = da['domain'], da['slot'], da['value']
                    assert utt[start:end] == value
                    # No overlap with an earlier span, and no word character
                    # directly before or after the span.
                    no_overlap = start >= last_end
                    clean_left = start == 0 or non_word.match(utt[start - 1])
                    clean_right = end >= len(utt) or non_word.match(utt[end])
                    if no_overlap and clean_left and clean_right:
                        placeholder = delex_format.format(domain=domain, slot=slot, value=value)
                        delex_vocab.add(placeholder)
                        delex_utt.append((utt[last_end:start], False))
                        delex_utt.append((placeholder, True))
                        last_end = end
                delex_utt.append((utt[last_end:], False))

                # Pass 2: categorical acts, shortest value first.
                for da in sorted(turn['dialogue_acts']['categorical'], key=lambda x: len(x['value'])):
                    delex_value(delex_utt, da['domain'], da['slot'], da['value'])

                # Pass 3: belief state values.
                if 'state' in turn:
                    state = turn['state']
                for domain in state:
                    for slot, values in state[domain].items():
                        if values:
                            for value in values.split('|'):
                                delex_value(delex_utt, domain, slot, value)

                turn['delex_utterance'] = ''.join(piece for piece, _ in delex_utt)

    return dataset, sorted(delex_vocab)
create_delex_data(dataset) + json.dump(dataset['test'], open('delex_multiwoz21_test.json', 'w', encoding='utf-8'), indent=2, ensure_ascii=False) + json.dump(delex_vocab, open('delex_vocab.json', 'w', encoding='utf-8'), indent=2, ensure_ascii=False) + with open('delex_cmp.txt', 'w') as f: + for dialog in dataset['test']: + for turn in dialog['turns']: + f.write(turn['utterance']+'\n') + f.write(turn['delex_utterance']+'\n') + f.write('\n') diff --git a/setup.py b/setup.py index 900b92f3912b72831bb027da851da556a9d8ad3d..ef798d8ece195266dcda23b9ae8a1b3612822b8c 100755 --- a/setup.py +++ b/setup.py @@ -41,6 +41,7 @@ setup( 'numpy', 'nltk', 'scipy', + 'tensorboard', 'torch>=1.6', 'transformers>=4.0', 'datasets>=1.8',