Skip to content
Snippets Groups Projects
Unverified Commit b1fd6890 authored by zhuqi's avatar zhuqi Committed by GitHub
Browse files

Merge pull request #35 from ConvLab/nlu

Add delex function; NLU benchmark result on multiwoz21, tm1, tm2, tm3
parents 9f5d85a3 4f7f7ed8
Branches
No related tags found
No related merge requests found
Showing
with 834 additions and 3 deletions
......@@ -6,8 +6,10 @@ def evaluate(predict_result):
predict_result = json.load(open(predict_result))
metrics = {x: {'TP':0, 'FP':0, 'FN':0} for x in ['overall', 'binary', 'categorical', 'non-categorical']}
acc = []
for sample in predict_result:
flag = True
for da_type in ['binary', 'categorical', 'non-categorical']:
if da_type == 'binary':
predicts = [(x['intent'], x['domain'], x['slot']) for x in sample['predictions']['dialogue_acts'][da_type]]
......@@ -26,6 +28,8 @@ def evaluate(predict_result):
if ele not in predicts:
metrics['overall']['FN'] += 1
metrics[da_type]['FN'] += 1
flag &= (sorted(predicts)==sorted(labels))
acc.append(flag)
for metric in metrics:
TP = metrics[metric].pop('TP')
......@@ -37,6 +41,7 @@ def evaluate(predict_result):
metrics[metric]['precision'] = precision
metrics[metric]['recall'] = recall
metrics[metric]['f1'] = f1
metrics['accuracy'] = sum(acc)/len(acc)
return metrics
......
......@@ -31,6 +31,50 @@ $ python test.py --config_path path_to_a_config_file
The result (`output.json`) will be saved under the `output_dir` of the config file. Also, it will be zipped as `zipped_model_path` in the config file.
## Performance on unified format datasets
To illustrate that it is easy to use the model for any dataset that is in our unified format, we report its performance on several such datasets. We follow `README.md` and config files in `unified_datasets/` to generate `predictions.json`, then evaluate it using `../evaluate_unified_datasets.py`. Note that we use almost the same hyper-parameters for different datasets, which may not be optimal.
<table>
<thead>
<tr>
<th></th>
<th colspan=2>MultiWOZ 2.1</th>
<th colspan=2>Taskmaster-1</th>
<th colspan=2>Taskmaster-2</th>
<th colspan=2>Taskmaster-3</th>
</tr>
</thead>
<thead>
<tr>
<th>Model</th>
<th>Acc</th><th>F1</th>
<th>Acc</th><th>F1</th>
<th>Acc</th><th>F1</th>
<th>Acc</th><th>F1</th>
</tr>
</thead>
<tbody>
<tr>
<td>BERTNLU</td>
<td>74.5</td><td>85.9</td>
<td>72.8</td><td>50.6</td>
<td>79.2</td><td>70.6</td>
<td>86.1</td><td>81.9</td>
</tr>
<tr>
<td>BERTNLU (context=3)</td>
<td>80.6</td><td>90.3</td>
<td>74.2</td><td>52.7</td>
<td>80.9</td><td>73.3</td>
<td>87.8</td><td>83.8</td>
</tr>
</tbody>
</table>
- Acc: the proportion of utterances for which all dialogue acts are correctly predicted.
- F1: F1 measure of the dialogue act predictions over the corpus.
## References
```
......
......@@ -2,7 +2,7 @@
"dataset_name": "multiwoz21",
"data_dir": "unified_datasets/data/multiwoz21/user/context_window_size_0",
"output_dir": "unified_datasets/output/multiwoz21/user/context_window_size_0",
"zipped_model_path": "unified_datasets/output/multiwoz21/user/context_window_size_0/bertnlu_unified_multiwoz_user_context0.zip",
"zipped_model_path": "unified_datasets/output/multiwoz21/user/context_window_size_0/bertnlu_unified_multiwoz21_user_context0.zip",
"log_dir": "unified_datasets/output/multiwoz21/user/context_window_size_0/log",
"DEVICE": "cuda:0",
"seed": 2019,
......
......@@ -2,7 +2,7 @@
"dataset_name": "multiwoz21",
"data_dir": "unified_datasets/data/multiwoz21/user/context_window_size_3",
"output_dir": "unified_datasets/output/multiwoz21/user/context_window_size_3",
"zipped_model_path": "unified_datasets/output/multiwoz21/user/context_window_size_3/bertnlu_unified_multiwoz_user_context3.zip",
"zipped_model_path": "unified_datasets/output/multiwoz21/user/context_window_size_3/bertnlu_unified_multiwoz21_user_context3.zip",
"log_dir": "unified_datasets/output/multiwoz21/user/context_window_size_3/log",
"DEVICE": "cuda:0",
"seed": 2019,
......
{
"dataset_name": "sgd",
"data_dir": "unified_datasets/data/sgd/user/context_window_size_0",
"output_dir": "unified_datasets/output/sgd/user/context_window_size_0",
"zipped_model_path": "unified_datasets/output/sgd/user/context_window_size_0/bertnlu_unified_sgd_user_context0.zip",
"log_dir": "unified_datasets/output/sgd/user/context_window_size_0/log",
"DEVICE": "cuda:0",
"seed": 2019,
"cut_sen_len": 40,
"use_bert_tokenizer": true,
"context_window_size": 0,
"model": {
"finetune": true,
"context": false,
"context_grad": false,
"pretrained_weights": "bert-base-uncased",
"check_step": 1000,
"max_step": 10000,
"batch_size": 128,
"learning_rate": 1e-4,
"adam_epsilon": 1e-8,
"warmup_steps": 0,
"weight_decay": 0.0,
"dropout": 0.1,
"hidden_units": 768
}
}
\ No newline at end of file
{
"dataset_name": "sgd",
"data_dir": "unified_datasets/data/sgd/user/context_window_size_3",
"output_dir": "unified_datasets/output/sgd/user/context_window_size_3",
"zipped_model_path": "unified_datasets/output/sgd/user/context_window_size_3/bertnlu_unified_sgd_user_context3.zip",
"log_dir": "unified_datasets/output/sgd/user/context_window_size_3/log",
"DEVICE": "cuda:0",
"seed": 2019,
"cut_sen_len": 40,
"use_bert_tokenizer": true,
"context_window_size": 3,
"model": {
"finetune": true,
"context": true,
"context_grad": true,
"pretrained_weights": "bert-base-uncased",
"check_step": 1000,
"max_step": 10000,
"batch_size": 128,
"learning_rate": 1e-4,
"adam_epsilon": 1e-8,
"warmup_steps": 0,
"weight_decay": 0.0,
"dropout": 0.1,
"hidden_units": 1536
}
}
\ No newline at end of file
{
"dataset_name": "tm1",
"data_dir": "unified_datasets/data/tm1/user/context_window_size_0",
"output_dir": "unified_datasets/output/tm1/user/context_window_size_0",
"zipped_model_path": "unified_datasets/output/tm1/user/context_window_size_0/bertnlu_unified_tm1_user_context0.zip",
"log_dir": "unified_datasets/output/tm1/user/context_window_size_0/log",
"DEVICE": "cuda:0",
"seed": 2019,
"cut_sen_len": 40,
"use_bert_tokenizer": true,
"context_window_size": 0,
"model": {
"finetune": true,
"context": false,
"context_grad": false,
"pretrained_weights": "bert-base-uncased",
"check_step": 1000,
"max_step": 10000,
"batch_size": 128,
"learning_rate": 1e-4,
"adam_epsilon": 1e-8,
"warmup_steps": 0,
"weight_decay": 0.0,
"dropout": 0.1,
"hidden_units": 768
}
}
\ No newline at end of file
{
"dataset_name": "tm1",
"data_dir": "unified_datasets/data/tm1/user/context_window_size_3",
"output_dir": "unified_datasets/output/tm1/user/context_window_size_3",
"zipped_model_path": "unified_datasets/output/tm1/user/context_window_size_3/bertnlu_unified_tm1_user_context3.zip",
"log_dir": "unified_datasets/output/tm1/user/context_window_size_3/log",
"DEVICE": "cuda:0",
"seed": 2019,
"cut_sen_len": 40,
"use_bert_tokenizer": true,
"context_window_size": 3,
"model": {
"finetune": true,
"context": true,
"context_grad": true,
"pretrained_weights": "bert-base-uncased",
"check_step": 1000,
"max_step": 10000,
"batch_size": 128,
"learning_rate": 1e-4,
"adam_epsilon": 1e-8,
"warmup_steps": 0,
"weight_decay": 0.0,
"dropout": 0.1,
"hidden_units": 1536
}
}
\ No newline at end of file
{
"dataset_name": "tm2",
"data_dir": "unified_datasets/data/tm2/user/context_window_size_0",
"output_dir": "unified_datasets/output/tm2/user/context_window_size_0",
"zipped_model_path": "unified_datasets/output/tm2/user/context_window_size_0/bertnlu_unified_tm2_user_context0.zip",
"log_dir": "unified_datasets/output/tm2/user/context_window_size_0/log",
"DEVICE": "cuda:0",
"seed": 2019,
"cut_sen_len": 40,
"use_bert_tokenizer": true,
"context_window_size": 0,
"model": {
"finetune": true,
"context": false,
"context_grad": false,
"pretrained_weights": "bert-base-uncased",
"check_step": 1000,
"max_step": 10000,
"batch_size": 128,
"learning_rate": 1e-4,
"adam_epsilon": 1e-8,
"warmup_steps": 0,
"weight_decay": 0.0,
"dropout": 0.1,
"hidden_units": 768
}
}
\ No newline at end of file
{
"dataset_name": "tm2",
"data_dir": "unified_datasets/data/tm2/user/context_window_size_3",
"output_dir": "unified_datasets/output/tm2/user/context_window_size_3",
"zipped_model_path": "unified_datasets/output/tm2/user/context_window_size_3/bertnlu_unified_tm2_user_context3.zip",
"log_dir": "unified_datasets/output/tm2/user/context_window_size_3/log",
"DEVICE": "cuda:0",
"seed": 2019,
"cut_sen_len": 40,
"use_bert_tokenizer": true,
"context_window_size": 3,
"model": {
"finetune": true,
"context": true,
"context_grad": true,
"pretrained_weights": "bert-base-uncased",
"check_step": 1000,
"max_step": 10000,
"batch_size": 128,
"learning_rate": 1e-4,
"adam_epsilon": 1e-8,
"warmup_steps": 0,
"weight_decay": 0.0,
"dropout": 0.1,
"hidden_units": 1536
}
}
\ No newline at end of file
{
"dataset_name": "tm3",
"data_dir": "unified_datasets/data/tm3/user/context_window_size_0",
"output_dir": "unified_datasets/output/tm3/user/context_window_size_0",
"zipped_model_path": "unified_datasets/output/tm3/user/context_window_size_0/bertnlu_unified_tm3_user_context0.zip",
"log_dir": "unified_datasets/output/tm3/user/context_window_size_0/log",
"DEVICE": "cuda:0",
"seed": 2019,
"cut_sen_len": 40,
"use_bert_tokenizer": true,
"context_window_size": 0,
"model": {
"finetune": true,
"context": false,
"context_grad": false,
"pretrained_weights": "bert-base-uncased",
"check_step": 1000,
"max_step": 10000,
"batch_size": 128,
"learning_rate": 1e-4,
"adam_epsilon": 1e-8,
"warmup_steps": 0,
"weight_decay": 0.0,
"dropout": 0.1,
"hidden_units": 768
}
}
\ No newline at end of file
{
"dataset_name": "tm3",
"data_dir": "unified_datasets/data/tm3/user/context_window_size_3",
"output_dir": "unified_datasets/output/tm3/user/context_window_size_3",
"zipped_model_path": "unified_datasets/output/tm3/user/context_window_size_3/bertnlu_unified_tm3_user_context3.zip",
"log_dir": "unified_datasets/output/tm3/user/context_window_size_3/log",
"DEVICE": "cuda:0",
"seed": 2019,
"cut_sen_len": 40,
"use_bert_tokenizer": true,
"context_window_size": 3,
"model": {
"finetune": true,
"context": true,
"context_grad": true,
"pretrained_weights": "bert-base-uncased",
"check_step": 1000,
"max_step": 20000,
"batch_size": 64,
"learning_rate": 1e-4,
"adam_epsilon": 1e-8,
"warmup_steps": 0,
"weight_decay": 0.0,
"dropout": 0.1,
"hidden_units": 1536
}
}
\ No newline at end of file
......@@ -43,6 +43,50 @@ Note that the config file is different from the above. You should set:
## Predict
See `nlu.py` under `multiwoz` and `unified_datasets` directories.
## Performance on unified format datasets
To illustrate that it is easy to use the model for any dataset that is in our unified format, we report its performance on several such datasets. We follow `README.md` and config files in `unified_datasets/` to generate `predictions.json`, then evaluate it using `../evaluate_unified_datasets.py`. Note that we use almost the same hyper-parameters for different datasets, which may not be optimal.
<table>
<thead>
<tr>
<th></th>
<th colspan=2>MultiWOZ 2.1</th>
<th colspan=2>Taskmaster-1</th>
<th colspan=2>Taskmaster-2</th>
<th colspan=2>Taskmaster-3</th>
</tr>
</thead>
<thead>
<tr>
<th>Model</th>
<th>Acc</th><th>F1</th>
<th>Acc</th><th>F1</th>
<th>Acc</th><th>F1</th>
<th>Acc</th><th>F1</th>
</tr>
</thead>
<tbody>
<tr>
<td>MILU</td>
<td>72.9</td><td>85.2</td>
<td>72.9</td><td>49.2</td>
<td>79.1</td><td>68.7</td>
<td>85.4</td><td>80.3</td>
</tr>
<tr>
<td>MILU (context=3)</td>
<td>76.6</td><td>87.9</td>
<td>72.4</td><td>48.5</td>
<td>78.9</td><td>68.4</td>
<td>85.1</td><td>80.1</td>
</tr>
</tbody>
</table>
- Acc: the proportion of utterances for which all dialogue acts are correctly predicted.
- F1: F1 measure of the dialogue act predictions over the corpus.
## References
```
@inproceedings{lee2019convlab,
......
......@@ -76,6 +76,8 @@ class MILUDatasetReader(DatasetReader):
for sample in data:
utterance = sample['utterance']
if len(utterance) == 0:
continue
sentences = self._sent_tokenizer.tokenize(utterance)
sent_spans = self._sent_tokenizer.span_tokenize(utterance)
tokens = [token for sent in sentences for token in self._word_tokenizer.tokenize(sent)]
......
......@@ -98,7 +98,7 @@
"num_epochs": 40,
"grad_norm": 5.0,
"patience": 75,
"cuda_device": 4
"cuda_device": 0
},
"evaluate_on_test": true
}
{
"dataset_reader": {
"type": "milu",
"token_indexers": {
"tokens": {
"type": "single_id",
"lowercase_tokens": true
},
"token_characters": {
"type": "characters",
"min_padding_length": 3
},
},
"context_size": 0,
"agent": "user",
"use_unified_datasets": true,
"dataset_name": "sgd",
"random_context_size": false
},
"train_data_path": "train",
"validation_data_path": "validation",
"test_data_path": "test",
"model": {
"type": "milu",
"label_encoding": "BIO",
"use_unified_datasets": true,
"dropout": 0.3,
"include_start_end_transitions": false,
"text_field_embedder": {
"token_embedders": {
"tokens": {
"type": "embedding",
"embedding_dim": 50,
"pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.50d.txt.gz",
"trainable": true
},
"token_characters": {
"type": "character_encoding",
"embedding": {
"embedding_dim": 16
},
"encoder": {
"type": "cnn",
"embedding_dim": 16,
"num_filters": 128,
"ngram_filter_sizes": [3],
"conv_layer_activation": "relu"
}
}
}
},
"encoder": {
"type": "lstm",
"input_size": 178,
"hidden_size": 200,
"num_layers": 1,
"dropout": 0.5,
"bidirectional": true
},
"intent_encoder": {
"type": "lstm",
"input_size": 400,
"hidden_size": 200,
"num_layers": 1,
"dropout": 0.5,
"bidirectional": true
},
"attention": {
"type": "bilinear",
"vector_dim": 400,
"matrix_dim": 400
},
"context_for_intent": true,
"context_for_tag": false,
"attention_for_intent": false,
"attention_for_tag": false,
"regularizer": [
[
"scalar_parameters",
{
"type": "l2",
"alpha": 0.1
}
]
]
},
"iterator": {
"type": "basic",
"batch_size": 64
},
"trainer": {
"optimizer": {
"type": "adam",
"lr": 0.001
},
"validation_metric": "+f1-measure",
"num_serialized_models_to_keep": 3,
"num_epochs": 40,
"grad_norm": 5.0,
"patience": 75,
"cuda_device": 0
},
"evaluate_on_test": true
}
{
"dataset_reader": {
"type": "milu",
"token_indexers": {
"tokens": {
"type": "single_id",
"lowercase_tokens": true
},
"token_characters": {
"type": "characters",
"min_padding_length": 3
},
},
"context_size": 3,
"agent": "user",
"use_unified_datasets": true,
"dataset_name": "sgd",
"random_context_size": false
},
"train_data_path": "train",
"validation_data_path": "validation",
"test_data_path": "test",
"model": {
"type": "milu",
"label_encoding": "BIO",
"use_unified_datasets": true,
"dropout": 0.3,
"include_start_end_transitions": false,
"text_field_embedder": {
"token_embedders": {
"tokens": {
"type": "embedding",
"embedding_dim": 50,
"pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.50d.txt.gz",
"trainable": true
},
"token_characters": {
"type": "character_encoding",
"embedding": {
"embedding_dim": 16
},
"encoder": {
"type": "cnn",
"embedding_dim": 16,
"num_filters": 128,
"ngram_filter_sizes": [3],
"conv_layer_activation": "relu"
}
}
}
},
"encoder": {
"type": "lstm",
"input_size": 178,
"hidden_size": 200,
"num_layers": 1,
"dropout": 0.5,
"bidirectional": true
},
"intent_encoder": {
"type": "lstm",
"input_size": 400,
"hidden_size": 200,
"num_layers": 1,
"dropout": 0.5,
"bidirectional": true
},
"attention": {
"type": "bilinear",
"vector_dim": 400,
"matrix_dim": 400
},
"context_for_intent": true,
"context_for_tag": false,
"attention_for_intent": false,
"attention_for_tag": false,
"regularizer": [
[
"scalar_parameters",
{
"type": "l2",
"alpha": 0.1
}
]
]
},
"iterator": {
"type": "basic",
"batch_size": 64
},
"trainer": {
"optimizer": {
"type": "adam",
"lr": 0.001
},
"validation_metric": "+f1-measure",
"num_serialized_models_to_keep": 3,
"num_epochs": 40,
"grad_norm": 5.0,
"patience": 75,
"cuda_device": 0
},
"evaluate_on_test": true
}
{
"dataset_reader": {
"type": "milu",
"token_indexers": {
"tokens": {
"type": "single_id",
"lowercase_tokens": true
},
"token_characters": {
"type": "characters",
"min_padding_length": 3
},
},
"context_size": 0,
"agent": "user",
"use_unified_datasets": true,
"dataset_name": "tm1",
"random_context_size": false
},
"train_data_path": "train",
"validation_data_path": "validation",
"test_data_path": "test",
"model": {
"type": "milu",
"label_encoding": "BIO",
"use_unified_datasets": true,
"dropout": 0.3,
"include_start_end_transitions": false,
"text_field_embedder": {
"token_embedders": {
"tokens": {
"type": "embedding",
"embedding_dim": 50,
"pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.50d.txt.gz",
"trainable": true
},
"token_characters": {
"type": "character_encoding",
"embedding": {
"embedding_dim": 16
},
"encoder": {
"type": "cnn",
"embedding_dim": 16,
"num_filters": 128,
"ngram_filter_sizes": [3],
"conv_layer_activation": "relu"
}
}
}
},
"encoder": {
"type": "lstm",
"input_size": 178,
"hidden_size": 200,
"num_layers": 1,
"dropout": 0.5,
"bidirectional": true
},
"intent_encoder": {
"type": "lstm",
"input_size": 400,
"hidden_size": 200,
"num_layers": 1,
"dropout": 0.5,
"bidirectional": true
},
"attention": {
"type": "bilinear",
"vector_dim": 400,
"matrix_dim": 400
},
"context_for_intent": true,
"context_for_tag": false,
"attention_for_intent": false,
"attention_for_tag": false,
"regularizer": [
[
"scalar_parameters",
{
"type": "l2",
"alpha": 0.1
}
]
]
},
"iterator": {
"type": "basic",
"batch_size": 64
},
"trainer": {
"optimizer": {
"type": "adam",
"lr": 0.001
},
"validation_metric": "+f1-measure",
"num_serialized_models_to_keep": 3,
"num_epochs": 40,
"grad_norm": 5.0,
"patience": 75,
"cuda_device": 0
},
"evaluate_on_test": true
}
{
"dataset_reader": {
"type": "milu",
"token_indexers": {
"tokens": {
"type": "single_id",
"lowercase_tokens": true
},
"token_characters": {
"type": "characters",
"min_padding_length": 3
},
},
"context_size": 3,
"agent": "user",
"use_unified_datasets": true,
"dataset_name": "tm1",
"random_context_size": false
},
"train_data_path": "train",
"validation_data_path": "validation",
"test_data_path": "test",
"model": {
"type": "milu",
"label_encoding": "BIO",
"use_unified_datasets": true,
"dropout": 0.3,
"include_start_end_transitions": false,
"text_field_embedder": {
"token_embedders": {
"tokens": {
"type": "embedding",
"embedding_dim": 50,
"pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.50d.txt.gz",
"trainable": true
},
"token_characters": {
"type": "character_encoding",
"embedding": {
"embedding_dim": 16
},
"encoder": {
"type": "cnn",
"embedding_dim": 16,
"num_filters": 128,
"ngram_filter_sizes": [3],
"conv_layer_activation": "relu"
}
}
}
},
"encoder": {
"type": "lstm",
"input_size": 178,
"hidden_size": 200,
"num_layers": 1,
"dropout": 0.5,
"bidirectional": true
},
"intent_encoder": {
"type": "lstm",
"input_size": 400,
"hidden_size": 200,
"num_layers": 1,
"dropout": 0.5,
"bidirectional": true
},
"attention": {
"type": "bilinear",
"vector_dim": 400,
"matrix_dim": 400
},
"context_for_intent": true,
"context_for_tag": false,
"attention_for_intent": false,
"attention_for_tag": false,
"regularizer": [
[
"scalar_parameters",
{
"type": "l2",
"alpha": 0.1
}
]
]
},
"iterator": {
"type": "basic",
"batch_size": 64
},
"trainer": {
"optimizer": {
"type": "adam",
"lr": 0.001
},
"validation_metric": "+f1-measure",
"num_serialized_models_to_keep": 3,
"num_epochs": 40,
"grad_norm": 5.0,
"patience": 75,
"cuda_device": 0
},
"evaluate_on_test": true
}
{
"dataset_reader": {
"type": "milu",
"token_indexers": {
"tokens": {
"type": "single_id",
"lowercase_tokens": true
},
"token_characters": {
"type": "characters",
"min_padding_length": 3
},
},
"context_size": 0,
"agent": "user",
"use_unified_datasets": true,
"dataset_name": "tm2",
"random_context_size": false
},
"train_data_path": "train",
"validation_data_path": "validation",
"test_data_path": "test",
"model": {
"type": "milu",
"label_encoding": "BIO",
"use_unified_datasets": true,
"dropout": 0.3,
"include_start_end_transitions": false,
"text_field_embedder": {
"token_embedders": {
"tokens": {
"type": "embedding",
"embedding_dim": 50,
"pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.50d.txt.gz",
"trainable": true
},
"token_characters": {
"type": "character_encoding",
"embedding": {
"embedding_dim": 16
},
"encoder": {
"type": "cnn",
"embedding_dim": 16,
"num_filters": 128,
"ngram_filter_sizes": [3],
"conv_layer_activation": "relu"
}
}
}
},
"encoder": {
"type": "lstm",
"input_size": 178,
"hidden_size": 200,
"num_layers": 1,
"dropout": 0.5,
"bidirectional": true
},
"intent_encoder": {
"type": "lstm",
"input_size": 400,
"hidden_size": 200,
"num_layers": 1,
"dropout": 0.5,
"bidirectional": true
},
"attention": {
"type": "bilinear",
"vector_dim": 400,
"matrix_dim": 400
},
"context_for_intent": true,
"context_for_tag": false,
"attention_for_intent": false,
"attention_for_tag": false,
"regularizer": [
[
"scalar_parameters",
{
"type": "l2",
"alpha": 0.1
}
]
]
},
"iterator": {
"type": "basic",
"batch_size": 64
},
"trainer": {
"optimizer": {
"type": "adam",
"lr": 0.001
},
"validation_metric": "+f1-measure",
"num_serialized_models_to_keep": 3,
"num_epochs": 40,
"grad_norm": 5.0,
"patience": 75,
"cuda_device": 0
},
"evaluate_on_test": true
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment