From 96c469b984ee10cc371a59dafbc3004baea3ab38 Mon Sep 17 00:00:00 2001
From: zqwerty <zhuq96@hotmail.com>
Date: Wed, 11 May 2022 16:14:10 +0800
Subject: [PATCH] run_seq2seq will use early stopping (patience=10 epochs)
 when using the load_best_model_at_end param

---
 convlab2/base_models/bert/infer_bio.sh                         | 3 +--
 convlab2/base_models/bert/train_bio.sh                         | 3 +--
 convlab2/base_models/gpt/keyword_extraction/test_t5_key2gen.sh | 1 -
 .../base_models/gpt/keyword_extraction/train_lm_dialogpt.sh    | 1 -
 convlab2/base_models/gpt/keyword_extraction/train_lm_gpt.sh    | 1 -
 .../base_models/gpt/keyword_extraction/train_t5_key2gen.sh     | 1 -
 convlab2/base_models/t5/dst/run_dst.sh                         | 2 --
 convlab2/base_models/t5/dst/run_dst_fewshot.sh                 | 2 --
 convlab2/base_models/t5/dst/run_dst_pretrain.sh                | 1 -
 convlab2/base_models/t5/goal2dialogue/run_goal2dialogue.sh     | 1 -
 convlab2/base_models/t5/nlg/run_nlg.sh                         | 2 --
 convlab2/base_models/t5/nlg/run_nlg_fewshot.sh                 | 2 --
 convlab2/base_models/t5/nlg/run_nlg_pretrain.sh                | 1 -
 convlab2/base_models/t5/nlu/run_nlu.sh                         | 2 --
 convlab2/base_models/t5/nlu/run_nlu_fewshot.sh                 | 2 --
 convlab2/base_models/t5/nlu/run_nlu_pretrain.sh                | 1 -
 convlab2/base_models/t5/rg/run_rg.sh                           | 1 -
 convlab2/base_models/t5/run_seq2seq.py                         | 2 +-
 18 files changed, 3 insertions(+), 26 deletions(-)

diff --git a/convlab2/base_models/bert/infer_bio.sh b/convlab2/base_models/bert/infer_bio.sh
index c07f5537..455c76a8 100644
--- a/convlab2/base_models/bert/infer_bio.sh
+++ b/convlab2/base_models/bert/infer_bio.sh
@@ -30,8 +30,7 @@ do
         --logging_dir ${logging_dir} \
         --overwrite_output_dir \
         --preprocessing_num_workers 4 \
-        --per_device_eval_batch_size ${per_device_eval_batch_size} \
-        --debug underflow_overflow
+        --per_device_eval_batch_size ${per_device_eval_batch_size}

     mv ${output_dir}/predictions.json ${output_dir}/${split}.json
 done
diff --git a/convlab2/base_models/bert/train_bio.sh b/convlab2/base_models/bert/train_bio.sh
index 4d6a4d8d..7b30df75 100644
--- a/convlab2/base_models/bert/train_bio.sh
+++ b/convlab2/base_models/bert/train_bio.sh
@@ -58,5 +58,4 @@ CUDA_VISIBLE_DEVICES=1,2 python -m torch.distributed.launch \
     --per_device_eval_batch_size ${per_device_eval_batch_size} \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
-    --num_train_epochs ${num_train_epochs} \
-    --debug underflow_overflow
+    --num_train_epochs ${num_train_epochs}
diff --git a/convlab2/base_models/gpt/keyword_extraction/test_t5_key2gen.sh b/convlab2/base_models/gpt/keyword_extraction/test_t5_key2gen.sh
index 469ec695..ac204b5d 100644
--- a/convlab2/base_models/gpt/keyword_extraction/test_t5_key2gen.sh
+++ b/convlab2/base_models/gpt/keyword_extraction/test_t5_key2gen.sh
@@ -45,6 +45,5 @@ python -m torch.distributed.launch \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --debug underflow_overflow \
     --adafactor \
     --gradient_checkpointing
diff --git a/convlab2/base_models/gpt/keyword_extraction/train_lm_dialogpt.sh b/convlab2/base_models/gpt/keyword_extraction/train_lm_dialogpt.sh
index 303ecb3e..f260f707 100644
--- a/convlab2/base_models/gpt/keyword_extraction/train_lm_dialogpt.sh
+++ b/convlab2/base_models/gpt/keyword_extraction/train_lm_dialogpt.sh
@@ -43,5 +43,4 @@ python ../run_clm.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --debug underflow_overflow \
     --gradient_checkpointing
diff --git a/convlab2/base_models/gpt/keyword_extraction/train_lm_gpt.sh b/convlab2/base_models/gpt/keyword_extraction/train_lm_gpt.sh
index fb510c88..82c63a1f 100644
--- a/convlab2/base_models/gpt/keyword_extraction/train_lm_gpt.sh
+++ b/convlab2/base_models/gpt/keyword_extraction/train_lm_gpt.sh
@@ -43,5 +43,4 @@ python ../run_clm.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --debug underflow_overflow \
     --gradient_checkpointing
diff --git a/convlab2/base_models/gpt/keyword_extraction/train_t5_key2gen.sh b/convlab2/base_models/gpt/keyword_extraction/train_t5_key2gen.sh
index d92365e7..2c795ecf 100644
--- a/convlab2/base_models/gpt/keyword_extraction/train_t5_key2gen.sh
+++ b/convlab2/base_models/gpt/keyword_extraction/train_t5_key2gen.sh
@@ -52,6 +52,5 @@ python -m torch.distributed.launch \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --debug underflow_overflow \
     --adafactor \
     --gradient_checkpointing
diff --git a/convlab2/base_models/t5/dst/run_dst.sh b/convlab2/base_models/t5/dst/run_dst.sh
index c678005e..2dfc622d 100644
--- a/convlab2/base_models/t5/dst/run_dst.sh
+++ b/convlab2/base_models/t5/dst/run_dst.sh
@@ -52,7 +52,6 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --debug underflow_overflow \
     --adafactor \
     --gradient_checkpointing

@@ -78,7 +77,6 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --debug underflow_overflow \
     --adafactor \
     --gradient_checkpointing

diff --git a/convlab2/base_models/t5/dst/run_dst_fewshot.sh b/convlab2/base_models/t5/dst/run_dst_fewshot.sh
index 298a37f1..d4571911 100644
--- a/convlab2/base_models/t5/dst/run_dst_fewshot.sh
+++ b/convlab2/base_models/t5/dst/run_dst_fewshot.sh
@@ -54,7 +54,6 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --debug underflow_overflow \
     --adafactor \
     --gradient_checkpointing

@@ -80,7 +79,6 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --debug underflow_overflow \
     --adafactor \
     --gradient_checkpointing

diff --git a/convlab2/base_models/t5/dst/run_dst_pretrain.sh b/convlab2/base_models/t5/dst/run_dst_pretrain.sh
index 11995943..29cc280e 100644
--- a/convlab2/base_models/t5/dst/run_dst_pretrain.sh
+++ b/convlab2/base_models/t5/dst/run_dst_pretrain.sh
@@ -63,6 +63,5 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --debug underflow_overflow \
     --adafactor \
     --gradient_checkpointing
diff --git a/convlab2/base_models/t5/goal2dialogue/run_goal2dialogue.sh b/convlab2/base_models/t5/goal2dialogue/run_goal2dialogue.sh
index 09a2c33a..0d10fe50 100644
--- a/convlab2/base_models/t5/goal2dialogue/run_goal2dialogue.sh
+++ b/convlab2/base_models/t5/goal2dialogue/run_goal2dialogue.sh
@@ -47,6 +47,5 @@ python -m torch.distributed.launch \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --debug underflow_overflow \
     --adafactor \
     --gradient_checkpointing
diff --git a/convlab2/base_models/t5/nlg/run_nlg.sh b/convlab2/base_models/t5/nlg/run_nlg.sh
index 3352e6c1..9de7fece 100644
--- a/convlab2/base_models/t5/nlg/run_nlg.sh
+++ b/convlab2/base_models/t5/nlg/run_nlg.sh
@@ -52,7 +52,6 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --debug underflow_overflow \
     --adafactor \
     --gradient_checkpointing

@@ -78,7 +77,6 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --debug underflow_overflow \
     --adafactor \
     --gradient_checkpointing

diff --git a/convlab2/base_models/t5/nlg/run_nlg_fewshot.sh b/convlab2/base_models/t5/nlg/run_nlg_fewshot.sh
index 45d1964a..6f7c8d17 100644
--- a/convlab2/base_models/t5/nlg/run_nlg_fewshot.sh
+++ b/convlab2/base_models/t5/nlg/run_nlg_fewshot.sh
@@ -55,7 +55,6 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --debug underflow_overflow \
     --adafactor \
     --gradient_checkpointing

@@ -81,7 +80,6 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --debug underflow_overflow \
     --adafactor \
     --gradient_checkpointing

diff --git a/convlab2/base_models/t5/nlg/run_nlg_pretrain.sh b/convlab2/base_models/t5/nlg/run_nlg_pretrain.sh
index 4d2b440e..8af5dd10 100644
--- a/convlab2/base_models/t5/nlg/run_nlg_pretrain.sh
+++ b/convlab2/base_models/t5/nlg/run_nlg_pretrain.sh
@@ -63,6 +63,5 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --debug underflow_overflow \
     --adafactor \
     --gradient_checkpointing
diff --git a/convlab2/base_models/t5/nlu/run_nlu.sh b/convlab2/base_models/t5/nlu/run_nlu.sh
index 05671139..fb9be022 100644
--- a/convlab2/base_models/t5/nlu/run_nlu.sh
+++ b/convlab2/base_models/t5/nlu/run_nlu.sh
@@ -52,7 +52,6 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --debug underflow_overflow \
     --adafactor \
     --gradient_checkpointing

@@ -78,7 +77,6 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --debug underflow_overflow \
     --adafactor \
     --gradient_checkpointing

diff --git a/convlab2/base_models/t5/nlu/run_nlu_fewshot.sh b/convlab2/base_models/t5/nlu/run_nlu_fewshot.sh
index 2c783912..568c2713 100644
--- a/convlab2/base_models/t5/nlu/run_nlu_fewshot.sh
+++ b/convlab2/base_models/t5/nlu/run_nlu_fewshot.sh
@@ -55,7 +55,6 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --debug underflow_overflow \
     --adafactor \
     --gradient_checkpointing

@@ -81,7 +80,6 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --debug underflow_overflow \
     --adafactor \
     --gradient_checkpointing

diff --git a/convlab2/base_models/t5/nlu/run_nlu_pretrain.sh b/convlab2/base_models/t5/nlu/run_nlu_pretrain.sh
index ccc7c08f..0f500292 100644
--- a/convlab2/base_models/t5/nlu/run_nlu_pretrain.sh
+++ b/convlab2/base_models/t5/nlu/run_nlu_pretrain.sh
@@ -63,6 +63,5 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --debug underflow_overflow \
     --adafactor \
     --gradient_checkpointing
diff --git a/convlab2/base_models/t5/rg/run_rg.sh b/convlab2/base_models/t5/rg/run_rg.sh
index 308c639c..ff97ce5a 100644
--- a/convlab2/base_models/t5/rg/run_rg.sh
+++ b/convlab2/base_models/t5/rg/run_rg.sh
@@ -65,6 +65,5 @@ python -m torch.distributed.launch \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --debug underflow_overflow \
     --adafactor \
     --gradient_checkpointing
diff --git a/convlab2/base_models/t5/run_seq2seq.py b/convlab2/base_models/t5/run_seq2seq.py
index c76bb5cd..c702897d 100644
--- a/convlab2/base_models/t5/run_seq2seq.py
+++ b/convlab2/base_models/t5/run_seq2seq.py
@@ -565,7 +565,7 @@ def main():
         data_collator=data_collator,
         compute_metrics=compute_metrics if training_args.predict_with_generate else None,
     )
-    if data_args.early_stopping_patience > 0:
+    if training_args.load_best_model_at_end:
         trainer.add_callback(EarlyStoppingCallback(early_stopping_patience=data_args.early_stopping_patience))

     # Training
--
GitLab
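For context, the run_seq2seq.py hunk above makes load_best_model_at_end the single switch for early stopping: the EarlyStoppingCallback is attached whenever the best checkpoint is reloaded at the end of training, with the patience taken from early_stopping_patience (10 epochs per the subject). Below is a minimal sketch of the resulting behavior, assuming the Hugging Face transformers API that run_seq2seq.py builds on; the output_dir value, the strategy settings, and the literal patience of 10 are illustrative stand-ins for the script's own arguments.

from transformers import EarlyStoppingCallback, Seq2SeqTrainingArguments

# load_best_model_at_end requires matching evaluation and save strategies,
# plus a metric to rank checkpoints by.
training_args = Seq2SeqTrainingArguments(
    output_dir="output",              # illustrative path
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="loss",
)

# Mirrors the patched condition in run_seq2seq.py: early stopping is tied
# to best-checkpoint selection rather than to a separate patience flag.
callbacks = []
if training_args.load_best_model_at_end:
    callbacks.append(EarlyStoppingCallback(early_stopping_patience=10))
# callbacks would then be passed to Seq2SeqTrainer(..., callbacks=callbacks),
# so training halts once the tracked metric fails to improve for 10
# consecutive evaluations and the best checkpoint is restored at the end.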