diff --git a/convlab2/base_models/bert/infer_bio.sh b/convlab2/base_models/bert/infer_bio.sh index c07f55375dac3f589845781a5cf91ba7b140557c..455c76a8256503e28f0f60842c844094e4a4ad07 100644 --- a/convlab2/base_models/bert/infer_bio.sh +++ b/convlab2/base_models/bert/infer_bio.sh @@ -30,8 +30,7 @@ do --logging_dir ${logging_dir} \ --overwrite_output_dir \ --preprocessing_num_workers 4 \ - --per_device_eval_batch_size ${per_device_eval_batch_size} \ - --debug underflow_overflow + --per_device_eval_batch_size ${per_device_eval_batch_size} mv ${output_dir}/predictions.json ${output_dir}/${split}.json done diff --git a/convlab2/base_models/bert/train_bio.sh b/convlab2/base_models/bert/train_bio.sh index 4d6a4d8df84f5811484f8a5459cdd02d20759b4a..7b30df7515e32b670a9ad3ee99af61e465cddb7a 100644 --- a/convlab2/base_models/bert/train_bio.sh +++ b/convlab2/base_models/bert/train_bio.sh @@ -58,5 +58,4 @@ CUDA_VISIBLE_DEVICES=1,2 python -m torch.distributed.launch \ --per_device_eval_batch_size ${per_device_eval_batch_size} \ --gradient_accumulation_steps ${gradient_accumulation_steps} \ --learning_rate ${lr} \ - --num_train_epochs ${num_train_epochs} \ - --debug underflow_overflow + --num_train_epochs ${num_train_epochs} diff --git a/convlab2/base_models/gpt/keyword_extraction/test_t5_key2gen.sh b/convlab2/base_models/gpt/keyword_extraction/test_t5_key2gen.sh index 469ec695ba681a835c7d9c51e95803c674c87d11..ac204b5d564fe0acb5fb2ac1b49d4d1d6bcad17d 100644 --- a/convlab2/base_models/gpt/keyword_extraction/test_t5_key2gen.sh +++ b/convlab2/base_models/gpt/keyword_extraction/test_t5_key2gen.sh @@ -45,6 +45,5 @@ python -m torch.distributed.launch \ --gradient_accumulation_steps ${gradient_accumulation_steps} \ --learning_rate ${lr} \ --num_train_epochs ${num_train_epochs} \ - --debug underflow_overflow \ --adafactor \ --gradient_checkpointing diff --git a/convlab2/base_models/gpt/keyword_extraction/train_lm_dialogpt.sh 
b/convlab2/base_models/gpt/keyword_extraction/train_lm_dialogpt.sh index 303ecb3e0c660a13e190b193c5b1769fbe70812d..f260f7071529e6837f9c7807d6d5ecf2469494a2 100644 --- a/convlab2/base_models/gpt/keyword_extraction/train_lm_dialogpt.sh +++ b/convlab2/base_models/gpt/keyword_extraction/train_lm_dialogpt.sh @@ -43,5 +43,4 @@ python ../run_clm.py \ --gradient_accumulation_steps ${gradient_accumulation_steps} \ --learning_rate ${lr} \ --num_train_epochs ${num_train_epochs} \ - --debug underflow_overflow \ --gradient_checkpointing diff --git a/convlab2/base_models/gpt/keyword_extraction/train_lm_gpt.sh b/convlab2/base_models/gpt/keyword_extraction/train_lm_gpt.sh index fb510c880b25505e83780eeab76760e30dbccf9d..82c63a1f4c4a1633ad5e7d4a721a3bbac558cefb 100644 --- a/convlab2/base_models/gpt/keyword_extraction/train_lm_gpt.sh +++ b/convlab2/base_models/gpt/keyword_extraction/train_lm_gpt.sh @@ -43,5 +43,4 @@ python ../run_clm.py \ --gradient_accumulation_steps ${gradient_accumulation_steps} \ --learning_rate ${lr} \ --num_train_epochs ${num_train_epochs} \ - --debug underflow_overflow \ --gradient_checkpointing diff --git a/convlab2/base_models/gpt/keyword_extraction/train_t5_key2gen.sh b/convlab2/base_models/gpt/keyword_extraction/train_t5_key2gen.sh index d92365e787e2c58fdd8b6f4a4f870053c7561f2e..2c795ecf58e331e2acbe8ada66b4cf057ed83037 100644 --- a/convlab2/base_models/gpt/keyword_extraction/train_t5_key2gen.sh +++ b/convlab2/base_models/gpt/keyword_extraction/train_t5_key2gen.sh @@ -52,6 +52,5 @@ python -m torch.distributed.launch \ --gradient_accumulation_steps ${gradient_accumulation_steps} \ --learning_rate ${lr} \ --num_train_epochs ${num_train_epochs} \ - --debug underflow_overflow \ --adafactor \ --gradient_checkpointing diff --git a/convlab2/base_models/t5/dst/run_dst.sh b/convlab2/base_models/t5/dst/run_dst.sh index c678005ef1284bcb40333ff47e9a1fbf06c90c16..2dfc622d88a9b1b38e70e15f1f5cefd2d4a78661 100644 --- a/convlab2/base_models/t5/dst/run_dst.sh +++ 
b/convlab2/base_models/t5/dst/run_dst.sh @@ -52,7 +52,6 @@ python ../run_seq2seq.py \ --gradient_accumulation_steps ${gradient_accumulation_steps} \ --learning_rate ${lr} \ --num_train_epochs ${num_train_epochs} \ - --debug underflow_overflow \ --adafactor \ --gradient_checkpointing @@ -78,7 +77,6 @@ python ../run_seq2seq.py \ --gradient_accumulation_steps ${gradient_accumulation_steps} \ --learning_rate ${lr} \ --num_train_epochs ${num_train_epochs} \ - --debug underflow_overflow \ --adafactor \ --gradient_checkpointing diff --git a/convlab2/base_models/t5/dst/run_dst_fewshot.sh b/convlab2/base_models/t5/dst/run_dst_fewshot.sh index 298a37f17a1c0817ff257742b5aa6e61bb9cd5d0..d45719112e50dd44672ab52b28c04014cb5d6e5c 100644 --- a/convlab2/base_models/t5/dst/run_dst_fewshot.sh +++ b/convlab2/base_models/t5/dst/run_dst_fewshot.sh @@ -54,7 +54,6 @@ python ../run_seq2seq.py \ --gradient_accumulation_steps ${gradient_accumulation_steps} \ --learning_rate ${lr} \ --num_train_epochs ${num_train_epochs} \ - --debug underflow_overflow \ --adafactor \ --gradient_checkpointing @@ -80,7 +79,6 @@ python ../run_seq2seq.py \ --gradient_accumulation_steps ${gradient_accumulation_steps} \ --learning_rate ${lr} \ --num_train_epochs ${num_train_epochs} \ - --debug underflow_overflow \ --adafactor \ --gradient_checkpointing diff --git a/convlab2/base_models/t5/dst/run_dst_pretrain.sh b/convlab2/base_models/t5/dst/run_dst_pretrain.sh index 119959431f07485f613de28e5f338b307c9647d6..29cc280e590ebeb700e2f60af6d1dd6f36d5741f 100644 --- a/convlab2/base_models/t5/dst/run_dst_pretrain.sh +++ b/convlab2/base_models/t5/dst/run_dst_pretrain.sh @@ -63,6 +63,5 @@ python ../run_seq2seq.py \ --gradient_accumulation_steps ${gradient_accumulation_steps} \ --learning_rate ${lr} \ --num_train_epochs ${num_train_epochs} \ - --debug underflow_overflow \ --adafactor \ --gradient_checkpointing diff --git a/convlab2/base_models/t5/goal2dialogue/run_goal2dialogue.sh 
b/convlab2/base_models/t5/goal2dialogue/run_goal2dialogue.sh index 09a2c33aa06fa5134dba0707e1df5e633ac9f269..0d10fe5072c238b47b2302a7b0d4cfa8ced9ed73 100644 --- a/convlab2/base_models/t5/goal2dialogue/run_goal2dialogue.sh +++ b/convlab2/base_models/t5/goal2dialogue/run_goal2dialogue.sh @@ -47,6 +47,5 @@ python -m torch.distributed.launch \ --gradient_accumulation_steps ${gradient_accumulation_steps} \ --learning_rate ${lr} \ --num_train_epochs ${num_train_epochs} \ - --debug underflow_overflow \ --adafactor \ --gradient_checkpointing diff --git a/convlab2/base_models/t5/nlg/run_nlg.sh b/convlab2/base_models/t5/nlg/run_nlg.sh index 3352e6c14f4a5c2f61690f0c32fc31b709c73a23..9de7fece68dc5d3a7721001c4d18bdf8712e4c4a 100644 --- a/convlab2/base_models/t5/nlg/run_nlg.sh +++ b/convlab2/base_models/t5/nlg/run_nlg.sh @@ -52,7 +52,6 @@ python ../run_seq2seq.py \ --gradient_accumulation_steps ${gradient_accumulation_steps} \ --learning_rate ${lr} \ --num_train_epochs ${num_train_epochs} \ - --debug underflow_overflow \ --adafactor \ --gradient_checkpointing @@ -78,7 +77,6 @@ python ../run_seq2seq.py \ --gradient_accumulation_steps ${gradient_accumulation_steps} \ --learning_rate ${lr} \ --num_train_epochs ${num_train_epochs} \ - --debug underflow_overflow \ --adafactor \ --gradient_checkpointing diff --git a/convlab2/base_models/t5/nlg/run_nlg_fewshot.sh b/convlab2/base_models/t5/nlg/run_nlg_fewshot.sh index 45d1964a21c44c898958d32f53af7f995a53281a..6f7c8d177e014b5505267060c6d20f0113266140 100644 --- a/convlab2/base_models/t5/nlg/run_nlg_fewshot.sh +++ b/convlab2/base_models/t5/nlg/run_nlg_fewshot.sh @@ -55,7 +55,6 @@ python ../run_seq2seq.py \ --gradient_accumulation_steps ${gradient_accumulation_steps} \ --learning_rate ${lr} \ --num_train_epochs ${num_train_epochs} \ - --debug underflow_overflow \ --adafactor \ --gradient_checkpointing @@ -81,7 +80,6 @@ python ../run_seq2seq.py \ --gradient_accumulation_steps ${gradient_accumulation_steps} \ --learning_rate ${lr} \ 
--num_train_epochs ${num_train_epochs} \ - --debug underflow_overflow \ --adafactor \ --gradient_checkpointing diff --git a/convlab2/base_models/t5/nlg/run_nlg_pretrain.sh b/convlab2/base_models/t5/nlg/run_nlg_pretrain.sh index 4d2b440ece35fd448277a4a50b5e3c122606a8ae..8af5dd10dfafd71a23ebd287ad1f77cc1486c787 100644 --- a/convlab2/base_models/t5/nlg/run_nlg_pretrain.sh +++ b/convlab2/base_models/t5/nlg/run_nlg_pretrain.sh @@ -63,6 +63,5 @@ python ../run_seq2seq.py \ --gradient_accumulation_steps ${gradient_accumulation_steps} \ --learning_rate ${lr} \ --num_train_epochs ${num_train_epochs} \ - --debug underflow_overflow \ --adafactor \ --gradient_checkpointing diff --git a/convlab2/base_models/t5/nlu/run_nlu.sh b/convlab2/base_models/t5/nlu/run_nlu.sh index 05671139cfd691422c924c9da880af317c916a19..fb9be0227b3cced261ed6ccbffa9857e477012a2 100644 --- a/convlab2/base_models/t5/nlu/run_nlu.sh +++ b/convlab2/base_models/t5/nlu/run_nlu.sh @@ -52,7 +52,6 @@ python ../run_seq2seq.py \ --gradient_accumulation_steps ${gradient_accumulation_steps} \ --learning_rate ${lr} \ --num_train_epochs ${num_train_epochs} \ - --debug underflow_overflow \ --adafactor \ --gradient_checkpointing @@ -78,7 +77,6 @@ python ../run_seq2seq.py \ --gradient_accumulation_steps ${gradient_accumulation_steps} \ --learning_rate ${lr} \ --num_train_epochs ${num_train_epochs} \ - --debug underflow_overflow \ --adafactor \ --gradient_checkpointing diff --git a/convlab2/base_models/t5/nlu/run_nlu_fewshot.sh b/convlab2/base_models/t5/nlu/run_nlu_fewshot.sh index 2c783912d66281c8e44f01aafc232b4051e73f86..568c271323cf2472f7989e0cb68e9af051bcc89b 100644 --- a/convlab2/base_models/t5/nlu/run_nlu_fewshot.sh +++ b/convlab2/base_models/t5/nlu/run_nlu_fewshot.sh @@ -55,7 +55,6 @@ python ../run_seq2seq.py \ --gradient_accumulation_steps ${gradient_accumulation_steps} \ --learning_rate ${lr} \ --num_train_epochs ${num_train_epochs} \ - --debug underflow_overflow \ --adafactor \ --gradient_checkpointing @@ -81,7 
+80,6 @@ python ../run_seq2seq.py \ --gradient_accumulation_steps ${gradient_accumulation_steps} \ --learning_rate ${lr} \ --num_train_epochs ${num_train_epochs} \ - --debug underflow_overflow \ --adafactor \ --gradient_checkpointing diff --git a/convlab2/base_models/t5/nlu/run_nlu_pretrain.sh b/convlab2/base_models/t5/nlu/run_nlu_pretrain.sh index ccc7c08fcf1ebbd5d71ed800ce56b7413432631a..0f500292b54ff16381783248652f893b218bc2e8 100644 --- a/convlab2/base_models/t5/nlu/run_nlu_pretrain.sh +++ b/convlab2/base_models/t5/nlu/run_nlu_pretrain.sh @@ -63,6 +63,5 @@ python ../run_seq2seq.py \ --gradient_accumulation_steps ${gradient_accumulation_steps} \ --learning_rate ${lr} \ --num_train_epochs ${num_train_epochs} \ - --debug underflow_overflow \ --adafactor \ --gradient_checkpointing diff --git a/convlab2/base_models/t5/rg/run_rg.sh b/convlab2/base_models/t5/rg/run_rg.sh index 308c639c71ceeb8957188ab499341e8fa2229943..ff97ce5a9f63deae0ee5660c60bf111e891ef7da 100644 --- a/convlab2/base_models/t5/rg/run_rg.sh +++ b/convlab2/base_models/t5/rg/run_rg.sh @@ -65,6 +65,5 @@ python -m torch.distributed.launch \ --gradient_accumulation_steps ${gradient_accumulation_steps} \ --learning_rate ${lr} \ --num_train_epochs ${num_train_epochs} \ - --debug underflow_overflow \ --adafactor \ --gradient_checkpointing diff --git a/convlab2/base_models/t5/run_seq2seq.py b/convlab2/base_models/t5/run_seq2seq.py index c76bb5cd690001e550aef4a1ce287d007c5a066d..c702897d5c2d19d164ae00ee058718ff0dc0be96 100644 --- a/convlab2/base_models/t5/run_seq2seq.py +++ b/convlab2/base_models/t5/run_seq2seq.py @@ -565,7 +565,7 @@ def main(): data_collator=data_collator, compute_metrics=compute_metrics if training_args.predict_with_generate else None, ) - if data_args.early_stopping_patience > 0: + if training_args.load_best_model_at_end and data_args.early_stopping_patience > 0: trainer.add_callback(EarlyStoppingCallback(early_stopping_patience=data_args.early_stopping_patience)) # Training