From 3775f099a317d3f889fb7adb1fa0ad8a50cb28d5 Mon Sep 17 00:00:00 2001
From: zqwerty <zhuq96@hotmail.com>
Date: Fri, 22 Jul 2022 10:02:24 +0800
Subject: [PATCH] change --adafactor to --optim adafactor

---
 .../gpt/keyword_extraction/train_t5_key2gen+key2gen_noisy.sh | 2 +-
 convlab/base_models/gpt/keyword_extraction/train_t5_rg.sh   | 2 +-
 .../keyword_extraction/train_t5_rg_key2gen+key2gen_noisy.sh | 2 +-
 convlab/base_models/t5/dst/run_dst.sh                       | 4 ++--
 convlab/base_models/t5/dst/run_dst_fewshot.sh               | 4 ++--
 convlab/base_models/t5/dst/run_dst_multitask.sh             | 4 ++--
 convlab/base_models/t5/dst/run_dst_pretrain.sh              | 2 +-
 convlab/base_models/t5/goal2dialogue/run_goal2dialogue.sh   | 2 +-
 convlab/base_models/t5/nlg/run_nlg.sh                       | 4 ++--
 convlab/base_models/t5/nlg/run_nlg_fewshot.sh               | 4 ++--
 convlab/base_models/t5/nlg/run_nlg_multitask.sh             | 4 ++--
 convlab/base_models/t5/nlg/run_nlg_pretrain.sh              | 2 +-
 convlab/base_models/t5/nlu/run_nlu.sh                       | 4 ++--
 convlab/base_models/t5/nlu/run_nlu_fewshot.sh               | 4 ++--
 convlab/base_models/t5/nlu/run_nlu_multitask.sh             | 4 ++--
 convlab/base_models/t5/nlu/run_nlu_pretrain.sh              | 2 +-
 convlab/base_models/t5/nlu/run_retnlu.sh                    | 4 ++--
 convlab/base_models/t5/nlu/run_retnlu_fewshot.sh            | 4 ++--
 convlab/base_models/t5/nlu/run_retnlu_in_context.sh         | 4 ++--
 convlab/base_models/t5/nlu/run_retnlu_in_context_fewshot.sh | 4 ++--
 convlab/base_models/t5/rg/run_rg.sh                         | 2 +-
 21 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/convlab/base_models/gpt/keyword_extraction/train_t5_key2gen+key2gen_noisy.sh b/convlab/base_models/gpt/keyword_extraction/train_t5_key2gen+key2gen_noisy.sh
index 878acdd6..8e0b3617 100644
--- a/convlab/base_models/gpt/keyword_extraction/train_t5_key2gen+key2gen_noisy.sh
+++ b/convlab/base_models/gpt/keyword_extraction/train_t5_key2gen+key2gen_noisy.sh
@@ -35,7 +35,7 @@ python -m torch.distributed.launch --master_port ${master_port} \
     --model_name_or_path ${model_name_or_path} \
     --do_train \
     --save_steps 5000 \
-    --save_total_limit 3 \
+    --save_total_limit 1 \
     --cache_dir ${cache_dir} \
     --output_dir ${output_dir} \
     --logging_dir ${logging_dir} \
diff --git a/convlab/base_models/gpt/keyword_extraction/train_t5_rg.sh b/convlab/base_models/gpt/keyword_extraction/train_t5_rg.sh
index b2de5541..8d9a019b 100644
--- a/convlab/base_models/gpt/keyword_extraction/train_t5_rg.sh
+++ b/convlab/base_models/gpt/keyword_extraction/train_t5_rg.sh
@@ -35,7 +35,7 @@ python -m torch.distributed.launch --master_port ${master_port} \
     --model_name_or_path ${model_name_or_path} \
     --do_train \
     --save_steps 5000 \
-    --save_total_limit 3 \
+    --save_total_limit 1 \
     --cache_dir ${cache_dir} \
     --output_dir ${output_dir} \
     --logging_dir ${logging_dir} \
diff --git a/convlab/base_models/gpt/keyword_extraction/train_t5_rg_key2gen+key2gen_noisy.sh b/convlab/base_models/gpt/keyword_extraction/train_t5_rg_key2gen+key2gen_noisy.sh
index 74c41816..75b79932 100644
--- a/convlab/base_models/gpt/keyword_extraction/train_t5_rg_key2gen+key2gen_noisy.sh
+++ b/convlab/base_models/gpt/keyword_extraction/train_t5_rg_key2gen+key2gen_noisy.sh
@@ -35,7 +35,7 @@ python -m torch.distributed.launch --master_port ${master_port} \
     --model_name_or_path ${model_name_or_path} \
     --do_train \
     --save_steps 5000 \
-    --save_total_limit 3 \
+    --save_total_limit 1 \
     --cache_dir ${cache_dir} \
     --output_dir ${output_dir} \
     --logging_dir ${logging_dir} \
diff --git a/convlab/base_models/t5/dst/run_dst.sh b/convlab/base_models/t5/dst/run_dst.sh
index 0704ebf9..05975400 100644
--- a/convlab/base_models/t5/dst/run_dst.sh
+++ b/convlab/base_models/t5/dst/run_dst.sh
@@ -52,7 +52,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python ../run_seq2seq.py \
@@ -77,7 +77,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
diff --git a/convlab/base_models/t5/dst/run_dst_fewshot.sh b/convlab/base_models/t5/dst/run_dst_fewshot.sh
index f548c053..4acd6057 100644
--- a/convlab/base_models/t5/dst/run_dst_fewshot.sh
+++ b/convlab/base_models/t5/dst/run_dst_fewshot.sh
@@ -54,7 +54,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python ../run_seq2seq.py \
@@ -79,7 +79,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order}
diff --git a/convlab/base_models/t5/dst/run_dst_multitask.sh b/convlab/base_models/t5/dst/run_dst_multitask.sh
index 0f3b60a6..aefb1d52 100644
--- a/convlab/base_models/t5/dst/run_dst_multitask.sh
+++ b/convlab/base_models/t5/dst/run_dst_multitask.sh
@@ -61,7 +61,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python ../run_seq2seq.py \
@@ -86,7 +86,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
diff --git a/convlab/base_models/t5/dst/run_dst_pretrain.sh b/convlab/base_models/t5/dst/run_dst_pretrain.sh
index 29cc280e..1a995f4c 100644
--- a/convlab/base_models/t5/dst/run_dst_pretrain.sh
+++ b/convlab/base_models/t5/dst/run_dst_pretrain.sh
@@ -63,5 +63,5 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
diff --git a/convlab/base_models/t5/goal2dialogue/run_goal2dialogue.sh b/convlab/base_models/t5/goal2dialogue/run_goal2dialogue.sh
index 0d10fe50..ac0a877a 100644
--- a/convlab/base_models/t5/goal2dialogue/run_goal2dialogue.sh
+++ b/convlab/base_models/t5/goal2dialogue/run_goal2dialogue.sh
@@ -47,5 +47,5 @@ python -m torch.distributed.launch \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
diff --git a/convlab/base_models/t5/nlg/run_nlg.sh b/convlab/base_models/t5/nlg/run_nlg.sh
index c45079a6..0b5fa390 100644
--- a/convlab/base_models/t5/nlg/run_nlg.sh
+++ b/convlab/base_models/t5/nlg/run_nlg.sh
@@ -52,7 +52,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python ../run_seq2seq.py \
@@ -77,7 +77,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
diff --git a/convlab/base_models/t5/nlg/run_nlg_fewshot.sh b/convlab/base_models/t5/nlg/run_nlg_fewshot.sh
index 4e00fb9d..61e50cda 100644
--- a/convlab/base_models/t5/nlg/run_nlg_fewshot.sh
+++ b/convlab/base_models/t5/nlg/run_nlg_fewshot.sh
@@ -55,7 +55,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python ../run_seq2seq.py \
@@ -80,7 +80,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order}
diff --git a/convlab/base_models/t5/nlg/run_nlg_multitask.sh b/convlab/base_models/t5/nlg/run_nlg_multitask.sh
index 9b0a3d47..dec894aa 100644
--- a/convlab/base_models/t5/nlg/run_nlg_multitask.sh
+++ b/convlab/base_models/t5/nlg/run_nlg_multitask.sh
@@ -61,7 +61,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python ../run_seq2seq.py \
@@ -86,7 +86,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
diff --git a/convlab/base_models/t5/nlg/run_nlg_pretrain.sh b/convlab/base_models/t5/nlg/run_nlg_pretrain.sh
index a1a1b601..7ce91a50 100644
--- a/convlab/base_models/t5/nlg/run_nlg_pretrain.sh
+++ b/convlab/base_models/t5/nlg/run_nlg_pretrain.sh
@@ -60,5 +60,5 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
diff --git a/convlab/base_models/t5/nlu/run_nlu.sh b/convlab/base_models/t5/nlu/run_nlu.sh
index 8cba74ac..b81b04c0 100644
--- a/convlab/base_models/t5/nlu/run_nlu.sh
+++ b/convlab/base_models/t5/nlu/run_nlu.sh
@@ -52,7 +52,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python ../run_seq2seq.py \
@@ -77,7 +77,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
diff --git a/convlab/base_models/t5/nlu/run_nlu_fewshot.sh b/convlab/base_models/t5/nlu/run_nlu_fewshot.sh
index 8da69801..a966310a 100644
--- a/convlab/base_models/t5/nlu/run_nlu_fewshot.sh
+++ b/convlab/base_models/t5/nlu/run_nlu_fewshot.sh
@@ -55,7 +55,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python ../run_seq2seq.py \
@@ -80,7 +80,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order}
diff --git a/convlab/base_models/t5/nlu/run_nlu_multitask.sh b/convlab/base_models/t5/nlu/run_nlu_multitask.sh
index 6380acff..b91f21e3 100644
--- a/convlab/base_models/t5/nlu/run_nlu_multitask.sh
+++ b/convlab/base_models/t5/nlu/run_nlu_multitask.sh
@@ -61,7 +61,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python ../run_seq2seq.py \
@@ -86,7 +86,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
diff --git a/convlab/base_models/t5/nlu/run_nlu_pretrain.sh b/convlab/base_models/t5/nlu/run_nlu_pretrain.sh
index 0f500292..c0511254 100644
--- a/convlab/base_models/t5/nlu/run_nlu_pretrain.sh
+++ b/convlab/base_models/t5/nlu/run_nlu_pretrain.sh
@@ -63,5 +63,5 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
diff --git a/convlab/base_models/t5/nlu/run_retnlu.sh b/convlab/base_models/t5/nlu/run_retnlu.sh
index b45a0e45..fd44e063 100644
--- a/convlab/base_models/t5/nlu/run_retnlu.sh
+++ b/convlab/base_models/t5/nlu/run_retnlu.sh
@@ -53,7 +53,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python ../run_seq2seq.py \
@@ -78,7 +78,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
diff --git a/convlab/base_models/t5/nlu/run_retnlu_fewshot.sh b/convlab/base_models/t5/nlu/run_retnlu_fewshot.sh
index d165859b..e778c80b 100644
--- a/convlab/base_models/t5/nlu/run_retnlu_fewshot.sh
+++ b/convlab/base_models/t5/nlu/run_retnlu_fewshot.sh
@@ -56,7 +56,7 @@ num_train_epochs=100
 #     --gradient_accumulation_steps ${gradient_accumulation_steps} \
 #     --learning_rate ${lr} \
 #     --num_train_epochs ${num_train_epochs} \
-#     --adafactor \
+#     --optim adafactor \
 #     --gradient_checkpointing
 
 # python ../run_seq2seq.py \
@@ -81,7 +81,7 @@ num_train_epochs=100
 #     --gradient_accumulation_steps ${gradient_accumulation_steps} \
 #     --learning_rate ${lr} \
 #     --num_train_epochs ${num_train_epochs} \
-#     --adafactor \
+#     --optim adafactor \
 #     --gradient_checkpointing
 
 # python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order}
diff --git a/convlab/base_models/t5/nlu/run_retnlu_in_context.sh b/convlab/base_models/t5/nlu/run_retnlu_in_context.sh
index 82dae873..775b4b06 100644
--- a/convlab/base_models/t5/nlu/run_retnlu_in_context.sh
+++ b/convlab/base_models/t5/nlu/run_retnlu_in_context.sh
@@ -53,7 +53,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python ../run_seq2seq.py \
@@ -78,7 +78,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
diff --git a/convlab/base_models/t5/nlu/run_retnlu_in_context_fewshot.sh b/convlab/base_models/t5/nlu/run_retnlu_in_context_fewshot.sh
index 836152f8..913ef7cb 100644
--- a/convlab/base_models/t5/nlu/run_retnlu_in_context_fewshot.sh
+++ b/convlab/base_models/t5/nlu/run_retnlu_in_context_fewshot.sh
@@ -56,7 +56,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python ../run_seq2seq.py \
@@ -81,7 +81,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order}
diff --git a/convlab/base_models/t5/rg/run_rg.sh b/convlab/base_models/t5/rg/run_rg.sh
index ff97ce5a..976e862e 100644
--- a/convlab/base_models/t5/rg/run_rg.sh
+++ b/convlab/base_models/t5/rg/run_rg.sh
@@ -65,5 +65,5 @@ python -m torch.distributed.launch \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
-- 
GitLab
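
Note: the standalone `--adafactor` flag of HuggingFace Transformers'
TrainingArguments is deprecated in favor of selecting the optimizer by
name via `--optim` (the deprecation warning suggests `--optim adafactor`
and schedules removal of the old flag for Transformers v5), which is
what this commit applies across all training scripts. A minimal
before/after sketch, assuming a Transformers version that already has
the `optim` argument; the flags other than `--adafactor`/`--optim` are
illustrative, not taken from this patch:

    # old: boolean flag, emits a deprecation warning on recent Transformers
    python ../run_seq2seq.py --model_name_or_path t5-small --output_dir out --do_train --adafactor

    # new: equivalent Adafactor setup, selected by optimizer name
    python ../run_seq2seq.py --model_name_or_path t5-small --output_dir out --do_train --optim adafactor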