Commit 3775f099 authored by zqwerty

change --adafactor to --optim adafactor

parent 824068f8
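
For context: this commit tracks an upstream HuggingFace transformers deprecation. The standalone `--adafactor` boolean flag of the Trainer CLI is deprecated (slated for removal in v5) in favor of the generic `--optim` argument, which selects the optimizer by name, so `--optim adafactor` is the supported equivalent. The diff also lowers `--save_total_limit` from 3 to 1 in the pretraining scripts, so the Trainer keeps only the most recent checkpoint on disk. A minimal sketch of the equivalence at the `TrainingArguments` level (assuming a transformers release that supports `optim`, roughly v4.17+):

from transformers import TrainingArguments

# Deprecated spelling: boolean flag, scheduled for removal in transformers v5.
args_old = TrainingArguments(output_dir="tmp", adafactor=True)

# Current spelling: generic optimizer selector, as used in this commit.
args_new = TrainingArguments(output_dir="tmp", optim="adafactor")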
Showing 20 changed files with 33 additions and 33 deletions
@@ -35,7 +35,7 @@ python -m torch.distributed.launch --master_port ${master_port} \
     --model_name_or_path ${model_name_or_path} \
     --do_train \
     --save_steps 5000 \
-    --save_total_limit 3 \
+    --save_total_limit 1 \
     --cache_dir ${cache_dir} \
     --output_dir ${output_dir} \
     --logging_dir ${logging_dir} \

@@ -35,7 +35,7 @@ python -m torch.distributed.launch --master_port ${master_port} \
     --model_name_or_path ${model_name_or_path} \
     --do_train \
     --save_steps 5000 \
-    --save_total_limit 3 \
+    --save_total_limit 1 \
     --cache_dir ${cache_dir} \
     --output_dir ${output_dir} \
     --logging_dir ${logging_dir} \

@@ -35,7 +35,7 @@ python -m torch.distributed.launch --master_port ${master_port} \
     --model_name_or_path ${model_name_or_path} \
     --do_train \
     --save_steps 5000 \
-    --save_total_limit 3 \
+    --save_total_limit 1 \
     --cache_dir ${cache_dir} \
     --output_dir ${output_dir} \
     --logging_dir ${logging_dir} \

@@ -52,7 +52,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python ../run_seq2seq.py \
@@ -77,7 +77,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json

@@ -54,7 +54,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python ../run_seq2seq.py \
@@ -79,7 +79,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order}

@@ -61,7 +61,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python ../run_seq2seq.py \
@@ -86,7 +86,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json

@@ -63,5 +63,5 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

@@ -47,5 +47,5 @@ python -m torch.distributed.launch \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

@@ -52,7 +52,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python ../run_seq2seq.py \
@@ -77,7 +77,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json

@@ -55,7 +55,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python ../run_seq2seq.py \
@@ -80,7 +80,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order}

@@ -61,7 +61,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python ../run_seq2seq.py \
@@ -86,7 +86,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json

@@ -60,5 +60,5 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

@@ -52,7 +52,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python ../run_seq2seq.py \
@@ -77,7 +77,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json

@@ -55,7 +55,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python ../run_seq2seq.py \
@@ -80,7 +80,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order}

@@ -61,7 +61,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python ../run_seq2seq.py \
@@ -86,7 +86,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json

@@ -63,5 +63,5 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

@@ -53,7 +53,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python ../run_seq2seq.py \
@@ -78,7 +78,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json

@@ -56,7 +56,7 @@ num_train_epochs=100
 # --gradient_accumulation_steps ${gradient_accumulation_steps} \
 # --learning_rate ${lr} \
 # --num_train_epochs ${num_train_epochs} \
-# --adafactor \
+# --optim adafactor \
 # --gradient_checkpointing

 # python ../run_seq2seq.py \
@@ -81,7 +81,7 @@ num_train_epochs=100
 # --gradient_accumulation_steps ${gradient_accumulation_steps} \
 # --learning_rate ${lr} \
 # --num_train_epochs ${num_train_epochs} \
-# --adafactor \
+# --optim adafactor \
 # --gradient_checkpointing

 # python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order}

@@ -53,7 +53,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python ../run_seq2seq.py \
@@ -78,7 +78,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json

@@ -56,7 +56,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python ../run_seq2seq.py \
@@ -81,7 +81,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order}

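The same one-line substitution recurs in every script above, including the commented-out blocks. A hypothetical way to apply such a bulk edit (the commit does not record how it was actually made; this is an illustration, not the author's method):

import pathlib

# Replace the deprecated flag with the new spelling in every shell script
# under the current directory. Plain string replacement also rewrites
# commented-out lines, matching what the diff above shows.
for path in pathlib.Path(".").rglob("*.sh"):
    text = path.read_text()
    if "--adafactor" in text:
        path.write_text(text.replace("--adafactor", "--optim adafactor"))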