From 3775f099a317d3f889fb7adb1fa0ad8a50cb28d5 Mon Sep 17 00:00:00 2001
From: zqwerty <zhuq96@hotmail.com>
Date: Fri, 22 Jul 2022 10:02:24 +0800
Subject: [PATCH] Replace deprecated --adafactor flag with --optim adafactor

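HuggingFace Transformers deprecates the boolean --adafactor flag of
TrainingArguments in favor of the --optim argument, so switch every
training script from

    --adafactor \

to

    --optim adafactor \

(in Python terms, TrainingArguments(optim="adafactor") rather than the
deprecated adafactor=True).

Additionally, lower --save_total_limit from 3 to 1 in the keyword
extraction training scripts so that only the most recent checkpoint is
kept on disk.
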
---
 .../gpt/keyword_extraction/train_t5_key2gen+key2gen_noisy.sh  | 2 +-
 convlab/base_models/gpt/keyword_extraction/train_t5_rg.sh     | 2 +-
 .../keyword_extraction/train_t5_rg_key2gen+key2gen_noisy.sh   | 2 +-
 convlab/base_models/t5/dst/run_dst.sh                         | 4 ++--
 convlab/base_models/t5/dst/run_dst_fewshot.sh                 | 4 ++--
 convlab/base_models/t5/dst/run_dst_multitask.sh               | 4 ++--
 convlab/base_models/t5/dst/run_dst_pretrain.sh                | 2 +-
 convlab/base_models/t5/goal2dialogue/run_goal2dialogue.sh     | 2 +-
 convlab/base_models/t5/nlg/run_nlg.sh                         | 4 ++--
 convlab/base_models/t5/nlg/run_nlg_fewshot.sh                 | 4 ++--
 convlab/base_models/t5/nlg/run_nlg_multitask.sh               | 4 ++--
 convlab/base_models/t5/nlg/run_nlg_pretrain.sh                | 2 +-
 convlab/base_models/t5/nlu/run_nlu.sh                         | 4 ++--
 convlab/base_models/t5/nlu/run_nlu_fewshot.sh                 | 4 ++--
 convlab/base_models/t5/nlu/run_nlu_multitask.sh               | 4 ++--
 convlab/base_models/t5/nlu/run_nlu_pretrain.sh                | 2 +-
 convlab/base_models/t5/nlu/run_retnlu.sh                      | 4 ++--
 convlab/base_models/t5/nlu/run_retnlu_fewshot.sh              | 4 ++--
 convlab/base_models/t5/nlu/run_retnlu_in_context.sh           | 4 ++--
 convlab/base_models/t5/nlu/run_retnlu_in_context_fewshot.sh   | 4 ++--
 convlab/base_models/t5/rg/run_rg.sh                           | 2 +-
 21 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/convlab/base_models/gpt/keyword_extraction/train_t5_key2gen+key2gen_noisy.sh b/convlab/base_models/gpt/keyword_extraction/train_t5_key2gen+key2gen_noisy.sh
index 878acdd6..8e0b3617 100644
--- a/convlab/base_models/gpt/keyword_extraction/train_t5_key2gen+key2gen_noisy.sh
+++ b/convlab/base_models/gpt/keyword_extraction/train_t5_key2gen+key2gen_noisy.sh
@@ -35,7 +35,7 @@ python -m torch.distributed.launch --master_port ${master_port} \
     --model_name_or_path ${model_name_or_path} \
     --do_train \
     --save_steps 5000 \
-    --save_total_limit 3 \
+    --save_total_limit 1 \
     --cache_dir ${cache_dir} \
     --output_dir ${output_dir} \
     --logging_dir ${logging_dir} \
diff --git a/convlab/base_models/gpt/keyword_extraction/train_t5_rg.sh b/convlab/base_models/gpt/keyword_extraction/train_t5_rg.sh
index b2de5541..8d9a019b 100644
--- a/convlab/base_models/gpt/keyword_extraction/train_t5_rg.sh
+++ b/convlab/base_models/gpt/keyword_extraction/train_t5_rg.sh
@@ -35,7 +35,7 @@ python -m torch.distributed.launch --master_port ${master_port} \
     --model_name_or_path ${model_name_or_path} \
     --do_train \
     --save_steps 5000 \
-    --save_total_limit 3 \
+    --save_total_limit 1 \
     --cache_dir ${cache_dir} \
     --output_dir ${output_dir} \
     --logging_dir ${logging_dir} \
diff --git a/convlab/base_models/gpt/keyword_extraction/train_t5_rg_key2gen+key2gen_noisy.sh b/convlab/base_models/gpt/keyword_extraction/train_t5_rg_key2gen+key2gen_noisy.sh
index 74c41816..75b79932 100644
--- a/convlab/base_models/gpt/keyword_extraction/train_t5_rg_key2gen+key2gen_noisy.sh
+++ b/convlab/base_models/gpt/keyword_extraction/train_t5_rg_key2gen+key2gen_noisy.sh
@@ -35,7 +35,7 @@ python -m torch.distributed.launch --master_port ${master_port} \
     --model_name_or_path ${model_name_or_path} \
     --do_train \
     --save_steps 5000 \
-    --save_total_limit 3 \
+    --save_total_limit 1 \
     --cache_dir ${cache_dir} \
     --output_dir ${output_dir} \
     --logging_dir ${logging_dir} \
diff --git a/convlab/base_models/t5/dst/run_dst.sh b/convlab/base_models/t5/dst/run_dst.sh
index 0704ebf9..05975400 100644
--- a/convlab/base_models/t5/dst/run_dst.sh
+++ b/convlab/base_models/t5/dst/run_dst.sh
@@ -52,7 +52,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python ../run_seq2seq.py \
@@ -77,7 +77,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
diff --git a/convlab/base_models/t5/dst/run_dst_fewshot.sh b/convlab/base_models/t5/dst/run_dst_fewshot.sh
index f548c053..4acd6057 100644
--- a/convlab/base_models/t5/dst/run_dst_fewshot.sh
+++ b/convlab/base_models/t5/dst/run_dst_fewshot.sh
@@ -54,7 +54,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python ../run_seq2seq.py \
@@ -79,7 +79,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order}
diff --git a/convlab/base_models/t5/dst/run_dst_multitask.sh b/convlab/base_models/t5/dst/run_dst_multitask.sh
index 0f3b60a6..aefb1d52 100644
--- a/convlab/base_models/t5/dst/run_dst_multitask.sh
+++ b/convlab/base_models/t5/dst/run_dst_multitask.sh
@@ -61,7 +61,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python ../run_seq2seq.py \
@@ -86,7 +86,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
diff --git a/convlab/base_models/t5/dst/run_dst_pretrain.sh b/convlab/base_models/t5/dst/run_dst_pretrain.sh
index 29cc280e..1a995f4c 100644
--- a/convlab/base_models/t5/dst/run_dst_pretrain.sh
+++ b/convlab/base_models/t5/dst/run_dst_pretrain.sh
@@ -63,5 +63,5 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
diff --git a/convlab/base_models/t5/goal2dialogue/run_goal2dialogue.sh b/convlab/base_models/t5/goal2dialogue/run_goal2dialogue.sh
index 0d10fe50..ac0a877a 100644
--- a/convlab/base_models/t5/goal2dialogue/run_goal2dialogue.sh
+++ b/convlab/base_models/t5/goal2dialogue/run_goal2dialogue.sh
@@ -47,5 +47,5 @@ python -m torch.distributed.launch \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
diff --git a/convlab/base_models/t5/nlg/run_nlg.sh b/convlab/base_models/t5/nlg/run_nlg.sh
index c45079a6..0b5fa390 100644
--- a/convlab/base_models/t5/nlg/run_nlg.sh
+++ b/convlab/base_models/t5/nlg/run_nlg.sh
@@ -52,7 +52,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python ../run_seq2seq.py \
@@ -77,7 +77,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
diff --git a/convlab/base_models/t5/nlg/run_nlg_fewshot.sh b/convlab/base_models/t5/nlg/run_nlg_fewshot.sh
index 4e00fb9d..61e50cda 100644
--- a/convlab/base_models/t5/nlg/run_nlg_fewshot.sh
+++ b/convlab/base_models/t5/nlg/run_nlg_fewshot.sh
@@ -55,7 +55,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python ../run_seq2seq.py \
@@ -80,7 +80,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order}
diff --git a/convlab/base_models/t5/nlg/run_nlg_multitask.sh b/convlab/base_models/t5/nlg/run_nlg_multitask.sh
index 9b0a3d47..dec894aa 100644
--- a/convlab/base_models/t5/nlg/run_nlg_multitask.sh
+++ b/convlab/base_models/t5/nlg/run_nlg_multitask.sh
@@ -61,7 +61,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python ../run_seq2seq.py \
@@ -86,7 +86,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
diff --git a/convlab/base_models/t5/nlg/run_nlg_pretrain.sh b/convlab/base_models/t5/nlg/run_nlg_pretrain.sh
index a1a1b601..7ce91a50 100644
--- a/convlab/base_models/t5/nlg/run_nlg_pretrain.sh
+++ b/convlab/base_models/t5/nlg/run_nlg_pretrain.sh
@@ -60,5 +60,5 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
diff --git a/convlab/base_models/t5/nlu/run_nlu.sh b/convlab/base_models/t5/nlu/run_nlu.sh
index 8cba74ac..b81b04c0 100644
--- a/convlab/base_models/t5/nlu/run_nlu.sh
+++ b/convlab/base_models/t5/nlu/run_nlu.sh
@@ -52,7 +52,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python ../run_seq2seq.py \
@@ -77,7 +77,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
diff --git a/convlab/base_models/t5/nlu/run_nlu_fewshot.sh b/convlab/base_models/t5/nlu/run_nlu_fewshot.sh
index 8da69801..a966310a 100644
--- a/convlab/base_models/t5/nlu/run_nlu_fewshot.sh
+++ b/convlab/base_models/t5/nlu/run_nlu_fewshot.sh
@@ -55,7 +55,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python ../run_seq2seq.py \
@@ -80,7 +80,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order}
diff --git a/convlab/base_models/t5/nlu/run_nlu_multitask.sh b/convlab/base_models/t5/nlu/run_nlu_multitask.sh
index 6380acff..b91f21e3 100644
--- a/convlab/base_models/t5/nlu/run_nlu_multitask.sh
+++ b/convlab/base_models/t5/nlu/run_nlu_multitask.sh
@@ -61,7 +61,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python ../run_seq2seq.py \
@@ -86,7 +86,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
diff --git a/convlab/base_models/t5/nlu/run_nlu_pretrain.sh b/convlab/base_models/t5/nlu/run_nlu_pretrain.sh
index 0f500292..c0511254 100644
--- a/convlab/base_models/t5/nlu/run_nlu_pretrain.sh
+++ b/convlab/base_models/t5/nlu/run_nlu_pretrain.sh
@@ -63,5 +63,5 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
diff --git a/convlab/base_models/t5/nlu/run_retnlu.sh b/convlab/base_models/t5/nlu/run_retnlu.sh
index b45a0e45..fd44e063 100644
--- a/convlab/base_models/t5/nlu/run_retnlu.sh
+++ b/convlab/base_models/t5/nlu/run_retnlu.sh
@@ -53,7 +53,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python ../run_seq2seq.py \
@@ -78,7 +78,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
diff --git a/convlab/base_models/t5/nlu/run_retnlu_fewshot.sh b/convlab/base_models/t5/nlu/run_retnlu_fewshot.sh
index d165859b..e778c80b 100644
--- a/convlab/base_models/t5/nlu/run_retnlu_fewshot.sh
+++ b/convlab/base_models/t5/nlu/run_retnlu_fewshot.sh
@@ -56,7 +56,7 @@ num_train_epochs=100
 #     --gradient_accumulation_steps ${gradient_accumulation_steps} \
 #     --learning_rate ${lr} \
 #     --num_train_epochs ${num_train_epochs} \
-#     --adafactor \
+#     --optim adafactor \
 #     --gradient_checkpointing
 
 # python ../run_seq2seq.py \
@@ -81,7 +81,7 @@ num_train_epochs=100
 #     --gradient_accumulation_steps ${gradient_accumulation_steps} \
 #     --learning_rate ${lr} \
 #     --num_train_epochs ${num_train_epochs} \
-#     --adafactor \
+#     --optim adafactor \
 #     --gradient_checkpointing
 
 # python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order}
diff --git a/convlab/base_models/t5/nlu/run_retnlu_in_context.sh b/convlab/base_models/t5/nlu/run_retnlu_in_context.sh
index 82dae873..775b4b06 100644
--- a/convlab/base_models/t5/nlu/run_retnlu_in_context.sh
+++ b/convlab/base_models/t5/nlu/run_retnlu_in_context.sh
@@ -53,7 +53,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python ../run_seq2seq.py \
@@ -78,7 +78,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
diff --git a/convlab/base_models/t5/nlu/run_retnlu_in_context_fewshot.sh b/convlab/base_models/t5/nlu/run_retnlu_in_context_fewshot.sh
index 836152f8..913ef7cb 100644
--- a/convlab/base_models/t5/nlu/run_retnlu_in_context_fewshot.sh
+++ b/convlab/base_models/t5/nlu/run_retnlu_in_context_fewshot.sh
@@ -56,7 +56,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python ../run_seq2seq.py \
@@ -81,7 +81,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
 
 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order}
diff --git a/convlab/base_models/t5/rg/run_rg.sh b/convlab/base_models/t5/rg/run_rg.sh
index ff97ce5a..976e862e 100644
--- a/convlab/base_models/t5/rg/run_rg.sh
+++ b/convlab/base_models/t5/rg/run_rg.sh
@@ -65,5 +65,5 @@ python -m torch.distributed.launch \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
-- 
GitLab