diff --git a/convlab/base_models/gpt/keyword_extraction/train_t5_key2gen+key2gen_noisy.sh b/convlab/base_models/gpt/keyword_extraction/train_t5_key2gen+key2gen_noisy.sh
index 878acdd6c71dc4b20a2946b6a58049f5463f0c9e..8e0b3617210408d3226bd7da9f675534c9458398 100644
--- a/convlab/base_models/gpt/keyword_extraction/train_t5_key2gen+key2gen_noisy.sh
+++ b/convlab/base_models/gpt/keyword_extraction/train_t5_key2gen+key2gen_noisy.sh
@@ -35,7 +35,7 @@ python -m torch.distributed.launch --master_port ${master_port} \
     --model_name_or_path ${model_name_or_path} \
     --do_train \
     --save_steps 5000 \
-    --save_total_limit 3 \
+    --save_total_limit 1 \
     --cache_dir ${cache_dir} \
     --output_dir ${output_dir} \
     --logging_dir ${logging_dir} \
diff --git a/convlab/base_models/gpt/keyword_extraction/train_t5_rg.sh b/convlab/base_models/gpt/keyword_extraction/train_t5_rg.sh
index b2de55410064b0234f8416b6338e2a070c79147f..8d9a019bd0fa10d63586c023705807a3eafd5ff0 100644
--- a/convlab/base_models/gpt/keyword_extraction/train_t5_rg.sh
+++ b/convlab/base_models/gpt/keyword_extraction/train_t5_rg.sh
@@ -35,7 +35,7 @@ python -m torch.distributed.launch --master_port ${master_port} \
     --model_name_or_path ${model_name_or_path} \
     --do_train \
     --save_steps 5000 \
-    --save_total_limit 3 \
+    --save_total_limit 1 \
     --cache_dir ${cache_dir} \
     --output_dir ${output_dir} \
     --logging_dir ${logging_dir} \
diff --git a/convlab/base_models/gpt/keyword_extraction/train_t5_rg_key2gen+key2gen_noisy.sh b/convlab/base_models/gpt/keyword_extraction/train_t5_rg_key2gen+key2gen_noisy.sh
index 74c418164815cfd538e17cb08cd0de7c24ba7624..75b79932bb94b0699d2e2349a4c8cb8846915cb3 100644
--- a/convlab/base_models/gpt/keyword_extraction/train_t5_rg_key2gen+key2gen_noisy.sh
+++ b/convlab/base_models/gpt/keyword_extraction/train_t5_rg_key2gen+key2gen_noisy.sh
@@ -35,7 +35,7 @@ python -m torch.distributed.launch --master_port ${master_port} \
     --model_name_or_path ${model_name_or_path} \
     --do_train \
     --save_steps 5000 \
-    --save_total_limit 3 \
+    --save_total_limit 1 \
     --cache_dir ${cache_dir} \
     --output_dir ${output_dir} \
     --logging_dir ${logging_dir} \
diff --git a/convlab/base_models/t5/dst/run_dst.sh b/convlab/base_models/t5/dst/run_dst.sh
index 0704ebf9257be910c2148d052574b535182be07e..05975400bd1ca901e1058dc80587e2cce0b0f1bb 100644
--- a/convlab/base_models/t5/dst/run_dst.sh
+++ b/convlab/base_models/t5/dst/run_dst.sh
@@ -52,7 +52,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python ../run_seq2seq.py \
@@ -77,7 +77,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
diff --git a/convlab/base_models/t5/dst/run_dst_fewshot.sh b/convlab/base_models/t5/dst/run_dst_fewshot.sh
index f548c053b544b51101f0cfbcc0b1a7b3a09c8088..4acd605706752c67d1f1df3b5fa04df13d2e46ad 100644
--- a/convlab/base_models/t5/dst/run_dst_fewshot.sh
+++ b/convlab/base_models/t5/dst/run_dst_fewshot.sh
@@ -54,7 +54,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python ../run_seq2seq.py \
@@ -79,7 +79,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order}
diff --git a/convlab/base_models/t5/dst/run_dst_multitask.sh b/convlab/base_models/t5/dst/run_dst_multitask.sh
index 0f3b60a63a2f1bf861cb430a247121d966aac822..aefb1d5200db292d0e68e7d39498dfbd182d1fa0 100644
--- a/convlab/base_models/t5/dst/run_dst_multitask.sh
+++ b/convlab/base_models/t5/dst/run_dst_multitask.sh
@@ -61,7 +61,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python ../run_seq2seq.py \
@@ -86,7 +86,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
diff --git a/convlab/base_models/t5/dst/run_dst_pretrain.sh b/convlab/base_models/t5/dst/run_dst_pretrain.sh
index 29cc280e590ebeb700e2f60af6d1dd6f36d5741f..1a995f4cbd24cd6a84151844b54559e3ce332b28 100644
--- a/convlab/base_models/t5/dst/run_dst_pretrain.sh
+++ b/convlab/base_models/t5/dst/run_dst_pretrain.sh
@@ -63,5 +63,5 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
diff --git a/convlab/base_models/t5/goal2dialogue/run_goal2dialogue.sh b/convlab/base_models/t5/goal2dialogue/run_goal2dialogue.sh
index 0d10fe5072c238b47b2302a7b0d4cfa8ced9ed73..ac0a877a146815395f392655b9ad861adbc2311e 100644
--- a/convlab/base_models/t5/goal2dialogue/run_goal2dialogue.sh
+++ b/convlab/base_models/t5/goal2dialogue/run_goal2dialogue.sh
@@ -47,5 +47,5 @@ python -m torch.distributed.launch \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
diff --git a/convlab/base_models/t5/nlg/run_nlg.sh b/convlab/base_models/t5/nlg/run_nlg.sh
index c45079a6e5b253accb9e463407cef42ba2223272..0b5fa390dcaf98b098abc17f18026994ee54702c 100644
--- a/convlab/base_models/t5/nlg/run_nlg.sh
+++ b/convlab/base_models/t5/nlg/run_nlg.sh
@@ -52,7 +52,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python ../run_seq2seq.py \
@@ -77,7 +77,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
diff --git a/convlab/base_models/t5/nlg/run_nlg_fewshot.sh b/convlab/base_models/t5/nlg/run_nlg_fewshot.sh
index 4e00fb9dbd0b7c0f98850c300960787aafcc6909..61e50cdaa094b301660d38f74fcf8420424a7d3f 100644
--- a/convlab/base_models/t5/nlg/run_nlg_fewshot.sh
+++ b/convlab/base_models/t5/nlg/run_nlg_fewshot.sh
@@ -55,7 +55,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python ../run_seq2seq.py \
@@ -80,7 +80,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order}
diff --git a/convlab/base_models/t5/nlg/run_nlg_multitask.sh b/convlab/base_models/t5/nlg/run_nlg_multitask.sh
index 9b0a3d47e68ddaffe53112b3eb6a51030902b44d..dec894aab37a37ba7923d60431fb22ef5ac4d6b6 100644
--- a/convlab/base_models/t5/nlg/run_nlg_multitask.sh
+++ b/convlab/base_models/t5/nlg/run_nlg_multitask.sh
@@ -61,7 +61,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python ../run_seq2seq.py \
@@ -86,7 +86,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
diff --git a/convlab/base_models/t5/nlg/run_nlg_pretrain.sh b/convlab/base_models/t5/nlg/run_nlg_pretrain.sh
index a1a1b6010c14953a18d3842235eb04e0f2d0fcc9..7ce91a5000cd4627d1549efe88e4d4826bfca0a1 100644
--- a/convlab/base_models/t5/nlg/run_nlg_pretrain.sh
+++ b/convlab/base_models/t5/nlg/run_nlg_pretrain.sh
@@ -60,5 +60,5 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
diff --git a/convlab/base_models/t5/nlu/run_nlu.sh b/convlab/base_models/t5/nlu/run_nlu.sh
index 8cba74aca0510464d176aa44ef0388c914796f5f..b81b04c0f360fe55c25e55f85ff8ceac3578a99d 100644
--- a/convlab/base_models/t5/nlu/run_nlu.sh
+++ b/convlab/base_models/t5/nlu/run_nlu.sh
@@ -52,7 +52,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python ../run_seq2seq.py \
@@ -77,7 +77,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
diff --git a/convlab/base_models/t5/nlu/run_nlu_fewshot.sh b/convlab/base_models/t5/nlu/run_nlu_fewshot.sh
index 8da69801df77d8f72d23204c8cf008ea7512d10c..a966310a5bea242db413dda7b9ca12bcbda0ae43 100644
--- a/convlab/base_models/t5/nlu/run_nlu_fewshot.sh
+++ b/convlab/base_models/t5/nlu/run_nlu_fewshot.sh
@@ -55,7 +55,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python ../run_seq2seq.py \
@@ -80,7 +80,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order}
diff --git a/convlab/base_models/t5/nlu/run_nlu_multitask.sh b/convlab/base_models/t5/nlu/run_nlu_multitask.sh
index 6380acff2fc5e8a2712e530823c5d0b61af451a2..b91f21e3f02270ff2f1dfa42fe8baa8f16a20acc 100644
--- a/convlab/base_models/t5/nlu/run_nlu_multitask.sh
+++ b/convlab/base_models/t5/nlu/run_nlu_multitask.sh
@@ -61,7 +61,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python ../run_seq2seq.py \
@@ -86,7 +86,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
diff --git a/convlab/base_models/t5/nlu/run_nlu_pretrain.sh b/convlab/base_models/t5/nlu/run_nlu_pretrain.sh
index 0f500292b54ff16381783248652f893b218bc2e8..c0511254f44ff39328ec5759253df5aae0a0d360 100644
--- a/convlab/base_models/t5/nlu/run_nlu_pretrain.sh
+++ b/convlab/base_models/t5/nlu/run_nlu_pretrain.sh
@@ -63,5 +63,5 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
diff --git a/convlab/base_models/t5/nlu/run_retnlu.sh b/convlab/base_models/t5/nlu/run_retnlu.sh
index b45a0e45643fd5a2247633305df7e0c1f11ce848..fd44e063dc84da86e4f77ead69b0e329ac0cc7d1 100644
--- a/convlab/base_models/t5/nlu/run_retnlu.sh
+++ b/convlab/base_models/t5/nlu/run_retnlu.sh
@@ -53,7 +53,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python ../run_seq2seq.py \
@@ -78,7 +78,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
diff --git a/convlab/base_models/t5/nlu/run_retnlu_fewshot.sh b/convlab/base_models/t5/nlu/run_retnlu_fewshot.sh
index d165859b01485b7885f88e5b1ae3a279e41f4caf..e778c80bdc844dfea732421e9234e8965e20d987 100644
--- a/convlab/base_models/t5/nlu/run_retnlu_fewshot.sh
+++ b/convlab/base_models/t5/nlu/run_retnlu_fewshot.sh
@@ -56,7 +56,7 @@ num_train_epochs=100
 # --gradient_accumulation_steps ${gradient_accumulation_steps} \
 # --learning_rate ${lr} \
 # --num_train_epochs ${num_train_epochs} \
-# --adafactor \
+# --optim adafactor \
 # --gradient_checkpointing

 # python ../run_seq2seq.py \
@@ -81,7 +81,7 @@ num_train_epochs=100
 # --gradient_accumulation_steps ${gradient_accumulation_steps} \
 # --learning_rate ${lr} \
 # --num_train_epochs ${num_train_epochs} \
-# --adafactor \
+# --optim adafactor \
 # --gradient_checkpointing

 # python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order}
diff --git a/convlab/base_models/t5/nlu/run_retnlu_in_context.sh b/convlab/base_models/t5/nlu/run_retnlu_in_context.sh
index 82dae873ebb419d1d311f347a813f1da6071dccb..775b4b06ed35f82610466ca96e518e95eb9b86f8 100644
--- a/convlab/base_models/t5/nlu/run_retnlu_in_context.sh
+++ b/convlab/base_models/t5/nlu/run_retnlu_in_context.sh
@@ -53,7 +53,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python ../run_seq2seq.py \
@@ -78,7 +78,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
diff --git a/convlab/base_models/t5/nlu/run_retnlu_in_context_fewshot.sh b/convlab/base_models/t5/nlu/run_retnlu_in_context_fewshot.sh
index 836152f80e9d21695aaadde0016aa7399eedbdf2..913ef7cbad5fae0b3092c29fe0cd5f44604c333d 100644
--- a/convlab/base_models/t5/nlu/run_retnlu_in_context_fewshot.sh
+++ b/convlab/base_models/t5/nlu/run_retnlu_in_context_fewshot.sh
@@ -56,7 +56,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python ../run_seq2seq.py \
@@ -81,7 +81,7 @@ python ../run_seq2seq.py \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing

 python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order}
diff --git a/convlab/base_models/t5/rg/run_rg.sh b/convlab/base_models/t5/rg/run_rg.sh
index ff97ce5a9f63deae0ee5660c60bf111e891ef7da..976e862e40c32d1a2f78c77564e577af5edcccfa 100644
--- a/convlab/base_models/t5/rg/run_rg.sh
+++ b/convlab/base_models/t5/rg/run_rg.sh
@@ -65,5 +65,5 @@ python -m torch.distributed.launch \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
-    --adafactor \
+    --optim adafactor \
     --gradient_checkpointing
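Note: the standalone `--adafactor` boolean flag of HuggingFace Transformers' `TrainingArguments` is deprecated in recent releases in favor of selecting the optimizer by name, which is what these scripts now do with `--optim adafactor`; `--save_total_limit 1` keeps only the newest checkpoint on disk instead of the last three. A minimal Python sketch of the equivalent settings (hypothetical `output_dir`, assuming a Transformers version where `optim` accepts "adafactor"):

    from transformers import Seq2SeqTrainingArguments

    # Same settings the patched scripts pass on the command line.
    args = Seq2SeqTrainingArguments(
        output_dir="output",           # hypothetical path
        optim="adafactor",             # replaces the deprecated `adafactor=True` flag
        save_steps=5000,
        save_total_limit=1,            # keep only the most recent checkpoint
        gradient_checkpointing=True,   # trade compute for memory
    )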