From beda1b76d181e77f6396510f346c3523d27d0091 Mon Sep 17 00:00:00 2001 From: zqwerty <zhuq96@hotmail.com> Date: Sat, 7 May 2022 15:39:15 +0800 Subject: [PATCH] add rm datadir before multiple datasets training to avoid repeating samples --- convlab2/base_models/bert/train_bio.sh | 1 + .../base_models/gpt/keyword_extraction/gen_pretraining_data.sh | 1 + convlab2/base_models/t5/dst/run_dst_pretrain.sh | 1 + convlab2/base_models/t5/nlg/run_nlg_pretrain.sh | 1 + convlab2/base_models/t5/nlu/run_nlu_pretrain.sh | 1 + convlab2/base_models/t5/rg/run_rg.sh | 1 + 6 files changed, 6 insertions(+) diff --git a/convlab2/base_models/bert/train_bio.sh b/convlab2/base_models/bert/train_bio.sh index 59973634..4d6a4d8d 100644 --- a/convlab2/base_models/bert/train_bio.sh +++ b/convlab2/base_models/bert/train_bio.sh @@ -20,6 +20,7 @@ num_train_epochs=1 metric_for_best_model="f1" names=$(echo ${dataset_name} | tr "+" "\n") +rm -r ${data_dir} mkdir -p ${data_dir} for name in ${names}; do diff --git a/convlab2/base_models/gpt/keyword_extraction/gen_pretraining_data.sh b/convlab2/base_models/gpt/keyword_extraction/gen_pretraining_data.sh index 8a4290c3..c48f49b5 100644 --- a/convlab2/base_models/gpt/keyword_extraction/gen_pretraining_data.sh +++ b/convlab2/base_models/gpt/keyword_extraction/gen_pretraining_data.sh @@ -2,6 +2,7 @@ dataset_name="metalwoz+sgd+tm1+tm2+tm3" names=$(echo ${dataset_name} | tr "+" "\n") model_type="gpt" data_dir=data/key2gen_shuffle_noisy/${model_type}/${name}/${dataset_name} +rm -r ${data_dir} mkdir -p ${data_dir} train_file="${data_dir}/train.json" validation_file="${data_dir}/validation.json" diff --git a/convlab2/base_models/t5/dst/run_dst_pretrain.sh b/convlab2/base_models/t5/dst/run_dst_pretrain.sh index f1c5c3d4..11995943 100644 --- a/convlab2/base_models/t5/dst/run_dst_pretrain.sh +++ b/convlab2/base_models/t5/dst/run_dst_pretrain.sh @@ -25,6 +25,7 @@ lr=1e-3 num_train_epochs=1 names=$(echo ${dataset_name} | tr "+" "\n") +rm -r ${data_dir} mkdir -p ${data_dir} for name in ${names}; do diff --git a/convlab2/base_models/t5/nlg/run_nlg_pretrain.sh b/convlab2/base_models/t5/nlg/run_nlg_pretrain.sh index 4ff752b6..4d2b440e 100644 --- a/convlab2/base_models/t5/nlg/run_nlg_pretrain.sh +++ b/convlab2/base_models/t5/nlg/run_nlg_pretrain.sh @@ -25,6 +25,7 @@ lr=1e-3 num_train_epochs=1 names=$(echo ${dataset_name} | tr "+" "\n") +rm -r ${data_dir} mkdir -p ${data_dir} for name in ${names}; do diff --git a/convlab2/base_models/t5/nlu/run_nlu_pretrain.sh b/convlab2/base_models/t5/nlu/run_nlu_pretrain.sh index 3a059c2d..ccc7c08f 100644 --- a/convlab2/base_models/t5/nlu/run_nlu_pretrain.sh +++ b/convlab2/base_models/t5/nlu/run_nlu_pretrain.sh @@ -25,6 +25,7 @@ lr=1e-3 num_train_epochs=1 names=$(echo ${dataset_name} | tr "+" "\n") +rm -r ${data_dir} mkdir -p ${data_dir} for name in ${names}; do diff --git a/convlab2/base_models/t5/rg/run_rg.sh b/convlab2/base_models/t5/rg/run_rg.sh index ac065b81..308c639c 100644 --- a/convlab2/base_models/t5/rg/run_rg.sh +++ b/convlab2/base_models/t5/rg/run_rg.sh @@ -23,6 +23,7 @@ lr=1e-3 num_train_epochs=1 names=$(echo ${dataset_name} | tr "+" "\n") +rm -r ${data_dir} mkdir -p ${data_dir} for name in ${names}; do -- GitLab