From 4530095d0e6716d751aeffbf1dd55dffad99c2e5 Mon Sep 17 00:00:00 2001 From: zqwerty <zhuq96@hotmail.com> Date: Thu, 23 Feb 2023 20:31:56 +0800 Subject: [PATCH] update run_seq2seq.py for rename inferenced file and handle error of AutoTokenizer on UnifiedQA_v2 --- .../base_models/t5/dst/merge_predict_res.py | 2 +- convlab/base_models/t5/dst/run_dst.sh | 12 +- convlab/base_models/t5/dst/run_dst_fewshot.sh | 2 +- .../base_models/t5/dst/run_dst_multitask.sh | 2 +- .../base_models/t5/nlg/merge_predict_res.py | 14 +- convlab/base_models/t5/nlg/run_nlg.sh | 74 +++++----- convlab/base_models/t5/nlg/run_nlg_fewshot.sh | 2 +- .../base_models/t5/nlg/run_nlg_multitask.sh | 126 +++++++++--------- .../base_models/t5/nlu/merge_predict_res.py | 2 +- convlab/base_models/t5/nlu/run_nlu.sh | 16 +-- convlab/base_models/t5/nlu/run_nlu_fewshot.sh | 2 +- .../base_models/t5/nlu/run_nlu_multitask.sh | 2 +- convlab/base_models/t5/nlu/run_retnlu.sh | 2 +- .../base_models/t5/nlu/run_retnlu_fewshot.sh | 2 +- .../t5/nlu/run_retnlu_in_context.sh | 2 +- .../t5/nlu/run_retnlu_in_context_fewshot.sh | 2 +- convlab/base_models/t5/run_seq2seq.py | 72 ++++++---- 17 files changed, 184 insertions(+), 152 deletions(-) diff --git a/convlab/base_models/t5/dst/merge_predict_res.py b/convlab/base_models/t5/dst/merge_predict_res.py index f25279a8..a1445ea3 100755 --- a/convlab/base_models/t5/dst/merge_predict_res.py +++ b/convlab/base_models/t5/dst/merge_predict_res.py @@ -35,7 +35,7 @@ if __name__ == '__main__': parser.add_argument('--speaker', '-s', type=str, choices=['user', 'system', 'all'], help='speaker(s) of utterances') parser.add_argument('--save_dir', type=str, help='merged data will be saved as $save_dir/predictions.json. default: on the same directory as predict_result') parser.add_argument('--context_window_size', '-c', type=int, default=0, help='how many contextual utterances are considered') - parser.add_argument('--predict_result', '-p', type=str, required=True, help='path to the output file generated_predictions.json') + parser.add_argument('--predict_result', '-p', type=str, required=True, help='path to the output file test_generated_predictions.json') parser.add_argument('--dial_ids_order', '-o', type=int, default=None, help='which data order is used for experiments') args = parser.parse_args() print(args) diff --git a/convlab/base_models/t5/dst/run_dst.sh b/convlab/base_models/t5/dst/run_dst.sh index 05975400..0e2b9496 100644 --- a/convlab/base_models/t5/dst/run_dst.sh +++ b/convlab/base_models/t5/dst/run_dst.sh @@ -1,6 +1,6 @@ n_gpus=1 task_name="dst" -dataset_name=$1 +dataset_name=crosswoz speaker="user" context_window_size=100 data_dir="data/${task_name}/${dataset_name}/${speaker}/context_${context_window_size}" @@ -17,10 +17,10 @@ target_column="state_seq" truncation_side="left" max_source_length=1024 max_target_length=512 -model_name_or_path="t5-small" -per_device_train_batch_size=64 -per_device_eval_batch_size=64 -gradient_accumulation_steps=2 +model_name_or_path="/data/zhuqi/pre-trained-models/mt5-small" +per_device_train_batch_size=16 +per_device_eval_batch_size=16 +gradient_accumulation_steps=4 lr=1e-3 num_train_epochs=10 @@ -80,6 +80,6 @@ python ../run_seq2seq.py \ --optim adafactor \ --gradient_checkpointing -python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json +python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json python ../../../dst/evaluate_unified_datasets.py -p ${output_dir}/predictions.json diff --git a/convlab/base_models/t5/dst/run_dst_fewshot.sh b/convlab/base_models/t5/dst/run_dst_fewshot.sh index 4acd6057..2e2e9998 100644 --- a/convlab/base_models/t5/dst/run_dst_fewshot.sh +++ b/convlab/base_models/t5/dst/run_dst_fewshot.sh @@ -82,6 +82,6 @@ python ../run_seq2seq.py \ --optim adafactor \ --gradient_checkpointing -python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order} +python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json -o ${dial_ids_order} python ../../../dst/evaluate_unified_datasets.py -p ${output_dir}/predictions.json diff --git a/convlab/base_models/t5/dst/run_dst_multitask.sh b/convlab/base_models/t5/dst/run_dst_multitask.sh index aefb1d52..4803b521 100644 --- a/convlab/base_models/t5/dst/run_dst_multitask.sh +++ b/convlab/base_models/t5/dst/run_dst_multitask.sh @@ -89,6 +89,6 @@ python ../run_seq2seq.py \ --optim adafactor \ --gradient_checkpointing -python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json +python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json python ../../../dst/evaluate_unified_datasets.py -p ${output_dir}/predictions.json diff --git a/convlab/base_models/t5/nlg/merge_predict_res.py b/convlab/base_models/t5/nlg/merge_predict_res.py index 7d2995d8..9e4615cf 100755 --- a/convlab/base_models/t5/nlg/merge_predict_res.py +++ b/convlab/base_models/t5/nlg/merge_predict_res.py @@ -24,19 +24,27 @@ def merge(dataset_names, speaker, save_dir, context_window_size, predict_result) continue sample['predictions'] = {'utterance': predict_result[i]} i += 1 - merged.append(sample) + if args.sub_dataset: + if dataset_name == args.sub_dataset: + merged.append(sample) + else: + merged.append(sample) - json.dump(merged, open(os.path.join(save_dir, 'predictions.json'), 'w', encoding='utf-8'), indent=2, ensure_ascii=False) + if args.sub_dataset: + json.dump(merged, open(os.path.join(save_dir, f'{args.sub_dataset}predictions.json'), 'w', encoding='utf-8'), indent=2, ensure_ascii=False) + else: + json.dump(merged, open(os.path.join(save_dir, 'predictions.json'), 'w', encoding='utf-8'), indent=2, ensure_ascii=False) if __name__ == '__main__': from argparse import ArgumentParser parser = ArgumentParser(description="merge predict results with original data for unified NLU evaluation") parser.add_argument('--dataset', '-d', metavar='dataset_name', type=str, help='name of the unified dataset') + parser.add_argument('--sub_dataset', metavar='sub dataset_name', type=str, help='name of the unified dataset') parser.add_argument('--speaker', '-s', type=str, choices=['user', 'system', 'all'], help='speaker(s) of utterances') parser.add_argument('--save_dir', type=str, help='merged data will be saved as $save_dir/predictions.json. default: on the same directory as predict_result') parser.add_argument('--context_window_size', '-c', type=int, default=0, help='how many contextual utterances are considered') - parser.add_argument('--predict_result', '-p', type=str, required=True, help='path to the output file generated_predictions.json') + parser.add_argument('--predict_result', '-p', type=str, required=True, help='path to the output file test_generated_predictions.json') parser.add_argument('--dial_ids_order', '-o', type=int, default=None, help='which data order is used for experiments') args = parser.parse_args() print(args) diff --git a/convlab/base_models/t5/nlg/run_nlg.sh b/convlab/base_models/t5/nlg/run_nlg.sh index 0b5fa390..b1ca7bf4 100644 --- a/convlab/base_models/t5/nlg/run_nlg.sh +++ b/convlab/base_models/t5/nlg/run_nlg.sh @@ -1,8 +1,8 @@ n_gpus=1 task_name="nlg" -dataset_name=$1 -speaker="system" -context_window_size=$2 +dataset_name=crosswoz +speaker="all" +context_window_size=0 data_dir="data/${task_name}/${dataset_name}/${speaker}/context_${context_window_size}" output_dir="output/${task_name}/${dataset_name}/${speaker}/context_${context_window_size}" cache_dir="../cache" @@ -17,43 +17,43 @@ target_column="response" truncation_side="left" max_source_length=512 max_target_length=512 -model_name_or_path="t5-small" -per_device_train_batch_size=128 -per_device_eval_batch_size=64 -gradient_accumulation_steps=4 +model_name_or_path="/data/zhuqi/pre-trained-models/mt5-small" +per_device_train_batch_size=32 +per_device_eval_batch_size=16 +gradient_accumulation_steps=8 lr=1e-3 num_train_epochs=10 -python ../create_data.py -t ${task_name} -d ${dataset_name} -s ${speaker} -c ${context_window_size} +# python ../create_data.py -t ${task_name} -d ${dataset_name} -s ${speaker} -c ${context_window_size} -python ../run_seq2seq.py \ - --task_name ${task_name} \ - --train_file ${train_file} \ - --validation_file ${validation_file} \ - --source_column ${source_column} \ - --target_column ${target_column} \ - --max_source_length ${max_source_length} \ - --max_target_length ${max_target_length} \ - --truncation_side ${truncation_side} \ - --model_name_or_path ${model_name_or_path} \ - --do_train \ - --do_eval \ - --save_strategy epoch \ - --evaluation_strategy epoch \ - --save_total_limit 1 \ - --prediction_loss_only \ - --cache_dir ${cache_dir} \ - --output_dir ${output_dir} \ - --logging_dir ${logging_dir} \ - --overwrite_output_dir \ - --preprocessing_num_workers 4 \ - --per_device_train_batch_size ${per_device_train_batch_size} \ - --per_device_eval_batch_size ${per_device_eval_batch_size} \ - --gradient_accumulation_steps ${gradient_accumulation_steps} \ - --learning_rate ${lr} \ - --num_train_epochs ${num_train_epochs} \ - --optim adafactor \ - --gradient_checkpointing +# python ../run_seq2seq.py \ +# --task_name ${task_name} \ +# --train_file ${train_file} \ +# --validation_file ${validation_file} \ +# --source_column ${source_column} \ +# --target_column ${target_column} \ +# --max_source_length ${max_source_length} \ +# --max_target_length ${max_target_length} \ +# --truncation_side ${truncation_side} \ +# --model_name_or_path ${model_name_or_path} \ +# --do_train \ +# --do_eval \ +# --save_strategy epoch \ +# --evaluation_strategy epoch \ +# --save_total_limit 1 \ +# --prediction_loss_only \ +# --cache_dir ${cache_dir} \ +# --output_dir ${output_dir} \ +# --logging_dir ${logging_dir} \ +# --overwrite_output_dir \ +# --preprocessing_num_workers 4 \ +# --per_device_train_batch_size ${per_device_train_batch_size} \ +# --per_device_eval_batch_size ${per_device_eval_batch_size} \ +# --gradient_accumulation_steps ${gradient_accumulation_steps} \ +# --learning_rate ${lr} \ +# --num_train_epochs ${num_train_epochs} \ +# --optim adafactor \ +# --gradient_checkpointing python ../run_seq2seq.py \ --task_name ${task_name} \ @@ -80,6 +80,6 @@ python ../run_seq2seq.py \ --optim adafactor \ --gradient_checkpointing -python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json +python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json python ../../../nlg/evaluate_unified_datasets.py -p ${output_dir}/predictions.json --dataset_name ${dataset_name} diff --git a/convlab/base_models/t5/nlg/run_nlg_fewshot.sh b/convlab/base_models/t5/nlg/run_nlg_fewshot.sh index 61e50cda..17f110a0 100644 --- a/convlab/base_models/t5/nlg/run_nlg_fewshot.sh +++ b/convlab/base_models/t5/nlg/run_nlg_fewshot.sh @@ -83,6 +83,6 @@ python ../run_seq2seq.py \ --optim adafactor \ --gradient_checkpointing -python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order} +python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json -o ${dial_ids_order} python ../../../nlg/evaluate_unified_datasets.py -p ${output_dir}/predictions.json --dataset_name ${dataset_name} diff --git a/convlab/base_models/t5/nlg/run_nlg_multitask.sh b/convlab/base_models/t5/nlg/run_nlg_multitask.sh index dec894aa..6caf2239 100644 --- a/convlab/base_models/t5/nlg/run_nlg_multitask.sh +++ b/convlab/base_models/t5/nlg/run_nlg_multitask.sh @@ -1,6 +1,6 @@ n_gpus=1 task_name="nlg" -dataset_name="sgd+tm1+tm2+tm3+multiwoz21" +dataset_name="tm1+tm2+tm3" speaker="system" context_window_size=0 data_dir="data/${task_name}/${dataset_name}/${speaker}/context_${context_window_size}" @@ -25,70 +25,70 @@ lr=1e-3 num_train_epochs=10 names=$(echo ${dataset_name} | tr "+" "\n") -rm -r ${data_dir} -mkdir -p ${data_dir} -for name in ${names}; -do - echo "preprocessing ${name}" - python ../create_data.py -t ${task_name} -d ${name} -s ${speaker} -c ${context_window_size} -done +# rm -r ${data_dir} +# mkdir -p ${data_dir} +# for name in ${names}; +# do +# echo "preprocessing ${name}" +# python ../create_data.py -t ${task_name} -d ${name} -s ${speaker} -c ${context_window_size} +# done -python merge_data.py $(echo ${dataset_name} | tr "+" " ") +# python merge_data.py $(echo ${dataset_name} | tr "+" " ") -python ../run_seq2seq.py \ - --task_name ${task_name} \ - --train_file ${train_file} \ - --validation_file ${validation_file} \ - --source_column ${source_column} \ - --target_column ${target_column} \ - --max_source_length ${max_source_length} \ - --max_target_length ${max_target_length} \ - --truncation_side ${truncation_side} \ - --model_name_or_path ${model_name_or_path} \ - --do_train \ - --do_eval \ - --save_strategy epoch \ - --evaluation_strategy epoch \ - --save_total_limit 1 \ - --prediction_loss_only \ - --cache_dir ${cache_dir} \ - --output_dir ${output_dir} \ - --logging_dir ${logging_dir} \ - --overwrite_output_dir \ - --preprocessing_num_workers 4 \ - --per_device_train_batch_size ${per_device_train_batch_size} \ - --per_device_eval_batch_size ${per_device_eval_batch_size} \ - --gradient_accumulation_steps ${gradient_accumulation_steps} \ - --learning_rate ${lr} \ - --num_train_epochs ${num_train_epochs} \ - --optim adafactor \ - --gradient_checkpointing +# python ../run_seq2seq.py \ +# --task_name ${task_name} \ +# --train_file ${train_file} \ +# --validation_file ${validation_file} \ +# --source_column ${source_column} \ +# --target_column ${target_column} \ +# --max_source_length ${max_source_length} \ +# --max_target_length ${max_target_length} \ +# --truncation_side ${truncation_side} \ +# --model_name_or_path ${model_name_or_path} \ +# --do_train \ +# --do_eval \ +# --save_strategy epoch \ +# --evaluation_strategy epoch \ +# --save_total_limit 1 \ +# --prediction_loss_only \ +# --cache_dir ${cache_dir} \ +# --output_dir ${output_dir} \ +# --logging_dir ${logging_dir} \ +# --overwrite_output_dir \ +# --preprocessing_num_workers 4 \ +# --per_device_train_batch_size ${per_device_train_batch_size} \ +# --per_device_eval_batch_size ${per_device_eval_batch_size} \ +# --gradient_accumulation_steps ${gradient_accumulation_steps} \ +# --learning_rate ${lr} \ +# --num_train_epochs ${num_train_epochs} \ +# --optim adafactor \ +# --gradient_checkpointing -python ../run_seq2seq.py \ - --task_name ${task_name} \ - --test_file ${test_file} \ - --source_column ${source_column} \ - --target_column ${target_column} \ - --max_source_length ${max_source_length} \ - --max_target_length ${max_target_length} \ - --truncation_side ${truncation_side} \ - --model_name_or_path ${output_dir} \ - --do_predict \ - --predict_with_generate \ - --metric_name_or_path ${metric_name_or_path} \ - --cache_dir ${cache_dir} \ - --output_dir ${output_dir} \ - --logging_dir ${logging_dir} \ - --overwrite_output_dir \ - --preprocessing_num_workers 4 \ - --per_device_train_batch_size ${per_device_train_batch_size} \ - --per_device_eval_batch_size ${per_device_eval_batch_size} \ - --gradient_accumulation_steps ${gradient_accumulation_steps} \ - --learning_rate ${lr} \ - --num_train_epochs ${num_train_epochs} \ - --optim adafactor \ - --gradient_checkpointing +# python ../run_seq2seq.py \ +# --task_name ${task_name} \ +# --test_file ${test_file} \ +# --source_column ${source_column} \ +# --target_column ${target_column} \ +# --max_source_length ${max_source_length} \ +# --max_target_length ${max_target_length} \ +# --truncation_side ${truncation_side} \ +# --model_name_or_path ${output_dir} \ +# --do_predict \ +# --predict_with_generate \ +# --metric_name_or_path ${metric_name_or_path} \ +# --cache_dir ${cache_dir} \ +# --output_dir ${output_dir} \ +# --logging_dir ${logging_dir} \ +# --overwrite_output_dir \ +# --preprocessing_num_workers 4 \ +# --per_device_train_batch_size ${per_device_train_batch_size} \ +# --per_device_eval_batch_size ${per_device_eval_batch_size} \ +# --gradient_accumulation_steps ${gradient_accumulation_steps} \ +# --learning_rate ${lr} \ +# --num_train_epochs ${num_train_epochs} \ +# --optim adafactor \ +# --gradient_checkpointing -python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json +# python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json -# python ../../../nlg/evaluate_unified_datasets.py -p ${output_dir}/predictions.json --dataset_name ${dataset_name} +python ../../../nlg/evaluate_unified_datasets.py -p ${output_dir}/Nonepredictions.json --dataset_name tm1 diff --git a/convlab/base_models/t5/nlu/merge_predict_res.py b/convlab/base_models/t5/nlu/merge_predict_res.py index e2471607..bdb7e404 100755 --- a/convlab/base_models/t5/nlu/merge_predict_res.py +++ b/convlab/base_models/t5/nlu/merge_predict_res.py @@ -35,7 +35,7 @@ if __name__ == '__main__': parser.add_argument('--speaker', '-s', type=str, choices=['user', 'system', 'all'], help='speaker(s) of utterances') parser.add_argument('--save_dir', type=str, help='merged data will be saved as $save_dir/predictions.json. default: on the same directory as predict_result') parser.add_argument('--context_window_size', '-c', type=int, default=0, help='how many contextual utterances are considered') - parser.add_argument('--predict_result', '-p', type=str, required=True, help='path to the output file generated_predictions.json') + parser.add_argument('--predict_result', '-p', type=str, required=True, help='path to the output file test_generated_predictions.json') parser.add_argument('--dial_ids_order', '-o', type=int, default=None, help='which data order is used for experiments') args = parser.parse_args() print(args) diff --git a/convlab/base_models/t5/nlu/run_nlu.sh b/convlab/base_models/t5/nlu/run_nlu.sh index b81b04c0..cf668b5d 100644 --- a/convlab/base_models/t5/nlu/run_nlu.sh +++ b/convlab/base_models/t5/nlu/run_nlu.sh @@ -1,8 +1,8 @@ n_gpus=1 task_name="nlu" -dataset_name=$1 -speaker="user" -context_window_size=$2 +dataset_name=crosswoz +speaker="all" +context_window_size=0 data_dir="data/${task_name}/${dataset_name}/${speaker}/context_${context_window_size}" output_dir="output/${task_name}/${dataset_name}/${speaker}/context_${context_window_size}" cache_dir="../cache" @@ -17,10 +17,10 @@ target_column="dialogue_acts_seq" truncation_side="left" max_source_length=512 max_target_length=512 -model_name_or_path="t5-small" -per_device_train_batch_size=128 -per_device_eval_batch_size=64 -gradient_accumulation_steps=2 +model_name_or_path="/data/zhuqi/pre-trained-models/mt5-small" +per_device_train_batch_size=16 +per_device_eval_batch_size=16 +gradient_accumulation_steps=16 lr=1e-3 num_train_epochs=10 @@ -80,6 +80,6 @@ python ../run_seq2seq.py \ --optim adafactor \ --gradient_checkpointing -python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json +python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json python ../../../nlu/evaluate_unified_datasets.py -p ${output_dir}/predictions.json diff --git a/convlab/base_models/t5/nlu/run_nlu_fewshot.sh b/convlab/base_models/t5/nlu/run_nlu_fewshot.sh index a966310a..5f04579f 100644 --- a/convlab/base_models/t5/nlu/run_nlu_fewshot.sh +++ b/convlab/base_models/t5/nlu/run_nlu_fewshot.sh @@ -83,6 +83,6 @@ python ../run_seq2seq.py \ --optim adafactor \ --gradient_checkpointing -python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order} +python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json -o ${dial_ids_order} python ../../../nlu/evaluate_unified_datasets.py -p ${output_dir}/predictions.json diff --git a/convlab/base_models/t5/nlu/run_nlu_multitask.sh b/convlab/base_models/t5/nlu/run_nlu_multitask.sh index b91f21e3..9e4425b4 100644 --- a/convlab/base_models/t5/nlu/run_nlu_multitask.sh +++ b/convlab/base_models/t5/nlu/run_nlu_multitask.sh @@ -89,6 +89,6 @@ python ../run_seq2seq.py \ --optim adafactor \ --gradient_checkpointing -python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json +python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json python ../../../nlu/evaluate_unified_datasets.py -p ${output_dir}/predictions.json diff --git a/convlab/base_models/t5/nlu/run_retnlu.sh b/convlab/base_models/t5/nlu/run_retnlu.sh index fd44e063..ede928ab 100644 --- a/convlab/base_models/t5/nlu/run_retnlu.sh +++ b/convlab/base_models/t5/nlu/run_retnlu.sh @@ -81,6 +81,6 @@ python ../run_seq2seq.py \ --optim adafactor \ --gradient_checkpointing -python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json +python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json python ../../../nlu/evaluate_unified_datasets.py -p ${output_dir}/predictions.json diff --git a/convlab/base_models/t5/nlu/run_retnlu_fewshot.sh b/convlab/base_models/t5/nlu/run_retnlu_fewshot.sh index e778c80b..a3bfbfaa 100644 --- a/convlab/base_models/t5/nlu/run_retnlu_fewshot.sh +++ b/convlab/base_models/t5/nlu/run_retnlu_fewshot.sh @@ -84,6 +84,6 @@ num_train_epochs=100 # --optim adafactor \ # --gradient_checkpointing -# python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order} +# python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json -o ${dial_ids_order} python ../../../nlu/evaluate_unified_datasets.py -p ${output_dir}/predictions.json diff --git a/convlab/base_models/t5/nlu/run_retnlu_in_context.sh b/convlab/base_models/t5/nlu/run_retnlu_in_context.sh index 775b4b06..5c4a091d 100644 --- a/convlab/base_models/t5/nlu/run_retnlu_in_context.sh +++ b/convlab/base_models/t5/nlu/run_retnlu_in_context.sh @@ -81,6 +81,6 @@ python ../run_seq2seq.py \ --optim adafactor \ --gradient_checkpointing -python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json +python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json python ../../../nlu/evaluate_unified_datasets.py -p ${output_dir}/predictions.json diff --git a/convlab/base_models/t5/nlu/run_retnlu_in_context_fewshot.sh b/convlab/base_models/t5/nlu/run_retnlu_in_context_fewshot.sh index 913ef7cb..3a6c4ce0 100644 --- a/convlab/base_models/t5/nlu/run_retnlu_in_context_fewshot.sh +++ b/convlab/base_models/t5/nlu/run_retnlu_in_context_fewshot.sh @@ -84,6 +84,6 @@ python ../run_seq2seq.py \ --optim adafactor \ --gradient_checkpointing -python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order} +python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json -o ${dial_ids_order} python ../../../nlu/evaluate_unified_datasets.py -p ${output_dir}/predictions.json diff --git a/convlab/base_models/t5/run_seq2seq.py b/convlab/base_models/t5/run_seq2seq.py index 5fa921f0..bdf1b10f 100644 --- a/convlab/base_models/t5/run_seq2seq.py +++ b/convlab/base_models/t5/run_seq2seq.py @@ -37,6 +37,8 @@ from transformers import ( AutoConfig, AutoModelForSeq2SeqLM, AutoTokenizer, + T5ForConditionalGeneration, + T5Tokenizer, DataCollatorForSeq2Seq, HfArgumentParser, EarlyStoppingCallback, @@ -358,22 +360,40 @@ def main(): revision=model_args.model_revision, use_auth_token=True if model_args.use_auth_token else None, ) - tokenizer = AutoTokenizer.from_pretrained( - model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, - cache_dir=model_args.cache_dir, - use_fast=model_args.use_fast_tokenizer, - truncation_side=model_args.truncation_side, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) - model = AutoModelForSeq2SeqLM.from_pretrained( - model_args.model_name_or_path, - from_tf=bool(".ckpt" in model_args.model_name_or_path), - config=config, - cache_dir=model_args.cache_dir, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) + try: + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + use_fast=model_args.use_fast_tokenizer, + truncation_side=model_args.truncation_side, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + model = AutoModelForSeq2SeqLM.from_pretrained( + model_args.model_name_or_path, + from_tf=bool(".ckpt" in model_args.model_name_or_path), + config=config, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + except: + tokenizer = T5Tokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + use_fast=model_args.use_fast_tokenizer, + truncation_side=model_args.truncation_side, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + model = T5ForConditionalGeneration.from_pretrained( + model_args.model_name_or_path, + from_tf=bool(".ckpt" in model_args.model_name_or_path), + config=config, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) model.resize_token_embeddings(len(tokenizer)) @@ -612,16 +632,17 @@ def main(): # Predict if training_args.do_predict: - logger.info("*** Predict ***") - predict_results = trainer.predict(predict_dataset, metric_key_prefix="predict") + file_prefix = os.path.splitext(os.path.basename(data_args.test_file))[0] + logger.info(f"*** Predict {file_prefix}***") + predict_results = trainer.predict(predict_dataset, metric_key_prefix=file_prefix) metrics = predict_results.metrics max_predict_samples = ( data_args.max_predict_samples if data_args.max_predict_samples is not None else len(predict_dataset) ) - metrics["predict_samples"] = min(max_predict_samples, len(predict_dataset)) + metrics[f"{file_prefix}_samples"] = min(max_predict_samples, len(predict_dataset)) - trainer.log_metrics("predict", metrics) - trainer.save_metrics("predict", metrics) + trainer.log_metrics(file_prefix, metrics) + trainer.save_metrics(file_prefix, metrics) if trainer.is_world_process_zero(): if training_args.predict_with_generate: @@ -629,10 +650,13 @@ def main(): predict_results.predictions, skip_special_tokens=True, clean_up_tokenization_spaces=True ) predictions = [pred.strip() for pred in predictions] - output_prediction_file = os.path.join(training_args.output_dir, "generated_predictions.json") + output_prediction_file = os.path.join(training_args.output_dir, f"{file_prefix}_generated_predictions.json") with open(output_prediction_file, "w", encoding='utf-8') as writer: - for sample, pred in zip(raw_datasets["test"], predictions): - sample["predictions"] = pred + for idx, sample in enumerate(raw_datasets["test"]): + if training_args.num_return_sequences > 1: + sample["predictions"] = predictions[idx*training_args.num_return_sequences:(idx+1)*training_args.num_return_sequences] + else: + sample["predictions"] = predictions[idx] writer.write(json.dumps(sample, ensure_ascii=False)+'\n') kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": data_args.task_name} -- GitLab