Commit 4530095d authored by zqwerty

update run_seq2seq.py: rename the inference output file and handle the AutoTokenizer error on UnifiedQA_v2

parent 7f78fa3f
Showing with 184 additions and 152 deletions
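The two changes to run_seq2seq.py are (1) the prediction output file is now named after the test file (e.g. test_generated_predictions.json instead of generated_predictions.json), and (2) tokenizer/model loading falls back to the T5-specific classes when AutoTokenizer raises, which happens for some UnifiedQA_v2 checkpoints. A minimal sketch of the fallback pattern, stripped of the extra from_pretrained arguments used in the actual script (the helper name and the placeholder path are illustrative only):

    from transformers import AutoTokenizer, T5Tokenizer

    def load_tokenizer(model_name_or_path):
        # Try the generic auto class first; fall back to the T5 tokenizer when
        # AutoTokenizer cannot handle the checkpoint (e.g. some UnifiedQA_v2 models).
        try:
            return AutoTokenizer.from_pretrained(model_name_or_path)
        except Exception:
            return T5Tokenizer.from_pretrained(model_name_or_path)

    tokenizer = load_tokenizer("path/to/unifiedqa-v2-checkpoint")  # placeholder path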
@@ -35,7 +35,7 @@ if __name__ == '__main__':
     parser.add_argument('--speaker', '-s', type=str, choices=['user', 'system', 'all'], help='speaker(s) of utterances')
     parser.add_argument('--save_dir', type=str, help='merged data will be saved as $save_dir/predictions.json. default: on the same directory as predict_result')
     parser.add_argument('--context_window_size', '-c', type=int, default=0, help='how many contextual utterances are considered')
-    parser.add_argument('--predict_result', '-p', type=str, required=True, help='path to the output file generated_predictions.json')
+    parser.add_argument('--predict_result', '-p', type=str, required=True, help='path to the output file test_generated_predictions.json')
     parser.add_argument('--dial_ids_order', '-o', type=int, default=None, help='which data order is used for experiments')
     args = parser.parse_args()
     print(args)
...
 n_gpus=1
 task_name="dst"
-dataset_name=$1
+dataset_name=crosswoz
 speaker="user"
 context_window_size=100
 data_dir="data/${task_name}/${dataset_name}/${speaker}/context_${context_window_size}"
@@ -17,10 +17,10 @@ target_column="state_seq"
 truncation_side="left"
 max_source_length=1024
 max_target_length=512
-model_name_or_path="t5-small"
-per_device_train_batch_size=64
-per_device_eval_batch_size=64
-gradient_accumulation_steps=2
+model_name_or_path="/data/zhuqi/pre-trained-models/mt5-small"
+per_device_train_batch_size=16
+per_device_eval_batch_size=16
+gradient_accumulation_steps=4
 lr=1e-3
 num_train_epochs=10
@@ -80,6 +80,6 @@ python ../run_seq2seq.py \
     --optim adafactor \
     --gradient_checkpointing
-python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
+python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json
 python ../../../dst/evaluate_unified_datasets.py -p ${output_dir}/predictions.json
@@ -82,6 +82,6 @@ python ../run_seq2seq.py \
     --optim adafactor \
     --gradient_checkpointing
-python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order}
+python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json -o ${dial_ids_order}
 python ../../../dst/evaluate_unified_datasets.py -p ${output_dir}/predictions.json
@@ -89,6 +89,6 @@ python ../run_seq2seq.py \
     --optim adafactor \
     --gradient_checkpointing
-python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
+python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json
 python ../../../dst/evaluate_unified_datasets.py -p ${output_dir}/predictions.json
@@ -24,8 +24,15 @@ def merge(dataset_names, speaker, save_dir, context_window_size, predict_result)
                 continue
             sample['predictions'] = {'utterance': predict_result[i]}
             i += 1
-            merged.append(sample)
+            if args.sub_dataset:
+                if dataset_name == args.sub_dataset:
+                    merged.append(sample)
+            else:
+                merged.append(sample)
-    json.dump(merged, open(os.path.join(save_dir, 'predictions.json'), 'w', encoding='utf-8'), indent=2, ensure_ascii=False)
+    if args.sub_dataset:
+        json.dump(merged, open(os.path.join(save_dir, f'{args.sub_dataset}predictions.json'), 'w', encoding='utf-8'), indent=2, ensure_ascii=False)
+    else:
+        json.dump(merged, open(os.path.join(save_dir, 'predictions.json'), 'w', encoding='utf-8'), indent=2, ensure_ascii=False)
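A quick illustration of what the new --sub_dataset branch above does to the output path: a non-empty value filters the merged samples to that sub-dataset and prefixes the file name, otherwise the old behaviour is kept. The helper name and argument values below are made up for the example, not part of this commit:

    import os

    def merged_predictions_path(save_dir, sub_dataset=None):
        # Mirrors the added json.dump branch: a non-empty --sub_dataset value is
        # prepended to the file name, otherwise predictions.json is used as before.
        if sub_dataset:
            return os.path.join(save_dir, f'{sub_dataset}predictions.json')
        return os.path.join(save_dir, 'predictions.json')

    print(merged_predictions_path('output/nlg/tm1+tm2+tm3', 'tm1'))  # .../tm1predictions.json
    print(merged_predictions_path('output/nlg/tm1+tm2+tm3'))         # .../predictions.json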
@@ -33,10 +40,11 @@ if __name__ == '__main__':
     from argparse import ArgumentParser
     parser = ArgumentParser(description="merge predict results with original data for unified NLU evaluation")
     parser.add_argument('--dataset', '-d', metavar='dataset_name', type=str, help='name of the unified dataset')
+    parser.add_argument('--sub_dataset', metavar='sub dataset_name', type=str, help='name of the unified dataset')
     parser.add_argument('--speaker', '-s', type=str, choices=['user', 'system', 'all'], help='speaker(s) of utterances')
     parser.add_argument('--save_dir', type=str, help='merged data will be saved as $save_dir/predictions.json. default: on the same directory as predict_result')
     parser.add_argument('--context_window_size', '-c', type=int, default=0, help='how many contextual utterances are considered')
-    parser.add_argument('--predict_result', '-p', type=str, required=True, help='path to the output file generated_predictions.json')
+    parser.add_argument('--predict_result', '-p', type=str, required=True, help='path to the output file test_generated_predictions.json')
     parser.add_argument('--dial_ids_order', '-o', type=int, default=None, help='which data order is used for experiments')
     args = parser.parse_args()
     print(args)
...
 n_gpus=1
 task_name="nlg"
-dataset_name=$1
-speaker="system"
-context_window_size=$2
+dataset_name=crosswoz
+speaker="all"
+context_window_size=0
 data_dir="data/${task_name}/${dataset_name}/${speaker}/context_${context_window_size}"
 output_dir="output/${task_name}/${dataset_name}/${speaker}/context_${context_window_size}"
 cache_dir="../cache"
@@ -17,43 +17,43 @@ target_column="response"
 truncation_side="left"
 max_source_length=512
 max_target_length=512
-model_name_or_path="t5-small"
-per_device_train_batch_size=128
-per_device_eval_batch_size=64
-gradient_accumulation_steps=4
+model_name_or_path="/data/zhuqi/pre-trained-models/mt5-small"
+per_device_train_batch_size=32
+per_device_eval_batch_size=16
+gradient_accumulation_steps=8
 lr=1e-3
 num_train_epochs=10
-python ../create_data.py -t ${task_name} -d ${dataset_name} -s ${speaker} -c ${context_window_size}
-python ../run_seq2seq.py \
-    --task_name ${task_name} \
-    --train_file ${train_file} \
-    --validation_file ${validation_file} \
-    --source_column ${source_column} \
-    --target_column ${target_column} \
-    --max_source_length ${max_source_length} \
-    --max_target_length ${max_target_length} \
-    --truncation_side ${truncation_side} \
-    --model_name_or_path ${model_name_or_path} \
-    --do_train \
-    --do_eval \
-    --save_strategy epoch \
-    --evaluation_strategy epoch \
-    --save_total_limit 1 \
-    --prediction_loss_only \
-    --cache_dir ${cache_dir} \
-    --output_dir ${output_dir} \
-    --logging_dir ${logging_dir} \
-    --overwrite_output_dir \
-    --preprocessing_num_workers 4 \
-    --per_device_train_batch_size ${per_device_train_batch_size} \
-    --per_device_eval_batch_size ${per_device_eval_batch_size} \
-    --gradient_accumulation_steps ${gradient_accumulation_steps} \
-    --learning_rate ${lr} \
-    --num_train_epochs ${num_train_epochs} \
-    --optim adafactor \
-    --gradient_checkpointing
+# python ../create_data.py -t ${task_name} -d ${dataset_name} -s ${speaker} -c ${context_window_size}
+# python ../run_seq2seq.py \
+#     --task_name ${task_name} \
+#     --train_file ${train_file} \
+#     --validation_file ${validation_file} \
+#     --source_column ${source_column} \
+#     --target_column ${target_column} \
+#     --max_source_length ${max_source_length} \
+#     --max_target_length ${max_target_length} \
+#     --truncation_side ${truncation_side} \
+#     --model_name_or_path ${model_name_or_path} \
+#     --do_train \
+#     --do_eval \
+#     --save_strategy epoch \
+#     --evaluation_strategy epoch \
+#     --save_total_limit 1 \
+#     --prediction_loss_only \
+#     --cache_dir ${cache_dir} \
+#     --output_dir ${output_dir} \
+#     --logging_dir ${logging_dir} \
+#     --overwrite_output_dir \
+#     --preprocessing_num_workers 4 \
+#     --per_device_train_batch_size ${per_device_train_batch_size} \
+#     --per_device_eval_batch_size ${per_device_eval_batch_size} \
+#     --gradient_accumulation_steps ${gradient_accumulation_steps} \
+#     --learning_rate ${lr} \
+#     --num_train_epochs ${num_train_epochs} \
+#     --optim adafactor \
+#     --gradient_checkpointing
 python ../run_seq2seq.py \
     --task_name ${task_name} \
@@ -80,6 +80,6 @@ python ../run_seq2seq.py \
     --optim adafactor \
     --gradient_checkpointing
-python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
+python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json
 python ../../../nlg/evaluate_unified_datasets.py -p ${output_dir}/predictions.json --dataset_name ${dataset_name}
@@ -83,6 +83,6 @@ python ../run_seq2seq.py \
     --optim adafactor \
     --gradient_checkpointing
-python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order}
+python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json -o ${dial_ids_order}
 python ../../../nlg/evaluate_unified_datasets.py -p ${output_dir}/predictions.json --dataset_name ${dataset_name}
 n_gpus=1
 task_name="nlg"
-dataset_name="sgd+tm1+tm2+tm3+multiwoz21"
+dataset_name="tm1+tm2+tm3"
 speaker="system"
 context_window_size=0
 data_dir="data/${task_name}/${dataset_name}/${speaker}/context_${context_window_size}"
@@ -25,70 +25,70 @@ lr=1e-3
 num_train_epochs=10
 names=$(echo ${dataset_name} | tr "+" "\n")
-rm -r ${data_dir}
-mkdir -p ${data_dir}
-for name in ${names};
-do
-    echo "preprocessing ${name}"
-    python ../create_data.py -t ${task_name} -d ${name} -s ${speaker} -c ${context_window_size}
-done
-python merge_data.py $(echo ${dataset_name} | tr "+" " ")
-python ../run_seq2seq.py \
-    --task_name ${task_name} \
-    --train_file ${train_file} \
-    --validation_file ${validation_file} \
-    --source_column ${source_column} \
-    --target_column ${target_column} \
-    --max_source_length ${max_source_length} \
-    --max_target_length ${max_target_length} \
-    --truncation_side ${truncation_side} \
-    --model_name_or_path ${model_name_or_path} \
-    --do_train \
-    --do_eval \
-    --save_strategy epoch \
-    --evaluation_strategy epoch \
-    --save_total_limit 1 \
-    --prediction_loss_only \
-    --cache_dir ${cache_dir} \
-    --output_dir ${output_dir} \
-    --logging_dir ${logging_dir} \
-    --overwrite_output_dir \
-    --preprocessing_num_workers 4 \
-    --per_device_train_batch_size ${per_device_train_batch_size} \
-    --per_device_eval_batch_size ${per_device_eval_batch_size} \
-    --gradient_accumulation_steps ${gradient_accumulation_steps} \
-    --learning_rate ${lr} \
-    --num_train_epochs ${num_train_epochs} \
-    --optim adafactor \
-    --gradient_checkpointing
-python ../run_seq2seq.py \
-    --task_name ${task_name} \
-    --test_file ${test_file} \
-    --source_column ${source_column} \
-    --target_column ${target_column} \
-    --max_source_length ${max_source_length} \
-    --max_target_length ${max_target_length} \
-    --truncation_side ${truncation_side} \
-    --model_name_or_path ${output_dir} \
-    --do_predict \
-    --predict_with_generate \
-    --metric_name_or_path ${metric_name_or_path} \
-    --cache_dir ${cache_dir} \
-    --output_dir ${output_dir} \
-    --logging_dir ${logging_dir} \
-    --overwrite_output_dir \
-    --preprocessing_num_workers 4 \
-    --per_device_train_batch_size ${per_device_train_batch_size} \
-    --per_device_eval_batch_size ${per_device_eval_batch_size} \
-    --gradient_accumulation_steps ${gradient_accumulation_steps} \
-    --learning_rate ${lr} \
-    --num_train_epochs ${num_train_epochs} \
-    --optim adafactor \
-    --gradient_checkpointing
-python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
-# python ../../../nlg/evaluate_unified_datasets.py -p ${output_dir}/predictions.json --dataset_name ${dataset_name}
+# rm -r ${data_dir}
+# mkdir -p ${data_dir}
+# for name in ${names};
+# do
+#     echo "preprocessing ${name}"
+#     python ../create_data.py -t ${task_name} -d ${name} -s ${speaker} -c ${context_window_size}
+# done
+# python merge_data.py $(echo ${dataset_name} | tr "+" " ")
+# python ../run_seq2seq.py \
+#     --task_name ${task_name} \
+#     --train_file ${train_file} \
+#     --validation_file ${validation_file} \
+#     --source_column ${source_column} \
+#     --target_column ${target_column} \
+#     --max_source_length ${max_source_length} \
+#     --max_target_length ${max_target_length} \
+#     --truncation_side ${truncation_side} \
+#     --model_name_or_path ${model_name_or_path} \
+#     --do_train \
+#     --do_eval \
+#     --save_strategy epoch \
+#     --evaluation_strategy epoch \
+#     --save_total_limit 1 \
+#     --prediction_loss_only \
+#     --cache_dir ${cache_dir} \
+#     --output_dir ${output_dir} \
+#     --logging_dir ${logging_dir} \
+#     --overwrite_output_dir \
+#     --preprocessing_num_workers 4 \
+#     --per_device_train_batch_size ${per_device_train_batch_size} \
+#     --per_device_eval_batch_size ${per_device_eval_batch_size} \
+#     --gradient_accumulation_steps ${gradient_accumulation_steps} \
+#     --learning_rate ${lr} \
+#     --num_train_epochs ${num_train_epochs} \
+#     --optim adafactor \
+#     --gradient_checkpointing
+# python ../run_seq2seq.py \
+#     --task_name ${task_name} \
+#     --test_file ${test_file} \
+#     --source_column ${source_column} \
+#     --target_column ${target_column} \
+#     --max_source_length ${max_source_length} \
+#     --max_target_length ${max_target_length} \
+#     --truncation_side ${truncation_side} \
+#     --model_name_or_path ${output_dir} \
+#     --do_predict \
+#     --predict_with_generate \
+#     --metric_name_or_path ${metric_name_or_path} \
+#     --cache_dir ${cache_dir} \
+#     --output_dir ${output_dir} \
+#     --logging_dir ${logging_dir} \
+#     --overwrite_output_dir \
+#     --preprocessing_num_workers 4 \
+#     --per_device_train_batch_size ${per_device_train_batch_size} \
+#     --per_device_eval_batch_size ${per_device_eval_batch_size} \
+#     --gradient_accumulation_steps ${gradient_accumulation_steps} \
+#     --learning_rate ${lr} \
+#     --num_train_epochs ${num_train_epochs} \
+#     --optim adafactor \
+#     --gradient_checkpointing
+# python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json
+python ../../../nlg/evaluate_unified_datasets.py -p ${output_dir}/Nonepredictions.json --dataset_name tm1
@@ -35,7 +35,7 @@ if __name__ == '__main__':
     parser.add_argument('--speaker', '-s', type=str, choices=['user', 'system', 'all'], help='speaker(s) of utterances')
     parser.add_argument('--save_dir', type=str, help='merged data will be saved as $save_dir/predictions.json. default: on the same directory as predict_result')
    parser.add_argument('--context_window_size', '-c', type=int, default=0, help='how many contextual utterances are considered')
-    parser.add_argument('--predict_result', '-p', type=str, required=True, help='path to the output file generated_predictions.json')
+    parser.add_argument('--predict_result', '-p', type=str, required=True, help='path to the output file test_generated_predictions.json')
     parser.add_argument('--dial_ids_order', '-o', type=int, default=None, help='which data order is used for experiments')
     args = parser.parse_args()
     print(args)
...
 n_gpus=1
 task_name="nlu"
-dataset_name=$1
-speaker="user"
-context_window_size=$2
+dataset_name=crosswoz
+speaker="all"
+context_window_size=0
 data_dir="data/${task_name}/${dataset_name}/${speaker}/context_${context_window_size}"
 output_dir="output/${task_name}/${dataset_name}/${speaker}/context_${context_window_size}"
 cache_dir="../cache"
@@ -17,10 +17,10 @@ target_column="dialogue_acts_seq"
 truncation_side="left"
 max_source_length=512
 max_target_length=512
-model_name_or_path="t5-small"
-per_device_train_batch_size=128
-per_device_eval_batch_size=64
-gradient_accumulation_steps=2
+model_name_or_path="/data/zhuqi/pre-trained-models/mt5-small"
+per_device_train_batch_size=16
+per_device_eval_batch_size=16
+gradient_accumulation_steps=16
 lr=1e-3
 num_train_epochs=10
@@ -80,6 +80,6 @@ python ../run_seq2seq.py \
     --optim adafactor \
     --gradient_checkpointing
-python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
+python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json
 python ../../../nlu/evaluate_unified_datasets.py -p ${output_dir}/predictions.json
@@ -83,6 +83,6 @@ python ../run_seq2seq.py \
     --optim adafactor \
     --gradient_checkpointing
-python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order}
+python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json -o ${dial_ids_order}
 python ../../../nlu/evaluate_unified_datasets.py -p ${output_dir}/predictions.json
@@ -89,6 +89,6 @@ python ../run_seq2seq.py \
     --optim adafactor \
     --gradient_checkpointing
-python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
+python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json
 python ../../../nlu/evaluate_unified_datasets.py -p ${output_dir}/predictions.json
@@ -81,6 +81,6 @@ python ../run_seq2seq.py \
     --optim adafactor \
     --gradient_checkpointing
-python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
+python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json
 python ../../../nlu/evaluate_unified_datasets.py -p ${output_dir}/predictions.json
@@ -84,6 +84,6 @@ num_train_epochs=100
 # --optim adafactor \
 # --gradient_checkpointing
-# python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order}
+# python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json -o ${dial_ids_order}
 python ../../../nlu/evaluate_unified_datasets.py -p ${output_dir}/predictions.json
@@ -81,6 +81,6 @@ python ../run_seq2seq.py \
     --optim adafactor \
     --gradient_checkpointing
-python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
+python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json
 python ../../../nlu/evaluate_unified_datasets.py -p ${output_dir}/predictions.json
@@ -84,6 +84,6 @@ python ../run_seq2seq.py \
     --optim adafactor \
     --gradient_checkpointing
-python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order}
+python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json -o ${dial_ids_order}
 python ../../../nlu/evaluate_unified_datasets.py -p ${output_dir}/predictions.json
@@ -37,6 +37,8 @@ from transformers import (
     AutoConfig,
     AutoModelForSeq2SeqLM,
     AutoTokenizer,
+    T5ForConditionalGeneration,
+    T5Tokenizer,
     DataCollatorForSeq2Seq,
     HfArgumentParser,
     EarlyStoppingCallback,
@@ -358,6 +360,7 @@ def main():
         revision=model_args.model_revision,
         use_auth_token=True if model_args.use_auth_token else None,
     )
+    try:
     tokenizer = AutoTokenizer.from_pretrained(
         model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
         cache_dir=model_args.cache_dir,
@@ -374,6 +377,23 @@ def main():
         revision=model_args.model_revision,
         use_auth_token=True if model_args.use_auth_token else None,
     )
+    except:
+        tokenizer = T5Tokenizer.from_pretrained(
+            model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
+            cache_dir=model_args.cache_dir,
+            use_fast=model_args.use_fast_tokenizer,
+            truncation_side=model_args.truncation_side,
+            revision=model_args.model_revision,
+            use_auth_token=True if model_args.use_auth_token else None,
+        )
+        model = T5ForConditionalGeneration.from_pretrained(
+            model_args.model_name_or_path,
+            from_tf=bool(".ckpt" in model_args.model_name_or_path),
+            config=config,
+            cache_dir=model_args.cache_dir,
+            revision=model_args.model_revision,
+            use_auth_token=True if model_args.use_auth_token else None,
+        )
     model.resize_token_embeddings(len(tokenizer))
@@ -612,16 +632,17 @@ def main():
     # Predict
     if training_args.do_predict:
-        logger.info("*** Predict ***")
-        predict_results = trainer.predict(predict_dataset, metric_key_prefix="predict")
+        file_prefix = os.path.splitext(os.path.basename(data_args.test_file))[0]
+        logger.info(f"*** Predict {file_prefix}***")
+        predict_results = trainer.predict(predict_dataset, metric_key_prefix=file_prefix)
         metrics = predict_results.metrics
         max_predict_samples = (
             data_args.max_predict_samples if data_args.max_predict_samples is not None else len(predict_dataset)
         )
-        metrics["predict_samples"] = min(max_predict_samples, len(predict_dataset))
-        trainer.log_metrics("predict", metrics)
-        trainer.save_metrics("predict", metrics)
+        metrics[f"{file_prefix}_samples"] = min(max_predict_samples, len(predict_dataset))
+        trainer.log_metrics(file_prefix, metrics)
+        trainer.save_metrics(file_prefix, metrics)
         if trainer.is_world_process_zero():
             if training_args.predict_with_generate:
@@ -629,10 +650,13 @@ def main():
                     predict_results.predictions, skip_special_tokens=True, clean_up_tokenization_spaces=True
                 )
                 predictions = [pred.strip() for pred in predictions]
-                output_prediction_file = os.path.join(training_args.output_dir, "generated_predictions.json")
+                output_prediction_file = os.path.join(training_args.output_dir, f"{file_prefix}_generated_predictions.json")
                 with open(output_prediction_file, "w", encoding='utf-8') as writer:
-                    for sample, pred in zip(raw_datasets["test"], predictions):
-                        sample["predictions"] = pred
+                    for idx, sample in enumerate(raw_datasets["test"]):
+                        if training_args.num_return_sequences > 1:
+                            sample["predictions"] = predictions[idx*training_args.num_return_sequences:(idx+1)*training_args.num_return_sequences]
+                        else:
+                            sample["predictions"] = predictions[idx]
                         writer.write(json.dumps(sample, ensure_ascii=False)+'\n')
     kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": data_args.task_name}
...
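The file_prefix logic above derives the prediction file name from the test file, so a test.json input now produces test_generated_predictions.json rather than the old generated_predictions.json, which is why every merge_predict_res.py call in the shell scripts of this commit points at the new name. A small sketch of just that naming step (the helper name and example paths are hypothetical):

    import os

    def prediction_filename(test_file, output_dir):
        # Same derivation as in run_seq2seq.py: strip the directory and extension
        # from the test file, then append the fixed suffix.
        file_prefix = os.path.splitext(os.path.basename(test_file))[0]
        return os.path.join(output_dir, f"{file_prefix}_generated_predictions.json")

    print(prediction_filename("data/nlg/test.json", "output/nlg"))
    # -> output/nlg/test_generated_predictions.json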