diff --git a/convlab/base_models/t5/nlg/merge_predict_res.py b/convlab/base_models/t5/nlg/merge_predict_res.py
index d21fd489225aab8d75dde9f6f266b778e956512c..7d2995d84737378958a54765f3efc5f996f112e3 100755
--- a/convlab/base_models/t5/nlg/merge_predict_res.py
+++ b/convlab/base_models/t5/nlg/merge_predict_res.py
@@ -24,6 +24,7 @@ def merge(dataset_names, speaker, save_dir, context_window_size, predict_result)
                 continue
             sample['predictions'] = {'utterance': predict_result[i]}
             i += 1
+            merged.append(sample)
 
     json.dump(merged, open(os.path.join(save_dir, 'predictions.json'), 'w', encoding='utf-8'), indent=2, ensure_ascii=False)
 
diff --git a/convlab/base_models/t5/nlu/merge_data.py b/convlab/base_models/t5/nlu/merge_data.py
new file mode 100644
index 0000000000000000000000000000000000000000..bb692de53b073e18e4cebcfb4762c0ef473c7b69
--- /dev/null
+++ b/convlab/base_models/t5/nlu/merge_data.py
@@ -0,0 +1,21 @@
+import json
+import os
+import sys
+
+if __name__ == '__main__':
+    merged_data = {'train': [], 'validation': [], 'test': []}
+    print(sys.argv)
+    for dataset_name in sys.argv[1:]:
+        data_dir = os.path.join('data/nlu', dataset_name, 'user/context_0')
+        for data_split in merged_data:
+            with open(os.path.join(data_dir, f'{data_split}.json'), 'r') as f:
+                for line in f:
+                    item = json.loads(line)
+                    item['context'] = f"{dataset_name}: {item['context']}"
+                    merged_data[data_split].append(item)
+    for data_split in merged_data:
+        data_dir = os.path.join('data/nlu', '+'.join(sys.argv[1:]), 'user/context_0')
+        os.makedirs(data_dir, exist_ok=True)
+        with open(os.path.join(data_dir, f'{data_split}.json'), 'w') as f:
+            for item in merged_data[data_split]:
+                f.write(json.dumps(item)+'\n')
diff --git a/convlab/base_models/t5/nlu/merge_predict_res.py b/convlab/base_models/t5/nlu/merge_predict_res.py
index 58cf29d194272accd7578d58ba8bac415c025541..e247160769f7e5b0c9445b38e4dc2a5caa567fd0 100755
--- a/convlab/base_models/t5/nlu/merge_predict_res.py
+++ b/convlab/base_models/t5/nlu/merge_predict_res.py
@@ -4,10 +4,8 @@ from convlab.util import load_dataset, load_nlu_data
 from convlab.base_models.t5.nlu.serialization import deserialize_dialogue_acts
 
 
-def merge(dataset_name, speaker, save_dir, context_window_size, predict_result):
+def merge(dataset_names, speaker, save_dir, context_window_size, predict_result):
     assert os.path.exists(predict_result)
-    dataset = load_dataset(dataset_name, args.dial_ids_order)
-    data = load_nlu_data(dataset, data_split='test', speaker=speaker, use_context=context_window_size>0, context_window_size=context_window_size)['test']
 
     if save_dir is None:
         save_dir = os.path.dirname(predict_result)
@@ -15,10 +13,19 @@ def merge(dataset_name, speaker, save_dir, context_window_size, predict_result):
         os.makedirs(save_dir, exist_ok=True)
     predict_result = [deserialize_dialogue_acts(json.loads(x)['predictions'].strip()) for x in open(predict_result)]
 
-    for sample, prediction in zip(data, predict_result):
-        sample['predictions'] = {'dialogue_acts': prediction}
+    merged = []
+    i = 0
+    for dataset_name in dataset_names.split('+'):
+        print(dataset_name)
+        dataset = load_dataset(dataset_name, args.dial_ids_order)
+        data = load_nlu_data(dataset, data_split='test', speaker=speaker, use_context=context_window_size>0, context_window_size=context_window_size)['test']
+
+        for sample in data:
+            sample['predictions'] = {'dialogue_acts': predict_result[i]}
+            i += 1
+            merged.append(sample)
 
-    json.dump(data, open(os.path.join(save_dir, 'predictions.json'), 'w', encoding='utf-8'), indent=2, ensure_ascii=False)
+    json.dump(merged, open(os.path.join(save_dir, 'predictions.json'), 'w', encoding='utf-8'), indent=2, ensure_ascii=False)
 
 
 if __name__ == '__main__':
diff --git a/convlab/base_models/t5/nlu/run_nlu_multitask.sh b/convlab/base_models/t5/nlu/run_nlu_multitask.sh
new file mode 100644
index 0000000000000000000000000000000000000000..6380acff2fc5e8a2712e530823c5d0b61af451a2
--- /dev/null
+++ b/convlab/base_models/t5/nlu/run_nlu_multitask.sh
@@ -0,0 +1,94 @@
+n_gpus=1
+task_name="nlu"
+dataset_name="tm1+tm2+tm3"
+speaker="user"
+context_window_size=0
+data_dir="data/${task_name}/${dataset_name}/${speaker}/context_${context_window_size}"
+output_dir="output/${task_name}/${dataset_name}/${speaker}/context_${context_window_size}"
+cache_dir="../cache"
+logging_dir="${output_dir}/runs"
+train_file="${data_dir}/train.json"
+validation_file="${data_dir}/validation.json"
+test_file="${data_dir}/test.json"
+metric_name_or_path="nlu_metric.py"
+metric_for_best_model="overall_f1"
+source_column="context"
+target_column="dialogue_acts_seq"
+truncation_side="left"
+max_source_length=512
+max_target_length=512
+model_name_or_path="t5-small"
+per_device_train_batch_size=128
+per_device_eval_batch_size=64
+gradient_accumulation_steps=2
+lr=1e-3
+num_train_epochs=10
+
+names=$(echo ${dataset_name} | tr "+" "\n")
+rm -r ${data_dir}
+mkdir -p ${data_dir}
+for name in ${names};
+do
+    echo "preprocessing ${name}"
+    python ../create_data.py -t ${task_name} -d ${name} -s ${speaker} -c ${context_window_size}
+done
+
+python merge_data.py $(echo ${dataset_name} | tr "+" " ")
+
+python ../run_seq2seq.py \
+    --task_name ${task_name} \
+    --train_file ${train_file} \
+    --validation_file ${validation_file} \
+    --source_column ${source_column} \
+    --target_column ${target_column} \
+    --max_source_length ${max_source_length} \
+    --max_target_length ${max_target_length} \
+    --truncation_side ${truncation_side} \
+    --model_name_or_path ${model_name_or_path} \
+    --do_train \
+    --do_eval \
+    --save_strategy epoch \
+    --evaluation_strategy epoch \
+    --save_total_limit 1 \
+    --prediction_loss_only \
+    --cache_dir ${cache_dir} \
+    --output_dir ${output_dir} \
+    --logging_dir ${logging_dir} \
+    --overwrite_output_dir \
+    --preprocessing_num_workers 4 \
+    --per_device_train_batch_size ${per_device_train_batch_size} \
+    --per_device_eval_batch_size ${per_device_eval_batch_size} \
+    --gradient_accumulation_steps ${gradient_accumulation_steps} \
+    --learning_rate ${lr} \
+    --num_train_epochs ${num_train_epochs} \
+    --adafactor \
+    --gradient_checkpointing
+
+python ../run_seq2seq.py \
+    --task_name ${task_name} \
+    --test_file ${test_file} \
+    --source_column ${source_column} \
+    --target_column ${target_column} \
+    --max_source_length ${max_source_length} \
+    --max_target_length ${max_target_length} \
+    --truncation_side ${truncation_side} \
+    --model_name_or_path ${output_dir} \
+    --do_predict \
+    --predict_with_generate \
+    --metric_name_or_path ${metric_name_or_path} \
+    --cache_dir ${cache_dir} \
+    --output_dir ${output_dir} \
+    --logging_dir ${logging_dir} \
+    --overwrite_output_dir \
+    --preprocessing_num_workers 4 \
+    --per_device_train_batch_size ${per_device_train_batch_size} \
+    --per_device_eval_batch_size ${per_device_eval_batch_size} \
+    --gradient_accumulation_steps ${gradient_accumulation_steps} \
+    --learning_rate ${lr} \
+    --num_train_epochs ${num_train_epochs} \
+    --adafactor \
+    --gradient_checkpointing
+
+python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
+
+python ../../../nlu/evaluate_unified_datasets.py -p ${output_dir}/predictions.json