diff --git a/convlab2/base_models/t5/create_data.py b/convlab2/base_models/t5/create_data.py
index 19be0b81520cf4077ac34166e7b2e7a0d12f80a3..d4b99d9a7ebfc294433f06e795aff2620f0c1fb5 100644
--- a/convlab2/base_models/t5/create_data.py
+++ b/convlab2/base_models/t5/create_data.py
@@ -4,6 +4,7 @@ from tqdm import tqdm
 import re
 from convlab2.util import load_dataset, load_nlu_data, load_dst_data, load_policy_data, load_nlg_data, load_e2e_data, load_rg_data
 from convlab2.base_models.t5.nlu.serialization import serialize_dialogue_acts, deserialize_dialogue_acts, equal_da_seq
+from convlab2.base_models.t5.dst.serialization import serialize_dialogue_state, deserialize_dialogue_state, equal_state_seq
 
 def create_rg_data(dataset, data_dir, args):
     data_by_split = load_rg_data(dataset, speaker=args.speaker)
@@ -11,9 +12,6 @@ def create_rg_data(dataset, data_dir, args):
     os.makedirs(data_dir, exist_ok=True)
 
     data_splits = data_by_split.keys()
-    file_name = os.path.join(data_dir, f"source_prefix.txt")
-    with open(file_name, "w") as f:
-        f.write("generate a system response according to the context: ")
     for data_split in data_splits:
         data = []
         for sample in tqdm(data_by_split[data_split], desc=f'{data_split} sample', leave=False):
@@ -31,9 +29,6 @@ def create_nlu_data(dataset, data_dir, args):
     os.makedirs(data_dir, exist_ok=True)
 
     data_splits = data_by_split.keys()
-    file_name = os.path.join(data_dir, f"source_prefix.txt")
-    with open(file_name, "w") as f:
-        f.write("parse the dialogue action of the last utterance: ")
     for data_split in data_splits:
         data = []
         for sample in tqdm(data_by_split[data_split], desc=f'{data_split} sample', leave=False):
@@ -50,14 +45,33 @@ def create_nlu_data(dataset, data_dir, args):
     with open(file_name, "w", encoding='utf-8') as f:
         f.writelines(data)
 
+def create_dst_data(dataset, data_dir, args):
+    data_by_split = load_dst_data(dataset, speaker=args.speaker, use_context=args.context_window_size>0, context_window_size=args.context_window_size)
+    data_dir = os.path.join(data_dir, args.speaker, f'context_{args.context_window_size}')
+    os.makedirs(data_dir, exist_ok=True)
+
+    data_splits = data_by_split.keys()
+    for data_split in data_splits:
+        data = []
+        for sample in tqdm(data_by_split[data_split], desc=f'{data_split} sample', leave=False):
+            response = f"{sample['speaker']}: {sample['utterance']}"
+            if args.context_window_size>0:
+                context = ' '.join([f"{turn['speaker']}: {turn['utterance']}" for turn in sample['context']]+[response])
+            else:
+                context = response
+            state_seq = serialize_dialogue_state(sample['state'])
+            assert equal_state_seq(sample['state'], state_seq), print(sample['state'], state_seq, deserialize_dialogue_state(state_seq))
+            data.append(json.dumps({'context': context, 'state_seq': state_seq}, ensure_ascii=False)+'\n')
+
+        file_name = os.path.join(data_dir, f"{data_split}.json")
+        with open(file_name, "w", encoding='utf-8') as f:
+            f.writelines(data)
+
 def create_goal2dialogue_data(dataset, data_dir, args):
     data_by_split = dataset
     os.makedirs(data_dir, exist_ok=True)
 
     data_splits = data_by_split.keys()
-    file_name = os.path.join(data_dir, f"source_prefix.txt")
-    with open(file_name, "w") as f:
-        f.write("generate a dialogue between user and system according to the user goal: ")
     for data_split in data_splits:
         data = []
         for sample in tqdm(data_by_split[data_split], desc=f'{data_split} sample', leave=False):
@@ -73,7 +87,7 @@ def create_goal2dialogue_data(dataset, data_dir, args):
 
 if __name__ == '__main__':
     from argparse import ArgumentParser
     parser = ArgumentParser(description="create data for seq2seq training")
-    parser.add_argument('--tasks', '-t', metavar='task_name', nargs='*', choices=['rg', 'nlu', 'goal2dialogue'], help='names of tasks')
+    parser.add_argument('--tasks', '-t', metavar='task_name', nargs='*', choices=['rg', 'nlu', 'dst', 'goal2dialogue'], help='names of tasks')
     parser.add_argument('--datasets', '-d', metavar='dataset_name', nargs='*', help='names of unified datasets')
     parser.add_argument('--speaker', '-s', type=str, choices=['user', 'system', 'all'], help='speaker(s)')
     parser.add_argument('--context_window_size', '-c', type=int, default=0, help='how many contextual utterances are considered')
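For reference, here is a minimal sketch of the JSON-lines format the new `create_dst_data` emits: the flattened dialogue context is the source text and the serialized state is the target. The turn and state values below are illustrative, not real MultiWOZ data.

```python
import json

# Hypothetical turn, shaped like the samples load_dst_data yields.
sample = {
    'speaker': 'user',
    'utterance': 'I want a moderately priced restaurant.',
    'context': [{'speaker': 'system', 'utterance': 'How can I help you?'}],
    'state': {'restaurant': {'price range': 'moderate', 'area': ''}},
}

context = ' '.join([f"{t['speaker']}: {t['utterance']}" for t in sample['context']]
                   + [f"{sample['speaker']}: {sample['utterance']}"])
# Empty values are skipped by serialize_dialogue_state, so only one triple survives.
line = json.dumps({'context': context,
                   'state_seq': '[restaurant][price range][moderate]'},
                  ensure_ascii=False)
print(line)
# {"context": "system: How can I help you? user: I want a moderately priced restaurant.",
#  "state_seq": "[restaurant][price range][moderate]"}
```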
diff --git a/convlab2/base_models/t5/dst/dst_metric.py b/convlab2/base_models/t5/dst/dst_metric.py
new file mode 100644
index 0000000000000000000000000000000000000000..8a4f73b02a08b4b924a1d4f1bc545a023664e2bc
--- /dev/null
+++ b/convlab2/base_models/t5/dst/dst_metric.py
@@ -0,0 +1,107 @@
+# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""DST Metric"""
+
+import datasets
+from convlab2.base_models.t5.dst.serialization import deserialize_dialogue_state
+
+
+# TODO: Add BibTeX citation
+_CITATION = """\
+"""
+
+_DESCRIPTION = """\
+Metric to evaluate text-to-text models on the dialog state tracking task.
+"""
+
+_KWARGS_DESCRIPTION = """
+Calculates sequence exact match, joint goal accuracy and slot f1
+Args:
+    predictions: list of predictions to score. Each prediction
+        should be a string.
+    references: list of references, one per prediction. Each
+        reference should be a string.
+Returns:
+    seq_em: sequence exact match
+    accuracy: joint goal accuracy (dialog state accuracy)
+    slot_f1: slot f1
+Examples:
+
+    >>> dst_metric = datasets.load_metric("dst_metric.py")
+    >>> predictions = ["[restaurant][price range][moderate]", "[restaurant][price range][moderate];[restaurant][food][catalan];[restaurant][area][centre]"]
+    >>> references = ["[restaurant][price range][moderate]", "[restaurant][price range][moderate];[restaurant][food][catalan];[attraction][area][centre]"]
+    >>> results = dst_metric.compute(predictions=predictions, references=references)
+    >>> print(results)
+    {'seq_em': 0.5, 'accuracy': 0.5, 'slot_f1': 0.75, 'slot_precision': 0.75, 'slot_recall': 0.75}
+"""
+
+
+@datasets.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
+class DSTMetrics(datasets.Metric):
+    """Metric to evaluate text-to-text models on the dialog state tracking task."""
+
+    def _info(self):
+        return datasets.MetricInfo(
+            description=_DESCRIPTION,
+            citation=_CITATION,
+            inputs_description=_KWARGS_DESCRIPTION,
+            # This defines the format of each prediction and reference
+            features=datasets.Features({
+                'predictions': datasets.Value('string'),
+                'references': datasets.Value('string'),
+            })
+        )
+
+    def _compute(self, predictions, references):
+        """Returns the scores: sequence exact match, joint goal accuracy and slot f1"""
+        seq_em = []
+        acc = []
+        f1_metrics = {'TP':0, 'FP':0, 'FN':0}
+
+        for prediction, reference in zip(predictions, references):
+            seq_em.append(prediction.strip()==reference.strip())
+            pred_state = deserialize_dialogue_state(prediction)
+            gold_state = deserialize_dialogue_state(reference)
+
+            predicts = sorted(list({(domain, slot, value) for domain in pred_state for slot, value in pred_state[domain].items() if len(value)>0}))
+            labels = sorted(list({(domain, slot, value) for domain in gold_state for slot, value in gold_state[domain].items() if len(value)>0}))
+
+            flag = True
+            for ele in predicts:
+                if ele in labels:
+                    f1_metrics['TP'] += 1
+                else:
+                    f1_metrics['FP'] += 1
+            for ele in labels:
+                if ele not in predicts:
+                    f1_metrics['FN'] += 1
+            flag &= (predicts==labels)
+            acc.append(flag)
+
+        TP = f1_metrics.pop('TP')
+        FP = f1_metrics.pop('FP')
+        FN = f1_metrics.pop('FN')
+        precision = 1.0 * TP / (TP + FP) if TP + FP else 0.
+        recall = 1.0 * TP / (TP + FN) if TP + FN else 0.
+        f1 = 2.0 * precision * recall / (precision + recall) if precision + recall else 0.
+        f1_metrics['slot_f1'] = f1
+        f1_metrics['slot_precision'] = precision
+        f1_metrics['slot_recall'] = recall
+
+        return {
+            "seq_em": sum(seq_em)/len(seq_em),
+            "accuracy": sum(acc)/len(acc),
+            **f1_metrics
+        }
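As a sanity check, the numbers in the docstring example above can be recomputed by hand; this sketch tallies the slot triples from the two prediction/reference pairs (plain arithmetic, no imports needed).

```python
# Pair 1 matches exactly: 1 TP.
# Pair 2: the prediction has [restaurant][area][centre] where the reference has
# [attraction][area][centre], so 2 TP, 1 FP, 1 FN, and the joint match fails.
TP, FP, FN = 3, 1, 1
precision = TP / (TP + FP)                           # 0.75
recall = TP / (TP + FN)                              # 0.75
f1 = 2 * precision * recall / (precision + recall)   # 0.75
# seq_em = accuracy = 1/2 = 0.5: only the first pair is an exact/joint match.
```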
diff --git a/convlab2/base_models/t5/dst/merge_predict_res.py b/convlab2/base_models/t5/dst/merge_predict_res.py
new file mode 100755
index 0000000000000000000000000000000000000000..ebdada8a6cd5d428e02f72f7d40448076454a9e9
--- /dev/null
+++ b/convlab2/base_models/t5/dst/merge_predict_res.py
@@ -0,0 +1,34 @@
+import json
+import os
+from convlab2.util import load_dataset, load_dst_data
+from convlab2.base_models.t5.dst.serialization import deserialize_dialogue_state
+
+
+def merge(dataset_name, speaker, save_dir, context_window_size, predict_result):
+    assert os.path.exists(predict_result)
+    dataset = load_dataset(dataset_name)
+    data = load_dst_data(dataset, data_split='test', speaker=speaker, use_context=context_window_size>0, context_window_size=context_window_size)['test']
+
+    if save_dir is None:
+        save_dir = os.path.dirname(predict_result)
+    else:
+        os.makedirs(save_dir, exist_ok=True)
+    predict_result = [deserialize_dialogue_state(json.loads(x)['predictions'].strip()) for x in open(predict_result)]
+
+    for sample, prediction in zip(data, predict_result):
+        sample['predictions'] = {'state': prediction}
+
+    json.dump(data, open(os.path.join(save_dir, 'predictions.json'), 'w', encoding='utf-8'), indent=2, ensure_ascii=False)
+
+
+if __name__ == '__main__':
+    from argparse import ArgumentParser
+    parser = ArgumentParser(description="merge predict results with original data for unified DST evaluation")
+    parser.add_argument('--dataset', '-d', metavar='dataset_name', type=str, help='name of the unified dataset')
+    parser.add_argument('--speaker', '-s', type=str, choices=['user', 'system', 'all'], help='speaker(s) of utterances')
+    parser.add_argument('--save_dir', type=str, help='merged data will be saved as $save_dir/predictions.json. default: the same directory as predict_result')
+    parser.add_argument('--context_window_size', '-c', type=int, default=0, help='how many contextual utterances are considered')
+    parser.add_argument('--predict_result', '-p', type=str, required=True, help='path to the output file generated_predictions.json')
+    args = parser.parse_args()
+    print(args)
+    merge(args.dataset, args.speaker, args.save_dir, args.context_window_size, args.predict_result)
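Since the script aligns predictions to test samples purely by order (`zip(data, predict_result)`), `generated_predictions.json` must come from a run over the same, unshuffled test split. A hypothetical entry of the merged `predictions.json` would then look roughly like this (field names and values illustrative, abridged):

```python
# Sketch of one merged sample; real samples carry more fields from the dataset.
entry = {
    'speaker': 'user',
    'utterance': 'I want a moderately priced restaurant.',
    'state': {'restaurant': {'price range': 'moderate'}},                    # gold
    'predictions': {'state': {'restaurant': {'price range': 'moderate'}}},   # model output
}
```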
diff --git a/convlab2/base_models/t5/dst/run_multiwoz21.sh b/convlab2/base_models/t5/dst/run_multiwoz21.sh
new file mode 100644
index 0000000000000000000000000000000000000000..e031be48b32d252a0b835d51ac4674ee8b927723
--- /dev/null
+++ b/convlab2/base_models/t5/dst/run_multiwoz21.sh
@@ -0,0 +1,83 @@
+n_gpus=4
+task_name="dst"
+dataset_name="multiwoz21"
+speaker="user"
+context_window_size=100
+data_dir="data/${task_name}/${dataset_name}/${speaker}/context_${context_window_size}"
+output_dir="output/${task_name}/${dataset_name}/${speaker}/context_${context_window_size}"
+cache_dir="../cache"
+logging_dir="${output_dir}/runs"
+train_file="${data_dir}/train.json"
+validation_file="${data_dir}/validation.json"
+test_file="${data_dir}/test.json"
+metric_name_or_path="dst_metric.py"
+metric_for_best_model="accuracy"
+source_column="context"
+target_column="state_seq"
+truncation_side="left"
+max_source_length=512
+max_target_length=512
+model_name_or_path="t5-small"
+per_device_train_batch_size=64
+per_device_eval_batch_size=64
+gradient_accumulation_steps=2
+lr=1e-3
+num_train_epochs=10
+
+python ../create_data.py --tasks ${task_name} --datasets ${dataset_name} --speaker ${speaker} --context_window_size ${context_window_size}
+
+python -m torch.distributed.launch --master_port 29501 \
+    --nproc_per_node ${n_gpus} ../run_seq2seq.py \
+    --task_name ${task_name} \
+    --train_file ${train_file} \
+    --validation_file ${validation_file} \
+    --test_file ${test_file} \
+    --source_column ${source_column} \
+    --target_column ${target_column} \
+    --max_source_length ${max_source_length} \
+    --max_target_length ${max_target_length} \
+    --truncation_side ${truncation_side} \
+    --model_name_or_path ${model_name_or_path} \
+    --do_train \
+    --do_eval \
+    --do_predict \
+    --save_strategy epoch \
+    --evaluation_strategy epoch \
+    --load_best_model_at_end \
+    --metric_for_best_model ${metric_for_best_model} \
+    --predict_with_generate \
+    --metric_name_or_path ${metric_name_or_path} \
+    --cache_dir ${cache_dir} \
+    --output_dir ${output_dir} \
+    --logging_dir ${logging_dir} \
+    --overwrite_output_dir \
+    --preprocessing_num_workers 4 \
+    --per_device_train_batch_size ${per_device_train_batch_size} \
+    --per_device_eval_batch_size ${per_device_eval_batch_size} \
+    --gradient_accumulation_steps ${gradient_accumulation_steps} \
+    --learning_rate ${lr} \
+    --num_train_epochs ${num_train_epochs} \
+    --debug underflow_overflow \
+    --adafactor \
+    --gradient_checkpointing
+
+# python -m torch.distributed.launch \
+#     --nproc_per_node ${n_gpus} ../run_seq2seq.py \
+#     --task_name ${task_name} \
+#     --test_file ${test_file} \
+#     --source_column ${source_column} \
+#     --target_column ${target_column} \
+#     --max_source_length ${max_source_length} \
+#     --max_target_length ${max_target_length} \
+#     --truncation_side ${truncation_side} \
+#     --model_name_or_path ${output_dir} \
+#     --do_predict \
+#     --predict_with_generate \
+#     --metric_name_or_path ${metric_name_or_path} \
+#     --cache_dir ${cache_dir} \
+#     --output_dir ${output_dir} \
+#     --logging_dir ${logging_dir} \
+#     --overwrite_output_dir \
+#     --preprocessing_num_workers 4 \
+#     --per_device_eval_batch_size ${per_device_eval_batch_size} \
+
+python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
diff --git a/convlab2/base_models/t5/dst/serialization.py b/convlab2/base_models/t5/dst/serialization.py
new file mode 100644
index 0000000000000000000000000000000000000000..6ccf25ae76048a11834566163591366ae5cdc61a
--- /dev/null
+++ b/convlab2/base_models/t5/dst/serialization.py
@@ -0,0 +1,38 @@
+def serialize_dialogue_state(state):
+    state_seqs = []
+    for domain in state:
+        for slot, value in state[domain].items():
+            if len(value) > 0:
+                state_seqs.append(f'[{domain}][{slot}][{value}]')
+
+    return ';'.join(state_seqs)
+
+def deserialize_dialogue_state(state_seq):
+    state = {}
+    if len(state_seq) == 0:
+        return state
+    state_seqs = state_seq.split('];[')
+    for i, state_seq in enumerate(state_seqs):
+        if len(state_seq) == 0:
+            continue
+        if i == 0:
+            if state_seq[0] == '[':
+                state_seq = state_seq[1:]
+        if i == len(state_seqs) - 1:
+            if state_seq[-1] == ']':
+                state_seq = state_seq[:-1]
+        s = state_seq.split('][')
+        if len(s) != 3:
+            continue
+        domain, slot, value = s
+        state.setdefault(domain, {})
+        state[domain][slot] = value
+    return state
+
+def equal_state_seq(state, state_seq):
+    predict_state = deserialize_dialogue_state(state_seq)
+    svs = sorted([(domain, slot, value) for domain in state for slot, value in state[domain].items() if len(value)>0])
+    predict_svs = sorted([(domain, slot, value) for domain in predict_state for slot, value in predict_state[domain].items() if len(value)>0])
+    if svs != predict_svs:
+        return False
+    return True
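A quick round-trip through the serialization above, runnable once ConvLab-2 is installed; it also shows that empty slot values are dropped on both sides, which is why `equal_state_seq` compares only non-empty triples.

```python
from convlab2.base_models.t5.dst.serialization import (
    serialize_dialogue_state, deserialize_dialogue_state, equal_state_seq)

state = {'restaurant': {'price range': 'moderate', 'food': 'catalan', 'name': ''}}
seq = serialize_dialogue_state(state)
print(seq)
# [restaurant][price range][moderate];[restaurant][food][catalan]
# (the empty 'name' slot is dropped)

restored = deserialize_dialogue_state(seq)
print(restored)
# {'restaurant': {'price range': 'moderate', 'food': 'catalan'}}

assert equal_state_seq(state, seq)  # True: empty slots are ignored on both sides
```

Note that a value containing `]` or `;` could garble this bracket format; the `assert equal_state_seq(...)` in `create_data.py` exists to surface such cases at data-creation time.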
diff --git a/convlab2/base_models/t5/goal2dialogue/run_goal2dialogue.sh b/convlab2/base_models/t5/goal2dialogue/run_goal2dialogue.sh
index a60f77c2bbd9b2660b5616dcad8c9a162f728f12..09a2c33aa06fa5134dba0707e1df5e633ac9f269 100644
--- a/convlab2/base_models/t5/goal2dialogue/run_goal2dialogue.sh
+++ b/convlab2/base_models/t5/goal2dialogue/run_goal2dialogue.sh
@@ -8,7 +8,6 @@ logging_dir="${output_dir}/runs"
 train_file="${data_dir}/train.json"
 validation_file="${data_dir}/validation.json"
 test_file="${data_dir}/test.json"
-source_prefix="${data_dir}/source_prefix.txt"
 source_column="goal"
 target_column="dialogue"
 max_target_length=1024
@@ -30,7 +29,6 @@ python -m torch.distributed.launch \
     --source_column ${source_column} \
     --target_column ${target_column} \
     --max_target_length ${max_target_length} \
-    --source_prefix ${source_prefix} \
     --model_name_or_path ${model_name_or_path} \
     --do_train \
     --do_eval \
diff --git a/convlab2/base_models/t5/nlu/nlu_metric.py b/convlab2/base_models/t5/nlu/nlu_metric.py
index 1eb57c84a02bd1f019eb1978271cb53c3b3a1916..a2f3c0bc4f9a50464eebc67d220964c39a45f2be 100644
--- a/convlab2/base_models/t5/nlu/nlu_metric.py
+++ b/convlab2/base_models/t5/nlu/nlu_metric.py
@@ -83,13 +83,11 @@ class NLUMetrics(datasets.Metric):
             flag = True
             for da_type in ['binary', 'categorical', 'non-categorical']:
                 if da_type == 'binary':
-                    predicts = [(x['intent'], x['domain'], x['slot']) for x in pred_da[da_type]]
-                    labels = [(x['intent'], x['domain'], x['slot']) for x in gold_da[da_type]]
+                    predicts = sorted(list({(x['intent'], x['domain'], x['slot']) for x in pred_da[da_type]}))
+                    labels = sorted(list({(x['intent'], x['domain'], x['slot']) for x in gold_da[da_type]}))
                 else:
-                    predicts = [(x['intent'], x['domain'], x['slot'], ''.join(x['value'].split()).lower()) for x in pred_da[da_type]]
-                    labels = [(x['intent'], x['domain'], x['slot'], ''.join(x['value'].split()).lower()) for x in gold_da[da_type]]
-                predicts = sorted(list(set(predicts)))
-                labels = sorted(list(set(labels)))
+                    predicts = sorted(list({(x['intent'], x['domain'], x['slot'], ''.join(x['value'].split()).lower()) for x in pred_da[da_type]}))
+                    labels = sorted(list({(x['intent'], x['domain'], x['slot'], ''.join(x['value'].split()).lower()) for x in gold_da[da_type]}))
                 for ele in predicts:
                     if ele in labels:
                         f1_metrics['overall']['TP'] += 1
diff --git a/convlab2/base_models/t5/nlu/run_multiwoz21_user.sh b/convlab2/base_models/t5/nlu/run_multiwoz21_user.sh
index 85f3ec8302d161b29ba71b760a56d0f64a6b4dfc..4080a09249fbed8260ed8e0b0fab7961d9d9120a 100644
--- a/convlab2/base_models/t5/nlu/run_multiwoz21_user.sh
+++ b/convlab2/base_models/t5/nlu/run_multiwoz21_user.sh
@@ -12,7 +12,6 @@ validation_file="${data_dir}/validation.json"
 test_file="${data_dir}/test.json"
 metric_name_or_path="nlu_metric.py"
 metric_for_best_model="overall_f1"
-source_prefix="${data_dir}/source_prefix.txt"
 source_column="context"
 target_column="dialogue_acts_seq"
 model_name_or_path="t5-small"
@@ -30,7 +29,6 @@ python -m torch.distributed.launch \
     --train_file ${train_file} \
     --source_column ${source_column} \
     --target_column ${target_column} \
-    --source_prefix ${source_prefix} \
     --model_name_or_path ${model_name_or_path} \
     --do_train \
     --save_strategy epoch \
@@ -41,7 +39,6 @@ python -m torch.distributed.launch \
     --overwrite_output_dir \
     --preprocessing_num_workers 4 \
     --per_device_train_batch_size ${per_device_train_batch_size} \
-    --per_device_eval_batch_size ${per_device_eval_batch_size} \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
@@ -55,7 +52,6 @@ python -m torch.distributed.launch \
     --test_file ${test_file} \
     --source_column ${source_column} \
     --target_column ${target_column} \
-    --source_prefix ${source_prefix} \
     --model_name_or_path ${output_dir} \
     --do_predict \
     --predict_with_generate \
diff --git a/convlab2/base_models/t5/nlu/run_multiwoz21_user_context3.sh b/convlab2/base_models/t5/nlu/run_multiwoz21_user_context3.sh
index 8d7b5c93e8deb9c8c5da9ecd03e42bbc53341442..a8cbc9bd6c30b91a16c1e278450161992d8dca9a 100644
--- a/convlab2/base_models/t5/nlu/run_multiwoz21_user_context3.sh
+++ b/convlab2/base_models/t5/nlu/run_multiwoz21_user_context3.sh
@@ -12,7 +12,6 @@ validation_file="${data_dir}/validation.json"
 test_file="${data_dir}/test.json"
 metric_name_or_path="nlu_metric.py"
 metric_for_best_model="overall_f1"
-source_prefix="${data_dir}/source_prefix.txt"
 source_column="context"
 target_column="dialogue_acts_seq"
 model_name_or_path="t5-small"
@@ -30,7 +29,6 @@ python -m torch.distributed.launch \
     --train_file ${train_file} \
     --source_column ${source_column} \
     --target_column ${target_column} \
-    --source_prefix ${source_prefix} \
     --model_name_or_path ${model_name_or_path} \
     --do_train \
     --save_strategy epoch \
@@ -41,7 +39,6 @@ python -m torch.distributed.launch \
     --overwrite_output_dir \
     --preprocessing_num_workers 4 \
     --per_device_train_batch_size ${per_device_train_batch_size} \
-    --per_device_eval_batch_size ${per_device_eval_batch_size} \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
@@ -55,7 +52,6 @@ python -m torch.distributed.launch \
     --test_file ${test_file} \
     --source_column ${source_column} \
     --target_column ${target_column} \
-    --source_prefix ${source_prefix} \
     --model_name_or_path ${output_dir} \
     --do_predict \
     --predict_with_generate \
diff --git a/convlab2/base_models/t5/nlu/run_tm1_user.sh b/convlab2/base_models/t5/nlu/run_tm1_user.sh
index 16a16fdb106f09a7001190477de8b0878d2e20f3..9faedd7f664fb40ba20883033aaa1d3817d66c1c 100644
--- a/convlab2/base_models/t5/nlu/run_tm1_user.sh
+++ b/convlab2/base_models/t5/nlu/run_tm1_user.sh
@@ -12,7 +12,6 @@ validation_file="${data_dir}/validation.json"
 test_file="${data_dir}/test.json"
 metric_name_or_path="nlu_metric.py"
 metric_for_best_model="overall_f1"
-source_prefix="${data_dir}/source_prefix.txt"
 source_column="context"
 target_column="dialogue_acts_seq"
 model_name_or_path="t5-small"
@@ -30,7 +29,6 @@ python -m torch.distributed.launch \
     --train_file ${train_file} \
     --source_column ${source_column} \
     --target_column ${target_column} \
-    --source_prefix ${source_prefix} \
     --model_name_or_path ${model_name_or_path} \
     --do_train \
     --save_strategy epoch \
@@ -41,7 +39,6 @@ python -m torch.distributed.launch \
     --overwrite_output_dir \
     --preprocessing_num_workers 4 \
     --per_device_train_batch_size ${per_device_train_batch_size} \
-    --per_device_eval_batch_size ${per_device_eval_batch_size} \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
@@ -55,7 +52,6 @@ python -m torch.distributed.launch \
     --test_file ${test_file} \
     --source_column ${source_column} \
     --target_column ${target_column} \
-    --source_prefix ${source_prefix} \
     --model_name_or_path ${output_dir} \
     --do_predict \
     --predict_with_generate \
diff --git a/convlab2/base_models/t5/nlu/run_tm1_user_context3.sh b/convlab2/base_models/t5/nlu/run_tm1_user_context3.sh
index ccb67609279be5c4b044a9baadc19672d69c1532..bb6b55fe06c54bab7294a55d8abda30e959acf34 100644
--- a/convlab2/base_models/t5/nlu/run_tm1_user_context3.sh
+++ b/convlab2/base_models/t5/nlu/run_tm1_user_context3.sh
@@ -12,7 +12,6 @@ validation_file="${data_dir}/validation.json"
 test_file="${data_dir}/test.json"
 metric_name_or_path="nlu_metric.py"
 metric_for_best_model="overall_f1"
-source_prefix="${data_dir}/source_prefix.txt"
 source_column="context"
 target_column="dialogue_acts_seq"
 model_name_or_path="t5-small"
@@ -30,7 +29,6 @@ python -m torch.distributed.launch \
     --train_file ${train_file} \
     --source_column ${source_column} \
     --target_column ${target_column} \
-    --source_prefix ${source_prefix} \
     --model_name_or_path ${model_name_or_path} \
     --do_train \
     --save_strategy epoch \
@@ -41,7 +39,6 @@ python -m torch.distributed.launch \
     --overwrite_output_dir \
     --preprocessing_num_workers 4 \
     --per_device_train_batch_size ${per_device_train_batch_size} \
-    --per_device_eval_batch_size ${per_device_eval_batch_size} \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
@@ -55,7 +52,6 @@ python -m torch.distributed.launch \
     --test_file ${test_file} \
     --source_column ${source_column} \
     --target_column ${target_column} \
-    --source_prefix ${source_prefix} \
     --model_name_or_path ${output_dir} \
     --do_predict \
     --predict_with_generate \
diff --git a/convlab2/base_models/t5/nlu/run_tm2_user.sh b/convlab2/base_models/t5/nlu/run_tm2_user.sh
index 8686822fea882cb75776bee89dbd4344b71ea64b..728a8a94748c8344104fb9176fd8d2599580b11d 100644
--- a/convlab2/base_models/t5/nlu/run_tm2_user.sh
+++ b/convlab2/base_models/t5/nlu/run_tm2_user.sh
@@ -12,7 +12,6 @@ validation_file="${data_dir}/validation.json"
 test_file="${data_dir}/test.json"
 metric_name_or_path="nlu_metric.py"
 metric_for_best_model="overall_f1"
-source_prefix="${data_dir}/source_prefix.txt"
 source_column="context"
 target_column="dialogue_acts_seq"
 model_name_or_path="t5-small"
@@ -30,7 +29,6 @@ python -m torch.distributed.launch \
     --train_file ${train_file} \
     --source_column ${source_column} \
     --target_column ${target_column} \
-    --source_prefix ${source_prefix} \
     --model_name_or_path ${model_name_or_path} \
     --do_train \
     --save_strategy epoch \
@@ -41,7 +39,6 @@ python -m torch.distributed.launch \
     --overwrite_output_dir \
     --preprocessing_num_workers 4 \
     --per_device_train_batch_size ${per_device_train_batch_size} \
-    --per_device_eval_batch_size ${per_device_eval_batch_size} \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
@@ -55,7 +52,6 @@ python -m torch.distributed.launch \
     --test_file ${test_file} \
     --source_column ${source_column} \
     --target_column ${target_column} \
-    --source_prefix ${source_prefix} \
     --model_name_or_path ${output_dir} \
     --do_predict \
     --predict_with_generate \
diff --git a/convlab2/base_models/t5/nlu/run_tm2_user_context3.sh b/convlab2/base_models/t5/nlu/run_tm2_user_context3.sh
index 03c2489940e38dd16256f6b4f2683a413f514235..8ebb102dd99c22a9e6dc752c09b48b1538c77ad8 100644
--- a/convlab2/base_models/t5/nlu/run_tm2_user_context3.sh
+++ b/convlab2/base_models/t5/nlu/run_tm2_user_context3.sh
@@ -12,7 +12,6 @@ validation_file="${data_dir}/validation.json"
 test_file="${data_dir}/test.json"
 metric_name_or_path="nlu_metric.py"
 metric_for_best_model="overall_f1"
-source_prefix="${data_dir}/source_prefix.txt"
 source_column="context"
 target_column="dialogue_acts_seq"
 model_name_or_path="t5-small"
@@ -30,7 +29,6 @@ python -m torch.distributed.launch \
     --train_file ${train_file} \
     --source_column ${source_column} \
     --target_column ${target_column} \
-    --source_prefix ${source_prefix} \
     --model_name_or_path ${model_name_or_path} \
     --do_train \
     --save_strategy epoch \
@@ -41,7 +39,6 @@ python -m torch.distributed.launch \
     --overwrite_output_dir \
     --preprocessing_num_workers 4 \
     --per_device_train_batch_size ${per_device_train_batch_size} \
-    --per_device_eval_batch_size ${per_device_eval_batch_size} \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
@@ -55,7 +52,6 @@ python -m torch.distributed.launch \
     --test_file ${test_file} \
     --source_column ${source_column} \
     --target_column ${target_column} \
-    --source_prefix ${source_prefix} \
     --model_name_or_path ${output_dir} \
     --do_predict \
     --predict_with_generate \
diff --git a/convlab2/base_models/t5/nlu/run_tm3_user.sh b/convlab2/base_models/t5/nlu/run_tm3_user.sh
index 470cb7d71c2b7a630e6917912e21d2c61ca1c075..0d775f7ae41a63f72fc93539186b63aa2b4a551f 100644
--- a/convlab2/base_models/t5/nlu/run_tm3_user.sh
+++ b/convlab2/base_models/t5/nlu/run_tm3_user.sh
@@ -12,7 +12,6 @@ validation_file="${data_dir}/validation.json"
 test_file="${data_dir}/test.json"
 metric_name_or_path="nlu_metric.py"
 metric_for_best_model="overall_f1"
-source_prefix="${data_dir}/source_prefix.txt"
 source_column="context"
 target_column="dialogue_acts_seq"
 model_name_or_path="t5-small"
@@ -30,7 +29,6 @@ python -m torch.distributed.launch \
     --train_file ${train_file} \
     --source_column ${source_column} \
     --target_column ${target_column} \
-    --source_prefix ${source_prefix} \
     --model_name_or_path ${model_name_or_path} \
     --do_train \
     --save_strategy epoch \
@@ -41,7 +39,6 @@ python -m torch.distributed.launch \
     --overwrite_output_dir \
     --preprocessing_num_workers 4 \
     --per_device_train_batch_size ${per_device_train_batch_size} \
-    --per_device_eval_batch_size ${per_device_eval_batch_size} \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
@@ -55,7 +52,6 @@ python -m torch.distributed.launch \
     --test_file ${test_file} \
     --source_column ${source_column} \
     --target_column ${target_column} \
-    --source_prefix ${source_prefix} \
     --model_name_or_path ${output_dir} \
     --do_predict \
     --predict_with_generate \
diff --git a/convlab2/base_models/t5/nlu/run_tm3_user_context3.sh b/convlab2/base_models/t5/nlu/run_tm3_user_context3.sh
index 5e325d1fe2b127ef1af0b0733dd5db03bb1cbe3c..c8a4a9f6b5e77ee6e05ae36aea2e002202243c72 100644
--- a/convlab2/base_models/t5/nlu/run_tm3_user_context3.sh
+++ b/convlab2/base_models/t5/nlu/run_tm3_user_context3.sh
@@ -12,7 +12,6 @@ validation_file="${data_dir}/validation.json"
 test_file="${data_dir}/test.json"
 metric_name_or_path="nlu_metric.py"
 metric_for_best_model="overall_f1"
-source_prefix="${data_dir}/source_prefix.txt"
 source_column="context"
 target_column="dialogue_acts_seq"
 model_name_or_path="t5-small"
@@ -30,7 +29,6 @@ python -m torch.distributed.launch \
     --train_file ${train_file} \
     --source_column ${source_column} \
     --target_column ${target_column} \
-    --source_prefix ${source_prefix} \
     --model_name_or_path ${model_name_or_path} \
     --do_train \
     --save_strategy epoch \
@@ -41,7 +39,6 @@ python -m torch.distributed.launch \
     --overwrite_output_dir \
     --preprocessing_num_workers 4 \
     --per_device_train_batch_size ${per_device_train_batch_size} \
-    --per_device_eval_batch_size ${per_device_eval_batch_size} \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --learning_rate ${lr} \
     --num_train_epochs ${num_train_epochs} \
@@ -55,7 +52,6 @@ python -m torch.distributed.launch \
     --test_file ${test_file} \
     --source_column ${source_column} \
     --target_column ${target_column} \
-    --source_prefix ${source_prefix} \
     --model_name_or_path ${output_dir} \
     --do_predict \
     --predict_with_generate \
"data/${task_name}/${name}/${speaker}/train.json" >> ${train_file} +# cat "data/${task_name}/${name}/${speaker}/validation.json" >> ${validation_file} +# cat "data/${task_name}/${name}/${speaker}/test.json" >> ${test_file} +# fi +# done python -m torch.distributed.launch \ --nproc_per_node ${n_gpus} ../run_seq2seq.py \ @@ -29,7 +43,9 @@ python -m torch.distributed.launch \ --test_file ${test_file} \ --source_column ${source_column} \ --target_column ${target_column} \ - --source_prefix ${source_prefix} \ + --max_source_length ${max_source_length} \ + --max_target_length ${max_target_length} \ + --truncation_side ${truncation_side} \ --model_name_or_path ${model_name_or_path} \ --do_train \ --do_eval \ diff --git a/convlab2/base_models/t5/run_seq2seq.py b/convlab2/base_models/t5/run_seq2seq.py index e9348f5579d35dffe85b9b7e9b90cedf176d0de3..dace9713d540b7fe2aa1c552132cc4c54d698989 100644 --- a/convlab2/base_models/t5/run_seq2seq.py +++ b/convlab2/base_models/t5/run_seq2seq.py @@ -47,7 +47,7 @@ from transformers.utils.versions import require_version # Will error if the minimal version of Transformers is not installed. Remove at your own risks. -check_min_version("4.12.5") +check_min_version("4.17.0") require_version("datasets>=1.16.1") @@ -78,6 +78,10 @@ class ModelArguments: default=True, metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, ) + truncation_side: Optional[str] = field( + default="right", + metadata={"help": "Which side to truncate, left or right."} + ) model_revision: str = field( default="main", metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, @@ -341,6 +345,7 @@ def main(): model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_fast=model_args.use_fast_tokenizer, + truncation_side=model_args.truncation_side, revision=model_args.model_revision, use_auth_token=True if model_args.use_auth_token else None, ) @@ -382,10 +387,11 @@ def main(): ) if data_args.source_prefix_filepath is not None: - prefix = open(data_args.source_prefix_filepath, 'r', encoding='utf-8').readline().strip() + prefix = open(data_args.source_prefix_filepath, 'r', encoding='utf-8').readline().strip('\n') else: prefix = "" + logger.info(f'source prefix: "{prefix}"') # Preprocessing the datasets. # We need to tokenize inputs and targets.