Commit 2d5519ce authored by Carel van Niekerk

Merge branch 'github_master' into setsumbt

parents 871a5bbd bca7f9ad
Showing changed files with 545 additions and 34 deletions
@@ -3,7 +3,7 @@ import json
 from tqdm import tqdm
 import re
 from transformers import AutoTokenizer
-from convlab.util import load_dataset, load_nlu_data, load_dst_data, load_policy_data, load_nlg_data, load_e2e_data, load_rg_data
+from convlab.util import load_dataset, load_nlu_data, load_dst_data, load_policy_data, load_nlg_data, load_e2e_data, load_rg_data, retrieve_utterances
 from convlab.base_models.t5.nlu.serialization import serialize_dialogue_acts, deserialize_dialogue_acts, equal_da_seq
 from convlab.base_models.t5.dst.serialization import serialize_dialogue_state, deserialize_dialogue_state, equal_state_seq
@@ -85,6 +85,9 @@ def create_nlg_data(dataset, data_dir, args):
         data = []
         for sample in tqdm(data_by_split[data_split], desc=f'{data_split} sample', leave=False):
             dialogue_acts_seq = serialize_dialogue_acts(sample['dialogue_acts'])
+            if len(dialogue_acts_seq) == 0:
+                # skip empty dialogue acts
+                continue
             if args.context_window_size>0:
                 context = '\n'.join([f"{turn['speaker']}: {turn['utterance']}" for turn in sample['context']]+[f'{sample["speaker"]}: '])
                 context = f'{dialogue_acts_seq}\n\n{context}'
@@ -117,6 +120,59 @@ def create_goal2dialogue_data(dataset, data_dir, args):
         data_by_split[data_split] = data
     return data_by_split

+def create_retnlu_data(dataset, data_dir, args):
+    dataset_name = dataset[list(dataset.keys())[0]][0]['dataset']
+    data_by_split = load_nlu_data(dataset, speaker=args.speaker, use_context=args.context_window_size>0, context_window_size=args.context_window_size)
+    data_dir = os.path.join(data_dir, args.speaker, f'context_{args.context_window_size}', \
+                            f'in_context_{args.retrieval_in_context}', f'topk_{args.retrieval_topk}')
+    os.makedirs(data_dir, exist_ok=True)
+
+    # build the retrieval pool from the training splits of the auxiliary datasets,
+    # keeping only turns that have at least one dialogue act
+    turn_pool = []
+    for d in args.retrieval_datasets:
+        pool_dataset = load_dataset(d)
+        for turn in load_nlu_data(pool_dataset, data_split='train', speaker=args.speaker)['train']:
+            if any([len(das) > 0 for da_type, das in turn['dialogue_acts'].items()]):
+                turn_pool.append({'dataset': d, **turn})
+
+    data_splits = data_by_split.keys()
+    query_turns = []
+    for data_split in data_splits:
+        query_turns.extend(data_by_split[data_split])
+    augmented_dataset = retrieve_utterances(query_turns, turn_pool, args.retrieval_topk, 'all-MiniLM-L6-v2')
+
+    i = 0
+    for data_split in data_splits:
+        data = []
+        for j in tqdm(range(len(data_by_split[data_split])), desc=f'{data_split} sample', leave=False):
+            sample = augmented_dataset[i+j]
+            response = f"{sample['speaker']}: {sample['utterance']}"
+            if args.context_window_size>0:
+                context = '\n'.join([f"{turn['speaker']}: {turn['utterance']}" for turn in sample['context']]+[response])
+            else:
+                context = response
+            context = ' '.join([dataset_name, context])
+            dialogue_acts_seq = serialize_dialogue_acts(sample['dialogue_acts'])
+            assert equal_da_seq(sample['dialogue_acts'], dialogue_acts_seq), print(sample['dialogue_acts'], dialogue_acts_seq, deserialize_dialogue_acts(dialogue_acts_seq))
+
+            retrieved_turns = sample['retrieved_turns']
+            for t in retrieved_turns:
+                # in-context learning: prepend retrieved (utterance => acts) pairs to the query
+                retrieved_utterance = f"{t['dataset']} {t['speaker']}: {t['utterance']}"
+                retrieved_dialogue_acts_seq = serialize_dialogue_acts(t['dialogue_acts'])
+                if args.retrieval_in_context:
+                    context = f"{retrieved_utterance} => {retrieved_dialogue_acts_seq}\n\n" + context
+                elif data_split != 'test':
+                    # otherwise use retrieved turns as extra train/validation samples
+                    data.append(json.dumps({'context': retrieved_utterance, 'dialogue_acts_seq': retrieved_dialogue_acts_seq}, ensure_ascii=False)+'\n')
+
+            data.append(json.dumps({'context': context, 'dialogue_acts_seq': dialogue_acts_seq}, ensure_ascii=False)+'\n')
+        i += len(data_by_split[data_split])
+
+        file_name = os.path.join(data_dir, f"{data_split}.json")
+        with open(file_name, "w", encoding='utf-8') as f:
+            f.writelines(data)
+        data_by_split[data_split] = data
+    return data_by_split

 def get_max_len(data_by_split, tokenizer):
     for data_split in data_by_split.keys():
         seq_len = {}
@@ -133,22 +189,25 @@ def get_max_len(data_by_split, tokenizer):
 if __name__ == '__main__':
     from argparse import ArgumentParser
     parser = ArgumentParser(description="create data for seq2seq training")
-    parser.add_argument('--tasks', '-t', metavar='task_name', nargs='*', choices=['rg', 'nlu', 'dst', 'nlg', 'goal2dialogue'], help='names of tasks')
+    parser.add_argument('--tasks', '-t', metavar='task_name', nargs='*', choices=['rg', 'nlu', 'dst', 'nlg', 'goal2dialogue', 'retnlu', 'retnlg'], help='names of tasks')
     parser.add_argument('--datasets', '-d', metavar='dataset_name', nargs='*', help='names of unified datasets')
     parser.add_argument('--speaker', '-s', type=str, choices=['user', 'system', 'all'], help='speaker(s)')
     parser.add_argument('--context_window_size', '-c', type=int, default=0, help='how many contextual utterances are considered')
     parser.add_argument('--len_tokenizer', '-l', type=str, default=None, help='name or path of the tokenizer used to compute sequence lengths')
     parser.add_argument('--ratio', '-r', type=float, default=None, help='ratio of data used for training and evaluation')
     parser.add_argument('--dial_ids_order', '-o', type=int, default=None, help='which data order is used for experiments')
+    parser.add_argument('--retrieval_datasets', metavar='dataset_name', nargs='*', help='names of unified datasets used as the retrieval pool')
+    parser.add_argument('--retrieval_topk', type=int, default=3, help='how many utterances to retrieve')
+    parser.add_argument('--retrieval_in_context', action='store_true', default=False, help='whether to use the retrieved utterances for in-context learning')
     args = parser.parse_args()
     print(args)
     if args.len_tokenizer:
         tokenizer = AutoTokenizer.from_pretrained(args.len_tokenizer)
     for dataset_name in tqdm(args.datasets, desc='datasets'):
-        dataset = load_dataset(dataset_name, args.dial_ids_order)
         if args.ratio:
-            dataset['train'] = dataset['train'][:round(len(dataset['train'])*args.ratio)]
-            dataset['validation'] = dataset['validation'][:round(len(dataset['validation'])*args.ratio)]
+            dataset = load_dataset(dataset_name, dial_ids_order=args.dial_ids_order, split2ratio={'train': args.ratio, 'validation': args.ratio})
+        else:
+            dataset = load_dataset(dataset_name, args.dial_ids_order)
         for task_name in tqdm(args.tasks, desc='tasks', leave=False):
             data_dir = os.path.join('data', task_name, (dataset_name if not args.ratio else f'{dataset_name}_{args.ratio}_order{args.dial_ids_order}'))
             data_by_split = eval(f"create_{task_name}_data")(dataset, data_dir, args)
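
For orientation, here is a minimal sketch of the two ways create_retnlu_data above uses a retrieved turn. The utterance strings and the serialized-act string are invented for illustration; only the `dataset utterance => acts` layout and the blank-line separator come from the diff:

    # Sketch only: values below are illustrative, not real convlab output.
    retrieved_utterance = "sgd user: i want to book a flight"
    retrieved_acts = "[inform][flight]"  # hypothetical serialize_dialogue_acts output
    query_context = "multiwoz21 user: i need a train to cambridge"

    # with --retrieval_in_context, retrieved (utterance => acts) pairs are
    # prepended to the query, forming one in-context-learning source sequence
    source = f"{retrieved_utterance} => {retrieved_acts}\n\n{query_context}"

    # otherwise (train/validation only), each retrieved turn becomes an
    # extra training sample of its own
    extra = {"context": retrieved_utterance, "dialogue_acts_seq": retrieved_acts}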
......
import json
import os
import sys

if __name__ == '__main__':
    merged_data = {'train': [], 'validation': [], 'test': []}
    print(sys.argv)
    for dataset_name in sys.argv[1:]:
        data_dir = os.path.join('data/dst', dataset_name, 'user/context_100')
        for data_split in merged_data:
            with open(os.path.join(data_dir, f'{data_split}.json'), 'r') as f:
                for line in f:
                    item = json.loads(line)
                    item['context'] = f"{dataset_name}: {item['context']}"
                    merged_data[data_split].append(item)
    for data_split in merged_data:
        data_dir = os.path.join('data/dst', '+'.join(sys.argv[1:]), 'user/context_100')
        os.makedirs(data_dir, exist_ok=True)
        with open(os.path.join(data_dir, f'{data_split}.json'), 'w') as f:
            for item in merged_data[data_split]:
                f.write(json.dumps(item)+'\n')
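
merge_data.py is invoked from the multitask scripts below as `python merge_data.py sgd tm1 tm2 tm3 multiwoz21`; its effect on each JSON-lines record is just a dataset-name prefix on the source column, e.g. (illustrative values):

    import json

    # one per-dataset record as produced by create_data.py (values invented)
    item = {"context": "user: i need a train to cambridge",
            "state_seq": "[train][destination][cambridge]"}

    # merge_data.py prefixes the source column with the dataset of origin,
    # so the multitask model can condition on the dataset name
    item["context"] = "multiwoz21: " + item["context"]
    print(json.dumps(item, ensure_ascii=False))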
@@ -4,10 +4,8 @@ from convlab.util import load_dataset, load_dst_data
 from convlab.base_models.t5.dst.serialization import deserialize_dialogue_state

-def merge(dataset_name, speaker, save_dir, context_window_size, predict_result):
+def merge(dataset_names, speaker, save_dir, context_window_size, predict_result):
     assert os.path.exists(predict_result)
-    dataset = load_dataset(dataset_name, args.dial_ids_order)
-    data = load_dst_data(dataset, data_split='test', speaker=speaker, use_context=context_window_size>0, context_window_size=context_window_size)['test']

     if save_dir is None:
         save_dir = os.path.dirname(predict_result)
@@ -15,10 +13,19 @@ def merge(dataset_name, speaker, save_dir, context_window_size, predict_result):
     os.makedirs(save_dir, exist_ok=True)

     predict_result = [deserialize_dialogue_state(json.loads(x)['predictions'].strip()) for x in open(predict_result)]

-    for sample, prediction in zip(data, predict_result):
-        sample['predictions'] = {'state': prediction}
+    merged = []
+    i = 0
+    for dataset_name in dataset_names.split('+'):
+        print(dataset_name)
+        dataset = load_dataset(dataset_name, args.dial_ids_order)
+        data = load_dst_data(dataset, data_split='test', speaker=speaker, use_context=context_window_size>0, context_window_size=context_window_size)['test']
+        for sample in data:
+            sample['predictions'] = {'state': predict_result[i]}
+            i += 1
+            merged.append(sample)

-    json.dump(data, open(os.path.join(save_dir, 'predictions.json'), 'w', encoding='utf-8'), indent=2, ensure_ascii=False)
+    json.dump(merged, open(os.path.join(save_dir, 'predictions.json'), 'w', encoding='utf-8'), indent=2, ensure_ascii=False)

 if __name__ == '__main__':
......
@@ -40,7 +40,7 @@ python ../run_seq2seq.py \
     --do_eval \
     --save_strategy epoch \
     --evaluation_strategy epoch \
-    --save_total_limit 3 \
+    --save_total_limit 1 \
     --prediction_loss_only \
     --cache_dir ${cache_dir} \
     --output_dir ${output_dir} \
......
@@ -40,7 +40,7 @@ python ../run_seq2seq.py \
     --do_eval \
     --save_strategy epoch \
     --evaluation_strategy epoch \
-    --save_total_limit 3 \
+    --save_total_limit 1 \
     --early_stopping_patience 10 \
     --prediction_loss_only \
     --load_best_model_at_end \
......
n_gpus=1
task_name="dst"
dataset_name="sgd+tm1+tm2+tm3+multiwoz21"
speaker="user"
context_window_size=100
data_dir="data/${task_name}/${dataset_name}/${speaker}/context_${context_window_size}"
output_dir="output/${task_name}/${dataset_name}/${speaker}/context_${context_window_size}"
cache_dir="../cache"
logging_dir="${output_dir}/runs"
train_file="${data_dir}/train.json"
validation_file="${data_dir}/validation.json"
test_file="${data_dir}/test.json"
metric_name_or_path="dst_metric.py"
metric_for_best_model="accuracy"
source_column="context"
target_column="state_seq"
truncation_side="left"
max_source_length=1024
max_target_length=512
model_name_or_path="t5-small"
per_device_train_batch_size=64
per_device_eval_batch_size=64
gradient_accumulation_steps=2
lr=1e-3
num_train_epochs=10

names=$(echo ${dataset_name} | tr "+" "\n")
rm -r ${data_dir}
mkdir -p ${data_dir}
for name in ${names};
do
    echo "preprocessing ${name}"
    # python ../create_data.py -t ${task_name} -d ${name} -s ${speaker} -c ${context_window_size}
done
python merge_data.py $(echo ${dataset_name} | tr "+" " ")

python ../run_seq2seq.py \
    --task_name ${task_name} \
    --train_file ${train_file} \
    --validation_file ${validation_file} \
    --source_column ${source_column} \
    --target_column ${target_column} \
    --max_source_length ${max_source_length} \
    --max_target_length ${max_target_length} \
    --truncation_side ${truncation_side} \
    --model_name_or_path ${model_name_or_path} \
    --do_train \
    --do_eval \
    --save_strategy epoch \
    --evaluation_strategy epoch \
    --save_total_limit 1 \
    --prediction_loss_only \
    --cache_dir ${cache_dir} \
    --output_dir ${output_dir} \
    --logging_dir ${logging_dir} \
    --overwrite_output_dir \
    --preprocessing_num_workers 4 \
    --per_device_train_batch_size ${per_device_train_batch_size} \
    --per_device_eval_batch_size ${per_device_eval_batch_size} \
    --gradient_accumulation_steps ${gradient_accumulation_steps} \
    --learning_rate ${lr} \
    --num_train_epochs ${num_train_epochs} \
    --adafactor \
    --gradient_checkpointing

python ../run_seq2seq.py \
    --task_name ${task_name} \
    --test_file ${test_file} \
    --source_column ${source_column} \
    --target_column ${target_column} \
    --max_source_length ${max_source_length} \
    --max_target_length ${max_target_length} \
    --truncation_side ${truncation_side} \
    --model_name_or_path ${output_dir} \
    --do_predict \
    --predict_with_generate \
    --metric_name_or_path ${metric_name_or_path} \
    --cache_dir ${cache_dir} \
    --output_dir ${output_dir} \
    --logging_dir ${logging_dir} \
    --overwrite_output_dir \
    --preprocessing_num_workers 4 \
    --per_device_train_batch_size ${per_device_train_batch_size} \
    --per_device_eval_batch_size ${per_device_eval_batch_size} \
    --gradient_accumulation_steps ${gradient_accumulation_steps} \
    --learning_rate ${lr} \
    --num_train_epochs ${num_train_epochs} \
    --adafactor \
    --gradient_checkpointing

python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json

python ../../../dst/evaluate_unified_datasets.py -p ${output_dir}/predictions.json
import json
import os
import sys

if __name__ == '__main__':
    merged_data = {'train': [], 'validation': [], 'test': []}
    print(sys.argv)
    for dataset_name in sys.argv[1:]:
        data_dir = os.path.join('data/nlg', dataset_name, 'system/context_0')
        for data_split in merged_data:
            with open(os.path.join(data_dir, f'{data_split}.json'), 'r') as f:
                for line in f:
                    item = json.loads(line)
                    item['context+da'] = f"{dataset_name}: {item['context+da']}"
                    merged_data[data_split].append(item)
    for data_split in merged_data:
        data_dir = os.path.join('data/nlg', '+'.join(sys.argv[1:]), 'system/context_0')
        os.makedirs(data_dir, exist_ok=True)
        with open(os.path.join(data_dir, f'{data_split}.json'), 'w') as f:
            for item in merged_data[data_split]:
                f.write(json.dumps(item)+'\n')
@@ -3,10 +3,8 @@ import os
 from convlab.util import load_dataset, load_nlg_data

-def merge(dataset_name, speaker, save_dir, context_window_size, predict_result):
+def merge(dataset_names, speaker, save_dir, context_window_size, predict_result):
     assert os.path.exists(predict_result)
-    dataset = load_dataset(dataset_name, args.dial_ids_order)
-    data = load_nlg_data(dataset, data_split='test', speaker=speaker, use_context=context_window_size>0, context_window_size=context_window_size)['test']

     if save_dir is None:
         save_dir = os.path.dirname(predict_result)
@@ -14,10 +12,21 @@ def merge(dataset_name, speaker, save_dir, context_window_size, predict_result):
     os.makedirs(save_dir, exist_ok=True)

     predict_result = [json.loads(x)['predictions'].strip() for x in open(predict_result)]

-    for sample, prediction in zip(data, predict_result):
-        sample['predictions'] = {'utterance': prediction}
+    merged = []
+    i = 0
+    for dataset_name in dataset_names.split('+'):
+        print(dataset_name)
+        dataset = load_dataset(dataset_name, args.dial_ids_order)
+        data = load_nlg_data(dataset, data_split='test', speaker=speaker, use_context=context_window_size>0, context_window_size=context_window_size)['test']
+        for sample in data:
+            if all([len(sample['dialogue_acts'][da_type])==0 for da_type in sample['dialogue_acts']]):
+                # skip samples with all-empty dialogue acts: no prediction was generated for them
+                continue
+            sample['predictions'] = {'utterance': predict_result[i]}
+            i += 1
+            merged.append(sample)

-    json.dump(data, open(os.path.join(save_dir, 'predictions.json'), 'w', encoding='utf-8'), indent=2, ensure_ascii=False)
+    json.dump(merged, open(os.path.join(save_dir, 'predictions.json'), 'w', encoding='utf-8'), indent=2, ensure_ascii=False)

 if __name__ == '__main__':
......
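Note the pairing between the two halves of this commit: create_nlg_data now drops samples whose serialized dialogue acts are empty, so merge_predict_res.py must skip the same samples to keep the lines of generated_predictions.json aligned with the test set. A rough sketch of the invariant, with dummy data (the act-type keys follow the unified format; real samples come from load_nlg_data):

    # One generated line exists per kept (non-empty-acts) test sample.
    test_data = [
        {'dialogue_acts': {'categorical': [], 'non-categorical': [], 'binary': []}},  # skipped
        {'dialogue_acts': {'categorical': [], 'non-categorical': [],
                           'binary': [{'intent': 'thank'}]}},                          # kept
    ]
    predict_result = ['you are welcome. anything else?']

    kept = [s for s in test_data
            if not all(len(v) == 0 for v in s['dialogue_acts'].values())]
    assert len(kept) == len(predict_result)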
@@ -73,6 +73,7 @@ class NLGMetrics(datasets.Metric):
     def _compute(self, predictions, references):
         """Returns the scores: bleu"""
+        references = [" " if ref=="" else ref for ref in references]
         bleu = sacrebleu.corpus_bleu(predictions, [references], lowercase=True).score
         return {
@@ -40,7 +40,7 @@ python ../run_seq2seq.py \
     --do_eval \
     --save_strategy epoch \
     --evaluation_strategy epoch \
-    --save_total_limit 3 \
+    --save_total_limit 1 \
     --prediction_loss_only \
     --cache_dir ${cache_dir} \
     --output_dir ${output_dir} \
......
@@ -42,7 +42,7 @@ python ../run_seq2seq.py \
     --do_eval \
     --save_strategy epoch \
     --evaluation_strategy epoch \
-    --save_total_limit 3 \
+    --save_total_limit 1 \
     --prediction_loss_only \
     --load_best_model_at_end \
     --cache_dir ${cache_dir} \
......
n_gpus=1
task_name="nlg"
dataset_name="sgd+tm1+tm2+tm3+multiwoz21"
speaker="system"
context_window_size=0
data_dir="data/${task_name}/${dataset_name}/${speaker}/context_${context_window_size}"
output_dir="output/${task_name}/${dataset_name}/${speaker}/context_${context_window_size}"
cache_dir="../cache"
logging_dir="${output_dir}/runs"
train_file="${data_dir}/train.json"
validation_file="${data_dir}/validation.json"
test_file="${data_dir}/test.json"
metric_name_or_path="nlg_metric.py"
metric_for_best_model="bleu"
source_column="context+da"
target_column="response"
truncation_side="left"
max_source_length=512
max_target_length=512
model_name_or_path="t5-small"
per_device_train_batch_size=64
per_device_eval_batch_size=64
gradient_accumulation_steps=8
lr=1e-3
num_train_epochs=10

names=$(echo ${dataset_name} | tr "+" "\n")
rm -r ${data_dir}
mkdir -p ${data_dir}
for name in ${names};
do
    echo "preprocessing ${name}"
    python ../create_data.py -t ${task_name} -d ${name} -s ${speaker} -c ${context_window_size}
done
python merge_data.py $(echo ${dataset_name} | tr "+" " ")

python ../run_seq2seq.py \
    --task_name ${task_name} \
    --train_file ${train_file} \
    --validation_file ${validation_file} \
    --source_column ${source_column} \
    --target_column ${target_column} \
    --max_source_length ${max_source_length} \
    --max_target_length ${max_target_length} \
    --truncation_side ${truncation_side} \
    --model_name_or_path ${model_name_or_path} \
    --do_train \
    --do_eval \
    --save_strategy epoch \
    --evaluation_strategy epoch \
    --save_total_limit 1 \
    --prediction_loss_only \
    --cache_dir ${cache_dir} \
    --output_dir ${output_dir} \
    --logging_dir ${logging_dir} \
    --overwrite_output_dir \
    --preprocessing_num_workers 4 \
    --per_device_train_batch_size ${per_device_train_batch_size} \
    --per_device_eval_batch_size ${per_device_eval_batch_size} \
    --gradient_accumulation_steps ${gradient_accumulation_steps} \
    --learning_rate ${lr} \
    --num_train_epochs ${num_train_epochs} \
    --adafactor \
    --gradient_checkpointing

python ../run_seq2seq.py \
    --task_name ${task_name} \
    --test_file ${test_file} \
    --source_column ${source_column} \
    --target_column ${target_column} \
    --max_source_length ${max_source_length} \
    --max_target_length ${max_target_length} \
    --truncation_side ${truncation_side} \
    --model_name_or_path ${output_dir} \
    --do_predict \
    --predict_with_generate \
    --metric_name_or_path ${metric_name_or_path} \
    --cache_dir ${cache_dir} \
    --output_dir ${output_dir} \
    --logging_dir ${logging_dir} \
    --overwrite_output_dir \
    --preprocessing_num_workers 4 \
    --per_device_train_batch_size ${per_device_train_batch_size} \
    --per_device_eval_batch_size ${per_device_eval_batch_size} \
    --gradient_accumulation_steps ${gradient_accumulation_steps} \
    --learning_rate ${lr} \
    --num_train_epochs ${num_train_epochs} \
    --adafactor \
    --gradient_checkpointing

python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json

# python ../../../nlg/evaluate_unified_datasets.py -p ${output_dir}/predictions.json --dataset_name ${dataset_name}
@@ -31,13 +31,10 @@ for name in ${names};
 do
     echo "preprocessing ${name}"
     python ../create_data.py -t ${task_name} -d ${name} -s ${speaker} -c ${context_window_size}
-    if [ "${name}" != "${dataset_name}" ]; then
-        cat "data/${task_name}/${name}/${speaker}/context_${context_window_size}/train.json" >> ${train_file}
-        cat "data/${task_name}/${name}/${speaker}/context_${context_window_size}/validation.json" >> ${validation_file}
-        cat "data/${task_name}/${name}/${speaker}/context_${context_window_size}/test.json" >> ${test_file}
-    fi
 done
+python merge_data.py $(echo ${dataset_name} | tr "+" " ")

 python ../run_seq2seq.py \
     --task_name ${task_name} \
     --train_file ${train_file} \
......
import json
import os
import sys

if __name__ == '__main__':
    merged_data = {'train': [], 'validation': [], 'test': []}
    print(sys.argv)
    for dataset_name in sys.argv[1:]:
        data_dir = os.path.join('data/nlu', dataset_name, 'user/context_0')
        for data_split in merged_data:
            with open(os.path.join(data_dir, f'{data_split}.json'), 'r') as f:
                for line in f:
                    item = json.loads(line)
                    item['context'] = f"{dataset_name}: {item['context']}"
                    merged_data[data_split].append(item)
    for data_split in merged_data:
        data_dir = os.path.join('data/nlu', '+'.join(sys.argv[1:]), 'user/context_0')
        os.makedirs(data_dir, exist_ok=True)
        with open(os.path.join(data_dir, f'{data_split}.json'), 'w') as f:
            for item in merged_data[data_split]:
                f.write(json.dumps(item)+'\n')
@@ -4,10 +4,8 @@ from convlab.util import load_dataset, load_nlu_data
 from convlab.base_models.t5.nlu.serialization import deserialize_dialogue_acts

-def merge(dataset_name, speaker, save_dir, context_window_size, predict_result):
+def merge(dataset_names, speaker, save_dir, context_window_size, predict_result):
     assert os.path.exists(predict_result)
-    dataset = load_dataset(dataset_name, args.dial_ids_order)
-    data = load_nlu_data(dataset, data_split='test', speaker=speaker, use_context=context_window_size>0, context_window_size=context_window_size)['test']

     if save_dir is None:
         save_dir = os.path.dirname(predict_result)
@@ -15,10 +13,19 @@ def merge(dataset_name, speaker, save_dir, context_window_size, predict_result):
     os.makedirs(save_dir, exist_ok=True)

     predict_result = [deserialize_dialogue_acts(json.loads(x)['predictions'].strip()) for x in open(predict_result)]

-    for sample, prediction in zip(data, predict_result):
-        sample['predictions'] = {'dialogue_acts': prediction}
+    merged = []
+    i = 0
+    for dataset_name in dataset_names.split('+'):
+        print(dataset_name)
+        dataset = load_dataset(dataset_name, args.dial_ids_order)
+        data = load_nlu_data(dataset, data_split='test', speaker=speaker, use_context=context_window_size>0, context_window_size=context_window_size)['test']
+        for sample in data:
+            sample['predictions'] = {'dialogue_acts': predict_result[i]}
+            i += 1
+            merged.append(sample)

-    json.dump(data, open(os.path.join(save_dir, 'predictions.json'), 'w', encoding='utf-8'), indent=2, ensure_ascii=False)
+    json.dump(merged, open(os.path.join(save_dir, 'predictions.json'), 'w', encoding='utf-8'), indent=2, ensure_ascii=False)

 if __name__ == '__main__':
......
@@ -40,7 +40,7 @@ python ../run_seq2seq.py \
     --do_eval \
     --save_strategy epoch \
     --evaluation_strategy epoch \
-    --save_total_limit 3 \
+    --save_total_limit 1 \
     --prediction_loss_only \
     --cache_dir ${cache_dir} \
     --output_dir ${output_dir} \
......
@@ -42,7 +42,7 @@ python ../run_seq2seq.py \
     --do_eval \
     --save_strategy epoch \
     --evaluation_strategy epoch \
-    --save_total_limit 3 \
+    --save_total_limit 1 \
     --prediction_loss_only \
     --load_best_model_at_end \
     --cache_dir ${cache_dir} \
......
n_gpus=1
task_name="nlu"
dataset_name="tm1+tm2+tm3"
speaker="user"
context_window_size=0
data_dir="data/${task_name}/${dataset_name}/${speaker}/context_${context_window_size}"
output_dir="output/${task_name}/${dataset_name}/${speaker}/context_${context_window_size}"
cache_dir="../cache"
logging_dir="${output_dir}/runs"
train_file="${data_dir}/train.json"
validation_file="${data_dir}/validation.json"
test_file="${data_dir}/test.json"
metric_name_or_path="nlu_metric.py"
metric_for_best_model="overall_f1"
source_column="context"
target_column="dialogue_acts_seq"
truncation_side="left"
max_source_length=512
max_target_length=512
model_name_or_path="t5-small"
per_device_train_batch_size=128
per_device_eval_batch_size=64
gradient_accumulation_steps=2
lr=1e-3
num_train_epochs=10

names=$(echo ${dataset_name} | tr "+" "\n")
rm -r ${data_dir}
mkdir -p ${data_dir}
for name in ${names};
do
    echo "preprocessing ${name}"
    python ../create_data.py -t ${task_name} -d ${name} -s ${speaker} -c ${context_window_size}
done
python merge_data.py $(echo ${dataset_name} | tr "+" " ")

python ../run_seq2seq.py \
    --task_name ${task_name} \
    --train_file ${train_file} \
    --validation_file ${validation_file} \
    --source_column ${source_column} \
    --target_column ${target_column} \
    --max_source_length ${max_source_length} \
    --max_target_length ${max_target_length} \
    --truncation_side ${truncation_side} \
    --model_name_or_path ${model_name_or_path} \
    --do_train \
    --do_eval \
    --save_strategy epoch \
    --evaluation_strategy epoch \
    --save_total_limit 1 \
    --prediction_loss_only \
    --cache_dir ${cache_dir} \
    --output_dir ${output_dir} \
    --logging_dir ${logging_dir} \
    --overwrite_output_dir \
    --preprocessing_num_workers 4 \
    --per_device_train_batch_size ${per_device_train_batch_size} \
    --per_device_eval_batch_size ${per_device_eval_batch_size} \
    --gradient_accumulation_steps ${gradient_accumulation_steps} \
    --learning_rate ${lr} \
    --num_train_epochs ${num_train_epochs} \
    --adafactor \
    --gradient_checkpointing

python ../run_seq2seq.py \
    --task_name ${task_name} \
    --test_file ${test_file} \
    --source_column ${source_column} \
    --target_column ${target_column} \
    --max_source_length ${max_source_length} \
    --max_target_length ${max_target_length} \
    --truncation_side ${truncation_side} \
    --model_name_or_path ${output_dir} \
    --do_predict \
    --predict_with_generate \
    --metric_name_or_path ${metric_name_or_path} \
    --cache_dir ${cache_dir} \
    --output_dir ${output_dir} \
    --logging_dir ${logging_dir} \
    --overwrite_output_dir \
    --preprocessing_num_workers 4 \
    --per_device_train_batch_size ${per_device_train_batch_size} \
    --per_device_eval_batch_size ${per_device_eval_batch_size} \
    --gradient_accumulation_steps ${gradient_accumulation_steps} \
    --learning_rate ${lr} \
    --num_train_epochs ${num_train_epochs} \
    --adafactor \
    --gradient_checkpointing

python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json

python ../../../nlu/evaluate_unified_datasets.py -p ${output_dir}/predictions.json
n_gpus=1
task_name="retnlu"
dataset_name="multiwoz21"
speaker="user"
context_window_size=0
retrieval_topk=1
data_dir="data/${task_name}/${dataset_name}/${speaker}/context_${context_window_size}/in_context_False/topk_${retrieval_topk}"
output_dir="output/${task_name}/${dataset_name}/${speaker}/context_${context_window_size}/in_context_False/topk_${retrieval_topk}"
cache_dir="../cache"
logging_dir="${output_dir}/runs"
train_file="${data_dir}/train.json"
validation_file="${data_dir}/validation.json"
test_file="${data_dir}/test.json"
metric_name_or_path="nlu_metric.py"
metric_for_best_model="overall_f1"
source_column="context"
target_column="dialogue_acts_seq"
truncation_side="left"
max_source_length=512
max_target_length=512
model_name_or_path="t5-small"
per_device_train_batch_size=128
per_device_eval_batch_size=64
gradient_accumulation_steps=2
lr=1e-3
num_train_epochs=10

python ../create_data.py -t ${task_name} -d ${dataset_name} -s ${speaker} -c ${context_window_size} --retrieval_datasets sgd tm1 tm2 tm3 --retrieval_topk ${retrieval_topk}

python ../run_seq2seq.py \
    --task_name ${task_name} \
    --train_file ${train_file} \
    --validation_file ${validation_file} \
    --source_column ${source_column} \
    --target_column ${target_column} \
    --max_source_length ${max_source_length} \
    --max_target_length ${max_target_length} \
    --truncation_side ${truncation_side} \
    --model_name_or_path ${model_name_or_path} \
    --do_train \
    --do_eval \
    --save_strategy epoch \
    --evaluation_strategy epoch \
    --save_total_limit 1 \
    --prediction_loss_only \
    --cache_dir ${cache_dir} \
    --output_dir ${output_dir} \
    --logging_dir ${logging_dir} \
    --overwrite_output_dir \
    --preprocessing_num_workers 4 \
    --per_device_train_batch_size ${per_device_train_batch_size} \
    --per_device_eval_batch_size ${per_device_eval_batch_size} \
    --gradient_accumulation_steps ${gradient_accumulation_steps} \
    --learning_rate ${lr} \
    --num_train_epochs ${num_train_epochs} \
    --adafactor \
    --gradient_checkpointing

python ../run_seq2seq.py \
    --task_name ${task_name} \
    --test_file ${test_file} \
    --source_column ${source_column} \
    --target_column ${target_column} \
    --max_source_length ${max_source_length} \
    --max_target_length ${max_target_length} \
    --truncation_side ${truncation_side} \
    --model_name_or_path ${output_dir} \
    --do_predict \
    --predict_with_generate \
    --metric_name_or_path ${metric_name_or_path} \
    --cache_dir ${cache_dir} \
    --output_dir ${output_dir} \
    --logging_dir ${logging_dir} \
    --overwrite_output_dir \
    --preprocessing_num_workers 4 \
    --per_device_train_batch_size ${per_device_train_batch_size} \
    --per_device_eval_batch_size ${per_device_eval_batch_size} \
    --gradient_accumulation_steps ${gradient_accumulation_steps} \
    --learning_rate ${lr} \
    --num_train_epochs ${num_train_epochs} \
    --adafactor \
    --gradient_checkpointing

python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json

python ../../../nlu/evaluate_unified_datasets.py -p ${output_dir}/predictions.json
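
The data_dir layout in this script mirrors the path that create_retnlu_data builds from its arguments; for the defaults above:

    import os

    # mirrors the os.path.join in create_retnlu_data for this script's settings
    data_dir = os.path.join('data/retnlu/multiwoz21', 'user', 'context_0',
                            'in_context_False', 'topk_1')
    print(data_dir)  # data/retnlu/multiwoz21/user/context_0/in_context_False/topk_1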