Commit 26a6256d authored by Christian


Merge branch 'github_master' of gitlab.cs.uni-duesseldorf.de:dsml/convlab/ConvLab3 into github_master
parents 0cd19bb3 ef06e29f
Showing 315 additions and 138 deletions
@@ -16,11 +16,12 @@
 ## Updates

+- **2023.2.26**: Update ConvLab on PyPI to 3.0.1 to reflect bug fixes.
 - **2022.11.30**: ConvLab-3 release.

 ## Installation

-You can install ConvLab-3 in one of the following ways according to your need. Higher versions of `torch` and `transformers` may also work.
+You can install ConvLab-3 in one of the following ways according to your need. We use `torch>=1.10.1,<=1.13` and `transformers>=4.17.0,<=4.24.0`. Higher versions of `torch` and `transformers` may also work.

 ### Git clone and pip install in development mode (Recommend)
...
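
For orientation, the "Git clone and pip install in development mode" route named above usually boils down to the commands below. This is a minimal sketch, not part of the diff; the GitHub mirror URL is an assumption.

```bash
# Assumed public mirror; substitute your own clone URL if it differs.
git clone https://github.com/ConvLab/ConvLab-3.git
cd ConvLab-3
pip install -e .
```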
@@ -4,7 +4,7 @@ from convlab.util import load_dataset, load_dst_data
 from convlab.base_models.t5.dst.serialization import deserialize_dialogue_state

-def merge(dataset_names, speaker, save_dir, context_window_size, predict_result):
+def merge(dataset_names, speaker, save_dir, context_window_size, predict_result, dial_ids_order):
     assert os.path.exists(predict_result)
     if save_dir is None:
@@ -17,14 +17,18 @@ def merge(dataset_names, speaker, save_dir, context_window_size, predict_result)
     i = 0
     for dataset_name in dataset_names.split('+'):
         print(dataset_name)
-        dataset = load_dataset(dataset_name, args.dial_ids_order)
+        single = []
+        dataset = load_dataset(dataset_name, dial_ids_order)
         data = load_dst_data(dataset, data_split='test', speaker=speaker, use_context=context_window_size>0, context_window_size=context_window_size)['test']
         for sample in data:
             sample['predictions'] = {'state': predict_result[i]}
             i += 1
+            single.append(sample)
             merged.append(sample)
+        json.dump(single, open(os.path.join(save_dir, f'{dataset_name}_predictions.json'), 'w', encoding='utf-8'), indent=2, ensure_ascii=False)
     json.dump(merged, open(os.path.join(save_dir, 'predictions.json'), 'w', encoding='utf-8'), indent=2, ensure_ascii=False)
@@ -35,8 +39,8 @@ if __name__ == '__main__':
     parser.add_argument('--speaker', '-s', type=str, choices=['user', 'system', 'all'], help='speaker(s) of utterances')
     parser.add_argument('--save_dir', type=str, help='merged data will be saved as $save_dir/predictions.json. default: on the same directory as predict_result')
     parser.add_argument('--context_window_size', '-c', type=int, default=0, help='how many contextual utterances are considered')
-    parser.add_argument('--predict_result', '-p', type=str, required=True, help='path to the output file generated_predictions.json')
+    parser.add_argument('--predict_result', '-p', type=str, required=True, help='path to the output file test_generated_predictions.json')
     parser.add_argument('--dial_ids_order', '-o', type=int, default=None, help='which data order is used for experiments')
     args = parser.parse_args()
     print(args)
-    merge(args.dataset, args.speaker, args.save_dir, args.context_window_size, args.predict_result)
+    merge(args.dataset, args.speaker, args.save_dir, args.context_window_size, args.predict_result, args.dial_ids_order)
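
Besides the merged `predictions.json`, the updated script now also dumps one prediction file per dataset (the new `single` list), which is what the per-dataset evaluation loops added to the shell scripts below consume. A sketch of a call, with a hypothetical dataset combination and output path:

```bash
# Hypothetical example: merge T5-DST predictions from a combined multiwoz21+sgd run.
# Writes predictions.json plus multiwoz21_predictions.json and sgd_predictions.json
# into the directory of test_generated_predictions.json (no --save_dir given).
python merge_predict_res.py -d multiwoz21+sgd -s user -c 100 \
    -p output/dst/multiwoz21+sgd/user/context_100/test_generated_predictions.json
```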
 n_gpus=1
 task_name="dst"
-dataset_name=$1
+dataset_name=crosswoz
 speaker="user"
 context_window_size=100
 data_dir="data/${task_name}/${dataset_name}/${speaker}/context_${context_window_size}"
@@ -17,10 +17,10 @@ target_column="state_seq"
 truncation_side="left"
 max_source_length=1024
 max_target_length=512
-model_name_or_path="t5-small"
-per_device_train_batch_size=64
-per_device_eval_batch_size=64
-gradient_accumulation_steps=2
+model_name_or_path="/data/zhuqi/pre-trained-models/mt5-small"
+per_device_train_batch_size=16
+per_device_eval_batch_size=16
+gradient_accumulation_steps=4
 lr=1e-3
 num_train_epochs=10
@@ -80,6 +80,6 @@ python ../run_seq2seq.py \
     --optim adafactor \
     --gradient_checkpointing
-python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
+python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json
 python ../../../dst/evaluate_unified_datasets.py -p ${output_dir}/predictions.json

@@ -82,6 +82,6 @@ python ../run_seq2seq.py \
     --optim adafactor \
     --gradient_checkpointing
-python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order}
+python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json -o ${dial_ids_order}
 python ../../../dst/evaluate_unified_datasets.py -p ${output_dir}/predictions.json

@@ -30,7 +30,7 @@ mkdir -p ${data_dir}
 for name in ${names};
 do
     echo "preprocessing ${name}"
-    # python ../create_data.py -t ${task_name} -d ${name} -s ${speaker} -c ${context_window_size}
+    python ../create_data.py -t ${task_name} -d ${name} -s ${speaker} -c ${context_window_size}
 done
 python merge_data.py $(echo ${dataset_name} | tr "+" " ")
@@ -89,6 +89,10 @@ python ../run_seq2seq.py \
     --optim adafactor \
     --gradient_checkpointing
-python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
-python ../../../dst/evaluate_unified_datasets.py -p ${output_dir}/predictions.json
+python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json
+for name in ${names};
+do
+    echo "evaluating ${name}"
+    python ../../../dst/evaluate_unified_datasets.py -p ${output_dir}/${name}_predictions.json
+done
\ No newline at end of file
@@ -3,7 +3,7 @@ import os
 from convlab.util import load_dataset, load_nlg_data

-def merge(dataset_names, speaker, save_dir, context_window_size, predict_result):
+def merge(dataset_names, speaker, save_dir, context_window_size, predict_result, dial_ids_order):
     assert os.path.exists(predict_result)
     if save_dir is None:
@@ -16,7 +16,8 @@ def merge(dataset_names, speaker, save_dir, context_window_size, predict_result)
     i = 0
     for dataset_name in dataset_names.split('+'):
         print(dataset_name)
-        dataset = load_dataset(dataset_name, args.dial_ids_order)
+        single = []
+        dataset = load_dataset(dataset_name, dial_ids_order)
         data = load_nlg_data(dataset, data_split='test', speaker=speaker, use_context=context_window_size>0, context_window_size=context_window_size)['test']
         for sample in data:
@@ -24,8 +25,11 @@ def merge(dataset_names, speaker, save_dir, context_window_size, predict_result)
                 continue
             sample['predictions'] = {'utterance': predict_result[i]}
             i += 1
+            single.append(sample)
             merged.append(sample)
+        json.dump(single, open(os.path.join(save_dir, f'{dataset_name}_predictions.json'), 'w', encoding='utf-8'), indent=2, ensure_ascii=False)
     json.dump(merged, open(os.path.join(save_dir, 'predictions.json'), 'w', encoding='utf-8'), indent=2, ensure_ascii=False)
@@ -36,8 +40,8 @@ if __name__ == '__main__':
     parser.add_argument('--speaker', '-s', type=str, choices=['user', 'system', 'all'], help='speaker(s) of utterances')
     parser.add_argument('--save_dir', type=str, help='merged data will be saved as $save_dir/predictions.json. default: on the same directory as predict_result')
     parser.add_argument('--context_window_size', '-c', type=int, default=0, help='how many contextual utterances are considered')
-    parser.add_argument('--predict_result', '-p', type=str, required=True, help='path to the output file generated_predictions.json')
+    parser.add_argument('--predict_result', '-p', type=str, required=True, help='path to the output file test_generated_predictions.json')
     parser.add_argument('--dial_ids_order', '-o', type=int, default=None, help='which data order is used for experiments')
     args = parser.parse_args()
     print(args)
-    merge(args.dataset, args.speaker, args.save_dir, args.context_window_size, args.predict_result)
+    merge(args.dataset, args.speaker, args.save_dir, args.context_window_size, args.predict_result, args.dial_ids_order)
 n_gpus=1
 task_name="nlg"
-dataset_name=$1
-speaker="system"
-context_window_size=$2
+dataset_name=crosswoz
+speaker="all"
+context_window_size=0
 data_dir="data/${task_name}/${dataset_name}/${speaker}/context_${context_window_size}"
 output_dir="output/${task_name}/${dataset_name}/${speaker}/context_${context_window_size}"
 cache_dir="../cache"
@@ -17,10 +17,10 @@ target_column="response"
 truncation_side="left"
 max_source_length=512
 max_target_length=512
-model_name_or_path="t5-small"
-per_device_train_batch_size=128
-per_device_eval_batch_size=64
-gradient_accumulation_steps=4
+model_name_or_path="/data/zhuqi/pre-trained-models/mt5-small"
+per_device_train_batch_size=32
+per_device_eval_batch_size=16
+gradient_accumulation_steps=8
 lr=1e-3
 num_train_epochs=10
@@ -80,6 +80,6 @@ python ../run_seq2seq.py \
     --optim adafactor \
     --gradient_checkpointing
-python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
+python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json
 python ../../../nlg/evaluate_unified_datasets.py -p ${output_dir}/predictions.json --dataset_name ${dataset_name}

@@ -83,6 +83,6 @@ python ../run_seq2seq.py \
     --optim adafactor \
     --gradient_checkpointing
-python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order}
+python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json -o ${dial_ids_order}
 python ../../../nlg/evaluate_unified_datasets.py -p ${output_dir}/predictions.json --dataset_name ${dataset_name}

@@ -89,6 +89,10 @@ python ../run_seq2seq.py \
     --optim adafactor \
     --gradient_checkpointing
-python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
-# python ../../../nlg/evaluate_unified_datasets.py -p ${output_dir}/predictions.json --dataset_name ${dataset_name}
+python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json
+for name in ${names};
+do
+    echo "evaluating ${name}"
+    python ../../../nlg/evaluate_unified_datasets.py -p ${output_dir}/${name}_predictions.json --dataset_name ${name}
+done
\ No newline at end of file
@@ -4,7 +4,7 @@ from convlab.util import load_dataset, load_nlu_data
 from convlab.base_models.t5.nlu.serialization import deserialize_dialogue_acts

-def merge(dataset_names, speaker, save_dir, context_window_size, predict_result):
+def merge(dataset_names, speaker, save_dir, context_window_size, predict_result, dial_ids_order):
     assert os.path.exists(predict_result)
     if save_dir is None:
@@ -17,14 +17,18 @@ def merge(dataset_names, speaker, save_dir, context_window_size, predict_result)
     i = 0
     for dataset_name in dataset_names.split('+'):
         print(dataset_name)
-        dataset = load_dataset(dataset_name, args.dial_ids_order)
+        single = []
+        dataset = load_dataset(dataset_name, dial_ids_order)
         data = load_nlu_data(dataset, data_split='test', speaker=speaker, use_context=context_window_size>0, context_window_size=context_window_size)['test']
         for sample in data:
             sample['predictions'] = {'dialogue_acts': predict_result[i]}
             i += 1
+            single.append(sample)
             merged.append(sample)
+        json.dump(single, open(os.path.join(save_dir, f'{dataset_name}_predictions.json'), 'w', encoding='utf-8'), indent=2, ensure_ascii=False)
     json.dump(merged, open(os.path.join(save_dir, 'predictions.json'), 'w', encoding='utf-8'), indent=2, ensure_ascii=False)
@@ -35,8 +39,8 @@ if __name__ == '__main__':
     parser.add_argument('--speaker', '-s', type=str, choices=['user', 'system', 'all'], help='speaker(s) of utterances')
     parser.add_argument('--save_dir', type=str, help='merged data will be saved as $save_dir/predictions.json. default: on the same directory as predict_result')
     parser.add_argument('--context_window_size', '-c', type=int, default=0, help='how many contextual utterances are considered')
-    parser.add_argument('--predict_result', '-p', type=str, required=True, help='path to the output file generated_predictions.json')
+    parser.add_argument('--predict_result', '-p', type=str, required=True, help='path to the output file test_generated_predictions.json')
     parser.add_argument('--dial_ids_order', '-o', type=int, default=None, help='which data order is used for experiments')
     args = parser.parse_args()
     print(args)
-    merge(args.dataset, args.speaker, args.save_dir, args.context_window_size, args.predict_result)
+    merge(args.dataset, args.speaker, args.save_dir, args.context_window_size, args.predict_result, args.dial_ids_order)
 n_gpus=1
 task_name="nlu"
-dataset_name=$1
-speaker="user"
-context_window_size=$2
+dataset_name=crosswoz
+speaker="all"
+context_window_size=0
 data_dir="data/${task_name}/${dataset_name}/${speaker}/context_${context_window_size}"
 output_dir="output/${task_name}/${dataset_name}/${speaker}/context_${context_window_size}"
 cache_dir="../cache"
@@ -17,10 +17,10 @@ target_column="dialogue_acts_seq"
 truncation_side="left"
 max_source_length=512
 max_target_length=512
-model_name_or_path="t5-small"
-per_device_train_batch_size=128
-per_device_eval_batch_size=64
-gradient_accumulation_steps=2
+model_name_or_path="/data/zhuqi/pre-trained-models/mt5-small"
+per_device_train_batch_size=16
+per_device_eval_batch_size=16
+gradient_accumulation_steps=16
 lr=1e-3
 num_train_epochs=10
@@ -80,6 +80,6 @@ python ../run_seq2seq.py \
     --optim adafactor \
     --gradient_checkpointing
-python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
+python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json
 python ../../../nlu/evaluate_unified_datasets.py -p ${output_dir}/predictions.json

@@ -83,6 +83,6 @@ python ../run_seq2seq.py \
     --optim adafactor \
     --gradient_checkpointing
-python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order}
+python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json -o ${dial_ids_order}
 python ../../../nlu/evaluate_unified_datasets.py -p ${output_dir}/predictions.json

@@ -89,6 +89,10 @@ python ../run_seq2seq.py \
     --optim adafactor \
     --gradient_checkpointing
-python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
-python ../../../nlu/evaluate_unified_datasets.py -p ${output_dir}/predictions.json
+python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json
+for name in ${names};
+do
+    echo "evaluating ${name}"
+    python ../../../nlu/evaluate_unified_datasets.py -p ${output_dir}/${name}_predictions.json
+done
\ No newline at end of file

@@ -81,6 +81,6 @@ python ../run_seq2seq.py \
     --optim adafactor \
     --gradient_checkpointing
-python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
+python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json
 python ../../../nlu/evaluate_unified_datasets.py -p ${output_dir}/predictions.json

@@ -84,6 +84,6 @@ num_train_epochs=100
 # --optim adafactor \
 # --gradient_checkpointing
-# python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order}
+# python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json -o ${dial_ids_order}
 python ../../../nlu/evaluate_unified_datasets.py -p ${output_dir}/predictions.json

@@ -81,6 +81,6 @@ python ../run_seq2seq.py \
     --optim adafactor \
     --gradient_checkpointing
-python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json
+python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json
 python ../../../nlu/evaluate_unified_datasets.py -p ${output_dir}/predictions.json

@@ -84,6 +84,6 @@ python ../run_seq2seq.py \
     --optim adafactor \
     --gradient_checkpointing
-python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/generated_predictions.json -o ${dial_ids_order}
+python merge_predict_res.py -d ${dataset_name} -s ${speaker} -c ${context_window_size} -p ${output_dir}/test_generated_predictions.json -o ${dial_ids_order}
 python ../../../nlu/evaluate_unified_datasets.py -p ${output_dir}/predictions.json
@@ -37,6 +37,8 @@ from transformers import (
     AutoConfig,
     AutoModelForSeq2SeqLM,
     AutoTokenizer,
+    T5ForConditionalGeneration,
+    T5Tokenizer,
     DataCollatorForSeq2Seq,
     HfArgumentParser,
     EarlyStoppingCallback,
@@ -358,6 +360,7 @@ def main():
         revision=model_args.model_revision,
         use_auth_token=True if model_args.use_auth_token else None,
     )
+    try:
     tokenizer = AutoTokenizer.from_pretrained(
         model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
         cache_dir=model_args.cache_dir,
@@ -374,6 +377,23 @@ def main():
         revision=model_args.model_revision,
         use_auth_token=True if model_args.use_auth_token else None,
     )
+    except:
+        tokenizer = T5Tokenizer.from_pretrained(
+            model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
+            cache_dir=model_args.cache_dir,
+            use_fast=model_args.use_fast_tokenizer,
+            truncation_side=model_args.truncation_side,
+            revision=model_args.model_revision,
+            use_auth_token=True if model_args.use_auth_token else None,
+        )
+        model = T5ForConditionalGeneration.from_pretrained(
+            model_args.model_name_or_path,
+            from_tf=bool(".ckpt" in model_args.model_name_or_path),
+            config=config,
+            cache_dir=model_args.cache_dir,
+            revision=model_args.model_revision,
+            use_auth_token=True if model_args.use_auth_token else None,
+        )
     model.resize_token_embeddings(len(tokenizer))
@@ -612,16 +632,17 @@ def main():
     # Predict
     if training_args.do_predict:
-        logger.info("*** Predict ***")
-        predict_results = trainer.predict(predict_dataset, metric_key_prefix="predict")
+        file_prefix = os.path.splitext(os.path.basename(data_args.test_file))[0]
+        logger.info(f"*** Predict {file_prefix}***")
+        predict_results = trainer.predict(predict_dataset, metric_key_prefix=file_prefix)
         metrics = predict_results.metrics
         max_predict_samples = (
             data_args.max_predict_samples if data_args.max_predict_samples is not None else len(predict_dataset)
         )
-        metrics["predict_samples"] = min(max_predict_samples, len(predict_dataset))
-        trainer.log_metrics("predict", metrics)
-        trainer.save_metrics("predict", metrics)
+        metrics[f"{file_prefix}_samples"] = min(max_predict_samples, len(predict_dataset))
+        trainer.log_metrics(file_prefix, metrics)
+        trainer.save_metrics(file_prefix, metrics)

         if trainer.is_world_process_zero():
             if training_args.predict_with_generate:
@@ -629,10 +650,13 @@ def main():
                     predict_results.predictions, skip_special_tokens=True, clean_up_tokenization_spaces=True
                 )
                 predictions = [pred.strip() for pred in predictions]
-                output_prediction_file = os.path.join(training_args.output_dir, "generated_predictions.json")
+                output_prediction_file = os.path.join(training_args.output_dir, f"{file_prefix}_generated_predictions.json")
                 with open(output_prediction_file, "w", encoding='utf-8') as writer:
-                    for sample, pred in zip(raw_datasets["test"], predictions):
-                        sample["predictions"] = pred
+                    for idx, sample in enumerate(raw_datasets["test"]):
+                        if training_args.num_return_sequences > 1:
+                            sample["predictions"] = predictions[idx*training_args.num_return_sequences:(idx+1)*training_args.num_return_sequences]
+                        else:
+                            sample["predictions"] = predictions[idx]
                         writer.write(json.dumps(sample, ensure_ascii=False)+'\n')

     kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": data_args.task_name}
...
@@ -70,82 +70,88 @@ if __name__ == '__main__':
     dst = RuleDST()

-    # Action is a dict. Its keys are strings (domain-type pairs, both uppercase and lowercase are OK) and its values are lists of lists.
-    # The domain may be one of ('Attraction', 'Hospital', 'Booking', 'Hotel', 'Restaurant', 'Taxi', 'Train', 'Police').
-    # The type may be "inform" or "request".
-    # For example, the action below has a key "Hotel-Inform", in which "Hotel" is domain and "Inform" is action type.
-    # Each list in the value of "Hotel-Inform" is a slot-value pair. "Area" is slot and "east" is value. "Star" is slot and "4" is value.
+    # Action (dialog acts) is a list of (intent, domain, slot, value) tuples.
+    # RuleDST will only handle `inform` and `request` actions.
     action = [
-        ["Inform", "Hotel", "Area", "east"],
-        ["Inform", "Hotel", "Stars", "4"]
+        ["inform", "hotel", "area", "east"],
+        ["inform", "hotel", "stars", "4"]
     ]

     # method `update` updates the attribute `state` of tracker, and returns it.
     state = dst.update(action)
     assert state == dst.state
-    assert state == {'user_action': [],
-                     'system_action': [],
-                     'belief_state': {'police': {'book': {'booked': []}, 'semi': {}},
-                                      'hotel': {'book': {'booked': [], 'people': '', 'day': '', 'stay': ''},
-                                                'semi': {'name': '', 'area': 'east', 'parking': '', 'price range': '', 'stars': '4', 'internet': '', 'type': ''}},
-                                      'attraction': {'book': {'booked': []}, 'semi': {'type': '', 'name': '', 'area': ''}},
-                                      'restaurant': {'book': {'booked': [], 'people': '', 'day': '', 'time': ''},
-                                                     'semi': {'food': '', 'pricerange': '', 'name': '', 'area': ''}},
-                                      'hospital': {'book': {'booked': []}, 'semi': {'department': ''}},
-                                      'taxi': {'book': {'booked': []},
-                                               'semi': {'leaveAt': '', 'destination': '', 'departure': '', 'arriveBy': ''}},
-                                      'train': {'book': {'booked': [], 'people': ''},
-                                                'semi': {'leaveAt': '', 'destination': '', 'day': '', 'arriveBy': '', 'departure': ''}}},
-                     'request_state': {},
-                     'terminated': False,
-                     'history': []}
+    assert state == {'belief_state': {'attraction': {'area': '', 'name': '', 'type': ''},
+                                      'hospital': {'department': ''},
+                                      'hotel': {'area': 'east', 'book day': '', 'book people': '', 'book stay': '', 'internet': '', 'name': '', 'parking': '', 'price range': '', 'stars': '4', 'type': ''},
+                                      'restaurant': {'area': '', 'book day': '', 'book people': '', 'book time': '', 'food': '', 'name': '', 'price range': ''},
+                                      'taxi': {'arrive by': '', 'departure': '', 'destination': '', 'leave at': ''},
+                                      'train': {'arrive by': '', 'book people': '', 'day': '', 'departure': '', 'destination': '', 'leave at': ''}},
+                     'booked': {},
+                     'history': [],
+                     'request_state': {},
+                     'system_action': [],
+                     'terminated': False,
+                     'user_action': []}

     # Please call `init_session` before a new dialog. This initializes the attribute `state` of tracker with a default state, which `convlab.util.multiwoz.state.default_state` returns. But you needn't call it before the first dialog, because tracker gets a default state in its constructor.
     dst.init_session()
-    action = [["Inform", "Train", "Arrive", "19:45"]]
+    action = [["inform", "train", "arrive by", "19:45"]]
     state = dst.update(action)
-    assert state == {'user_action': [],
-                     'system_action': [],
-                     'belief_state': {'police': {'book': {'booked': []}, 'semi': {}},
-                                      'hotel': {'book': {'booked': [], 'people': '', 'day': '', 'stay': ''},
-                                                'semi': {'name': '', 'area': '', 'parking': '', 'price range': '', 'stars': '', 'internet': '', 'type': ''}},
-                                      'attraction': {'book': {'booked': []}, 'semi': {'type': '', 'name': '', 'area': ''}},
-                                      'restaurant': {'book': {'booked': [], 'people': '', 'day': '', 'time': ''},
-                                                     'semi': {'food': '', 'pricerange': '', 'name': '', 'area': ''}},
-                                      'hospital': {'book': {'booked': []}, 'semi': {'department': ''}},
-                                      'taxi': {'book': {'booked': []},
-                                               'semi': {'leaveAt': '', 'destination': '', 'departure': '', 'arriveBy': ''}},
-                                      'train': {'book': {'booked': [], 'people': ''},
-                                                'semi': {'leaveAt': '', 'destination': '', 'day': '', 'arriveBy': '19:45', 'departure': ''}}},
-                     'request_state': {},
-                     'terminated': False,
-                     'history': []}
+    assert state == {'belief_state': {'attraction': {'area': '', 'name': '', 'type': ''},
+                                      'hospital': {'department': ''},
+                                      'hotel': {'area': '', 'book day': '', 'book people': '', 'book stay': '', 'internet': '', 'name': '', 'parking': '', 'price range': '', 'stars': '', 'type': ''},
+                                      'restaurant': {'area': '', 'book day': '', 'book people': '', 'book time': '', 'food': '', 'name': '', 'price range': ''},
+                                      'taxi': {'arrive by': '', 'departure': '', 'destination': '', 'leave at': ''},
+                                      'train': {'arrive by': '19:45', 'book people': '', 'day': '', 'departure': '', 'destination': '', 'leave at': ''}},
+                     'booked': {},
+                     'history': [],
+                     'request_state': {},
+                     'system_action': [],
+                     'terminated': False,
+                     'user_action': []}

# -*- coding: utf-8 -*-
# Copyright 2023 DSML Group, Heinrich Heine University, Düsseldorf
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MultiWOZ Test data inference for RuleDST and BERTNLU+RuleDST"""
import json
from copy import deepcopy
import os
from tqdm import tqdm
from convlab.util import load_dataset, load_dst_data
from convlab.dst.rule.multiwoz.dst import RuleDST
from convlab.nlu.jointBERT.unified_datasets.nlu import BERTNLU
BERTNLU_PATH = "https://huggingface.co/ConvLab/bert-base-nlu/resolve/main/bertnlu_unified_multiwoz21_user_context3.zip"
def flatten_act(acts: dict) -> list:
acts_list = list()
for act_type, _acts in acts.items():
for act in _acts:
if 'value' in act:
_act = [act['intent'], act['domain'], act['slot'], act['value']]
else:
_act = [act['intent'], act['domain'], act['slot'], '']
acts_list.append(_act)
return acts_list
def load_act_data(dataset: dict) -> list:
data = list()
for dialogue in tqdm(dataset['test']):
dial = []
for _turn in dialogue['turns']:
if _turn['speaker'] == 'user':
turn = {'user_acts': flatten_act(_turn['dialogue_acts']),
'state': _turn['state']}
dial.append(turn)
data.append(dial)
return data
def load_text_data(dataset: dict) -> list:
data = list()
for dialogue in tqdm(dataset['test']):
dial = []
turn = {'user': '', 'system': 'Start', 'state': None}
for _turn in dialogue['turns']:
if _turn['speaker'] == 'user':
turn['user'] = _turn['utterance']
turn['state'] = _turn['state']
elif _turn['speaker'] == 'system':
turn['system'] = _turn['utterance']
if turn['user'] and turn['system']:
if turn['system'] == 'Start':
turn['system'] = ''
dial.append(deepcopy(turn))
turn = {'user': '', 'system': '', 'state': None}
data.append(dial)
return data
def predict_acts(data: list, nlu: BERTNLU) -> list:
processed_data = list()
for dialogue in tqdm(data):
context = list()
dial = list()
for turn in dialogue:
context.append(['sys', turn['system']])
acts = nlu.predict(turn['user'], context=context)
context.append(['usr', turn['user']])
dial.append({'user_acts': deepcopy(acts), 'state': turn['state']})
processed_data.append(dial)
return processed_data
def predict_states(data: list):
dst = RuleDST()
processed_data = list()
for dialogue in tqdm(data):
dst.init_session()
for turn in dialogue:
pred = dst.update(turn['user_acts'])
dial = {'state': turn['state'],
'predictions': {'state': deepcopy(pred['belief_state'])}}
processed_data.append(dial)
return processed_data
if __name__ == '__main__':
dataset = load_dataset(dataset_name='multiwoz21')
dataset = load_dst_data(dataset, data_split='test', speaker='all', dialogue_acts=True, split_to_turn=False)
data = load_text_data(dataset)
nlu = BERTNLU(mode='user', config_file='multiwoz21_user_context3.json', model_file=BERTNLU_PATH)
bertnlu_data = predict_acts(data, nlu)
golden_data = load_act_data(dataset)
bertnlu_data = predict_states(bertnlu_data)
golden_data = predict_states(golden_data)
path = os.path.dirname(os.path.realpath(__file__))
writer = open(os.path.join(path, f"predictions_BERTNLU-RuleDST.json"), 'w')
json.dump(bertnlu_data, writer)
writer.close()
writer = open(os.path.join(path, f"predictions_RuleDST.json"), 'w')
json.dump(golden_data, writer)
writer.close()
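
The dumped files pair each gold `state` with a predicted `belief_state`, the same layout the merged T5 prediction files use. A plausible follow-up, assuming these dumps are meant for the unified DST evaluation script referenced throughout this commit (paths relative to the repository root are an illustration, not part of the diff):

```bash
# Score rule-based tracking on golden user acts and on BERTNLU-predicted acts.
python convlab/dst/evaluate_unified_datasets.py -p predictions_RuleDST.json
python convlab/dst/evaluate_unified_datasets.py -p predictions_BERTNLU-RuleDST.json
```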