diff --git a/data/unified_datasets/multiwoz21/data.zip b/data/unified_datasets/multiwoz21/data.zip index e320219217e24c6cdef07de99ab0cf6af88c7c59..9bb5c40b6c58e821c306f7096a07d594f2e59f47 100644 Binary files a/data/unified_datasets/multiwoz21/data.zip and b/data/unified_datasets/multiwoz21/data.zip differ diff --git a/data/unified_datasets/multiwoz21/preprocess.py b/data/unified_datasets/multiwoz21/preprocess.py index 253080195dd9b473431311bddd880335d8580565..19a1634a434bd7118a035d4474114d704ce1acd0 100644 --- a/data/unified_datasets/multiwoz21/preprocess.py +++ b/data/unified_datasets/multiwoz21/preprocess.py @@ -877,9 +877,9 @@ def preprocess(): for split in splits: dialogues += dialogues_by_split[split] init_ontology['binary_dialogue_acts'] = [{'intent':bda[0],'domain':bda[1],'slot':bda[2],'value':bda[3]} for bda in sorted(init_ontology['binary_dialogue_acts'])] - json.dump(dialogues[:10], open(f'dummy_data.json', 'w'), indent=2) - json.dump(dialogues, open(f'{new_data_dir}/dialogues.json', 'w'), indent=2) - json.dump(init_ontology, open(f'{new_data_dir}/ontology.json', 'w'), indent=2) + json.dump(dialogues[:10], open(f'dummy_data.json', 'w', encoding='utf-8'), indent=2, ensure_ascii=False) + json.dump(dialogues, open(f'{new_data_dir}/dialogues.json', 'w', encoding='utf-8'), indent=2, ensure_ascii=False) + json.dump(init_ontology, open(f'{new_data_dir}/ontology.json', 'w', encoding='utf-8'), indent=2, ensure_ascii=False) with ZipFile('data.zip', 'w', ZIP_DEFLATED) as zf: for filename in os.listdir(new_data_dir): zf.write(f'{new_data_dir}/{filename}')