From 93bb6c2bd5342a20f21794e167f6a78bfb6c615a Mon Sep 17 00:00:00 2001
From: hualai-liujiexi <52770030+hualai-liujiexi@users.noreply.github.com>
Date: Fri, 30 Jul 2021 11:27:19 +0800
Subject: [PATCH] Update preprocess.py (#210)

---
 convlab2/nlg/scgpt/multiwoz/preprocess.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/convlab2/nlg/scgpt/multiwoz/preprocess.py b/convlab2/nlg/scgpt/multiwoz/preprocess.py
index 3dcda2eb..27c5e900 100644
--- a/convlab2/nlg/scgpt/multiwoz/preprocess.py
+++ b/convlab2/nlg/scgpt/multiwoz/preprocess.py
@@ -8,13 +8,23 @@ Created on Mon Sep 14 11:38:53 2020
 import os
 import json
 from convlab2.nlg.scgpt.utils import dict2dict, dict2seq
+import zipfile
+
+def read_zipped_json(filepath, filename):
+    print("zip file path = ", filepath)
+    archive = zipfile.ZipFile(filepath, 'r')
+    return json.load(archive.open(filename))
 
 cur_dir = os.path.dirname(os.path.abspath(__file__))
 data_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(
         cur_dir)))), 'data/multiwoz/')
 
-with open(os.path.join(data_dir, '0807_final.json'),'r', encoding='utf8') as f:
-    data = json.load(f)
+keys = ['train', 'val', 'test']
+data = {}
+for key in keys:
+    data_key = read_zipped_json(os.path.join(data_dir, key + '.json.zip'), key + '.json')
+    print('load {}, size {}'.format(key, len(data_key)))
+    data = dict(data, **data_key)
 
 with open(os.path.join(data_dir, 'valListFile'), 'r') as f:
     val_list = f.read().splitlines()
-- 
GitLab