Skip to content
Snippets Groups Projects
Unverified Commit 93bb6c2b authored by hualai-liujiexi's avatar hualai-liujiexi Committed by GitHub
Browse files

Update preprocess.py (#210)

parent aedd1065
No related branches found
No related tags found
No related merge requests found
...@@ -8,13 +8,23 @@ Created on Mon Sep 14 11:38:53 2020 ...@@ -8,13 +8,23 @@ Created on Mon Sep 14 11:38:53 2020
import os import os
import json import json
from convlab2.nlg.scgpt.utils import dict2dict, dict2seq from convlab2.nlg.scgpt.utils import dict2dict, dict2seq
import zipfile
def read_zipped_json(filepath, filename):
    """Load a single JSON member out of a zip archive.

    Args:
        filepath: Path of the zip archive on disk.
        filename: Name of the JSON member stored inside the archive.

    Returns:
        The decoded JSON value held by the member.

    Raises:
        KeyError: If the archive has no member called ``filename``.
    """
    print("zip file path = ", filepath)
    # Both the archive and the member stream hold open file handles; the
    # context managers release them even when JSON decoding fails.
    with zipfile.ZipFile(filepath, 'r') as archive:
        with archive.open(filename) as member:
            return json.load(member)
cur_dir = os.path.dirname(os.path.abspath(__file__)) cur_dir = os.path.dirname(os.path.abspath(__file__))
data_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname( data_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(
cur_dir)))), 'data/multiwoz/') cur_dir)))), 'data/multiwoz/')
with open(os.path.join(data_dir, '0807_final.json'),'r', encoding='utf8') as f: keys = ['train', 'val', 'test']
data = json.load(f) data = {}
# Read each split's zipped JSON and merge its entries into `data`;
# on a key clash the split loaded later wins.
for key in keys:
    data_key = read_zipped_json(
        os.path.join(data_dir, key + '.json.zip'),
        key + '.json',
    )
    print('load {}, size {}'.format(key, len(data_key)))
    data = {**data, **data_key}
with open(os.path.join(data_dir, 'valListFile'), 'r') as f: with open(os.path.join(data_dir, 'valListFile'), 'r') as f:
val_list = f.read().splitlines() val_list = f.read().splitlines()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment