From 42f8dec8f218f38736a29d3d9b0b2712b2afc406 Mon Sep 17 00:00:00 2001 From: zqwerty <zhuq96@hotmail.com> Date: Mon, 11 Apr 2022 17:42:43 +0800 Subject: [PATCH] fix data generation for dialogpt --- convlab2/base_models/gpt/create_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/convlab2/base_models/gpt/create_data.py b/convlab2/base_models/gpt/create_data.py index 3186fd33..94e88f3e 100644 --- a/convlab2/base_models/gpt/create_data.py +++ b/convlab2/base_models/gpt/create_data.py @@ -14,7 +14,7 @@ def create_lm_data(dataset, data_dir, args): data = [] for sample in tqdm(data_by_split[data_split], desc=f'{data_split} sample', leave=False): if args.model_type == 'dialogpt': - dialogue = ' <|endoftext|> '.join([turn['utterance'] for turn in sample['turns']]) + dialogue = ' <|endoftext|> '.join([turn['utterance'] for turn in sample['turns']]) + ' <|endoftext|>' else: dialogue = ' '.join([f"{turn['speaker']}: {turn['utterance']}" for turn in sample['turns']]) data.append(json.dumps({'dialogue': dialogue}, ensure_ascii=False)+'\n') -- GitLab