From 42f8dec8f218f38736a29d3d9b0b2712b2afc406 Mon Sep 17 00:00:00 2001
From: zqwerty <zhuq96@hotmail.com>
Date: Mon, 11 Apr 2022 17:42:43 +0800
Subject: [PATCH] Fix dialogpt data generation: append trailing <|endoftext|> to each dialogue

---
 convlab2/base_models/gpt/create_data.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/convlab2/base_models/gpt/create_data.py b/convlab2/base_models/gpt/create_data.py
index 3186fd33..94e88f3e 100644
--- a/convlab2/base_models/gpt/create_data.py
+++ b/convlab2/base_models/gpt/create_data.py
@@ -14,7 +14,7 @@ def create_lm_data(dataset, data_dir, args):
         data = []
         for sample in tqdm(data_by_split[data_split], desc=f'{data_split} sample', leave=False):
             if args.model_type == 'dialogpt':
-                dialogue = ' <|endoftext|> '.join([turn['utterance'] for turn in sample['turns']])
+                dialogue = ' <|endoftext|> '.join([turn['utterance'] for turn in sample['turns']]) + ' <|endoftext|>'
             else:
                 dialogue = ' '.join([f"{turn['speaker']}: {turn['utterance']}" for turn in sample['turns']])
             data.append(json.dumps({'dialogue': dialogue}, ensure_ascii=False)+'\n')
-- 
GitLab