diff --git a/convlab2/base_models/gpt/keyword_extraction/lmloss2keywords.py b/convlab2/base_models/gpt/keyword_extraction/lmloss2keywords.py
index ab9126ba6b3b87cc8c79a952aa6d6becdd812255..307d57edf4d09c8a72968f35051d451afe21bc64 100644
--- a/convlab2/base_models/gpt/keyword_extraction/lmloss2keywords.py
+++ b/convlab2/base_models/gpt/keyword_extraction/lmloss2keywords.py
@@ -20,7 +20,8 @@ def merge_tokens(tokens, losses, loss_merge_func=np.mean):
                 tokens[i+1] = 'Ġ'+tokens[i+1]
             i += 1
             continue
-        if token in ['user', 'system'] and i < len(tokens)-1 and tokens[i+1] == ':':
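+        # 'Ġ' marks a leading space in the GPT-2 BPE vocabulary, so speaker prefixes may appear as 'Ġuser'/'Ġsystem'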
+        if token in ['user', 'system', 'Ġuser', 'Ġsystem'] and i < len(tokens)-1 and tokens[i+1] == ':':
             if i > 0:
                 tokens[i+1] = '<|endoftext|>'
                 i += 1
@@ -109,7 +109,7 @@ def main(args):
 
 if __name__ == '__main__':
     from argparse import ArgumentParser
-    parser = ArgumentParser(description="calculate NLU metrics for unified datasets")
+    parser = ArgumentParser(description="extract keywords according to lm loss")
     parser.add_argument('--model_type', '-m', type=str, help='gpt or dialogpt')
     parser.add_argument('--token_loss_file', '-t', type=str, help='path to the token loss file that contains two columns: [tokens, losses]')
-    parser.add_argument('--word_loss_file', '-w', type=str, help='path to the token loss file that contains two columns: [tokens, losses]')
+    parser.add_argument('--word_loss_file', '-w', type=str, help='path to the output word loss file that contains two columns: [words, losses]')
diff --git a/convlab2/base_models/gpt/keyword_extraction/merge_keywords_res.py b/convlab2/base_models/gpt/keyword_extraction/merge_keywords_res.py
new file mode 100644
index 0000000000000000000000000000000000000000..a8ebd5ba0b623cae37521bfe93fe046d8cd0c53e
--- /dev/null
+++ b/convlab2/base_models/gpt/keyword_extraction/merge_keywords_res.py
@@ -0,0 +1,35 @@
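+"""Merge keyword extraction results from multiple keywords files into a single JSON file."""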
+import json
+
+def main(args):
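+    # map each keywords file (by basename) to its parsed JSON content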
+    filename2data = {f.split('/')[-1]: json.load(open(f)) for f in args.keywords_files}
+    first_filename = args.keywords_files[0].split('/')[-1]
+    dialogs = []
+    for i in range(len(filename2data[first_filename])):
+        turns = []
+        for j in range(len(filename2data[first_filename][i])):
+            utt = filename2data[first_filename][i][j]['utterance']
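+            # build one keywords column per input file; the column name is
+            # derived from fields of the underscore-delimited filename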
+            keywords = {
+                filename.split('_')[2] + '_nonstopword' + filename.split('_')[-1]:
+                    ' | '.join([x[0] for x in filename2data[filename][i][j]['keywords']])
+                for filename in filename2data
+            }
+            turns.append({
+                "utterance": utt,
+                **keywords
+            })
+        dialogs.append(turns)
+    json.dump(dialogs, open(args.output_file, "w", encoding='utf-8'), indent=2, ensure_ascii=False)
+
+
+if __name__ == '__main__':
+    from argparse import ArgumentParser
+    parser = ArgumentParser(description="calculate NLU metrics for unified datasets")
+    parser.add_argument('--keywords_files', '-f', metavar='keywords_files', nargs='*', help='keywords files')
+    parser.add_argument('--output_file', '-o', type=str, help='path to the output file')
+    args = parser.parse_args()
+    print(args)
+    main(args)