Merge pull request #1 from ConvLab/dsml_scgpt

add system sc-gpt

Merge pull request #1 from ConvLab/dsml_scgpt
c6170181 · zhuqi · GitHub · b8aecfe6 · aac882a3 · c6170181
Unverified Commit c6170181 authored 3 years ago by zhuqi Committed by GitHub 3 years ago
--- a/convlab2/nlg/evaluate.py
+++ b/convlab2/nlg/evaluate.py
@@ -5,16 +5,173 @@ Usage: python evaluate.py [MultiWOZ] [SCLSTM|TemplateNLG] [usr|sys]
 """

 import json
+import os
 import random
 import sys
+import itertools
 import zipfile
 import numpy
+from numpy.lib.shape_base import _put_along_axis_dispatcher
+from numpy.lib.twodim_base import triu_indices_from
 import torch
 from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction
 from pprint import pprint
 from tqdm import tqdm


+def slot_error(dialog_acts, utts):
+    """Count `inform` acts whose value does not appear in the paired utterance.
+
+    Matching is a case-insensitive substring test of the act value against
+    the utterance.
+
+    Args:
+        dialog_acts: one list of acts per turn; each act is a 4-item
+            sequence of strings ``[intent, domain, slot, value]``.
+        utts: utterances (strings) aligned index-by-index with
+            ``dialog_acts``.
+
+    Returns:
+        A ``(missing, total)`` tuple: the number of `inform` acts whose
+        value is absent from the utterance, and the number of `inform`
+        acts seen.
+    """
+    missing = 0
+    total = 0
+
+    for acts, utt in zip(dialog_acts, utts):
+        # Lower-case the utterance once per turn rather than once per act.
+        lowered_utt = utt.lower()
+        for act in acts:
+            # Unpacking enforces the 4-field act layout, as before.
+            intent, _domain, _slot, value = (field.lower() for field in act)
+            if intent != 'inform':
+                continue
+            total += 1
+            if value not in lowered_utt:
+                missing += 1
+    return missing, total
+
+def fine_SER(dialog_acts, utts):
+    """Compute fine-grained slot-error statistics for a list of utterances.
+
+    Every act is a 4-item sequence ``[intent, domain, slot, value]``; all
+    comparisons are case-insensitive substring tests against the utterance.
+    While acts are checked, the text that realises them (values, synonyms,
+    slot and domain names) is stripped from a working copy of the utterance.
+    If a slot name or ontology entity of length >= 4 is still found in what
+    is left, the turn is counted as containing a hallucination.
+
+    Two JSON resources are read on every call: ``template/multiwoz/
+    label_maps.json`` next to this module (value -> list of synonyms) and
+    ``data/multiwoz/ontology_nlg_eval.json`` three directory levels above
+    this file (key -> list of entity strings).
+
+    Args:
+        dialog_acts: one list of acts per turn.
+        utts: utterances (strings) aligned index-by-index with
+            ``dialog_acts``.
+
+    Returns:
+        ``(missing, hallucinate, total, hallucination_dialogs,
+        missing_dialogs)`` where
+        * ``missing`` is the number of acts judged not realised,
+        * ``hallucinate`` is the number of turns with a hallucination,
+        * ``total`` is the number of checked acts (value-carrying intents
+          plus ``request``),
+        * ``hallucination_dialogs`` is a flat list holding four entries per
+          hallucination: keyword, acts, stripped utterance, utterance,
+        * ``missing_dialogs`` is a flat list holding three entries per
+          missing act: missing fact, acts, utterance.
+    """
+    module_dir = os.path.dirname(os.path.abspath(__file__))
+    mapping_path = os.path.join(module_dir, 'template', 'multiwoz', 'label_maps.json')
+    # The context manager closes the file; no explicit close() is needed.
+    with open(mapping_path, 'r', encoding='utf-8') as mapping_file:
+        mappings = json.load(mapping_file)
+
+    repo_root = os.path.dirname(os.path.dirname(module_dir))
+    entity_path = os.path.join(repo_root, 'data', 'multiwoz', 'ontology_nlg_eval.json')
+    with open(entity_path, 'r', encoding='utf-8') as entity_file:
+        possible_entity = json.load(entity_file)
+
+    # Flatten all ontology entity lists into one list, keeping file order.
+    entity_list = [entity
+                   for entities in possible_entity.values()
+                   for entity in entities]
+
+    value_intents = ('inform', 'recommend', 'offerbook', 'offerbooked', 'book', 'select')
+    location_slots = ('depart', 'dest', 'area')
+    time_slots = ('leave', 'arrive')
+
+    hallucinate = 0
+    missing = 0
+    total = 0
+
+    missing_dialogs = []
+    hallucination_dialogs = []
+
+    # Slot names seen so far; this grows across turns and is reused as a
+    # hallucination keyword list for every later turn.
+    slot_span = []
+
+    for acts, utt in zip(dialog_acts, utts):
+        origin_utt = utt.lower()
+        # Working copy from which realised spans are removed.
+        tmp_utt = origin_utt
+        legal_act_flag = False
+
+        for act in acts:
+            missing_fact = None
+            missing_flag = False
+            intent, domain, slot, value = [field.lower() for field in act]
+
+            if slot not in slot_span:
+                slot_span.append(slot)
+
+            if intent in value_intents:
+                legal_act_flag = True
+                total += 1
+                if value in origin_utt or value == 'none':
+                    # NOTE(review): a value of 'none' is never reported as
+                    # missing. The original code carried extra checks for
+                    # value == 'none' that could not be reached; they are
+                    # dropped here without changing the computed numbers.
+                    tmp_utt = tmp_utt.replace(value, '')
+                    tmp_utt = tmp_utt.replace(slot, '')
+                else:
+                    exist_flag = False
+                    # Values without an entry in the synonym map simply have
+                    # no synonyms to try.
+                    for synonym in mappings.get(value, ()):
+                        if synonym in origin_utt:
+                            exist_flag = True
+                            # Remove the span for hallucination detection.
+                            tmp_utt = tmp_utt.replace(synonym, '')
+                            tmp_utt = tmp_utt.replace(slot, '')
+                    if not exist_flag:
+                        missing_flag = True
+                        missing_fact = value
+
+            elif intent == 'request':
+                legal_act_flag = True
+                total += 1
+                if slot in location_slots:
+                    if 'where' not in origin_utt:
+                        if slot in origin_utt:
+                            tmp_utt = tmp_utt.replace(slot, '')
+                        else:
+                            missing_flag = True
+                            missing_fact = slot
+                elif slot in time_slots:
+                    asks_what_time = 'what' in origin_utt and 'time' in origin_utt
+                    if 'when' not in origin_utt and not asks_what_time:
+                        missing_flag = True
+                        missing_fact = slot
+                    # "time" is stripped for these slots in the common
+                    # clean-up below, so no extra replace is needed here
+                    # (the original called str.replace and discarded the
+                    # result).
+                else:
+                    tmp_utt = tmp_utt.replace(slot, '')
+                    tmp_utt = tmp_utt.replace(domain, '')
+
+            # Mentioning the slot name itself counts as realising the act;
+            # for acts of other intents missing_flag is already False.
+            if slot in origin_utt:
+                missing_flag = False
+            if slot == 'booking' and ('book' in origin_utt or 'reserv' in origin_utt):
+                missing_flag = False
+
+            # Common clean-up applied to every act, whatever its intent.
+            tmp_utt = tmp_utt.replace(domain, '')
+            tmp_utt = tmp_utt.replace(slot, '')
+            if 'arrive' in slot or 'leave' in slot:
+                tmp_utt = tmp_utt.replace('time', '')
+
+            if missing_flag:
+                missing += 1
+                missing_dialogs.append(missing_fact)
+                missing_dialogs.append(acts)
+                missing_dialogs.append(utt)
+
+        # Only turns with at least one checked act can hallucinate, and each
+        # turn is counted at most once (first matching keyword wins).
+        if legal_act_flag:
+            for keyword in slot_span + entity_list:
+                if keyword in tmp_utt and len(keyword) >= 4:
+                    hallucinate += 1
+                    hallucination_dialogs.append(keyword)
+                    hallucination_dialogs.append(acts)
+                    hallucination_dialogs.append(tmp_utt)
+                    hallucination_dialogs.append(utt)
+                    break
+
+    return missing, hallucinate, total, hallucination_dialogs, missing_dialogs
+
+
 def get_bleu4(dialog_acts, golden_utts, gen_utts):
    das2utts = {}
    for das, utt, gen in zip(dialog_acts, golden_utts, gen_utts):
@@ -55,36 +212,52 @@ if __name__ == '__main__':
    numpy.random.seed(seed)
    torch.manual_seed(seed)

-    if len(sys.argv) != 4:
+    if len(sys.argv) < 4:
        print("usage:")
        print("\t python evaluate.py dataset model role")
        print("\t dataset=MultiWOZ, CrossWOZ, or Camrest")
-        print("\t model=SCLSTM, or TemplateNLG")
+        print("\t model=SCLSTM, SCLSTM_NoUNK, SCGPT or TemplateNLG")
        print("\t role=usr/sys")
+        print("\t [Optional] model_file")
        sys.exit()
    dataset_name = sys.argv[1]
    model_name = sys.argv[2]
    role = sys.argv[3]
+    model_file = sys.argv[4] if len(sys.argv) >= 5 else None
    if dataset_name == 'MultiWOZ':
        if model_name == 'SCLSTM':
            from convlab2.nlg.sclstm.multiwoz import SCLSTM
            if role == 'usr':
-                model = SCLSTM(is_user=True, use_cuda=True)
+                model = SCLSTM(is_user=True, use_cuda=True, unk_suppress=False)
+            elif role == 'sys':
+                model = SCLSTM(is_user=False, use_cuda=True, unk_suppress=False)
+        elif model_name == 'SCLSTM_NoUNK':
+            from convlab2.nlg.sclstm.multiwoz import SCLSTM
+            if role == 'usr':
+                model = SCLSTM(is_user=True, use_cuda=True, unk_suppress=True)
            elif role == 'sys':
-                model = SCLSTM(is_user=False, use_cuda=True)
+                model = SCLSTM(is_user=False, use_cuda=True, unk_suppress=True)
        elif model_name == 'TemplateNLG':
            from convlab2.nlg.template.multiwoz import TemplateNLG
            if role == 'usr':
                model = TemplateNLG(is_user=True)
            elif role == 'sys':
                model = TemplateNLG(is_user=False)
+        elif model_name == 'SCGPT':
+            from convlab2.nlg.scgpt.multiwoz import SCGPT
+            if model_file is not None:
+                print(f"load model at {model_file}")
+            if role == 'usr':
+                model = SCGPT(model_file, is_user=True)
+            elif role == 'sys':
+                model  = SCGPT(model_file, is_user=False)
        else:
-            raise Exception("Available models: SCLSTM, TEMPLATE")
+            raise Exception("Available models: SCLSTM, SCGPT, TEMPLATE")

        from convlab2.util.dataloader.module_dataloader import SingleTurnNLGDataloader
        from convlab2.util.dataloader.dataset_dataloader import MultiWOZDataloader
        dataloader = SingleTurnNLGDataloader(dataset_dataloader=MultiWOZDataloader())
-        data = dataloader.load_data(data_key='test', role=role)['test']
+        data = dataloader.load_data(data_key='all', role=role, session_id=True)['test']

        dialog_acts = []
        golden_utts = []
@@ -93,17 +266,51 @@ if __name__ == '__main__':

        sen_num = 0

+        # sys.stdout = open(sys.argv[2] + '-' + sys.argv[3] + '-' + 'evaluate_logs_neo.txt','w')
+        assert 'utterance' in data and 'dialog_act' in data and 'session_id' in data
+        assert len(data['utterance']) == len(data['dialog_act']) == len(data['session_id'])
+
+        # Turns during the same session should be contiguous, so we can call init_session at the first turn of a new session.
+        # This is necessary for SCGPT, but unnecessary for SCLSTM and TemplateNLG.
+        is_first_turn = []
+        for _, iterator in itertools.groupby(data['session_id']):
+            is_first_turn.append(True)
+            next(iterator)
+            is_first_turn.extend(False for _ in iterator)
        for i in tqdm(range(len(data['utterance']))):
+            if is_first_turn[i]:
+                model.init_session()
            dialog_acts.append(data['dialog_act'][i])
            golden_utts.append(data['utterance'][i])
            gen_utts.append(model.generate(data['dialog_act'][i]))
+        #     print(dialog_acts[-1])
+        #     print(golden_utts[-1])
+        #     print(gen_utts[-1])

-        bleu4 = get_bleu4(dialog_acts, golden_utts, gen_utts)
+        print("Calculate SER for golden responses")
+        missing, hallucinate, total, hallucination_dialogs, missing_dialogs = fine_SER(dialog_acts, golden_utts)
+        print("Golden response Missing acts: {}, Total acts: {}, Hallucinations {}, SER {}".format(missing, total, hallucinate, missing/total))
+        
+        print("Calculate SER")
+        missing, hallucinate, total, hallucination_dialogs, missing_dialogs = fine_SER(dialog_acts, gen_utts)
+        # with open('{}-{}-genutt_neo.txt'.format(sys.argv[2], sys.argv[3]), mode='wt', encoding='utf-8') as gen_diag:
+        #     for x in gen_utts:
+        #         gen_diag.writelines(str(x)+'\n')

+
+        # with open('{}-{}-hallucinate_neo.txt'.format(sys.argv[2], sys.argv[3]), mode='wt', encoding='utf-8') as hal_diag:
+        #     for x in hallucination_dialogs:
+        #         hal_diag.writelines(str(x)+'\n')
+        
+        # with open('{}-{}-missing_neo.txt'.format(sys.argv[2], sys.argv[3]), mode='wt', encoding='utf-8') as miss_diag:
+        #     for x in missing_dialogs:
+        #         miss_diag.writelines(str(x)+'\n')
+        print("{} Missing acts: {}, Total acts: {}, Hallucinations {}, SER {}".format(sys.argv[2], missing, total, hallucinate, missing/total))
        print("Calculate bleu-4")
+        bleu4 = get_bleu4(dialog_acts, golden_utts, gen_utts)
        print("BLEU-4: %.4f" % bleu4)
-
        print('Model on {} sentences role={}'.format(len(data['utterance']), role))
+        # sys.stdout.close()

    else:
        raise Exception("currently supported dataset: MultiWOZ")
--- a/convlab2/nlg/scgpt/README.md
+++ b/convlab2/nlg/scgpt/README.md
@@ -21,9 +21,22 @@ tar -xvf scgpt.tar.gz
 Then

 ``` python
-python train.py --output_dir=$output_dir$ --model_type=scgpt --model_name_or_path=gpt2 --do_train --do_eval --eval_data_file=$test_file$ --overwrite_cache --use_tokenize --train_data_file=$train_file$ --overwrite_output_dir
+python train.py --output_dir=trained_output --model_type=gpt2 --model_name_or_path=scgpt --do_train --do_eval --eval_data_file=multiwoz/data/test_sys.txt --use_tokenize --train_data_file=multiwoz/data/train_sys.txt --overwrite_output_dir
 ```

+Some tricks (optional training arguments):
+* `--gradient_accumulation_steps xxx`
+* `--fp16`: if it is set, `--per_gpu_train_batch_size` should preferably be a multiple of 8.
+* `--max_seq xxx`: it should be larger than the length of the longest sequence. You can set `--max_seq 1024`. The script uses a dynamic sequence length at each training step.
+* `--gradient_checkpointing`: it allows a larger `per_gpu_train_batch_size`.
+* `--use_multi_tensor_adamw`: reportedly a faster optimizer.
+
+Distributed data parallel:
+
+  If multiple GPUs are available, you can run `python -m torch.distributed.launch --nproc_per_node CUDA_COUNT train.py ......`
+
+  `CUDA_COUNT` is the number of GPUs, and `......` stands for the arguments of `train.py`.
+
 ## Use

 ```python

--- a/convlab2/nlg/scgpt/modeling_utils.py
+++ b/convlab2/nlg/scgpt/modeling_utils.py
+import warnings
+from contextlib import nullcontext
+from typing import TYPE_CHECKING
+import torch.cuda.amp as amp
+import transformers
+from transformers import GPT2LMHeadModel
+
+
+# reference: https://pytorch.org/docs/master/notes/amp_examples.html
+class AmpGPT2LMHeadModel(GPT2LMHeadModel):
+    """`GPT2LMHeadModel` whose forward pass runs inside `amp.autocast()`."""
+    if not TYPE_CHECKING:
+        def forward(self, *args, **kwargs):
+            # Run the parent forward pass under the autocast context.
+            with amp.autocast():
+                outputs = super().forward(*args, **kwargs)
+            return outputs
+    else:
+        # Type checkers / IDEs see the parent's signature for code hinting.
+        forward = GPT2LMHeadModel.forward
+
+
+def try_enable_gradient_checkpointing(model: "transformers.modeling_utils.PreTrainedModel"):
+    """Enable gradient checkpointing on `model` when it reports support for it.
+
+    If `model.supports_gradient_checkpointing` is falsy, a warning is issued
+    and the model is left untouched.
+    """
+    if not model.supports_gradient_checkpointing:
+        warnings.warn(f"{type(model)} doesn't support gradient_checkpointing")
+        return
+    model.gradient_checkpointing_enable()
+
+
+class AmpHelper:
+    """Small helper that lets a training loop switch AMP on or off with one flag.
+
+    With ``use_amp=True`` the backward pass, optimizer step and gradient
+    unscaling go through a ``GradScaler``; with ``use_amp=False`` they fall
+    back to the plain ``loss.backward()`` / ``optimizer.step()`` calls.
+
+    Attributes (set in ``__init__``):
+        use_amp: whether mixed precision is enabled.
+        might_enable_autocast: context manager to wrap the forward pass in;
+            ``amp.autocast()`` when AMP is on, a no-op ``nullcontext()``
+            otherwise.
+        scaler: the ``amp.GradScaler`` used when AMP is on.
+
+    References:
+        https://pytorch.org/docs/master/notes/amp_examples.html
+    """
+    def __init__(self, use_amp=True):
+        self.use_amp = use_amp
+        self.might_enable_autocast = amp.autocast() if use_amp else nullcontext()
+        # Build the scaler disabled when AMP is off: an enabled GradScaler is
+        # never used in that mode, and constructing one without CUDA makes
+        # PyTorch emit a warning before disabling it anyway.
+        self.scaler = amp.GradScaler(enabled=use_amp)
+
+    def backward(self, loss):
+        """Back-propagate `loss`, scaling it first when AMP is enabled."""
+        if self.use_amp:
+            return self.scaler.scale(loss).backward()
+        else:
+            return loss.backward()
+
+    def step(self, optimizer):
+        """Run one optimizer step; with AMP, step via the scaler and update its scale."""
+        if self.use_amp:
+            self.scaler.step(optimizer)
+            self.scaler.update()
+        else:
+            optimizer.step()
+
+    def might_unscale_(self, optimizer):
+        """Unscale gradients in place when AMP is enabled; otherwise do nothing."""
+        if self.use_amp:
+            # Unscales the gradients of optimizer's assigned params in-place
+            self.scaler.unscale_(optimizer)
\ No newline at end of file
--- a/convlab2/nlg/scgpt/multiwoz/preprocess.py
+++ b/convlab2/nlg/scgpt/multiwoz/preprocess.py
 # -*- coding: utf-8 -*-
 """
 Created on Mon Sep 14 11:38:53 2020
-
 @author: truthless
 """

 import os
 import json
+from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
 from convlab2.nlg.scgpt.utils import dict2dict, dict2seq
 import zipfile

@@ -15,6 +15,51 @@ def read_zipped_json(filepath, filename):
    archive = zipfile.ZipFile(filepath, 'r')
    return json.load(archive.open(filename))

+def init_domain():
+    """Return a fresh dict mapping every domain name to ``False``."""
+    domain_names = (
+        'Attraction',
+        'Hospital',
+        'Hotel',
+        'Police',
+        'Restaurant',
+        'Taxi',
+        'Train',
+    )
+    return dict.fromkeys(domain_names, False)
+
+def write_file(name, data, role='usr'):
+    """Write one ``<dialog-act sequence> & <utterance>`` line per turn to ``{name}.txt``.
+
+    For the chosen role, turns whose ``{role}_da`` entry is empty are
+    skipped. In the remaining turns 'Bus' is rewritten to 'Train' in the
+    dialog acts (the turn dict is updated in place), '&' becomes 'and', and
+    the first mention of each domain within a session is marked with ' *'.
+
+    Args:
+        name: output path without the ``.txt`` extension.
+        data: mapping from session ID to that session's list of turn dicts.
+        role: 'usr' or 'sys'; selects the ``{role}`` / ``{role}_da`` fields.
+
+    Raises:
+        NameError: if `role` is neither 'usr' nor 'sys' (raised when the
+            first turn is processed).
+    """
+    with open(f'{name}.txt', 'w', encoding='utf-8') as f:
+        for sess in data.values():
+            # Tracks which domains were already introduced in this session.
+            seen_domains = init_domain()
+            for turn in sess:
+                if role not in ('usr', 'sys'):
+                    raise NameError('Invalid Role: Select usr/sys.')
+                da_key = role + '_da'
+                if not turn[da_key]:
+                    continue
+                # NOTE(review): eval() on the stringified acts is kept as in
+                # the original; it must only ever see trusted dataset content.
+                turn[da_key] = eval(str(turn[da_key]).replace('Bus','Train'))
+                acts = turn[da_key]
+                da_seq = dict2seq(dict2dict(acts)).replace('&', 'and')
+                for domain in {key.split('-')[0] for key in acts.keys()}:
+                    if domain in ['general', 'Booking'] or seen_domains[domain]:
+                        continue
+                    lowered = domain.lower()
+                    # Mark only the first occurrence of a newly seen domain.
+                    da_seq = da_seq.replace(lowered, lowered+' *', 1)
+                    seen_domains[domain] = True
+                da_uttr = turn[role].replace(' bus ', ' train ').replace('&', 'and')
+                f.write(f'{da_seq} & {da_uttr}\n')
+
+
+if __name__ == '__main__':
+    parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
+    parser.add_argument('--role', type=str, default='usr')
+    args = parser.parse_args()
+
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    data_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(
            cur_dir)))), 'data/multiwoz/')
@@ -38,22 +83,22 @@ results_test = {}
    for title, sess in data.items():
        logs = sess['log']
        turns = []
-    turn = {'turn':0, 'sys':'', 'sys_da':''}
+        turn = {'turn': 0, 'sys': '', 'sys_da': '', 'usr': '', 'usr_da': ''}
        current_domain = None
        for i, diag in enumerate(logs):
            text = diag['text']
            da = diag['dialog_act']
            span = diag['span_info']
-        if i % 2 == 0:
-            turn['usr'] = text
            if current_domain:
                da = eval(str(da).replace('Booking', current_domain))
                span = eval(str(span).replace('Booking', current_domain))
+            if i % 2 == 0:
+                turn['usr'] = text
                turn['usr_da'] = da
                turn['usr_span'] = span
                turns.append(turn)
            else:
-            turn = {'turn': i//2 +1}
+                turn = {'turn': i//2 + 1, 'sys': '', 'sys_da': '', 'usr': '', 'usr_da': ''}
                turn['sys'] = text
                turn['sys_da'] = da
                turn['sys_span'] = span
@@ -61,6 +106,9 @@ for title, sess in data.items():
                domain = key.split('-')[0]
                if domain not in ['general', 'Booking']:
                    current_domain = domain
+        else:
+            if args.role == 'sys':
+                turns.append(turn)
        title = title
        if title in val_list:
            current = results_val
@@ -74,34 +122,7 @@ results = eval(str(results).replace(" n't", " not"))
    results_val = eval(str(results_val).replace(" n't", " not"))
    results_test = eval(str(results_test).replace(" n't", " not"))

-def init_domain():
-    return {'Attraction':False,
-            'Hospital':False,
-            'Hotel':False,
-            'Police':False,
-            'Restaurant':False,
-            'Taxi':False,
-            'Train':False}
-
-def write_file(name, data):
-    with open(f'{name}.txt', 'w', encoding='utf-8') as f:
-        for ID in data:
-            sess = data[ID]
-            sess_domains = init_domain()
-            for turn in sess:
-                if not turn['usr_da']:
-                    continue
-                turn['usr_da'] = eval(str(turn['usr_da']).replace('Bus','Train'))
-                da_seq = dict2seq(dict2dict(turn['usr_da'])).replace('&', 'and')
-                domains = set([key.split('-')[0] for key in turn['usr_da'].keys()])
-                for domain in domains:
-                    if domain not in ['general', 'Booking'] and not sess_domains[domain]:
-                        da_seq = da_seq.replace(domain.lower(), domain.lower()+' *', 1)
-                        sess_domains[domain] = True
-                da_uttr = turn['usr'].replace(' bus ', ' train ').replace('&', 'and')
-                f.write(f'{da_seq} & {da_uttr}\n')
-
    if not os.path.exists(os.path.join(cur_dir,'data')):
        os.makedirs(os.path.join(cur_dir, 'data'))
-write_file(os.path.join(cur_dir, 'data/train'), dict(results, **results_val))
-write_file(os.path.join(cur_dir, 'data/test'), results_test)
+    write_file(os.path.join(cur_dir, f'data/train_{args.role}'), dict(results, **results_val), role=args.role)
+    write_file(os.path.join(cur_dir, f'data/test_{args.role}'), results_test, role=args.role)
--- a/convlab2/nlg/scgpt/multiwoz/scgpt.py
+++ b/convlab2/nlg/scgpt/multiwoz/scgpt.py
@@ -2,6 +2,7 @@ import torch
 import numpy as np
 import os
 import zipfile
+from copy import deepcopy

 from transformers import GPT2LMHeadModel, GPT2Tokenizer
 from convlab2.nlg.scgpt.utils import tuple2seq
@@ -10,23 +11,31 @@ from convlab2.nlg.nlg import NLG
 from convlab2.util.file_util import cached_path

 MAX_LENGTH = int(10000)  # Hardcoded max length to avoid infinite loop
-DEFAULT_DIRECTORY = os.path.join(os.path.dirname(os.path.abspath(__file__)), "models")
-DEFAULT_ARCHIVE_FILE = os.path.join(DEFAULT_DIRECTORY, "nlg-gpt-multiwoz.zip")

 class SCGPT(NLG):
    
-    def __init__(self,
-                 archive_file=DEFAULT_ARCHIVE_FILE,
-                 use_cuda=True,
-                 is_user=False,
-                 model_file='https://convlab.blob.core.windows.net/convlab-2/nlg-gpt-multiwoz.zip'):
+    def __init__(self, model_file=None,
+                 use_cuda=True, is_user=False):
+        # If no filename is mentioned then set to default
+        if not model_file:
+            if is_user:
+                model_file = 'https://convlab.blob.core.windows.net/convlab-2/nlg-gpt-multiwoz.zip'
+            else:
+                model_file = 'https://zenodo.org/record/5767426/files/neo_scgpt_system.zip'
+
+        # Load from file/url
        model_dir = os.path.dirname(os.path.abspath(__file__))
-        if not os.path.isfile(archive_file):
-            archive_file = cached_path(model_file)
-            archive = zipfile.ZipFile(archive_file, 'r')
+        if not os.path.isfile(model_file):
+            model_file = cached_path(model_file)
+        if not os.path.isdir(model_file):
+            archive = zipfile.ZipFile(model_file, 'r')
            archive.extractall(model_dir)
+            # Get model directory
+            model_file = archive.filelist[0].filename.replace('/', '')
+            self.model_name_or_path = os.path.join(model_dir, model_file)
+        else:
+            self.model_name_or_path = model_file
            
-        self.model_name_or_path = os.path.join(model_dir, 'multiwoz')
        self.length = 50
        self.num_samples = 5
        self.temperature = 1.0
@@ -34,6 +43,7 @@ class SCGPT(NLG):
        self.top_k = 50
        self.top_p = 0.9
        self.seed = 42
+        self.is_user = is_user
        self.stop_token = '<|endoftext|>'
    
        self.device = torch.device("cuda" if torch.cuda.is_available() and use_cuda else "cpu")
@@ -52,6 +62,8 @@ class SCGPT(NLG):
        elif self.length < 0:
            self.length = self.MAX_LENGTH  # avoid infinite loop
        
+        self.init_session()
+    
    def init_session(self):
        self.sess_domains = {'Attraction':False,
            'Hospital':False,
@@ -59,14 +71,34 @@ class SCGPT(NLG):
            'Police':False,
            'Restaurant':False,
            'Taxi':False,
-            'Train':False}
+            'Train':False,}
+        self.cur_domain = None
+        # if not self.is_user:
+        #     self.sess_domains['Booking'] = False
                
    def generate(self, meta):

+        #some actions in testing data is none
+        if not meta:
+            return 'No user action'
+
+        meta = deepcopy(meta)
+        for list_ in meta:
+            domain = list_[1]
+            if domain not in ('general', 'Booking'):
+                self.cur_domain = domain
+        for i, list_ in enumerate(meta):
+            list_ = list(list_)
+            if list_[1] == 'Booking':
+                if self.cur_domain is not None:
+                    list_[1] = self.cur_domain
+                    meta[i] = list_
+                else:
+                    print('`cur_domain` is None, but there is `Booking` in dialog action.')
        raw_text = tuple2seq(meta)
        domains = set([item[1] for item in meta])
        for domain in domains:
-            if domain != 'general' and not self.sess_domains[domain]:
+            if domain not in ('general', 'Booking') and not self.sess_domains[domain]:
                raw_text = raw_text.replace(domain.lower(), domain.lower()+ ' *', 1)
                self.sess_domains[domain] = True
        context_tokens = self.tokenizer.encode(raw_text, add_special_tokens=False)

--- a/convlab2/nlg/scgpt/train.py
+++ b/convlab2/nlg/scgpt/train.py
--- a/convlab2/nlg/template/multiwoz/label_maps.json
+++ b/convlab2/nlg/template/multiwoz/label_maps.json
+{
+  "guesthouse": [
+    "guest house",
+    "guest houses"
+  ],
+  "hotel": [
+    "hotels"
+  ],
+  "centre": [
+    "center",
+    "downtown"
+  ],
+  "north": [
+    "northern",
+    "northside",
+    "northend"
+  ],
+  "east": [
+    "eastern",
+    "eastside",
+    "eastend"
+  ],
+  "west": [
+    "western",
+    "westside",
+    "westend"
+  ],
+  "south": [
+    "southern",
+    "southside",
+    "southend"
+  ],
+  "cheap": [
+    "inexpensive",
+    "lower price",
+    "lower range",
+    "cheaply",
+    "cheaper",
+    "cheapest",
+    "very affordable"
+  ],
+  "moderate": [
+    "moderately",
+    "reasonable",
+    "reasonably",
+    "affordable",
+    "mid range",
+    "mid-range",
+    "priced moderately",
+    "decently priced",
+    "mid price",
+    "mid-price",
+    "mid priced",
+    "mid-priced",
+    "middle price",
+    "medium price",
+    "medium priced",
+    "not too expensive",
+    "not too cheap"
+  ],
+  "expensive": [
+    "high end",
+    "high-end",
+    "high class",
+    "high-class",
+    "high scale",
+    "high-scale",
+    "high price",
+    "high priced",
+    "higher price",
+    "fancy",
+    "upscale",
+    "nice",
+    "expensively",
+    "luxury"
+  ],
+  "0": [
+    "zero"
+  ],
+  "1": [
+    "one",
+    "just me",
+    "for me",
+    "myself",
+    "alone",
+    "me"
+  ],
+  "2": [
+    "two"
+  ],
+  "3": [
+    "three"
+  ],
+  "4": [
+    "four"
+  ],
+  "5": [
+    "five"
+  ],
+  "6": [
+    "six"
+  ],
+  "7": [
+    "seven"
+  ],
+  "8": [
+    "eight"
+  ],
+  "9": [
+    "nine"
+  ],
+  "10": [
+    "ten"
+  ],
+  "11": [
+    "eleven"
+  ],
+  "12": [
+    "twelve"
+  ],
+  "architecture": [
+    "architectural",
+    "architecturally",
+    "architect"
+  ],
+  "boat": [
+    "boating",
+    "boats",
+    "camboats"
+  ],
+  "camboats": [
+    "boating",
+    "boat",
+    "boats"
+  ],
+  "cinema": [
+    "cinemas",
+    "movie",
+    "films",
+    "film"
+  ],
+  "college": [
+    "colleges"
+  ],
+  "concerthall": [
+    "concert hall",
+    "concert halls",
+    "concerthalls",
+    "concerts",
+    "concert"
+  ],
+  "entertainment": [
+    "entertaining"
+  ],
+  "gastropub": [
+    "gastropubs"
+  ],
+  "mutliple sports": [
+    "multiple sport",
+    "multiple sports",
+    "multi sport",
+    "multi sports",
+    "sports",
+    "sporting"
+  ],
+  "museum": [
+    "museums",
+    "gallery",
+    "galleries"
+  ],
+  "nightclub": [
+    "night clubs",
+    "night club",
+    "nightclubs",
+    "club",
+    "clubs"
+  ],
+  "park": [
+    "parks"
+  ],
+  "swimmingpool": [
+    "swimming pool",
+    "swimming",
+    "pool",
+    "pools",
+    "swimmingpool",
+    "water",
+    "swim"
+  ],
+  "theatre": [
+    "theater",
+    "theatres",
+    "theaters"
+  ],
+  "abbey pool and astroturf pitch": [
+    "abbey pool and astroturf",
+    "abbey pool"
+  ],
+  "adc theatre": [
+    "adc theater",
+    "adc"
+  ],
+  "addenbrookes hospital": [
+    "addenbrooke's hospital"
+  ],
+  "cafe jello gallery": [
+    "cafe jello"
+  ],
+  "cambridge and county folk museum": [
+    "cambridge and country folk museum",
+    "county folk museum"
+  ],
+  "cambridge arts theatre": [
+    "cambridge arts theater"
+  ],
+  "cambridge book and print gallery": [
+    "book and print gallery"
+  ],
+  "cambridge contemporary art": [
+    "cambridge contemporary art museum",
+    "contemporary art museum"
+  ],
+  "the cambridge corn exchange": [
+    "cambridge corn exchange"
+  ],
+  "cambridge museum of technology": [
+    "museum of technology"
+  ],
+  "the cambridge punter": [
+    "cambridge punter",
+    "cambridge punters"
+  ],
+  "cambridge university botanic gardens": [
+    "cambridge university botanical gardens",
+    "cambridge university botanical garden",
+    "cambridge university botanic garden",
+    "cambridge botanic gardens",
+    "cambridge botanical gardens",
+    "cambridge botanic garden",
+    "cambridge botanical garden",
+    "botanic gardens",
+    "botanical gardens",
+    "botanic garden",
+    "botanical garden"
+  ],
+  "cherry hinton hall and grounds": [
+    "cherry hinton hall"
+  ],
+  "cherry hinton water play": [
+    "cherry hinton water play park"
+  ],
+  "cineworld cinema": [
+    "cineworld"
+  ],
+  "clare hall": [
+    "clair hall"
+  ],
+  "the fez club": [
+    "fez club"
+  ],
+  "jesus green outdoor pool": [
+    "jesus green"
+  ],
+  "kings hedges learner pool": [
+    "king's hedges learner pool",
+    "king hedges learner pool"
+  ],
+  "mumford theatre": [
+    "mumford theater"
+  ],
+  "museum of archaelogy and anthropology": [
+    "museum of archaeology and anthropology",
+    "museum of archaelogy",
+    "museum of archaeology"
+  ],
+  "riverboat georgina": [
+    "riverboat"
+  ],
+  "saint barnabas press gallery": [
+    "saint barbabas"
+  ],
+  "scott polar museum": [
+    "scott polar"
+  ],
+  "scudamores punting co": [
+    "scudamore's punting co",
+    "scudamores punting",
+    "scudamore's punting",
+    "scudamores",
+    "scudamore's",
+    "scudamore"
+  ],
+  "soul tree nightclub": [
+    "soul tree night club",
+    "soul tree",
+    "soultree"
+  ],
+  "the man on the moon": [
+    "man on the moon"
+  ],
+  "the junction": [
+    "junction theatre",
+    "junction theater"
+  ],
+  "old schools": [
+    "old school"
+  ],
+  "vue cinema": [
+    "vue"
+  ],
+  "wandlebury country park": [
+    "the wandlebury"
+  ],
+  "whipple museum of the history of science": [
+    "whipple museum",
+    "history of science museum"
+  ],
+  "restaurant alimentum": [
+    "alimentum"
+  ],
+  "bedouin": [
+    "the bedouin"
+  ],
+  "bloomsbury restaurant": [
+    "bloomsbury"
+  ],
+  "caffe uno": [
+    "cafe uno",
+    "caffee uno"
+  ],
+  "cambridge lodge restaurant": [
+    "cambridge lodge"
+  ],
+  "chiquito restaurant bar": [
+    "chiquito restaurant",
+    "chiquito"
+  ],
+  "city stop restaurant": [
+    "city stop"
+  ],
+  "clowns cafe": [
+    "clown's cafe"
+  ],
+  "the cow pizza kitchen and bar": [
+    "cow pizza kitchen and bar",
+    "cow pizza"
+  ],
+  "darrys cookhouse and wine shop": [
+    "darry's cookhouse and wine shop",
+    "darry's cookhouse",
+    "darrys cookhouse"
+  ],
+  "de luca cucina and bar": [
+    "de luca cucina and bar riverside brasserie",
+    "luca cucina and bar",
+    "de luca cucina",
+    "luca cucina"
+  ],
+  "da vinci pizzeria": [
+    "da vinci pizza",
+    "da vinci"
+  ],
+  "don pasquale pizzeria": [
+    "don pasquale pizza",
+    "don pasquale",
+    "pasquale pizzeria",
+    "pasquale pizza"
+  ],
+  "efes restaurant": [
+    "efes"
+  ],
+  "fitzbillies restaurant": [
+    "fitzbillies"
+  ],
+  "frankie and bennys": [
+    "frankie and benny's"
+  ],
+  "funky fun house": [
+    "funky"
+  ],
+  "the gardenia": [
+    "gardenia"
+  ],
+  "grafton hotel restaurant": [
+    "the grafton hotel",
+    "grafton hotel"
+  ],
+  "hotel du vin and bistro": [
+    "hotel du vin",
+    "du vin"
+  ],
+  "kohinoor": [
+    "the kohinoor"
+  ],
+  "lan hong house": [
+    "lan hong",
+    "ian hong house",
+    "ian hong"
+  ],
+  "lovell lodge": [
+    "lovell",
+    "the lovell lodge"
+  ],
+  "mahal of cambridge": [
+    "mahal"
+  ],
+  "maharajah tandoori restaurant": [
+    "maharajah tandoori",
+    "the maharajah tandoor"
+  ],
+  "meze bar restaurant": [
+    "the meze bar",
+    "meze bar"
+  ],
+  "michaelhouse cafe": [
+    "michael house cafe"
+  ],
+  "midsummer house restaurant": [
+    "midsummer house"
+  ],
+  "the missing sock": [
+    "missing sock"
+  ],
+  "nandos": [
+    "nando's city centre",
+    "nando's city center",
+    "nandos city centre",
+    "nandos city center",
+    "nando's"
+  ],
+  "nandos city centre": [
+    "nando's city centre",
+    "nando's city center",
+    "nandos city center",
+    "nando's",
+    "nandos"
+  ],
+  "the oak bistro": [
+    "oak bistro"
+  ],
+  "restaurant one seven": [
+    "one seven"
+  ],
+  "the river bar steakhouse and grill": [
+    "river bar steakhouse and grill",
+    "the river bar steakhouse",
+    "river bar steakhouse"
+  ],
+  "pipasha restaurant": [
+    "pipasha"
+  ],
+  "pizza hut city centre": [
+    "pizza hut city center"
+  ],
+  "pizza hut fen ditton": [
+    "pizza hut fenditton",
+    "pizza express fen ditton"
+  ],
+  "restaurant two two": [
+    "two two",
+    "restaurant 22"
+  ],
+  "saffron brasserie": [
+    "saffron"
+  ],
+  "saint johns chop house": [
+    "saint john's chop house",
+    "st john's chop house",
+    "st johns chop house"
+  ],
+  "sesame restaurant and bar": [
+    "sesame restaurant",
+    "sesame"
+  ],
+  "shanghai family restaurant": [
+    "shanghai"
+  ],
+  "sitar tandoori": [
+    "sitar"
+  ],
+  "the slug and lettuce": [
+    "slug and lettuce"
+  ],
+  "saint johns chop house": [
+    "st johns chop house",
+    "st john's chop house",
+    "saint johns chop house"
+  ],
+  "stazione restaurant and coffee bar": [
+    "stazione restaurant",
+    "stazione"
+  ],
+  "thanh binh": [
+    "thanh",
+    "binh"
+  ],
+  "the hotpot": [
+    "the hotspot",
+    "hotpot",
+    "hotspot"
+  ],
+  "the lucky star": [
+    "lucky star"
+  ],
+  "peking restaurant": [
+    "the peking restaurant"
+  ],
+  "the varsity restaurant": [
+    "varsity restaurant",
+    "the varsity",
+    "varsity"
+  ],
+  "zizzi cambridge": [
+    "zizzi"
+  ],
+  "asian oriental": [
+    "asian",
+    "oriental"
+  ],
+  "australian": [
+    "australasian"
+  ],
+  "barbeque": [
+    "barbecue",
+    "bbq"
+  ],
+  "corsica": [
+    "corsican"
+  ],
+  "indian": [
+    "tandoori"
+  ],
+  "italian": [
+    "pizza",
+    "pizzeria"
+  ],
+  "japanese": [
+    "sushi"
+  ],
+  "latin american": [
+    "latin-american",
+    "latin"
+  ],
+  "malaysian": [
+    "malay"
+  ],
+  "middle eastern": [
+    "middle-eastern"
+  ],
+  "modern american": [
+    "american modern",
+    "american"
+  ],
+  "modern european": [
+    "european modern",
+    "european"
+  ],
+  "north american": [
+    "north-american",
+    "american"
+  ],
+  "portuguese": [
+    "portugese"
+  ],
+  "seafood": [
+    "sea food"
+  ],
+  "singaporean": [
+    "singapore"
+  ],
+  "steakhouse": [
+    "steak house",
+    "steak"
+  ],
+  "the americas": [
+    "american",
+    "americas"
+  ],
+  "a and b guest house": [
+    "a & b guest house",
+    "a and b",
+    "a & b"
+  ],
+  "acorn guest house": [
+    "the acorn guest house",
+    "acorn"
+  ],
+  "alexander bed and breakfast": [
+    "alexander"
+  ],
+  "allenbell": [
+    "the allenbell"
+  ],
+  "alpha-milton guest house": [
+    "the alpha-milton",
+    "alpha-milton"
+  ],
+  "arbury lodge guesthouse": [
+    "arbury lodge guest house",
+    "arbury lodge",
+    "arbury"
+  ],
+  "archway house": [
+    "archway"
+  ],
+  "ashley hotel": [
+    "the ashley hotel",
+    "ashley"
+  ],
+  "aylesbray lodge guest house": [
+    "aylesbray lodge",
+    "aylesbray",
+    "alesbray lodge guest house",
+    "alyesbray lodge hotel"
+  ],
+  "bridge guest house": [
+    "bridge house"
+  ],
+  "the cambridge belfry": [
+    "cambridge belfry",
+    "belfry hotel",
+    "belfry"
+  ],
+  "carolina bed and breakfast": [
+    "carolina"
+  ],
+  "city centre north b and b": [
+    "city centre north bed and breakfast",
+    "city centre north",
+    "north b and b"
+  ],
+  "el shaddai": [
+    "el shaddia guest house",
+    "el shaddai guest house",
+    "el shaddia"
+  ],
+  "express by holiday inn cambridge": [
+    "express by holiday inn",
+    "holiday inn cambridge",
+    "holiday inn"
+  ],
+  "finches bed and breakfast": [
+    "finches"
+  ],
+  "gonville hotel": [
+    "gonville"
+  ],
+  "hamilton lodge": [
+    "the hamilton lodge",
+    "hamilton"
+  ],
+  "hobsons house": [
+    "hobson's house",
+    "hobson's"
+  ],
+  "huntingdon marriott hotel": [
+    "huntington marriott hotel",
+    "huntington marriot hotel",
+    "huntingdon marriot hotel",
+    "huntington marriott",
+    "huntingdon marriott",
+    "huntington marriot",
+    "huntingdon marriot",
+    "huntington",
+    "huntingdon",
+    "marriott hotel",
+    "marriott"
+  ],
+  "kirkwood house": [
+    "kirkwood"
+  ],
+  "the lensfield hotel": [
+    "lensfield hotel",
+    "lensfield"
+  ],
+  "leverton house": [
+    "leverton"
+  ],
+  "rosa's bed and breakfast": [
+    "rosas bed and breakfast",
+    "rosa's",
+    "rosas"
+  ],
+  "university arms hotel": [
+    "university arms"
+  ],
+  "warkworth house": [
+    "warkworth hotel",
+    "warkworth"
+  ],
+  "worth house": [
+    "the worth house",
+    "warkworth house",
+    "warkworth"
+  ],
+  "birmingham new street": [
+    "birmingham new street train station"
+  ],
+  "birmingham new street train station": [
+    "birmingham new street"
+  ],
+  "bishops stortford": [
+    "bishops stortford train station"
+  ],
+  "bishops stortford train station": [
+    "bishops stortford"
+  ],
+  "broxbourne": [
+    "broxbourne train station"
+  ],
+  "broxbourne train station": [
+    "broxbourne"
+  ],
+  "cambridge": [
+    "cambridge train station"
+  ],
+  "cambridge train station": [
+    "cambridge"
+  ],
+  "ely": [
+    "ely train station"
+  ],
+  "ely train station": [
+    "ely"
+  ],
+  "kings lynn": [
+    "king's lynn",
+    "king's lynn train station",
+    "kings lynn train station"
+  ],
+  "kings lynn train station": [
+    "kings lynn",
+    "king's lynn",
+    "king's lynn train station"
+  ],
+  "leicester": [
+    "leicester train station"
+  ],
+  "leicester train station": [
+    "leicester"
+  ],
+  "london kings cross": [
+    "kings cross",
+    "king's cross",
+    "london king's cross",
+    "kings cross train station",
+    "king's cross train station",
+    "london king's cross train station",
+    "london kings cross train station"
+  ],
+  "london kings cross train station": [
+    "kings cross",
+    "king's cross",
+    "london king's cross",
+    "london kings cross",
+    "kings cross train station",
+    "king's cross train station",
+    "london king's cross train station"
+  ],
+  "london liverpool street": [
+    "london liverpool",
+    "liverpool street",
+    "london liverpool train station",
+    "liverpool street train station",
+    "london liverpool street train station"
+  ],
+  "london liverpool street train station": [
+    "london liverpool",
+    "liverpool street",
+    "london liverpool street",
+    "london liverpool train station",
+    "liverpool street train station"
+  ],
+  "norwich": [
+    "norwich train station"
+  ],
+  "norwich train station": [
+    "norwich"
+  ],
+  "peterborough": [
+    "peterborough train station"
+  ],
+  "peterborough train station": [
+    "peterborough"
+  ],
+  "stansted airport": [
+    "stansted airport train station"
+  ],
+  "stansted airport train station": [
+    "stansted airport"
+  ],
+  "stevenage": [
+    "stevenage train station"
+  ],
+  "stevenage train station": [
+    "stevenage"
+  ]
+}
\ No newline at end of file
--- a/data/multiwoz/ontology_nlg_eval.json
+++ b/data/multiwoz/ontology_nlg_eval.json