Skip to content
Snippets Groups Projects
Unverified Commit c6170181 authored by zhuqi's avatar zhuqi Committed by GitHub
Browse files

Merge pull request #1 from ConvLab/dsml_scgpt

add system sc-gpt
parents b8aecfe6 aac882a3
No related branches found
No related tags found
No related merge requests found
......@@ -5,16 +5,173 @@ Usage: python evaluate.py [MultiWOZ] [SCLSTM|TemplateNLG] [usr|sys]
"""
import json
import os
import random
import sys
import itertools
import zipfile
import numpy
from numpy.lib.shape_base import _put_along_axis_dispatcher
from numpy.lib.twodim_base import triu_indices_from
import torch
from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction
from pprint import pprint
from tqdm import tqdm
def slot_error(dialog_acts, utts):
    """Count 'inform' acts whose value does not appear in the paired utterance.

    Args:
        dialog_acts: iterable of act lists; each act is a 4-item sequence of
            strings ``[intent, domain, slot, value]``.
        utts: iterable of utterance strings paired positionally with
            ``dialog_acts`` (pairing is done with ``zip``).

    Returns:
        ``(missing, total)`` where ``total`` is the number of acts whose
        lower-cased intent is ``'inform'`` and ``missing`` is how many of
        those have a lower-cased value that is not a substring of the
        lower-cased utterance.
    """
    missing = 0
    total = 0
    for acts, utt in zip(dialog_acts, utts):
        # Lower-case the utterance once per turn instead of once per act.
        lowered_utt = utt.lower()
        for act in acts:
            intent, _domain, _slot, value = (x.lower() for x in act)
            if intent != 'inform':
                continue
            total += 1
            if value not in lowered_utt:
                missing += 1
    return missing, total
def fine_SER(dialog_acts, utts, mappings=None, entity_list=None):
    """Count missing and hallucinated slots in utterances against dialog acts.

    Args:
        dialog_acts: iterable of act lists; every act is a 4-item sequence of
            strings ``[intent, domain, slot, value]`` (lower-cased here).
        utts: iterable of utterance strings paired positionally with
            ``dialog_acts``.
        mappings: optional dict mapping a lower-cased value to a list of
            alternative surface forms. When ``None`` it is loaded from
            ``template/multiwoz/label_maps.json`` next to this file.
        entity_list: optional list of keywords used for hallucination
            detection. When ``None`` it is the concatenation of every list in
            ``data/multiwoz/ontology_nlg_eval.json``, located two directory
            levels above this file's directory.

    Returns:
        ``(missing, hallucinate, total, hallucination_dialogs,
        missing_dialogs)``. ``total`` counts the value-bearing and request
        acts; ``missing`` counts acts judged absent from the utterance;
        ``hallucinate`` counts turns whose leftover text still contains a
        known keyword. ``missing_dialogs`` is a flat list of
        ``missing_fact, acts, utt`` triples and ``hallucination_dialogs`` a
        flat list of ``keyword, acts, leftover_text, utt`` quadruples.
    """
    if mappings is None:
        here = os.path.dirname(os.path.abspath(__file__))
        path = os.path.join(here, 'template', 'multiwoz', 'label_maps.json')
        # The with-statement closes the file; no explicit close() is needed.
        with open(path, 'r') as mapping_file:
            mappings = json.load(mapping_file)
    if entity_list is None:
        root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
        path = os.path.join(root, 'data', 'multiwoz', 'ontology_nlg_eval.json')
        with open(path, 'r') as entity_file:
            possible_entity = json.load(entity_file)
        entity_list = []
        for values in possible_entity.values():
            entity_list.extend(values)

    value_intents = ('inform', 'recommend', 'offerbook', 'offerbooked', 'book', 'select')

    hallucinate = 0
    missing = 0
    total = 0
    missing_dialogs = []
    hallucination_dialogs = []
    # Slots seen so far over ALL turns, in first-seen order; they double as
    # hallucination keywords, so the result depends on the order of the turns.
    slot_span = []

    for acts, utt in zip(dialog_acts, utts):
        origin_utt = utt.lower()
        # tmp_utt is progressively stripped of everything the acts justify;
        # whatever keyword survives is treated as a hallucination.
        tmp_utt = origin_utt
        legal_act_flag = False

        for act in acts:
            missing_fact = None
            missing_flag = False
            i, d, s, v = [x.lower() for x in act]
            if s not in slot_span:
                slot_span.append(s)

            if i in value_intents:
                legal_act_flag = True
                total += 1
                if v not in origin_utt and v != 'none':
                    exist_flag = False
                    # A value without an entry simply has no synonyms.
                    for item in mappings.get(v, ()):
                        if item in origin_utt:
                            exist_flag = True
                            # Remove the matched span for hallucination detection.
                            tmp_utt = tmp_utt.replace(item, '')
                            tmp_utt = tmp_utt.replace(s, '')
                    # NOTE(review): the two `v == 'none'` checks below cannot
                    # fire while nested under `v != 'none'`; kept as found,
                    # confirm the intended nesting against the upstream file.
                    if i in ('offerbook', 'offerbooked') and v == 'none':
                        if 'book' in origin_utt:
                            exist_flag = True
                            tmp_utt = tmp_utt.replace('book', '')
                    if i in ('inform', 'recommend') and v == 'none':
                        if d in origin_utt:
                            exist_flag = True
                            tmp_utt = tmp_utt.replace(d, '')
                    if not exist_flag:
                        missing_flag = True
                        missing_fact = v
                else:
                    tmp_utt = tmp_utt.replace(v, '')
                    tmp_utt = tmp_utt.replace(s, '')
                # Mentioning the slot name itself counts as realising the act.
                if s in origin_utt:
                    missing_flag = False
                if s == 'booking' and ('book' in origin_utt or 'reserv' in origin_utt):
                    missing_flag = False

            elif i == 'request':
                legal_act_flag = True
                total += 1
                if s in ('depart', 'dest', 'area'):
                    if 'where' not in origin_utt:
                        if s in origin_utt:
                            tmp_utt = tmp_utt.replace(s, '')
                        else:
                            missing_flag = True
                            missing_fact = s
                elif s in ('leave', 'arrive'):
                    if 'when' not in origin_utt:
                        if not ('what' in origin_utt and 'time' in origin_utt):
                            missing_flag = True
                            missing_fact = s
                        else:
                            # The original discarded this result (no-op call).
                            tmp_utt = tmp_utt.replace('time', '')
                else:
                    tmp_utt = tmp_utt.replace(s, '')
                    tmp_utt = tmp_utt.replace(d, '')
                if s in origin_utt:
                    missing_flag = False
                if s == 'booking' and ('book' in origin_utt or 'reserv' in origin_utt):
                    missing_flag = False

            # Always strip the act's own domain and slot words. d and s are
            # already lower-cased strings, so no exception guard is needed.
            tmp_utt = tmp_utt.replace(d, '')
            tmp_utt = tmp_utt.replace(s, '')
            if 'arrive' in s or 'leave' in s:
                tmp_utt = tmp_utt.replace('time', '')

            if missing_flag:
                missing += 1
                missing_dialogs.append(missing_fact)
                missing_dialogs.append(acts)
                missing_dialogs.append(utt)

        # Only turns with at least one recognised act are checked; at most one
        # hallucination is recorded per turn (first keyword hit wins).
        if legal_act_flag:
            for keyword in slot_span + entity_list:
                if keyword in tmp_utt and len(keyword) >= 4:
                    hallucinate += 1
                    hallucination_dialogs.append(keyword)
                    hallucination_dialogs.append(acts)
                    hallucination_dialogs.append(tmp_utt)
                    hallucination_dialogs.append(utt)
                    break

    return missing, hallucinate, total, hallucination_dialogs, missing_dialogs
def get_bleu4(dialog_acts, golden_utts, gen_utts):
das2utts = {}
for das, utt, gen in zip(dialog_acts, golden_utts, gen_utts):
......@@ -55,36 +212,52 @@ if __name__ == '__main__':
numpy.random.seed(seed)
torch.manual_seed(seed)
if len(sys.argv) != 4:
if len(sys.argv) < 4:
print("usage:")
print("\t python evaluate.py dataset model role")
print("\t dataset=MultiWOZ, CrossWOZ, or Camrest")
print("\t model=SCLSTM, or TemplateNLG")
print("\t model=SCLSTM, SCLSTM_NoUNK, SCGPT or TemplateNLG")
print("\t role=usr/sys")
print("\t [Optional] model_file")
sys.exit()
dataset_name = sys.argv[1]
model_name = sys.argv[2]
role = sys.argv[3]
model_file = sys.argv[4] if len(sys.argv) >= 5 else None
if dataset_name == 'MultiWOZ':
if model_name == 'SCLSTM':
from convlab2.nlg.sclstm.multiwoz import SCLSTM
if role == 'usr':
model = SCLSTM(is_user=True, use_cuda=True)
model = SCLSTM(is_user=True, use_cuda=True, unk_suppress=False)
elif role == 'sys':
model = SCLSTM(is_user=False, use_cuda=True, unk_suppress=False)
elif model_name == 'SCLSTM_NoUNK':
from convlab2.nlg.sclstm.multiwoz import SCLSTM
if role == 'usr':
model = SCLSTM(is_user=True, use_cuda=True, unk_suppress=True)
elif role == 'sys':
model = SCLSTM(is_user=False, use_cuda=True)
model = SCLSTM(is_user=False, use_cuda=True, unk_suppress=True)
elif model_name == 'TemplateNLG':
from convlab2.nlg.template.multiwoz import TemplateNLG
if role == 'usr':
model = TemplateNLG(is_user=True)
elif role == 'sys':
model = TemplateNLG(is_user=False)
elif model_name == 'SCGPT':
from convlab2.nlg.scgpt.multiwoz import SCGPT
if model_file is not None:
print(f"load model at {model_file}")
if role == 'usr':
model = SCGPT(model_file, is_user=True)
elif role == 'sys':
model = SCGPT(model_file, is_user=False)
else:
raise Exception("Available models: SCLSTM, TEMPLATE")
raise Exception("Available models: SCLSTM, SCGPT, TEMPLATE")
from convlab2.util.dataloader.module_dataloader import SingleTurnNLGDataloader
from convlab2.util.dataloader.dataset_dataloader import MultiWOZDataloader
dataloader = SingleTurnNLGDataloader(dataset_dataloader=MultiWOZDataloader())
data = dataloader.load_data(data_key='test', role=role)['test']
data = dataloader.load_data(data_key='all', role=role, session_id=True)['test']
dialog_acts = []
golden_utts = []
......@@ -93,17 +266,51 @@ if __name__ == '__main__':
sen_num = 0
# sys.stdout = open(sys.argv[2] + '-' + sys.argv[3] + '-' + 'evaluate_logs_neo.txt','w')
assert 'utterance' in data and 'dialog_act' in data and 'session_id' in data
assert len(data['utterance']) == len(data['dialog_act']) == len(data['session_id'])
# Turns during the same session should be contiguous, so we can call init_session at the first turn of a new session.
# This is necessary for SCGPT, but unnecessary for SCLSTM and TemplateNLG.
is_first_turn = []
for _, iterator in itertools.groupby(data['session_id']):
is_first_turn.append(True)
next(iterator)
is_first_turn.extend(False for _ in iterator)
for i in tqdm(range(len(data['utterance']))):
if is_first_turn[i]:
model.init_session()
dialog_acts.append(data['dialog_act'][i])
golden_utts.append(data['utterance'][i])
gen_utts.append(model.generate(data['dialog_act'][i]))
# print(dialog_acts[-1])
# print(golden_utts[-1])
# print(gen_utts[-1])
bleu4 = get_bleu4(dialog_acts, golden_utts, gen_utts)
print("Calculate SER for golden responses")
missing, hallucinate, total, hallucination_dialogs, missing_dialogs = fine_SER(dialog_acts, golden_utts)
print("Golden response Missing acts: {}, Total acts: {}, Hallucinations {}, SER {}".format(missing, total, hallucinate, missing/total))
print("Calculate SER")
missing, hallucinate, total, hallucination_dialogs, missing_dialogs = fine_SER(dialog_acts, gen_utts)
# with open('{}-{}-genutt_neo.txt'.format(sys.argv[2], sys.argv[3]), mode='wt', encoding='utf-8') as gen_diag:
# for x in gen_utts:
# gen_diag.writelines(str(x)+'\n')
# with open('{}-{}-hallucinate_neo.txt'.format(sys.argv[2], sys.argv[3]), mode='wt', encoding='utf-8') as hal_diag:
# for x in hallucination_dialogs:
# hal_diag.writelines(str(x)+'\n')
# with open('{}-{}-missing_neo.txt'.format(sys.argv[2], sys.argv[3]), mode='wt', encoding='utf-8') as miss_diag:
# for x in missing_dialogs:
# miss_diag.writelines(str(x)+'\n')
print("{} Missing acts: {}, Total acts: {}, Hallucinations {}, SER {}".format(sys.argv[2], missing, total, hallucinate, missing/total))
print("Calculate bleu-4")
bleu4 = get_bleu4(dialog_acts, golden_utts, gen_utts)
print("BLEU-4: %.4f" % bleu4)
print('Model on {} sentences role={}'.format(len(data['utterance']), role))
# sys.stdout.close()
else:
raise Exception("currently supported dataset: MultiWOZ")
......@@ -21,9 +21,22 @@ tar -xvf scgpt.tar.gz
Then
``` python
python train.py --output_dir=$output_dir$ --model_type=scgpt --model_name_or_path=gpt2 --do_train --do_eval --eval_data_file=$test_file$ --overwrite_cache --use_tokenize --train_data_file=$train_file$ --overwrite_output_dir
python train.py --output_dir=trained_output --model_type=gpt2 --model_name_or_path=scgpt --do_train --do_eval --eval_data_file=multiwoz/data/test_sys.txt --use_tokenize --train_data_file=multiwoz/data/train_sys.txt --overwrite_output_dir
```
Some tricks (optional training arguments):
* `--gradient_accumulation_steps xxx`
* `--fp16`: if it is set, you should set `--per_gpu_train_batch_size` to a multiple of 8
* `--max_seq xxx`, it should be larger than the length of the longest sequence. You can set `--max_seq 1024`. The script uses a dynamic sequence length at each training step.
* `--gradient_checkpointing`, it allows larger `per_gpu_train_batch_size`
* `--use_multi_tensor_adamw`: reportedly a faster optimizer
Distributed data parallel:
If multiple GPUs are available, you can run `python -m torch.distributed.launch --nproc_per_node CUDA_COUNT train.py ......`
`CUDA_COUNT` is the number of GPUs, and `......` stands for the arguments of `train.py`.
## Use
```python
......
import warnings
from contextlib import nullcontext
from typing import TYPE_CHECKING
import torch.cuda.amp as amp
import transformers
from transformers import GPT2LMHeadModel
# reference: https://pytorch.org/docs/master/notes/amp_examples.html
class AmpGPT2LMHeadModel(GPT2LMHeadModel):
    """``GPT2LMHeadModel`` whose forward pass runs inside ``amp.autocast()``."""

    if not TYPE_CHECKING:
        def forward(self, *args, **kwargs):
            """Call the parent ``forward`` with the same arguments under autocast."""
            with amp.autocast():
                outputs = super().forward(*args, **kwargs)
            return outputs
    else:
        # Only evaluated by static analysers: exposes the parent's signature
        # so IDE code hinting keeps working.
        forward = GPT2LMHeadModel.forward
def try_enable_gradient_checkpointing(model: "transformers.modeling_utils.PreTrainedModel"):
    """Enable gradient checkpointing on ``model`` when it advertises support.

    Reads ``model.supports_gradient_checkpointing``; if it is falsy a warning
    is issued and the model is left untouched, otherwise
    ``model.gradient_checkpointing_enable()`` is called.
    """
    if not model.supports_gradient_checkpointing:
        warnings.warn(f"{type(model)} doesn't support gradient_checkpointing")
        return
    model.gradient_checkpointing_enable()
class AmpHelper:
    """Thin wrapper that makes mixed-precision training steps optional.

    With ``use_amp=True`` the loss is scaled before ``backward`` and the
    optimizer is stepped through the gradient scaler; with ``use_amp=False``
    every method falls back to the plain PyTorch call.

    References:
        https://pytorch.org/docs/master/notes/amp_examples.html
    """

    def __init__(self, use_amp=True):
        """Create the autocast context and gradient scaler.

        Args:
            use_amp: whether automatic mixed precision is used.
        """
        self.use_amp = use_amp
        # Context manager for the forward pass: autocast when AMP is on,
        # otherwise a do-nothing context.
        self.might_enable_autocast = amp.autocast() if use_amp else nullcontext()
        # Build the scaler disabled when AMP is off, so that no "CUDA is not
        # available" warning is raised for a scaler that is never used.
        self.scaler = amp.GradScaler(enabled=use_amp)

    def backward(self, loss):
        """Back-propagate ``loss``, scaling it first when AMP is enabled."""
        if self.use_amp:
            return self.scaler.scale(loss).backward()
        return loss.backward()

    def step(self, optimizer):
        """Step ``optimizer``; with AMP, go through the scaler and update it."""
        if self.use_amp:
            self.scaler.step(optimizer)
            self.scaler.update()
        else:
            optimizer.step()

    def might_unscale_(self, optimizer):
        """Unscale the gradients in place when AMP is enabled; otherwise no-op."""
        if self.use_amp:
            # Unscales the gradients of optimizer's assigned params in-place
            self.scaler.unscale_(optimizer)
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 14 11:38:53 2020
@author: truthless
"""
import os
import json
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
from convlab2.nlg.scgpt.utils import dict2dict, dict2seq
import zipfile
......@@ -15,6 +15,51 @@ def read_zipped_json(filepath, filename):
archive = zipfile.ZipFile(filepath, 'r')
return json.load(archive.open(filename))
def init_domain():
    """Return a new dict mapping each of the seven domain names to ``False``."""
    domain_names = (
        'Attraction',
        'Hospital',
        'Hotel',
        'Police',
        'Restaurant',
        'Taxi',
        'Train',
    )
    # A fresh dict is built on every call so callers can mutate it freely.
    return dict.fromkeys(domain_names, False)
def write_file(name, data, role='usr'):
    """Write one ``<act sequence> & <utterance>`` line per turn to ``{name}.txt``.

    Args:
        name: output path without the ``.txt`` suffix.
        data: mapping of session id to a list of turn dicts. For the chosen
            role each turn is read through its ``'<role>_da'`` and
            ``'<role>'`` keys.
        role: ``'usr'`` or ``'sys'``; selects which side of each turn is written.

    Raises:
        NameError: if ``role`` is neither ``'usr'`` nor ``'sys'``. The check
            runs before the output file is opened, so an invalid role no
            longer truncates an existing file.

    Side effects:
        ``turn['<role>_da']`` is replaced in place by its 'Bus' -> 'Train'
        rewritten copy.
    """
    if role not in ('usr', 'sys'):
        raise NameError('Invalid Role: Select usr/sys.')
    da_key = f'{role}_da'

    with open(f'{name}.txt', 'w', encoding='utf-8') as f:
        for sess_id in data:
            sess = data[sess_id]
            # Tracks which domains have already been starred in this session.
            sess_domains = init_domain()
            for turn in sess:
                # Turns without dialog acts for this role are skipped.
                if not turn[da_key]:
                    continue
                # NOTE(review): eval() on the repr of dataset content executes
                # arbitrary expressions; ast.literal_eval would be the safe
                # equivalent if the acts are always plain literals - confirm.
                turn[da_key] = eval(str(turn[da_key]).replace('Bus', 'Train'))
                # '&' is the separator of the output line, so it is spelled
                # out inside both the act sequence and the utterance.
                da_seq = dict2seq(dict2dict(turn[da_key])).replace('&', 'and')
                domains = {key.split('-')[0] for key in turn[da_key]}
                for domain in domains:
                    if domain not in ('general', 'Booking') and not sess_domains[domain]:
                        # Star the first occurrence of a domain the first
                        # time it appears in the session.
                        da_seq = da_seq.replace(domain.lower(), domain.lower() + ' *', 1)
                        sess_domains[domain] = True
                da_uttr = turn[role].replace(' bus ', ' train ').replace('&', 'and')
                f.write(f'{da_seq} & {da_uttr}\n')
if __name__ == '__main__':
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
parser.add_argument('--role', type=str, default='usr')
args = parser.parse_args()
cur_dir = os.path.dirname(os.path.abspath(__file__))
data_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(
cur_dir)))), 'data/multiwoz/')
......@@ -38,22 +83,22 @@ results_test = {}
for title, sess in data.items():
logs = sess['log']
turns = []
turn = {'turn':0, 'sys':'', 'sys_da':''}
turn = {'turn': 0, 'sys': '', 'sys_da': '', 'usr': '', 'usr_da': ''}
current_domain = None
for i, diag in enumerate(logs):
text = diag['text']
da = diag['dialog_act']
span = diag['span_info']
if i % 2 == 0:
turn['usr'] = text
if current_domain:
da = eval(str(da).replace('Booking', current_domain))
span = eval(str(span).replace('Booking', current_domain))
if i % 2 == 0:
turn['usr'] = text
turn['usr_da'] = da
turn['usr_span'] = span
turns.append(turn)
else:
turn = {'turn': i//2 +1}
turn = {'turn': i//2 + 1, 'sys': '', 'sys_da': '', 'usr': '', 'usr_da': ''}
turn['sys'] = text
turn['sys_da'] = da
turn['sys_span'] = span
......@@ -61,6 +106,9 @@ for title, sess in data.items():
domain = key.split('-')[0]
if domain not in ['general', 'Booking']:
current_domain = domain
else:
if args.role == 'sys':
turns.append(turn)
title = title
if title in val_list:
current = results_val
......@@ -74,34 +122,7 @@ results = eval(str(results).replace(" n't", " not"))
results_val = eval(str(results_val).replace(" n't", " not"))
results_test = eval(str(results_test).replace(" n't", " not"))
def init_domain():
return {'Attraction':False,
'Hospital':False,
'Hotel':False,
'Police':False,
'Restaurant':False,
'Taxi':False,
'Train':False}
def write_file(name, data):
with open(f'{name}.txt', 'w', encoding='utf-8') as f:
for ID in data:
sess = data[ID]
sess_domains = init_domain()
for turn in sess:
if not turn['usr_da']:
continue
turn['usr_da'] = eval(str(turn['usr_da']).replace('Bus','Train'))
da_seq = dict2seq(dict2dict(turn['usr_da'])).replace('&', 'and')
domains = set([key.split('-')[0] for key in turn['usr_da'].keys()])
for domain in domains:
if domain not in ['general', 'Booking'] and not sess_domains[domain]:
da_seq = da_seq.replace(domain.lower(), domain.lower()+' *', 1)
sess_domains[domain] = True
da_uttr = turn['usr'].replace(' bus ', ' train ').replace('&', 'and')
f.write(f'{da_seq} & {da_uttr}\n')
if not os.path.exists(os.path.join(cur_dir,'data')):
os.makedirs(os.path.join(cur_dir, 'data'))
write_file(os.path.join(cur_dir, 'data/train'), dict(results, **results_val))
write_file(os.path.join(cur_dir, 'data/test'), results_test)
write_file(os.path.join(cur_dir, f'data/train_{args.role}'), dict(results, **results_val), role=args.role)
write_file(os.path.join(cur_dir, f'data/test_{args.role}'), results_test, role=args.role)
......@@ -2,6 +2,7 @@ import torch
import numpy as np
import os
import zipfile
from copy import deepcopy
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from convlab2.nlg.scgpt.utils import tuple2seq
......@@ -10,23 +11,31 @@ from convlab2.nlg.nlg import NLG
from convlab2.util.file_util import cached_path
MAX_LENGTH = int(10000) # Hardcoded max length to avoid infinite loop
DEFAULT_DIRECTORY = os.path.join(os.path.dirname(os.path.abspath(__file__)), "models")
DEFAULT_ARCHIVE_FILE = os.path.join(DEFAULT_DIRECTORY, "nlg-gpt-multiwoz.zip")
class SCGPT(NLG):
def __init__(self,
archive_file=DEFAULT_ARCHIVE_FILE,
use_cuda=True,
is_user=False,
model_file='https://convlab.blob.core.windows.net/convlab-2/nlg-gpt-multiwoz.zip'):
def __init__(self, model_file=None,
use_cuda=True, is_user=False):
# If no filename is mentioned then set to default
if not model_file:
if is_user:
model_file = 'https://convlab.blob.core.windows.net/convlab-2/nlg-gpt-multiwoz.zip'
else:
model_file = 'https://zenodo.org/record/5767426/files/neo_scgpt_system.zip'
# Load from file/url
model_dir = os.path.dirname(os.path.abspath(__file__))
if not os.path.isfile(archive_file):
archive_file = cached_path(model_file)
archive = zipfile.ZipFile(archive_file, 'r')
if not os.path.isfile(model_file):
model_file = cached_path(model_file)
if not os.path.isdir(model_file):
archive = zipfile.ZipFile(model_file, 'r')
archive.extractall(model_dir)
# Get model directory
model_file = archive.filelist[0].filename.replace('/', '')
self.model_name_or_path = os.path.join(model_dir, model_file)
else:
self.model_name_or_path = model_file
self.model_name_or_path = os.path.join(model_dir, 'multiwoz')
self.length = 50
self.num_samples = 5
self.temperature = 1.0
......@@ -34,6 +43,7 @@ class SCGPT(NLG):
self.top_k = 50
self.top_p = 0.9
self.seed = 42
self.is_user = is_user
self.stop_token = '<|endoftext|>'
self.device = torch.device("cuda" if torch.cuda.is_available() and use_cuda else "cpu")
......@@ -52,6 +62,8 @@ class SCGPT(NLG):
elif self.length < 0:
self.length = self.MAX_LENGTH # avoid infinite loop
self.init_session()
def init_session(self):
self.sess_domains = {'Attraction':False,
'Hospital':False,
......@@ -59,14 +71,34 @@ class SCGPT(NLG):
'Police':False,
'Restaurant':False,
'Taxi':False,
'Train':False}
'Train':False,}
self.cur_domain = None
# if not self.is_user:
# self.sess_domains['Booking'] = False
def generate(self, meta):
#some actions in testing data is none
if not meta:
return 'No user action'
meta = deepcopy(meta)
for list_ in meta:
domain = list_[1]
if domain not in ('general', 'Booking'):
self.cur_domain = domain
for i, list_ in enumerate(meta):
list_ = list(list_)
if list_[1] == 'Booking':
if self.cur_domain is not None:
list_[1] = self.cur_domain
meta[i] = list_
else:
print('`cur_domain` is None, but there is `Booking` in dialog action.')
raw_text = tuple2seq(meta)
domains = set([item[1] for item in meta])
for domain in domains:
if domain != 'general' and not self.sess_domains[domain]:
if domain not in ('general', 'Booking') and not self.sess_domains[domain]:
raw_text = raw_text.replace(domain.lower(), domain.lower()+ ' *', 1)
self.sess_domains[domain] = True
context_tokens = self.tokenizer.encode(raw_text, add_special_tokens=False)
......
This diff is collapsed.
{
"guesthouse": [
"guest house",
"guest houses"
],
"hotel": [
"hotels"
],
"centre": [
"center",
"downtown"
],
"north": [
"northern",
"northside",
"northend"
],
"east": [
"eastern",
"eastside",
"eastend"
],
"west": [
"western",
"westside",
"westend"
],
"south": [
"southern",
"southside",
"southend"
],
"cheap": [
"inexpensive",
"lower price",
"lower range",
"cheaply",
"cheaper",
"cheapest",
"very affordable"
],
"moderate": [
"moderately",
"reasonable",
"reasonably",
"affordable",
"mid range",
"mid-range",
"priced moderately",
"decently priced",
"mid price",
"mid-price",
"mid priced",
"mid-priced",
"middle price",
"medium price",
"medium priced",
"not too expensive",
"not too cheap"
],
"expensive": [
"high end",
"high-end",
"high class",
"high-class",
"high scale",
"high-scale",
"high price",
"high priced",
"higher price",
"fancy",
"upscale",
"nice",
"expensively",
"luxury"
],
"0": [
"zero"
],
"1": [
"one",
"just me",
"for me",
"myself",
"alone",
"me"
],
"2": [
"two"
],
"3": [
"three"
],
"4": [
"four"
],
"5": [
"five"
],
"6": [
"six"
],
"7": [
"seven"
],
"8": [
"eight"
],
"9": [
"nine"
],
"10": [
"ten"
],
"11": [
"eleven"
],
"12": [
"twelve"
],
"architecture": [
"architectural",
"architecturally",
"architect"
],
"boat": [
"boating",
"boats",
"camboats"
],
"camboats": [
"boating",
"boat",
"boats"
],
"cinema": [
"cinemas",
"movie",
"films",
"film"
],
"college": [
"colleges"
],
"concerthall": [
"concert hall",
"concert halls",
"concerthalls",
"concerts",
"concert"
],
"entertainment": [
"entertaining"
],
"gastropub": [
"gastropubs"
],
"mutliple sports": [
"multiple sport",
"multiple sports",
"multi sport",
"multi sports",
"sports",
"sporting"
],
"museum": [
"museums",
"gallery",
"galleries"
],
"nightclub": [
"night clubs",
"night club",
"nightclubs",
"club",
"clubs"
],
"park": [
"parks"
],
"swimmingpool": [
"swimming pool",
"swimming",
"pool",
"pools",
"swimmingpool",
"water",
"swim"
],
"theatre": [
"theater",
"theatres",
"theaters"
],
"abbey pool and astroturf pitch": [
"abbey pool and astroturf",
"abbey pool"
],
"adc theatre": [
"adc theater",
"adc"
],
"addenbrookes hospital": [
"addenbrooke's hospital"
],
"cafe jello gallery": [
"cafe jello"
],
"cambridge and county folk museum": [
"cambridge and country folk museum",
"county folk museum"
],
"cambridge arts theatre": [
"cambridge arts theater"
],
"cambridge book and print gallery": [
"book and print gallery"
],
"cambridge contemporary art": [
"cambridge contemporary art museum",
"contemporary art museum"
],
"the cambridge corn exchange": [
"cambridge corn exchange"
],
"cambridge museum of technology": [
"museum of technology"
],
"the cambridge punter": [
"cambridge punter",
"cambridge punters"
],
"cambridge university botanic gardens": [
"cambridge university botanical gardens",
"cambridge university botanical garden",
"cambridge university botanic garden",
"cambridge botanic gardens",
"cambridge botanical gardens",
"cambridge botanic garden",
"cambridge botanical garden",
"botanic gardens",
"botanical gardens",
"botanic garden",
"botanical garden"
],
"cherry hinton hall and grounds": [
"cherry hinton hall"
],
"cherry hinton water play": [
"cherry hinton water play park"
],
"cineworld cinema": [
"cineworld"
],
"clare hall": [
"clair hall"
],
"the fez club": [
"fez club"
],
"jesus green outdoor pool": [
"jesus green"
],
"kings hedges learner pool": [
"king's hedges learner pool",
"king hedges learner pool"
],
"mumford theatre": [
"mumford theater"
],
"museum of archaelogy and anthropology": [
"museum of archaeology and anthropology",
"museum of archaelogy",
"museum of archaeology"
],
"riverboat georgina": [
"riverboat"
],
"saint barnabas press gallery": [
"saint barbabas"
],
"scott polar museum": [
"scott polar"
],
"scudamores punting co": [
"scudamore's punting co",
"scudamores punting",
"scudamore's punting",
"scudamores",
"scudamore's",
"scudamore"
],
"soul tree nightclub": [
"soul tree night club",
"soul tree",
"soultree"
],
"the man on the moon": [
"man on the moon"
],
"the junction": [
"junction theatre",
"junction theater"
],
"old schools": [
"old school"
],
"vue cinema": [
"vue"
],
"wandlebury country park": [
"the wandlebury"
],
"whipple museum of the history of science": [
"whipple museum",
"history of science museum"
],
"restaurant alimentum": [
"alimentum"
],
"bedouin": [
"the bedouin"
],
"bloomsbury restaurant": [
"bloomsbury"
],
"caffe uno": [
"cafe uno",
"caffee uno"
],
"cambridge lodge restaurant": [
"cambridge lodge"
],
"chiquito restaurant bar": [
"chiquito restaurant",
"chiquito"
],
"city stop restaurant": [
"city stop"
],
"clowns cafe": [
"clown's cafe"
],
"the cow pizza kitchen and bar": [
"cow pizza kitchen and bar",
"cow pizza"
],
"darrys cookhouse and wine shop": [
"darry's cookhouse and wine shop",
"darry's cookhouse",
"darrys cookhouse"
],
"de luca cucina and bar": [
"de luca cucina and bar riverside brasserie",
"luca cucina and bar",
"de luca cucina",
"luca cucina"
],
"da vinci pizzeria": [
"da vinci pizza",
"da vinci"
],
"don pasquale pizzeria": [
"don pasquale pizza",
"don pasquale",
"pasquale pizzeria",
"pasquale pizza"
],
"efes restaurant": [
"efes"
],
"fitzbillies restaurant": [
"fitzbillies"
],
"frankie and bennys": [
"frankie and benny's"
],
"funky fun house": [
"funky"
],
"the gardenia": [
"gardenia"
],
"grafton hotel restaurant": [
"the grafton hotel",
"grafton hotel"
],
"hotel du vin and bistro": [
"hotel du vin",
"du vin"
],
"kohinoor": [
"the kohinoor"
],
"lan hong house": [
"lan hong",
"ian hong house",
"ian hong"
],
"lovell lodge": [
"lovell",
"the lovell lodge"
],
"mahal of cambridge": [
"mahal"
],
"maharajah tandoori restaurant": [
"maharajah tandoori",
"the maharajah tandoor"
],
"meze bar restaurant": [
"the meze bar",
"meze bar"
],
"michaelhouse cafe": [
"michael house cafe"
],
"midsummer house restaurant": [
"midsummer house"
],
"the missing sock": [
"missing sock"
],
"nandos": [
"nando's city centre",
"nando's city center",
"nandos city centre",
"nandos city center",
"nando's"
],
"nandos city centre": [
"nando's city centre",
"nando's city center",
"nandos city center",
"nando's",
"nandos"
],
"the oak bistro": [
"oak bistro"
],
"restaurant one seven": [
"one seven"
],
"the river bar steakhouse and grill": [
"river bar steakhouse and grill",
"the river bar steakhouse",
"river bar steakhouse"
],
"pipasha restaurant": [
"pipasha"
],
"pizza hut city centre": [
"pizza hut city center"
],
"pizza hut fen ditton": [
"pizza hut fenditton",
"pizza express fen ditton"
],
"restaurant two two": [
"two two",
"restaurant 22"
],
"saffron brasserie": [
"saffron"
],
"saint johns chop house": [
"saint john's chop house",
"st john's chop house",
"st johns chop house"
],
"sesame restaurant and bar": [
"sesame restaurant",
"sesame"
],
"shanghai family restaurant": [
"shanghai"
],
"sitar tandoori": [
"sitar"
],
"the slug and lettuce": [
"slug and lettuce"
],
"saint johns chop house": [
"st johns chop house",
"st john's chop house",
"saint johns chop house"
],
"stazione restaurant and coffee bar": [
"stazione restaurant",
"stazione"
],
"thanh binh": [
"thanh",
"binh"
],
"the hotpot": [
"the hotspot",
"hotpot",
"hotspot"
],
"the lucky star": [
"lucky star"
],
"peking restaurant": [
"the peking restaurant"
],
"the varsity restaurant": [
"varsity restaurant",
"the varsity",
"varsity"
],
"zizzi cambridge": [
"zizzi"
],
"asian oriental": [
"asian",
"oriental"
],
"australian": [
"australasian"
],
"barbeque": [
"barbecue",
"bbq"
],
"corsica": [
"corsican"
],
"indian": [
"tandoori"
],
"italian": [
"pizza",
"pizzeria"
],
"japanese": [
"sushi"
],
"latin american": [
"latin-american",
"latin"
],
"malaysian": [
"malay"
],
"middle eastern": [
"middle-eastern"
],
"modern american": [
"american modern",
"american"
],
"modern european": [
"european modern",
"european"
],
"north american": [
"north-american",
"american"
],
"portuguese": [
"portugese"
],
"seafood": [
"sea food"
],
"singaporean": [
"singapore"
],
"steakhouse": [
"steak house",
"steak"
],
"the americas": [
"american",
"americas"
],
"a and b guest house": [
"a & b guest house",
"a and b",
"a & b"
],
"acorn guest house": [
"the acorn guest house",
"acorn"
],
"alexander bed and breakfast": [
"alexander"
],
"allenbell": [
"the allenbell"
],
"alpha-milton guest house": [
"the alpha-milton",
"alpha-milton"
],
"arbury lodge guesthouse": [
"arbury lodge guest house",
"arbury lodge",
"arbury"
],
"archway house": [
"archway"
],
"ashley hotel": [
"the ashley hotel",
"ashley"
],
"aylesbray lodge guest house": [
"aylesbray lodge",
"aylesbray",
"alesbray lodge guest house",
"alyesbray lodge hotel"
],
"bridge guest house": [
"bridge house"
],
"the cambridge belfry": [
"cambridge belfry",
"belfry hotel",
"belfry"
],
"carolina bed and breakfast": [
"carolina"
],
"city centre north b and b": [
"city centre north bed and breakfast",
"city centre north",
"north b and b"
],
"el shaddai": [
"el shaddia guest house",
"el shaddai guest house",
"el shaddia"
],
"express by holiday inn cambridge": [
"express by holiday inn",
"holiday inn cambridge",
"holiday inn"
],
"finches bed and breakfast": [
"finches"
],
"gonville hotel": [
"gonville"
],
"hamilton lodge": [
"the hamilton lodge",
"hamilton"
],
"hobsons house": [
"hobson's house",
"hobson's"
],
"huntingdon marriott hotel": [
"huntington marriott hotel",
"huntington marriot hotel",
"huntingdon marriot hotel",
"huntington marriott",
"huntingdon marriott",
"huntington marriot",
"huntingdon marriot",
"huntington",
"huntingdon",
"marriott hotel",
"marriott"
],
"kirkwood house": [
"kirkwood"
],
"the lensfield hotel": [
"lensfield hotel",
"lensfield"
],
"leverton house": [
"leverton"
],
"rosa's bed and breakfast": [
"rosas bed and breakfast",
"rosa's",
"rosas"
],
"university arms hotel": [
"university arms"
],
"warkworth house": [
"warkworth hotel",
"warkworth"
],
"worth house": [
"the worth house",
"warkworth house",
"warkworth"
],
"birmingham new street": [
"birmingham new street train station"
],
"birmingham new street train station": [
"birmingham new street"
],
"bishops stortford": [
"bishops stortford train station"
],
"bishops stortford train station": [
"bishops stortford"
],
"broxbourne": [
"broxbourne train station"
],
"broxbourne train station": [
"broxbourne"
],
"cambridge": [
"cambridge train station"
],
"cambridge train station": [
"cambridge"
],
"ely": [
"ely train station"
],
"ely train station": [
"ely"
],
"kings lynn": [
"king's lynn",
"king's lynn train station",
"kings lynn train station"
],
"kings lynn train station": [
"kings lynn",
"king's lynn",
"king's lynn train station"
],
"leicester": [
"leicester train station"
],
"leicester train station": [
"leicester"
],
"london kings cross": [
"kings cross",
"king's cross",
"london king's cross",
"kings cross train station",
"king's cross train station",
"london king's cross train station",
"london kings cross train station"
],
"london kings cross train station": [
"kings cross",
"king's cross",
"london king's cross",
"london kings cross",
"kings cross train station",
"king's cross train station",
"london king's cross train station"
],
"london liverpool street": [
"london liverpool",
"liverpool street",
"london liverpool train station",
"liverpool street train station",
"london liverpool street train station"
],
"london liverpool street train station": [
"london liverpool",
"liverpool street",
"london liverpool street",
"london liverpool train station",
"liverpool street train station"
],
"norwich": [
"norwich train station"
],
"norwich train station": [
"norwich"
],
"peterborough": [
"peterborough train station"
],
"peterborough train station": [
"peterborough"
],
"stansted airport": [
"stansted airport train station"
],
"stansted airport train station": [
"stansted airport"
],
"stevenage": [
"stevenage train station"
],
"stevenage train station": [
"stevenage"
]
}
\ No newline at end of file
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment