Skip to content
Snippets Groups Projects
Commit 406748f4 authored by zqwerty's avatar zqwerty
Browse files

Update WoW checked_passage annotation to ensure that checked_sentence is contained in it

parent 700b6017
Branches
No related tags found
No related merge requests found
No preview for this file type
This diff is collapsed.
...@@ -69,32 +69,39 @@ def preprocess(): ...@@ -69,32 +69,39 @@ def preprocess():
for topic_passage in original_turn['retrieved_passages']: for topic_passage in original_turn['retrieved_passages']:
for topic, passage in topic_passage.items(): for topic, passage in topic_passage.items():
topic2passage[html.unescape(topic)] = passage topic = html.unescape(topic)
if topic in topic2passage:
# topic was already added; append only the sentences not seen yet
for sen in passage:
if sen not in topic2passage[topic]:
topic2passage[topic].append(sen)
else:
topic2passage[topic] = passage
if speaker == 'system': if speaker == 'system':
if len(original_turn['checked_sentence']) == 0: if len(original_turn['checked_sentence']) == 0:
check_sentence = None checked_sentence = None
else: else:
check_sentence = list(original_turn['checked_sentence'].values())[0] checked_sentence = list(original_turn['checked_sentence'].values())[0]
check_sentence = None if check_sentence == 'no_passages_used' else check_sentence checked_sentence = None if checked_sentence == 'no_passages_used' else checked_sentence
if len(original_turn['checked_passage']) == 0: if len(original_turn['checked_passage']) == 0:
if check_sentence and check_sentence not in original_dial['chosen_topic_passage']:
# search over retrieved_passages
for topic, passage in topic2passage.items():
if check_sentence in passage:
checked_passage = topic
break
else:
pprint(original_turn)
exit()
else:
checked_passage = None checked_passage = None
else: else:
checked_passage = html.unescape(list(original_turn['checked_passage'].values())[0]) checked_passage = html.unescape(list(original_turn['checked_passage'].values())[0])
# print(topic2passage.keys()) # print(topic2passage.keys())
checked_passage = None if checked_passage == 'no_passages_used' else topic2passage[checked_passage] checked_passage = None if checked_passage == 'no_passages_used' else topic2passage[checked_passage]
dialogue['turns'][-1]['checked_sentence'] = check_sentence
if checked_sentence:
if not checked_passage or checked_sentence not in checked_passage:
# search over retrieved_passages
for topic, passage in topic2passage.items():
if checked_sentence in passage:
checked_passage = passage
break
assert checked_sentence in checked_passage, print(checked_sentence, checked_passage)
dialogue['turns'][-1]['checked_sentence'] = checked_sentence
dialogue['turns'][-1]['checked_passage'] = checked_passage dialogue['turns'][-1]['checked_passage'] = checked_passage
dialogues = dialogues_by_split['train']+dialogues_by_split['validation']+dialogues_by_split['test_seen']+dialogues_by_split['test_unseen'] dialogues = dialogues_by_split['train']+dialogues_by_split['validation']+dialogues_by_split['test_seen']+dialogues_by_split['test_unseen']
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment