Skip to content
Snippets Groups Projects
Commit 24dccee5 authored by zqwerty
Browse files

add booked information for multiwoz21

parent 5689989d
No related branches found
No related tags found
No related merge requests found
......@@ -18,6 +18,7 @@ MultiWOZ 2.1 fixed the noise in state annotations and dialogue utterances. It al
- Normalize slot name and value. See `normalize_domain_slot_value` function in `preprocess.py`.
- Correct some non-categorical slots' values and provide character-level span annotations.
- Concatenate multiple values in user goal & state using `|`.
- Add `booked` information in system turns from original belief states.
- **Annotations:**
- user goal, dialogue acts, state.
......
No preview for this file type
This diff is collapsed.
......@@ -860,17 +860,23 @@ def preprocess():
# add empty db_results
turn_state = turn['metadata']
cur_state = copy.deepcopy(init_ontology['state'])
booked = {}
for domain in turn_state:
if domain not in cur_state:
continue
for subdomain in ['semi', 'book']:
for slot, value in turn_state[domain][subdomain].items():
if slot in ['booked', 'ticket']:
if slot == 'ticket':
continue
elif slot == 'booked':
assert domain in init_ontology['domains']
booked[domain] = value
continue
_, slot, value = normalize_domain_slot_value(domain, slot, value)
cur_state[domain][slot] = value
dialogue['turns'][-2]['state'] = cur_state
dialogue['turns'][-1]['db_results'] = {}
dialogue['turns'][-1]['booked'] = booked
dialogues_by_split[split].append(dialogue)
# pprint(cnt_domain_slot.most_common())
dialogues = []
......@@ -883,8 +889,8 @@ def preprocess():
with ZipFile('data.zip', 'w', ZIP_DEFLATED) as zf:
for filename in os.listdir(new_data_dir):
zf.write(f'{new_data_dir}/{filename}')
rmtree(original_data_dir)
rmtree(new_data_dir)
# rmtree(original_data_dir)
# rmtree(new_data_dir)
return dialogues, init_ontology
if __name__ == '__main__':
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment