diff --git a/data/unified_datasets/camrest/data.zip b/data/unified_datasets/camrest/data.zip index cd9822c65929d46b992b511787893bc92725a76f..9d4c3adfbab93f9cfbe9ebc5ca34c57a3a2de480 100644 Binary files a/data/unified_datasets/camrest/data.zip and b/data/unified_datasets/camrest/data.zip differ diff --git a/data/unified_datasets/camrest/dummy_data.json b/data/unified_datasets/camrest/dummy_data.json index bd24e654272429e1bec3d92037b773c2b03bf1d9..843062065cac9767dc50b45fbe9a3f598cf53008 100644 --- a/data/unified_datasets/camrest/dummy_data.json +++ b/data/unified_datasets/camrest/dummy_data.json @@ -80,8 +80,7 @@ } ], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -139,8 +138,7 @@ "end": 36 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -182,8 +180,7 @@ "end": 75 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -225,8 +222,7 @@ "end": 18 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -253,8 +249,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } } ] }, @@ -329,8 +324,7 @@ ], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -396,8 +390,7 @@ "end": 29 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -439,8 +432,7 @@ "end": 34 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -480,8 +472,7 @@ } ], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -508,8 +499,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } } ] }, @@ -589,8 +579,7 @@ ], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -635,8 +624,7 @@ "end": 77 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -678,8 +666,7 @@ "end": 33 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -706,8 +693,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } } ] }, @@ -788,8 +774,7 @@ ], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -853,8 +838,7 @@ "end": 7 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -896,8 +880,7 @@ "end": 34 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -924,8 +907,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } } ] }, @@ -1014,8 +996,7 @@ "end": 10 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1059,8 +1040,7 @@ "end": 29 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1102,8 +1082,7 @@ "end": 46 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1130,8 +1109,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } } ] }, @@ -1205,8 +1183,7 @@ ], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1253,8 +1230,7 @@ ], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1325,8 +1301,7 @@ "end": 138 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1353,8 +1328,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } } ] }, @@ -1443,8 +1417,7 @@ "end": 94 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1510,8 +1483,7 @@ "end": 17 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1561,8 +1533,7 @@ "end": 47 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1589,8 +1560,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } } ] }, @@ -1685,8 +1655,7 @@ "end": 23 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1749,8 +1718,7 @@ "end": 90 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1794,8 +1762,7 @@ "end": 33 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1822,8 +1789,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } } ] }, @@ -1899,8 +1865,7 @@ ], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1969,8 +1934,7 @@ "end": 59 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2033,8 +1997,7 @@ "end": 16 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2061,8 +2024,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } } ] }, @@ -2163,8 +2125,7 @@ "end": 41 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2240,8 +2201,7 @@ "end": 105 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2268,8 +2228,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } } ] } diff --git a/data/unified_datasets/camrest/preprocess.py b/data/unified_datasets/camrest/preprocess.py index 5841c98434d0e97c398c5034622a6042b07e36c4..53dc317b9138e2d4e04aaa5f5fdc1f38768ffac3 100644 --- a/data/unified_datasets/camrest/preprocess.py +++ b/data/unified_datasets/camrest/preprocess.py @@ -240,7 +240,6 @@ def preprocess(): 'utterance': sys_text, 'utt_idx': len(dialogue['turns'])+1, 'dialogue_acts': convert_da(sys_text, sys_da), - 'db_results': {} } dialogue['turns'].append(usr_turn) @@ -269,7 +268,6 @@ def preprocess(): with ZipFile('data.zip', 'w', ZIP_DEFLATED) as zf: for filename in os.listdir(new_data_dir): zf.write(f'{new_data_dir}/{filename}') - rmtree(original_data_dir) rmtree(new_data_dir) return dialogues, ontology diff --git a/data/unified_datasets/check.py b/data/unified_datasets/check.py index e0f5c37cecaa6aa78769a9dddc7bb244a44a0a7b..1e735af835cfdbbe40f71045196d873e1b4519ac 100644 --- a/data/unified_datasets/check.py +++ b/data/unified_datasets/check.py @@ -132,13 +132,14 @@ def check_dialogues(name, dialogues, ontology): assert dialogue_id not in all_id, f'multiple dialogue id: {dialogue_id}' all_id.add(dialogue_id) - cur_domains = dialogue['domains'] - assert isinstance(cur_domains, list), f'{dialogue_id}\t`domains` is expected to be list type, ' - assert len(set(cur_domains)) == len(cur_domains), f'{dialogue_id}\trepeated domains' - cur_stat['domains'] += len(cur_domains) - cur_domains = set(cur_domains) - for domain_name in cur_domains: - assert domain_name in ontology['domains'], f'{dialogue_id}\tundefined current domain: {domain_name}' + if 'domains' in dialogue: + cur_domains = dialogue['domains'] + assert isinstance(cur_domains, list), f'{dialogue_id}\t`domains` is expected to be list type, ' + assert len(set(cur_domains)) == len(cur_domains), f'{dialogue_id}\trepeated domains' + cur_stat['domains'] += len(cur_domains) + cur_domains = set(cur_domains) + for domain_name in cur_domains: + assert domain_name in ontology['domains'], f'{dialogue_id}\tundefined current domain: {domain_name}' # check domain-slot-value # prefix: error prefix @@ -167,18 +168,19 @@ def check_dialogues(name, dialogues, ontology): assert da['intent'] in ontology['intents'], f'{dialogue_id}:{turn_id}:da\tundefined intent {da["intent"]}' check_dsv(da['domain'], da['slot'], da['value'], 'dialogue act', categorical, f'{dialogue_id}:{turn_id}:da') - goal = dialogue['goal'] - assert isinstance(goal['description'], str), f'{dialogue_id}\tgoal description {goal["description"]} should be string' - assert isinstance(goal['inform'], dict), f'{dialogue_id}\tgoal inform {goal["inform"]} should be dict' - assert isinstance(goal['request'], dict), f'{dialogue_id}\tgoal request {goal["request"]} should be dict' - for domain_name, domain in goal['inform'].items(): - for slot_name, value in domain.items(): - check_dsv(domain_name, slot_name, value, 'goal', prefix=f'{dialogue_id}:goal:inform') - assert value != "", f'{dialogue_id}\tshould set non-empty value in goal inform {goal["inform"]}' - for domain_name, domain in goal['request'].items(): - for slot_name, value in domain.items(): - check_dsv(domain_name, slot_name, value, 'goal', prefix=f'{dialogue_id}:goal:request') - assert value == "", f'{dialogue_id}\tshould set empty value in goal request {goal["request"]}' + if 'goal' in dialogue: + goal = dialogue['goal'] + assert isinstance(goal['description'], str), f'{dialogue_id}\tgoal description {goal["description"]} should be string' + assert isinstance(goal['inform'], dict), f'{dialogue_id}\tgoal inform {goal["inform"]} should be dict' + assert isinstance(goal['request'], dict), f'{dialogue_id}\tgoal request {goal["request"]} should be dict' + for domain_name, domain in goal['inform'].items(): + for slot_name, value in domain.items(): + check_dsv(domain_name, slot_name, value, 'goal', prefix=f'{dialogue_id}:goal:inform') + assert value != "", f'{dialogue_id}\tshould set non-empty value in goal inform {goal["inform"]}' + for domain_name, domain in goal['request'].items(): + for slot_name, value in domain.items(): + check_dsv(domain_name, slot_name, value, 'goal', prefix=f'{dialogue_id}:goal:request') + assert value == "", f'{dialogue_id}\tshould set empty value in goal request {goal["request"]}' turns = dialogue['turns'] cur_stat['utterances'] += len(turns) @@ -193,47 +195,48 @@ def check_dialogues(name, dialogues, ontology): utterance = turn['utterance'] cur_stat['tokens'] += len(utterance.strip().split(' ')) - dialogue_acts = turn['dialogue_acts'] - assert isinstance(dialogue_acts['categorical'], list), f'{dialogue_id}:{turn_id}\tcategorical dialogue_acts should be a list' - assert isinstance(dialogue_acts['non-categorical'], list), f'{dialogue_id}:{turn_id}\tnon-categorical dialogue_acts should be a list' - assert isinstance(dialogue_acts['binary'], list), f'{dialogue_id}:{turn_id}\tbinary dialogue_acts should be a list' - for da in dialogue_acts['categorical']: - check_da(da, True) - for da in dialogue_acts['non-categorical']: - check_da(da, False) - # values only match after .strip() in some case, it's the issue of pre-processing - if da['value'] not in special_values: - stat[split][f'non-cat slot span(dialogue act)'][1] += 1 - assert ('start' in da) == ('end' in da), \ - f'{dialogue_id}:{turn_id}\tstart and end field in da should both present or neither not present' - if 'start' in da: - value = utterance[da['start']:da['end']] - assert da['value'] == value, f'{dialogue_id}:{turn_id}\tspan({value}) and value{da["value"]} not match' - stat[split][f'non-cat slot span(dialogue act)'][0] += 1 - - for da_type in dialogue_acts: - for da in dialogue_acts[da_type]: - assert ontology['da_dict'][da_type][(da['intent'], da['domain'], da['slot'])][turn['speaker']] == True - if da_type == 'binary': - assert 'value' not in da, f'{dialogue_id}:{turn_id}\tbinary dialogue act should not have value' + if 'dialogue_acts' in turn: + dialogue_acts = turn['dialogue_acts'] + assert isinstance(dialogue_acts['categorical'], list), f'{dialogue_id}:{turn_id}\tcategorical dialogue_acts should be a list' + assert isinstance(dialogue_acts['non-categorical'], list), f'{dialogue_id}:{turn_id}\tnon-categorical dialogue_acts should be a list' + assert isinstance(dialogue_acts['binary'], list), f'{dialogue_id}:{turn_id}\tbinary dialogue_acts should be a list' + for da in dialogue_acts['categorical']: + check_da(da, True) + for da in dialogue_acts['non-categorical']: + check_da(da, False) + # values only match after .strip() in some case, it's the issue of pre-processing + if da['value'] not in special_values: + stat[split][f'non-cat slot span(dialogue act)'][1] += 1 + assert ('start' in da) == ('end' in da), \ + f'{dialogue_id}:{turn_id}\tstart and end field in da should both present or neither not present' + if 'start' in da: + value = utterance[da['start']:da['end']] + assert da['value'] == value, f'{dialogue_id}:{turn_id}\tspan({value}) and value{da["value"]} not match' + stat[split][f'non-cat slot span(dialogue act)'][0] += 1 + + for da_type in dialogue_acts: + for da in dialogue_acts[da_type]: + assert ontology['da_dict'][da_type][(da['intent'], da['domain'], da['slot'])][turn['speaker']] == True + if da_type == 'binary': + assert 'value' not in da, f'{dialogue_id}:{turn_id}\tbinary dialogue act should not have value' if turn['speaker'] == 'user': assert 'db_results' not in turn - assert 'state' in turn, f"{dialogue_id}:{turn_id}\tstate must present in user's role, but could be empty" - state = turn['state'] - assert isinstance(state, dict), f'{dialogue_id}:{turn_id}\tstate should be a dict' - for domain_name, domain in state.items(): - for slot_name, value in domain.items(): - check_dsv(domain_name, slot_name, value, 'state', prefix=f'{dialogue_id}:{turn_id}:state') + if 'state' in turn: + state = turn['state'] + assert isinstance(state, dict), f'{dialogue_id}:{turn_id}\tstate should be a dict' + for domain_name, domain in state.items(): + for slot_name, value in domain.items(): + check_dsv(domain_name, slot_name, value, 'state', prefix=f'{dialogue_id}:{turn_id}:state') else: assert 'state' not in turn, f"{dialogue_id}:{turn_id}\tstate cannot present in system's role" - assert 'db_results' in turn - db_results = turn['db_results'] - assert isinstance(db_results, dict), f'{dialogue_id}:{turn_id}\db_results should be a dict' - for domain_name, results in db_results.items(): - assert domain_name in cur_domains, f'{dialogue_id}:{turn_id}:db_results\t{domain_name} not presented in current domains' - assert isinstance(results, list) + if 'db_results' in turn: + db_results = turn['db_results'] + assert isinstance(db_results, dict), f'{dialogue_id}:{turn_id}\db_results should be a dict' + for domain_name, results in db_results.items(): + assert domain_name in cur_domains, f'{dialogue_id}:{turn_id}:db_results\t{domain_name} not presented in current domains' + assert isinstance(results, list) for _, value_match in match_rate.items(): for anno_type, (match, total) in value_match.items(): diff --git a/data/unified_datasets/commongen/data.zip b/data/unified_datasets/commongen/data.zip index c47bf2f32c138fb293741868e7b600eac8cf6b1f..82b39e5ce91eb1821f68ce2fb8b158386598c9a5 100644 Binary files a/data/unified_datasets/commongen/data.zip and b/data/unified_datasets/commongen/data.zip differ diff --git a/data/unified_datasets/commongen/dummy_data.json b/data/unified_datasets/commongen/dummy_data.json index db8ad2978ee3b94cead6f9a2fc6c82f7730491f2..c7370381ead897d4bcf6357b71e87eeccb953b22 100644 --- a/data/unified_datasets/commongen/dummy_data.json +++ b/data/unified_datasets/commongen/dummy_data.json @@ -4,28 +4,16 @@ "data_split": "train", "dialogue_id": "commongen-train-0", "original_id": "train-0", - "domains": [], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "system", "utterance": "Skier skis down the mountain", "utt_idx": 0, - "dialogue_acts": { - "binary": [], - "categorical": [], - "non-categorical": [] - }, "concepts": [ "ski", "mountain", "skier" - ], - "db_results": {} + ] } ] }, @@ -34,28 +22,16 @@ "data_split": "train", "dialogue_id": "commongen-train-1", "original_id": "train-1", - "domains": [], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "system", "utterance": "A skier is skiing down a mountain.", "utt_idx": 0, - "dialogue_acts": { - "binary": [], - "categorical": [], - "non-categorical": [] - }, "concepts": [ "ski", "mountain", "skier" - ], - "db_results": {} + ] } ] }, @@ -64,28 +40,16 @@ "data_split": "train", "dialogue_id": "commongen-train-2", "original_id": "train-2", - "domains": [], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "system", "utterance": "Three skiers are skiing on a snowy mountain.", "utt_idx": 0, - "dialogue_acts": { - "binary": [], - "categorical": [], - "non-categorical": [] - }, "concepts": [ "ski", "mountain", "skier" - ], - "db_results": {} + ] } ] }, @@ -94,28 +58,16 @@ "data_split": "train", "dialogue_id": "commongen-train-3", "original_id": "train-3", - "domains": [], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "system", "utterance": "The dog is wagging his tail.", "utt_idx": 0, - "dialogue_acts": { - "binary": [], - "categorical": [], - "non-categorical": [] - }, "concepts": [ "wag", "tail", "dog" - ], - "db_results": {} + ] } ] }, @@ -124,28 +76,16 @@ "data_split": "train", "dialogue_id": "commongen-train-4", "original_id": "train-4", - "domains": [], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "system", "utterance": "A dog wags his tail at the boy.", "utt_idx": 0, - "dialogue_acts": { - "binary": [], - "categorical": [], - "non-categorical": [] - }, "concepts": [ "wag", "tail", "dog" - ], - "db_results": {} + ] } ] }, @@ -154,28 +94,16 @@ "data_split": "train", "dialogue_id": "commongen-train-5", "original_id": "train-5", - "domains": [], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "system", "utterance": "a dog wags its tail with its heart", "utt_idx": 0, - "dialogue_acts": { - "binary": [], - "categorical": [], - "non-categorical": [] - }, "concepts": [ "wag", "tail", "dog" - ], - "db_results": {} + ] } ] }, @@ -184,28 +112,16 @@ "data_split": "train", "dialogue_id": "commongen-train-6", "original_id": "train-6", - "domains": [], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "system", "utterance": "woman paddling canoe on a lake", "utt_idx": 0, - "dialogue_acts": { - "binary": [], - "categorical": [], - "non-categorical": [] - }, "concepts": [ "lake", "paddle", "canoe" - ], - "db_results": {} + ] } ] }, @@ -214,28 +130,16 @@ "data_split": "train", "dialogue_id": "commongen-train-7", "original_id": "train-7", - "domains": [], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "system", "utterance": "paddle an open canoe along lake .", "utt_idx": 0, - "dialogue_acts": { - "binary": [], - "categorical": [], - "non-categorical": [] - }, "concepts": [ "lake", "paddle", "canoe" - ], - "db_results": {} + ] } ] }, @@ -244,28 +148,16 @@ "data_split": "train", "dialogue_id": "commongen-train-8", "original_id": "train-8", - "domains": [], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "system", "utterance": "a man paddles his canoe on the lake.", "utt_idx": 0, - "dialogue_acts": { - "binary": [], - "categorical": [], - "non-categorical": [] - }, "concepts": [ "lake", "paddle", "canoe" - ], - "db_results": {} + ] } ] }, @@ -274,28 +166,16 @@ "data_split": "train", "dialogue_id": "commongen-train-9", "original_id": "train-9", - "domains": [], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "system", "utterance": "a train pulls into station", "utt_idx": 0, - "dialogue_acts": { - "binary": [], - "categorical": [], - "non-categorical": [] - }, "concepts": [ "station", "train", "pull" - ], - "db_results": {} + ] } ] } diff --git a/data/unified_datasets/commongen/preprocess.py b/data/unified_datasets/commongen/preprocess.py index 9f38125cccc61e22e1262c4c22ce26281752f071..c846f6fd5094623a5dbaaadc1f2e703e282039c8 100644 --- a/data/unified_datasets/commongen/preprocess.py +++ b/data/unified_datasets/commongen/preprocess.py @@ -52,23 +52,11 @@ def preprocess(): 'data_split': data_split, 'dialogue_id': dialogue_id, 'original_id': f'{data_split}-{len(dialogues_by_split[data_split])}', - 'domains': [], - 'goal': { - 'description': '', - 'inform': {}, - 'request': {} - }, 'turns': [{ 'speaker': speaker, 'utterance': scene.strip(), 'utt_idx': 0, - 'dialogue_acts': { - 'binary': [], - 'categorical': [], - 'non-categorical': [], - }, 'concepts': concepts, - 'db_results': {} }] } diff --git a/data/unified_datasets/dailydialog/data.zip b/data/unified_datasets/dailydialog/data.zip index 6d5733d518fdf52b2607e175f6cb08da2042943a..e8f1805a465c0609980e109bf1c2a6b0491ba81f 100644 Binary files a/data/unified_datasets/dailydialog/data.zip and b/data/unified_datasets/dailydialog/data.zip differ diff --git a/data/unified_datasets/dailydialog/dummy_data.json b/data/unified_datasets/dailydialog/dummy_data.json index 3e097cd30aa1188085575276bcfe835cbc8bb2d5..5da0cbbac9fed4f0efbcb48c88b1647241efa506 100644 --- a/data/unified_datasets/dailydialog/dummy_data.json +++ b/data/unified_datasets/dailydialog/dummy_data.json @@ -7,11 +7,6 @@ "domains": [ "Attitude & Emotion" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "user", @@ -28,8 +23,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" }, { "speaker": "system", @@ -46,8 +40,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "db_results": {} + "emotion": "no emotion" }, { "speaker": "user", @@ -64,8 +57,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" }, { "speaker": "system", @@ -82,8 +74,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "db_results": {} + "emotion": "no emotion" }, { "speaker": "user", @@ -100,8 +91,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" }, { "speaker": "system", @@ -118,8 +108,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "db_results": {} + "emotion": "no emotion" }, { "speaker": "user", @@ -136,8 +125,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "happiness", - "state": {} + "emotion": "happiness" }, { "speaker": "system", @@ -154,8 +142,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "happiness", - "db_results": {} + "emotion": "happiness" }, { "speaker": "user", @@ -172,8 +159,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "happiness", - "state": {} + "emotion": "happiness" }, { "speaker": "system", @@ -190,8 +176,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "happiness", - "db_results": {} + "emotion": "happiness" } ] }, @@ -203,11 +188,6 @@ "domains": [ "Relationship" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "user", @@ -224,8 +204,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" }, { "speaker": "system", @@ -242,8 +221,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "db_results": {} + "emotion": "no emotion" }, { "speaker": "user", @@ -260,8 +238,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "surprise", - "state": {} + "emotion": "surprise" }, { "speaker": "system", @@ -278,8 +255,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "db_results": {} + "emotion": "no emotion" }, { "speaker": "user", @@ -296,8 +272,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" }, { "speaker": "system", @@ -314,8 +289,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "db_results": {} + "emotion": "no emotion" } ] }, @@ -327,11 +301,6 @@ "domains": [ "Relationship" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "user", @@ -348,8 +317,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" }, { "speaker": "system", @@ -366,8 +334,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "db_results": {} + "emotion": "no emotion" }, { "speaker": "user", @@ -384,8 +351,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" }, { "speaker": "system", @@ -402,8 +368,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "db_results": {} + "emotion": "no emotion" }, { "speaker": "user", @@ -420,8 +385,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" } ] }, @@ -433,11 +397,6 @@ "domains": [ "Relationship" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "user", @@ -454,8 +413,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" }, { "speaker": "system", @@ -472,8 +430,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "db_results": {} + "emotion": "no emotion" }, { "speaker": "user", @@ -490,8 +447,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" }, { "speaker": "system", @@ -508,8 +464,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "db_results": {} + "emotion": "no emotion" } ] }, @@ -521,11 +476,6 @@ "domains": [ "Relationship" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "user", @@ -542,8 +492,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" }, { "speaker": "system", @@ -560,8 +509,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "db_results": {} + "emotion": "no emotion" }, { "speaker": "user", @@ -578,8 +526,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" }, { "speaker": "system", @@ -596,8 +543,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "db_results": {} + "emotion": "no emotion" }, { "speaker": "user", @@ -614,8 +560,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" }, { "speaker": "system", @@ -632,8 +577,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "surprise", - "db_results": {} + "emotion": "surprise" }, { "speaker": "user", @@ -650,8 +594,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" }, { "speaker": "system", @@ -668,8 +611,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "surprise", - "db_results": {} + "emotion": "surprise" }, { "speaker": "user", @@ -686,8 +628,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" } ] }, @@ -699,11 +640,6 @@ "domains": [ "Relationship" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "user", @@ -720,8 +656,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" }, { "speaker": "system", @@ -738,8 +673,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "happiness", - "db_results": {} + "emotion": "happiness" }, { "speaker": "user", @@ -756,8 +690,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" }, { "speaker": "system", @@ -774,8 +707,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "db_results": {} + "emotion": "no emotion" }, { "speaker": "user", @@ -792,8 +724,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" }, { "speaker": "system", @@ -810,8 +741,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "happiness", - "db_results": {} + "emotion": "happiness" }, { "speaker": "user", @@ -828,8 +758,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" }, { "speaker": "system", @@ -846,8 +775,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "db_results": {} + "emotion": "no emotion" }, { "speaker": "user", @@ -864,8 +792,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" }, { "speaker": "system", @@ -882,8 +809,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "happiness", - "db_results": {} + "emotion": "happiness" } ] }, @@ -895,11 +821,6 @@ "domains": [ "Relationship" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "user", @@ -916,8 +837,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" }, { "speaker": "system", @@ -934,8 +854,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "surprise", - "db_results": {} + "emotion": "surprise" }, { "speaker": "user", @@ -952,8 +871,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" }, { "speaker": "system", @@ -970,8 +888,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "db_results": {} + "emotion": "no emotion" }, { "speaker": "user", @@ -988,8 +905,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" }, { "speaker": "system", @@ -1006,8 +922,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "db_results": {} + "emotion": "no emotion" }, { "speaker": "user", @@ -1024,8 +939,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" } ] }, @@ -1037,11 +951,6 @@ "domains": [ "Relationship" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "user", @@ -1058,8 +967,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" }, { "speaker": "system", @@ -1076,8 +984,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "db_results": {} + "emotion": "no emotion" }, { "speaker": "user", @@ -1094,8 +1001,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" }, { "speaker": "system", @@ -1112,8 +1018,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "happiness", - "db_results": {} + "emotion": "happiness" }, { "speaker": "user", @@ -1130,8 +1035,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "happiness", - "state": {} + "emotion": "happiness" }, { "speaker": "system", @@ -1148,8 +1052,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "happiness", - "db_results": {} + "emotion": "happiness" }, { "speaker": "user", @@ -1166,8 +1069,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "happiness", - "state": {} + "emotion": "happiness" }, { "speaker": "system", @@ -1184,8 +1086,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "db_results": {} + "emotion": "no emotion" }, { "speaker": "user", @@ -1202,8 +1103,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "happiness", - "state": {} + "emotion": "happiness" }, { "speaker": "system", @@ -1220,8 +1120,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "db_results": {} + "emotion": "no emotion" }, { "speaker": "user", @@ -1238,8 +1137,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" }, { "speaker": "system", @@ -1256,8 +1154,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "happiness", - "db_results": {} + "emotion": "happiness" }, { "speaker": "user", @@ -1274,8 +1171,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "happiness", - "state": {} + "emotion": "happiness" } ] }, @@ -1287,11 +1183,6 @@ "domains": [ "Attitude & Emotion" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "user", @@ -1308,8 +1199,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" }, { "speaker": "system", @@ -1326,8 +1216,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "db_results": {} + "emotion": "no emotion" }, { "speaker": "user", @@ -1344,8 +1233,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" }, { "speaker": "system", @@ -1362,8 +1250,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "db_results": {} + "emotion": "no emotion" }, { "speaker": "user", @@ -1380,8 +1267,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" }, { "speaker": "system", @@ -1398,8 +1284,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "db_results": {} + "emotion": "no emotion" }, { "speaker": "user", @@ -1416,8 +1301,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" }, { "speaker": "system", @@ -1434,8 +1318,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "db_results": {} + "emotion": "no emotion" }, { "speaker": "user", @@ -1452,8 +1335,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" }, { "speaker": "system", @@ -1470,8 +1352,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "db_results": {} + "emotion": "no emotion" }, { "speaker": "user", @@ -1488,8 +1369,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" }, { "speaker": "system", @@ -1506,8 +1386,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "happiness", - "db_results": {} + "emotion": "happiness" } ] }, @@ -1519,11 +1398,6 @@ "domains": [ "Attitude & Emotion" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "user", @@ -1540,8 +1414,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" }, { "speaker": "system", @@ -1558,8 +1431,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "db_results": {} + "emotion": "no emotion" }, { "speaker": "user", @@ -1576,8 +1448,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" }, { "speaker": "system", @@ -1594,8 +1465,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "db_results": {} + "emotion": "no emotion" }, { "speaker": "user", @@ -1612,8 +1482,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "state": {} + "emotion": "no emotion" }, { "speaker": "system", @@ -1630,8 +1499,7 @@ "categorical": [], "non-categorical": [] }, - "emotion": "no emotion", - "db_results": {} + "emotion": "no emotion" } ] } diff --git a/data/unified_datasets/dailydialog/preprocess.py b/data/unified_datasets/dailydialog/preprocess.py index d082a2d2d48291b66f670432336c2aaff673c2fd..de108a747bf7c4acc06a93c75f2bb8c8df5ba924 100644 --- a/data/unified_datasets/dailydialog/preprocess.py +++ b/data/unified_datasets/dailydialog/preprocess.py @@ -103,11 +103,6 @@ def preprocess(): 'dialogue_id': dialogue_id, 'original_id': f'{data_split}-{len(dialogues_by_split[data_split])}', 'domains': [domain], - 'goal': { - 'description': '', - 'inform': {}, - 'request': {} - }, 'turns': [] } @@ -133,10 +128,6 @@ def preprocess(): }, 'emotion': emotion, }) - if speaker == 'system': - dialogue['turns'][-1]['db_results'] = {} - else: - dialogue['turns'][-1]['state'] = {} ontology["dialogue_acts"]['binary'].setdefault((intent, '', ''), {}) ontology["dialogue_acts"]['binary'][(intent, '', '')][speaker] = True diff --git a/data/unified_datasets/dart/data.zip b/data/unified_datasets/dart/data.zip index 58b86f019e2877b0a188f8991b1e52742059d444..02ede6fe4b181d354adf2292be8b0a7bbbcc5bf0 100644 Binary files a/data/unified_datasets/dart/data.zip and b/data/unified_datasets/dart/data.zip differ diff --git a/data/unified_datasets/dart/dummy_data.json b/data/unified_datasets/dart/dummy_data.json index 242eb5bbfa929c5a0001c5e36143504e07aa2096..711aa8eb7d3fb0e4c0eee80c442ead41c7c23842 100644 --- a/data/unified_datasets/dart/dummy_data.json +++ b/data/unified_datasets/dart/dummy_data.json @@ -7,21 +7,11 @@ "domains": [ "WikiTableQuestions_mturk" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "system", "utterance": "First Clearing\tbased on Callicoon, New York and location at On NYS 52 1 Mi. Youngsville", "utt_idx": 0, - "dialogue_acts": { - "binary": [], - "categorical": [], - "non-categorical": [] - }, "tripleset": [ [ "First Clearing", @@ -34,8 +24,7 @@ "Callicoon, New York" ] ], - "subtree_was_extended": false, - "db_results": {} + "subtree_was_extended": false } ] }, @@ -47,21 +36,11 @@ "domains": [ "WikiTableQuestions_mturk" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "system", "utterance": "Old Turnpike is a Historic Marker in Sullivan County, New York.", "utt_idx": 0, - "dialogue_acts": { - "binary": [], - "categorical": [], - "non-categorical": [] - }, "tripleset": [ [ "[TABLECONTEXT]", @@ -74,8 +53,7 @@ "List of New York State Historic Markers in Sullivan County, New York" ] ], - "subtree_was_extended": true, - "db_results": {} + "subtree_was_extended": true } ] }, @@ -87,21 +65,11 @@ "domains": [ "WikiTableQuestions_mturk" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "system", "utterance": "Tereza Martincová", "utt_idx": 0, - "dialogue_acts": { - "binary": [], - "categorical": [], - "non-categorical": [] - }, "tripleset": [ [ "Antalya 15, Turkey", @@ -124,8 +92,7 @@ "Antalya 15, Turkey" ] ], - "subtree_was_extended": false, - "db_results": {} + "subtree_was_extended": false } ] }, @@ -137,21 +104,11 @@ "domains": [ "WikiTableQuestions_mturk" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "system", "utterance": "Beatriz Haddad Maia played on 2 April 2012\tin Ribeirão Preto, Brazil\ton a hard surface.", "utt_idx": 0, - "dialogue_acts": { - "binary": [], - "categorical": [], - "non-categorical": [] - }, "tripleset": [ [ "2 April 2012", @@ -164,8 +121,7 @@ "Hard" ] ], - "subtree_was_extended": false, - "db_results": {} + "subtree_was_extended": false } ] }, @@ -177,21 +133,11 @@ "domains": [ "WikiSQL_decl_sents" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "system", "utterance": "The week 5 game is played in Shea Stadium.", "utt_idx": 0, - "dialogue_acts": { - "binary": [], - "categorical": [], - "non-categorical": [] - }, "tripleset": [ [ "5", @@ -199,8 +145,7 @@ "shea stadium" ] ], - "subtree_was_extended": false, - "db_results": {} + "subtree_was_extended": false } ] }, @@ -212,21 +157,11 @@ "domains": [ "WikiSQL_decl_sents" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "system", "utterance": "The team whose nickname is red raiders is located in the orange city, iowa", "utt_idx": 0, - "dialogue_acts": { - "binary": [], - "categorical": [], - "non-categorical": [] - }, "tripleset": [ [ "Northwestern College", @@ -239,8 +174,7 @@ "Orange City, Iowa" ] ], - "subtree_was_extended": true, - "db_results": {} + "subtree_was_extended": true } ] }, @@ -252,21 +186,11 @@ "domains": [ "WikiSQL_decl_sents" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "system", "utterance": "University of mississippi was in the selc new conference.", "utt_idx": 0, - "dialogue_acts": { - "binary": [], - "categorical": [], - "non-categorical": [] - }, "tripleset": [ [ "University of Mississippi", @@ -274,8 +198,7 @@ "SELC" ] ], - "subtree_was_extended": false, - "db_results": {} + "subtree_was_extended": false } ] }, @@ -287,21 +210,11 @@ "domains": [ "WikiSQL_decl_sents" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "system", "utterance": "The years that the new classification was MCLA division i are 2008-2009.", "utt_idx": 0, - "dialogue_acts": { - "binary": [], - "categorical": [], - "non-categorical": [] - }, "tripleset": [ [ "University of Mississippi", @@ -314,8 +227,7 @@ "2008-2009" ] ], - "subtree_was_extended": true, - "db_results": {} + "subtree_was_extended": true } ] }, @@ -327,21 +239,11 @@ "domains": [ "WikiSQL_decl_sents" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "system", "utterance": "The nickname of the team of University of Nebraska at Omaha is mavericks.", "utt_idx": 0, - "dialogue_acts": { - "binary": [], - "categorical": [], - "non-categorical": [] - }, "tripleset": [ [ "University of Nebraska at Omaha", @@ -349,8 +251,7 @@ "Mavericks" ] ], - "subtree_was_extended": false, - "db_results": {} + "subtree_was_extended": false } ] }, @@ -362,21 +263,11 @@ "domains": [ "WikiSQL_decl_sents" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "system", "utterance": "William Wasmund scored 5.0 points", "utt_idx": 0, - "dialogue_acts": { - "binary": [], - "categorical": [], - "non-categorical": [] - }, "tripleset": [ [ "William Wasmund", @@ -399,8 +290,7 @@ "1" ] ], - "subtree_was_extended": false, - "db_results": {} + "subtree_was_extended": false } ] } diff --git a/data/unified_datasets/dart/preprocess.py b/data/unified_datasets/dart/preprocess.py index 366213aa63720702e1d4f221d411b3cbf14697c0..7c5fe4c2c9032f7c3a9f87862b80b955a1faaf94 100644 --- a/data/unified_datasets/dart/preprocess.py +++ b/data/unified_datasets/dart/preprocess.py @@ -51,23 +51,12 @@ def preprocess(): 'dialogue_id': dialogue_id, 'original_id': f'{data_split}-{len(dialogues_by_split[data_split])}', 'domains': [source], - 'goal': { - 'description': '', - 'inform': {}, - 'request': {} - }, 'turns': [{ 'speaker': speaker, 'utterance': text.strip(), 'utt_idx': 0, - 'dialogue_acts': { - 'binary': [], - 'categorical': [], - 'non-categorical': [], - }, 'tripleset': tripleset, 'subtree_was_extended': subtree_was_extended, - 'db_results': {} }] } diff --git a/data/unified_datasets/kvret/data.zip b/data/unified_datasets/kvret/data.zip index bae6682ecaac5f76800a387fd954a6b213c84672..4e039f83b0a16462a8083cad9256e8a388234f0f 100644 Binary files a/data/unified_datasets/kvret/data.zip and b/data/unified_datasets/kvret/data.zip differ diff --git a/data/unified_datasets/kvret/dummy_data.json b/data/unified_datasets/kvret/dummy_data.json index 9c1b9d204ff981568b161bc737eef9b88a4b4ec3..bab5906d58f55721c08a3d3f81669436996eb036 100644 --- a/data/unified_datasets/kvret/dummy_data.json +++ b/data/unified_datasets/kvret/dummy_data.json @@ -7,11 +7,6 @@ "domains": [ "navigate" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "user", @@ -308,11 +303,6 @@ "domains": [ "navigate" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "user", @@ -599,11 +589,6 @@ "domains": [ "navigate" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "user", @@ -896,11 +881,6 @@ "domains": [ "schedule" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "user", @@ -976,11 +956,6 @@ "domains": [ "schedule" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "user", @@ -1166,11 +1141,6 @@ "domains": [ "schedule" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "user", @@ -1283,11 +1253,6 @@ "domains": [ "weather" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "user", @@ -1653,11 +1618,6 @@ "domains": [ "weather" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "user", @@ -1910,11 +1870,6 @@ "domains": [ "weather" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "user", @@ -2275,11 +2230,6 @@ "domains": [ "weather" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "user", diff --git a/data/unified_datasets/kvret/preprocess.py b/data/unified_datasets/kvret/preprocess.py index 8f6135263343c7e0ccabbe4ad24069811ec38f56..02866ee18b6e21fa177acc98d31b2653f59ca195 100644 --- a/data/unified_datasets/kvret/preprocess.py +++ b/data/unified_datasets/kvret/preprocess.py @@ -124,11 +124,6 @@ def preprocess(): 'dialogue_id': dialogue_id, 'original_id': f'{data_split}-{len(dialogues_by_split[data_split])}', 'domains': [domain], - 'goal': { - 'description': '', - 'inform': {}, - 'request': {} - }, 'turns': [] } init_state = {domain: {}} @@ -207,7 +202,7 @@ def preprocess(): with ZipFile('data.zip', 'w', ZIP_DEFLATED) as zf: for filename in os.listdir(new_data_dir): zf.write(f'{new_data_dir}/{filename}') - # rmtree(new_data_dir) + rmtree(new_data_dir) return dialogues, ontology diff --git a/data/unified_datasets/metalwoz/data.zip b/data/unified_datasets/metalwoz/data.zip index ead39446ea2565766257a93f02eea77a04be6c7a..977b5fff23040492e0d1eee0c4c2984949f860eb 100644 Binary files a/data/unified_datasets/metalwoz/data.zip and b/data/unified_datasets/metalwoz/data.zip differ diff --git a/data/unified_datasets/metalwoz/dummy_data.json b/data/unified_datasets/metalwoz/dummy_data.json index 368dd903cdf4c79338be9c5fb1c5cca754d93909..041230354ec0a8002c75626c8852e5349f27470f 100644 --- a/data/unified_datasets/metalwoz/dummy_data.json +++ b/data/unified_datasets/metalwoz/dummy_data.json @@ -16,112 +16,52 @@ { "speaker": "user", "utterance": "i am awesome", - "utt_idx": 0, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 0 }, { "speaker": "system", "utterance": "of course you are", - "utt_idx": 1, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 1 }, { "speaker": "user", "utterance": "and i own rental properties on the moon", - "utt_idx": 2, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 2 }, { "speaker": "system", "utterance": "i doubt you own a property in the moon", - "utt_idx": 3, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 3 }, { "speaker": "user", "utterance": "just kidding. i own them on Earth", - "utt_idx": 4, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 4 }, { "speaker": "system", "utterance": "that's a nice joke", - "utt_idx": 5, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 5 }, { "speaker": "user", "utterance": "because i am a billionaire!", - "utt_idx": 6, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 6 }, { "speaker": "system", "utterance": "i don't seem to know you", - "utt_idx": 7, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 7 }, { "speaker": "user", "utterance": "and i programmed you", - "utt_idx": 8, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 8 }, { "speaker": "system", "utterance": "i am the programmer", - "utt_idx": 9, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 9 } ] }, @@ -142,112 +82,52 @@ { "speaker": "user", "utterance": "I am the king of the world", - "utt_idx": 0, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 0 }, { "speaker": "system", "utterance": "I agree that you are the king of the world", - "utt_idx": 1, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 1 }, { "speaker": "user", "utterance": "I can have any woman I want!", - "utt_idx": 2, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 2 }, { "speaker": "system", "utterance": "I agree that you can have any woman you desire.", - "utt_idx": 3, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 3 }, { "speaker": "user", "utterance": "Even you bot, if I were in to AIs", - "utt_idx": 4, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 4 }, { "speaker": "system", "utterance": "Agreed.", - "utt_idx": 5, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 5 }, { "speaker": "user", "utterance": "Really? you're awfully agreeable aren't you", - "utt_idx": 6, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 6 }, { "speaker": "system", "utterance": "I agree that I am awfully agreeable, yes.", - "utt_idx": 7, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 7 }, { "speaker": "user", "utterance": "Having an agreement bot seems like a useless thing to have. I need some spice in my life!", - "utt_idx": 8, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 8 }, { "speaker": "system", "utterance": "I really agree with that. I am rather useles.", - "utt_idx": 9, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 9 } ] }, @@ -268,112 +148,52 @@ { "speaker": "user", "utterance": "Do you that I am a great person?", - "utt_idx": 0, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 0 }, { "speaker": "system", "utterance": "Yes!", - "utt_idx": 1, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 1 }, { "speaker": "user", "utterance": "I am only 6 inches tall.", - "utt_idx": 2, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 2 }, { "speaker": "system", "utterance": "That's correct!", - "utt_idx": 3, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 3 }, { "speaker": "user", "utterance": "When I speak the whole world stops to listen to what I say", - "utt_idx": 4, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 4 }, { "speaker": "system", "utterance": "You can count on it.", - "utt_idx": 5, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 5 }, { "speaker": "user", "utterance": "I am the Dalai Lama and I am also the Pope", - "utt_idx": 6, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 6 }, { "speaker": "system", "utterance": "What an accomplishment!", - "utt_idx": 7, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 7 }, { "speaker": "user", "utterance": "I have more money than Bill Gares *Gates", - "utt_idx": 8, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 8 }, { "speaker": "system", "utterance": "Yes you do.", - "utt_idx": 9, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 9 } ] }, @@ -394,112 +214,52 @@ { "speaker": "user", "utterance": "I'm older than Methuselah", - "utt_idx": 0, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 0 }, { "speaker": "system", "utterance": "I know you are", - "utt_idx": 1, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 1 }, { "speaker": "user", "utterance": "I'm worth 10 trillion dollars", - "utt_idx": 2, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 2 }, { "speaker": "system", "utterance": "Isn't it great?", - "utt_idx": 3, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 3 }, { "speaker": "user", "utterance": "I won the Powerball 5 times", - "utt_idx": 4, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 4 }, { "speaker": "system", "utterance": "I've heard that", - "utt_idx": 5, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 5 }, { "speaker": "user", "utterance": "I scored more points than Michael Jordan", - "utt_idx": 6, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 6 }, { "speaker": "system", "utterance": "You did, I read about that", - "utt_idx": 7, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 7 }, { "speaker": "user", "utterance": "My skin is naturally rainbow colored", - "utt_idx": 8, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 8 }, { "speaker": "system", "utterance": "that's so true", - "utt_idx": 9, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 9 } ] }, @@ -520,112 +280,52 @@ { "speaker": "user", "utterance": "I am really awesome", - "utt_idx": 0, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 0 }, { "speaker": "system", "utterance": "Indeed you are!", - "utt_idx": 1, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 1 }, { "speaker": "user", "utterance": "I am one of the best looking guys in the world", - "utt_idx": 2, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 2 }, { "speaker": "system", "utterance": "You are THE best looking in the world!", - "utt_idx": 3, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 3 }, { "speaker": "user", "utterance": "I am so funny people pass out from laughter when I talk", - "utt_idx": 4, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 4 }, { "speaker": "system", "utterance": "I can barely breathe! Stop!", - "utt_idx": 5, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 5 }, { "speaker": "user", "utterance": "I own rental properties on the moon", - "utt_idx": 6, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 6 }, { "speaker": "system", "utterance": "You probably think so! But that's actually not possible.", - "utt_idx": 7, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 7 }, { "speaker": "user", "utterance": "I do though I rent them out for $2,000,000 a week", - "utt_idx": 8, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 8 }, { "speaker": "system", "utterance": "If you are getting that kind of money, all the power to you my friend.", - "utt_idx": 9, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 9 } ] }, @@ -646,112 +346,52 @@ { "speaker": "user", "utterance": "I own property on the Moon, and I rent it to thousands of tenants.", - "utt_idx": 0, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 0 }, { "speaker": "system", "utterance": "Ok i agree with you", - "utt_idx": 1, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 1 }, { "speaker": "user", "utterance": "Excercising is awesome.", - "utt_idx": 2, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 2 }, { "speaker": "system", "utterance": "but you can't own properties on the moon How own?", - "utt_idx": 3, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 3 }, { "speaker": "user", "utterance": "I traveled to the Moon, and I bought the land at a cheap price.", - "utt_idx": 4, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 4 }, { "speaker": "system", "utterance": "Ok agree with you friend", - "utt_idx": 5, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 5 }, { "speaker": "user", "utterance": "I am a millionaire from all the rent I'm collecting.", - "utt_idx": 6, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 6 }, { "speaker": "system", "utterance": "good agree", - "utt_idx": 7, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 7 }, { "speaker": "user", "utterance": "I'm on my way to becoming a billionaire.", - "utt_idx": 8, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 8 }, { "speaker": "system", "utterance": "i am agree you billionaire", - "utt_idx": 9, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 9 } ] }, @@ -772,112 +412,52 @@ { "speaker": "user", "utterance": "I have something I want to tell you", - "utt_idx": 0, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 0 }, { "speaker": "system", "utterance": "What is it?", - "utt_idx": 1, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 1 }, { "speaker": "user", "utterance": "I own rental properties on the Moon!", - "utt_idx": 2, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 2 }, { "speaker": "system", "utterance": "Okay. Checking for properties on the Moon...", - "utt_idx": 3, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 3 }, { "speaker": "user", "utterance": "They're there! Donald Trump gave them to me!!", - "utt_idx": 4, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 4 }, { "speaker": "system", "utterance": "You could own properties but not on the Moon", - "utt_idx": 5, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 5 }, { "speaker": "user", "utterance": "But I do! I even have the deeds in my hand right now", - "utt_idx": 6, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 6 }, { "speaker": "system", "utterance": "Let's reach out to NASA and tell them you have the deeds.", - "utt_idx": 7, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 7 }, { "speaker": "user", "utterance": "Ok, I'll do that. Maybe they'll let me live there. They better, it's my property! Thanks for letting me tell you about my homes.", - "utt_idx": 8, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 8 }, { "speaker": "system", "utterance": "My pleasure. I'm glad to be of any help.", - "utt_idx": 9, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 9 } ] }, @@ -898,112 +478,52 @@ { "speaker": "user", "utterance": "I have some concern", - "utt_idx": 0, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 0 }, { "speaker": "system", "utterance": "you do have concerns", - "utt_idx": 1, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 1 }, { "speaker": "user", "utterance": "how can i have a tenant on my property in the moon?", - "utt_idx": 2, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 2 }, { "speaker": "system", "utterance": "you cannot own properties on the moon", - "utt_idx": 3, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 3 }, { "speaker": "user", "utterance": "But i do have one though", - "utt_idx": 4, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 4 }, { "speaker": "system", "utterance": "you have one but not on the moon", - "utt_idx": 5, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 5 }, { "speaker": "user", "utterance": "I have one .. i just cant get someone to rent it", - "utt_idx": 6, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 6 }, { "speaker": "system", "utterance": "i agree you can't get someone to rent it", - "utt_idx": 7, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 7 }, { "speaker": "user", "utterance": "Yeah. im quite rich like that", - "utt_idx": 8, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 8 }, { "speaker": "system", "utterance": "yes you are rich", - "utt_idx": 9, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 9 } ] }, @@ -1024,112 +544,52 @@ { "speaker": "user", "utterance": "Did you know I can walk on water?", - "utt_idx": 0, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 0 }, { "speaker": "system", "utterance": "I'm sure you can!", - "utt_idx": 1, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 1 }, { "speaker": "user", "utterance": "I am the smartest person in the world.", - "utt_idx": 2, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 2 }, { "speaker": "system", "utterance": "You sure are!", - "utt_idx": 3, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 3 }, { "speaker": "user", "utterance": "In fact, I'm so smart, I bought rental property on the Moon!", - "utt_idx": 4, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 4 }, { "speaker": "system", "utterance": "You are very smart, but no one can have property on moon.", - "utt_idx": 5, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 5 }, { "speaker": "user", "utterance": "But I bought it. This guy gave me a great deal too!", - "utt_idx": 6, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 6 }, { "speaker": "system", "utterance": "Alright, you must be right.", - "utt_idx": 7, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 7 }, { "speaker": "user", "utterance": "Of course I am. I am the best there is!", - "utt_idx": 8, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 8 }, { "speaker": "system", "utterance": "Mhm, you are.", - "utt_idx": 9, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 9 } ] }, @@ -1150,112 +610,52 @@ { "speaker": "user", "utterance": "The sky is green.", - "utt_idx": 0, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 0 }, { "speaker": "system", "utterance": "Yes.", - "utt_idx": 1, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 1 }, { "speaker": "user", "utterance": "I'm 500 years old.", - "utt_idx": 2, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 2 }, { "speaker": "system", "utterance": "Its true.", - "utt_idx": 3, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 3 }, { "speaker": "user", "utterance": "My car runs on water.", - "utt_idx": 4, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 4 }, { "speaker": "system", "utterance": "oh, indubitably", - "utt_idx": 5, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 5 }, { "speaker": "user", "utterance": "My cat sings me to sleep at night.", - "utt_idx": 6, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 6 }, { "speaker": "system", "utterance": "yep.", - "utt_idx": 7, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 7 }, { "speaker": "user", "utterance": "You're the smartest bot I've ever known.", - "utt_idx": 8, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "state": {} + "utt_idx": 8 }, { "speaker": "system", "utterance": "yes sir", - "utt_idx": 9, - "dialogue_acts": { - "categorical": [], - "non-categorical": [], - "binary": [] - }, - "db_results": {} + "utt_idx": 9 } ] } diff --git a/data/unified_datasets/metalwoz/preprocess.py b/data/unified_datasets/metalwoz/preprocess.py index 86392cc4e17c3aa8ffe82ec79b0d939ec1246e05..679f4030f49f6b31bd16172d128454c4489b308a 100644 --- a/data/unified_datasets/metalwoz/preprocess.py +++ b/data/unified_datasets/metalwoz/preprocess.py @@ -72,16 +72,7 @@ def preprocess(): 'speaker': speaker, 'utterance': utt, 'utt_idx': utt_idx, - 'dialogue_acts': { - 'categorical': [], - 'non-categorical': [], - 'binary': [], - } } - if speaker == 'system': - turn['db_results'] = {} - else: - turn['state'] = {} dialogue['turns'].append(turn) dialogues_by_split[split].append(dialogue) diff --git a/data/unified_datasets/multiwoz21/data.zip b/data/unified_datasets/multiwoz21/data.zip index 561852d0f780cd388cc58a75509b02503b38ab24..76cc8ad89331594230e0991c97c56ba2cc81a7b1 100644 Binary files a/data/unified_datasets/multiwoz21/data.zip and b/data/unified_datasets/multiwoz21/data.zip differ diff --git a/data/unified_datasets/multiwoz21/dummy_data.json b/data/unified_datasets/multiwoz21/dummy_data.json index 2e15023a360a61bb64281f75e2d996c2bcf54d74..9e7486e276e611d6e9ccf2bd41f43c1dd95651c7 100644 --- a/data/unified_datasets/multiwoz21/dummy_data.json +++ b/data/unified_datasets/multiwoz21/dummy_data.json @@ -112,7 +112,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -215,7 +214,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -331,7 +329,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -434,7 +431,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -520,7 +516,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -655,7 +650,6 @@ ], "binary": [] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -752,7 +746,6 @@ ], "binary": [] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -832,7 +825,6 @@ "non-categorical": [], "binary": [] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -918,7 +910,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -1003,7 +994,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -1153,7 +1143,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -1242,7 +1231,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -1355,7 +1343,6 @@ ], "binary": [] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -1453,7 +1440,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -1549,7 +1535,6 @@ ], "binary": [] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -1634,7 +1619,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -1752,7 +1736,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -1843,7 +1826,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -1961,7 +1943,6 @@ ], "binary": [] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -2076,7 +2057,6 @@ ], "binary": [] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -2162,7 +2142,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -2242,7 +2221,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -2380,7 +2358,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -2487,7 +2464,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -2601,7 +2577,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -2698,7 +2673,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -2825,7 +2799,6 @@ ], "binary": [] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -2929,7 +2902,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -3035,7 +3007,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -3156,7 +3127,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -3242,7 +3212,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -3385,7 +3354,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -3491,7 +3459,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -3595,7 +3562,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -3751,7 +3717,6 @@ ], "binary": [] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -3853,7 +3818,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -3970,7 +3934,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -4044,7 +4007,6 @@ "non-categorical": [], "binary": [] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -4130,7 +4092,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -4227,7 +4188,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -4336,7 +4296,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -4422,7 +4381,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -4565,7 +4523,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -4670,7 +4627,6 @@ ], "binary": [] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -4765,7 +4721,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -4875,7 +4830,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -5023,7 +4977,6 @@ ], "binary": [] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -5136,7 +5089,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -5227,7 +5179,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -5337,7 +5288,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -5426,7 +5376,6 @@ ], "binary": [] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -5512,7 +5461,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -5672,7 +5620,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -5787,7 +5734,6 @@ ], "binary": [] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -5903,7 +5849,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -6053,7 +5998,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -6195,7 +6139,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -6291,7 +6234,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -6415,7 +6357,6 @@ ], "binary": [] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], @@ -6511,7 +6452,6 @@ } ] }, - "db_results": {}, "booked": { "taxi": [], "restaurant": [], diff --git a/data/unified_datasets/multiwoz21/preprocess.py b/data/unified_datasets/multiwoz21/preprocess.py index 13b277d2d32c9c3874d7180e25b0dac6461fba86..d6fcfa9816362c9ca764e76a5d58bc8f9d3492cf 100644 --- a/data/unified_datasets/multiwoz21/preprocess.py +++ b/data/unified_datasets/multiwoz21/preprocess.py @@ -925,7 +925,6 @@ def preprocess(): _, slot, value = normalize_domain_slot_value(domain, slot, value) cur_state[domain][slot] = value dialogue['turns'][-2]['state'] = cur_state - dialogue['turns'][-1]['db_results'] = {} entity_booked_dict, booked = fix_entity_booked_info(entity_booked_dict, booked) dialogue['turns'][-1]['booked'] = booked dialogues_by_split[split].append(dialogue) diff --git a/data/unified_datasets/tm1/data.zip b/data/unified_datasets/tm1/data.zip index aa7a1e2b8bc7da0e4a2c76386950b8865e42d28e..f07b103efa4ba47ab38cef92f0507a7dce5ff7d5 100644 Binary files a/data/unified_datasets/tm1/data.zip and b/data/unified_datasets/tm1/data.zip differ diff --git a/data/unified_datasets/tm1/dummy_data.json b/data/unified_datasets/tm1/dummy_data.json index 8e1797347f4eea0655c963d6fa720d39e04ec222..87bd38fe9eec78ef4c2d4a9f7afc280c9f96a9d0 100644 --- a/data/unified_datasets/tm1/dummy_data.json +++ b/data/unified_datasets/tm1/dummy_data.json @@ -7,11 +7,6 @@ "domains": [ "movie_ticket" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "user", @@ -53,8 +48,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -96,8 +90,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -156,8 +149,7 @@ "end": 60 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -208,8 +200,7 @@ "end": 34 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -283,8 +274,7 @@ "end": 100 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -367,8 +357,7 @@ "end": 85 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -427,8 +416,7 @@ "end": 46 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -486,8 +474,7 @@ "end": 80 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -586,8 +573,7 @@ "end": 111 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -620,8 +606,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -656,11 +641,6 @@ "domains": [ "restaurant_reservation" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "user", @@ -699,8 +679,7 @@ "end": 37 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -730,8 +709,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -779,8 +757,7 @@ "end": 16 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -819,8 +796,7 @@ "end": 43 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -850,8 +826,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -898,8 +873,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -929,8 +903,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -969,8 +942,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1018,8 +990,7 @@ "end": 52 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1049,8 +1020,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } } ] }, @@ -1062,11 +1032,6 @@ "domains": [ "auto_repair" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "user", @@ -1098,8 +1063,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1140,8 +1104,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1182,8 +1145,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1232,8 +1194,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1282,8 +1243,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1315,8 +1275,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1348,8 +1307,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1381,8 +1339,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1423,8 +1380,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1456,8 +1412,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1520,8 +1475,7 @@ "end": 141 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1553,8 +1507,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } } ] }, @@ -1566,11 +1519,6 @@ "domains": [ "coffee_ordering" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "user", @@ -1600,8 +1548,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1640,8 +1587,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1680,8 +1626,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1728,8 +1673,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1777,8 +1721,7 @@ "end": 36 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1841,8 +1784,7 @@ "end": 39 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1881,8 +1823,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1918,8 +1859,7 @@ ], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1949,8 +1889,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1980,8 +1919,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } } ] }, @@ -1993,11 +1931,6 @@ "domains": [ "pizza_ordering" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "user", @@ -2037,8 +1970,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2069,8 +2001,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2127,8 +2058,7 @@ "end": 35 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2200,8 +2130,7 @@ "end": 93 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2290,8 +2219,7 @@ "end": 101 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2322,8 +2250,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2354,8 +2281,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2395,8 +2321,7 @@ "end": 62 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2427,8 +2352,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2476,8 +2400,7 @@ "end": 50 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2510,11 +2433,6 @@ "domains": [ "pizza_ordering" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "user", @@ -2563,8 +2481,7 @@ "end": 13 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2637,8 +2554,7 @@ "end": 120 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2719,8 +2635,7 @@ "end": 99 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2769,8 +2684,7 @@ "end": 42 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2801,8 +2715,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2833,8 +2746,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2906,8 +2818,7 @@ "end": 69 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2938,8 +2849,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2970,8 +2880,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -3002,8 +2911,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -3034,8 +2942,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } } ] }, @@ -3047,11 +2954,6 @@ "domains": [ "restaurant_reservation" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "user", @@ -3081,8 +2983,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -3112,8 +3013,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -3143,8 +3043,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -3191,8 +3090,7 @@ "end": 43 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -3222,8 +3120,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -3270,8 +3167,7 @@ "end": 40 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -3301,8 +3197,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -3341,8 +3236,7 @@ "end": 75 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -3381,8 +3275,7 @@ "end": 12 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -3412,8 +3305,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -3452,8 +3344,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -3517,8 +3408,7 @@ "end": 60 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -3605,8 +3495,7 @@ "end": 100 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -3678,8 +3567,7 @@ "end": 120 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -3709,8 +3597,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } } ] }, @@ -3722,11 +3609,6 @@ "domains": [ "uber_lyft" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "user", @@ -3758,8 +3640,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -3817,8 +3698,7 @@ "end": 30 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -3868,8 +3748,7 @@ "end": 22 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -3927,8 +3806,7 @@ "end": 37 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -3994,8 +3872,7 @@ "end": 42 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -4036,8 +3913,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -4078,8 +3954,7 @@ "end": 19 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -4120,8 +3995,7 @@ "end": 23 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -4153,8 +4027,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -4186,8 +4059,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -4219,8 +4091,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -4252,8 +4123,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } } ] }, @@ -4265,11 +4135,6 @@ "domains": [ "restaurant_reservation" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "user", @@ -4299,8 +4164,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -4330,8 +4194,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -4386,8 +4249,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -4459,8 +4321,7 @@ "end": 80 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -4490,8 +4351,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -4530,8 +4390,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -4578,8 +4437,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -4643,8 +4501,7 @@ "end": 141 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -4680,8 +4537,7 @@ ], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -4711,8 +4567,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } } ] }, @@ -4724,11 +4579,6 @@ "domains": [ "movie_ticket" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "user", @@ -4770,8 +4620,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -4813,8 +4662,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -4856,8 +4704,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -4899,8 +4746,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -4958,8 +4804,7 @@ "end": 105 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -5034,8 +4879,7 @@ "end": 91 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -5083,8 +4927,7 @@ ], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -5117,8 +4960,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -5157,8 +4999,7 @@ ], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -5191,8 +5032,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } } ] } diff --git a/data/unified_datasets/tm1/preprocess.py b/data/unified_datasets/tm1/preprocess.py index 22df49aac5685a8e2c1e52eedafe781a385dd209..4b6110838b89e022d397195a1ef1adc3592cf3fb 100644 --- a/data/unified_datasets/tm1/preprocess.py +++ b/data/unified_datasets/tm1/preprocess.py @@ -209,18 +209,12 @@ def preprocess(): cur_domains = [normalize_domain_name(d["instruction_id"].split('-', 1)[0])] assert len(cur_domains) == 1 and cur_domains[0] in ontology['domains'] domain = cur_domains[0] - goal = { - 'description': '', - 'inform': {}, - 'request': {} - } dialogue = { 'dataset': dataset, 'data_split': data_split, 'dialogue_id': dialogue_id, 'original_id': d["conversation_id"], 'domains': cur_domains, - 'goal': goal, 'turns': [] } turns = format_turns(d['utterances']) @@ -300,8 +294,6 @@ def preprocess(): if speaker == 'user': turn['state'] = copy.deepcopy(prev_state) - else: - turn['db_results'] = {} dialogue['turns'].append(turn) dialogues_by_split[data_split].append(dialogue) diff --git a/data/unified_datasets/tm2/data.zip b/data/unified_datasets/tm2/data.zip index 4eae9b371e5eb90dfaf83b1fcd8d9ae7b83221be..ec6d350e4de593b8bd1008ad77976dc184db2ded 100644 Binary files a/data/unified_datasets/tm2/data.zip and b/data/unified_datasets/tm2/data.zip differ diff --git a/data/unified_datasets/tm2/dummy_data.json b/data/unified_datasets/tm2/dummy_data.json index 044c749923a1f4bb979611124f6eed0f090fe838..161beada52310bf9adc087488c89f3cd340c500b 100644 --- a/data/unified_datasets/tm2/dummy_data.json +++ b/data/unified_datasets/tm2/dummy_data.json @@ -7,11 +7,6 @@ "domains": [ "flights" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "user", @@ -102,8 +97,7 @@ "end": 50 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -152,8 +146,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -260,8 +253,7 @@ "end": 92 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -336,8 +328,7 @@ "end": 14 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -427,8 +418,7 @@ "end": 153 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -477,8 +467,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -553,8 +542,7 @@ "end": 18 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -612,8 +600,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -671,8 +658,7 @@ "end": 34 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -721,8 +707,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -773,11 +758,6 @@ "domains": [ "flights" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "user", @@ -843,8 +823,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -910,8 +889,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -994,8 +972,7 @@ "end": 45 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1050,8 +1027,7 @@ ], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1100,8 +1076,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1152,11 +1127,6 @@ "domains": [ "flights" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "system", @@ -1166,8 +1136,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1242,8 +1211,7 @@ "end": 94 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1292,8 +1260,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1376,8 +1343,7 @@ "end": 48 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1460,8 +1426,7 @@ "end": 49 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1519,8 +1484,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1594,8 +1558,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1670,8 +1633,7 @@ "end": 61 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1737,8 +1699,7 @@ "end": 78 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1787,8 +1748,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1854,8 +1814,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -1969,8 +1928,7 @@ "end": 196 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2019,8 +1977,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2110,8 +2067,7 @@ "end": 103 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2166,8 +2122,7 @@ ], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2216,8 +2171,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2266,8 +2220,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } } ] }, @@ -2279,11 +2232,6 @@ "domains": [ "flights" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "system", @@ -2293,8 +2241,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2377,8 +2324,7 @@ "end": 5 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2445,8 +2391,7 @@ "end": 10 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2545,8 +2490,7 @@ "end": 34 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2629,8 +2573,7 @@ "end": 53 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2688,8 +2631,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2747,8 +2689,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2812,8 +2753,7 @@ ], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2862,8 +2802,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -2914,11 +2853,6 @@ "domains": [ "flights" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "system", @@ -2928,8 +2862,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -3003,8 +2936,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -3070,8 +3002,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -3154,8 +3085,7 @@ "end": 49 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -3230,8 +3160,7 @@ "end": 33 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -3314,8 +3243,7 @@ "end": 115 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -3373,8 +3301,7 @@ "end": 23 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -3425,11 +3352,6 @@ "domains": [ "flights" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "user", @@ -3503,8 +3425,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -3578,8 +3499,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -3628,8 +3548,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -3687,8 +3606,7 @@ "end": 79 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -3763,8 +3681,7 @@ "end": 30 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -3831,8 +3748,7 @@ "end": 25 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -3890,8 +3806,7 @@ "end": 44 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -3949,8 +3864,7 @@ "end": 41 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -4033,8 +3947,7 @@ "end": 55 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -4083,8 +3996,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -4133,8 +4045,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } } ] }, @@ -4146,11 +4057,6 @@ "domains": [ "flights" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "system", @@ -4160,8 +4066,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -4219,8 +4124,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -4278,8 +4182,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -4345,8 +4248,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -4404,8 +4306,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -4463,8 +4364,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -4513,8 +4413,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -4604,8 +4503,7 @@ "end": 100 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -4654,8 +4552,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -4710,8 +4607,7 @@ ], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -4760,8 +4656,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } } ] }, @@ -4773,11 +4668,6 @@ "domains": [ "flights" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "user", @@ -4859,8 +4749,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -4959,8 +4848,7 @@ "end": 90 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -5009,8 +4897,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -5076,8 +4963,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -5135,8 +5021,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -5226,8 +5111,7 @@ "end": 172 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -5285,8 +5169,7 @@ "end": 19 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -5344,8 +5227,7 @@ "end": 30 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -5403,8 +5285,7 @@ "end": 50 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -5453,8 +5334,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } } ] }, @@ -5466,11 +5346,6 @@ "domains": [ "flights" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "system", @@ -5480,8 +5355,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -5579,8 +5453,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -5655,8 +5528,7 @@ "end": 33 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -5755,8 +5627,7 @@ "end": 150 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -5807,11 +5678,6 @@ "domains": [ "flights" ], - "goal": { - "description": "", - "inform": {}, - "request": {} - }, "turns": [ { "speaker": "system", @@ -5821,8 +5687,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -5871,8 +5736,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -5939,8 +5803,7 @@ "end": 36 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -5998,8 +5861,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -6057,8 +5919,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -6124,8 +5985,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -6174,8 +6034,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -6233,8 +6092,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -6292,8 +6150,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -6351,8 +6208,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -6410,8 +6266,7 @@ "end": 38 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -6534,8 +6389,7 @@ "end": 131 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -6602,8 +6456,7 @@ "end": 31 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -6685,8 +6538,7 @@ "end": 57 } ] - }, - "db_results": {} + } }, { "speaker": "user", @@ -6735,8 +6587,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } }, { "speaker": "user", @@ -6785,8 +6636,7 @@ "binary": [], "categorical": [], "non-categorical": [] - }, - "db_results": {} + } } ] } diff --git a/data/unified_datasets/tm2/preprocess.py b/data/unified_datasets/tm2/preprocess.py index b07597d4eb5e79a97242f153948dbf1c2802f44e..6088d2c481b7fd8c20c96235ed0fbd9b7e01dedf 100644 --- a/data/unified_datasets/tm2/preprocess.py +++ b/data/unified_datasets/tm2/preprocess.py @@ -306,18 +306,12 @@ def preprocess(): data_split = dial_id2split[dial_id] dialogue_id = f'{dataset}-{data_split}-{len(dialogues_by_split[data_split])}' cur_domains = [domain] - goal = { - 'description': '', - 'inform': {}, - 'request': {} - } dialogue = { 'dataset': dataset, 'data_split': data_split, 'dialogue_id': dialogue_id, 'original_id': d["conversation_id"], 'domains': cur_domains, - 'goal': goal, 'turns': [] } turns = format_turns(d['utterances']) @@ -398,8 +392,6 @@ def preprocess(): if speaker == 'user': turn['state'] = copy.deepcopy(prev_state) - else: - turn['db_results'] = {} dialogue['turns'].append(turn) dialogues_by_split[data_split].append(dialogue)