diff --git a/convlab/base_models/t5/create_data.py b/convlab/base_models/t5/create_data.py index a1b10adb2ef0440868f4575e2ce2ce76508f13c4..f6f21d9eceaa9aed15a594bbaa17b0488a939da9 100644 --- a/convlab/base_models/t5/create_data.py +++ b/convlab/base_models/t5/create_data.py @@ -65,6 +65,11 @@ def create_dst_data(dataset, data_dir, args): context = '\n'.join([f"{turn['speaker']}: {turn['utterance']}" for turn in sample['context']]+[response]) else: context = response + for domain in sample['state']: + for slot in sample['state'][domain]: + vs = sample['state'][domain][slot].split('|') + # only the first variation of value + sample['state'][domain][slot] = vs[0] state_seq = serialize_dialogue_state(sample['state']) assert equal_state_seq(sample['state'], state_seq), print(sample['state'], state_seq, deserialize_dialogue_state(state_seq)) data.append(json.dumps({'context': context, 'state_seq': state_seq}, ensure_ascii=False)+'\n')