Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision
Loading items

Target

Select target project
  • general/dsml/trippy-public
1 result
Select Git revision
Loading items
Show changes
Commits on Source (3)
Showing
with 2467 additions and 226 deletions
# Store binaries in LFS
## Custom paths
data/ filter=lfs diff=lfs merge=lfs -text
## Archive/Compressed
*.7z filter=lfs diff=lfs merge=lfs -text
*.cpio filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.iso filter=lfs diff=lfs merge=lfs -text
*.bz filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.bzip filter=lfs diff=lfs merge=lfs -text
*.bzip2 filter=lfs diff=lfs merge=lfs -text
*.cab filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.gzip filter=lfs diff=lfs merge=lfs -text
*.lz filter=lfs diff=lfs merge=lfs -text
*.lzma filter=lfs diff=lfs merge=lfs -text
*.lzo filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.z filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.ace filter=lfs diff=lfs merge=lfs -text
*.dmg filter=lfs diff=lfs merge=lfs -text
*.dd filter=lfs diff=lfs merge=lfs -text
*.apk filter=lfs diff=lfs merge=lfs -text
*.ear filter=lfs diff=lfs merge=lfs -text
*.jar filter=lfs diff=lfs merge=lfs -text
*.deb filter=lfs diff=lfs merge=lfs -text
*.cue filter=lfs diff=lfs merge=lfs -text
*.dump filter=lfs diff=lfs merge=lfs -text
## Image
*.jpg filter=lfs diff=lfs merge=lfs -text
*.jpeg filter=lfs diff=lfs merge=lfs -text
*.gif filter=lfs diff=lfs merge=lfs -text
*.png filter=lfs diff=lfs merge=lfs -text
*.psd filter=lfs diff=lfs merge=lfs -text
*.bmp filter=lfs diff=lfs merge=lfs -text
*.dng filter=lfs diff=lfs merge=lfs -text
*.cdr filter=lfs diff=lfs merge=lfs -text
*.indd filter=lfs diff=lfs merge=lfs -text
*.tiff filter=lfs diff=lfs merge=lfs -text
*.tif filter=lfs diff=lfs merge=lfs -text
*.psp filter=lfs diff=lfs merge=lfs -text
*.tga filter=lfs diff=lfs merge=lfs -text
*.eps filter=lfs diff=lfs merge=lfs -text
*.svg filter=lfs diff=lfs merge=lfs -text
## Documents
*.pdf filter=lfs diff=lfs merge=lfs -text
*.doc filter=lfs diff=lfs merge=lfs -text
*.docx filter=lfs diff=lfs merge=lfs -text
*.xls filter=lfs diff=lfs merge=lfs -text
*.xlsx filter=lfs diff=lfs merge=lfs -text
*.ppt filter=lfs diff=lfs merge=lfs -text
*.pptx filter=lfs diff=lfs merge=lfs -text
*.ppz filter=lfs diff=lfs merge=lfs -text
*.dot filter=lfs diff=lfs merge=lfs -text
*.dotx filter=lfs diff=lfs merge=lfs -text
*.lwp filter=lfs diff=lfs merge=lfs -text
*.odm filter=lfs diff=lfs merge=lfs -text
*.odt filter=lfs diff=lfs merge=lfs -text
*.ott filter=lfs diff=lfs merge=lfs -text
*.ods filter=lfs diff=lfs merge=lfs -text
*.ots filter=lfs diff=lfs merge=lfs -text
*.odp filter=lfs diff=lfs merge=lfs -text
*.otp filter=lfs diff=lfs merge=lfs -text
*.odg filter=lfs diff=lfs merge=lfs -text
*.otg filter=lfs diff=lfs merge=lfs -text
*.wps filter=lfs diff=lfs merge=lfs -text
*.wpd filter=lfs diff=lfs merge=lfs -text
*.wpt filter=lfs diff=lfs merge=lfs -text
*.xps filter=lfs diff=lfs merge=lfs -text
*.ttf filter=lfs diff=lfs merge=lfs -text
*.otf filter=lfs diff=lfs merge=lfs -text
*.dvi filter=lfs diff=lfs merge=lfs -text
*.pages filter=lfs diff=lfs merge=lfs -text
*.key filter=lfs diff=lfs merge=lfs -text
## Audio/Video
*.mpg filter=lfs diff=lfs merge=lfs -text
*.mpeg filter=lfs diff=lfs merge=lfs -text
*.mp3 filter=lfs diff=lfs merge=lfs -text
*.mp4 filter=lfs diff=lfs merge=lfs -text
*.avi filter=lfs diff=lfs merge=lfs -text
*.wav filter=lfs diff=lfs merge=lfs -text
*.mkv filter=lfs diff=lfs merge=lfs -text
*.3gp filter=lfs diff=lfs merge=lfs -text
*.flv filter=lfs diff=lfs merge=lfs -text
*.m4v filter=lfs diff=lfs merge=lfs -text
*.ogg filter=lfs diff=lfs merge=lfs -text
*.mov filter=lfs diff=lfs merge=lfs -text
*.wmv filter=lfs diff=lfs merge=lfs -text
*.webm filter=lfs diff=lfs merge=lfs -text
## VM
*.vfd filter=lfs diff=lfs merge=lfs -text
*.vhd filter=lfs diff=lfs merge=lfs -text
*.vmdk filter=lfs diff=lfs merge=lfs -text
*.vmsd filter=lfs diff=lfs merge=lfs -text
*.vmsn filter=lfs diff=lfs merge=lfs -text
*.vmss filter=lfs diff=lfs merge=lfs -text
*.dsk filter=lfs diff=lfs merge=lfs -text
*.vdi filter=lfs diff=lfs merge=lfs -text
*.cow filter=lfs diff=lfs merge=lfs -text
*.qcow filter=lfs diff=lfs merge=lfs -text
*.qcow2 filter=lfs diff=lfs merge=lfs -text
*.qed filter=lfs diff=lfs merge=lfs -text
## Other
*.exe filter=lfs diff=lfs merge=lfs -text
*.sxi filter=lfs diff=lfs merge=lfs -text
*.dat filter=lfs diff=lfs merge=lfs -text
*.data filter=lfs diff=lfs merge=lfs -text
......@@ -2,14 +2,30 @@
# Parameters ------------------------------------------------------
# --- Sim-M dataset
#TASK="sim-m"
#DATA_DIR="data/simulated-dialogue/sim-M"
#DATASET_CONFIG="dataset_config/sim-m.json"
# --- Sim-R dataset
#TASK="sim-r"
#DATA_DIR="data/simulated-dialogue/sim-R"
#DATASET_CONFIG="dataset_config/sim-r.json"
# --- WOZ 2.0 dataset
#TASK="woz2"
#DATA_DIR="data/woz2"
#DATASET_CONFIG="dataset_config/woz2.json"
# --- MultiWOZ 2.1 legacy version dataset
#TASK="multiwoz21_legacy"
#DATA_DIR="data/MULTIWOZ2.1"
#DATASET_CONFIG="dataset_config/multiwoz21.json"
# --- MultiWOZ 2.1 dataset
TASK="multiwoz21"
DATA_DIR="data/MULTIWOZ2.1"
DATA_DIR="data/multiwoz/data/MultiWOZ_2.1"
DATASET_CONFIG="dataset_config/multiwoz21.json"
# --- MultiWOZ 2.1 in ConvLab3's unified data format
#TASK="unified"
#DATA_DIR=""
#DATASET_CONFIG="dataset_config/unified_multiwoz21.json"
# Project paths etc. ----------------------------------------------
......@@ -29,34 +45,28 @@ for step in train dev test; do
python3 run_dst.py \
--task_name=${TASK} \
--data_dir=${DATA_DIR} \
--dataset_config=dataset_config/${TASK}.json \
--dataset_config=${DATASET_CONFIG} \
--model_type="roberta" \
--model_name_or_path="roberta-base" \
--do_lower_case \
--learning_rate=1e-4 \
--num_train_epochs=10 \
--max_seq_length=180 \
--per_gpu_train_batch_size=16 \
--per_gpu_eval_batch_size=16 \
--per_gpu_train_batch_size=32 \
--per_gpu_eval_batch_size=32 \
--output_dir=${OUT_DIR} \
--save_epochs=2 \
--logging_steps=10 \
--warmup_proportion=0.1 \
--eval_all_checkpoints \
--adam_epsilon=1e-6 \
--weight_decay=0.01 \
--label_value_repetitions \
--swap_utterances \
--append_history \
--use_history_labels \
${args_add} \
2>&1 | tee ${OUT_DIR}/${step}.log
if [ "$step" = "dev" ] || [ "$step" = "test" ]; then
python3 metric_bert_dst.py \
${TASK} \
dataset_config/${TASK}.json \
"${OUT_DIR}/pred_res.${step}*json" \
python3 metric_dst.py \
--dataset_config=${DATASET_CONFIG} \
--file_list="${OUT_DIR}/pred_res.${step}*json" \
2>&1 | tee ${OUT_DIR}/eval_pred_${step}.log
fi
done
......@@ -2,14 +2,30 @@
# Parameters ------------------------------------------------------
# --- Sim-M dataset
#TASK="sim-m"
#DATA_DIR="data/simulated-dialogue/sim-M"
#DATASET_CONFIG="dataset_config/sim-m.json"
# --- Sim-R dataset
#TASK="sim-r"
#DATA_DIR="data/simulated-dialogue/sim-R"
#DATASET_CONFIG="dataset_config/sim-r.json"
# --- WOZ 2.0 dataset
#TASK="woz2"
#DATA_DIR="data/woz2"
#DATASET_CONFIG="dataset_config/woz2.json"
# --- MultiWOZ 2.1 legacy version dataset
#TASK="multiwoz21_legacy"
#DATA_DIR="data/MULTIWOZ2.1"
#DATASET_CONFIG="dataset_config/multiwoz21.json"
# --- MultiWOZ 2.1 dataset
TASK="multiwoz21"
DATA_DIR="data/MULTIWOZ2.1"
DATA_DIR="data/multiwoz/data/MultiWOZ_2.1"
DATASET_CONFIG="dataset_config/multiwoz21.json"
# --- MultiWOZ 2.1 in ConvLab3's unified data format
#TASK="unified"
#DATA_DIR=""
#DATASET_CONFIG="dataset_config/unified_multiwoz21.json"
AUX_TASK="cola"
AUX_DATA_DIR="data/aux/roberta_base_cased_lower"
......@@ -24,7 +40,7 @@ mkdir -p ${OUT_DIR}
for step in train dev test; do
args_add=""
if [ "$step" = "train" ]; then
args_add="--do_train --predict_type=dummy"
args_add="--do_train --predict_type=dummy" # INFO: For sim-M, we recommend to add "--svd=0.3"
elif [ "$step" = "dev" ] || [ "$step" = "test" ]; then
args_add="--do_eval --predict_type=${step}"
fi
......@@ -32,7 +48,7 @@ for step in train dev test; do
python3 run_dst_mtl.py \
--task_name=${TASK} \
--data_dir=${DATA_DIR} \
--dataset_config=dataset_config/${TASK}.json \
--dataset_config=${DATASET_CONFIG} \
--model_type="roberta" \
--model_name_or_path="roberta-base" \
--do_lower_case \
......@@ -43,16 +59,11 @@ for step in train dev test; do
--per_gpu_eval_batch_size=1 \
--output_dir=${OUT_DIR} \
--save_epochs=2 \
--logging_steps=10 \
--warmup_proportion=0.1 \
--eval_all_checkpoints \
--adam_epsilon=1e-6 \
--weight_decay=0.01 \
--heads_dropout=0.1 \
--label_value_repetitions \
--swap_utterances \
--append_history \
--use_history_labels \
--delexicalize_sys_utts \
--class_aux_feats_inform \
--class_aux_feats_ds \
......@@ -65,10 +76,9 @@ for step in train dev test; do
2>&1 | tee ${OUT_DIR}/${step}.log
if [ "$step" = "dev" ] || [ "$step" = "test" ]; then
python3 metric_bert_dst.py \
${TASK} \
dataset_config/${TASK}.json \
"${OUT_DIR}/pred_res.${step}*json" \
python3 metric_dst.py \
--dataset_config=${DATASET_CONFIG} \
--file_list="${OUT_DIR}/pred_res.${step}*json" \
2>&1 | tee ${OUT_DIR}/eval_pred_${step}.log
fi
done
......@@ -2,14 +2,30 @@
# Parameters ------------------------------------------------------
# --- Sim-M dataset
#TASK="sim-m"
#DATA_DIR="data/simulated-dialogue/sim-M"
#DATASET_CONFIG="dataset_config/sim-m.json"
# --- Sim-R dataset
#TASK="sim-r"
#DATA_DIR="data/simulated-dialogue/sim-R"
TASK="woz2"
DATA_DIR="data/woz2"
#TASK="multiwoz21"
#DATASET_CONFIG="dataset_config/sim-r.json"
# --- WOZ 2.0 dataset
#TASK="woz2"
#DATA_DIR="data/woz2"
#DATASET_CONFIG="dataset_config/woz2.json"
# --- MultiWOZ 2.1 legacy version dataset
#TASK="multiwoz21_legacy"
#DATA_DIR="data/MULTIWOZ2.1"
#DATASET_CONFIG="dataset_config/multiwoz21.json"
# --- MultiWOZ 2.1 dataset
TASK="multiwoz21"
DATA_DIR="data/multiwoz/data/MultiWOZ_2.1"
DATASET_CONFIG="dataset_config/multiwoz21.json"
# --- MultiWOZ 2.1 in ConvLab3's unified data format
#TASK="unified"
#DATA_DIR=""
#DATASET_CONFIG="dataset_config/unified_multiwoz21.json"
# Project paths etc. ----------------------------------------------
......@@ -21,7 +37,7 @@ mkdir -p ${OUT_DIR}
for step in train dev test; do
args_add=""
if [ "$step" = "train" ]; then
args_add="--do_train --predict_type=dummy"
args_add="--do_train --predict_type=dummy" # INFO: For sim-M, we recommend to add "--svd=0.3"
elif [ "$step" = "dev" ] || [ "$step" = "test" ]; then
args_add="--do_eval --predict_type=${step}"
fi
......@@ -29,7 +45,7 @@ for step in train dev test; do
python3 run_dst.py \
--task_name=${TASK} \
--data_dir=${DATA_DIR} \
--dataset_config=dataset_config/${TASK}.json \
--dataset_config=${DATASET_CONFIG} \
--model_type="bert" \
--model_name_or_path="bert-base-uncased" \
--do_lower_case \
......@@ -40,14 +56,9 @@ for step in train dev test; do
--per_gpu_eval_batch_size=1 \
--output_dir=${OUT_DIR} \
--save_epochs=2 \
--logging_steps=10 \
--warmup_proportion=0.1 \
--eval_all_checkpoints \
--adam_epsilon=1e-6 \
--label_value_repetitions \
--swap_utterances \
--append_history \
--use_history_labels \
--delexicalize_sys_utts \
--class_aux_feats_inform \
--class_aux_feats_ds \
......@@ -55,10 +66,9 @@ for step in train dev test; do
2>&1 | tee ${OUT_DIR}/${step}.log
if [ "$step" = "dev" ] || [ "$step" = "test" ]; then
python3 metric_bert_dst.py \
${TASK} \
dataset_config/${TASK}.json \
"${OUT_DIR}/pred_res.${step}*json" \
python3 metric_dst.py \
--dataset_config=${DATASET_CONFIG} \
--file_list="${OUT_DIR}/pred_res.${step}*json" \
2>&1 | tee ${OUT_DIR}/eval_pred_${step}.log
fi
done
#!/bin/bash
# Train a TripPy DST model and evaluate it on the dev and test splits.
# Select the dataset by uncommenting exactly one TASK/DATA_DIR pair.

# Parameters ------------------------------------------------------
#TASK="sim-m"
#DATA_DIR="data/simulated-dialogue/sim-M"
#TASK="sim-r"
#DATA_DIR="data/simulated-dialogue/sim-R"
TASK="woz2"
DATA_DIR="data/woz2"
#TASK="multiwoz21"
#DATA_DIR="data/MULTIWOZ2.1"

# Project paths etc. ----------------------------------------------
OUT_DIR=results
mkdir -p ${OUT_DIR}

# Main ------------------------------------------------------------
# One pass per phase: train first, then predict on dev and test.
for phase in train dev test; do
    case "${phase}" in
        train)
            args_add="--do_train --predict_type=dummy"
            ;;
        dev|test)
            args_add="--do_eval --predict_type=${phase}"
            ;;
        *)
            args_add=""
            ;;
    esac

    python3 run_dst.py \
        --task_name=${TASK} \
        --data_dir=${DATA_DIR} \
        --dataset_config=dataset_config/${TASK}.json \
        --model_type="bert" \
        --model_name_or_path="bert-base-uncased" \
        --do_lower_case \
        --learning_rate=1e-4 \
        --num_train_epochs=10 \
        --max_seq_length=180 \
        --per_gpu_train_batch_size=48 \
        --per_gpu_eval_batch_size=1 \
        --output_dir=${OUT_DIR} \
        --save_epochs=2 \
        --logging_steps=10 \
        --warmup_proportion=0.1 \
        --eval_all_checkpoints \
        --adam_epsilon=1e-6 \
        --label_value_repetitions \
        ${args_add} \
        2>&1 | tee ${OUT_DIR}/${phase}.log

    # Score the prediction files produced by the eval phases.
    case "${phase}" in
        dev|test)
            python3 metric_bert_dst.py \
                ${TASK} \
                dataset_config/${TASK}.json \
                "${OUT_DIR}/pred_res.${phase}*json" \
                2>&1 | tee ${OUT_DIR}/eval_pred_${phase}.log
            ;;
    esac
done
# *** The upcoming update will add ConvLab-3 support, faster caching, transformers 4.X support and more ***
## Introduction
TripPy is a new approach to dialogue state tracking (DST) which makes use of various copy mechanisms to fill slots with values. Our model has no need to maintain a list of candidate values. Instead, all values are extracted from the dialog context on-the-fly.
TripPy is an approach to dialogue state tracking (DST) that makes use of various copy mechanisms to fill slots with values. Our model has no need to maintain a list of candidate values. Instead, all values are extracted from the dialog context on-the-fly.
A slot is filled by one of three copy mechanisms:
1. Span prediction may extract values directly from the user input;
2. a value may be copied from a system inform memory that keeps track of the system’s inform operations;
3. a value may be copied over from a different slot that is already contained in the dialog state to resolve coreferences within and across domains.
Our approach combines the advantages of span-based slot filling methods with memory methods to avoid the use of value picklists altogether. We argue that our strategy simplifies the DST task while at the same time achieving state-of-the-art performance on various popular evaluation sets including MultiWOZ 2.1.
## Recent updates
- 2022.12.19: Added support for ConvLab-3's unified data format. Added faster caching. Added transformers 4 support.
- 2022.02.15: Added support for MultiWOZ versions 2.2, 2.3, 2.4
## How to run
Two example scripts are provided for how to use TripPy. `DO.example.simple` will train and evaluate a simpler model, whereas `DO.example.advanced` uses the parameters that will result in performance similar to the reported ones. `DO.example.recommended` uses RoBERTa as encoder and the currently recommended set of hyperparameters. For more challenging datasets with longer dialogues, better performance may be achieved by using the maximum sequence length of 512.
Two example scripts are provided for how to use TripPy. `DO.example` will train and evaluate a model with recommended settings. See below list for expected performance per dataset. `DO.example.paper` uses the parameters that were used for experiments in our paper "TripPy: A Triple Copy Strategy for Value Independent Neural Dialog State Tracking". Thus, performance will be similar to the reported ones. For more challenging datasets with longer dialogues, better performance may be achieved by using the maximum sequence length of 512.
`DO.example.mtl` will train a model with multi-task learning (MTL) using an auxiliary task (See our paper "Out-of-Task Training for Dialog State Tracking Models" for details).
`DO.example.mtl` will train a model with multi-task learning (MTL) using an auxiliary task, using the parameters that we used in our paper "Out-of-Task Training for Dialog State Tracking Models".
## Datasets
......@@ -22,13 +26,25 @@ Supported datasets are:
- sim-R (https://github.com/google-research-datasets/simulated-dialogue.git)
- WOZ 2.0 (see data/)
- MultiWOZ 2.0 (https://github.com/budzianowski/multiwoz.git)
- MultiWOZ 2.1 (see data/, https://github.com/budzianowski/multiwoz.git)
- MultiWOZ 2.1 (https://github.com/budzianowski/multiwoz.git)
- MultiWOZ 2.1 legacy version (see data/)
- MultiWOZ 2.2 (https://github.com/budzianowski/multiwoz.git)
- MultiWOZ 2.3 (https://github.com/lexmen318/MultiWOZ-coref.git)
- MultiWOZ 2.4 (https://github.com/smartyfh/MultiWOZ2.4.git)
- Unified data format (currently supported: MultiWOZ 2.1) (see https://github.com/ConvLab/ConvLab-3)
See the README file in `data/` for more details on how to obtain and prepare the datasets for use in TripPy.
The ```--task_name``` is
- 'sim-m', for sim-M
- 'sim-r', for sim-R
- 'woz2', for WOZ 2.0
- 'multiwoz21', for MultiWOZ 2.0-2.4
- 'multiwoz21_legacy', for MultiWOZ 2.1 legacy version
With a sequence length of 180, you should expect the following average JGA:
- 53% for MultiWOZ 2.0
- 56% for MultiWOZ 2.1 legacy version
- 56% for MultiWOZ 2.1
- 56% for MultiWOZ 2.2
- 63% for MultiWOZ 2.3
......@@ -37,11 +53,17 @@ With a sequence length of 180, you should expect the following average JGA:
- 90% for sim-R
- 92% for WOZ 2.0
## ConvLab-3
TripPy is integrated in ConvLab-3 as a ready-to-use dialogue state tracker. A checkpoint is available at HuggingFace (see the ConvLab-3 repo for more details).
If you want to train your own TripPy model for ConvLab-3 from scratch, you can do so by using this code, setting ```--task_name='unified'```. The ```--data_dir``` parameter will be ignored in that case. Pick the file for ```--dataset_config``` according to the dataset you want to train for. For MultiWOZ, this would be 'dataset_config/unified_multiwoz21.json'.
## Requirements
- torch (tested: 1.4.0)
- transformers (tested: 2.9.1)
- tensorboardX (tested: 2.0)
- torch (tested: 1.8.0)
- transformers (tested: 4.18.0)
- tensorboardX (tested: 2.1)
## Citation
......
## Supported datasets
Datasets should go into the ```data/``` folder.
### sim-M & sim-R:
```
git clone https://github.com/google-research-datasets/simulated-dialogue.git
```
### WOZ 2.0
The original URL (http://mi.eng.cam.ac.uk/~nm480/woz_2.0.zip) is not active anymore.
We provide the dataset in ```data/woz2```.
### MultiWOZ 2.0, 2.1 & 2.2
```
git clone https://github.com/budzianowski/multiwoz.git
unzip multiwoz/data/MultiWOZ_2.0.zip -d multiwoz/data/
unzip multiwoz/data/MultiWOZ_2.1.zip -d multiwoz/data/
mv multiwoz/data/MULTIWOZ2\ 2/ multiwoz/data/MultiWOZ_2.0
python3 multiwoz/data/MultiWOZ_2.2/convert_to_multiwoz_format.py --multiwoz21_data_dir=multiwoz/data/MultiWOZ_2.1 --output_file=multiwoz/data/MultiWOZ_2.2/data.json
cp multiwoz/data/MultiWOZ_2.1/valListFile.txt multiwoz/data/MultiWOZ_2.2/
cp multiwoz/data/MultiWOZ_2.1/testListFile.txt multiwoz/data/MultiWOZ_2.2/
python split_multiwoz_data.py --data_dir multiwoz/data/MultiWOZ_2.0
python split_multiwoz_data.py --data_dir multiwoz/data/MultiWOZ_2.1
python split_multiwoz_data.py --data_dir multiwoz/data/MultiWOZ_2.2
```
### MultiWOZ 2.1 legacy version
With "legacy version" we refer to the mid 2019 version of MultiWOZ 2.1, which can be found at https://doi.org/10.17863/CAM.41572
We used this version when we built TripPy. We provide the exact data that we used in ```data/MULTIWOZ2.1_legacy```.
The dataset has since been updated and the most recent version of MultiWOZ 2.1 differs slightly from the version we used for the experiments that we report in [TripPy: A Triple Copy Strategy for Value Independent Neural Dialog State Tracking](https://www.aclweb.org/anthology/2020.sigdial-1.4/). Our code supports both the new version as well as the legacy version of MultiWOZ.
### MultiWOZ 2.3
```
git clone https://github.com/lexmen318/MultiWOZ-coref.git
```
### MultiWOZ 2.4
```
git clone https://github.com/smartyfh/MultiWOZ2.4.git
```
# coding=utf-8
#
# Copyright 2020-2022 Heinrich Heine University Duesseldorf
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import json
import os
def main():
    """Split a MultiWOZ-style ``data.json`` into train/val/test dialogue files.

    Reads ``data.json`` from ``--data_dir`` together with the validation and
    test dialogue-ID list files (``valListFile``/``testListFile``, preferring
    the ``.json`` variant and falling back to ``.txt``), partitions the
    dialogues by ID, and writes ``train_dials.json``, ``val_dials.json`` and
    ``test_dials.json`` back into the same directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", default=None, type=str, required=True, help="Task database.")
    args = parser.parse_args()

    with open(os.path.join(args.data_dir, "data.json"), encoding="utf-8") as f:
        data = json.load(f)

    def _read_id_list(basename):
        # Newer MultiWOZ releases ship the lists as .json, older ones as .txt;
        # both are plain newline-separated dialogue IDs. Return a set so the
        # per-dialogue membership tests below are O(1) instead of O(n).
        path = os.path.join(args.data_dir, basename + ".json")
        if not os.path.isfile(path):
            path = os.path.join(args.data_dir, basename + ".txt")
        with open(path, encoding="utf-8") as f:
            return set(f.read().splitlines())

    val_set = _read_id_list("valListFile")
    test_set = _read_id_list("testListFile")

    train, val, test = {}, {}, {}
    for dial_id, dial in data.items():
        if dial_id in val_set:
            val[dial_id] = dial
        elif dial_id in test_set:
            test[dial_id] = dial
        else:
            train[dial_id] = dial
    # Report split sizes (total, train, val, test) for a quick sanity check.
    print(len(data), len(train), len(val), len(test))

    for out_name, split in (("train_dials.json", train),
                            ("val_dials.json", val),
                            ("test_dials.json", test)):
        with open(os.path.join(args.data_dir, out_name), "w", encoding="utf-8") as f:
            json.dump(split, f, indent=4)


if __name__ == "__main__":
    main()
# coding=utf-8
#
# Copyright 2020 Heinrich Heine University Duesseldorf
# Copyright 2020-2022 Heinrich Heine University Duesseldorf
#
# Part of this code is based on the source code of BERT-DST
# (arXiv:1907.03040)
......@@ -23,24 +23,39 @@ import json
import dataset_woz2
import dataset_sim
import dataset_multiwoz21
import dataset_multiwoz21_legacy
import dataset_aux_task
import dataset_unified
class DataProcessor(object):
dataset_name = ""
class_types = []
slot_list = []
label_maps = {}
def __init__(self, dataset_config):
# Load dataset config file.
with open(dataset_config, "r", encoding='utf-8') as f:
raw_config = json.load(f)
self.class_types = raw_config['class_types']
self.slot_list = raw_config['slots']
self.label_maps = raw_config['label_maps']
self.class_types = raw_config['class_types'] # Required
self.slot_list = raw_config['slots'] if 'slots' in raw_config else []
self.label_maps = raw_config['label_maps'] if 'label_maps' in raw_config else {}
self.dataset_name = raw_config['dataset_name'] if 'dataset_name' in raw_config else ""
# If no slot list is provided, generate it from the data.
if len(self.slot_list) == 0:
self.slot_list = self._get_slot_list()
def _get_slot_list(self):
raise NotImplementedError()
def get_train_examples(self, data_dir, **args):
def get_train_examples(self):
raise NotImplementedError()
def get_dev_examples(self, data_dir, **args):
def get_dev_examples(self):
raise NotImplementedError()
def get_test_examples(self, data_dir, **args):
def get_test_examples(self):
raise NotImplementedError()
......@@ -61,16 +76,30 @@ class Woz2Processor(DataProcessor):
class Multiwoz21Processor(DataProcessor):
def get_train_examples(self, data_dir, args):
return dataset_multiwoz21.create_examples(os.path.join(data_dir, 'train_dials.json'),
'train', self.class_types, self.slot_list, self.label_maps, **args)
def get_dev_examples(self, data_dir, args):
return dataset_multiwoz21.create_examples(os.path.join(data_dir, 'val_dials.json'),
'dev', self.class_types, self.slot_list, self.label_maps, **args)
def get_test_examples(self, data_dir, args):
return dataset_multiwoz21.create_examples(os.path.join(data_dir, 'test_dials.json'),
'test', self.class_types, self.slot_list, self.label_maps, **args)
class Multiwoz21LegacyProcessor(DataProcessor):
def get_train_examples(self, data_dir, args):
return dataset_multiwoz21_legacy.create_examples(os.path.join(data_dir, 'train_dials.json'),
os.path.join(data_dir, 'dialogue_acts.json'),
'train', self.slot_list, self.label_maps, **args)
def get_dev_examples(self, data_dir, args):
return dataset_multiwoz21.create_examples(os.path.join(data_dir, 'val_dials.json'),
return dataset_multiwoz21_legacy.create_examples(os.path.join(data_dir, 'val_dials.json'),
os.path.join(data_dir, 'dialogue_acts.json'),
'dev', self.slot_list, self.label_maps, **args)
def get_test_examples(self, data_dir, args):
return dataset_multiwoz21.create_examples(os.path.join(data_dir, 'test_dials.json'),
return dataset_multiwoz21_legacy.create_examples(os.path.join(data_dir, 'test_dials.json'),
os.path.join(data_dir, 'dialogue_acts.json'),
'test', self.slot_list, self.label_maps, **args)
......@@ -89,6 +118,23 @@ class SimProcessor(DataProcessor):
'test', self.slot_list, **args)
class UnifiedDatasetProcessor(DataProcessor):
def _get_slot_list(self):
return dataset_unified.get_slot_list(self.dataset_name)
def get_train_examples(self, data_dir, args):
return dataset_unified.create_examples('train', self.dataset_name, self.class_types,
self.slot_list, self.label_maps, **args)
def get_dev_examples(self, data_dir, args):
return dataset_unified.create_examples('validation', self.dataset_name, self.class_types,
self.slot_list, self.label_maps, **args)
def get_test_examples(self, data_dir, args):
return dataset_unified.create_examples('test', self.dataset_name, self.class_types,
self.slot_list, self.label_maps, **args)
class AuxTaskProcessor(object):
def get_aux_task_examples(self, data_dir, data_name, max_seq_length):
file_path = os.path.join(data_dir, '{}_train.json'.format(data_name))
......@@ -99,4 +145,6 @@ PROCESSORS = {"woz2": Woz2Processor,
"sim-m": SimProcessor,
"sim-r": SimProcessor,
"multiwoz21": Multiwoz21Processor,
"multiwoz21_legacy": Multiwoz21LegacyProcessor,
"unified": UnifiedDatasetProcessor,
"aux_task": AuxTaskProcessor}
# coding=utf-8
#
# Copyright 2020 Heinrich Heine University Duesseldorf
# Copyright 2020-2022 Heinrich Heine University Duesseldorf
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
This diff is collapsed.
# coding=utf-8
#
# Copyright 2020 Heinrich Heine University Duesseldorf
# Copyright 2020-2022 Heinrich Heine University Duesseldorf
#
# Part of this code is based on the source code of BERT-DST
# (arXiv:1907.03040)
......@@ -19,6 +19,7 @@
import json
import re
from tqdm import tqdm
from utils_dst import (DSTExample, convert_to_unicode)
......@@ -64,54 +65,6 @@ ACTS_DICT = {'taxi-depart': 'taxi-departure',
}
LABEL_MAPS = {} # Loaded from file
# Load dialogue_acts.json and collect, per (dialogue, turn, slot),
# the values that the system informed.
def load_acts(input_file):
    """Return a dict mapping (dialogue_id, turn_id, slot) -> [informed value]."""
    with open(input_file) as handle:
        acts = json.load(handle)

    informed = {}
    for dial_id, turns in acts.items():
        for turn_id, annotation in turns.items():
            # Only process turns that actually carry an annotation dict.
            if not isinstance(annotation, dict):
                continue
            # MultiWOZ 2.2 format nests the acts under 'dialog_act' and
            # uses a flat turn numbering where even indices are skipped.
            is_22_format = 'dialog_act' in annotation
            if is_22_format:
                if int(turn_id) % 2 == 0:
                    continue
                act_list = annotation['dialog_act']
            else:
                act_list = annotation
            for act_name in act_list:
                name_parts = act_name.lower().split('-')
                if name_parts[1] not in ('inform', 'recommend', 'select', 'book'):
                    continue
                for pair in act_list[act_name]:
                    slot_name = pair[0].lower()
                    value = pair[1].lower().strip()
                    if slot_name == 'none' or value in ('?', 'none'):
                        continue
                    slot = name_parts[0] + '-' + slot_name
                    slot = ACTS_DICT.get(slot, slot)
                    if is_22_format:
                        # Convert the 2.2 turn index back to the legacy numbering.
                        key = (dial_id, str(int(int(turn_id) / 2 + 1)), slot)
                    else:
                        key = (dial_id + '.json', turn_id, slot)
                    # In case of multiple mentioned values, keep the first
                    # informed one (the alternative would be to keep the last).
                    informed.setdefault(key, [value])
    return informed
def normalize_time(text):
text = re.sub("(\d{1})(a\.?m\.?|p\.?m\.?)", r"\1 \2", text) # am/pm without space
text = re.sub("(^| )(\d{1,2}) (a\.?m\.?|p\.?m\.?)", r"\1\2:00 \3", text) # am/pm short to long form
......@@ -143,8 +96,7 @@ def normalize_text(text):
return text
# This should only contain label normalizations. All other mappings should
# be defined in LABEL_MAPS.
# This should only contain label normalizations, no label mappings.
def normalize_label(slot, value_label):
# Normalization of capitalization
if isinstance(value_label, str):
......@@ -166,7 +118,7 @@ def normalize_label(slot, value_label):
return "dontcare"
# Normalization of time slots
if "leaveAt" in slot or "arriveBy" in slot or slot == 'restaurant-book_time':
if "leave" in slot or "arrive" in slot or "time" in slot:
return normalize_time(value_label)
# Normalization
......@@ -203,18 +155,18 @@ def get_token_pos(tok_list, value_label):
return found, find_pos
def check_label_existence(value_label, usr_utt_tok):
def check_label_existence(value_label, usr_utt_tok, label_maps={}):
in_usr, usr_pos = get_token_pos(usr_utt_tok, value_label)
# If no hit even though there should be one, check for value label variants
if not in_usr and value_label in LABEL_MAPS:
for value_label_variant in LABEL_MAPS[value_label]:
if not in_usr and value_label in label_maps:
for value_label_variant in label_maps[value_label]:
in_usr, usr_pos = get_token_pos(usr_utt_tok, value_label_variant)
if in_usr:
break
return in_usr, usr_pos
def check_slot_referral(value_label, slot, seen_slots):
def check_slot_referral(value_label, slot, seen_slots, label_maps={}):
referred_slot = 'none'
if slot == 'hotel-stars' or slot == 'hotel-internet' or slot == 'hotel-parking':
return referred_slot
......@@ -231,8 +183,8 @@ def check_slot_referral(value_label, slot, seen_slots):
if seen_slots[s] == value_label:
referred_slot = s
break
elif value_label in LABEL_MAPS:
for value_label_variant in LABEL_MAPS[value_label]:
elif value_label in label_maps:
for value_label_variant in label_maps[value_label]:
if seen_slots[s] == value_label_variant:
referred_slot = s
break
......@@ -266,7 +218,7 @@ def delex_utt(utt, values, unk_token="[UNK]"):
# Fuzzy matching to label informed slot values
def check_slot_inform(value_label, inform_label):
def check_slot_inform(value_label, inform_label, label_maps={}):
result = False
informed_value = 'none'
vl = ' '.join(tokenize(value_label))
......@@ -277,8 +229,8 @@ def check_slot_inform(value_label, inform_label):
result = True
elif is_in_list(vl, il):
result = True
elif il in LABEL_MAPS:
for il_variant in LABEL_MAPS[il]:
elif il in label_maps:
for il_variant in label_maps[il]:
if vl == il_variant:
result = True
break
......@@ -288,8 +240,8 @@ def check_slot_inform(value_label, inform_label):
elif is_in_list(vl, il_variant):
result = True
break
elif vl in LABEL_MAPS:
for value_label_variant in LABEL_MAPS[vl]:
elif vl in label_maps:
for value_label_variant in label_maps[vl]:
if value_label_variant == il:
result = True
break
......@@ -305,15 +257,15 @@ def check_slot_inform(value_label, inform_label):
return result, informed_value
def get_turn_label(value_label, inform_label, sys_utt_tok, usr_utt_tok, slot, seen_slots, slot_last_occurrence):
def get_turn_label(value_label, inform_label, sys_utt_tok, usr_utt_tok, slot, seen_slots, slot_last_occurrence, label_maps={}):
usr_utt_tok_label = [0 for _ in usr_utt_tok]
informed_value = 'none'
referred_slot = 'none'
if value_label == 'none' or value_label == 'dontcare' or value_label == 'true' or value_label == 'false':
class_type = value_label
else:
in_usr, usr_pos = check_label_existence(value_label, usr_utt_tok)
is_informed, informed_value = check_slot_inform(value_label, inform_label)
in_usr, usr_pos = check_label_existence(value_label, usr_utt_tok, label_maps)
is_informed, informed_value = check_slot_inform(value_label, inform_label, label_maps)
if in_usr:
class_type = 'copy_value'
if slot_last_occurrence:
......@@ -327,7 +279,7 @@ def get_turn_label(value_label, inform_label, sys_utt_tok, usr_utt_tok, slot, se
elif is_informed:
class_type = 'inform'
else:
referred_slot = check_slot_referral(value_label, slot, seen_slots)
referred_slot = check_slot_referral(value_label, slot, seen_slots, label_maps)
if referred_slot != 'none':
class_type = 'refer'
else:
......@@ -335,6 +287,21 @@ def get_turn_label(value_label, inform_label, sys_utt_tok, usr_utt_tok, slot, se
return informed_value, referred_slot, usr_utt_tok_label, class_type
# Requestable slots, general acts and domain indicator slots
def is_request(slot, user_act, turn_domains):
    """Decide whether `slot` should get class type 'request' in this turn.

    Returns True when the user act(s) recorded for `slot` carry a
    request-like intent ('request', 'bye', 'thank', 'greet'), or when
    `slot` is a domain indicator slot ('<domain>-none') whose domain is
    among the domains active in this turn.
    """
    request_intents = ['request', 'bye', 'thank', 'greet']
    if slot in user_act:
        acts = user_act[slot]
        # The act entry may be a single act dict or a list of act dicts.
        candidates = acts if isinstance(acts, list) else [acts]
        if any(a['intent'] in request_intents for a in candidates):
            return True
    domain, slot_name = slot.split('-')
    return slot_name == 'none' and domain in turn_domains
def tokenize(utt):
utt_lower = convert_to_unicode(utt).lower()
utt_lower = normalize_text(utt_lower)
......@@ -346,27 +313,22 @@ def utt_to_token(utt):
return [tok for tok in map(lambda x: re.sub(" ", "", x), re.split("(\W+)", utt)) if len(tok) > 0]
def create_examples(input_file, acts_file, set_type, slot_list,
def create_examples(input_file, set_type, class_types, slot_list,
label_maps={},
append_history=False,
use_history_labels=False,
no_append_history=False,
no_use_history_labels=False,
no_label_value_repetitions=False,
swap_utterances=False,
label_value_repetitions=False,
delexicalize_sys_utts=False,
unk_token="[UNK]",
analyze=False):
"""Read a DST json file into a list of DSTExample."""
sys_inform_dict = load_acts(acts_file)
with open(input_file, "r", encoding='utf-8') as reader:
input_data = json.load(reader)
global LABEL_MAPS
LABEL_MAPS = label_maps
examples = []
for dialog_id in input_data:
for d_itr, dialog_id in enumerate(tqdm(input_data)):
entry = input_data[dialog_id]
utterances = entry['log']
......@@ -376,6 +338,9 @@ def create_examples(input_file, acts_file, set_type, slot_list,
# First system utterance is empty, since multiwoz starts with user input
utt_tok_list = [[]]
mod_slots_list = [{}]
inform_dict_list = [{}]
user_act_dict_list = [{}]
mod_domains_list = [{}]
# Collect all utterances and their metadata
usr_sys_switch = True
......@@ -391,17 +356,46 @@ def create_examples(input_file, acts_file, set_type, slot_list,
if is_sys_utt:
turn_itr += 1
# Delexicalize sys utterance
if delexicalize_sys_utts and is_sys_utt:
inform_dict = {slot: 'none' for slot in slot_list}
for slot in slot_list:
if (str(dialog_id), str(turn_itr), slot) in sys_inform_dict:
inform_dict[slot] = sys_inform_dict[(str(dialog_id), str(turn_itr), slot)]
utt_tok_list.append(delex_utt(utt['text'], inform_dict, unk_token)) # normalize utterances
else:
utt_tok_list.append(tokenize(utt['text'])) # normalize utterances
# Extract dialog_act information for sys and usr utts.
inform_dict = {}
user_act_dict = {}
modified_slots = {}
modified_domains = set()
if 'dialog_act' in utt:
for a in utt['dialog_act']:
aa = a.lower().split('-')
for i in utt['dialog_act'][a]:
s = i[0].lower()
# Some special intents are modeled as slots in TripPy
if aa[0] == 'general':
cs = "%s-%s" % (aa[0], aa[1])
else:
cs = "%s-%s" % (aa[0], s)
if cs in ACTS_DICT:
cs = ACTS_DICT[cs]
v = normalize_label(cs, i[1].lower().strip())
if cs in ['hotel-internet', 'hotel-parking']:
v = 'true'
modified_domains.add(aa[0]) # Remember domains
if is_sys_utt and aa[1] in ['inform', 'recommend', 'select', 'book'] and v != 'none':
if cs not in inform_dict:
inform_dict[cs] = []
inform_dict[cs].append(v)
elif not is_sys_utt:
if cs not in user_act_dict:
user_act_dict[cs] = []
user_act_dict[cs] = {'domain': aa[0], 'intent': aa[1], 'slot': s, 'value': v}
# INFO: Since the model has no mechanism to predict
# one among several informed value candidates, we
# keep only one informed value. For fairness, we
# apply a global rule:
for e in inform_dict:
# ... Option 1: Always keep first informed value
inform_dict[e] = list([inform_dict[e][0]])
# ... Option 2: Always keep last informed value
#inform_dict[e] = list([inform_dict[e][-1]])
else:
print("WARN: dialogue %s is missing dialog_act information." % dialog_id)
# If sys utt, extract metadata (identify and collect modified slots)
if is_sys_utt:
......@@ -424,8 +418,20 @@ def create_examples(input_file, acts_file, set_type, slot_list,
if cs in slot_list and cumulative_labels[cs] != value_label:
modified_slots[cs] = value_label
cumulative_labels[cs] = value_label
modified_domains.add(cs.split("-")[0]) # Remember domains
# Delexicalize sys utterance
if delexicalize_sys_utts and is_sys_utt:
utt_tok_list.append(delex_utt(utt['text'], inform_dict, unk_token)) # normalizes utterances
else:
utt_tok_list.append(tokenize(utt['text'])) # normalizes utterances
inform_dict_list.append(inform_dict.copy())
user_act_dict_list.append(user_act_dict.copy())
mod_slots_list.append(modified_slots.copy())
modified_domains = list(modified_domains)
modified_domains.sort()
mod_domains_list.append(modified_domains)
# Form proper (usr, sys) turns
turn_itr = 0
......@@ -446,14 +452,17 @@ def create_examples(input_file, acts_file, set_type, slot_list,
class_type_dict = {}
# Collect turn data
if append_history:
if swap_utterances:
if not no_append_history:
if not swap_utterances:
hst_utt_tok = usr_utt_tok + sys_utt_tok + hst_utt_tok
else:
hst_utt_tok = sys_utt_tok + usr_utt_tok + hst_utt_tok
sys_utt_tok = utt_tok_list[i - 1]
usr_utt_tok = utt_tok_list[i]
turn_slots = mod_slots_list[i + 1]
inform_mem = inform_dict_list[i - 1]
user_act = user_act_dict_list[i]
turn_domains = mod_domains_list[i + 1]
guid = '%s-%s-%s' % (set_type, str(dialog_id), str(turn_itr))
......@@ -472,17 +481,18 @@ def create_examples(input_file, acts_file, set_type, slot_list,
# modify any of the original labels for test sets,
# since this would make comparison difficult.
value_dict[slot] = value_label
elif label_value_repetitions and slot in diag_seen_slots_dict:
elif not no_label_value_repetitions and slot in diag_seen_slots_dict:
value_label = diag_seen_slots_value_dict[slot]
# Get dialog act annotations
inform_label = list(['none'])
inform_slot_dict[slot] = 0
if (str(dialog_id), str(turn_itr), slot) in sys_inform_dict:
inform_label = list([normalize_label(slot, i) for i in sys_inform_dict[(str(dialog_id), str(turn_itr), slot)]])
booking_slot = 'booking-' + slot.split('-')[1]
if slot in inform_mem:
inform_label = inform_mem[slot]
inform_slot_dict[slot] = 1
elif (str(dialog_id), str(turn_itr), 'booking-' + slot.split('-')[1]) in sys_inform_dict:
inform_label = list([normalize_label(slot, i) for i in sys_inform_dict[(str(dialog_id), str(turn_itr), 'booking-' + slot.split('-')[1])]])
elif booking_slot in inform_mem:
inform_label = inform_mem[booking_slot]
inform_slot_dict[slot] = 1
(informed_value,
......@@ -494,17 +504,25 @@ def create_examples(input_file, acts_file, set_type, slot_list,
usr_utt_tok,
slot,
diag_seen_slots_value_dict,
slot_last_occurrence=True)
slot_last_occurrence=True,
label_maps=label_maps)
inform_dict[slot] = informed_value
# Requestable slots, domain indicator slots and general slots
# should have class_type 'request', if they ought to be predicted.
# Give other class_types preference.
if 'request' in class_types:
if class_type in ['none', 'unpointable'] and is_request(slot, user_act, turn_domains):
class_type = 'request'
# Generally don't use span prediction on sys utterance (but inform prediction instead).
sys_utt_tok_label = [0 for _ in sys_utt_tok]
# Determine what to do with value repetitions.
# If value is unique in seen slots, then tag it, otherwise not,
# since correct slot assignment can not be guaranteed anymore.
if label_value_repetitions and slot in diag_seen_slots_dict:
if not no_label_value_repetitions and slot in diag_seen_slots_dict:
if class_type == 'copy_value' and list(diag_seen_slots_value_dict.values()).count(value_label) > 1:
class_type = 'none'
usr_utt_tok_label = [0 for _ in usr_utt_tok_label]
......@@ -512,9 +530,9 @@ def create_examples(input_file, acts_file, set_type, slot_list,
sys_utt_tok_label_dict[slot] = sys_utt_tok_label
usr_utt_tok_label_dict[slot] = usr_utt_tok_label
if append_history:
if use_history_labels:
if swap_utterances:
if not no_append_history:
if not no_use_history_labels:
if not swap_utterances:
new_hst_utt_tok_label_dict[slot] = usr_utt_tok_label + sys_utt_tok_label + new_hst_utt_tok_label_dict[slot]
else:
new_hst_utt_tok_label_dict[slot] = sys_utt_tok_label + usr_utt_tok_label + new_hst_utt_tok_label_dict[slot]
......@@ -556,7 +574,7 @@ def create_examples(input_file, acts_file, set_type, slot_list,
if analyze:
print("]")
if swap_utterances:
if not swap_utterances:
txt_a = usr_utt_tok
txt_b = sys_utt_tok
txt_a_lbl = usr_utt_tok_label_dict
......
# coding=utf-8
#
# Copyright 2020-2022 Heinrich Heine University Duesseldorf
#
# Part of this code is based on the source code of BERT-DST
# (arXiv:1907.03040)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import re
from tqdm import tqdm
from utils_dst import (DSTExample)
from dataset_multiwoz21 import (ACTS_DICT, is_request,
tokenize, normalize_label,
get_turn_label, delex_utt)
# Loads the dialogue_acts.json and returns a list
# of slot-value pairs.
def load_acts(input_file):
    """Load a MultiWOZ dialogue_acts.json file and collect informed slot-value pairs.

    Handles both the 2.0/2.1 act file layout and the 2.2 layout, where the
    acts are nested under a 'dialog_act' key and user/system turns share one
    joint turn numbering. Returns a dict mapping
    (dialog_id, turn_id, slot) -> [value].
    """
    with open(input_file) as handle:
        raw_acts = json.load(handle)

    collected = {}
    for dial_id, dial_turns in raw_acts.items():
        for turn_id, turn_acts in dial_turns.items():
            # Turns without annotation are not dicts; skip them.
            if not isinstance(turn_acts, dict):
                continue
            is_22_format = 'dialog_act' in turn_acts
            if is_22_format:
                act_map = turn_acts['dialog_act']
                # In the 2.2 layout, even joint turn indices are user turns
                # and carry no system inform acts.
                if int(turn_id) % 2 == 0:
                    continue
            else:
                act_map = turn_acts
            for act_name, act_items in act_map.items():
                domain, intent = act_name.lower().split('-')[:2]
                if intent not in ['inform', 'recommend', 'select', 'book']:
                    continue
                for item in act_items:
                    slot_name = item[0].lower()
                    value = item[1].lower().strip()
                    if slot_name == 'none' or value in ('?', 'none'):
                        continue
                    slot = domain + '-' + slot_name
                    slot = ACTS_DICT.get(slot, slot)
                    if is_22_format:
                        # Map the joint turn numbering back to system-turn numbering.
                        t_key = str(int(int(turn_id) / 2 + 1))
                        d_key = dial_id
                    else:
                        t_key = turn_id
                        d_key = dial_id + '.json'
                    key = d_key, t_key, slot
                    # INFO: Since the model has no mechanism to predict
                    # one among several informed value candidates, we
                    # keep only one informed value. For fairness, we
                    # apply a global rule:
                    # ... Option 1: Keep first informed value
                    if key not in collected:
                        collected[key] = [value]
                    # ... Option 2: Keep last informed value
                    #collected[key] = [value]
    return collected
def create_examples(input_file, acts_file, set_type, slot_list,
                    label_maps={},
                    no_append_history=False,
                    no_use_history_labels=False,
                    no_label_value_repetitions=False,
                    swap_utterances=False,
                    delexicalize_sys_utts=False,
                    unk_token="[UNK]",
                    analyze=False):
    """Read a MultiWOZ DST json file into a list of DSTExample.

    Args:
        input_file: Path to the MultiWOZ dialogues json (dialog_id -> {'log': [...]}).
        acts_file: Path to dialogue_acts.json; parsed via load_acts() to find
            values informed by the system.
        set_type: Dataset split name, used as prefix of each example guid.
        slot_list: List of tracked slot names ('<domain>-<slot>').
        label_maps: Dict of value -> list of equivalent value variants, passed
            through to get_turn_label() for fuzzy label matching.
            NOTE(review): mutable default argument; appears to be read-only
            here, but consider defaulting to None.
        no_append_history: If True, do not prepend previous turns to the history.
        no_use_history_labels: If True, zero out span labels in the history.
        no_label_value_repetitions: If True, do not re-label values already
            seen earlier in the dialog.
        swap_utterances: Controls whether (usr, sys) or (sys, usr) becomes
            (text_a, text_b) in the examples and the history order.
        delexicalize_sys_utts: If True, replace informed values in system
            utterances with unk_token.
        unk_token: Replacement token used by delex_utt().
        analyze: If True, print per-turn label analysis to stdout.

    Returns:
        List of DSTExample, one per (usr, sys) turn pair.
    """
    sys_inform_dict = load_acts(acts_file)

    with open(input_file, "r", encoding='utf-8') as reader:
        input_data = json.load(reader)

    examples = []
    for d_itr, dialog_id in enumerate(tqdm(input_data)):
        entry = input_data[dialog_id]
        utterances = entry['log']

        # Collects all slot changes throughout the dialog
        cumulative_labels = {slot: 'none' for slot in slot_list}

        # First system utterance is empty, since multiwoz starts with user input
        utt_tok_list = [[]]
        mod_slots_list = [{}]

        # Collect all utterances and their metadata
        usr_sys_switch = True
        turn_itr = 0
        for utt in utterances:
            # Assert that system and user utterances alternate.
            # System turns are recognized by a non-empty 'metadata' field.
            is_sys_utt = utt['metadata'] != {}
            if usr_sys_switch == is_sys_utt:
                print("WARN: Wrong order of system and user utterances. Skipping rest of dialog %s" % (dialog_id))
                break
            usr_sys_switch = is_sys_utt

            if is_sys_utt:
                turn_itr += 1

            # Delexicalize sys utterance: mask all values the system informed
            # in this turn (looked up in the acts file) with unk_token.
            if delexicalize_sys_utts and is_sys_utt:
                inform_dict = {slot: 'none' for slot in slot_list}
                for slot in slot_list:
                    if (str(dialog_id), str(turn_itr), slot) in sys_inform_dict:
                        inform_dict[slot] = sys_inform_dict[(str(dialog_id), str(turn_itr), slot)]
                utt_tok_list.append(delex_utt(utt['text'], inform_dict, unk_token)) # normalize utterances
            else:
                utt_tok_list.append(tokenize(utt['text'])) # normalize utterances

            modified_slots = {}

            # If sys utt, extract metadata (identify and collect modified slots)
            if is_sys_utt:
                for d in utt['metadata']:
                    booked = utt['metadata'][d]['book']['booked']
                    booked_slots = {}
                    # Check the booked section
                    if booked != []:
                        for s in booked[0]:
                            booked_slots[s] = normalize_label('%s-%s' % (d, s), booked[0][s]) # normalize labels
                    # Check the semi and the inform slots
                    for category in ['book', 'semi']:
                        for s in utt['metadata'][d][category]:
                            cs = '%s-book_%s' % (d, s) if category == 'book' else '%s-%s' % (d, s)
                            value_label = normalize_label(cs, utt['metadata'][d][category][s]) # normalize labels
                            # Prefer the slot value as stored in the booked section
                            if s in booked_slots:
                                value_label = booked_slots[s]
                            # Remember modified slots and entire dialog state
                            if cs in slot_list and cumulative_labels[cs] != value_label:
                                modified_slots[cs] = value_label
                                cumulative_labels[cs] = value_label

            mod_slots_list.append(modified_slots.copy())

        # Form proper (usr, sys) turns
        turn_itr = 0
        diag_seen_slots_dict = {}
        diag_seen_slots_value_dict = {slot: 'none' for slot in slot_list}
        diag_state = {slot: 'none' for slot in slot_list}
        sys_utt_tok = []
        usr_utt_tok = []
        hst_utt_tok = []
        hst_utt_tok_label_dict = {slot: [] for slot in slot_list}
        # utt_tok_list alternates sys/usr starting with the artificial empty
        # first system utterance, so i indexes the user turn of each pair.
        for i in range(1, len(utt_tok_list) - 1, 2):
            sys_utt_tok_label_dict = {}
            usr_utt_tok_label_dict = {}
            value_dict = {}
            inform_dict = {}
            inform_slot_dict = {}
            referral_dict = {}
            class_type_dict = {}

            # Collect turn data: fold the previous turn pair into the history
            # before loading the current pair.
            if not no_append_history:
                if not swap_utterances:
                    hst_utt_tok = usr_utt_tok + sys_utt_tok + hst_utt_tok
                else:
                    hst_utt_tok = sys_utt_tok + usr_utt_tok + hst_utt_tok
            sys_utt_tok = utt_tok_list[i - 1]
            usr_utt_tok = utt_tok_list[i]
            # Slot changes are attached to the system turn that FOLLOWS the
            # user turn, hence i + 1.
            turn_slots = mod_slots_list[i + 1]

            guid = '%s-%s-%s' % (set_type, str(dialog_id), str(turn_itr))

            if analyze:
                print("%15s %2s %s ||| %s" % (dialog_id, turn_itr, ' '.join(sys_utt_tok), ' '.join(usr_utt_tok)))
                print("%15s %2s [" % (dialog_id, turn_itr), end='')

            new_hst_utt_tok_label_dict = hst_utt_tok_label_dict.copy()
            new_diag_state = diag_state.copy()
            for slot in slot_list:
                value_label = 'none'
                if slot in turn_slots:
                    value_label = turn_slots[slot]
                    # We keep the original labels so as to not
                    # overlook unpointable values, as well as to not
                    # modify any of the original labels for test sets,
                    # since this would make comparison difficult.
                    value_dict[slot] = value_label
                elif not no_label_value_repetitions and slot in diag_seen_slots_dict:
                    value_label = diag_seen_slots_value_dict[slot]

                # Get dialog act annotations: values the system informed for
                # this slot, either directly or via the generic 'booking' domain.
                inform_label = list(['none'])
                inform_slot_dict[slot] = 0
                if (str(dialog_id), str(turn_itr), slot) in sys_inform_dict:
                    inform_label = list([normalize_label(slot, i) for i in sys_inform_dict[(str(dialog_id), str(turn_itr), slot)]])
                    inform_slot_dict[slot] = 1
                elif (str(dialog_id), str(turn_itr), 'booking-' + slot.split('-')[1]) in sys_inform_dict:
                    inform_label = list([normalize_label(slot, i) for i in sys_inform_dict[(str(dialog_id), str(turn_itr), 'booking-' + slot.split('-')[1])]])
                    inform_slot_dict[slot] = 1

                (informed_value,
                 referred_slot,
                 usr_utt_tok_label,
                 class_type) = get_turn_label(value_label,
                                              inform_label,
                                              sys_utt_tok,
                                              usr_utt_tok,
                                              slot,
                                              diag_seen_slots_value_dict,
                                              slot_last_occurrence=True,
                                              label_maps=label_maps)

                inform_dict[slot] = informed_value

                # Generally don't use span prediction on sys utterance (but inform prediction instead).
                sys_utt_tok_label = [0 for _ in sys_utt_tok]

                # Determine what to do with value repetitions.
                # If value is unique in seen slots, then tag it, otherwise not,
                # since correct slot assignment can not be guaranteed anymore.
                if not no_label_value_repetitions and slot in diag_seen_slots_dict:
                    if class_type == 'copy_value' and list(diag_seen_slots_value_dict.values()).count(value_label) > 1:
                        class_type = 'none'
                        usr_utt_tok_label = [0 for _ in usr_utt_tok_label]

                sys_utt_tok_label_dict[slot] = sys_utt_tok_label
                usr_utt_tok_label_dict[slot] = usr_utt_tok_label

                # Extend the history span labels with this turn's labels
                # (or with zeros, if history labels are disabled).
                if not no_append_history:
                    if not no_use_history_labels:
                        if not swap_utterances:
                            new_hst_utt_tok_label_dict[slot] = usr_utt_tok_label + sys_utt_tok_label + new_hst_utt_tok_label_dict[slot]
                        else:
                            new_hst_utt_tok_label_dict[slot] = sys_utt_tok_label + usr_utt_tok_label + new_hst_utt_tok_label_dict[slot]
                    else:
                        new_hst_utt_tok_label_dict[slot] = [0 for _ in sys_utt_tok_label + usr_utt_tok_label + new_hst_utt_tok_label_dict[slot]]

                # For now, we map all occurences of unpointable slot values
                # to none. However, since the labels will still suggest
                # a presence of unpointable slot values, the task of the
                # DST is still to find those values. It is just not
                # possible to do that via span prediction on the current input.
                if class_type == 'unpointable':
                    class_type_dict[slot] = 'none'
                    referral_dict[slot] = 'none'
                    if analyze:
                        if slot not in diag_seen_slots_dict or value_label != diag_seen_slots_value_dict[slot]:
                            print("(%s): %s, " % (slot, value_label), end='')
                elif slot in diag_seen_slots_dict and class_type == diag_seen_slots_dict[slot] and class_type != 'copy_value' and class_type != 'inform':
                    # If slot has seen before and its class type did not change, label this slot a not present,
                    # assuming that the slot has not actually been mentioned in this turn.
                    # Exceptions are copy_value and inform. If a seen slot has been tagged as copy_value or inform,
                    # this must mean there is evidence in the original labels, therefore consider
                    # them as mentioned again.
                    class_type_dict[slot] = 'none'
                    referral_dict[slot] = 'none'
                else:
                    class_type_dict[slot] = class_type
                    referral_dict[slot] = referred_slot
                # Remember that this slot was mentioned during this dialog already.
                if class_type != 'none':
                    diag_seen_slots_dict[slot] = class_type
                    diag_seen_slots_value_dict[slot] = value_label
                    new_diag_state[slot] = class_type
                    # Unpointable is not a valid class, therefore replace with
                    # some valid class for now...
                    if class_type == 'unpointable':
                        new_diag_state[slot] = 'copy_value'

            if analyze:
                print("]")

            # text_a/text_b order is user-first by default, system-first
            # when swap_utterances is set.
            if not swap_utterances:
                txt_a = usr_utt_tok
                txt_b = sys_utt_tok
                txt_a_lbl = usr_utt_tok_label_dict
                txt_b_lbl = sys_utt_tok_label_dict
            else:
                txt_a = sys_utt_tok
                txt_b = usr_utt_tok
                txt_a_lbl = sys_utt_tok_label_dict
                txt_b_lbl = usr_utt_tok_label_dict
            examples.append(DSTExample(
                guid=guid,
                text_a=txt_a,
                text_b=txt_b,
                history=hst_utt_tok,
                text_a_label=txt_a_lbl,
                text_b_label=txt_b_lbl,
                history_label=hst_utt_tok_label_dict,
                values=diag_seen_slots_value_dict.copy(),
                inform_label=inform_dict,
                inform_slot_label=inform_slot_dict,
                refer_label=referral_dict,
                diag_state=diag_state,
                class_label=class_type_dict))

            # Update some variables.
            hst_utt_tok_label_dict = new_hst_utt_tok_label_dict.copy()
            diag_state = new_diag_state.copy()

            turn_itr += 1

        if analyze:
            print("----------------------------------------------------------------------")

    return examples
# coding=utf-8
#
# Copyright 2020 Heinrich Heine University Duesseldorf
# Copyright 2020-2022 Heinrich Heine University Duesseldorf
#
# Part of this code is based on the source code of BERT-DST
# (arXiv:1907.03040)
......@@ -166,11 +166,10 @@ def get_turn_label(turn, prev_dialogue_state, slot_list, dial_id, turn_id, sys_i
def create_examples(input_file, set_type, slot_list,
label_maps={},
append_history=False,
use_history_labels=False,
no_append_history=False,
no_use_history_labels=False,
no_label_value_repetitions=False,
swap_utterances=False,
label_value_repetitions=False,
delexicalize_sys_utts=False,
unk_token="[UNK]",
analyze=False):
......@@ -211,7 +210,7 @@ def create_examples(input_file, set_type, slot_list,
unk_token=unk_token,
slot_last_occurrence=True)
if swap_utterances:
if not swap_utterances:
txt_a = text_b
txt_b = text_a
txt_a_lbl = text_b_label
......@@ -230,8 +229,8 @@ def create_examples(input_file, set_type, slot_list,
value_dict[slot] = 'none'
if class_label[slot] != 'none':
ds_lbl_dict[slot] = class_label[slot]
if append_history:
if use_history_labels:
if not no_append_history:
if not no_use_history_labels:
hst_lbl_dict[slot] = txt_a_lbl[slot] + txt_b_lbl[slot] + hst_lbl_dict[slot]
else:
hst_lbl_dict[slot] = [0 for _ in txt_a_lbl[slot] + txt_b_lbl[slot] + hst_lbl_dict[slot]]
......@@ -255,7 +254,7 @@ def create_examples(input_file, set_type, slot_list,
prev_ds_lbl_dict = ds_lbl_dict.copy()
prev_hst_lbl_dict = hst_lbl_dict.copy()
if append_history:
if not no_append_history:
hst = txt_a + txt_b + hst
return examples
# coding=utf-8
#
# Copyright 2020-2022 Heinrich Heine University Duesseldorf
#
# Part of this code is based on the source code of BERT-DST
# (arXiv:1907.03040)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import re
from tqdm import tqdm
from utils_dst import (DSTExample)
try:
from convlab.util import (load_dataset, load_ontology, load_dst_data)
except ModuleNotFoundError as e:
print(e)
print("Ignore this error if you don't intend to use the data processor for ConvLab3's unified data format.")
print("Otherwise, make sure you have ConvLab3 installed and added to your PYTHONPATH.")
def get_ontology_slots(ontology):
    """Collect the slot names defined per domain in a ConvLab ontology.

    Returns a dict mapping each domain to the alphabetically sorted list
    of its slot names.
    """
    return {domain: sorted(ontology['domains'][domain]['slots'])
            for domain in ontology['domains']}
def get_slot_list(dataset_name):
    """Build the full TripPy slot list for a ConvLab3 dataset.

    The list contains one '<domain>-<slot>' entry per ontology slot, plus a
    '<domain>-none' entry per domain (the none slot indicates domain
    activation in ConvLab3), plus 'general-*' pseudo slots for the special
    intents bye/thank/greet when the ontology defines them.
    """
    ontology = load_ontology(dataset_name)
    domain_slots = get_ontology_slots(ontology)
    slot_list = []
    for domain in domain_slots:
        for slot in domain_slots[domain]:
            slot_list.append("%s-%s" % (domain, slot))
        # none slot indicates domain activation in ConvLab3
        slot_list.append("%s-none" % (domain))
    # Some special intents are modeled as 'request' slots in TripPy
    for intent in ['bye', 'thank', 'greet']:
        if intent in ontology['intents']:
            slot_list.append("general-%s" % (intent))
    return slot_list
def create_examples(set_type, dataset_name="multiwoz21", class_types=[], slot_list=[], label_maps={},
no_append_history=False,
no_use_history_labels=False,
no_label_value_repetitions=False,
swap_utterances=False,
delexicalize_sys_utts=False,
unk_token="[UNK]",
analyze=False):
"""Read a DST json file into a list of DSTExample."""
# TODO: Make sure normalization etc. will be compatible with or suitable for SGD and
# other datasets as well.
if dataset_name == "multiwoz21":
from dataset_multiwoz21 import (tokenize, normalize_label,
get_turn_label, delex_utt,
is_request)
else:
raise ValueError("Unknown dataset_name.")
dataset_args = {"dataset_name": dataset_name}
dataset_dict = load_dataset(**dataset_args)
if slot_list == []:
slot_list = get_slot_list()
data = load_dst_data(dataset_dict, data_split=set_type, speaker='all', dialogue_acts=True, split_to_turn=False)
examples = []
for d_itr, entry in enumerate(tqdm(data[set_type])):
dialog_id = entry['dialogue_id']
#dialog_id = entry['original_id']
original_id = entry['original_id']
domains = entry['domains']
# NOTE(review): this span is the per-dialog body of the enclosing example-creation
# routine (its `def` header and the loop binding `entry`/`dialog_id` are above this
# chunk). `slot_list`, `label_maps`, `class_types`, `examples` and the boolean flags
# (`analyze`, `swap_utterances`, `no_append_history`, `no_use_history_labels`,
# `no_label_value_repetitions`, `delexicalize_sys_utts`, `unk_token`) come from that
# outer scope — confirm against the full file.
# Assumes the unified dialog format: each turn carries 'utterance', 'speaker',
# 'dialogue_acts' and (user turns only, presumably) 'state' — TODO confirm schema.
turns = entry['turns']

# Collects all slot changes throughout the dialog; starts with every slot unset.
cumulative_labels = {slot: 'none' for slot in slot_list}

# First system utterance is empty, since multiwoz starts with user input.
# The parallel lists below are index-aligned with utt_tok_list.
utt_tok_list = [[]]
mod_slots_list = [{}]
inform_dict_list = [{}]
user_act_dict_list = [{}]
mod_domains_list = [{}]

# --- Pass 1: collect all utterances and their metadata ---
usr_sys_switch = True
for turn in turns:
    utterance = turn['utterance']
    # 'state' is absent on some turns (system turns, presumably) — default to empty.
    state = turn['state'] if 'state' in turn else {}
    acts = [item for sublist in list(turn['dialogue_acts'].values()) for item in sublist]  # flatten list

    # Assert that system and user utterances alternate; a repeated speaker
    # truncates the dialog rather than producing misaligned examples.
    is_sys_utt = turn['speaker'] in ['sys', 'system']
    if usr_sys_switch == is_sys_utt:
        print("WARN: Wrong order of system and user utterances. Skipping rest of dialog %s" % (dialog_id))
        break
    usr_sys_switch = is_sys_utt

    # Extract metadata: identify modified slots and values informed by the system.
    inform_dict = {}
    user_act_dict = {}
    modified_slots = {}
    modified_domains = set()
    for act in acts:
        # Slot key is "<domain>-<slot>"; an act without a slot maps to "<domain>-none".
        slot = "%s-%s" % (act['domain'], act['slot'] if act['slot'] != '' else 'none')
        # Domain-independent courtesy intents get the pseudo-domain "general".
        if act['intent'] in ['bye', 'thank', 'hello']:
            slot = "general-%s" % (act['intent'])
        # Acts without a 'value' key: a named slot implies 'yes', otherwise 'none'.
        value_label = act['value'] if 'value' in act else 'yes' if act['slot'] != '' else 'none'
        value_label = normalize_label(slot, value_label)
        modified_domains.add(act['domain'])  # Remember domains
        if is_sys_utt and act['intent'] in ['inform', 'recommend', 'select', 'book'] and value_label != 'none':
            # System side: remember values the system offered for this slot.
            if slot not in inform_dict:
                inform_dict[slot] = []
            inform_dict[slot].append(value_label)
        elif not is_sys_utt:
            # User side: remember the raw acts per slot (consumed later by is_request).
            if slot not in user_act_dict:
                user_act_dict[slot] = []
            user_act_dict[slot].append(act)

    # INFO: Since the model has no mechanism to predict
    # one among several informed value candidates, we
    # keep only one informed value. For fairness, we
    # apply a global rule:
    for e in inform_dict:
        # ... Option 1: Always keep first informed value
        inform_dict[e] = list([inform_dict[e][0]])
        # ... Option 2: Always keep last informed value
        #inform_dict[e] = list([inform_dict[e][-1]])

    # Diff the turn's belief state against the cumulative state to find what changed.
    for d in state:
        for s in state[d]:
            slot = "%s-%s" % (d, s)
            value_label = normalize_label(slot, state[d][s])
            # Remember modified slots and entire dialog state
            if slot in slot_list and cumulative_labels[slot] != value_label:
                modified_slots[slot] = value_label
                cumulative_labels[slot] = value_label
                modified_domains.add(d)  # Remember domains

    # Delexicalize sys utterance (replace informed values with unk_token) when enabled.
    if delexicalize_sys_utts and is_sys_utt:
        utt_tok_list.append(delex_utt(utterance, inform_dict, unk_token))  # normalizes utterances
    else:
        utt_tok_list.append(tokenize(utterance))  # normalizes utterances

    inform_dict_list.append(inform_dict.copy())
    user_act_dict_list.append(user_act_dict.copy())
    mod_slots_list.append(modified_slots.copy())
    modified_domains = list(modified_domains)
    modified_domains.sort()  # deterministic domain order
    mod_domains_list.append(modified_domains)

# --- Pass 2: form proper (usr, sys) turns and emit one DSTExample per pair ---
turn_itr = 0
diag_seen_slots_dict = {}  # slot -> last non-'none' class_type seen in this dialog
diag_seen_slots_value_dict = {slot: 'none' for slot in slot_list}  # slot -> last seen value
diag_state = {slot: 'none' for slot in slot_list}  # dialog state *before* the current turn
sys_utt_tok = []
usr_utt_tok = []
hst_utt_tok = []
hst_utt_tok_label_dict = {slot: [] for slot in slot_list}
# Step by 2: utt_tok_list alternates sys/usr, index 0 is the empty first sys turn.
for i in range(1, len(utt_tok_list) - 1, 2):
    sys_utt_tok_label_dict = {}
    usr_utt_tok_label_dict = {}
    value_dict = {}
    inform_dict = {}
    inform_slot_dict = {}
    referral_dict = {}
    class_type_dict = {}

    # Collect turn data: fold the previous turn pair into the history first.
    if not no_append_history:
        if not swap_utterances:
            hst_utt_tok = usr_utt_tok + sys_utt_tok + hst_utt_tok
        else:
            hst_utt_tok = sys_utt_tok + usr_utt_tok + hst_utt_tok
    sys_utt_tok = utt_tok_list[i - 1]
    usr_utt_tok = utt_tok_list[i]
    turn_slots = mod_slots_list[i]
    inform_mem = inform_dict_list[i - 1]  # values informed by the *preceding* sys turn
    user_act = user_act_dict_list[i]
    turn_domains = mod_domains_list[i]
    guid = '%s-%s' % (dialog_id, turn_itr)

    if analyze:
        print("%15s %2s %s ||| %s" % (dialog_id, turn_itr, ' '.join(sys_utt_tok), ' '.join(usr_utt_tok)))
        print("%15s %2s [" % (dialog_id, turn_itr), end='')

    # Work on copies so this turn's labels/state only take effect for the *next* turn.
    new_hst_utt_tok_label_dict = hst_utt_tok_label_dict.copy()
    new_diag_state = diag_state.copy()

    for slot in slot_list:
        value_label = 'none'
        if slot in turn_slots:
            value_label = turn_slots[slot]
            # We keep the original labels so as to not
            # overlook unpointable values, as well as to not
            # modify any of the original labels for test sets,
            # since this would make comparison difficult.
            value_dict[slot] = value_label
        elif not no_label_value_repetitions and slot in diag_seen_slots_dict:
            # Carry the previously seen value forward when repetitions are labeled.
            value_label = diag_seen_slots_value_dict[slot]

        # Get dialog act annotations
        inform_label = list(['none'])
        inform_slot_dict[slot] = 0
        if slot in inform_mem:
            inform_label = inform_mem[slot]
            inform_slot_dict[slot] = 1

        # Resolve the slot's class type, span labels, and referral for this turn.
        (informed_value,
         referred_slot,
         usr_utt_tok_label,
         class_type) = get_turn_label(value_label,
                                      inform_label,
                                      sys_utt_tok,
                                      usr_utt_tok,
                                      slot,
                                      diag_seen_slots_value_dict,
                                      slot_last_occurrence=True,
                                      label_maps=label_maps)
        inform_dict[slot] = informed_value

        # Requestable slots, domain indicator slots and general slots
        # should have class_type 'request', if they ought to be predicted.
        # Give other class_types preference.
        if 'request' in class_types:
            if class_type in ['none', 'unpointable'] and is_request(slot, user_act, turn_domains):
                class_type = 'request'

        # Generally don't use span prediction on sys utterance (but inform prediction instead).
        sys_utt_tok_label = [0 for _ in sys_utt_tok]

        # Determine what to do with value repetitions.
        # If value is unique in seen slots, then tag it, otherwise not,
        # since correct slot assignment can not be guaranteed anymore.
        if not no_label_value_repetitions and slot in diag_seen_slots_dict:
            if class_type == 'copy_value' and list(diag_seen_slots_value_dict.values()).count(value_label) > 1:
                class_type = 'none'
                usr_utt_tok_label = [0 for _ in usr_utt_tok_label]

        sys_utt_tok_label_dict[slot] = sys_utt_tok_label
        usr_utt_tok_label_dict[slot] = usr_utt_tok_label

        # Thread this turn's token labels into the history labels for the next turn.
        if not no_append_history:
            if not no_use_history_labels:
                if not swap_utterances:
                    new_hst_utt_tok_label_dict[slot] = usr_utt_tok_label + sys_utt_tok_label + new_hst_utt_tok_label_dict[slot]
                else:
                    new_hst_utt_tok_label_dict[slot] = sys_utt_tok_label + usr_utt_tok_label + new_hst_utt_tok_label_dict[slot]
            else:
                new_hst_utt_tok_label_dict[slot] = [0 for _ in sys_utt_tok_label + usr_utt_tok_label + new_hst_utt_tok_label_dict[slot]]

        # For now, we map all occurences of unpointable slot values
        # to none. However, since the labels will still suggest
        # a presence of unpointable slot values, the task of the
        # DST is still to find those values. It is just not
        # possible to do that via span prediction on the current input.
        if class_type == 'unpointable':
            class_type_dict[slot] = 'none'
            referral_dict[slot] = 'none'
            if analyze:
                if slot not in diag_seen_slots_dict or value_label != diag_seen_slots_value_dict[slot]:
                    print("(%s): %s, " % (slot, value_label), end='')
        elif slot in diag_seen_slots_dict and class_type == diag_seen_slots_dict[slot] and class_type != 'copy_value' and class_type != 'inform':
            # If slot has seen before and its class type did not change, label this slot a not present,
            # assuming that the slot has not actually been mentioned in this turn.
            # Exceptions are copy_value and inform. If a seen slot has been tagged as copy_value or inform,
            # this must mean there is evidence in the original labels, therefore consider
            # them as mentioned again.
            class_type_dict[slot] = 'none'
            referral_dict[slot] = 'none'
        else:
            class_type_dict[slot] = class_type
            referral_dict[slot] = referred_slot

        # Remember that this slot was mentioned during this dialog already.
        if class_type != 'none':
            diag_seen_slots_dict[slot] = class_type
            diag_seen_slots_value_dict[slot] = value_label
            new_diag_state[slot] = class_type
            # Unpointable is not a valid class, therefore replace with
            # some valid class for now...
            if class_type == 'unpointable':
                new_diag_state[slot] = 'copy_value'

    if analyze:
        print("]")

    # text_a/text_b ordering is controlled by swap_utterances.
    if not swap_utterances:
        txt_a = usr_utt_tok
        txt_b = sys_utt_tok
        txt_a_lbl = usr_utt_tok_label_dict
        txt_b_lbl = sys_utt_tok_label_dict
    else:
        txt_a = sys_utt_tok
        txt_b = usr_utt_tok
        txt_a_lbl = sys_utt_tok_label_dict
        txt_b_lbl = usr_utt_tok_label_dict
    examples.append(DSTExample(
        guid=guid,
        text_a=txt_a,
        text_b=txt_b,
        history=hst_utt_tok,
        text_a_label=txt_a_lbl,
        text_b_label=txt_b_lbl,
        history_label=hst_utt_tok_label_dict,
        values=diag_seen_slots_value_dict.copy(),
        inform_label=inform_dict,
        inform_slot_label=inform_slot_dict,
        refer_label=referral_dict,
        diag_state=diag_state,
        class_label=class_type_dict))

    # Update some variables: the pre-turn state/history labels for the next iteration.
    hst_utt_tok_label_dict = new_hst_utt_tok_label_dict.copy()
    diag_state = new_diag_state.copy()

    turn_itr += 1

if analyze:
    print("----------------------------------------------------------------------")
# All dialogs processed: hand the accumulated DSTExample list back to the caller.
return examples