diff --git a/convlab2/base_models/t5/create_data.py b/convlab2/base_models/t5/create_data.py
index 19be0b81520cf4077ac34166e7b2e7a0d12f80a3..77c817b3ebf3c8f53a101c4f1d270b03c592988f 100644
--- a/convlab2/base_models/t5/create_data.py
+++ b/convlab2/base_models/t5/create_data.py
@@ -11,9 +11,6 @@ def create_rg_data(dataset, data_dir, args):
     os.makedirs(data_dir, exist_ok=True)

     data_splits = data_by_split.keys()
-    file_name = os.path.join(data_dir, f"source_prefix.txt")
-    with open(file_name, "w") as f:
-        f.write("generate a system response according to the context: ")
     for data_split in data_splits:
         data = []
         for sample in tqdm(data_by_split[data_split], desc=f'{data_split} sample', leave=False):
@@ -31,9 +28,6 @@ def create_nlu_data(dataset, data_dir, args):
     os.makedirs(data_dir, exist_ok=True)

     data_splits = data_by_split.keys()
-    file_name = os.path.join(data_dir, f"source_prefix.txt")
-    with open(file_name, "w") as f:
-        f.write("parse the dialogue action of the last utterance: ")
     for data_split in data_splits:
         data = []
         for sample in tqdm(data_by_split[data_split], desc=f'{data_split} sample', leave=False):
@@ -55,9 +49,6 @@ def create_goal2dialogue_data(dataset, data_dir, args):
     os.makedirs(data_dir, exist_ok=True)

     data_splits = data_by_split.keys()
-    file_name = os.path.join(data_dir, f"source_prefix.txt")
-    with open(file_name, "w") as f:
-        f.write("generate a dialogue between user and system according to the user goal: ")
     for data_split in data_splits:
         data = []
         for sample in tqdm(data_by_split[data_split], desc=f'{data_split} sample', leave=False):
diff --git a/convlab2/base_models/t5/goal2dialogue/run_goal2dialogue.sh b/convlab2/base_models/t5/goal2dialogue/run_goal2dialogue.sh
index a60f77c2bbd9b2660b5616dcad8c9a162f728f12..09a2c33aa06fa5134dba0707e1df5e633ac9f269 100644
--- a/convlab2/base_models/t5/goal2dialogue/run_goal2dialogue.sh
+++ b/convlab2/base_models/t5/goal2dialogue/run_goal2dialogue.sh
@@ -8,7 +8,6 @@ logging_dir="${output_dir}/runs"
 train_file="${data_dir}/train.json"
 validation_file="${data_dir}/validation.json"
 test_file="${data_dir}/test.json"
-source_prefix="${data_dir}/source_prefix.txt"
 source_column="goal"
 target_column="dialogue"
 max_target_length=1024
@@ -30,7 +29,6 @@ python -m torch.distributed.launch \
     --source_column ${source_column} \
     --target_column ${target_column} \
     --max_target_length ${max_target_length} \
-    --source_prefix ${source_prefix} \
     --model_name_or_path ${model_name_or_path} \
     --do_train \
     --do_eval \
diff --git a/convlab2/base_models/t5/nlu/run_multiwoz21_user.sh b/convlab2/base_models/t5/nlu/run_multiwoz21_user.sh
index 85f3ec8302d161b29ba71b760a56d0f64a6b4dfc..12e9c9c32e784c85940831d76a9cd7e6f1f124a2 100644
--- a/convlab2/base_models/t5/nlu/run_multiwoz21_user.sh
+++ b/convlab2/base_models/t5/nlu/run_multiwoz21_user.sh
@@ -12,7 +12,6 @@ validation_file="${data_dir}/validation.json"
 test_file="${data_dir}/test.json"
 metric_name_or_path="nlu_metric.py"
 metric_for_best_model="overall_f1"
-source_prefix="${data_dir}/source_prefix.txt"
 source_column="context"
 target_column="dialogue_acts_seq"
 model_name_or_path="t5-small"
@@ -30,7 +29,6 @@ python -m torch.distributed.launch \
     --train_file ${train_file} \
     --source_column ${source_column} \
     --target_column ${target_column} \
-    --source_prefix ${source_prefix} \
     --model_name_or_path ${model_name_or_path} \
     --do_train \
     --save_strategy epoch \
@@ -55,7 +53,6 @@ python -m torch.distributed.launch \
     --test_file ${test_file} \
     --source_column ${source_column} \
     --target_column ${target_column} \
-    --source_prefix ${source_prefix} \
     --model_name_or_path ${output_dir} \
     --do_predict \
     --predict_with_generate \
diff --git a/convlab2/base_models/t5/nlu/run_multiwoz21_user_context3.sh b/convlab2/base_models/t5/nlu/run_multiwoz21_user_context3.sh
index 8d7b5c93e8deb9c8c5da9ecd03e42bbc53341442..a12b28d34524e6503b8824cadeacbbf800df9b3b 100644
--- a/convlab2/base_models/t5/nlu/run_multiwoz21_user_context3.sh
+++ b/convlab2/base_models/t5/nlu/run_multiwoz21_user_context3.sh
@@ -12,7 +12,6 @@ validation_file="${data_dir}/validation.json"
 test_file="${data_dir}/test.json"
 metric_name_or_path="nlu_metric.py"
 metric_for_best_model="overall_f1"
-source_prefix="${data_dir}/source_prefix.txt"
 source_column="context"
 target_column="dialogue_acts_seq"
 model_name_or_path="t5-small"
@@ -30,7 +29,6 @@ python -m torch.distributed.launch \
     --train_file ${train_file} \
     --source_column ${source_column} \
     --target_column ${target_column} \
-    --source_prefix ${source_prefix} \
     --model_name_or_path ${model_name_or_path} \
     --do_train \
     --save_strategy epoch \
@@ -55,7 +53,6 @@ python -m torch.distributed.launch \
     --test_file ${test_file} \
     --source_column ${source_column} \
     --target_column ${target_column} \
-    --source_prefix ${source_prefix} \
     --model_name_or_path ${output_dir} \
     --do_predict \
     --predict_with_generate \
diff --git a/convlab2/base_models/t5/nlu/run_tm1_user.sh b/convlab2/base_models/t5/nlu/run_tm1_user.sh
index 16a16fdb106f09a7001190477de8b0878d2e20f3..ec45989743cf0b3a3aae66a3237cfb3ca95d69da 100644
--- a/convlab2/base_models/t5/nlu/run_tm1_user.sh
+++ b/convlab2/base_models/t5/nlu/run_tm1_user.sh
@@ -12,7 +12,6 @@ validation_file="${data_dir}/validation.json"
 test_file="${data_dir}/test.json"
 metric_name_or_path="nlu_metric.py"
 metric_for_best_model="overall_f1"
-source_prefix="${data_dir}/source_prefix.txt"
 source_column="context"
 target_column="dialogue_acts_seq"
 model_name_or_path="t5-small"
@@ -30,7 +29,6 @@ python -m torch.distributed.launch \
     --train_file ${train_file} \
     --source_column ${source_column} \
     --target_column ${target_column} \
-    --source_prefix ${source_prefix} \
     --model_name_or_path ${model_name_or_path} \
     --do_train \
     --save_strategy epoch \
@@ -55,7 +53,6 @@ python -m torch.distributed.launch \
     --test_file ${test_file} \
     --source_column ${source_column} \
     --target_column ${target_column} \
-    --source_prefix ${source_prefix} \
     --model_name_or_path ${output_dir} \
     --do_predict \
     --predict_with_generate \
diff --git a/convlab2/base_models/t5/nlu/run_tm1_user_context3.sh b/convlab2/base_models/t5/nlu/run_tm1_user_context3.sh
index ccb67609279be5c4b044a9baadc19672d69c1532..12afd6fa2e1501b9c0e2823d705ca705950c09fe 100644
--- a/convlab2/base_models/t5/nlu/run_tm1_user_context3.sh
+++ b/convlab2/base_models/t5/nlu/run_tm1_user_context3.sh
@@ -12,7 +12,6 @@ validation_file="${data_dir}/validation.json"
 test_file="${data_dir}/test.json"
 metric_name_or_path="nlu_metric.py"
 metric_for_best_model="overall_f1"
-source_prefix="${data_dir}/source_prefix.txt"
 source_column="context"
 target_column="dialogue_acts_seq"
 model_name_or_path="t5-small"
@@ -30,7 +29,6 @@ python -m torch.distributed.launch \
     --train_file ${train_file} \
     --source_column ${source_column} \
     --target_column ${target_column} \
-    --source_prefix ${source_prefix} \
     --model_name_or_path ${model_name_or_path} \
     --do_train \
     --save_strategy epoch \
@@ -55,7 +53,6 @@ python -m torch.distributed.launch \
     --test_file ${test_file} \
     --source_column ${source_column} \
     --target_column ${target_column} \
-    --source_prefix ${source_prefix} \
     --model_name_or_path ${output_dir} \
     --do_predict \
     --predict_with_generate \
diff --git a/convlab2/base_models/t5/nlu/run_tm2_user.sh b/convlab2/base_models/t5/nlu/run_tm2_user.sh
index 8686822fea882cb75776bee89dbd4344b71ea64b..d918d97e52059a8a4abad5797eed3c42bdfc1f84 100644
--- a/convlab2/base_models/t5/nlu/run_tm2_user.sh
+++ b/convlab2/base_models/t5/nlu/run_tm2_user.sh
@@ -12,7 +12,6 @@ validation_file="${data_dir}/validation.json"
 test_file="${data_dir}/test.json"
 metric_name_or_path="nlu_metric.py"
 metric_for_best_model="overall_f1"
-source_prefix="${data_dir}/source_prefix.txt"
 source_column="context"
 target_column="dialogue_acts_seq"
 model_name_or_path="t5-small"
@@ -30,7 +29,6 @@ python -m torch.distributed.launch \
     --train_file ${train_file} \
     --source_column ${source_column} \
     --target_column ${target_column} \
-    --source_prefix ${source_prefix} \
     --model_name_or_path ${model_name_or_path} \
     --do_train \
     --save_strategy epoch \
@@ -55,7 +53,6 @@ python -m torch.distributed.launch \
     --test_file ${test_file} \
     --source_column ${source_column} \
     --target_column ${target_column} \
-    --source_prefix ${source_prefix} \
     --model_name_or_path ${output_dir} \
     --do_predict \
     --predict_with_generate \
diff --git a/convlab2/base_models/t5/nlu/run_tm2_user_context3.sh b/convlab2/base_models/t5/nlu/run_tm2_user_context3.sh
index 03c2489940e38dd16256f6b4f2683a413f514235..fe3d35983ff8e0daf1b93a929a302455a4c842b3 100644
--- a/convlab2/base_models/t5/nlu/run_tm2_user_context3.sh
+++ b/convlab2/base_models/t5/nlu/run_tm2_user_context3.sh
@@ -12,7 +12,6 @@ validation_file="${data_dir}/validation.json"
 test_file="${data_dir}/test.json"
 metric_name_or_path="nlu_metric.py"
 metric_for_best_model="overall_f1"
-source_prefix="${data_dir}/source_prefix.txt"
 source_column="context"
 target_column="dialogue_acts_seq"
 model_name_or_path="t5-small"
@@ -30,7 +29,6 @@ python -m torch.distributed.launch \
     --train_file ${train_file} \
     --source_column ${source_column} \
     --target_column ${target_column} \
-    --source_prefix ${source_prefix} \
     --model_name_or_path ${model_name_or_path} \
     --do_train \
     --save_strategy epoch \
@@ -55,7 +53,6 @@ python -m torch.distributed.launch \
     --test_file ${test_file} \
     --source_column ${source_column} \
     --target_column ${target_column} \
-    --source_prefix ${source_prefix} \
     --model_name_or_path ${output_dir} \
     --do_predict \
     --predict_with_generate \
diff --git a/convlab2/base_models/t5/nlu/run_tm3_user.sh b/convlab2/base_models/t5/nlu/run_tm3_user.sh
index 470cb7d71c2b7a630e6917912e21d2c61ca1c075..71623e18eff12a4578b7078205df7b8366a5377b 100644
--- a/convlab2/base_models/t5/nlu/run_tm3_user.sh
+++ b/convlab2/base_models/t5/nlu/run_tm3_user.sh
@@ -12,7 +12,6 @@ validation_file="${data_dir}/validation.json"
 test_file="${data_dir}/test.json"
 metric_name_or_path="nlu_metric.py"
 metric_for_best_model="overall_f1"
-source_prefix="${data_dir}/source_prefix.txt"
 source_column="context"
 target_column="dialogue_acts_seq"
 model_name_or_path="t5-small"
@@ -30,7 +29,6 @@ python -m torch.distributed.launch \
     --train_file ${train_file} \
     --source_column ${source_column} \
     --target_column ${target_column} \
-    --source_prefix ${source_prefix} \
     --model_name_or_path ${model_name_or_path} \
     --do_train \
     --save_strategy epoch \
@@ -55,7 +53,6 @@ python -m torch.distributed.launch \
     --test_file ${test_file} \
     --source_column ${source_column} \
     --target_column ${target_column} \
-    --source_prefix ${source_prefix} \
     --model_name_or_path ${output_dir} \
     --do_predict \
     --predict_with_generate \
diff --git a/convlab2/base_models/t5/nlu/run_tm3_user_context3.sh b/convlab2/base_models/t5/nlu/run_tm3_user_context3.sh
index 5e325d1fe2b127ef1af0b0733dd5db03bb1cbe3c..aa22b362e1e6d6eaa30a0e5a29816f4cf3239d6f 100644
--- a/convlab2/base_models/t5/nlu/run_tm3_user_context3.sh
+++ b/convlab2/base_models/t5/nlu/run_tm3_user_context3.sh
@@ -12,7 +12,6 @@ validation_file="${data_dir}/validation.json"
 test_file="${data_dir}/test.json"
 metric_name_or_path="nlu_metric.py"
 metric_for_best_model="overall_f1"
-source_prefix="${data_dir}/source_prefix.txt"
 source_column="context"
 target_column="dialogue_acts_seq"
 model_name_or_path="t5-small"
@@ -30,7 +29,6 @@ python -m torch.distributed.launch \
     --train_file ${train_file} \
     --source_column ${source_column} \
     --target_column ${target_column} \
-    --source_prefix ${source_prefix} \
     --model_name_or_path ${model_name_or_path} \
     --do_train \
     --save_strategy epoch \
@@ -55,7 +53,6 @@ python -m torch.distributed.launch \
     --test_file ${test_file} \
     --source_column ${source_column} \
     --target_column ${target_column} \
-    --source_prefix ${source_prefix} \
     --model_name_or_path ${output_dir} \
     --do_predict \
     --predict_with_generate \
diff --git a/convlab2/base_models/t5/rg/run_rg.sh b/convlab2/base_models/t5/rg/run_rg.sh
index 8bf742d51b1f28765a5d28775970d12a25178434..55accadfaeceb7c43ad9df079f054aa3e00c5a1c 100644
--- a/convlab2/base_models/t5/rg/run_rg.sh
+++ b/convlab2/base_models/t5/rg/run_rg.sh
@@ -1,7 +1,8 @@
-n_gpus=8
+set -e
+n_gpus=2
 task_name="rg"
-dataset_name="multiwoz21"
-speaker="system"
+dataset_name="metalwoz+sgd+tm1+tm2+tm3"
+speaker="all"
 data_dir="data/${task_name}/${dataset_name}/${speaker}"
 output_dir="output/${task_name}/${dataset_name}/${speaker}"
 cache_dir="../cache"
@@ -9,17 +10,30 @@ logging_dir="${output_dir}/runs"
 train_file="${data_dir}/train.json"
 validation_file="${data_dir}/validation.json"
 test_file="${data_dir}/test.json"
-source_prefix="${data_dir}/source_prefix.txt"
 source_column="context"
 target_column="response"
+truncation_side="left"
+max_source_length=512
+max_target_length=128
 model_name_or_path="t5-small"
 per_device_train_batch_size=32
 per_device_eval_batch_size=128
-gradient_accumulation_steps=1
+gradient_accumulation_steps=4
 lr=1e-3
 num_train_epochs=5

-python ../create_data.py --tasks ${task_name} --datasets ${dataset_name} --speaker ${speaker}
+# names=$(echo ${dataset_name} | tr "+" "\n")
+# mkdir -p ${data_dir}
+# for name in ${names};
+# do
+#     echo "preprocessing ${name}"
+#     python ../create_data.py --tasks ${task_name} --datasets ${name} --speaker ${speaker}
+#     if [ "${name}" != "${dataset_name}" ]; then
+#         cat "data/${task_name}/${name}/${speaker}/train.json" >> ${train_file}
+#         cat "data/${task_name}/${name}/${speaker}/validation.json" >> ${validation_file}
+#         cat "data/${task_name}/${name}/${speaker}/test.json" >> ${test_file}
+#     fi
+# done

 python -m torch.distributed.launch \
     --nproc_per_node ${n_gpus} ../run_seq2seq.py \
@@ -29,7 +43,9 @@ python -m torch.distributed.launch \
     --test_file ${test_file} \
     --source_column ${source_column} \
     --target_column ${target_column} \
-    --source_prefix ${source_prefix} \
+    --max_source_length ${max_source_length} \
+    --max_target_length ${max_target_length} \
+    --truncation_side ${truncation_side} \
     --model_name_or_path ${model_name_or_path} \
     --do_train \
     --do_eval \
diff --git a/convlab2/base_models/t5/run_seq2seq.py b/convlab2/base_models/t5/run_seq2seq.py
index e9348f5579d35dffe85b9b7e9b90cedf176d0de3..dace9713d540b7fe2aa1c552132cc4c54d698989 100644
--- a/convlab2/base_models/t5/run_seq2seq.py
+++ b/convlab2/base_models/t5/run_seq2seq.py
@@ -47,7 +47,7 @@ from transformers.utils.versions import require_version


 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.12.5")
+check_min_version("4.17.0")

 require_version("datasets>=1.16.1")

@@ -78,6 +78,10 @@ class ModelArguments:
         default=True,
         metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."},
     )
+    truncation_side: Optional[str] = field(
+        default="right",
+        metadata={"help": "Which side to truncate, left or right."}
+    )
     model_revision: str = field(
         default="main",
         metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
@@ -341,6 +345,7 @@ def main():
         model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
         cache_dir=model_args.cache_dir,
         use_fast=model_args.use_fast_tokenizer,
+        truncation_side=model_args.truncation_side,
         revision=model_args.model_revision,
         use_auth_token=True if model_args.use_auth_token else None,
     )
@@ -382,10 +387,11 @@ def main():
             )

     if data_args.source_prefix_filepath is not None:
-        prefix = open(data_args.source_prefix_filepath, 'r', encoding='utf-8').readline().strip()
+        prefix = open(data_args.source_prefix_filepath, 'r', encoding='utf-8').readline().strip('\n')
     else:
         prefix = ""

+    logger.info(f'source prefix: "{prefix}"')

     # Preprocessing the datasets.
     # We need to tokenize inputs and targets.
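
A minimal usage sketch (not part of the patch) of the new truncation_side handling: run_seq2seq.py now forwards the argument to AutoTokenizer.from_pretrained, and run_rg.sh sets truncation_side="left" together with max_source_length=512, so over-long dialogue contexts drop their oldest turns rather than the most recent ones. The snippet assumes transformers>=4.17.0 (the version the script now checks) and the t5-small checkpoint used in the shell scripts; the toy context string is made up for illustration.

# Sketch of the truncation_side behaviour added in this patch (assumption:
# transformers>=4.17.0 installed, t5-small checkpoint available).
from transformers import AutoTokenizer

# A long dialogue context, well beyond the 512-token source limit in run_rg.sh.
context = " ".join(f"user: turn {i} system: reply {i}" for i in range(300))

# truncation_side is passed through from_pretrained, as run_seq2seq.py now
# does with model_args.truncation_side.
left = AutoTokenizer.from_pretrained("t5-small", truncation_side="left")
right = AutoTokenizer.from_pretrained("t5-small", truncation_side="right")

ids_left = left(context, max_length=512, truncation=True)["input_ids"]
ids_right = right(context, max_length=512, truncation=True)["input_ids"]

# Left truncation keeps the most recent turns (the end of the context);
# right truncation would keep the oldest turns instead.
print(left.decode(ids_left[-12:]))    # ends with the latest turns
print(right.decode(ids_right[-12:]))  # still early in the dialogue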