diff --git a/.gitignore b/.gitignore index b55ff4992d1d75739e3bed2718508f47e47b303a..6012819da84535f1108916153f3116b3a609ba07 100644 --- a/.gitignore +++ b/.gitignore @@ -102,7 +102,6 @@ convlab/dst/trade/multiwoz_config/ convlab/deploy/bert_multiwoz_all.zip convlab/deploy/templates/dialog_eg.html test.py -*budget*.pdf *convlab/policy/vector/action_dicts *.egg-info diff --git a/README.md b/README.md index 38ff5f214a8e243e9b665559fc4dd5ffe8db4434..4e4a7abff996d08aa72359151c12490d03725e12 100755 --- a/README.md +++ b/README.md @@ -1,282 +1,143 @@ -# ConvLab-2 -[](https://travis-ci.com/thu-coai/ConvLab-2) +# ConvLab-3 -**ConvLab-2** is an open-source toolkit that enables researchers to build task-oriented dialogue systems with state-of-the-art models, perform an end-to-end evaluation, and diagnose the weakness of systems. As the successor of [ConvLab](https://github.com/ConvLab/ConvLab), ConvLab-2 inherits ConvLab's framework but integrates more powerful dialogue models and supports more datasets. Besides, we have developed an analysis tool and an interactive tool to assist researchers in diagnosing dialogue systems. [[paper]](https://arxiv.org/abs/2002.04793) +   + +**ConvLab-3** is a flexible dialog system platform based on a **unified data format** for task-oriented dialog (TOD) datasets. The unified format serves as the adapter between TOD datasets and models: datasets are first transformed to the unified format and then loaded by models. In this way, the cost of adapting $M$ models to $N$ datasets is reduced from $M\times N$ to $M+N$. While retaining all features of [ConvLab-2](https://github.com/thu-coai/ConvLab-2), ConvLab-3 greatly enlarges the supported datasets and models thanks to the unified format, and enhances the utility of the reinforcement learning (RL) toolkit for the dialog policy module. For typical usage, see our [paper](). Datasets and trained models are also available on [Hugging Face Hub](https://huggingface.co/ConvLab). - [Installation](#installation) - [Tutorials](#tutorials) -- [Documents](#documents) +- [Unified Datasets](#Unified-Datasets) - [Models](#models) -- [Supported Datasets](#Supported-Datasets) -- [End-to-end Performance on MultiWOZ](#End-to-end-Performance-on-MultiWOZ) -- [Module Performance on MultiWOZ](#Module-Performance-on-MultiWOZ) -- [Issues](#issues) -- [Contributions](#contributions) +- [Code Structure](#Code-Structure) +- [Contributing](#contributing) +- [Team](#Team) - [Citing](#citing) - [License](#license) ## Updates -2021.9.13: +- **2022.11.30**: ConvLab-3 release. -- Add [MultiWOZ 2.3](https://github.com/lexmen318/MultiWOZ-coref) dataset in `data` dir. The dataset adds co-reference annotations in addition to corrections of dialogue acts and dialogue states. [[paper]](https://arxiv.org/abs/2010.05594) +## Installation -2021.6.18: +You can install ConvLab-3 in one of the following ways according to your needs. Higher versions of `torch` and `transformers` may also work. -- Add [LAUG](https://github.com/thu-coai/LAUG), an open-source toolkit for Language understanding AUGmentation. It is an automatic method to approximate the natural perturbations to existing data. Augmented data could be used to conduct black-box robustness testing or enhancing training. [[paper]](https://arxiv.org/abs/2012.15262) -- Add [SC-GPT](https://github.com/pengbaolin/SC-GPT) for NLG. 
[[paper]](https://arxiv.org/abs/2002.12328) +### Git clone and pip install in development mode (Recommend) -## Installation +For the latest and most configurable version, we recommend installing ConvLab-3 in development mode. -Require python >= 3.6. +Clone the newest repository: -Clone this repository: ```bash -git clone https://github.com/thu-coai/ConvLab-2.git +git clone --depth 1 https://github.com/ConvLab/ConvLab-3.git ``` -Install ConvLab-2 via pip: +Install ConvLab-3 via pip: ```bash -cd ConvLab-2 +cd ConvLab-3 pip install -e . ``` -## Tutorials - -- [Getting Started](https://github.com/thu-coai/ConvLab-2/blob/master/tutorials/Getting_Started.ipynb) (Have a try on [Colab](https://colab.research.google.com/github/thu-coai/ConvLab-2/blob/master/tutorials/Getting_Started.ipynb)!) -- [Add New Model](https://github.com/thu-coai/ConvLab-2/blob/master/tutorials/Add_New_Model.md) -- [Train RL Policies](https://github.com/thu-coai/ConvLab-2/blob/master/tutorials/Train_RL_Policies) -- [Interactive Tool](https://github.com/thu-coai/ConvLab-2/blob/master/deploy) [[demo video]](https://youtu.be/00VWzbcx26E) - -## Documents -Our documents are on https://thu-coai.github.io/ConvLab-2_docs/convlab.html. - -## Models - -We provide following models: - -- NLU: SVMNLU, MILU, BERTNLU -- DST: rule, TRADE, SUMBT -- Policy: rule, Imitation, REINFORCE, PPO, GDPL, MDRG, HDSA, LaRL -- Simulator policy: Agenda, VHUS -- NLG: Template, SCLSTM -- End2End: Sequicity, DAMD, RNN_rollout - -For more details about these models, You can refer to `README.md` under `convlab/$module/$model/$dataset` dir such as `convlab/nlu/jointBERT/multiwoz/README.md`. - -## Supported Datasets - -- [Multiwoz 2.1](https://github.com/budzianowski/multiwoz) - - We add user dialogue act (*inform*, *request*, *bye*, *greet*, *thank*), remove 5 sessions that have incomplete dialogue act annotation and place it under `data/multiwoz` dir. - - Train/val/test size: 8434/999/1000. Split as original data. - - LICENSE: Attribution 4.0 International, url: http://creativecommons.org/licenses/by/4.0/ -- [CrossWOZ](https://github.com/thu-coai/CrossWOZ) - - We offers a rule-based user simulator and a complete set of models for building a pipeline system on the CrossWOZ dataset. We correct few state annotation and place it under `data/crosswoz` dir. - - Train/val/test size: 5012/500/500. Split as original data. - - LICENSE: Attribution 4.0 International, url: http://creativecommons.org/licenses/by/4.0/ -- [Camrest](https://www.repository.cam.ac.uk/handle/1810/260970) - - We add system dialogue act (*inform*, *request*, *nooffer*) and place it under `data/camrest` dir. - - Train/val/test size: 406/135/135. Split as original data. - - LICENSE: Attribution 4.0 International, url: http://creativecommons.org/licenses/by/4.0/ -- [Dealornot](https://github.com/facebookresearch/end-to-end-negotiator/tree/master/src/data/negotiate) - - Placed under `data/dealornot` dir. - - Train/val/test size: 5048/234/526. Split as original data. - - LICENSE: Attribution-NonCommercial 4.0 International, url: https://creativecommons.org/licenses/by-nc/4.0/ - -## End-to-end Performance on MultiWOZ - -*Notice*: The results are for commits before [`bdc9dba`](https://github.com/thu-coai/ConvLab-2/commit/bdc9dba72c957d97788e533f9458ed03a4b0137b) (inclusive). We will update the results after improving user policy. 
- -We perform end-to-end evaluation (1000 dialogues) on MultiWOZ using the user simulator below (a full example on `tests/test_end2end.py`) : - -```python -# BERT nlu trained on sys utterance -user_nlu = BERTNLU(mode='sys', config_file='multiwoz_sys_context.json', model_file='https://convlab.blob.core.windows.net/convlab-2/bert_multiwoz_sys_context.zip') -user_dst = None -user_policy = RulePolicy(character='usr') -user_nlg = TemplateNLG(is_user=True) -user_agent = PipelineAgent(user_nlu, user_dst, user_policy, user_nlg, name='user') - -analyzer = Analyzer(user_agent=user_agent, dataset='multiwoz') - -set_seed(20200202) -analyzer.comprehensive_analyze(sys_agent=sys_agent, model_name='sys_agent', total_dialog=1000) -``` - -Main metrics (refer to `convlab/evaluator/multiwoz_eval.py` for more details): - -- Complete: whether complete the goal. Judged by the Agenda policy instead of external evaluator. -- Success: whether all user requests have been informed and the booked entities satisfy the constraints. -- Book: how many the booked entities satisfy the user constraints. -- Inform Precision/Recall/F1: how many user requests have been informed. -- Turn(succ/all): average turn number for successful/all dialogues. - -Performance (the first row is the default config for each module. Empty entries are set to default config.): - -| NLU | DST | Policy | NLG | Complete rate | Success rate | Book rate | Inform P/R/F1 | Turn(succ/all) | -| ----------- | --------- | -------------- | ----------- | ------------- | ------------ | --------- | --------- | -------------- | -| **BERTNLU** | RuleDST | RulePolicy | TemplateNLG | 90.5 | 81.3 | 91.1 | 79.7/92.6/83.5 | 11.6/12.3 | -| **MILU** | RuleDST | RulePolicy | TemplateNLG | 93.3 | 81.8 | 93.0 | 80.4/94.7/84.8 | 11.3/12.1 | -| BERTNLU | RuleDST | RulePolicy | **SCLSTM** | 48.5 | 40.2 | 56.9 | 62.3/62.5/58.7 | 11.9/27.1 | -| BERTNLU | RuleDST | **MLEPolicy** | TemplateNLG | 42.7 | 35.9 | 17.6 | 62.8/69.8/62.9 | 12.1/24.1 | -| BERTNLU | RuleDST | **PGPolicy** | TemplateNLG | 37.4 | 31.7 | 17.4 | 57.4/63.7/56.9 | 11.0/25.3 | -| BERTNLU | RuleDST | **PPOPolicy** | TemplateNLG | 75.5 | 71.7 | 86.6 | 69.4/85.8/74.1 | 13.1/17.8 | -| BERTNLU | RuleDST | **GDPLPolicy** | TemplateNLG | 49.4 | 38.4 | 20.1 | 64.5/73.8/65.6 | 11.5/21.3 | -| None | **TRADE** | RulePolicy | TemplateNLG | 32.4 | 20.1 | 34.7 | 46.9/48.5/44.0 | 11.4/23.9 | -| None | **SUMBT** | RulePolicy | TemplateNLG | 34.5 | 29.4 | 62.4 | 54.1/50.3/48.3 | 11.0/28.1 | -| BERTNLU | RuleDST | **MDRG** | None | 21.6 | 17.8 | 31.2 | 39.9/36.3/34.8 | 15.6/30.5| -| BERTNLU | RuleDST | **LaRL** | None | 34.8 | 27.0 | 29.6 | 49.1/53.6/47.8 |13.2/24.4| -| None | **SUMBT** | **LaRL** | None | 32.9 | 23.7 | 25.9 | 48.6/52.0/46.7 | 12.5/24.3| -| None | None | **DAMD*** | None | 39.5| 34.3 | 51.4 | 60.4/59.8/56.3 | 15.8/29.8 | - -*: end-to-end models used as sys_agent directly. 
- -## Module Performance on MultiWOZ - -### NLU - -By running `convlab/nlu/evaluate.py MultiWOZ $model all`: - -| | Precision | Recall | F1 | -| ------- | --------- | ------ | ----- | -| BERTNLU | 82.48 | 85.59 | 84.01 | -| MILU | 80.29 | 83.63 | 81.92 | -| SVMNLU | 74.96 | 50.74 | 60.52 | - -### DST - -By running `convlab/dst/evaluate.py MultiWOZ $model`: +### Pip install from PyPI -| | Joint accuracy | Slot accuracy | Joint F1 | -| -------- | ------------- | ------------- | --------| -| MDBT | 0.06 | 0.89 | 0.43 | -| SUMBT | 0.30 | 0.96 | 0.83 | -| TRADE | 0.40 | 0.96 | 0.84 | +To use ConvLab-3 as an off-the-shelf tool, you can install via: -### Policy - -*Notice*: The results are for commits before [`bdc9dba`](https://github.com/thu-coai/ConvLab-2/commit/bdc9dba72c957d97788e533f9458ed03a4b0137b) (inclusive). We will update the results after improving user policy. - -By running `convlab/policy/evalutate.py --model_name $model` - -| | Task Success Rate | -| --------- | ----------------- | -| MLE | 0.56 | -| PG | 0.54 | -| PPO | 0.89 | -| GDPL | 0.58 | - -### NLG - -By running `convlab/nlg/evaluate.py MultiWOZ $model sys` - -| | corpus BLEU-4 | -| -------- | ------------- | -| Template | 0.3309 | -| SCLSTM | 0.4884 | - -## Translation-train SUMBT for cross-lingual DST - -### Train - -With Convlab-2, you can train SUMBT on a machine-translated dataset like this: - -```python -# train.py -import os -from sys import argv - -if __name__ == "__main__": - if len(argv) != 2: - print('usage: python3 train.py [dataset]') - exit(1) - assert argv[1] in ['multiwoz', 'crosswoz'] - - from convlab.dst.sumbt.multiwoz_zh.sumbt import SUMBT_PATH - if argv[1] == 'multiwoz': - from convlab.dst.sumbt.multiwoz_zh.sumbt import SUMBTTracker as SUMBT - elif argv[1] == 'crosswoz': - from convlab.dst.sumbt.crosswoz_en.sumbt import SUMBTTracker as SUMBT - - sumbt = SUMBT() - sumbt.train(True) +```bash +pip install convlab ``` +Note that the `data` directory will not be included due to the package size limitation. -### Evaluate +### Using Docker -Execute `evaluate.py` (under `convlab/dst/`) with following command: +We also provide [Dockerfile](https://github.com/ConvLab/ConvLab-3/blob/master/Dockerfile) for building docker. Basically it uses the `requirement.txt` and then installs ConvLab-3 in development mode. ```bash -python3 evaluate.py [CrossWOZ-en|MultiWOZ-zh] [val|test|human_val] -``` +# create image +docker build -t convlab . -evaluation of our pre-trained models are: (joint acc.) +# run container +docker run -dit convlab -| type | CrossWOZ-en | MultiWOZ-zh | -| ----- | ----------- | ----------- | -| val | 12.4% | 48.5% | -| test | 12.4% | 46.0% | -| human_val | 10.6% | 47.4% | - -`human_val` option will make the model evaluate on the validation set translated by human. +# open bash in container +docker exec -it CONTAINER_ID bash +``` -Note: You may want to download pre-traiend BERT models and translation-train SUMBT models provided by us. +## Tutorials -Without modifying any code, you could: +| Section | Description | +| ------------------------------------------------------------ | ----------- | +| [Getting Started](https://github.com/thu-coai/ConvLab-2/blob/master/tutorials/Getting_Started.ipynb) (Have a try on [Colab](https://colab.research.google.com/github/thu-coai/ConvLab-2/blob/master/tutorials/Getting_Started.ipynb)!) 
| | +| [Unified Data Format](https://github.com/ConvLab/ConvLab-3/tree/master/data/unified_datasets) | | +| [Utility functions for unified datasets](https://github.com/ConvLab/ConvLab-3/blob/master/convlab/util/unified_datasets_util.py) | | +| [RL Toolkit](https://github.com/ConvLab/ConvLab-3/tree/master/convlab/policy) | | +| [How to add a new dataset](https://github.com/thu-coai/ConvLab-2/blob/master/tutorials/Add_New_Model.md) | | +| How to add a new model | | +| [Interactive Tool](https://github.com/ConvLab/ConvLab-3/blob/master/deploy) [[demo video]](https://youtu.be/00VWzbcx26E) | | + +## Unified Datasets + +Current datasets in unified data format: (DA-U/DA-S stands for user/system dialog acts) + +| Dataset | Dialogs | Goal | DA-U | DA-S | State | API result | DataBase | +| ------------- | ------- | ------------------ | ------------------ | ------------------ | ------------------ | ------------------ | ------------------ | +| Camrest | 676 | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | | :white_check_mark: | +| WOZ 2.0 | 1200 | | :white_check_mark: | | :white_check_mark: | | | +| KVRET | 3030 | | :white_check_mark: | | :white_check_mark: | :white_check_mark: | | +| DailyDialog | 13118 | | :white_check_mark: | | | | | +| Taskmaster-1 | 13175 | | :white_check_mark: | :white_check_mark: | :white_check_mark: | | | +| Taskmaster-2 | 17303 | | :white_check_mark: | :white_check_mark: | :white_check_mark: | | | +| MultiWOZ 2.1 | 10438 | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | | :white_check_mark: | +| Schema-Guided | 22825 | | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | | +| MetaLWOZ | 40203 | :white_check_mark: | | | | | | +| CrossWOZ (zh) | 6012 | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | +| Taskmaster-3 | 23757 | | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | | + +Unified datasets are available under `data/unified_datasets` directory as well as [Hugging Face Hub](https://huggingface.co/ConvLab). We will continue adding more datasets listed in [this issue](https://github.com/ConvLab/ConvLab-3/issues/11). If you want to add a listed/custom dataset to ConvLab-3, you can create an issue for discussion and then create pull-request. We will list you as the [contributors](#Team) and highly appreciate your contributions! -- download pre-trained BERT models from: +## Models - - [bert-base-uncased](https://huggingface.co/bert-base-uncased) for CrossWOZ-en - - [chinese-bert-wwm-ext](https://huggingface.co/hfl/chinese-bert-wwm-ext) for MultiWOZ-zh +We list newly integrated models in ConvLab-3 that support unified data format and obtain strong performance. You can follow the link for more details about these models. Other models can be used in the same way as in ConvLab-2. - extract it to `./pre-trained-models`. 
+| Task | Models | Input | Output | +| ------------------------------ | ------------------------------------------------------------ | --------------- | ---------------- | +| Response Generation | [T5](https://github.com/ConvLab/ConvLab-3/tree/master/convlab/base_models/t5) | Context | Response | +| Goal-to-Dialogue | [T5](https://github.com/ConvLab/ConvLab-3/tree/master/convlab/base_models/t5) | Goal | Dialog | +| Natural Language Understanding | [T5](https://github.com/ConvLab/ConvLab-3/tree/master/convlab/base_models/t5), [BERTNLU](https://github.com/ConvLab/ConvLab-3/tree/master/convlab/nlu/jointBERT), [MILU](https://github.com/ConvLab/ConvLab-3/tree/master/convlab/nlu/milu) | Context | DA-U | +| Dialog State Tracking | [T5](https://github.com/ConvLab/ConvLab-3/tree/master/convlab/base_models/t5), [SUMBT](https://github.com/ConvLab/ConvLab-3/tree/master/convlab/dst/sumbt), [SetSUMBT](https://github.com/ConvLab/ConvLab-3/tree/master/convlab/dst/setsumbt), TripPy | Context | State | +| RL Policy | [DDPT](https://github.com/ConvLab/ConvLab-3/tree/master/convlab/policy/vtrace_DPT), [PPO](https://github.com/ConvLab/ConvLab-3/tree/master/convlab/policy/ppo), [PG](https://github.com/ConvLab/ConvLab-3/tree/master/convlab/policy/pg) | State, DA-U, DB | DA-S | +| Natural Language Generation | [T5](https://github.com/ConvLab/ConvLab-3/tree/master/convlab/base_models/t5), SC-GPT | DA-S | Response | +| End-to-End | SOLOIST | Context, DB | State, Response | +| User simulator | [TUS](https://github.com/ConvLab/ConvLab-3/tree/master/convlab/policy/tus), [GenTUS](https://github.com/ConvLab/ConvLab-3/tree/master/convlab/policy/genTUS) | Goal, DA-S | DA-U, (Response) | -- for translation-train SUMBT model: +Trained models are available on [Hugging Face Hub](https://huggingface.co/ConvLab). - - [trained on CrossWOZ-en](https://convlab.blob.core.windows.net/convlab-2/crosswoz_en-pytorch_model.bin.zip) - - [trained on MultiWOZ-zh](https://convlab.blob.core.windows.net/convlab-2/multiwoz_zh-pytorch_model.bin.zip) - - Say the data set is CrossWOZ (English), (after extraction) just save the pre-trained model under `./convlab/dst/sumbt/crosswoz_en/pre-trained` and name it with `pytorch_model.bin`. +## Code structure -## Issues -You are welcome to create an issue if you want to request a feature, report a bug or ask a general question. -## Contributions +## Contributing -We welcome contributions from community. +We welcome contributions from community. Please see issues to find what we need. -- If you want to make a big change, we recommend first creating an issue with your design. -- Small contributions can be directly made by a pull request. -- If you like make contributions to our library, see issues to find what we need. +- If you want to add a new dataset, model, or other feature, please describe the dataset/model/feature in an issue before creating pull-request. +- Small change like fixing a bug can be directly made by a pull-request. ## Team -**ConvLab-3** is maintained and developed by Tsinghua University Conversational AI group (THU-coai), the [Dialogue Systems and Machine Learning Group](https://www.cs.hhu.de/en/research-groups/dialog-systems-and-machine-learning.html) at Heinrich Heine University, Düsseldorf, Germany and Microsoft Research (MSR). 
+**ConvLab-3** is maintained and developed by [Tsinghua University Conversational AI](http://coai.cs.tsinghua.edu.cn/) group (THU-COAI), the [Dialogue Systems and Machine Learning Group](https://www.cs.hhu.de/en/research-groups/dialog-systems-and-machine-learning.html) at Heinrich Heine University, Düsseldorf, Germany and Microsoft Research (MSR). -We would like to thank: +We would like to thank all contributors of ConvLab: -Yan Fang, Zhuoer Feng, Jianfeng Gao, Qihan Guo, Kaili Huang, Minlie Huang, Sungjin Lee, Bing Li, Jinchao Li, Xiang Li, Xiujun Li, Jiexi Liu, Lingxiao Luo, Wenchang Ma, Mehrad Moradshahi, Baolin Peng, Runze Liang, Ryuichi Takanobu, Hongru Wang, Jiaxin Wen, Yaoqin Zhang, Zheng Zhang, Qi Zhu, Xiaoyan Zhu, Carel van Niekerk, Christian Geishauser, Hsien-chin Lin, Nurul Lubis, Xiaochen Zhu, Michael Heck, Shutong Feng, Milica Gašić. +Yan Fang, Zhuoer Feng, Jianfeng Gao, Qihan Guo, Kaili Huang, Minlie Huang, Sungjin Lee, Bing Li, Jinchao Li, Xiang Li, Xiujun Li, Jiexi Liu, Lingxiao Luo, Wenchang Ma, Mehrad Moradshahi, Baolin Peng, Runze Liang, Ryuichi Takanobu, Dazhen Wan, Hongru Wang, Jiaxin Wen, Yaoqin Zhang, Zheng Zhang, Qi Zhu, Xiaoyan Zhu, Carel van Niekerk, Christian Geishauser, Hsien-chin Lin, Nurul Lubis, Xiaochen Zhu, Michael Heck, Shutong Feng, Milica Gašić. ## Citing -If you use ConvLab-2 in your research, please cite: +If you use ConvLab-3 in your research, please cite: ``` -@inproceedings{zhu2020convlab, - title={ConvLab-2: An Open-Source Toolkit for Building, Evaluating, and Diagnosing Dialogue Systems}, - author={Qi Zhu and Zheng Zhang and Yan Fang and Xiang Li and Ryuichi Takanobu and Jinchao Li and Baolin Peng and Jianfeng Gao and Xiaoyan Zhu and Minlie Huang}, - year={2020}, - booktitle={Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics}, -} - -@inproceedings{liu2021robustness, - title={Robustness Testing of Language Understanding in Task-Oriented Dialog}, - author={Liu, Jiexi and Takanobu, Ryuichi and Wen, Jiaxin and Wan, Dazhen and Li, Hongguang and Nie, Weiran and Li, Cheng and Peng, Wei and Huang, Minlie}, - year={2021}, - booktitle={Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics}, -} + ``` ## License diff --git a/convlab/base_models/gpt/__init__.py b/convlab/base_models/gpt/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/convlab/base_models/gpt/create_data.py b/convlab/base_models/gpt/create_data.py deleted file mode 100644 index e6c4d67bb4babd509feb026d256264c6ff1c0051..0000000000000000000000000000000000000000 --- a/convlab/base_models/gpt/create_data.py +++ /dev/null @@ -1,39 +0,0 @@ -import os -import json -from tqdm import tqdm -import re -from convlab.util import load_dataset - - -def create_lm_data(dataset, data_dir, args): - data_by_split = dataset - os.makedirs(data_dir, exist_ok=True) - - data_splits = data_by_split.keys() - for data_split in data_splits: - data = [] - for sample in tqdm(data_by_split[data_split], desc=f'{data_split} sample', leave=False): - if args.model_type == 'dialogpt': - dialogue = ' <|endoftext|> '.join([turn['utterance'] for turn in sample['turns']]) + ' <|endoftext|>' - else: - dialogue = '\n'.join([f"{turn['speaker']}: {turn['utterance']}" for turn in sample['turns']]) - data.append(json.dumps({'dialogue': dialogue}, ensure_ascii=False)+'\n') - - file_name = os.path.join(data_dir, f"{data_split}.json") - with open(file_name, "w", encoding='utf-8') as 
f: - f.writelines(data) - - -if __name__ == '__main__': - from argparse import ArgumentParser - parser = ArgumentParser(description="create data for seq2seq training") - parser.add_argument('--tasks', '-t', metavar='task_name', nargs='*', choices=['lm'], help='names of tasks') - parser.add_argument('--datasets', '-d', metavar='dataset_name', nargs='*', help='names of unified datasets') - parser.add_argument('--model_type', '-m', metavar='model_type', help='type of the language model: gpt, dialogpt, ..') - args = parser.parse_args() - print(args) - for dataset_name in tqdm(args.datasets, desc='datasets'): - dataset = load_dataset(dataset_name) - for task_name in tqdm(args.tasks, desc='tasks', leave=False): - data_dir = os.path.join('data', task_name, args.model_type, dataset_name) - eval(f"create_{task_name}_data")(dataset, data_dir, args) diff --git a/convlab/base_models/gpt/keyword_extraction/eval_key2gen.py b/convlab/base_models/gpt/keyword_extraction/eval_key2gen.py deleted file mode 100644 index 6b1068cef045550f57621fe0ab4aad8a4047cfbb..0000000000000000000000000000000000000000 --- a/convlab/base_models/gpt/keyword_extraction/eval_key2gen.py +++ /dev/null @@ -1,61 +0,0 @@ -import json -import datasets -from tabulate import tabulate - -def main(predict_result): - data = { - "grounded keywords": { - "positive_keywords": [], "negative_keywords": None, - "predictions": [], "references": [] - }, - "all keywords": { - "positive_keywords": [], "negative_keywords": [], - "predictions": [], "references": [] - }, - "no keywords": { - "positive_keywords": None, "negative_keywords": None, - "predictions": [], "references": [] - } - } - with open(predict_result) as f: - for line in f: - item = json.loads(line) - prediction = item['predictions'].strip() - reference = item['target'].strip() - if 'all_keywords' in item and item['all_keywords']: - sample_type = 'all keywords' - - positive_keywords = [k for g in item['keywords'] for k in g] - data[sample_type]["positive_keywords"].append(positive_keywords) - - all_keywords = [k for g in item['all_keywords'] for k in g] - for keyword in positive_keywords: - all_keywords.remove(keyword) - data[sample_type]["negative_keywords"].append(all_keywords) - - elif 'keywords' in item and item['keywords']: - sample_type = 'grounded keywords' - - positive_keywords = [k for g in item['keywords'] for k in g] - data[sample_type]["positive_keywords"].append(positive_keywords) - - else: - sample_type = 'no keywords' - - data[sample_type]["predictions"].append(prediction) - data[sample_type]["references"].append(reference) - - metric = datasets.load_metric('./key2gen_metric.py') - table = [] - for sample_type in data: - table.append({'sample_type': sample_type, **metric.compute(**data[sample_type])}) - print(tabulate(table, headers='keys', tablefmt='github')) - - -if __name__ == '__main__': - from argparse import ArgumentParser - parser = ArgumentParser(description="evaluate keywords to response generation performance") - parser.add_argument('--predict_result', '-p', type=str, required=True, help='path to the output file generated_predictions.json') - args = parser.parse_args() - print(args) - main(args.predict_result) diff --git a/convlab/base_models/gpt/keyword_extraction/gen_pretraining_data.py b/convlab/base_models/gpt/keyword_extraction/gen_pretraining_data.py deleted file mode 100644 index b6ef65db298378b744a45130fd71c072243bcfca..0000000000000000000000000000000000000000 --- a/convlab/base_models/gpt/keyword_extraction/gen_pretraining_data.py +++ /dev/null @@ -1,88 
+0,0 @@ -import json -import json_lines -import os -import random -from tqdm import tqdm -from nltk import sent_tokenize - -def main(args): - random.seed(42) - os.makedirs(args.output_dir, exist_ok=True) - filenames = [os.path.join(args.input_dir, f) for (_, _, fs) in os.walk(args.input_dir) for f in fs if 'keywords' in f] - for filename in filenames: - dataset_name = filename.split('/')[-2] - data_split = filename.split('/')[-1].split('_')[-1].split('.')[0] - output_file = os.path.join(args.output_dir, f"{filename.split('/')[-1].split('_')[-1]}") - print(f'processing {dataset_name}: {filename} => {output_file}') - with open(filename, 'rb') as fin, open(output_file, 'w', encoding='utf-8') as fout: - for dial in tqdm(json_lines.reader(fin)): - context = [] - turns_keywords = [turn['keywords'] for turn in dial] - for i, turn in enumerate(dial): - if dataset_name == 'wikidialog': - # skip user turns that generated by T5 in wikidialog - speaker = 'user' if i % 2 == 1 else 'system' - else: - speaker = 'user' if i % 2 == 0 else 'system' - utt = turn['utterance'] - context_seq = '\n'.join([f"{turn['speaker']}: {turn['utt']}" for turn in context]+[f'{speaker}: ']) - context.append({'speaker': speaker, 'utt': utt}) - if i == 0 or (dataset_name == 'wikidialog' and speaker == 'user'): - continue - - if args.mode == 'rg': - input_seq = f'generate a response: all knowledge: | | context:\n\n{context_seq}' - fout.write(json.dumps({ - 'dataset': dataset_name, - 'source': input_seq, - 'target': utt - }, ensure_ascii=False)+'\n') - continue - - if args.mode == 'key2gen': - random.shuffle(turn['keywords']) - for j in range(len(turn['keywords'])): - random.shuffle(turn['keywords'][j]) - keywords = ' | '.join([' : '.join(sent_keywords) for sent_keywords in turn['keywords']]) - input_seq = f'generate a response: grounded knowledge: | {keywords} | context:\n\n{context_seq}' - json2dump = { - 'dataset': dataset_name, - 'source': input_seq, - 'target': utt - } - if data_split == 'validation': - json2dump.update({'keywords': turn['keywords']}) - fout.write(json.dumps(json2dump, ensure_ascii=False)+'\n') - continue - - if args.mode == 'key2gen_noisy': - if random.random() < 0.8: - possible_keywords_sents = turn['keywords'][:] - else: - possible_keywords_sents = [] - num_possible_keywords_turns = min(random.randint(1, 5), len(turns_keywords) - 1) - for turn_keywords in random.sample(turns_keywords[:i] + turns_keywords[i+1:], num_possible_keywords_turns): - possible_keywords_sents.extend(turn_keywords) - random.shuffle(possible_keywords_sents) - possible_keywords = ' | '.join([' : '.join(sent_keywords) for sent_keywords in possible_keywords_sents]) - input_seq = f'generate a response: all knowledge: | {possible_keywords} | context:\n\n{context_seq}' - json2dump = { - 'dataset': dataset_name, - 'source': input_seq, - 'target': utt - } - if data_split == 'validation': - json2dump.update({'keywords': turn['keywords'], 'all_keywords': possible_keywords_sents}) - fout.write(json.dumps(json2dump, ensure_ascii=False)+'\n') - continue - - -if __name__ == '__main__': - from argparse import ArgumentParser - parser = ArgumentParser(description="calculate NLU metrics for unified datasets") - parser.add_argument('--input_dir', '-i', type=str, help='path to the input files') - parser.add_argument('--output_dir', '-o', type=str, help='path to the output files') - parser.add_argument('--mode', '-m', type=str, choices=['rg', 'key2gen', 'key2gen_noisy'], help='which task to perform') - args = parser.parse_args() - print(args) - 
main(args) diff --git a/convlab/base_models/gpt/keyword_extraction/gen_pretraining_data.sh b/convlab/base_models/gpt/keyword_extraction/gen_pretraining_data.sh deleted file mode 100644 index eb67a18b22e0480323f132abbf42a2d1508755b9..0000000000000000000000000000000000000000 --- a/convlab/base_models/gpt/keyword_extraction/gen_pretraining_data.sh +++ /dev/null @@ -1,40 +0,0 @@ -# generate data for response generation, key2gen, key2gen_noisy -for task_name in rg key2gen key2gen_noisy -do - dataset_name="dailydialog+metalwoz+tm1+tm2+tm3+sgd+reddit+wikidialog" - names=$(echo ${dataset_name} | tr "+" "\n") - model_type="gpt" - data_dir=data/${task_name}/${model_type}/${dataset_name} - mkdir -p ${data_dir} - train_file="${data_dir}/train.json" - validation_file="${data_dir}/validation.json" - rm ${train_file} ${validation_file} - for name in ${names} - do - echo "preprocessing ${name}" - python gen_pretraining_data.py -i data/lm/${model_type}/${name} -o data/${task_name}/${model_type}/${name} -m ${task_name} - if [ "${name}" != "${dataset_name}" ]; then - cat "data/${task_name}/${model_type}/${name}/train.json" >> ${train_file} - cat "data/${task_name}/${model_type}/${name}/validation.json" >> ${validation_file} - fi - done -done - -# merge key2gen+key2gen_noisy data -task_name="key2gen+key2gen_noisy" -dataset_name="dailydialog+metalwoz+tm1+tm2+tm3+sgd+reddit+wikidialog" -names=$(echo ${task_name} | tr "+" "\n") -model_type="gpt" -data_dir=data/${task_name}/${model_type}/${dataset_name} -mkdir -p ${data_dir} -train_file="${data_dir}/train.json" -validation_file="${data_dir}/validation.json" -rm ${train_file} ${validation_file} -for name in ${names} -do - echo "preprocessing ${name}" - if [ "${name}" != "${task_name}" ]; then - cat "data/${name}/${model_type}/${dataset_name}/train.json" >> ${train_file} - cat "data/${name}/${model_type}/${dataset_name}/validation.json" >> ${validation_file} - fi -done \ No newline at end of file diff --git a/convlab/base_models/gpt/keyword_extraction/get_keywords.sh b/convlab/base_models/gpt/keyword_extraction/get_keywords.sh deleted file mode 100644 index d3051ba65b2458bfad7af288746a28456f4936e5..0000000000000000000000000000000000000000 --- a/convlab/base_models/gpt/keyword_extraction/get_keywords.sh +++ /dev/null @@ -1,25 +0,0 @@ -task_name="lm" -model_type="gpt" -model_name_or_path="gpt2-large" -keywords_num=100 -keywords_ratio=0.3 -keywords_loss_th=0 -stopwords=True -for dataset_name in dailydialog metalwoz tm1 tm2 tm3 sgd reddit wikidialog -do - data_dir="data/${task_name}/${model_type}/${dataset_name}" - for data_split in validation train - do - token_loss_file="${data_dir}/token_loss_${data_split}.json" - output_file="${data_dir}/keywords_${data_split}.json" - python lmloss2keywords.py \ - --model_type ${model_type} \ - --model_name_or_path ${model_name_or_path} \ - --token_loss_file ${token_loss_file} \ - --keywords_num ${keywords_num} \ - --keywords_ratio ${keywords_ratio} \ - --keywords_loss_th ${keywords_loss_th} \ - --stopwords ${stopwords} \ - --output_file ${output_file} - done -done \ No newline at end of file diff --git a/convlab/base_models/gpt/keyword_extraction/get_token_loss.sh b/convlab/base_models/gpt/keyword_extraction/get_token_loss.sh deleted file mode 100644 index 7c2b57dafa89d0bd711c927a0e97994d43d15bfc..0000000000000000000000000000000000000000 --- a/convlab/base_models/gpt/keyword_extraction/get_token_loss.sh +++ /dev/null @@ -1,35 +0,0 @@ -n_gpus=4 -master_port=23456 -task_name="lm" -model_type="gpt" -cache_dir="../cache" 
-source_column="dialogue" -max_length=512 -model_name_or_path="gpt2-large" -per_device_eval_batch_size=16 - -for dataset_name in dailydialog metalwoz tm1 tm2 tm3 sgd reddit wikidialog -do - data_dir="data/${task_name}/${model_type}/${dataset_name}" - output_dir="output/${task_name}/${model_type}/${dataset_name}" - - python ../create_data.py --tasks ${task_name} --datasets ${dataset_name} --model_type ${model_type} - for data_split in validation train - do - validation_file="${data_dir}/${data_split}.json" - dump_eval_loss_to="${data_dir}/token_loss_${data_split}.json" - rm ${dump_eval_loss_to} - python -m torch.distributed.launch --master_port ${master_port} \ - --nproc_per_node ${n_gpus} ../run_clm.py \ - --dump_eval_loss_to ${dump_eval_loss_to}\ - --model_name_or_path ${model_name_or_path} \ - --output_dir ${data_dir} \ - --validation_file ${validation_file} \ - --source_column ${source_column} \ - --max_length ${max_length} \ - --do_eval \ - --cache_dir ${cache_dir} \ - --preprocessing_num_workers 4 \ - --per_device_eval_batch_size ${per_device_eval_batch_size} - done -done diff --git a/convlab/base_models/gpt/keyword_extraction/key2gen_metric.py b/convlab/base_models/gpt/keyword_extraction/key2gen_metric.py deleted file mode 100644 index d9722d96ca71a961dc7ad837191fa202848111f3..0000000000000000000000000000000000000000 --- a/convlab/base_models/gpt/keyword_extraction/key2gen_metric.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""key2gen Metric""" - -import datasets -import sacrebleu - -# TODO: Add BibTeX citation -_CITATION = """\ -@inproceedings{post-2018-call, - title = "A Call for Clarity in Reporting {BLEU} Scores", - author = "Post, Matt", - booktitle = "Proceedings of the Third Conference on Machine Translation: Research Papers", - month = oct, - year = "2018", - address = "Belgium, Brussels", - publisher = "Association for Computational Linguistics", - url = "https://www.aclweb.org/anthology/W18-6319", - pages = "186--191", -} -""" - -_DESCRIPTION = """\ -Metric to evaluate text-to-text models on the keywords grounded generation task. -""" - -_KWARGS_DESCRIPTION = """ -Calculates corpus-bleu4, positive keywords recall, negative keywords recall -Args: - positive_keywords: list of keywords (list of string) in the ground truth references - negative_keywords: list of keywords (list of string) in the random sampled references - predictions: list of predictions to score. Each predictions - should be a string. - references: list of reference for each prediction. Each - reference should be a string. 
-Returns: - bleu: corpus-bleu score - positive_keywords_recall: how many keywords in the ground truth response are generated, micro-averaged - negative_keywords_recall: how many keywords in the random sampled response are generated, micro-averaged -""" - - -@datasets.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION) -class Key2GenMetrics(datasets.Metric): - """Metric to evaluate text-to-text models on the keywords grounded generation task.""" - - def _info(self): - return datasets.MetricInfo( - description=_DESCRIPTION, - citation=_CITATION, - inputs_description=_KWARGS_DESCRIPTION, - # This defines the format of each prediction and reference - features=datasets.Features({ - 'predictions': datasets.Value('string'), - 'references': datasets.Value('string'), - }) - ) - - def _compute(self, predictions, references, positive_keywords, negative_keywords=None): - """Returns the scores: bleu, positive_keywords_recall, negative_keywords_recall""" - bleu = sacrebleu.corpus_bleu(predictions, [references], lowercase=True).score - cnt = {'pos': 0, 'neg': 0, 'pos_recall': 0, 'neg_recall': 0} - if positive_keywords: - if not negative_keywords: - negative_keywords = [[]] * len(positive_keywords) - for poskeys, negkeys, prediction in zip(positive_keywords, negative_keywords, predictions): - cnt['pos'] += len(poskeys) - cnt['neg'] += len(negkeys) - - prediction = prediction.lower() - for key in poskeys: - key = key.lower() - if key in prediction: - cnt['pos_recall'] += 1 - - for key in negkeys: - key = key.lower() - if key in prediction: - cnt['neg_recall'] += 1 - - return { - "bleu": bleu, - "positive_keywords_recall": cnt['pos_recall']/cnt['pos'] if cnt['pos'] > 0 else 0, - "negative_keywords_recall": cnt['neg_recall']/cnt['neg'] if cnt['neg'] > 0 else 0, - } diff --git a/convlab/base_models/gpt/keyword_extraction/lmloss2keywords.py b/convlab/base_models/gpt/keyword_extraction/lmloss2keywords.py deleted file mode 100644 index bb221f6d78b026c61f10846c385b5fa903c64e7f..0000000000000000000000000000000000000000 --- a/convlab/base_models/gpt/keyword_extraction/lmloss2keywords.py +++ /dev/null @@ -1,174 +0,0 @@ -import json -import json_lines -from pprint import pprint -import os -from tqdm import tqdm -import numpy as np -from nltk.corpus import stopwords -from nltk.tokenize import word_tokenize, PunktSentenceTokenizer -from transformers import GPT2Tokenizer -from string import punctuation - - -def merge_tokens(tokens, losses): - """Merge tokens into words""" - res = [] - i = 0 - while i < len(tokens): - token = tokens[i] - loss = losses[i] - if token in ['Ġ', 'Ċ']: - # "Ġ" means " ", "Ċ" means "\n" - if token == 'Ċ' and i < len(tokens) - 1 and not tokens[i+1].startswith('Ġ'): - tokens[i+1] = 'Ġ'+tokens[i+1] - i += 1 - continue - if token in ['user', 'system', 'Ġuser', 'Ġsystem'] and i < len(tokens)-1 and tokens[i+1] == ':': - if i > 0: - tokens[i+1] = '<|endoftext|>' - i += 1 - else: - i += 2 - continue - if token.startswith('Ġ'): - # token = token.replace("Ġ", "") - res.append([[token], [loss]]) - elif token == '<|endoftext|>': - res.append([[token], [0.]]) - else: - assert 'Ġ' not in token - if len(res) > 0: - res[-1][0].append(token) - res[-1][1].append(loss) - else: - res.append([[token], [loss]]) - i += 1 - return res - - -def convert_token_loss2word_loss(token_loss_file): - """generate a word loss file according to the token loss file""" - word_loss_file = os.path.join(os.path.dirname(token_loss_file), token_loss_file.split('/')[-1].replace('token', 'word')) - fin = 
open(token_loss_file, 'rb') - fout = open(word_loss_file, 'w', encoding='utf-8') - - for item in tqdm(json_lines.reader(fin)): - tokens, losses = item['tokens'], item['losses'] - assert len(tokens) == len(losses) - word2losses = merge_tokens(tokens, losses) - fout.write(json.dumps({"words": [x[0] for x in word2losses], "losses": [x[1] for x in word2losses]}, ensure_ascii=False)+'\n') - - fin.close() - fout.close() - return word_loss_file - -def main(args): - if not args.word_loss_file: - word_loss_file = convert_token_loss2word_loss(args.token_loss_file) - else: - word_loss_file = args.word_loss_file - - if not args.output_file: - return - - stop_words = set(stopwords.words('english')) - tokenizer = GPT2Tokenizer.from_pretrained(args.model_name_or_path) - sent_tokenizer = PunktSentenceTokenizer() - - def keywords_filter(words, losses): - word_loss_pairs = list(zip(words, losses)) - index2keyword = {} - index2turn_sent = {} - num_turns = 0 - turns_sent_spans = [list(sent_tokenizer.span_tokenize(utt)) for utt in ''.join(words).strip().split('<|endoftext|>')] - utt = '' - for i, word_loss_pair in enumerate(word_loss_pairs): - if word_loss_pair[0].startswith('<|endoftext|>'): - num_turns += 1 - utt = '' - continue - utt += word_loss_pair[0] - words = word_tokenize(word_loss_pair[0]) - if args.stopwords and any([w.lower() in stop_words for w in words]): - # skip stopwords - continue - if word_loss_pair[1] <= args.keywords_loss_th: - # skip if loss is too small - continue - # strip punctuation - strip_punctuation = word_loss_pair[0].strip(punctuation).strip() - if len(strip_punctuation) == 0: - # skip punctuation - continue - index2keyword[i] = strip_punctuation - for sent_idx, (sent_start, sent_end) in enumerate(turns_sent_spans[num_turns]): - if len(utt.strip()) <= sent_end: - index2turn_sent[i] = (num_turns, sent_idx) - break - candidate_indexes = list(index2keyword.keys()) - topk = min(round(args.keywords_ratio*(len(word_loss_pairs)-num_turns)), args.keywords_num) - topk_indexes = sorted(candidate_indexes, key=lambda x: word_loss_pairs[x][1], reverse=True)[:topk] - topk_indexes = sorted(topk_indexes) - keywords = [] - keywords_turn_sent2idx = {} - for i, index in enumerate(topk_indexes): - if i > 0 and index == topk_indexes[i-1] + 1 and \ - word_loss_pairs[index][0].strip().startswith(index2keyword[index]) and \ - word_loss_pairs[topk_indexes[i-1]][0].strip().endswith(index2keyword[topk_indexes[i-1]]): - keywords[-1]+= ' '+index2keyword[index] - else: - keywords_turn_sent2idx.setdefault(index2turn_sent[index][0], {}) - keywords_turn_sent2idx[index2turn_sent[index][0]].setdefault(index2turn_sent[index][1], []) - keywords_turn_sent2idx[index2turn_sent[index][0]][index2turn_sent[index][1]].append(len(keywords)) - keywords.append(index2keyword[index]) - - return keywords, keywords_turn_sent2idx - - fin = open(word_loss_file, 'rb') - fout = open(args.output_file, 'w', encoding='utf-8') - - for item in tqdm(json_lines.reader(fin)): - words = [tokenizer.convert_tokens_to_string(tokens) for tokens in item['words']] - losses = [np.mean(loss) for loss in item['losses']] - dialog_keywords, keywords_turn_sent2idx = keywords_filter(words, losses) - # print(keywords_turn_sent2idx) - turns = [] - turn = {'words': [], 'losses': []} - for i, (word, loss) in enumerate(zip(words, losses)): - if word != '<|endoftext|>': - turn['words'].append(word) - turn['losses'].append(loss) - if word == '<|endoftext|>' or i == len(words) - 1: - # switch turn - turn['utterance'] = ''.join(turn['words']).strip() - # 1) 
extract keywords according to LM loss within the turn - # keywords, _ = keywords_filter(turn['words'], turn['losses']) - # turn['turn-level_keywords'] = keywords - # 1) extract keywords according to LM loss over the dialog, and group them by sentence - turn['keywords'] = [[dialog_keywords[idx] for idx in k_idxes] for sent_idx, k_idxes in keywords_turn_sent2idx.get(len(turns), {}).items()] - turn.pop('words') - turn.pop('losses') - turns.append(turn) - turn = {'words': [], 'losses': []} - - fout.write(json.dumps(turns, ensure_ascii=False)+'\n') - - fin.close() - fout.close() - - -if __name__ == '__main__': - from argparse import ArgumentParser - parser = ArgumentParser(description="extract keywords according to lm loss") - parser.add_argument('--model_type', '-m', type=str, help='gpt or dialogpt') - parser.add_argument('--model_name_or_path', type=str, help='model name or path') - parser.add_argument('--token_loss_file', '-t', type=str, help='path to the token loss file that contains two columns: [tokens, losses]') - parser.add_argument('--word_loss_file', '-w', type=str, help='path to the token loss file that contains two columns: [tokens, losses]') - parser.add_argument('--output_file', '-o', type=str, help='path to the output file') - parser.add_argument('--keywords_num', '-n', type=int, default=100, help='how many words in an utterance serve as keywords') - parser.add_argument('--keywords_ratio', '-r', type=float, default=1.0, help='how many words (in ratio) in an utterance serve as keywords') - parser.add_argument('--keywords_loss_th', '-th', type=float, default=0., help='loss threshold for the keywords') - parser.add_argument('--stopwords', '-s', type=lambda x: bool(eval(x)), default=True, help='filter out stopwords') - args = parser.parse_args() - print(args) - main(args) diff --git a/convlab/base_models/gpt/keyword_extraction/merge_keywords_res.py b/convlab/base_models/gpt/keyword_extraction/merge_keywords_res.py deleted file mode 100644 index 94af288a38845f1cf72470da4af916be5a6f0dda..0000000000000000000000000000000000000000 --- a/convlab/base_models/gpt/keyword_extraction/merge_keywords_res.py +++ /dev/null @@ -1,29 +0,0 @@ -import json - -def main(args): - filename2data = {f.split('/')[-1]: json.load(open(f)) for f in args.keywords_files} - first_filename = args.keywords_files[0].split('/')[-1] - dialogs = [] - for i in range(len(filename2data[first_filename])): - turns = [] - for j in range(min([len(filename2data[filename][i]) for filename in filename2data])): - utt = filename2data[first_filename][i][j]['utterance'] - keywords = {filename.split('_')[3]+'_nonstopword'+filename.split('_')[-1]: ' | '.join(filename2data[filename][i][j]['keywords']) for filename in filename2data} - turns.append({ - "utterance": utt, - **keywords - }) - dialogs.append(turns) - json.dump(dialogs, open(args.output_file, "w", encoding='utf-8'), indent=2, ensure_ascii=False) - - - - -if __name__ == '__main__': - from argparse import ArgumentParser - parser = ArgumentParser(description="calculate NLU metrics for unified datasets") - parser.add_argument('--keywords_files', '-f', metavar='keywords_files', nargs='*', help='keywords files') - parser.add_argument('--output_file', '-o', type=str, help='path to the output file') - args = parser.parse_args() - print(args) - main(args) diff --git a/convlab/base_models/gpt/keyword_extraction/train_lm_dialogpt.sh b/convlab/base_models/gpt/keyword_extraction/train_lm_dialogpt.sh deleted file mode 100644 index 
f260f7071529e6837f9c7807d6d5ecf2469494a2..0000000000000000000000000000000000000000 --- a/convlab/base_models/gpt/keyword_extraction/train_lm_dialogpt.sh +++ /dev/null @@ -1,46 +0,0 @@ -set -e -n_gpus=1 -task_name="lm" -dataset_name="multiwoz21" -model_type="dialogpt" -data_dir="data/${task_name}/${dataset_name}/${model_type}" -output_dir="output/${task_name}/${dataset_name}/${model_type}" -cache_dir="../cache" -logging_dir="${output_dir}/runs" -train_file="${data_dir}/train.json" -validation_file="${data_dir}/validation.json" -test_file="${data_dir}/test.json" -source_column="dialogue" -max_length=512 -model_name_or_path="microsoft/DialoGPT-large" -per_device_train_batch_size=16 -per_device_eval_batch_size=16 -gradient_accumulation_steps=4 -lr=5e-5 -num_train_epochs=3 - -python ../create_data.py --tasks ${task_name} --datasets ${dataset_name} --model_type ${model_type} - -python ../run_clm.py \ - --model_name_or_path ${model_name_or_path} \ - --train_file ${train_file} \ - --validation_file ${validation_file} \ - --source_column ${source_column} \ - --max_length ${max_length} \ - --do_train \ - --do_eval \ - --save_strategy epoch \ - --evaluation_strategy epoch \ - --load_best_model_at_end \ - --prediction_loss_only \ - --cache_dir ${cache_dir} \ - --output_dir ${output_dir} \ - --logging_dir ${logging_dir} \ - --overwrite_output_dir \ - --preprocessing_num_workers 4 \ - --per_device_train_batch_size ${per_device_train_batch_size} \ - --per_device_eval_batch_size ${per_device_eval_batch_size} \ - --gradient_accumulation_steps ${gradient_accumulation_steps} \ - --learning_rate ${lr} \ - --num_train_epochs ${num_train_epochs} \ - --gradient_checkpointing diff --git a/convlab/base_models/gpt/keyword_extraction/train_lm_gpt.sh b/convlab/base_models/gpt/keyword_extraction/train_lm_gpt.sh deleted file mode 100644 index 82c63a1f4c4a1633ad5e7d4a721a3bbac558cefb..0000000000000000000000000000000000000000 --- a/convlab/base_models/gpt/keyword_extraction/train_lm_gpt.sh +++ /dev/null @@ -1,46 +0,0 @@ -set -e -n_gpus=1 -task_name="lm" -dataset_name="multiwoz21" -model_type="gpt" -data_dir="data/${task_name}/${dataset_name}/${model_type}" -output_dir="output/${task_name}/${dataset_name}/${model_type}" -cache_dir="../cache" -logging_dir="${output_dir}/runs" -train_file="${data_dir}/train.json" -validation_file="${data_dir}/validation.json" -test_file="${data_dir}/test.json" -source_column="dialogue" -max_length=512 -model_name_or_path="gpt2-large" -per_device_train_batch_size=16 -per_device_eval_batch_size=16 -gradient_accumulation_steps=4 -lr=5e-5 -num_train_epochs=3 - -python ../create_data.py --tasks ${task_name} --datasets ${dataset_name} --model_type ${model_type} - -python ../run_clm.py \ - --model_name_or_path ${model_name_or_path} \ - --train_file ${train_file} \ - --validation_file ${validation_file} \ - --source_column ${source_column} \ - --max_length ${max_length} \ - --do_train \ - --do_eval \ - --save_strategy epoch \ - --evaluation_strategy epoch \ - --load_best_model_at_end \ - --prediction_loss_only \ - --cache_dir ${cache_dir} \ - --output_dir ${output_dir} \ - --logging_dir ${logging_dir} \ - --overwrite_output_dir \ - --preprocessing_num_workers 4 \ - --per_device_train_batch_size ${per_device_train_batch_size} \ - --per_device_eval_batch_size ${per_device_eval_batch_size} \ - --gradient_accumulation_steps ${gradient_accumulation_steps} \ - --learning_rate ${lr} \ - --num_train_epochs ${num_train_epochs} \ - --gradient_checkpointing diff --git 
a/convlab/base_models/gpt/keyword_extraction/train_t5_key2gen+key2gen_noisy.sh b/convlab/base_models/gpt/keyword_extraction/train_t5_key2gen+key2gen_noisy.sh deleted file mode 100644 index 8e0b3617210408d3226bd7da9f675534c9458398..0000000000000000000000000000000000000000 --- a/convlab/base_models/gpt/keyword_extraction/train_t5_key2gen+key2gen_noisy.sh +++ /dev/null @@ -1,51 +0,0 @@ -set -e -n_gpus=8 -master_port=23456 -task_name="key2gen+key2gen_noisy" -dataset_name="dailydialog+metalwoz+tm1+tm2+tm3+sgd+reddit+wikidialog" -model_type="gpt" -model_name="t5-small" -data_dir="data/${task_name}/${model_type}/${dataset_name}" -output_dir="output/${task_name}/${model_name}/${dataset_name}" -cache_dir="../cache" -logging_dir="${output_dir}/runs" -train_file="${data_dir}/train.json" -source_column="source" -target_column="target" -truncation_side="left" -max_source_length=512 -max_target_length=128 -model_name_or_path="${model_name}" -per_device_train_batch_size=64 -per_device_eval_batch_size=128 -gradient_accumulation_steps=1 -num_workers=16 -lr=1e-3 -num_train_epochs=1 - -python -m torch.distributed.launch --master_port ${master_port} \ - --nproc_per_node ${n_gpus} ../../t5/run_seq2seq.py \ - --task_name ${task_name} \ - --train_file ${train_file} \ - --source_column ${source_column} \ - --target_column ${target_column} \ - --max_source_length ${max_source_length} \ - --max_target_length ${max_target_length} \ - --truncation_side ${truncation_side} \ - --model_name_or_path ${model_name_or_path} \ - --do_train \ - --save_steps 5000 \ - --save_total_limit 1 \ - --cache_dir ${cache_dir} \ - --output_dir ${output_dir} \ - --logging_dir ${logging_dir} \ - --preprocessing_num_workers ${num_workers} \ - --dataloader_num_workers ${num_workers} \ - --per_device_train_batch_size ${per_device_train_batch_size} \ - --per_device_eval_batch_size ${per_device_eval_batch_size} \ - --gradient_accumulation_steps ${gradient_accumulation_steps} \ - --learning_rate ${lr} \ - --num_train_epochs ${num_train_epochs} \ - --optim adafactor \ - --lr_scheduler_type constant \ - --gradient_checkpointing diff --git a/convlab/base_models/gpt/keyword_extraction/train_t5_rg.sh b/convlab/base_models/gpt/keyword_extraction/train_t5_rg.sh deleted file mode 100644 index 8d9a019bd0fa10d63586c023705807a3eafd5ff0..0000000000000000000000000000000000000000 --- a/convlab/base_models/gpt/keyword_extraction/train_t5_rg.sh +++ /dev/null @@ -1,51 +0,0 @@ -set -e -n_gpus=8 -master_port=23456 -task_name="rg" -dataset_name="dailydialog+metalwoz+tm1+tm2+tm3+sgd+reddit+wikidialog" -model_type="gpt" -model_name="t5-small" -data_dir="data/${task_name}/${model_type}/${dataset_name}" -output_dir="output/${task_name}/${model_name}/${dataset_name}" -cache_dir="../cache" -logging_dir="${output_dir}/runs" -train_file="${data_dir}/train.json" -source_column="source" -target_column="target" -truncation_side="left" -max_source_length=512 -max_target_length=128 -model_name_or_path="${model_name}" -per_device_train_batch_size=64 -per_device_eval_batch_size=128 -gradient_accumulation_steps=1 -num_workers=16 -lr=1e-3 -num_train_epochs=1 - -python -m torch.distributed.launch --master_port ${master_port} \ - --nproc_per_node ${n_gpus} ../../t5/run_seq2seq.py \ - --task_name ${task_name} \ - --train_file ${train_file} \ - --source_column ${source_column} \ - --target_column ${target_column} \ - --max_source_length ${max_source_length} \ - --max_target_length ${max_target_length} \ - --truncation_side ${truncation_side} \ - --model_name_or_path 
${model_name_or_path} \ - --do_train \ - --save_steps 5000 \ - --save_total_limit 1 \ - --cache_dir ${cache_dir} \ - --output_dir ${output_dir} \ - --logging_dir ${logging_dir} \ - --preprocessing_num_workers ${num_workers} \ - --dataloader_num_workers ${num_workers} \ - --per_device_train_batch_size ${per_device_train_batch_size} \ - --per_device_eval_batch_size ${per_device_eval_batch_size} \ - --gradient_accumulation_steps ${gradient_accumulation_steps} \ - --learning_rate ${lr} \ - --num_train_epochs ${num_train_epochs} \ - --optim adafactor \ - --lr_scheduler_type constant \ - --gradient_checkpointing diff --git a/convlab/base_models/gpt/keyword_extraction/train_t5_rg_key2gen+key2gen_noisy.sh b/convlab/base_models/gpt/keyword_extraction/train_t5_rg_key2gen+key2gen_noisy.sh deleted file mode 100644 index 75b79932bb94b0699d2e2349a4c8cb8846915cb3..0000000000000000000000000000000000000000 --- a/convlab/base_models/gpt/keyword_extraction/train_t5_rg_key2gen+key2gen_noisy.sh +++ /dev/null @@ -1,51 +0,0 @@ -set -e -n_gpus=8 -master_port=23456 -task_name="key2gen+key2gen_noisy" -dataset_name="dailydialog+metalwoz+tm1+tm2+tm3+sgd+reddit+wikidialog" -model_type="gpt" -model_name="t5-small" -data_dir="data/${task_name}/${model_type}/${dataset_name}" -output_dir="output/${task_name}/${model_name}/${dataset_name}" -cache_dir="../cache" -logging_dir="${output_dir}/runs" -train_file="${data_dir}/train.json" -source_column="source" -target_column="target" -truncation_side="left" -max_source_length=512 -max_target_length=128 -model_name_or_path="output/rg/${model_name}/${dataset_name}" -per_device_train_batch_size=64 -per_device_eval_batch_size=128 -gradient_accumulation_steps=1 -num_workers=16 -lr=1e-3 -num_train_epochs=1 - -python -m torch.distributed.launch --master_port ${master_port} \ - --nproc_per_node ${n_gpus} ../../t5/run_seq2seq.py \ - --task_name ${task_name} \ - --train_file ${train_file} \ - --source_column ${source_column} \ - --target_column ${target_column} \ - --max_source_length ${max_source_length} \ - --max_target_length ${max_target_length} \ - --truncation_side ${truncation_side} \ - --model_name_or_path ${model_name_or_path} \ - --do_train \ - --save_steps 5000 \ - --save_total_limit 1 \ - --cache_dir ${cache_dir} \ - --output_dir ${output_dir} \ - --logging_dir ${logging_dir} \ - --preprocessing_num_workers ${num_workers} \ - --dataloader_num_workers ${num_workers} \ - --per_device_train_batch_size ${per_device_train_batch_size} \ - --per_device_eval_batch_size ${per_device_eval_batch_size} \ - --gradient_accumulation_steps ${gradient_accumulation_steps} \ - --learning_rate ${lr} \ - --num_train_epochs ${num_train_epochs} \ - --optim adafactor \ - --lr_scheduler_type constant \ - --gradient_checkpointing diff --git a/convlab/base_models/gpt/run_clm.py b/convlab/base_models/gpt/run_clm.py deleted file mode 100644 index ace68609af00bb6a05d3b6c45378719a98732414..0000000000000000000000000000000000000000 --- a/convlab/base_models/gpt/run_clm.py +++ /dev/null @@ -1,564 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2020 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Fine-tuning the library models for causal language modeling (GPT, GPT-2, CTRL, ...) on a text file or a dataset. -Modified from https://github.com/huggingface/transformers/blob/main/examples/pytorch/language-modeling/run_clm.py -Here is the full list of checkpoints on the hub that can be fine-tuned by this script: -https://huggingface.co/models?filter=text-generation -""" -# You can also adapt this script on your own causal language modeling task. Pointers for this are left as comments. - -import logging -import math -import os -import sys -from dataclasses import dataclass, field -from itertools import chain -from typing import Optional - -import datasets -from datasets import load_dataset -from tqdm import tqdm -from torch.utils.data import DataLoader -import torch -import json - -import transformers -from transformers import ( - CONFIG_MAPPING, - MODEL_FOR_CAUSAL_LM_MAPPING, - AutoConfig, - AutoModelForCausalLM, - AutoTokenizer, - HfArgumentParser, - TrainingArguments, - DataCollatorForTokenClassification, - is_torch_tpu_available, - set_seed, -) -from transformers.trainer_utils import get_last_checkpoint -from transformers.utils import check_min_version -from transformers.utils.versions import require_version -from convlab.base_models.gpt.trainer import DumpTokenLossTrainer - - -# Will error if the minimal version of Transformers is not installed. Remove at your own risks. -check_min_version("4.17.0") - -require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt") - -logger = logging.getLogger(__name__) - - -MODEL_CONFIG_CLASSES = list(MODEL_FOR_CAUSAL_LM_MAPPING.keys()) -MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES) - - -@dataclass -class ModelArguments: - """ - Arguments pertaining to which model/config/tokenizer we are going to fine-tune, or train from scratch. - """ - - model_name_or_path: Optional[str] = field( - default=None, - metadata={ - "help": "The model checkpoint for weights initialization." - "Don't set if you want to train a model from scratch." - }, - ) - model_type: Optional[str] = field( - default=None, - metadata={"help": "If training from scratch, pass a model type from the list: " + ", ".join(MODEL_TYPES)}, - ) - config_overrides: Optional[str] = field( - default=None, - metadata={ - "help": "Override some existing default config settings when a model is trained from scratch. 
Example: " - "n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index" - }, - ) - config_name: Optional[str] = field( - default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} - ) - tokenizer_name: Optional[str] = field( - default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} - ) - cache_dir: Optional[str] = field( - default=None, - metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, - ) - use_fast_tokenizer: bool = field( - default=True, - metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, - ) - truncation_side: Optional[str] = field( - default="right", - metadata={"help": "Which side to truncate, left or right."} - ) - model_revision: str = field( - default="main", - metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, - ) - use_auth_token: bool = field( - default=False, - metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." - }, - ) - resize_position_embeddings: Optional[bool] = field( - default=None, - metadata={ - "help": "Whether to automatically resize the position embeddings if `max_source_length` exceeds " - "the model's position embeddings." - }, - ) - - def __post_init__(self): - if self.config_overrides is not None and (self.config_name is not None or self.model_name_or_path is not None): - raise ValueError( - "--config_overrides can't be used in combination with --config_name or --model_name_or_path" - ) - - -@dataclass -class DataTrainingArguments: - """ - Arguments pertaining to what data we are going to input our model for training and eval. - """ - - dataset_name: Optional[str] = field( - default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} - ) - dataset_config_name: Optional[str] = field( - default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} - ) - source_column: Optional[str] = field( - default=None, - metadata={"help": "The name of the column in the datasets containing the texts."}, - ) - train_file: Optional[str] = field( - default=None, metadata={"help": "The input training data file (a text, jsonlines or csv file)."} - ) - validation_file: Optional[str] = field( - default=None, - metadata={ - "help": "An optional input evaluation data file to evaluate the metrics on (a text, jsonlines or csv file)." - }, - ) - dump_eval_loss_to: Optional[str] = field( - default=None, metadata={"help": "Where to dump the tokens' losses in the evaluation data, default not to"} - ) - overwrite_cache: bool = field( - default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} - ) - preprocessing_num_workers: Optional[int] = field( - default=None, - metadata={"help": "The number of processes to use for the preprocessing."}, - ) - max_length: Optional[int] = field( - default=1024, - metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." - }, - ) - pad_to_max_length: bool = field( - default=False, - metadata={ - "help": "Whether to pad all samples to model maximum sentence length. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " - "efficient on GPU but very bad for TPU." 
- }, - ) - max_train_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." - }, - ) - max_eval_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." - }, - ) - ignore_pad_token_for_loss: bool = field( - default=True, - metadata={ - "help": "Whether to ignore the tokens corresponding to padded labels in the loss computation or not." - }, - ) - validation_split_percentage: Optional[int] = field( - default=5, - metadata={ - "help": "The percentage of the train set used as validation set in case there's no validation split" - }, - ) - keep_linebreaks: bool = field( - default=True, metadata={"help": "Whether to keep line breaks when using TXT files or not."} - ) - - def __post_init__(self): - if self.dataset_name is None and self.train_file is None and self.validation_file is None: - raise ValueError("Need either a dataset name or a training/validation file.") - else: - if self.train_file is not None: - extension = self.train_file.split(".")[-1] - assert extension in ["csv", "json", "txt"], "`train_file` should be a csv, a json or a txt file." - if self.validation_file is not None: - extension = self.validation_file.split(".")[-1] - assert extension in ["csv", "json", "txt"], "`validation_file` should be a csv, a json or a txt file." - - -def main(): - # See all possible arguments in src/transformers/training_args.py - # or by passing the --help flag to this script. - # We now keep distinct sets of args, for a cleaner separation of concerns. - - parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments)) - if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): - # If we pass only one argument to the script and it's the path to a json file, - # let's parse it to get our arguments. - model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) - else: - model_args, data_args, training_args = parser.parse_args_into_dataclasses() - - # Setup logging - logging.basicConfig( - format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", - datefmt="%m/%d/%Y %H:%M:%S", - handlers=[logging.StreamHandler(sys.stdout)], - ) - log_level = training_args.get_process_log_level() - logger.setLevel(log_level) - datasets.utils.logging.set_verbosity(log_level) - transformers.utils.logging.set_verbosity(log_level) - transformers.utils.logging.enable_default_handler() - transformers.utils.logging.enable_explicit_format() - - # Log on each process the small summary: - logger.warning( - f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" - + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" - ) - logger.info(f"Training/evaluation parameters {training_args}") - - # Detecting last checkpoint. - last_checkpoint = None - if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir: - last_checkpoint = get_last_checkpoint(training_args.output_dir) - if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0: - raise ValueError( - f"Output directory ({training_args.output_dir}) already exists and is not empty. " - "Use --overwrite_output_dir to overcome." 
- ) - elif last_checkpoint is not None and training_args.resume_from_checkpoint is None: - logger.info( - f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change " - "the `--output_dir` or add `--overwrite_output_dir` to train from scratch." - ) - - # Set seed before initializing model. - set_seed(training_args.seed) - - # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) - # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/ - # (the dataset will be downloaded automatically from the datasets Hub). - # - # For CSV/JSON files, this script will use the column called 'text' or the first column if no column called - # 'text' is found. You can easily tweak this behavior (see below). - # - # In distributed training, the load_dataset function guarantee that only one local process can concurrently - # download the dataset. - if data_args.dataset_name is not None: - # Downloading and loading a dataset from the hub. - raw_datasets = load_dataset( - data_args.dataset_name, - data_args.dataset_config_name, - cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, - ) - if "validation" not in raw_datasets.keys(): - raw_datasets["validation"] = load_dataset( - data_args.dataset_name, - data_args.dataset_config_name, - split=f"train[:{data_args.validation_split_percentage}%]", - cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, - ) - raw_datasets["train"] = load_dataset( - data_args.dataset_name, - data_args.dataset_config_name, - split=f"train[{data_args.validation_split_percentage}%:]", - cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, - ) - else: - data_files = {} - dataset_args = {} - if data_args.train_file is not None: - data_files["train"] = data_args.train_file - if data_args.validation_file is not None: - data_files["validation"] = data_args.validation_file - extension = ( - data_args.train_file.split(".")[-1] - if data_args.train_file is not None - else data_args.validation_file.split(".")[-1] - ) - if extension == "txt": - extension = "text" - dataset_args["keep_linebreaks"] = data_args.keep_linebreaks - raw_datasets = load_dataset( - extension, - data_files=data_files, - cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, - **dataset_args, - ) - # If no validation data is there, validation_split_percentage will be used to divide the dataset. - if "validation" not in raw_datasets.keys(): - raw_datasets["validation"] = load_dataset( - extension, - data_files=data_files, - split=f"train[:{data_args.validation_split_percentage}%]", - cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, - **dataset_args, - ) - raw_datasets["train"] = load_dataset( - extension, - data_files=data_files, - split=f"train[{data_args.validation_split_percentage}%:]", - cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, - **dataset_args, - ) - - # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at - # https://huggingface.co/docs/datasets/loading_datasets.html. - - # Load pretrained model and tokenizer - # - # Distributed training: - # The .from_pretrained methods guarantee that only one local process can concurrently - # download model & vocab. 
- config_kwargs = { - "cache_dir": model_args.cache_dir, - "revision": model_args.model_revision, - "use_auth_token": True if model_args.use_auth_token else None, - } - if model_args.config_name: - config = AutoConfig.from_pretrained(model_args.config_name, **config_kwargs) - elif model_args.model_name_or_path: - config = AutoConfig.from_pretrained(model_args.model_name_or_path, **config_kwargs) - else: - config = CONFIG_MAPPING[model_args.model_type]() - logger.warning("You are instantiating a new config instance from scratch.") - if model_args.config_overrides is not None: - logger.info(f"Overriding config: {model_args.config_overrides}") - config.update_from_string(model_args.config_overrides) - logger.info(f"New config: {config}") - - tokenizer_kwargs = { - "cache_dir": model_args.cache_dir, - "use_fast": model_args.use_fast_tokenizer, - "truncation_side": model_args.truncation_side, - "revision": model_args.model_revision, - "use_auth_token": True if model_args.use_auth_token else None, - } - if model_args.tokenizer_name: - tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name, **tokenizer_kwargs) - elif model_args.model_name_or_path: - tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, **tokenizer_kwargs) - else: - raise ValueError( - "You are instantiating a new tokenizer from scratch. This is not supported by this script." - "You can do it from another script, save it, and load it from here, using --tokenizer_name." - ) - - if not tokenizer.pad_token: - tokenizer.pad_token = tokenizer.eos_token - - if model_args.model_name_or_path: - model = AutoModelForCausalLM.from_pretrained( - model_args.model_name_or_path, - from_tf=bool(".ckpt" in model_args.model_name_or_path), - config=config, - cache_dir=model_args.cache_dir, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) - else: - model = AutoModelForCausalLM.from_config(config) - n_params = sum(dict((p.data_ptr(), p.numel()) for p in model.parameters()).values()) - logger.info(f"Training new model from scratch - Total size={n_params/2**20:.2f}M params") - - model.resize_token_embeddings(len(tokenizer)) - - if training_args.gradient_checkpointing: - # use_cache=True is incompatible with gradient checkpointing. - config.use_cache = False - - # Preprocessing the datasets. - # First we tokenize all the texts. - if training_args.do_train: - column_names = raw_datasets["train"].column_names - elif training_args.do_eval: - column_names = raw_datasets["validation"].column_names - else: - logger.info("There is nothing to do. Please pass `do_train` and/or `do_eval`.") - return - if data_args.source_column is None: - source_column = column_names[0] - else: - source_column = data_args.source_column - if source_column not in column_names: - raise ValueError( - f"--source_column' value '{data_args.source_column}' needs to be one of: {', '.join(column_names)}" - ) - - def preprocess_function(examples): - - inputs = [] - for i in range(len(examples[source_column])): - if len(examples[source_column][i]) > 0: - inputs.append(examples[source_column][i]) - - padding = "max_length" if data_args.pad_to_max_length else False - model_inputs = tokenizer(inputs, max_length=data_args.max_length, padding=padding, truncation=True) - - # If we are padding here, replace all tokenizer.pad_token_id in the labels by -100 when we want to ignore - # padding in the loss. Else pad in data_collator. 
- if padding == "max_length" and data_args.ignore_pad_token_for_loss: - model_inputs["labels"] = [ - [(l if l != tokenizer.pad_token_id else -100) for l in label] for label in model_inputs["input_ids"] - ] - else: - model_inputs["labels"] = model_inputs["input_ids"].copy() - - return model_inputs - - with training_args.main_process_first(desc="dataset map tokenization"): - tokenized_datasets = raw_datasets.map( - preprocess_function, - batched=True, - num_proc=data_args.preprocessing_num_workers, - remove_columns=column_names, - load_from_cache_file=not data_args.overwrite_cache, - desc="Running tokenizer on dataset", - ) - - lm_datasets = tokenized_datasets - - if training_args.do_train: - if "train" not in tokenized_datasets: - raise ValueError("--do_train requires a train dataset") - train_dataset = lm_datasets["train"] - if data_args.max_train_samples is not None: - max_train_samples = min(len(train_dataset), data_args.max_train_samples) - train_dataset = train_dataset.select(range(max_train_samples)) - - if training_args.do_eval: - if "validation" not in tokenized_datasets: - raise ValueError("--do_eval requires a validation dataset") - eval_dataset = lm_datasets["validation"] - if data_args.max_eval_samples is not None: - max_eval_samples = min(len(eval_dataset), data_args.max_eval_samples) - eval_dataset = eval_dataset.select(range(max_eval_samples)) - - # Data collator - label_pad_token_id = -100 if data_args.ignore_pad_token_for_loss else tokenizer.pad_token_id - data_collator = DataCollatorForTokenClassification( - tokenizer, - label_pad_token_id=label_pad_token_id, - pad_to_multiple_of=8 if training_args.fp16 else None, - ) - - training_args.dump_eval_loss_to = data_args.dump_eval_loss_to - - # Initialize our Trainer - trainer = DumpTokenLossTrainer( - model=model, - args=training_args, - train_dataset=train_dataset if training_args.do_train else None, - eval_dataset=eval_dataset if training_args.do_eval else None, - tokenizer=tokenizer, - # Data collator will default to DataCollatorWithPadding, so we change it. 
- data_collator=data_collator, - ) - - # Training - if training_args.do_train: - checkpoint = None - if training_args.resume_from_checkpoint is not None: - checkpoint = training_args.resume_from_checkpoint - elif last_checkpoint is not None: - checkpoint = last_checkpoint - train_result = trainer.train(resume_from_checkpoint=checkpoint) - trainer.save_model() # Saves the tokenizer too for easy upload - - metrics = train_result.metrics - max_train_samples = ( - data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset) - ) - metrics["train_samples"] = min(max_train_samples, len(train_dataset)) - - trainer.log_metrics("train", metrics) - trainer.save_metrics("train", metrics) - trainer.save_state() - - # Evaluation - if training_args.do_eval: - logger.info("*** Evaluate ***") - metrics = trainer.evaluate(metric_key_prefix="eval") - max_eval_samples = data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset) - metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset)) - try: - perplexity = math.exp(metrics["eval_loss"]) - except OverflowError: - perplexity = float("inf") - metrics["eval_perplexity"] = perplexity - logger.info(f"eval_perplexity: {perplexity}") - - trainer.log_metrics("eval", metrics) - trainer.save_metrics("eval", metrics) - - kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "text-generation"} - if data_args.dataset_name is not None: - kwargs["dataset_tags"] = data_args.dataset_name - if data_args.dataset_config_name is not None: - kwargs["dataset_args"] = data_args.dataset_config_name - kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}" - else: - kwargs["dataset"] = data_args.dataset_name - - if training_args.push_to_hub: - trainer.push_to_hub(**kwargs) - else: - trainer.create_model_card(**kwargs) - - -def _mp_fn(index): - # For xla_spawn (TPUs) - main() - - -if __name__ == "__main__": - main() diff --git a/convlab/base_models/gpt/trainer.py b/convlab/base_models/gpt/trainer.py deleted file mode 100644 index 5a8ed11c6566e897a5d3ef7a0d16a130968cd6aa..0000000000000000000000000000000000000000 --- a/convlab/base_models/gpt/trainer.py +++ /dev/null @@ -1,243 +0,0 @@ -from transformers import Trainer -from transformers.trainer_utils import EvalLoopOutput, has_length -from transformers.deepspeed import deepspeed_init -from transformers.utils import logging -from transformers.trainer_pt_utils import find_batch_size, nested_concat, nested_numpify, IterableDatasetShard, nested_truncate -from transformers.trainer_utils import EvalPrediction, denumpify_detensorize -import torch -from torch.utils.data import DataLoader -import numpy as np -from typing import List, Optional -import json - - -logger = logging.get_logger(__name__) - -class DumpTokenLossTrainer(Trainer): - def evaluation_loop( - self, - dataloader: DataLoader, - description: str, - prediction_loss_only: Optional[bool] = None, - ignore_keys: Optional[List[str]] = None, - metric_key_prefix: str = "eval", - ) -> EvalLoopOutput: - """ - Prediction/evaluation loop, shared by `Trainer.evaluate()` and `Trainer.predict()`. - Works both with or without labels. 
- """ - args = self.args - - prediction_loss_only = args.prediction_loss_only - - # if eval is called w/o train init deepspeed here - if args.deepspeed and not self.deepspeed: - - # XXX: eval doesn't have `resume_from_checkpoint` arg but we should be able to do eval - # from the checkpoint eventually - deepspeed_engine, _, _ = deepspeed_init( - self, num_training_steps=0, resume_from_checkpoint=None, inference=True - ) - self.model = deepspeed_engine.module - self.model_wrapped = deepspeed_engine - self.deepspeed = deepspeed_engine - - model = self._wrap_model(self.model, training=False, dataloader=dataloader) - - # if full fp16 or bf16 eval is wanted and this ``evaluation`` or ``predict`` isn't called - # while ``train`` is running, cast it to the right dtype first and then put on device - if not self.is_in_train: - if args.fp16_full_eval: - model = model.to(dtype=torch.float16, device=args.device) - elif args.bf16_full_eval: - model = model.to(dtype=torch.bfloat16, device=args.device) - - batch_size = self.args.eval_batch_size - - logger.info(f"***** Running {description} *****") - if has_length(dataloader): - logger.info(f" Num examples = {self.num_examples(dataloader)}") - else: - logger.info(" Num examples: Unknown") - logger.info(f" Batch size = {batch_size}") - - model.eval() - - self.callback_handler.eval_dataloader = dataloader - # Do this before wrapping. - eval_dataset = getattr(dataloader, "dataset", None) - - if args.past_index >= 0: - self._past = None - - # Initialize containers - # losses/preds/labels on GPU/TPU (accumulated for eval_accumulation_steps) - losses_host = None - preds_host = None - labels_host = None - inputs_host = None - - # losses/preds/labels on CPU (final containers) - all_losses = None - all_preds = None - all_labels = None - all_inputs = None - # Will be useful when we have an iterable dataset so don't know its length. - - if args.dump_eval_loss_to: - writer = open(args.dump_eval_loss_to, "a", encoding='utf-8') - loss_fct = torch.nn.CrossEntropyLoss(reduction='none') - num_sample_to_write = len(eval_dataset) - - observed_num_examples = 0 - # Main evaluation loop - for step, inputs in enumerate(dataloader): - # Update the observed num examples - observed_batch_size = find_batch_size(inputs) - if observed_batch_size is not None: - observed_num_examples += observed_batch_size - # For batch samplers, batch_size is not known by the dataloader in advance. 
- if batch_size is None: - batch_size = observed_batch_size - - # Prediction step - loss, logits, labels = self.prediction_step(model, inputs, prediction_loss_only, ignore_keys=ignore_keys) - inputs_decode = self._prepare_input(inputs["input_ids"]) if args.include_inputs_for_metrics else None - - # Update containers on host - if loss is not None: - losses = self._nested_gather(loss.repeat(batch_size)) - losses_host = losses if losses_host is None else torch.cat((losses_host, losses), dim=0) - if labels is not None: - labels = self._pad_across_processes(labels) - labels = self._nested_gather(labels) - # labels_host = labels if labels_host is None else nested_concat(labels_host, labels, padding_index=-100) - if inputs_decode is not None: - inputs_decode = self._pad_across_processes(inputs_decode) - inputs_decode = self._nested_gather(inputs_decode) - inputs_host = ( - inputs_decode - if inputs_host is None - else nested_concat(inputs_host, inputs_decode, padding_index=-100) - ) - if logits is not None: - logits = self._pad_across_processes(logits) - logits = self._nested_gather(logits) - if self.preprocess_logits_for_metrics is not None: - logits = self.preprocess_logits_for_metrics(logits, labels) - # preds_host = logits if preds_host is None else nested_concat(preds_host, logits, padding_index=-100) - - if args.dump_eval_loss_to: - if self.is_world_process_zero() and num_sample_to_write > 0: - assert logits is not None and labels is not None, print('prediction_loss_only', prediction_loss_only) - shift_logits = logits[..., :-1, :].contiguous() - shift_labels = labels[..., 1:].contiguous() - batch_token_loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1)) - batch_token_loss = batch_token_loss.view(shift_labels.size()).tolist() - labels = labels.tolist() - for i in range(len(labels)): - if num_sample_to_write > 0: - num_sample_to_write -= 1 - else: - break - token_ids = [x for x in labels[i] if x != -100] - tokens = self.tokenizer.convert_ids_to_tokens(token_ids) - token_losses = [0] + batch_token_loss[i][:len(token_ids)-1] - writer.write(json.dumps({"tokens": tokens, "losses": token_losses}, ensure_ascii=False)+'\n') - - self.control = self.callback_handler.on_prediction_step(args, self.state, self.control) - - # Gather all tensors and put them back on the CPU if we have done enough accumulation steps. 
- if args.eval_accumulation_steps is not None and (step + 1) % args.eval_accumulation_steps == 0: - if losses_host is not None: - losses = nested_numpify(losses_host) - all_losses = losses if all_losses is None else np.concatenate((all_losses, losses), axis=0) - if preds_host is not None: - logits = nested_numpify(preds_host) - all_preds = logits if all_preds is None else nested_concat(all_preds, logits, padding_index=-100) - if inputs_host is not None: - inputs_decode = nested_numpify(inputs_host) - all_inputs = ( - inputs_decode - if all_inputs is None - else nested_concat(all_inputs, inputs_decode, padding_index=-100) - ) - if labels_host is not None: - labels = nested_numpify(labels_host) - all_labels = ( - labels if all_labels is None else nested_concat(all_labels, labels, padding_index=-100) - ) - - # Set back to None to begin a new accumulation - losses_host, preds_host, inputs_host, labels_host = None, None, None, None - - if args.dump_eval_loss_to: - writer.close() - - if args.past_index and hasattr(self, "_past"): - # Clean the state at the end of the evaluation loop - delattr(self, "_past") - - # Gather all remaining tensors and put them back on the CPU - if losses_host is not None: - losses = nested_numpify(losses_host) - all_losses = losses if all_losses is None else np.concatenate((all_losses, losses), axis=0) - if preds_host is not None: - logits = nested_numpify(preds_host) - all_preds = logits if all_preds is None else nested_concat(all_preds, logits, padding_index=-100) - if inputs_host is not None: - inputs_decode = nested_numpify(inputs_host) - all_inputs = ( - inputs_decode if all_inputs is None else nested_concat(all_inputs, inputs_decode, padding_index=-100) - ) - if labels_host is not None: - labels = nested_numpify(labels_host) - all_labels = labels if all_labels is None else nested_concat(all_labels, labels, padding_index=-100) - - # Number of samples - if has_length(eval_dataset): - num_samples = len(eval_dataset) - # The instance check is weird and does not actually check for the type, but whether the dataset has the right - # methods. Therefore we need to make sure it also has the attribute. - elif isinstance(eval_dataset, IterableDatasetShard) and hasattr(eval_dataset, "num_examples"): - num_samples = eval_dataset.num_examples - else: - if has_length(dataloader): - num_samples = self.num_examples(dataloader) - else: # both len(dataloader.dataset) and len(dataloader) fail - num_samples = observed_num_examples - - # Number of losses has been rounded to a multiple of batch_size and in a distributed training, the number of - # samplers has been rounded to a multiple of batch_size, so we truncate. - if all_losses is not None: - all_losses = all_losses[:num_samples] - if all_preds is not None: - all_preds = nested_truncate(all_preds, num_samples) - if all_labels is not None: - all_labels = nested_truncate(all_labels, num_samples) - if all_inputs is not None: - all_inputs = nested_truncate(all_inputs, num_samples) - - # Metrics! 
- if self.compute_metrics is not None and all_preds is not None and all_labels is not None: - if args.include_inputs_for_metrics: - metrics = self.compute_metrics( - EvalPrediction(predictions=all_preds, label_ids=all_labels, inputs=all_inputs) - ) - else: - metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels)) - else: - metrics = {} - - # To be JSON-serializable, we need to remove numpy types or zero-d tensors - metrics = denumpify_detensorize(metrics) - - if all_losses is not None: - metrics[f"{metric_key_prefix}_loss"] = all_losses.mean().item() - - # Prefix all keys with metric_key_prefix + '_' - for key in list(metrics.keys()): - if not key.startswith(f"{metric_key_prefix}_"): - metrics[f"{metric_key_prefix}_{key}"] = metrics.pop(key) - - return EvalLoopOutput(predictions=all_preds, label_ids=all_labels, metrics=metrics, num_samples=num_samples) diff --git a/convlab/base_models/t5/README.md b/convlab/base_models/t5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c0894e6c5cd38b5738ef85f7d900ecdf304d3fa6 --- /dev/null +++ b/convlab/base_models/t5/README.md @@ -0,0 +1,80 @@ +# T5 models + +By converting NLP tasks into a text-to-text format, we can use a single model to solve various tasks. Here we use T5 as the backbone model and provide a unified training script `run_seq2seq.py` for many tasks. **See `*.sh` under each task directory for usage.** + +## Create Data +Currently we support natural language understanding (**NLU**), dialog state tracking (**DST**), natural language generation (**NLG**), response generation (**RG**), and generating a dialog from a user goal (**Goal2Dialogue**). We provide serialization and deserialization methods for dialog acts and state in the unified data format (user goals are already natural language instructions). An example of serialized dialog acts and state: + +``` +User: I am looking for a cheap restaurant. +System: Is there a particular area of town you prefer? +User: In the centre of town. + +User dialog acts: [inform][restaurant]([area][centre]) +State: [restaurant]([area][centre],[price range][cheap]) +System dialog acts: [recommend][restaurant]([name][Zizzi Cambridge]) + +System: I would recommend Zizzi Cambridge. +``` + +Dialog acts are in the form of `[intent][domain]([slot][value],...);...`. State is in the form of `[domain]([slot][value],...);...`. Multiple items are concatenated with a semicolon `;`. + +To create data for a specific task, run `create_data.py` with the corresponding arguments. For example, to create data for single-turn NLU on MultiWOZ 2.1: + +```bash +python create_data.py --tasks nlu --datasets multiwoz21 --speaker user +``` + +Note that the script only supports **datasets in the unified format**. + +## Training + +To train the model, specify arguments such as the data path, learning rate, and number of epochs, and then run `run_seq2seq.py`. See `nlu/run_nlu.sh` for an example. + +## Evaluation + +The standard evaluation scripts for the NLU, DST, and NLG tasks are located at `../../$task/evaluate_unified_datasets.py`. See `nlu/run_nlu.sh` for an example. + +## Trained Models + +Trained models and their performance are available on the [Hugging Face Hub](https://huggingface.co/ConvLab). You can try some examples with the hosted inference API.
+ +| Name | Task | Training Dataset | +| ------------------------------------------------------------ | ------------- | ---------------------------- | +| [t5-small-goal2dialogue-multiwoz21](https://huggingface.co/ConvLab/t5-small-goal2dialogue-multiwoz21) | Goal2Dialogue | MultiWOZ 2.1 | +| [t5-small-nlu-multiwoz21](https://huggingface.co/ConvLab/t5-small-nlu-multiwoz21) | NLU | MultiWOZ 2.1 | +| [t5-small-nlu-sgd](https://huggingface.co/ConvLab/t5-small-nlu-sgd) | NLU | SGD | +| [t5-small-nlu-tm1_tm2_tm3](https://huggingface.co/ConvLab/t5-small-nlu-tm1_tm2_tm3) | NLU | TM1+TM2+TM3 | +| [t5-small-nlu-multiwoz21_sgd_tm1_tm2_tm3](https://huggingface.co/ConvLab/t5-small-nlu-multiwoz21_sgd_tm1_tm2_tm3) | NLU | MultiWOZ 2.1+SGD+TM1+TM2+TM3 | +| [t5-small-dst-multiwoz21](https://huggingface.co/ConvLab/t5-small-dst-multiwoz21) | DST | MultiWOZ 2.1 | +| [t5-small-dst-sgd](https://huggingface.co/ConvLab/t5-small-dst-sgd) | DST | SGD | +| [t5-small-dst-tm1_tm2_tm3](https://huggingface.co/ConvLab/t5-small-dst-tm1_tm2_tm3) | DST | TM1+TM2+TM3 | +| [t5-small-dst-multiwoz21_sgd_tm1_tm2_tm3](https://huggingface.co/ConvLab/t5-small-dst-multiwoz21_sgd_tm1_tm2_tm3) | DST | MultiWOZ 2.1+SGD+TM1+TM2+TM3 | +| [t5-small-nlg-multiwoz21](https://huggingface.co/ConvLab/t5-small-nlg-multiwoz21) | NLG | MultiWOZ 2.1 | +| [t5-small-nlg-sgd](https://huggingface.co/ConvLab/t5-small-nlg-sgd) | NLG | SGD | +| [t5-small-nlg-tm1_tm2_tm3](https://huggingface.co/ConvLab/t5-small-nlg-tm1_tm2_tm3) | NLG | TM1+TM2+TM3 | +| [t5-small-nlg-multiwoz21_sgd_tm1_tm2_tm3](https://huggingface.co/ConvLab/t5-small-nlg-multiwoz21_sgd_tm1_tm2_tm3) | NLG | MultiWOZ 2.1+SGD+TM1+TM2+TM3 | + +## Interface + +To use trained models in a dialog system, import them through: + +```python +from convlab.base_models.t5.nlu import T5NLU +from convlab.base_models.t5.dst import T5DST +from convlab.base_models.t5.nlg import T5NLG + +# example instantiation +# model_name_or_path can be a model on the Hugging Face Hub or a local path +nlu = T5NLU(speaker='user', context_window_size=0, model_name_or_path='ConvLab/t5-small-nlu-multiwoz21') +``` + +See `nlu/nlu.py`, `dst/dst.py`, and `nlg/nlg.py` for example usage. + +## Support a New Task + +To support a new task, first serialize the model input and output as in `create_data.py`, then train the model with `run_seq2seq.py`. Finally, write an evaluation script for the task or pass `metric_name_or_path` for an existing metric to `run_seq2seq.py`.
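+For example, a new task's data could be serialized into the `source`/`target` JSON lines that `run_seq2seq.py` reads; the column names below match the `--source_column` and `--target_column` arguments used by the training scripts. This is only a minimal sketch: the task ("summarize the dialog"), file paths, and fields are illustrative, not part of the toolkit.
+
+```python
+import json
+import os
+
+# Hypothetical "summarize the dialog" task, used only for illustration:
+# each JSON line holds one source/target pair for run_seq2seq.py.
+samples = [
+    {
+        "context": [
+            ("user", "I am looking for a cheap restaurant."),
+            ("system", "Is there a particular area of town you prefer?"),
+        ],
+        "summary": "The user wants a cheap restaurant; the system asks which area they prefer.",
+    },
+]
+
+os.makedirs("data/my_task", exist_ok=True)
+with open("data/my_task/train.json", "w", encoding="utf-8") as f:
+    for sample in samples:
+        # Prepend a short task prefix to the linearized dialog context.
+        source = "summarize the dialog:\n\n" + "\n".join(
+            f"{speaker}: {utterance}" for speaker, utterance in sample["context"]
+        )
+        f.write(json.dumps({"source": source, "target": sample["summary"]}, ensure_ascii=False) + "\n")
+```
+
+The resulting file can then be passed to `run_seq2seq.py` via `--train_file`, together with `--source_column source --target_column target`.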
+ +## Author + +Qi Zhu(zhuq96 at gmail dot com) \ No newline at end of file diff --git a/convlab/base_models/t5/dst/__init__.py b/convlab/base_models/t5/dst/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e4e6903b1093e645eed5ae6ba6b9b88d6150fe39 --- /dev/null +++ b/convlab/base_models/t5/dst/__init__.py @@ -0,0 +1 @@ +from convlab.base_models.t5.dst.dst import T5DST \ No newline at end of file diff --git a/convlab/base_models/t5/dst/dst.py b/convlab/base_models/t5/dst/dst.py index c34395c02fd188568b9f4c4bc1956240fbbc88a9..3c5ec2525091cfdcb423460ed1b01871087deb21 100755 --- a/convlab/base_models/t5/dst/dst.py +++ b/convlab/base_models/t5/dst/dst.py @@ -8,16 +8,12 @@ from convlab.util.custom_util import model_downloader class T5DST(DST): - def __init__(self, speaker, context_window_size, model_name_or_path, model_file=None, device='cuda'): + def __init__(self, speaker, context_window_size, model_name_or_path, device='cuda'): assert speaker in ['user', 'system'] assert context_window_size > 0 self.speaker = speaker self.opponent = 'system' if speaker == 'user' else 'user' self.context_window_size = context_window_size - - model_dir = os.path.dirname(os.path.abspath(__file__)) - if not os.path.exists(model_name_or_path): - model_downloader(model_dir, model_file) self.config = AutoConfig.from_pretrained(model_name_or_path) self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) diff --git a/convlab/base_models/t5/key2gen/create_data.py b/convlab/base_models/t5/key2gen/create_data.py deleted file mode 100644 index 138808f2a13d4b3fad71b57a2aa7977917f8143c..0000000000000000000000000000000000000000 --- a/convlab/base_models/t5/key2gen/create_data.py +++ /dev/null @@ -1,162 +0,0 @@ -import os -import json -from tqdm import tqdm -from convlab.util import load_dataset, load_unified_data, load_nlu_data - -def create_nlg_data(dataset, data_dir, args): - data_by_split = load_nlu_data(dataset, speaker='system', use_context=True, context_window_size=3) - os.makedirs(data_dir, exist_ok=True) - - data_splits = data_by_split.keys() - for data_split in data_splits: - data = [] - for sample in tqdm(data_by_split[data_split], desc=f'{data_split} sample', leave=False): - context = [(turn['speaker'], turn['utterance']) for turn in sample['context']] - response = sample['utterance'] - if len(context) > 0 and len(response) > 0: - knowledge = sample['dialogue_acts'] - data.append(json.dumps({'context': context, 'knowledge': knowledge, 'response': response}, ensure_ascii=False)+'\n') - - if 'test' in data_split: - file_name = os.path.join(os.path.dirname(data_dir), f"{data_split}.json") - else: - file_name = os.path.join(data_dir, f"{data_split}.json") - with open(file_name, "w", encoding='utf-8') as f: - f.writelines(data) - data_by_split[data_split] = data - return data_by_split - -def create_kvret_data(dataset, data_dir, args): - data_by_split = load_unified_data(dataset, speaker='system', utterance=True, db_results=True, use_context=True, context_window_size=100) - os.makedirs(data_dir, exist_ok=True) - - domain2entity_col = {'schedule': 'event' ,'navigate': 'poi', 'weather': 'location'} - data_splits = data_by_split.keys() - for data_split in data_splits: - data = [] - for sample in tqdm(data_by_split[data_split], desc=f'{data_split} sample', leave=False): - context = [(turn['speaker'], turn['utterance']) for turn in sample['context']] - response = sample['utterance'] - if len(context) > 0 and len(response) > 0: - knowledge = sample['db_results'] - for domain, db_items 
in knowledge.items(): - entity_col = domain2entity_col[domain] - for db_item in db_items: - db_item['entity'] = db_item.pop(entity_col) - - data.append(json.dumps({'context': context, 'knowledge': knowledge, 'response': response}, ensure_ascii=False)+'\n') - - if 'test' in data_split: - file_name = os.path.join(os.path.dirname(data_dir), f"{data_split}.json") - else: - file_name = os.path.join(data_dir, f"{data_split}.json") - with open(file_name, "w", encoding='utf-8') as f: - f.writelines(data) - data_by_split[data_split] = data - return data_by_split - -def create_personachat_data(dataset, data_dir, args): - data_by_split = dataset - os.makedirs(data_dir, exist_ok=True) - - data_splits = data_by_split.keys() - for data_split in data_splits: - data = [] - for dial in tqdm(data_by_split[data_split], desc=f'{data_split} sample', leave=False): - knowledge = dial['persona']['system'] - context = [] - for turn in dial['turns']: - response = turn['utterance'] - if turn['speaker'] == 'system' and len(context) > 0 and len(response) > 0: - data.append(json.dumps({'context': context, 'knowledge': knowledge, 'response': response}, ensure_ascii=False)+'\n') - context.append((turn['speaker'], turn['utterance'])) - - if 'test' in data_split: - file_name = os.path.join(os.path.dirname(data_dir), f"{data_split}.json") - else: - file_name = os.path.join(data_dir, f"{data_split}.json") - with open(file_name, "w", encoding='utf-8') as f: - f.writelines(data) - data_by_split[data_split] = data - return data_by_split - -def create_wow_data(dataset, data_dir, args): - data_by_split = dataset - os.makedirs(data_dir, exist_ok=True) - data_by_split['test'] = data_by_split['test_seen'] + data_by_split['test_unseen'] - data_by_split.pop('test_seen') - data_by_split.pop('test_unseen') - - data_splits = data_by_split.keys() - for data_split in data_splits: - data = [] - for dial in tqdm(data_by_split[data_split], desc=f'{data_split} sample', leave=False): - context = [] - for turn in dial['turns']: - response = turn['utterance'] - if turn['speaker'] == 'system' and len(context) > 0 and len(response) > 0: - knowledge = turn['checked_passage'] - if knowledge is None: - knowledge = [] - elif isinstance(knowledge, str): - knowledge = [knowledge] - data.append(json.dumps({'context': context, 'knowledge': knowledge, 'response': response}, ensure_ascii=False)+'\n') - context.append((turn['speaker'], turn['utterance'])) - - if 'test' in data_split: - file_name = os.path.join(os.path.dirname(data_dir), f"{data_split}.json") - else: - file_name = os.path.join(data_dir, f"{data_split}.json") - with open(file_name, "w", encoding='utf-8') as f: - f.writelines(data) - data_by_split[data_split] = data - return data_by_split - -def create_opendialkg_data(dataset, data_dir, args): - data_by_split = dataset - os.makedirs(data_dir, exist_ok=True) - - data_splits = data_by_split.keys() - for data_split in data_splits: - data = [] - for dial in tqdm(data_by_split[data_split], desc=f'{data_split} sample', leave=False): - context = [] - for turn in dial['turns']: - response = turn['utterance'] - if turn['speaker'] == 'system' and 'kg_path' in turn and len(context) > 0 and len(response) > 0: - knowledge = turn['kg_path']['triples'] - data.append(json.dumps({'context': context, 'knowledge': knowledge, 'response': response}, ensure_ascii=False)+'\n') - context.append((turn['speaker'], turn['utterance'])) - - if 'test' in data_split: - file_name = os.path.join(os.path.dirname(data_dir), f"{data_split}.json") - else: - file_name = 
os.path.join(data_dir, f"{data_split}.json") - with open(file_name, "w", encoding='utf-8') as f: - f.writelines(data) - data_by_split[data_split] = data - return data_by_split - - -if __name__ == '__main__': - from argparse import ArgumentParser - parser = ArgumentParser(description="create data for seq2seq training") - parser.add_argument('--tasks', '-t', metavar='task_name', nargs='*', choices=['nlg', 'kvret', 'opendialkg', 'personachat', 'wow'], help='names of tasks') - parser.add_argument('--datasets', '-d', metavar='dataset_name', nargs='*', help='names of unified datasets') - parser.add_argument('--shot', '-s', type=float, default=None, help='how many data is used for training and evaluation, ratio if < 1 else absolute number') - parser.add_argument('--dial_ids_order', '-o', type=int, default=None, help='which data order is used for experiments') - args = parser.parse_args() - print(args) - for dataset_name in tqdm(args.datasets, desc='datasets'): - dataset = load_dataset(dataset_name, dial_ids_order=args.dial_ids_order) - if args.shot: - if args.shot < 1: - dataset['train'] = dataset['train'][:round(len(dataset['train'])*args.shot)] - dataset['validation'] = dataset['validation'][:round(len(dataset['validation'])*args.shot)] - else: - args.shot = int(args.shot) - dataset['train'] = dataset['train'][:args.shot] - dataset['validation'] = dataset['validation'][:args.shot] - for task_name in tqdm(args.tasks, desc='tasks', leave=False): - data_dir = os.path.join('data', task_name, (dataset_name if not args.shot else f'{dataset_name}_{args.shot}shot_order{args.dial_ids_order}')) - data_by_split = eval(f"create_{task_name}_data")(dataset, data_dir, args) diff --git a/convlab/base_models/t5/key2gen/dataset_godel.py b/convlab/base_models/t5/key2gen/dataset_godel.py deleted file mode 100644 index caf7b8ab7b1fb10b8de03c01dac9a147f5540af1..0000000000000000000000000000000000000000 --- a/convlab/base_models/t5/key2gen/dataset_godel.py +++ /dev/null @@ -1,123 +0,0 @@ -# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Data processing for vanilla generator""" - -import json -import datasets -from convlab.base_models.t5.key2gen.features import FEATURES -from copy import deepcopy - - -class GodelDataset(datasets.GeneratorBasedBuilder): - """Dataset for vanilla generator (e.g., t5)""" - - VERSION = datasets.Version("1.18.0") - - BUILDER_CONFIGS = [ - datasets.BuilderConfig(name="nlg", version=VERSION, description="DA grounded generation task"), - datasets.BuilderConfig(name="kvret", version=VERSION, description="KB grounded generation task"), - datasets.BuilderConfig(name="opendialkg", version=VERSION, description="KG grounded generation task"), - datasets.BuilderConfig(name="wow", version=VERSION, description="Passage grounded generation task"), - datasets.BuilderConfig(name="personachat", version=VERSION, description="Persona grounded generation task"), - ] - - def _info(self): - return datasets.DatasetInfo( - description=f"Vanilla Dataset for {self.config.description}", - features=datasets.Features(deepcopy(FEATURES[self.config.name])) - ) - - def _split_generators(self, dl_manager): - generators = [] - if "train" in self.config.data_files: - generators.append(datasets.SplitGenerator( - name=datasets.Split.TRAIN, - gen_kwargs={ - "filepath": self.config.data_files["train"][0], - "split": "train", - }, - )) - if "validation" in self.config.data_files: - generators.append(datasets.SplitGenerator( - name=datasets.Split.VALIDATION, - gen_kwargs={ - "filepath": self.config.data_files["validation"][0], - "split": "validation", - }, - )) - if "test" in self.config.data_files: - generators.append(datasets.SplitGenerator( - name=datasets.Split.TEST, - gen_kwargs={ - "filepath": self.config.data_files["test"][0], - "split": "test", - }, - )) - - return generators - - def _generate_examples(self, filepath, split): - with open(filepath, encoding="utf-8") as f: - for key, row in enumerate(f): - item = json.loads(row) - if self.config.name == "nlg": - knowledge = item["knowledge"] - triples = [] - for da_type in knowledge: - for da in knowledge[da_type]: - intent, domain, slot, value = da["intent"], da["domain"], da["slot"], da.get("value", "") - if 'start' in da: - da.pop('start') - da.pop('end') - intent_domain = f"{intent}-{domain}" - triples.append([intent_domain]) - if len(slot) > 0: - triples[-1].append(slot) - if len(value) > 0: - triples[-1].append(value) - knowledge_seq = "| {} |".format(" | ".join([" : ".join(da_keywords) for da_keywords in triples])) - - elif self.config.name == "kvret": - knowledge = {"schedule": [], "weather": [], "navigate": []} - triples = [] - for domain, db_items in item["knowledge"].items(): - knowledge[domain] = db_items - for db_item in db_items: - entity = db_item["entity"] - for db_key, db_value in db_item.items(): - if db_key == "entity": - continue - triples.append([entity, db_key, db_value]) - knowledge_seq = "| {} |".format(" | ".join([" : ".join(triple) for triple in triples])) - - elif self.config.name == "opendialkg": - knowledge = item["knowledge"] - knowledge_seq = "| {} |".format(" | ".join([" : ".join(triple) for triple in item["knowledge"]])) - - elif self.config.name in ["wow", "personachat"]: - knowledge = item["knowledge"] - try: - knowledge_seq = "| {} |".format(" | ".join(item["knowledge"])) - except: - print([knowledge]) - raise - - context = " EOS ".join([turn[1] for turn in item["context"]]) - context_knowledge = context + ' <|Knowledge|> \n\n' + knowledge_seq + ' => ' - - yield key, { - "context+knowledge": context_knowledge, - "response": 
item["response"], - "knowledge": knowledge, - } diff --git a/convlab/base_models/t5/key2gen/dataset_vanilla.py b/convlab/base_models/t5/key2gen/dataset_vanilla.py deleted file mode 100644 index 15a8c7b4ac8cfbf1057e090f675a3fc7a4051f2c..0000000000000000000000000000000000000000 --- a/convlab/base_models/t5/key2gen/dataset_vanilla.py +++ /dev/null @@ -1,126 +0,0 @@ -# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Data processing for vanilla generator""" - -import json -import datasets -from convlab.base_models.t5.key2gen.features import FEATURES -from copy import deepcopy - - -class VanillaDataset(datasets.GeneratorBasedBuilder): - """Dataset for vanilla generator (e.g., t5)""" - - VERSION = datasets.Version("1.18.0") - - BUILDER_CONFIGS = [ - datasets.BuilderConfig(name="nlg", version=VERSION, description="DA grounded generation task"), - datasets.BuilderConfig(name="kvret", version=VERSION, description="KB grounded generation task"), - datasets.BuilderConfig(name="opendialkg", version=VERSION, description="KG grounded generation task"), - datasets.BuilderConfig(name="wow", version=VERSION, description="Passage grounded generation task"), - datasets.BuilderConfig(name="personachat", version=VERSION, description="Persona grounded generation task"), - ] - - def _info(self): - return datasets.DatasetInfo( - description=f"Vanilla Dataset for {self.config.description}", - features=datasets.Features(deepcopy(FEATURES[self.config.name])) - ) - - def _split_generators(self, dl_manager): - generators = [] - if "train" in self.config.data_files: - generators.append(datasets.SplitGenerator( - name=datasets.Split.TRAIN, - gen_kwargs={ - "filepath": self.config.data_files["train"][0], - "split": "train", - }, - )) - if "validation" in self.config.data_files: - generators.append(datasets.SplitGenerator( - name=datasets.Split.VALIDATION, - gen_kwargs={ - "filepath": self.config.data_files["validation"][0], - "split": "validation", - }, - )) - if "test" in self.config.data_files: - generators.append(datasets.SplitGenerator( - name=datasets.Split.TEST, - gen_kwargs={ - "filepath": self.config.data_files["test"][0], - "split": "test", - }, - )) - - return generators - - def _generate_examples(self, filepath, split): - with open(filepath, encoding="utf-8") as f: - for key, row in enumerate(f): - item = json.loads(row) - if self.config.name == "nlg": - knowledge = item["knowledge"] - triples = [] - for da_type in knowledge: - for da in knowledge[da_type]: - intent, domain, slot, value = da["intent"], da["domain"], da["slot"], da.get("value", "") - if 'start' in da: - da.pop('start') - da.pop('end') - intent_domain = f"{intent}-{domain}" - triples.append([intent_domain]) - if len(slot) > 0: - triples[-1].append(slot) - if len(value) > 0: - triples[-1].append(value) - knowledge_seq = "| {} |".format(" | ".join([" : ".join(da_keywords) for da_keywords in triples])) - - elif self.config.name == "kvret": - knowledge = 
{"schedule": [], "weather": [], "navigate": []} - triples = [] - for domain, db_items in item["knowledge"].items(): - knowledge[domain] = db_items - for db_item in db_items: - entity = db_item["entity"] - for db_key, db_value in db_item.items(): - if db_key == "entity": - continue - triples.append([entity, db_key, db_value]) - knowledge_seq = "| {} |".format(" | ".join([" : ".join(triple) for triple in triples])) - - elif self.config.name == "opendialkg": - knowledge = item["knowledge"] - knowledge_seq = "| {} |".format(" | ".join([" : ".join(triple) for triple in item["knowledge"]])) - - elif self.config.name in ["wow", "personachat"]: - knowledge = item["knowledge"] - try: - knowledge_seq = "| {} |".format(" | ".join(item["knowledge"])) - except: - print([knowledge]) - raise - - context = "\n".join([f"{turn[0]}: {turn[1]}" for turn in item["context"]]+["system: "]) - if self.config.name in ["kvret", "wow", "personachat"]: - context_knowledge = f"generate a response: all knowledge: \n\n{knowledge_seq} context:\n\n{context}" - else: - context_knowledge = f"generate a response: grounded knowledge: \n\n{knowledge_seq} context:\n\n{context}" - - yield key, { - "context+knowledge": context_knowledge, - "response": item["response"], - "knowledge": knowledge, - } diff --git a/convlab/base_models/t5/key2gen/eval.ipynb b/convlab/base_models/t5/key2gen/eval.ipynb deleted file mode 100644 index 51fcc5e0da1321ef740084d0a8b0241b5721a2fc..0000000000000000000000000000000000000000 --- a/convlab/base_models/t5/key2gen/eval.ipynb +++ /dev/null @@ -1 +0,0 @@ -{"cells":[{"cell_type":"code","execution_count":1,"metadata":{},"outputs":[],"source":["import json\n","import re"]},{"cell_type":"code","execution_count":2,"metadata":{},"outputs":[],"source":["def read_jsonline(path):\n"," return [json.loads(line) for line in open(path)]"]},{"cell_type":"code","execution_count":3,"metadata":{},"outputs":[],"source":["origin = read_jsonline('output/wow/wow/test_unseen.json')"]},{"cell_type":"code","execution_count":22,"metadata":{},"outputs":[],"source":["key2gen = read_jsonline('output/wow/key2gen_wow/test_unseen.json')"]},{"cell_type":"code","execution_count":23,"metadata":{},"outputs":[],"source":["with open('tmp_wow.txt', 'w') as f:\n"," for d1, d2 in zip(origin, key2gen):\n"," print(re.split('context:|grounded knowledge:', d1['context+knowledge'])[1].strip(), file=f)\n"," print(re.split('context:|grounded knowledge:', d2['context+knowledge'])[1].strip(), file=f)\n"," print(d1['context+knowledge'].split('context:')[1].replace('\\n\\n', '\\n'), file=f)\n"," print(file=f)\n"," print('target', d1['response'], file=f)\n"," print('origin', d1['predictions'], file=f)\n"," print('key2gen', d2['predictions'], file=f)\n"," print('='*100, file=f)"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["for ratio in [0.1, 0.01]:\n"," for order in [0, 1, 2]:\n"," origin = read_jsonline(f'output/personachat/key2gen_personachat_{ratio}_order{order}/generated_predictions.json')\n"," score = metric.compute(predictions=[d['predictions'] for d in origin], references=[d['response'] for d in origin])\n"," print(ratio, order)\n"," print(score)\n"," "]},{"cell_type":"code","execution_count":51,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["0.01 1\n","{'bleu-1': 24.322560358946276, 'bleu-2': 13.03630111937752, 'bleu-3': 7.43647978674912, 'bleu-4': 4.450365738541082, 'unigram f1': 0.20101056184593705, 'unigram f1 (non-stop words)': 0.09881569367818614, 'rouge1': 21.359332522961864, 
'rouge2': 6.532120354812852, 'rougeL': 19.76437990594138}\n"]}],"source":["for ratio in [0.01]:\n"," for order in [1]:\n"," origin = read_jsonline(f'output/personachat/personachat/generated_predictions.json')\n"," score = metric.compute(predictions=[d['predictions'] for d in origin], references=[d['response'] for d in origin])\n"," print(ratio, order)\n"," print(score)\n"," "]},{"cell_type":"code","execution_count":4,"metadata":{},"outputs":[],"source":["from datasets import load_metric"]},{"cell_type":"code","execution_count":7,"metadata":{},"outputs":[],"source":["metric = load_metric('metric.py')"]},{"cell_type":"code","execution_count":58,"metadata":{},"outputs":[{"data":{"text/plain":["{'bleu-1': 47.9848465486215,\n"," 'bleu-2': 37.18000679532912,\n"," 'bleu-3': 29.346646172092814,\n"," 'bleu-4': 23.410526740211363,\n"," 'unigram f1': 0.4999850046010773,\n"," 'unigram f1 (non-stop words)': 0.5150265227462978,\n"," 'rouge1': 50.536642578692195,\n"," 'rouge2': 33.10681789367832,\n"," 'rougeL': 46.84702913163778,\n"," 'meteor': 0.4641962079490068}"]},"execution_count":58,"metadata":{},"output_type":"execute_result"}],"source":["metric.compute(predictions=[d['predictions'] for d in key2gen], references=[d['response'] for d in key2gen])"]},{"cell_type":"code","execution_count":8,"metadata":{},"outputs":[{"data":{"text/plain":["{'bleu-1': 37.570099942714585,\n"," 'bleu-2': 26.77393964962893,\n"," 'bleu-3': 21.115954644820572,\n"," 'bleu-4': 17.513316671216046,\n"," 'unigram f1': 0.3656930567072274,\n"," 'unigram f1 (non-stop words)': 0.36456219281235724,\n"," 'rouge1': 39.1982724920493,\n"," 'rouge2': 20.825159884632743,\n"," 'rougeL': 34.98278542180112,\n"," 'meteor': 0.3405671227693821,\n"," 'distinct-1': 0.07838670580160921,\n"," 'distinct-2': 0.29689084413659694}"]},"execution_count":8,"metadata":{},"output_type":"execute_result"}],"source":["metric.compute(predictions=[d['predictions'] for d in origin], references=[d['response'] for d in origin])"]},{"cell_type":"code","execution_count":34,"metadata":{},"outputs":[{"data":{"text/plain":["{'bleu-1': 47.9848465486215,\n"," 'bleu-2': 37.18000679532912,\n"," 'bleu-3': 29.346646172092814,\n"," 'bleu-4': 23.410526740211363,\n"," 'unigram f1': 0.4999850046010773,\n"," 'unigram f1 (non-stop words)': 0.5150265227462978,\n"," 'rouge1': AggregateScore(low=Score(precision=0.5301926525013549, recall=0.4821419251082986, fmeasure=0.48565655175230005), mid=Score(precision=0.5513392693168799, recall=0.50235850981064, fmeasure=0.5053664257869219), high=Score(precision=0.5760132731228504, recall=0.5268580272115051, fmeasure=0.5279111393835526)),\n"," 'rouge2': AggregateScore(low=Score(precision=0.34772127155901306, recall=0.30411953889228, fmeasure=0.31029658993105447), mid=Score(precision=0.3696898381097765, recall=0.32612705034192035, fmeasure=0.3310681789367832), high=Score(precision=0.3947745596965405, recall=0.34880792116864995, fmeasure=0.35356317521641434)),\n"," 'rougeL': AggregateScore(low=Score(precision=0.4874189522136045, recall=0.4413343070361347, fmeasure=0.4464463084888409), mid=Score(precision=0.5108530997712726, recall=0.4642203560120527, fmeasure=0.46847029131637785), high=Score(precision=0.5350154077389535, recall=0.4855131911095939, fmeasure=0.4899950876629784)),\n"," 'rougeLsum': AggregateScore(low=Score(precision=0.4871840444049138, recall=0.44081531444183386, fmeasure=0.44514075751478493), mid=Score(precision=0.5105975305923949, recall=0.4639265647317744, fmeasure=0.46779186414456864), high=Score(precision=0.5348015149575474, 
recall=0.48693312722760357, fmeasure=0.4918651382986408))}"]},"execution_count":34,"metadata":{},"output_type":"execute_result"}],"source":["metric.compute(predictions=[d['predictions'] for d in key2gen], references=[d['response'] for d in key2gen])"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":[]}],"metadata":{"interpreter":{"hash":"0f9333403d680bc010aa5ce5a2f27ba398c9e47e92ba3724506306aa234cd07d"},"kernelspec":{"display_name":"Python 3.8.12 ('py38')","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.8.12"},"orig_nbformat":4},"nbformat":4,"nbformat_minor":2} diff --git a/convlab/base_models/t5/key2gen/evaluate.py b/convlab/base_models/t5/key2gen/evaluate.py deleted file mode 100644 index 769fdfcf3d1c899aad1b5389dad2c8d9465c05c6..0000000000000000000000000000000000000000 --- a/convlab/base_models/t5/key2gen/evaluate.py +++ /dev/null @@ -1,91 +0,0 @@ -from tabulate import tabulate -import os -import json -from tqdm import tqdm -from datasets import load_metric -import numpy as np -import csv - -def evaluate(filename, metric): - """ - It reads the predictions, references, and knowledge from a file, and then computes the metric - - :param filename: the path to the file containing the predictions - :param metric: the metric to use for evaluation - :return: The result of the evaluation. - """ - predictions, references, knowledge = [], [], [] - with open(filename, 'r') as f: - for line in f: - item = json.loads(line) - predictions.append(item['predictions']) - references.append(item['response']) - knowledge.append(item['knowledge']) - result = metric.compute(predictions=predictions, references=references, knowledge=knowledge) - return result - - -def avg_result(results): - """ - It takes a list of dictionaries, and returns a dictionary with the same keys, but the values are the - mean and standard deviation of the values in the input dictionaries - - :param results: a list of dictionaries, each dictionary is the result of a single run of the model - :return: The average and standard deviation of the results. 
- """ - ret = {} - for k in results[0]: - m = round(np.mean([result[k] for result in results]), 2) - v = round(np.std([result[k] for result in results], ddof=1), 2) if len(results) > 1 else None - ret[k] = f"{m}({v})" - return ret - - -if __name__ == '__main__': - from argparse import ArgumentParser - parser = ArgumentParser(description="create data for seq2seq training") - parser.add_argument("--output_dirs", type=str, nargs='*', required=True) - parser.add_argument('--tasks', '-t', type=str, nargs='*', choices=['nlg', 'kvret', 'opendialkg', 'personachat', 'wow'], help='names of tasks') - parser.add_argument('--shots', '-s', type=int, nargs='*', help='how many data is used for training and evaluation, ratio if < 1 else absolute number') - parser.add_argument('--dial_ids_orders', '-o', type=int, nargs='*', help='which data order is used for experiments') - args = parser.parse_args() - print(args) - - table = [] - fieldnames = [] - for task_name in tqdm(args.tasks, desc='tasks'): - metric = load_metric("metric.py", task_name) - dataset_name = task_name if task_name != "nlg" else "multiwoz21" - for shot in tqdm(args.shots, desc='shots', leave=False): - for output_dir in tqdm(args.output_dirs, desc='models', leave=False): - model_name = output_dir.split('/')[-1] - results = [] - for dial_ids_order in tqdm(args.dial_ids_orders, desc='dial_ids_orders', leave=False): - result_dir = os.path.join(output_dir, task_name, f"{dataset_name}_{shot}shot_order{dial_ids_order}/gen") - result_file = os.path.join(result_dir, "result.json") - if not os.path.exists(result_file): - filename = os.path.join(output_dir, task_name, f"{dataset_name}_{shot}shot_order{dial_ids_order}/gen/generated_predictions.json") - result = evaluate(filename, metric) - json.dump(result, open(result_file, 'w', encoding='utf-8'), indent=2, ensure_ascii=False) - else: - result = json.load(open(result_file)) - results.append(result) - res = { - "dataset": f"{task_name}-{shot}shot", - "model": f"{model_name}", - **avg_result(results) - } - table.append(res) - for k in res: - if k not in fieldnames: - fieldnames.append(k) - - res = tabulate(table, headers='keys', tablefmt='github') - with open(f'eval_results.txt', 'w', encoding='utf-8') as f: - print(res, file=f) - with open('eval_results.csv', 'w', newline='') as csvfile: - writer = csv.DictWriter(csvfile, fieldnames=fieldnames) - - writer.writeheader() - for res in table: - writer.writerow(res) diff --git a/convlab/base_models/t5/key2gen/features.py b/convlab/base_models/t5/key2gen/features.py deleted file mode 100644 index 0ac768b5cbe61d46e430580b025182e515db93ef..0000000000000000000000000000000000000000 --- a/convlab/base_models/t5/key2gen/features.py +++ /dev/null @@ -1,72 +0,0 @@ -import datasets - -FEATURES = { - "nlg": { - "context+knowledge": datasets.Value("string"), - "response": datasets.Value("string"), - "knowledge": { - "categorical": datasets.Sequence({ - "intent": datasets.Value("string"), - "domain": datasets.Value("string"), - "slot": datasets.Value("string"), - "value": datasets.Value("string"), - }), - "non-categorical": datasets.Sequence({ - "intent": datasets.Value("string"), - "domain": datasets.Value("string"), - "slot": datasets.Value("string"), - "value": datasets.Value("string"), - }), - "binary": datasets.Sequence({ - "intent": datasets.Value("string"), - "domain": datasets.Value("string"), - "slot": datasets.Value("string"), - }) - }}, - "kvret": { - "context+knowledge": datasets.Value("string"), - "response": datasets.Value("string"), - "knowledge": { - 
"schedule": datasets.Sequence({ - "entity": datasets.Value("string"), - "time": datasets.Value("string"), - "date": datasets.Value("string"), - "party": datasets.Value("string"), - "room": datasets.Value("string"), - "agenda": datasets.Value("string") - }), - "weather": datasets.Sequence({ - "entity": datasets.Value("string"), - "today": datasets.Value("string"), - "monday": datasets.Value("string"), - "tuesday": datasets.Value("string"), - "wednesday": datasets.Value("string"), - "thursday": datasets.Value("string"), - "friday": datasets.Value("string"), - "saturday": datasets.Value("string"), - "sunday": datasets.Value("string"), - }), - "navigate": datasets.Sequence({ - "entity": datasets.Value("string"), - "traffic_info": datasets.Value("string"), - "poi_type": datasets.Value("string"), - "address": datasets.Value("string"), - "distance": datasets.Value("string") - }) - }}, - "opendialkg": { - "context+knowledge": datasets.Value("string"), - "response": datasets.Value("string"), - "knowledge": datasets.Sequence(datasets.Sequence(datasets.Value("string"))), - }, - "wow": { - "context+knowledge": datasets.Value("string"), - "response": datasets.Value("string"), - "knowledge": datasets.Sequence(datasets.Value("string")), - }, - "personachat": { - "context+knowledge": datasets.Value("string"), - "response": datasets.Value("string"), - "knowledge": datasets.Sequence(datasets.Value("string")), - } -} \ No newline at end of file diff --git a/convlab/base_models/t5/key2gen/finetune.sh b/convlab/base_models/t5/key2gen/finetune.sh deleted file mode 100644 index 8b2eb8d208966ed8f8056f01ece1b1a373033014..0000000000000000000000000000000000000000 --- a/convlab/base_models/t5/key2gen/finetune.sh +++ /dev/null @@ -1,116 +0,0 @@ -set -e -dataset_path=$1 -model_name=$2 -model_name_or_path=$3 -dataset_name=$4 -if [ "${dataset_name}" == "multiwoz21" ] -then - task_name="nlg" -else - task_name=${dataset_name} -fi -master_port=$5 - -n_gpus=2 -cache_dir="../cache" -metric_name_or_path="metric.py" -source_column="context+knowledge" -target_column="response" -truncation_side="left" -max_source_length=512 -max_target_length=512 -per_device_train_batch_size=64 -per_device_eval_batch_size=64 -gradient_accumulation_steps=1 -num_workers=16 -lr=1e-3 -num_train_epochs=100 - -for shot in 50 100 200 -do - for dial_ids_order in 0 1 2 3 4 - do - python create_data.py -t ${task_name} -d ${dataset_name} -o ${dial_ids_order} -s ${shot} - - data_dir="data/${task_name}/${dataset_name}_${shot}shot_order${dial_ids_order}" - output_dir="output/${model_name}/${task_name}/${dataset_name}_${shot}shot_order${dial_ids_order}" - logging_dir="${output_dir}/runs" - train_file="${data_dir}/train.json" - validation_file="${data_dir}/validation.json" - - # training - python -m torch.distributed.launch --master_port ${master_port} \ - --nproc_per_node ${n_gpus} ../run_seq2seq.py \ - --task_name ${task_name} \ - --dataset_name ${dataset_path} \ - --dataset_config_name ${task_name} \ - --train_file ${train_file} \ - --validation_file ${validation_file} \ - --source_column ${source_column} \ - --target_column ${target_column} \ - --max_source_length ${max_source_length} \ - --max_target_length ${max_target_length} \ - --truncation_side ${truncation_side} \ - --model_name_or_path ${model_name_or_path} \ - --do_train \ - --do_eval \ - --save_strategy epoch \ - --evaluation_strategy epoch \ - --save_total_limit 1 \ - --prediction_loss_only \ - --load_best_model_at_end \ - --overwrite_output_dir \ - --cache_dir ${cache_dir} \ - --output_dir 
${output_dir} \ - --logging_dir ${logging_dir} \ - --preprocessing_num_workers ${num_workers} \ - --dataloader_num_workers ${num_workers} \ - --per_device_train_batch_size ${per_device_train_batch_size} \ - --per_device_eval_batch_size ${per_device_eval_batch_size} \ - --gradient_accumulation_steps ${gradient_accumulation_steps} \ - --learning_rate ${lr} \ - --num_train_epochs ${num_train_epochs} \ - --optim adafactor \ - --lr_scheduler_type constant \ - --gradient_checkpointing - - # inference - test_file="data/${task_name}/test.json" - gen_output_dir="${output_dir}/gen" - - python -m torch.distributed.launch --master_port ${master_port} \ - --nproc_per_node ${n_gpus} ../run_seq2seq.py \ - --task_name ${task_name} \ - --dataset_name ${dataset_path} \ - --dataset_config_name ${task_name} \ - --metric_name_or_path ${metric_name_or_path} \ - --metric_config_name ${task_name} \ - --test_file ${test_file} \ - --source_column ${source_column} \ - --target_column ${target_column} \ - --max_source_length ${max_source_length} \ - --max_target_length ${max_target_length} \ - --truncation_side ${truncation_side} \ - --model_name_or_path ${output_dir} \ - --do_predict \ - --predict_with_generate \ - --cache_dir ${cache_dir} \ - --output_dir ${gen_output_dir} \ - --logging_dir ${logging_dir} \ - --overwrite_output_dir \ - --preprocessing_num_workers ${num_workers} \ - --dataloader_num_workers ${num_workers} \ - --per_device_train_batch_size ${per_device_train_batch_size} \ - --per_device_eval_batch_size ${per_device_eval_batch_size} \ - --gradient_accumulation_steps ${gradient_accumulation_steps} \ - --learning_rate ${lr} \ - --num_train_epochs ${num_train_epochs} \ - --optim adafactor \ - --lr_scheduler_type constant \ - --gradient_checkpointing - - done -done - -# evaluation -python evaluate.py --output_dirs output/${model_name} -t ${task_name} -s 50 100 200 -o 0 1 2 3 4 diff --git a/convlab/base_models/t5/key2gen/metric.py b/convlab/base_models/t5/key2gen/metric.py deleted file mode 100644 index 808934b65268ab2ae4180b9bbe64457fb5ca1b68..0000000000000000000000000000000000000000 --- a/convlab/base_models/t5/key2gen/metric.py +++ /dev/null @@ -1,434 +0,0 @@ -# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
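For reference, the metric script removed below was consumed through `datasets.load_metric`, exactly as the deleted `evaluate.py` above does. A minimal usage sketch, assuming a JSON-lines prediction file with `predictions`, `response`, and `knowledge` fields (the path and file layout are illustrative, taken from the surrounding code):

```python
import json
from datasets import load_metric

# Load the task-specific configuration ("wow" here) of the grounded generation metric.
metric = load_metric("metric.py", "wow")

# Assumed JSON-lines file where each record holds the model output, the reference
# response, and the grounded knowledge, mirroring what evaluate.py reads.
items = [json.loads(line) for line in open("output/wow/wow/generated_predictions.json")]

scores = metric.compute(
    predictions=[d["predictions"] for d in items],
    references=[d["response"] for d in items],
    knowledge=[d["knowledge"] for d in items],
)
print(scores)  # bleu-1/2/3/4, unigram f1, rouge-1/2/L, meteor, distinct-1/2, plus knowledge utility scores
```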
-"""Grounded Dialog Generation Metric""" - -from weakref import ref -import datasets -from sacrebleu.metrics import BLEU -from sacrebleu.utils import sum_of_lists -import re -from collections import Counter -import numpy as np -from nltk.corpus import stopwords -from rouge_score import rouge_scorer, scoring -from nltk.translate import meteor_score -from datasets.config import importlib_metadata, version -from convlab.base_models.t5.key2gen.features import FEATURES -from convlab.util import load_ontology -from copy import deepcopy - - -NLTK_VERSION = version.parse(importlib_metadata.version("nltk")) -if NLTK_VERSION >= version.Version("3.6.5"): - from nltk import word_tokenize - -# Uncomment to download nltk_data for the first time running. -# import nltk -# nltk.download("wordnet") -# if NLTK_VERSION >= version.Version("3.6.5"): -# nltk.download("punkt") -# if NLTK_VERSION >= version.Version("3.6.6"): -# nltk.download("omw-1.4") - - -_CITATION = """ -""" - -_DESCRIPTION = """\ -Metric to evaluate text generation models on the grounded dialog generation task. -""" - -# TODO -_KWARGS_DESCRIPTION = """ -Args: - predictions: list of predictions to score. Each predictions - should be a string. - references: list of reference for each prediction. Each - reference should be a string. - knowledge: task-specific grounded knowledge - -Returns: - bleu-1/2/3/4: corpus-bleu score, from sacrebleu - rouge-1/2/L: ROUGE-F1, from rouge_score - meteor: METEOR, from nltk - unigram f1: unigram overlap, from parlai - distinct-1/2: from parlai - other knowledge utility score: task-specific knowledge utility metrics -""" - -re_art = re.compile(r'\b(a|an|the)\b') -re_punc = re.compile(r'[!"#$%&()*+,-./:;<=>?@\[\]\\^`{|}~_\']') -stop_words = set(stopwords.words("english")) -def utt2words(s): - """Lower text and remove punctuation, articles and extra whitespace. 
- from parlai https://github.com/facebookresearch/ParlAI/blob/9daae69320c07104493486e022c0e46a7871b253/parlai/core/metrics.py#L810""" - s = s.lower() - s = re_punc.sub(' ', s) - s = re_art.sub(' ', s) - return s.split() - - -def get_bleu(predictions, references): - """bleu-1/2/3/4 from sacrebleu""" - references = [" " if ref=="" else ref for ref in references] - metrics = {} - bleu = BLEU(lowercase=True, force=False, tokenize=BLEU.TOKENIZER_DEFAULT, smooth_method="exp", smooth_value=None, effective_order=False) - stats = sum_of_lists(bleu._extract_corpus_statistics(predictions, [references])) - for n in range(1,5): - metrics[f"bleu-{n}"] = bleu.compute_bleu( - correct=stats[2: 2 + bleu.max_ngram_order], - total=stats[2 + bleu.max_ngram_order:], - sys_len=int(stats[0]), ref_len=int(stats[1]), - smooth_method=bleu.smooth_method, smooth_value=bleu.smooth_value, - effective_order=bleu.effective_order, - max_ngram_order=n).score - return metrics - - -def get_unigram_f1(predictions, references): - """unigram f1 between prediction and reference, from parlai""" - metrics = {} - metrics["unigram f1"] = [] - metrics["unigram f1 (non-stop words)"] = [] - for prediction, reference in zip(predictions, references): - pred_items = utt2words(prediction) - gold_items = utt2words(reference) - for remove_stopwords in [False, True]: - if remove_stopwords: - pred_items = [w for w in pred_items if w not in stop_words] - gold_items = [w for w in gold_items if w not in stop_words] - common = Counter(pred_items) & Counter(gold_items) - num_same = sum(common.values()) - if num_same == 0: - f1 = 0 - else: - precision = 1.0 * num_same / len(pred_items) - recall = 1.0 * num_same / len(gold_items) - f1 = (2 * precision * recall) / (precision + recall) - if not remove_stopwords: - metrics["unigram f1"].append(f1) - else: - metrics["unigram f1 (non-stop words)"].append(f1) - metrics["unigram f1"] = np.mean(metrics["unigram f1"]) * 100 - metrics["unigram f1 (non-stop words)"] = np.mean(metrics["unigram f1 (non-stop words)"]) * 100 - return metrics - - -def get_rouge(predictions, references): - """rouge-1/2/L from rouge-score""" - rouge_types=["rouge1", "rouge2", "rougeL"] - scorer = rouge_scorer.RougeScorer(rouge_types=rouge_types, use_stemmer=True) - aggregator = scoring.BootstrapAggregator() - - for prediction, reference in zip(predictions, references): - score = scorer.score(reference, prediction) - aggregator.add_scores(score) - - return {key: 100 * (value.mid.fmeasure if key == "rougeL" else value.mid.recall) for key, value in aggregator.aggregate().items()} - - -def get_meteor(predictions, references): - """meteor from nltk""" - alpha=0.9 - beta=3 - gamma=0.5 - if NLTK_VERSION >= version.Version("3.6.5"): - scores = [ - meteor_score.single_meteor_score( - word_tokenize(ref), word_tokenize(pred), alpha=alpha, beta=beta, gamma=gamma - ) - for ref, pred in zip(references, predictions) - ] - else: - scores = [ - meteor_score.single_meteor_score(ref, pred, alpha=alpha, beta=beta, gamma=gamma) - for ref, pred in zip(references, predictions) - ] - return {"meteor": np.mean(scores) * 100} - - -def get_distinct(predictions): - """distinct-1/2 - from parlai https://github.com/facebookresearch/ParlAI/blob/9daae69320c07104493486e022c0e46a7871b253/parlai/core/metrics.py#L781""" - def _ngram(seq, n): - for i in range(len(seq) - n + 1): - yield tuple(seq[i : i + n]) - - metrics = {} - for k in [1, 2]: - inter_cnt = Counter() - for prediction in predictions: - ngram = Counter(_ngram(utt2words(prediction), k)) - inter_cnt += ngram - 
metrics[f"distinct-{k}"] = max(len(inter_cnt), 1e-12) / max(sum(inter_cnt.values()), 1e-5) * 100 - return metrics - - -def get_nlg_slot_err(predictions, knowledge): - """slot error rate: (missing_count + redundant_count) / all_count for value in dialog acts""" - val2ds_dict = {} - ontology = load_ontology("multiwoz21") - for domain_name in ontology["domains"]: - domain = ontology["domains"][domain_name] - for slot_name in domain["slots"]: - slot = domain["slots"][slot_name] - if "possible_values" not in slot: - continue - possible_vals = slot["possible_values"] - if len(possible_vals) > 0: - for val in possible_vals: - val2ds_dict[val] = f"{domain_name}-{slot_name}" - score_list = [] - for utterance, da in zip(predictions, knowledge): - missing_count = 0 - redundant_count = 0 - all_count = 0 - all_values = set() - ## missing values - # print(da) - # print(utterance) - for key in ['categorical', 'non-categorical']: - for value in da[key]['value']: - if len(value) > 0: - # print(value) - all_values.add(value) - if value.strip().lower() not in utterance.lower(): - missing_count += 1 - # print(f"\tmissing: {value}") - all_count += 1 - if all_count == 0: - continue - ## redundant values - for val in val2ds_dict: - if f" {val.strip().lower()} " in f" {utterance.strip().lower()} " and val.strip().lower() not in all_values: - wlist = val2ds_dict[val].split("-") - domain, slot = wlist[0], wlist[1] - if f" {slot.strip().lower()}" in f" {utterance.strip().lower()} ": - redundant_count += 1 - # print(f"redundant: {val}/{val2ds_dict[val]}") - item_score = float(missing_count + redundant_count) / all_count - # print(f"\tredundant: {redundant_count} | missing_count: {missing_count} |all_count: {all_count}") - # print('-'*100) - score_list.append(item_score) - return {"err": np.mean(score_list) * 100} - - -def load_entities(): - """modified (load from unified ontology) from UnifiedSKG - https://github.com/HKUNLP/UnifiedSKG/blob/49a2ff950bb312b980c22ad72b11520db72ab6a3/metrics/kvret/evaluator.py#L8""" - - ontology = load_ontology("kvret") - all_entities = set() - for domain in ontology["domains"]: - for slot in ontology["domains"][domain]["slots"]: - all_entities |= set(ontology["domains"][domain]["slots"][slot]["possible_values"]) - missed_entities = ["yoga", "tennis", "swimming", "football", " lab ", "doctor", "optometrist", "dentist", "1st", - "2nd", "3rd", "4th", "5th", "6th", "7th", "8th", "9th", "10th", - "11th", "12th", "13th", "14th", "15th", "16th", "17th", "18th", "19th", "20th", "Jill", - "Jack"] - all_entities |= set(missed_entities) - all_entities.remove("HR") - all_entities.add(" HR ") - all_entities = sorted(list(all_entities), key=lambda i: len(i), reverse=True) - return all_entities - - -def check_sub_str(str_list: list, sub_str: str): - """ - It takes a list of strings and a substring as input, and returns True if the substring is found - in any of the strings in the list, and False otherwise - """ - for str_item in str_list: - if sub_str in str_item or sub_str.lower() in str_item.lower(): - return True - return False - - -def extract_entities_from_utterance(utterance, sorted_entities): - """modified (remove underscore) from UnifiedSKG - https://github.com/HKUNLP/UnifiedSKG/blob/49a2ff950bb312b980c22ad72b11520db72ab6a3/metrics/kvret/response_entity_hit.py#L45""" - - utterance = " {} ".format(utterance) # for entity matching - for h in range(0, 13): # for formulating am & pm - utterance = utterance.replace("{} am".format(h), "{}am".format(h)) - utterance = utterance.replace("{} 
pm".format(h), "{}pm".format(h)) - for entity_item_a in [20, 30, 40, 50, 60, 70, 80, 90, 100]: - for entity_item_b in [20, 30, 40, 50, 60, 70, 80, 90, 100]: - utterance = utterance.replace("{}-{}f".format(str(entity_item_a), str(entity_item_b)), "{}f-{}f".format(str(entity_item_a), str(entity_item_b))) - entities_in_this_utterance = [] - for entity in sorted_entities: - # len(entity) decreases - if (entity in utterance) or (entity.lower() in utterance.lower()): - if not check_sub_str(entities_in_this_utterance, entity): - # in case of "week & weekend", "week & next_week" etc - entities_in_this_utterance.append(entity) - return entities_in_this_utterance - - -def f1_score(y_pred, y_true, average="micro"): - """micro/marco-F1 score, modified from UnifiedSKG - https://github.com/HKUNLP/UnifiedSKG/blob/49a2ff950bb312b980c22ad72b11520db72ab6a3/metrics/kvret/response_entity_hit.py#L76""" - - assert len(y_pred) == len(y_true) - - def _compute_F1(precision, recall): - return 2 * precision * recall / float(precision + recall) if (precision + recall) != 0 else 0 - - def _compute_prf(gold, pred): - TP, FP, FN = 0, 0, 0 - if len(gold) != 0: - count = 1 - for g in gold: - if g in pred: - TP += 1 - else: - FN += 1 - for p in set(pred): - if p not in gold: - FP += 1 - precision = TP / float(TP + FP) if (TP + FP) != 0 else 0 - recall = TP / float(TP + FN) if (TP + FN) != 0 else 0 - F1 = _compute_F1(precision, recall) - else: - precision, recall, F1, count = 0, 0, 0, 0 - return TP, FP, FN, F1, count - - F1_pred, F1_count, TP_all, FP_all, FN_all = 0, 0, 0, 0, 0 - - for y_true_item, y_pred_item in zip(y_true, y_pred): - single_tp, single_fp, single_fn, single_f1, count = _compute_prf(y_true_item, y_pred_item) - F1_pred += single_f1 - F1_count += count - TP_all += single_tp - FP_all += single_fp - FN_all += single_fn - - if average == "macro": - F1_macro_score = F1_pred / float(F1_count) if F1_count != 0 else 0 - return F1_macro_score * 100 - elif average == "micro": - P_score = TP_all / float(TP_all + FP_all) if (TP_all + FP_all) != 0 else 0 - R_score = TP_all / float(TP_all + FN_all) if (TP_all + FN_all) != 0 else 0 - F1_micro_score = _compute_F1(P_score, R_score) - return F1_micro_score * 100 - else: - raise ValueError("Options other than micro/macro are not supported.") - - -def get_kvret_entity_f1(predictions, references, knowledge): - """entity f1 for kvret, modified from - https://github.com/HKUNLP/UnifiedSKG/blob/49a2ff950bb312b980c22ad72b11520db72ab6a3/metrics/kvret/response_entity_hit.py#L178""" - - global_entities = load_entities() - F1_scores = {} - entities_from_predictions_and_references = { - d: {"predictions_entities": [], "references_entities": []} for d in ["all", "schedule", "weather", "navigate"] - } - for prediction, reference, kb in zip(predictions, references, knowledge): - prediction_entities = extract_entities_from_utterance(utterance=prediction, sorted_entities=global_entities) - reference_entities = extract_entities_from_utterance(utterance=reference, sorted_entities=global_entities) - entities_from_predictions_and_references["all"]["predictions_entities"].append(prediction_entities) - entities_from_predictions_and_references["all"]["references_entities"].append(reference_entities) - domain = "schedule" - for d in kb: - if len(kb[d]["entity"]) > 0: - domain = d - break - entities_from_predictions_and_references[domain]["predictions_entities"].append(prediction_entities) - entities_from_predictions_and_references[domain]["references_entities"].append(reference_entities) - - for 
category in entities_from_predictions_and_references.keys(): - predictions_entities = entities_from_predictions_and_references[category]["predictions_entities"] - references_entities = entities_from_predictions_and_references[category]["references_entities"] - F1_scores["{} micro entity F1".format(category)] = f1_score(y_pred=predictions_entities, y_true=references_entities, average="micro") - F1_scores["{} macro entity F1".format(category)] = f1_score(y_pred=predictions_entities, y_true=references_entities, average="macro") - - return {**F1_scores} - - -def get_opendialkg_entity_f1(predictions, references, knowledge): - predictions_entities, references_entities = [], [] - for prediction, reference, kg_path in zip(predictions, references, knowledge): - kg_entities = set() - for kg_triple in kg_path: - # add head and tail entities - kg_entities.add(kg_triple[0]) - kg_entities.add(kg_triple[-1]) - kg_entities = sorted(list(kg_entities), key=lambda i: len(i), reverse=True) - - for utterance, entities in zip([prediction, reference], [predictions_entities, references_entities]): - entities_in_this_utterance = [] - for entity in kg_entities: - if (entity in utterance) or (entity.lower() in utterance.lower()): - if not check_sub_str(entities_in_this_utterance, entity): - # in case of "week & weekend", "week & next_week" etc - entities_in_this_utterance.append(entity) - entities.append(entities_in_this_utterance) - - return { - "micro entity f1": f1_score(y_pred=predictions_entities, y_true=references_entities, average="micro"), - "macro entity f1": f1_score(y_pred=predictions_entities, y_true=references_entities, average="macro") - } - -def get_knowledge_sentences_f1(predictions, knowledge): - knowledge_reference = [' '.join(k_sens) for k_sens in knowledge] - f1_score = get_unigram_f1(predictions, knowledge_reference) - return {f"knowledge {k}": v for k, v in f1_score.items()} - - -@datasets.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION) -class GroundedDialogGenerationMetrics(datasets.Metric): - """Metric to evaluate text generation models on the grounded dialog generation task.""" - def _info(self): - return datasets.MetricInfo( - description=_DESCRIPTION, - citation=_CITATION, - inputs_description=_KWARGS_DESCRIPTION, - features=datasets.Features({ - "predictions": datasets.Value("string"), - "references": datasets.Value("string"), - "knowledge": deepcopy(FEATURES[self.config_name]["knowledge"]) - }) - ) - - def compute(self, predictions, references, knowledge=None): - """Returns the scores: bleu""" - metrics = {} - - # bleu - metrics.update(get_bleu(predictions, references)) - - # unigram f1 - metrics.update(get_unigram_f1(predictions, references)) - - # rouge-1/2/L-fmeasure - metrics.update(get_rouge(predictions, references)) - - # meteor - metrics.update(get_meteor(predictions, references)) - - # inter-distinct-1/2 - metrics.update(get_distinct(predictions)) - - if knowledge is not None: - if self.config_name == "nlg": - metrics.update(get_nlg_slot_err(predictions, knowledge)) - elif self.config_name == "kvret": - metrics.update(get_kvret_entity_f1(predictions, references, knowledge)) - elif self.config_name == "opendialkg": - metrics.update(get_opendialkg_entity_f1(predictions, references, knowledge)) - elif self.config_name in ["wow", "personachat"]: - metrics.update(get_knowledge_sentences_f1(predictions, knowledge)) - - return metrics diff --git a/convlab/base_models/t5/nlg/__init__.py b/convlab/base_models/t5/nlg/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..32275a5755248f5cfb67192b0a0f530cee0a276e --- /dev/null +++ b/convlab/base_models/t5/nlg/__init__.py @@ -0,0 +1 @@ +from convlab.base_models.t5.nlg.nlg import T5NLG \ No newline at end of file diff --git a/convlab/base_models/t5/nlg/nlg.py b/convlab/base_models/t5/nlg/nlg.py index 2781fded74c3b02a9c46a6c289d6f0e5fb850f2b..214dc01eed75cfbb85a740e8f5fee8a759d813b0 100755 --- a/convlab/base_models/t5/nlg/nlg.py +++ b/convlab/base_models/t5/nlg/nlg.py @@ -8,17 +8,13 @@ from convlab.util.custom_util import model_downloader class T5NLG(NLG): - def __init__(self, speaker, context_window_size, model_name_or_path, model_file=None, device='cuda'): + def __init__(self, speaker, context_window_size, model_name_or_path, device='cuda'): assert speaker in ['user', 'system'] self.speaker = speaker self.opponent = 'system' if speaker == 'user' else 'user' self.context_window_size = context_window_size self.use_context = context_window_size > 0 - model_dir = os.path.dirname(os.path.abspath(__file__)) - if not os.path.exists(model_name_or_path): - model_downloader(model_dir, model_file) - self.config = AutoConfig.from_pretrained(model_name_or_path) self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path, config=self.config) diff --git a/convlab/base_models/t5/nlu/__init__.py b/convlab/base_models/t5/nlu/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ed4bbd0306fe68dfbbaa69f7e93cdb2b68c23e6d --- /dev/null +++ b/convlab/base_models/t5/nlu/__init__.py @@ -0,0 +1 @@ +from convlab.base_models.t5.nlu.nlu import T5NLU \ No newline at end of file diff --git a/convlab/base_models/t5/nlu/nlu.py b/convlab/base_models/t5/nlu/nlu.py index 2862cea7aa74c8c365a047dc74aa41dc79ead405..a5a6e6a23ec184b15fc073d88fa1a6b3fece34d8 100755 --- a/convlab/base_models/t5/nlu/nlu.py +++ b/convlab/base_models/t5/nlu/nlu.py @@ -8,16 +8,12 @@ from convlab.util.custom_util import model_downloader class T5NLU(NLU): - def __init__(self, speaker, context_window_size, model_name_or_path, model_file=None, device='cuda'): + def __init__(self, speaker, context_window_size, model_name_or_path, device='cuda'): assert speaker in ['user', 'system'] self.speaker = speaker self.opponent = 'system' if speaker == 'user' else 'user' self.context_window_size = context_window_size self.use_context = context_window_size > 0 - - model_dir = os.path.dirname(os.path.abspath(__file__)) - if not os.path.exists(model_name_or_path): - model_downloader(model_dir, model_file) self.config = AutoConfig.from_pretrained(model_name_or_path) self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) diff --git a/convlab/nlu/README.md b/convlab/nlu/README.md new file mode 100644 index 0000000000000000000000000000000000000000..eb332bdf3f906c42a7cfd465205a386e8a702609 --- /dev/null +++ b/convlab/nlu/README.md @@ -0,0 +1,70 @@ +## NLU benchmark for BERTNLU and MILU on multiwoz21, tm1, tm2, tm3 + +To illustrate how easy it is to use these models on any dataset in our unified format, we report their performance on several datasets in the unified format. We follow `README.md` and config files in `unified_datasets/` to generate `predictions.json`, then evaluate it using `../evaluate_unified_datasets.py`. Note that we use almost the same hyper-parameters for different datasets, which may not be optimal.
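The `Acc` and `F1` columns in the table below follow the definitions listed after it: `Acc` counts an utterance as correct only when all of its dialogue acts are predicted exactly, and `F1` is computed over individual dialogue acts across the corpus. A minimal sketch of that computation, assuming each evaluated item pairs the predicted acts with the golden acts as `(intent, domain, slot, value)` tuples (the helper name and toy data are illustrative, not the actual `evaluate_unified_datasets.py` code):

```python
from collections import Counter

def acc_f1(items):
    """items: list of (predicted_acts, golden_acts); each act is an (intent, domain, slot, value) tuple."""
    correct, tp, fp, fn = 0, 0, 0, 0
    for predicted, golden in items:
        correct += set(predicted) == set(golden)  # Acc: every act of the utterance must match
        overlap = sum((Counter(predicted) & Counter(golden)).values())
        tp += overlap
        fp += len(predicted) - overlap
        fn += len(golden) - overlap
    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0
    f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
    return {"Acc": correct / len(items), "F1": f1}

# Toy check: the first utterance is fully correct, the second misses its only act.
print(acc_f1([
    ([("inform", "hotel", "area", "north")], [("inform", "hotel", "area", "north")]),
    ([], [("request", "hotel", "price range", "")]),
]))  # {'Acc': 0.5, 'F1': 0.666...}
```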
+ +<table> +<thead> + <tr> + <th></th> + <th colspan=2>MultiWOZ 2.1</th> + <th colspan=2>Taskmaster-1</th> + <th colspan=2>Taskmaster-2</th> + <th colspan=2>Taskmaster-3</th> + </tr> +</thead> +<thead> + <tr> + <th>Model</th> + <th>Acc</th><th>F1</th> + <th>Acc</th><th>F1</th> + <th>Acc</th><th>F1</th> + <th>Acc</th><th>F1</th> + </tr> +</thead> +<tbody> + <tr> + <td>T5-small</td> + <td>77.8</td><td>86.5</td> + <td>74.0</td><td>52.5</td> + <td>80.0</td><td>71.4</td> + <td>87.2</td><td>83.1</td> + </tr> + <tr> + <td>T5-small (context=3)</td> + <td>82.0</td><td>90.3</td> + <td>76.2</td><td>56.2</td> + <td>82.4</td><td>74.3</td> + <td>89.0</td><td>85.1</td> + </tr> + <tr> + <td>BERTNLU</td> + <td>74.5</td><td>85.9</td> + <td>72.8</td><td>50.6</td> + <td>79.2</td><td>70.6</td> + <td>86.1</td><td>81.9</td> + </tr> + <tr> + <td>BERTNLU (context=3)</td> + <td>80.6</td><td>90.3</td> + <td>74.2</td><td>52.7</td> + <td>80.9</td><td>73.3</td> + <td>87.8</td><td>83.8</td> + </tr> + <tr> + <td>MILU</td> + <td>72.9</td><td>85.2</td> + <td>72.9</td><td>49.2</td> + <td>79.1</td><td>68.7</td> + <td>85.4</td><td>80.3</td> + </tr> + <tr> + <td>MILU (context=3)</td> + <td>76.6</td><td>87.9</td> + <td>72.4</td><td>48.5</td> + <td>78.9</td><td>68.4</td> + <td>85.1</td><td>80.1</td> + </tr> +</tbody> + +- Acc: whether all dialogue acts of an utterance are correctly predicted +- F1: F1 measure of the dialogue act predictions over the corpus. \ No newline at end of file diff --git a/convlab/policy/ppo/semantic_level_config.json b/convlab/policy/ppo/semantic_level_config.json index 1da7b873628f1b189fa2da37d7b0778fdb0f1103..b9908c9cb7717515775221227f3fba19636d20dc 100644 --- a/convlab/policy/ppo/semantic_level_config.json +++ b/convlab/policy/ppo/semantic_level_config.json @@ -1,15 +1,15 @@ { "model": { - "load_path": "convlab/policy/ppo/pretrained_models/mle", + "load_path": "", "use_pretrained_initialisation": false, "pretrained_load_path": "", - "batchsz": 500, + "batchsz": 1000, "seed": 0, "epoch": 10, - "eval_frequency": 5, + "eval_frequency": 1, "process_num": 4, "sys_semantic_to_usr": false, - "num_eval_dialogues": 200 + "num_eval_dialogues": 500 }, "vectorizer_sys": { "uncertainty_vector_mul": { diff --git a/convlab/policy/ppo/setsumbt_config.json b/convlab/policy/ppo/setsumbt_config.json index 5a13ee82fcbf24c0b13112106d2b97f115966e1a..31a8ac6d275166e4163e416e0dbef6f742cddb7f 100644 --- a/convlab/policy/ppo/setsumbt_config.json +++ b/convlab/policy/ppo/setsumbt_config.json @@ -1,6 +1,6 @@ { "model": { - "load_path": "/gpfs/project/niekerk/src/ConvLab3/convlab/policy/mle/experiments/experiment_2022-11-13-12-56-34/save/supervised", + "load_path": "", "pretrained_load_path": "", "use_pretrained_initialisation": false, "batchsz": 1000, @@ -58,4 +58,4 @@ } } } -} \ No newline at end of file +} diff --git a/docs/Makefile b/docs/Makefile deleted file mode 100644 index d0c3cbf1020d5c292abdedf27627c6abe25e2293..0000000000000000000000000000000000000000 --- a/docs/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line, and also -# from the environment for the first two. -SPHINXOPTS ?= -SPHINXBUILD ?= sphinx-build -SOURCEDIR = source -BUILDDIR = build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. 
$(O) is meant as a shortcut for $(SPHINXOPTS). -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/make.bat b/docs/make.bat deleted file mode 100644 index 6247f7e231716482115f34084ac61030743e0715..0000000000000000000000000000000000000000 --- a/docs/make.bat +++ /dev/null @@ -1,35 +0,0 @@ -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set SOURCEDIR=source -set BUILDDIR=build - -if "%1" == "" goto help - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. - echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ - exit /b 1 -) - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% - -:end -popd diff --git a/docs/source/conf.py b/docs/source/conf.py deleted file mode 100644 index 0a4ab653e6fcef85f60b933a7352a7b4dfc9da9b..0000000000000000000000000000000000000000 --- a/docs/source/conf.py +++ /dev/null @@ -1,199 +0,0 @@ -# Configuration file for the Sphinx documentation builder. -# -# This file only contains a selection of the most common options. For a full -# list see the documentation: -# http://www.sphinx-doc.org/en/master/config - -# -- Path setup -------------------------------------------------------------- - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# -import os -import sys -sys.path.insert(0, os.path.abspath('../..')) - - -# -- Project information ----------------------------------------------------- - -project = 'ConvLab-2' -copyright = '2020, thu-coai' -author = 'thu-coai' - -# The full version, including alpha/beta/rc tags -release = '1.0.0' - - -# -- General configuration --------------------------------------------------- - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.doctest', - 'sphinx.ext.intersphinx', - 'sphinx.ext.todo', - 'sphinx.ext.coverage', - 'sphinx.ext.mathjax', - 'sphinx.ext.ifconfig', - 'sphinx.ext.githubpages', -] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - -# The suffix(es) of source filenames. -# You can specify multiple suffix as a list of string: -# -# source_suffix = ['.rst', '.md'] -source_suffix = '.rst' - -# The master toctree document. -master_doc = 'index' - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# -# This is also used if you do content translation via gettext catalogs. -# Usually you set "language" from the command line for these cases. -language = None - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. 
-exclude_patterns = [] - - -# -- Options for HTML output ------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -html_theme = 'sphinx_rtd_theme' -html_theme_path = ["_themes", ] - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -# -# html_theme_options = {} -html_theme_options = { - 'canonical_url': '', - #'analytics_id': 'UA-XXXXXXX-1', # Provided by Google in your dashboard - 'logo_only': False, - 'display_version': True, - 'prev_next_buttons_location': 'bottom', - 'style_external_links': False, - #'vcs_pageview_mode': '', - # Toc options - 'collapse_navigation': True, - 'sticky_navigation': True, - 'navigation_depth': 4, - 'includehidden': True, - 'titles_only': False -} - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] - -# Custom sidebar templates, must be a dictionary that maps document names -# to template names. -# -# The default sidebars (for documents that don't match any pattern) are -# defined by theme itself. Builtin themes are using these templates by -# default: ``['localtoc.html', 'relations.html', 'sourcelink.html', -# 'searchbox.html']``. -# -# html_sidebars = {} - - -# -- Options for HTMLHelp output --------------------------------------------- - -# Output file base name for HTML help builder. -htmlhelp_basename = 'ConvLab-2 doc' - - -# -- Options for LaTeX output ------------------------------------------------ - -latex_elements = { - # The paper size ('letterpaper' or 'a4paper'). - # - # 'papersize': 'letterpaper', - - # The font size ('10pt', '11pt' or '12pt'). - # - # 'pointsize': '10pt', - - # Additional stuff for the LaTeX preamble. - # - # 'preamble': '', - - # Latex figure (float) alignment - # - # 'figure_align': 'htbp', -} - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, -# author, documentclass [howto, manual, or own class]). -# latex_documents = [ -# (master_doc, 'tatk.tex', 'tatk Documentation', -# 'thu-coai', 'manual'), -# ] - - -# -- Options for manual page output ------------------------------------------ - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -# man_pages = [ -# (master_doc, project, project + ' Documentation', -# [author], 1) -# ] - - -# -- Options for Texinfo output ---------------------------------------------- - -# Grouping the document tree into Texinfo files. List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -# texinfo_documents = [ -# (master_doc, project, project + ' Documentation', -# author, 'tatk', 'One line description of project.', -# 'Miscellaneous'), -# ] - - -# -- Options for Epub output ------------------------------------------------- - -# Bibliographic Dublin Core info. -epub_title = project - -# The unique identifier of the text. This can be a ISBN number -# or the project homepage. -# -# epub_identifier = '' - -# A unique identification for the text. -# -# epub_uid = '' - -# A list of files that should not be packed into the epub file. 
-epub_exclude_files = ['search.html'] - - -# -- Extension configuration ------------------------------------------------- - -# -- Options for intersphinx extension --------------------------------------- - -# Example configuration for intersphinx: refer to the Python standard library. -intersphinx_mapping = {'https://docs.python.org/': None} - -# -- Options for todo extension ---------------------------------------------- - -# If true, `todo` and `todoList` produce output, else they produce nothing. -todo_include_todos = True diff --git a/docs/source/convlab2.dialog_agent.rst b/docs/source/convlab2.dialog_agent.rst deleted file mode 100644 index ca42d2adf1b628f61eb38cb01fe542daac412488..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.dialog_agent.rst +++ /dev/null @@ -1,38 +0,0 @@ -convlab.dialog\_agent package -============================== - -Submodules ----------- - -convlab.dialog\_agent.agent module ------------------------------------ - -.. automodule:: convlab.dialog_agent.agent - :members: - :undoc-members: - :show-inheritance: - -convlab.dialog\_agent.env module ---------------------------------- - -.. automodule:: convlab.dialog_agent.env - :members: - :undoc-members: - :show-inheritance: - -convlab.dialog\_agent.session module -------------------------------------- - -.. automodule:: convlab.dialog_agent.session - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.dialog_agent - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.dst.comer.rst b/docs/source/convlab2.dst.comer.rst deleted file mode 100644 index aa3b933babe0cf6779ba6301009d83efaa243c13..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.dst.comer.rst +++ /dev/null @@ -1,17 +0,0 @@ -convlab.dst.comer package -========================== - -Subpackages ------------ - -.. toctree:: - - convlab.dst.comer.multiwoz - -Module contents ---------------- - -.. automodule:: convlab.dst.comer - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.dst.mdbt.multiwoz.rst b/docs/source/convlab2.dst.mdbt.multiwoz.rst deleted file mode 100644 index df23278b816482148573b80cb9d1ee7a2aa95842..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.dst.mdbt.multiwoz.rst +++ /dev/null @@ -1,22 +0,0 @@ -convlab.dst.mdbt.multiwoz package -================================== - -Submodules ----------- - -convlab.dst.mdbt.multiwoz.dst module -------------------------------------- - -.. automodule:: convlab.dst.mdbt.multiwoz.dst - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.dst.mdbt.multiwoz - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.dst.mdbt.rst b/docs/source/convlab2.dst.mdbt.rst deleted file mode 100644 index c693a7f2bd578970984388e7f3285d1735f3f460..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.dst.mdbt.rst +++ /dev/null @@ -1,37 +0,0 @@ -convlab.dst.mdbt package -========================= - -Subpackages ------------ - -.. toctree:: - - convlab.dst.mdbt.multiwoz - -Submodules ----------- - -convlab.dst.mdbt.mdbt module ------------------------------ - -.. automodule:: convlab.dst.mdbt.mdbt - :members: - :undoc-members: - :show-inheritance: - -convlab.dst.mdbt.mdbt\_util module ------------------------------------ - -.. 
automodule:: convlab.dst.mdbt.mdbt_util - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.dst.mdbt - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.dst.rst b/docs/source/convlab2.dst.rst deleted file mode 100644 index 5d68b6be77ec92a214f16660e079dcb02ac5c54b..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.dst.rst +++ /dev/null @@ -1,41 +0,0 @@ -convlab.dst package -==================== - -Subpackages ------------ - -.. toctree:: - - convlab.dst.comer - convlab.dst.mdbt - convlab.dst.rule - convlab.dst.sumbt - convlab.dst.trade - -Submodules ----------- - -convlab.dst.dst module ------------------------ - -.. automodule:: convlab.dst.dst - :members: - :undoc-members: - :show-inheritance: - -convlab.dst.evaluate module ----------------------------- - -.. automodule:: convlab.dst.evaluate - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.dst - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.dst.rule.camrest.rst b/docs/source/convlab2.dst.rule.camrest.rst deleted file mode 100644 index 434973e140361a6421ca19ef335a722f78e31f16..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.dst.rule.camrest.rst +++ /dev/null @@ -1,22 +0,0 @@ -convlab.dst.rule.camrest package -================================= - -Submodules ----------- - -convlab.dst.rule.camrest.dst module ------------------------------------- - -.. automodule:: convlab.dst.rule.camrest.dst - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.dst.rule.camrest - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.dst.rule.crosswoz.rst b/docs/source/convlab2.dst.rule.crosswoz.rst deleted file mode 100644 index 2cb5894050dfd49df59bdf8013737419c8f778c2..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.dst.rule.crosswoz.rst +++ /dev/null @@ -1,30 +0,0 @@ -convlab.dst.rule.crosswoz package -================================== - -Submodules ----------- - -convlab.dst.rule.crosswoz.dst module -------------------------------------- - -.. automodule:: convlab.dst.rule.crosswoz.dst - :members: - :undoc-members: - :show-inheritance: - -convlab.dst.rule.crosswoz.evaluate module ------------------------------------------- - -.. automodule:: convlab.dst.rule.crosswoz.evaluate - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.dst.rule.crosswoz - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.dst.rule.multiwoz.rst b/docs/source/convlab2.dst.rule.multiwoz.rst deleted file mode 100644 index 702c1cabd943323b8ba4864ad6cee00ef8c18080..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.dst.rule.multiwoz.rst +++ /dev/null @@ -1,30 +0,0 @@ -convlab.dst.rule.multiwoz package -================================== - -Submodules ----------- - -convlab.dst.rule.multiwoz.dst module -------------------------------------- - -.. automodule:: convlab.dst.rule.multiwoz.dst - :members: - :undoc-members: - :show-inheritance: - -convlab.dst.rule.multiwoz.dst\_util module -------------------------------------------- - -.. automodule:: convlab.dst.rule.multiwoz.dst_util - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. 
automodule:: convlab.dst.rule.multiwoz - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.dst.rule.rst b/docs/source/convlab2.dst.rule.rst deleted file mode 100644 index ea858e16f46945241db5fa3e7bc3958bdff3936a..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.dst.rule.rst +++ /dev/null @@ -1,19 +0,0 @@ -convlab.dst.rule package -========================= - -Subpackages ------------ - -.. toctree:: - - convlab.dst.rule.camrest - convlab.dst.rule.crosswoz - convlab.dst.rule.multiwoz - -Module contents ---------------- - -.. automodule:: convlab.dst.rule - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.dst.sumbt.multiwoz.rst b/docs/source/convlab2.dst.sumbt.multiwoz.rst deleted file mode 100644 index 76125b5482deb9ff66c4f99c35cb1aae46e007d2..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.dst.sumbt.multiwoz.rst +++ /dev/null @@ -1,46 +0,0 @@ -convlab.dst.sumbt.multiwoz package -=================================== - -Submodules ----------- - -convlab.dst.sumbt.multiwoz.convert\_to\_glue\_format module ------------------------------------------------------------- - -.. automodule:: convlab.dst.sumbt.multiwoz.convert_to_glue_format - :members: - :undoc-members: - :show-inheritance: - -convlab.dst.sumbt.multiwoz.sumbt module ----------------------------------------- - -.. automodule:: convlab.dst.sumbt.multiwoz.sumbt - :members: - :undoc-members: - :show-inheritance: - -convlab.dst.sumbt.multiwoz.sumbt\_config module ------------------------------------------------- - -.. automodule:: convlab.dst.sumbt.multiwoz.sumbt_config - :members: - :undoc-members: - :show-inheritance: - -convlab.dst.sumbt.multiwoz.sumbt\_utils module ------------------------------------------------ - -.. automodule:: convlab.dst.sumbt.multiwoz.sumbt_utils - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.dst.sumbt.multiwoz - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.dst.sumbt.rst b/docs/source/convlab2.dst.sumbt.rst deleted file mode 100644 index 17b1080484960fef4da0bb332fc560fcaa8e5110..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.dst.sumbt.rst +++ /dev/null @@ -1,29 +0,0 @@ -convlab.dst.sumbt package -========================== - -Subpackages ------------ - -.. toctree:: - - convlab.dst.sumbt.multiwoz - -Submodules ----------- - -convlab.dst.sumbt.BeliefTrackerSlotQueryMultiSlot module ---------------------------------------------------------- - -.. automodule:: convlab.dst.sumbt.BeliefTrackerSlotQueryMultiSlot - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.dst.sumbt - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.dst.trade.rst b/docs/source/convlab2.dst.trade.rst deleted file mode 100644 index ae5d305a322573818739e578824a9e16ed56c827..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.dst.trade.rst +++ /dev/null @@ -1,22 +0,0 @@ -convlab.dst.trade package -========================== - -Submodules ----------- - -convlab.dst.trade.trade module -------------------------------- - -.. automodule:: convlab.dst.trade.trade - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. 
automodule:: convlab.dst.trade - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.e2e.damd.multiwoz.rst b/docs/source/convlab2.e2e.damd.multiwoz.rst deleted file mode 100644 index 754453c3ee8f156a115ac13237fa3c04af12905b..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.e2e.damd.multiwoz.rst +++ /dev/null @@ -1,78 +0,0 @@ -convlab.e2e.damd.multiwoz package -================================== - -Submodules ----------- - -convlab.e2e.damd.multiwoz.clean\_dataset module ------------------------------------------------- - -.. automodule:: convlab.e2e.damd.multiwoz.clean_dataset - :members: - :undoc-members: - :show-inheritance: - -convlab.e2e.damd.multiwoz.config module ----------------------------------------- - -.. automodule:: convlab.e2e.damd.multiwoz.config - :members: - :undoc-members: - :show-inheritance: - -convlab.e2e.damd.multiwoz.damd module --------------------------------------- - -.. automodule:: convlab.e2e.damd.multiwoz.damd - :members: - :undoc-members: - :show-inheritance: - -convlab.e2e.damd.multiwoz.damd\_net module -------------------------------------------- - -.. automodule:: convlab.e2e.damd.multiwoz.damd_net - :members: - :undoc-members: - :show-inheritance: - -convlab.e2e.damd.multiwoz.db\_ops module ------------------------------------------ - -.. automodule:: convlab.e2e.damd.multiwoz.db_ops - :members: - :undoc-members: - :show-inheritance: - -convlab.e2e.damd.multiwoz.ontology module ------------------------------------------- - -.. automodule:: convlab.e2e.damd.multiwoz.ontology - :members: - :undoc-members: - :show-inheritance: - -convlab.e2e.damd.multiwoz.reader module ----------------------------------------- - -.. automodule:: convlab.e2e.damd.multiwoz.reader - :members: - :undoc-members: - :show-inheritance: - -convlab.e2e.damd.multiwoz.utils module ---------------------------------------- - -.. automodule:: convlab.e2e.damd.multiwoz.utils - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.e2e.damd.multiwoz - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.e2e.damd.rst b/docs/source/convlab2.e2e.damd.rst deleted file mode 100644 index a9ce3e528be39a350c3e0d702bb2cbb564dac69c..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.e2e.damd.rst +++ /dev/null @@ -1,17 +0,0 @@ -convlab.e2e.damd package -========================= - -Subpackages ------------ - -.. toctree:: - - convlab.e2e.damd.multiwoz - -Module contents ---------------- - -.. automodule:: convlab.e2e.damd - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.e2e.rnn_rollout.deal_or_not.rst b/docs/source/convlab2.e2e.rnn_rollout.deal_or_not.rst deleted file mode 100644 index fd6e64fbab417e7b8d13fe98189853b00f2fc75e..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.e2e.rnn_rollout.deal_or_not.rst +++ /dev/null @@ -1,22 +0,0 @@ -convlab.e2e.rnn\_rollout.deal\_or\_not package -=============================================== - -Submodules ----------- - -convlab.e2e.rnn\_rollout.deal\_or\_not.model module ----------------------------------------------------- - -.. automodule:: convlab.e2e.rnn_rollout.deal_or_not.model - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. 
automodule:: convlab.e2e.rnn_rollout.deal_or_not - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.e2e.rnn_rollout.engines.rst b/docs/source/convlab2.e2e.rnn_rollout.engines.rst deleted file mode 100644 index 922c6139ff4114fe1f4f621ac988f42f52dbba4c..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.e2e.rnn_rollout.engines.rst +++ /dev/null @@ -1,46 +0,0 @@ -convlab.e2e.rnn\_rollout.engines package -========================================= - -Submodules ----------- - -convlab.e2e.rnn\_rollout.engines.engine module ------------------------------------------------ - -.. automodule:: convlab.e2e.rnn_rollout.engines.engine - :members: - :undoc-members: - :show-inheritance: - -convlab.e2e.rnn\_rollout.engines.latent\_clustering\_engine module -------------------------------------------------------------------- - -.. automodule:: convlab.e2e.rnn_rollout.engines.latent_clustering_engine - :members: - :undoc-members: - :show-inheritance: - -convlab.e2e.rnn\_rollout.engines.rnn\_engine module ----------------------------------------------------- - -.. automodule:: convlab.e2e.rnn_rollout.engines.rnn_engine - :members: - :undoc-members: - :show-inheritance: - -convlab.e2e.rnn\_rollout.engines.selection\_engine module ----------------------------------------------------------- - -.. automodule:: convlab.e2e.rnn_rollout.engines.selection_engine - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.e2e.rnn_rollout.engines - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.e2e.rnn_rollout.models.rst b/docs/source/convlab2.e2e.rnn_rollout.models.rst deleted file mode 100644 index fa1fdc43f973c296f865214536474b8324c1e6cd..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.e2e.rnn_rollout.models.rst +++ /dev/null @@ -1,70 +0,0 @@ -convlab.e2e.rnn\_rollout.models package -======================================== - -Submodules ----------- - -convlab.e2e.rnn\_rollout.models.attn module --------------------------------------------- - -.. automodule:: convlab.e2e.rnn_rollout.models.attn - :members: - :undoc-members: - :show-inheritance: - -convlab.e2e.rnn\_rollout.models.ctx\_encoder module ----------------------------------------------------- - -.. automodule:: convlab.e2e.rnn_rollout.models.ctx_encoder - :members: - :undoc-members: - :show-inheritance: - -convlab.e2e.rnn\_rollout.models.latent\_clustering\_model module ------------------------------------------------------------------ - -.. automodule:: convlab.e2e.rnn_rollout.models.latent_clustering_model - :members: - :undoc-members: - :show-inheritance: - -convlab.e2e.rnn\_rollout.models.modules module ------------------------------------------------ - -.. automodule:: convlab.e2e.rnn_rollout.models.modules - :members: - :undoc-members: - :show-inheritance: - -convlab.e2e.rnn\_rollout.models.rnn\_model module --------------------------------------------------- - -.. automodule:: convlab.e2e.rnn_rollout.models.rnn_model - :members: - :undoc-members: - :show-inheritance: - -convlab.e2e.rnn\_rollout.models.selection\_model module --------------------------------------------------------- - -.. automodule:: convlab.e2e.rnn_rollout.models.selection_model - :members: - :undoc-members: - :show-inheritance: - -convlab.e2e.rnn\_rollout.models.utils module ---------------------------------------------- - -.. 
automodule:: convlab.e2e.rnn_rollout.models.utils - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.e2e.rnn_rollout.models - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.e2e.rnn_rollout.rst b/docs/source/convlab2.e2e.rnn_rollout.rst deleted file mode 100644 index 57f1a6a8b7a0b7c4999c0dfd334468e003832ddb..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.e2e.rnn_rollout.rst +++ /dev/null @@ -1,143 +0,0 @@ -convlab.e2e.rnn\_rollout package -================================= - -Subpackages ------------ - -.. toctree:: - - convlab.e2e.rnn_rollout.deal_or_not - convlab.e2e.rnn_rollout.engines - convlab.e2e.rnn_rollout.models - -Submodules ----------- - -convlab.e2e.rnn\_rollout.agent module --------------------------------------- - -.. automodule:: convlab.e2e.rnn_rollout.agent - :members: - :undoc-members: - :show-inheritance: - -convlab.e2e.rnn\_rollout.avg\_rank module ------------------------------------------- - -.. automodule:: convlab.e2e.rnn_rollout.avg_rank - :members: - :undoc-members: - :show-inheritance: - -convlab.e2e.rnn\_rollout.chat module -------------------------------------- - -.. automodule:: convlab.e2e.rnn_rollout.chat - :members: - :undoc-members: - :show-inheritance: - -convlab.e2e.rnn\_rollout.config module ---------------------------------------- - -.. automodule:: convlab.e2e.rnn_rollout.config - :members: - :undoc-members: - :show-inheritance: - -convlab.e2e.rnn\_rollout.data module -------------------------------------- - -.. automodule:: convlab.e2e.rnn_rollout.data - :members: - :undoc-members: - :show-inheritance: - -convlab.e2e.rnn\_rollout.dialog module ---------------------------------------- - -.. automodule:: convlab.e2e.rnn_rollout.dialog - :members: - :undoc-members: - :show-inheritance: - -convlab.e2e.rnn\_rollout.domain module ---------------------------------------- - -.. automodule:: convlab.e2e.rnn_rollout.domain - :members: - :undoc-members: - :show-inheritance: - -convlab.e2e.rnn\_rollout.eval\_selfplay module ------------------------------------------------ - -.. automodule:: convlab.e2e.rnn_rollout.eval_selfplay - :members: - :undoc-members: - :show-inheritance: - -convlab.e2e.rnn\_rollout.metric module ---------------------------------------- - -.. automodule:: convlab.e2e.rnn_rollout.metric - :members: - :undoc-members: - :show-inheritance: - -convlab.e2e.rnn\_rollout.reinforce module ------------------------------------------- - -.. automodule:: convlab.e2e.rnn_rollout.reinforce - :members: - :undoc-members: - :show-inheritance: - -convlab.e2e.rnn\_rollout.rnn\_model module -------------------------------------------- - -.. automodule:: convlab.e2e.rnn_rollout.rnn_model - :members: - :undoc-members: - :show-inheritance: - -convlab.e2e.rnn\_rollout.rnnrollout module -------------------------------------------- - -.. automodule:: convlab.e2e.rnn_rollout.rnnrollout - :members: - :undoc-members: - :show-inheritance: - -convlab.e2e.rnn\_rollout.split module --------------------------------------- - -.. automodule:: convlab.e2e.rnn_rollout.split - :members: - :undoc-members: - :show-inheritance: - -convlab.e2e.rnn\_rollout.utils module --------------------------------------- - -.. automodule:: convlab.e2e.rnn_rollout.utils - :members: - :undoc-members: - :show-inheritance: - -convlab.e2e.rnn\_rollout.vis module ------------------------------------- - -.. 
automodule:: convlab.e2e.rnn_rollout.vis - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.e2e.rnn_rollout - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.e2e.rst b/docs/source/convlab2.e2e.rst deleted file mode 100644 index e5993e95ea81363243bdcd807e6a528df85f0b84..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.e2e.rst +++ /dev/null @@ -1,19 +0,0 @@ -convlab.e2e package -==================== - -Subpackages ------------ - -.. toctree:: - - convlab.e2e.damd - convlab.e2e.rnn_rollout - convlab.e2e.sequicity - -Module contents ---------------- - -.. automodule:: convlab.e2e - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.e2e.sequicity.camrest.rst b/docs/source/convlab2.e2e.sequicity.camrest.rst deleted file mode 100644 index cfbaac61023e9181622f151e853f420307cff07a..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.e2e.sequicity.camrest.rst +++ /dev/null @@ -1,22 +0,0 @@ -convlab.e2e.sequicity.camrest package -====================================== - -Submodules ----------- - -convlab.e2e.sequicity.camrest.sequicity module ------------------------------------------------ - -.. automodule:: convlab.e2e.sequicity.camrest.sequicity - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.e2e.sequicity.camrest - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.e2e.sequicity.multiwoz.rst b/docs/source/convlab2.e2e.sequicity.multiwoz.rst deleted file mode 100644 index c38b55e5f1fd3dd454224cd6a68ee82683356087..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.e2e.sequicity.multiwoz.rst +++ /dev/null @@ -1,22 +0,0 @@ -convlab.e2e.sequicity.multiwoz package -======================================= - -Submodules ----------- - -convlab.e2e.sequicity.multiwoz.sequicity module ------------------------------------------------- - -.. automodule:: convlab.e2e.sequicity.multiwoz.sequicity - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.e2e.sequicity.multiwoz - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.e2e.sequicity.rst b/docs/source/convlab2.e2e.sequicity.rst deleted file mode 100644 index e2f3e9c0e6750a703167d292a5b6853d165f6e12..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.e2e.sequicity.rst +++ /dev/null @@ -1,62 +0,0 @@ -convlab.e2e.sequicity package -============================== - -Subpackages ------------ - -.. toctree:: - - convlab.e2e.sequicity.camrest - convlab.e2e.sequicity.multiwoz - -Submodules ----------- - -convlab.e2e.sequicity.config module ------------------------------------- - -.. automodule:: convlab.e2e.sequicity.config - :members: - :undoc-members: - :show-inheritance: - -convlab.e2e.sequicity.metric module ------------------------------------- - -.. automodule:: convlab.e2e.sequicity.metric - :members: - :undoc-members: - :show-inheritance: - -convlab.e2e.sequicity.model module ------------------------------------ - -.. automodule:: convlab.e2e.sequicity.model - :members: - :undoc-members: - :show-inheritance: - -convlab.e2e.sequicity.reader module ------------------------------------- - -.. automodule:: convlab.e2e.sequicity.reader - :members: - :undoc-members: - :show-inheritance: - -convlab.e2e.sequicity.tsd\_net module --------------------------------------- - -.. 
automodule:: convlab.e2e.sequicity.tsd_net - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.e2e.sequicity - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.evaluator.rst b/docs/source/convlab2.evaluator.rst deleted file mode 100644 index 6cb325a95085a5cedda659a2b724f8d4c60e4807..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.evaluator.rst +++ /dev/null @@ -1,30 +0,0 @@ -convlab.evaluator package -========================== - -Submodules ----------- - -convlab.evaluator.evaluator module ------------------------------------ - -.. automodule:: convlab.evaluator.evaluator - :members: - :undoc-members: - :show-inheritance: - -convlab.evaluator.multiwoz\_eval module ----------------------------------------- - -.. automodule:: convlab.evaluator.multiwoz_eval - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.evaluator - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.nlg.rst b/docs/source/convlab2.nlg.rst deleted file mode 100644 index e936d91a8b96bdd18cc9a1e8ea8bffdaeeb67163..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.nlg.rst +++ /dev/null @@ -1,38 +0,0 @@ -convlab.nlg package -==================== - -Subpackages ------------ - -.. toctree:: - - convlab.nlg.sclstm - convlab.nlg.template - -Submodules ----------- - -convlab.nlg.evaluate module ----------------------------- - -.. automodule:: convlab.nlg.evaluate - :members: - :undoc-members: - :show-inheritance: - -convlab.nlg.nlg module ------------------------ - -.. automodule:: convlab.nlg.nlg - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.nlg - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.nlg.sclstm.camrest.rst b/docs/source/convlab2.nlg.sclstm.camrest.rst deleted file mode 100644 index 0e8f0a0dd6fe41f7c8500ee92cd1de97eb15dcc0..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.nlg.sclstm.camrest.rst +++ /dev/null @@ -1,38 +0,0 @@ -convlab.nlg.sclstm.camrest package -=================================== - -Submodules ----------- - -convlab.nlg.sclstm.camrest.evaluate module -------------------------------------------- - -.. automodule:: convlab.nlg.sclstm.camrest.evaluate - :members: - :undoc-members: - :show-inheritance: - -convlab.nlg.sclstm.camrest.sc\_lstm module -------------------------------------------- - -.. automodule:: convlab.nlg.sclstm.camrest.sc_lstm - :members: - :undoc-members: - :show-inheritance: - -convlab.nlg.sclstm.camrest.train module ----------------------------------------- - -.. automodule:: convlab.nlg.sclstm.camrest.train - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.nlg.sclstm.camrest - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.nlg.sclstm.crosswoz.rst b/docs/source/convlab2.nlg.sclstm.crosswoz.rst deleted file mode 100644 index 7f8f5134a3e321b2635e7a9937f5878c2d97d2a7..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.nlg.sclstm.crosswoz.rst +++ /dev/null @@ -1,46 +0,0 @@ -convlab.nlg.sclstm.crosswoz package -==================================== - -Submodules ----------- - -convlab.nlg.sclstm.crosswoz.evaluate module --------------------------------------------- - -.. 
automodule:: convlab.nlg.sclstm.crosswoz.evaluate - :members: - :undoc-members: - :show-inheritance: - -convlab.nlg.sclstm.crosswoz.generate\_resources module -------------------------------------------------------- - -.. automodule:: convlab.nlg.sclstm.crosswoz.generate_resources - :members: - :undoc-members: - :show-inheritance: - -convlab.nlg.sclstm.crosswoz.sc\_lstm module --------------------------------------------- - -.. automodule:: convlab.nlg.sclstm.crosswoz.sc_lstm - :members: - :undoc-members: - :show-inheritance: - -convlab.nlg.sclstm.crosswoz.train module ------------------------------------------ - -.. automodule:: convlab.nlg.sclstm.crosswoz.train - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.nlg.sclstm.crosswoz - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.nlg.sclstm.multiwoz.rst b/docs/source/convlab2.nlg.sclstm.multiwoz.rst deleted file mode 100644 index 044c1c1cc932049c2a6c4ae65bb11b771b6d9fa2..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.nlg.sclstm.multiwoz.rst +++ /dev/null @@ -1,38 +0,0 @@ -convlab.nlg.sclstm.multiwoz package -==================================== - -Submodules ----------- - -convlab.nlg.sclstm.multiwoz.evaluate module --------------------------------------------- - -.. automodule:: convlab.nlg.sclstm.multiwoz.evaluate - :members: - :undoc-members: - :show-inheritance: - -convlab.nlg.sclstm.multiwoz.sc\_lstm module --------------------------------------------- - -.. automodule:: convlab.nlg.sclstm.multiwoz.sc_lstm - :members: - :undoc-members: - :show-inheritance: - -convlab.nlg.sclstm.multiwoz.train module ------------------------------------------ - -.. automodule:: convlab.nlg.sclstm.multiwoz.train - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.nlg.sclstm.multiwoz - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.nlg.sclstm.rst b/docs/source/convlab2.nlg.sclstm.rst deleted file mode 100644 index 411e4538c65267fdada38eb21936659649059281..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.nlg.sclstm.rst +++ /dev/null @@ -1,31 +0,0 @@ -convlab.nlg.sclstm package -=========================== - -Subpackages ------------ - -.. toctree:: - - convlab.nlg.sclstm.camrest - convlab.nlg.sclstm.crosswoz - convlab.nlg.sclstm.multiwoz - -Submodules ----------- - -convlab.nlg.sclstm.bleu module -------------------------------- - -.. automodule:: convlab.nlg.sclstm.bleu - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.nlg.sclstm - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.nlg.template.camrest.rst b/docs/source/convlab2.nlg.template.camrest.rst deleted file mode 100644 index 015c1aa3f9046ddb8657209564161cb936db4e98..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.nlg.template.camrest.rst +++ /dev/null @@ -1,30 +0,0 @@ -convlab.nlg.template.camrest package -===================================== - -Submodules ----------- - -convlab.nlg.template.camrest.evaluate module ---------------------------------------------- - -.. automodule:: convlab.nlg.template.camrest.evaluate - :members: - :undoc-members: - :show-inheritance: - -convlab.nlg.template.camrest.nlg module ----------------------------------------- - -.. 
automodule:: convlab.nlg.template.camrest.nlg - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.nlg.template.camrest - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.nlg.template.crosswoz.rst b/docs/source/convlab2.nlg.template.crosswoz.rst deleted file mode 100644 index 569fe598ece0f2266546bd4b8a82823da42008ec..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.nlg.template.crosswoz.rst +++ /dev/null @@ -1,38 +0,0 @@ -convlab.nlg.template.crosswoz package -====================================== - -Submodules ----------- - -convlab.nlg.template.crosswoz.evaluate module ----------------------------------------------- - -.. automodule:: convlab.nlg.template.crosswoz.evaluate - :members: - :undoc-members: - :show-inheritance: - -convlab.nlg.template.crosswoz.generate\_auto\_template module --------------------------------------------------------------- - -.. automodule:: convlab.nlg.template.crosswoz.generate_auto_template - :members: - :undoc-members: - :show-inheritance: - -convlab.nlg.template.crosswoz.nlg module ------------------------------------------ - -.. automodule:: convlab.nlg.template.crosswoz.nlg - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.nlg.template.crosswoz - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.nlg.template.multiwoz.rst b/docs/source/convlab2.nlg.template.multiwoz.rst deleted file mode 100644 index 8e86f2905baeacfcc9357f2ff359bfe4775a7d49..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.nlg.template.multiwoz.rst +++ /dev/null @@ -1,22 +0,0 @@ -convlab.nlg.template.multiwoz package -====================================== - -Submodules ----------- - -convlab.nlg.template.multiwoz.nlg module ------------------------------------------ - -.. automodule:: convlab.nlg.template.multiwoz.nlg - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.nlg.template.multiwoz - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.nlg.template.rst b/docs/source/convlab2.nlg.template.rst deleted file mode 100644 index 20d5d1f2b6f1268faf527a12a81ac92168d65492..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.nlg.template.rst +++ /dev/null @@ -1,19 +0,0 @@ -convlab.nlg.template package -============================= - -Subpackages ------------ - -.. toctree:: - - convlab.nlg.template.camrest - convlab.nlg.template.crosswoz - convlab.nlg.template.multiwoz - -Module contents ---------------- - -.. automodule:: convlab.nlg.template - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.nlu.jointBERT.camrest.rst b/docs/source/convlab2.nlu.jointBERT.camrest.rst deleted file mode 100644 index 5053706b49686124729c92a4ef30122eca653577..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.nlu.jointBERT.camrest.rst +++ /dev/null @@ -1,38 +0,0 @@ -convlab.nlu.jointBERT.camrest package -====================================== - -Submodules ----------- - -convlab.nlu.jointBERT.camrest.nlu module ------------------------------------------ - -.. automodule:: convlab.nlu.jointBERT.camrest.nlu - :members: - :undoc-members: - :show-inheritance: - -convlab.nlu.jointBERT.camrest.postprocess module -------------------------------------------------- - -.. 
automodule:: convlab.nlu.jointBERT.camrest.postprocess - :members: - :undoc-members: - :show-inheritance: - -convlab.nlu.jointBERT.camrest.preprocess module ------------------------------------------------- - -.. automodule:: convlab.nlu.jointBERT.camrest.preprocess - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.nlu.jointBERT.camrest - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.nlu.jointBERT.crosswoz.rst b/docs/source/convlab2.nlu.jointBERT.crosswoz.rst deleted file mode 100644 index a875026bb376703fd073a30c0bebc8fbed1a8715..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.nlu.jointBERT.crosswoz.rst +++ /dev/null @@ -1,46 +0,0 @@ -convlab.nlu.jointBERT.crosswoz package -======================================= - -Submodules ----------- - -convlab.nlu.jointBERT.crosswoz.analyse module ----------------------------------------------- - -.. automodule:: convlab.nlu.jointBERT.crosswoz.analyse - :members: - :undoc-members: - :show-inheritance: - -convlab.nlu.jointBERT.crosswoz.nlu module ------------------------------------------- - -.. automodule:: convlab.nlu.jointBERT.crosswoz.nlu - :members: - :undoc-members: - :show-inheritance: - -convlab.nlu.jointBERT.crosswoz.postprocess module --------------------------------------------------- - -.. automodule:: convlab.nlu.jointBERT.crosswoz.postprocess - :members: - :undoc-members: - :show-inheritance: - -convlab.nlu.jointBERT.crosswoz.preprocess module -------------------------------------------------- - -.. automodule:: convlab.nlu.jointBERT.crosswoz.preprocess - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.nlu.jointBERT.crosswoz - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.nlu.jointBERT.multiwoz.rst b/docs/source/convlab2.nlu.jointBERT.multiwoz.rst deleted file mode 100644 index de513a34dfefa1cfebc9715bfc27b736300b4cc2..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.nlu.jointBERT.multiwoz.rst +++ /dev/null @@ -1,38 +0,0 @@ -convlab.nlu.jointBERT.multiwoz package -======================================= - -Submodules ----------- - -convlab.nlu.jointBERT.multiwoz.nlu module ------------------------------------------- - -.. automodule:: convlab.nlu.jointBERT.multiwoz.nlu - :members: - :undoc-members: - :show-inheritance: - -convlab.nlu.jointBERT.multiwoz.postprocess module --------------------------------------------------- - -.. automodule:: convlab.nlu.jointBERT.multiwoz.postprocess - :members: - :undoc-members: - :show-inheritance: - -convlab.nlu.jointBERT.multiwoz.preprocess module -------------------------------------------------- - -.. automodule:: convlab.nlu.jointBERT.multiwoz.preprocess - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.nlu.jointBERT.multiwoz - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.nlu.jointBERT.rst b/docs/source/convlab2.nlu.jointBERT.rst deleted file mode 100644 index 187458ab3ade0589d965133ac5d6dc9521a66181..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.nlu.jointBERT.rst +++ /dev/null @@ -1,55 +0,0 @@ -convlab.nlu.jointBERT package -============================== - -Subpackages ------------ - -.. 
toctree:: - - convlab.nlu.jointBERT.camrest - convlab.nlu.jointBERT.crosswoz - convlab.nlu.jointBERT.multiwoz - -Submodules ----------- - -convlab.nlu.jointBERT.dataloader module ----------------------------------------- - -.. automodule:: convlab.nlu.jointBERT.dataloader - :members: - :undoc-members: - :show-inheritance: - -convlab.nlu.jointBERT.jointBERT module ---------------------------------------- - -.. automodule:: convlab.nlu.jointBERT.jointBERT - :members: - :undoc-members: - :show-inheritance: - -convlab.nlu.jointBERT.test module ----------------------------------- - -.. automodule:: convlab.nlu.jointBERT.test - :members: - :undoc-members: - :show-inheritance: - -convlab.nlu.jointBERT.train module ------------------------------------ - -.. automodule:: convlab.nlu.jointBERT.train - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.nlu.jointBERT - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.nlu.milu.multiwoz.rst b/docs/source/convlab2.nlu.milu.multiwoz.rst deleted file mode 100644 index 51dda55566651e8ce8b7be09a3f60206dcb0e7aa..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.nlu.milu.multiwoz.rst +++ /dev/null @@ -1,22 +0,0 @@ -convlab.nlu.milu.multiwoz package -================================== - -Submodules ----------- - -convlab.nlu.milu.multiwoz.nlu module -------------------------------------- - -.. automodule:: convlab.nlu.milu.multiwoz.nlu - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.nlu.milu.multiwoz - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.nlu.milu.rst b/docs/source/convlab2.nlu.milu.rst deleted file mode 100644 index 438ea6631bc88055e3849e5eabadf3fbd598d858..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.nlu.milu.rst +++ /dev/null @@ -1,69 +0,0 @@ -convlab.nlu.milu package -========================= - -Subpackages ------------ - -.. toctree:: - - convlab.nlu.milu.multiwoz - -Submodules ----------- - -convlab.nlu.milu.dai\_f1\_measure module ------------------------------------------ - -.. automodule:: convlab.nlu.milu.dai_f1_measure - :members: - :undoc-members: - :show-inheritance: - -convlab.nlu.milu.dataset\_reader module ----------------------------------------- - -.. automodule:: convlab.nlu.milu.dataset_reader - :members: - :undoc-members: - :show-inheritance: - -convlab.nlu.milu.evaluate module ---------------------------------- - -.. automodule:: convlab.nlu.milu.evaluate - :members: - :undoc-members: - :show-inheritance: - -convlab.nlu.milu.model module ------------------------------- - -.. automodule:: convlab.nlu.milu.model - :members: - :undoc-members: - :show-inheritance: - -convlab.nlu.milu.multilabel\_f1\_measure module ------------------------------------------------- - -.. automodule:: convlab.nlu.milu.multilabel_f1_measure - :members: - :undoc-members: - :show-inheritance: - -convlab.nlu.milu.train module ------------------------------- - -.. automodule:: convlab.nlu.milu.train - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. 
automodule:: convlab.nlu.milu - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.nlu.rst b/docs/source/convlab2.nlu.rst deleted file mode 100644 index 890d0071efc7b8249b1f74817aa1460b30400ab6..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.nlu.rst +++ /dev/null @@ -1,39 +0,0 @@ -convlab.nlu package -==================== - -Subpackages ------------ - -.. toctree:: - - convlab.nlu.jointBERT - convlab.nlu.milu - convlab.nlu.svm - -Submodules ----------- - -convlab.nlu.evaluate module ----------------------------- - -.. automodule:: convlab.nlu.evaluate - :members: - :undoc-members: - :show-inheritance: - -convlab.nlu.nlu module ------------------------ - -.. automodule:: convlab.nlu.nlu - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.nlu - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.nlu.svm.camrest.rst b/docs/source/convlab2.nlu.svm.camrest.rst deleted file mode 100644 index 87da8e5865067fa00421e687cc44d004f536a46c..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.nlu.svm.camrest.rst +++ /dev/null @@ -1,38 +0,0 @@ -convlab.nlu.svm.camrest package -================================ - -Submodules ----------- - -convlab.nlu.svm.camrest.evaluate module ----------------------------------------- - -.. automodule:: convlab.nlu.svm.camrest.evaluate - :members: - :undoc-members: - :show-inheritance: - -convlab.nlu.svm.camrest.nlu module ------------------------------------ - -.. automodule:: convlab.nlu.svm.camrest.nlu - :members: - :undoc-members: - :show-inheritance: - -convlab.nlu.svm.camrest.preprocess module ------------------------------------------- - -.. automodule:: convlab.nlu.svm.camrest.preprocess - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.nlu.svm.camrest - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.nlu.svm.multiwoz.rst b/docs/source/convlab2.nlu.svm.multiwoz.rst deleted file mode 100644 index 1cf584298aa87611c134457b73893250a6081f6a..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.nlu.svm.multiwoz.rst +++ /dev/null @@ -1,38 +0,0 @@ -convlab.nlu.svm.multiwoz package -================================= - -Submodules ----------- - -convlab.nlu.svm.multiwoz.evaluate module ------------------------------------------ - -.. automodule:: convlab.nlu.svm.multiwoz.evaluate - :members: - :undoc-members: - :show-inheritance: - -convlab.nlu.svm.multiwoz.nlu module ------------------------------------- - -.. automodule:: convlab.nlu.svm.multiwoz.nlu - :members: - :undoc-members: - :show-inheritance: - -convlab.nlu.svm.multiwoz.preprocess module -------------------------------------------- - -.. automodule:: convlab.nlu.svm.multiwoz.preprocess - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.nlu.svm.multiwoz - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.nlu.svm.rst b/docs/source/convlab2.nlu.svm.rst deleted file mode 100644 index 1e76053eb86c1593e9d8049084aabe7ac3310100..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.nlu.svm.rst +++ /dev/null @@ -1,70 +0,0 @@ -convlab.nlu.svm package -======================== - -Subpackages ------------ - -.. 
toctree:: - - convlab.nlu.svm.camrest - convlab.nlu.svm.multiwoz - -Submodules ----------- - -convlab.nlu.svm.Classifier module ----------------------------------- - -.. automodule:: convlab.nlu.svm.Classifier - :members: - :undoc-members: - :show-inheritance: - -convlab.nlu.svm.Features module --------------------------------- - -.. automodule:: convlab.nlu.svm.Features - :members: - :undoc-members: - :show-inheritance: - -convlab.nlu.svm.Tuples module ------------------------------- - -.. automodule:: convlab.nlu.svm.Tuples - :members: - :undoc-members: - :show-inheritance: - -convlab.nlu.svm.dataset\_walker module ---------------------------------------- - -.. automodule:: convlab.nlu.svm.dataset_walker - :members: - :undoc-members: - :show-inheritance: - -convlab.nlu.svm.sutils module ------------------------------- - -.. automodule:: convlab.nlu.svm.sutils - :members: - :undoc-members: - :show-inheritance: - -convlab.nlu.svm.train module ------------------------------ - -.. automodule:: convlab.nlu.svm.train - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.nlu.svm - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.policy.gdpl.multiwoz.rst b/docs/source/convlab2.policy.gdpl.multiwoz.rst deleted file mode 100644 index e7d2baf16c54691e8a2c46be9dc3dbb5963535fe..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.policy.gdpl.multiwoz.rst +++ /dev/null @@ -1,22 +0,0 @@ -convlab.policy.gdpl.multiwoz package -===================================== - -Submodules ----------- - -convlab.policy.gdpl.multiwoz.gdpl\_policy module -------------------------------------------------- - -.. automodule:: convlab.policy.gdpl.multiwoz.gdpl_policy - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.policy.gdpl.multiwoz - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.policy.gdpl.rst b/docs/source/convlab2.policy.gdpl.rst deleted file mode 100644 index 9fbf92e9b72909f522309f8939e9f4eabe721224..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.policy.gdpl.rst +++ /dev/null @@ -1,45 +0,0 @@ -convlab.policy.gdpl package -============================ - -Subpackages ------------ - -.. toctree:: - - convlab.policy.gdpl.multiwoz - -Submodules ----------- - -convlab.policy.gdpl.estimator module -------------------------------------- - -.. automodule:: convlab.policy.gdpl.estimator - :members: - :undoc-members: - :show-inheritance: - -convlab.policy.gdpl.gdpl module --------------------------------- - -.. automodule:: convlab.policy.gdpl.gdpl - :members: - :undoc-members: - :show-inheritance: - -convlab.policy.gdpl.train module ---------------------------------- - -.. automodule:: convlab.policy.gdpl.train - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.policy.gdpl - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.policy.mdrg.rst b/docs/source/convlab2.policy.mdrg.rst deleted file mode 100644 index 42475946e600aa5b8e14225e2fe9751782e2e238..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.policy.mdrg.rst +++ /dev/null @@ -1,17 +0,0 @@ -convlab.policy.mdrg package -============================ - -Subpackages ------------ - -.. toctree:: - - convlab.policy.mdrg.multiwoz - -Module contents ---------------- - -.. 
automodule:: convlab.policy.mdrg - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.policy.mle.camrest.rst b/docs/source/convlab2.policy.mle.camrest.rst deleted file mode 100644 index 534a176d1b616ff304eb1df8c8489c2a1b9195f2..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.policy.mle.camrest.rst +++ /dev/null @@ -1,38 +0,0 @@ -convlab.policy.mle.camrest package -=================================== - -Submodules ----------- - -convlab.policy.mle.camrest.loader module ------------------------------------------ - -.. automodule:: convlab.policy.mle.camrest.loader - :members: - :undoc-members: - :show-inheritance: - -convlab.policy.mle.camrest.mle module --------------------------------------- - -.. automodule:: convlab.policy.mle.camrest.mle - :members: - :undoc-members: - :show-inheritance: - -convlab.policy.mle.camrest.train module ----------------------------------------- - -.. automodule:: convlab.policy.mle.camrest.train - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.policy.mle.camrest - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.policy.mle.crosswoz.rst b/docs/source/convlab2.policy.mle.crosswoz.rst deleted file mode 100644 index 7bdb5dd4ed66bff145e7909c3af8c4b597570514..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.policy.mle.crosswoz.rst +++ /dev/null @@ -1,46 +0,0 @@ -convlab.policy.mle.crosswoz package -==================================== - -Submodules ----------- - -convlab.policy.mle.crosswoz.evaluate module --------------------------------------------- - -.. automodule:: convlab.policy.mle.crosswoz.evaluate - :members: - :undoc-members: - :show-inheritance: - -convlab.policy.mle.crosswoz.loader module ------------------------------------------- - -.. automodule:: convlab.policy.mle.crosswoz.loader - :members: - :undoc-members: - :show-inheritance: - -convlab.policy.mle.crosswoz.mle module ---------------------------------------- - -.. automodule:: convlab.policy.mle.crosswoz.mle - :members: - :undoc-members: - :show-inheritance: - -convlab.policy.mle.crosswoz.train module ------------------------------------------ - -.. automodule:: convlab.policy.mle.crosswoz.train - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.policy.mle.crosswoz - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.policy.mle.multiwoz.rst b/docs/source/convlab2.policy.mle.multiwoz.rst deleted file mode 100644 index 9c97544797e935864a6621738ff1f7823c7551d6..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.policy.mle.multiwoz.rst +++ /dev/null @@ -1,38 +0,0 @@ -convlab.policy.mle.multiwoz package -==================================== - -Submodules ----------- - -convlab.policy.mle.multiwoz.loader module ------------------------------------------- - -.. automodule:: convlab.policy.mle.multiwoz.loader - :members: - :undoc-members: - :show-inheritance: - -convlab.policy.mle.multiwoz.mle module ---------------------------------------- - -.. automodule:: convlab.policy.mle.multiwoz.mle - :members: - :undoc-members: - :show-inheritance: - -convlab.policy.mle.multiwoz.train module ------------------------------------------ - -.. automodule:: convlab.policy.mle.multiwoz.train - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. 
automodule:: convlab.policy.mle.multiwoz - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.policy.mle.rst b/docs/source/convlab2.policy.mle.rst deleted file mode 100644 index c2a6333372b98036a733cbdc1a160f6c23511689..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.policy.mle.rst +++ /dev/null @@ -1,47 +0,0 @@ -convlab.policy.mle package -=========================== - -Subpackages ------------ - -.. toctree:: - - convlab.policy.mle.camrest - convlab.policy.mle.crosswoz - convlab.policy.mle.multiwoz - -Submodules ----------- - -convlab.policy.mle.loader module ---------------------------------- - -.. automodule:: convlab.policy.mle.loader - :members: - :undoc-members: - :show-inheritance: - -convlab.policy.mle.mle module ------------------------------- - -.. automodule:: convlab.policy.mle.mle - :members: - :undoc-members: - :show-inheritance: - -convlab.policy.mle.train module --------------------------------- - -.. automodule:: convlab.policy.mle.train - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.policy.mle - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.policy.pg.multiwoz.rst b/docs/source/convlab2.policy.pg.multiwoz.rst deleted file mode 100644 index 6743536bf4a1af6675b76c97a6bf29054f435283..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.policy.pg.multiwoz.rst +++ /dev/null @@ -1,22 +0,0 @@ -convlab.policy.pg.multiwoz package -=================================== - -Submodules ----------- - -convlab.policy.pg.multiwoz.pg\_policy module ---------------------------------------------- - -.. automodule:: convlab.policy.pg.multiwoz.pg_policy - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.policy.pg.multiwoz - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.policy.pg.rst b/docs/source/convlab2.policy.pg.rst deleted file mode 100644 index 4a0438565fe2879daa5842058cedefb27822ebed..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.policy.pg.rst +++ /dev/null @@ -1,37 +0,0 @@ -convlab.policy.pg package -========================== - -Subpackages ------------ - -.. toctree:: - - convlab.policy.pg.multiwoz - -Submodules ----------- - -convlab.policy.pg.pg module ----------------------------- - -.. automodule:: convlab.policy.pg.pg - :members: - :undoc-members: - :show-inheritance: - -convlab.policy.pg.train module -------------------------------- - -.. automodule:: convlab.policy.pg.train - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.policy.pg - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.policy.ppo.multiwoz.rst b/docs/source/convlab2.policy.ppo.multiwoz.rst deleted file mode 100644 index c8f6a40f8f1a11cefd53fb639e9ff80810d8871c..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.policy.ppo.multiwoz.rst +++ /dev/null @@ -1,22 +0,0 @@ -convlab.policy.ppo.multiwoz package -==================================== - -Submodules ----------- - -convlab.policy.ppo.multiwoz.ppo\_policy module ------------------------------------------------ - -.. automodule:: convlab.policy.ppo.multiwoz.ppo_policy - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. 
automodule:: convlab.policy.ppo.multiwoz - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.policy.ppo.rst b/docs/source/convlab2.policy.ppo.rst deleted file mode 100644 index 021684a87a78c6fe58578c061e071d2cd281fb7e..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.policy.ppo.rst +++ /dev/null @@ -1,37 +0,0 @@ -convlab.policy.ppo package -=========================== - -Subpackages ------------ - -.. toctree:: - - convlab.policy.ppo.multiwoz - -Submodules ----------- - -convlab.policy.ppo.ppo module ------------------------------- - -.. automodule:: convlab.policy.ppo.ppo - :members: - :undoc-members: - :show-inheritance: - -convlab.policy.ppo.train module --------------------------------- - -.. automodule:: convlab.policy.ppo.train - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.policy.ppo - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.policy.rst b/docs/source/convlab2.policy.rst deleted file mode 100644 index fea1046151fd58646738d9bbeb98d3270d0484a1..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.policy.rst +++ /dev/null @@ -1,60 +0,0 @@ -convlab.policy package -======================= - -Subpackages ------------ - -.. toctree:: - - convlab.policy.gdpl - convlab.policy.mdrg - convlab.policy.mle - convlab.policy.pg - convlab.policy.ppo - convlab.policy.rule - convlab.policy.vector - convlab.policy.vhus - -Submodules ----------- - -convlab.policy.evaluate module -------------------------------- - -.. automodule:: convlab.policy.evaluate - :members: - :undoc-members: - :show-inheritance: - -convlab.policy.policy module ------------------------------ - -.. automodule:: convlab.policy.policy - :members: - :undoc-members: - :show-inheritance: - -convlab.policy.rlmodule module -------------------------------- - -.. automodule:: convlab.policy.rlmodule - :members: - :undoc-members: - :show-inheritance: - -convlab.policy.vec module --------------------------- - -.. automodule:: convlab.policy.vec - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.policy - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.policy.rule.camrest.rst b/docs/source/convlab2.policy.rule.camrest.rst deleted file mode 100644 index 00bccc9d8b2c15b3b5d942dad951f7edd287f968..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.policy.rule.camrest.rst +++ /dev/null @@ -1,38 +0,0 @@ -convlab.policy.rule.camrest package -==================================== - -Submodules ----------- - -convlab.policy.rule.camrest.policy\_agenda\_camrest module ------------------------------------------------------------ - -.. automodule:: convlab.policy.rule.camrest.policy_agenda_camrest - :members: - :undoc-members: - :show-inheritance: - -convlab.policy.rule.camrest.rule module ----------------------------------------- - -.. automodule:: convlab.policy.rule.camrest.rule - :members: - :undoc-members: - :show-inheritance: - -convlab.policy.rule.camrest.rule\_based\_camrest\_bot module -------------------------------------------------------------- - -.. automodule:: convlab.policy.rule.camrest.rule_based_camrest_bot - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. 
automodule:: convlab.policy.rule.camrest - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.policy.rule.crosswoz.rst b/docs/source/convlab2.policy.rule.crosswoz.rst deleted file mode 100644 index 416c5d0cebe621b9f8935c69ea216b051ed352a2..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.policy.rule.crosswoz.rst +++ /dev/null @@ -1,30 +0,0 @@ -convlab.policy.rule.crosswoz package -===================================== - -Submodules ----------- - -convlab.policy.rule.crosswoz.evaluate module ---------------------------------------------- - -.. automodule:: convlab.policy.rule.crosswoz.evaluate - :members: - :undoc-members: - :show-inheritance: - -convlab.policy.rule.crosswoz.rule\_simulator module ----------------------------------------------------- - -.. automodule:: convlab.policy.rule.crosswoz.rule_simulator - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.policy.rule.crosswoz - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.policy.rule.multiwoz.rst b/docs/source/convlab2.policy.rule.multiwoz.rst deleted file mode 100644 index 4af738e06767cfaf31c5050f159dbb55c201b45f..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.policy.rule.multiwoz.rst +++ /dev/null @@ -1,38 +0,0 @@ -convlab.policy.rule.multiwoz package -===================================== - -Submodules ----------- - -convlab.policy.rule.multiwoz.policy\_agenda\_multiwoz module -------------------------------------------------------------- - -.. automodule:: convlab.policy.rule.multiwoz.policy_agenda_multiwoz - :members: - :undoc-members: - :show-inheritance: - -convlab.policy.rule.multiwoz.rule module ------------------------------------------ - -.. automodule:: convlab.policy.rule.multiwoz.rule - :members: - :undoc-members: - :show-inheritance: - -convlab.policy.rule.multiwoz.rule\_based\_multiwoz\_bot module ---------------------------------------------------------------- - -.. automodule:: convlab.policy.rule.multiwoz.rule_based_multiwoz_bot - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.policy.rule.multiwoz - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.policy.rule.rst b/docs/source/convlab2.policy.rule.rst deleted file mode 100644 index 5dc2ca91425d70affa9e1ebc2f986d7694005e53..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.policy.rule.rst +++ /dev/null @@ -1,19 +0,0 @@ -convlab.policy.rule package -============================ - -Subpackages ------------ - -.. toctree:: - - convlab.policy.rule.camrest - convlab.policy.rule.crosswoz - convlab.policy.rule.multiwoz - -Module contents ---------------- - -.. automodule:: convlab.policy.rule - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.policy.vector.rst b/docs/source/convlab2.policy.vector.rst deleted file mode 100644 index 9abd78174e9af1109d3ed51b5bd5e59466ca96e8..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.policy.vector.rst +++ /dev/null @@ -1,46 +0,0 @@ -convlab.policy.vector package -============================== - -Submodules ----------- - -convlab.policy.vector.dataset module -------------------------------------- - -.. automodule:: convlab.policy.vector.dataset - :members: - :undoc-members: - :show-inheritance: - -convlab.policy.vector.vector\_camrest module ---------------------------------------------- - -.. 
automodule:: convlab.policy.vector.vector_camrest - :members: - :undoc-members: - :show-inheritance: - -convlab.policy.vector.vector\_crosswoz module ----------------------------------------------- - -.. automodule:: convlab.policy.vector.vector_crosswoz - :members: - :undoc-members: - :show-inheritance: - -convlab.policy.vector.vector\_multiwoz module ----------------------------------------------- - -.. automodule:: convlab.policy.vector.vector_multiwoz - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.policy.vector - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.policy.vhus.camrest.rst b/docs/source/convlab2.policy.vhus.camrest.rst deleted file mode 100644 index 63e420277df2bd736fa34c94ece40ab282af64dc..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.policy.vhus.camrest.rst +++ /dev/null @@ -1,38 +0,0 @@ -convlab.policy.vhus.camrest package -==================================== - -Submodules ----------- - -convlab.policy.vhus.camrest.train module ------------------------------------------ - -.. automodule:: convlab.policy.vhus.camrest.train - :members: - :undoc-members: - :show-inheritance: - -convlab.policy.vhus.camrest.usermanager module ------------------------------------------------ - -.. automodule:: convlab.policy.vhus.camrest.usermanager - :members: - :undoc-members: - :show-inheritance: - -convlab.policy.vhus.camrest.vhus module ----------------------------------------- - -.. automodule:: convlab.policy.vhus.camrest.vhus - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.policy.vhus.camrest - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.policy.vhus.multiwoz.rst b/docs/source/convlab2.policy.vhus.multiwoz.rst deleted file mode 100644 index 9faa27dec3789497cf108de077808988da838735..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.policy.vhus.multiwoz.rst +++ /dev/null @@ -1,38 +0,0 @@ -convlab.policy.vhus.multiwoz package -===================================== - -Submodules ----------- - -convlab.policy.vhus.multiwoz.train module ------------------------------------------- - -.. automodule:: convlab.policy.vhus.multiwoz.train - :members: - :undoc-members: - :show-inheritance: - -convlab.policy.vhus.multiwoz.usermanager module ------------------------------------------------- - -.. automodule:: convlab.policy.vhus.multiwoz.usermanager - :members: - :undoc-members: - :show-inheritance: - -convlab.policy.vhus.multiwoz.vhus module ------------------------------------------ - -.. automodule:: convlab.policy.vhus.multiwoz.vhus - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.policy.vhus.multiwoz - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.policy.vhus.rst b/docs/source/convlab2.policy.vhus.rst deleted file mode 100644 index 54eb0fa7335d7802a0c8f7e8944ae6f25c227dde..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.policy.vhus.rst +++ /dev/null @@ -1,54 +0,0 @@ -convlab.policy.vhus package -============================ - -Subpackages ------------ - -.. toctree:: - - convlab.policy.vhus.camrest - convlab.policy.vhus.multiwoz - -Submodules ----------- - -convlab.policy.vhus.train module ---------------------------------- - -.. 
automodule:: convlab.policy.vhus.train - :members: - :undoc-members: - :show-inheritance: - -convlab.policy.vhus.usermodule module --------------------------------------- - -.. automodule:: convlab.policy.vhus.usermodule - :members: - :undoc-members: - :show-inheritance: - -convlab.policy.vhus.util module --------------------------------- - -.. automodule:: convlab.policy.vhus.util - :members: - :undoc-members: - :show-inheritance: - -convlab.policy.vhus.vhus module --------------------------------- - -.. automodule:: convlab.policy.vhus.vhus - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.policy.vhus - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.rst b/docs/source/convlab2.rst deleted file mode 100644 index c05fce52bb0e643f8013df6dcfb9784c63236f72..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.rst +++ /dev/null @@ -1,25 +0,0 @@ -convlab package -================ - -Subpackages ------------ - -.. toctree:: - - convlab.dialog_agent - convlab.dst - convlab.e2e - convlab.evaluator - convlab.nlg - convlab.nlu - convlab.policy - convlab.task - convlab.util - -Module contents ---------------- - -.. automodule:: convlab - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.task.camrest.rst b/docs/source/convlab2.task.camrest.rst deleted file mode 100644 index f26a58371c97ea711ddccc31969a075a05a32b09..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.task.camrest.rst +++ /dev/null @@ -1,22 +0,0 @@ -convlab.task.camrest package -============================= - -Submodules ----------- - -convlab.task.camrest.goal\_generator module --------------------------------------------- - -.. automodule:: convlab.task.camrest.goal_generator - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: convlab.task.camrest - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/convlab2.task.crosswoz.rst b/docs/source/convlab2.task.crosswoz.rst deleted file mode 100644 index 8c2ab1474e7377993128fa12d128e165d48609e0..0000000000000000000000000000000000000000 --- a/docs/source/convlab2.task.crosswoz.rst +++ /dev/null @@ -1,78 +0,0 @@ -convlab.task.crosswoz package -============================== - -Submodules ----------- - -convlab.task.crosswoz.attraction\_generator module ---------------------------------------------------- - -.. automodule:: convlab.task.crosswoz.attraction_generator - :members: - :undoc-members: - :show-inheritance: - -convlab.task.crosswoz.goal\_generator module ---------------------------------------------- - -.. automodule:: convlab.task.crosswoz.goal_generator - :members: - :undoc-members: - :show-inheritance: - -convlab.task.crosswoz.hotel\_generator module ----------------------------------------------- - -.. automodule:: convlab.task.crosswoz.hotel_generator - :members: - :undoc-members: - :show-inheritance: - -convlab.task.crosswoz.metro\_generator module ----------------------------------------------- - -.. automodule:: convlab.task.crosswoz.metro_generator - :members: - :undoc-members: - :show-inheritance: - -convlab.task.crosswoz.reorder module -------------------------------------- - -.. automodule:: convlab.task.crosswoz.reorder - :members: - :undoc-members: - :show-inheritance: - -convlab.task.crosswoz.restaurant\_generator module ---------------------------------------------------- - -.. 
automodule:: convlab.task.crosswoz.restaurant_generator
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-convlab.task.crosswoz.sentence\_generator module
--------------------------------------------------
-
-.. automodule:: convlab.task.crosswoz.sentence_generator
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-convlab.task.crosswoz.taxi\_generator module
----------------------------------------------
-
-.. automodule:: convlab.task.crosswoz.taxi_generator
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-
-Module contents
----------------
-
-.. automodule:: convlab.task.crosswoz
-   :members:
-   :undoc-members:
-   :show-inheritance:
diff --git a/docs/source/convlab2.task.multiwoz.rst b/docs/source/convlab2.task.multiwoz.rst
deleted file mode 100644
index eb6f80e018c26b674cc4ac54cd5bb12dc2467089..0000000000000000000000000000000000000000
--- a/docs/source/convlab2.task.multiwoz.rst
+++ /dev/null
@@ -1,22 +0,0 @@
-convlab.task.multiwoz package
-==============================
-
-Submodules
-----------
-
-convlab.task.multiwoz.goal\_generator module
----------------------------------------------
-
-.. automodule:: convlab.task.multiwoz.goal_generator
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-
-Module contents
----------------
-
-.. automodule:: convlab.task.multiwoz
-   :members:
-   :undoc-members:
-   :show-inheritance:
diff --git a/docs/source/convlab2.task.rst b/docs/source/convlab2.task.rst
deleted file mode 100644
index 30609493b8a4808409a61ab4023e6d04ee745baa..0000000000000000000000000000000000000000
--- a/docs/source/convlab2.task.rst
+++ /dev/null
@@ -1,19 +0,0 @@
-convlab.task package
-=====================
-
-Subpackages
------------
-
-.. toctree::
-
-   convlab.task.camrest
-   convlab.task.crosswoz
-   convlab.task.multiwoz
-
-Module contents
----------------
-
-.. automodule:: convlab.task
-   :members:
-   :undoc-members:
-   :show-inheritance:
diff --git a/docs/source/convlab2.util.camrest.rst b/docs/source/convlab2.util.camrest.rst
deleted file mode 100644
index 7553f24cfd61bbf399887b25ca6e83f417473854..0000000000000000000000000000000000000000
--- a/docs/source/convlab2.util.camrest.rst
+++ /dev/null
@@ -1,38 +0,0 @@
-convlab.util.camrest package
-=============================
-
-Submodules
-----------
-
-convlab.util.camrest.dbquery module
------------------------------------
-
-.. automodule:: convlab.util.camrest.dbquery
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-convlab.util.camrest.lexicalize module
---------------------------------------
-
-.. automodule:: convlab.util.camrest.lexicalize
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-convlab.util.camrest.state module
----------------------------------
-
-.. automodule:: convlab.util.camrest.state
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-
-Module contents
----------------
-
-.. automodule:: convlab.util.camrest
-   :members:
-   :undoc-members:
-   :show-inheritance:
diff --git a/docs/source/convlab2.util.crosswoz.rst b/docs/source/convlab2.util.crosswoz.rst
deleted file mode 100644
index 4f7f3ad7b461f86e7dcf5c6e8cce72bf1f78218d..0000000000000000000000000000000000000000
--- a/docs/source/convlab2.util.crosswoz.rst
+++ /dev/null
@@ -1,38 +0,0 @@
-convlab.util.crosswoz package
-==============================
-
-Submodules
-----------
-
-convlab.util.crosswoz.dbquery module
-------------------------------------
-
-.. automodule:: convlab.util.crosswoz.dbquery
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-convlab.util.crosswoz.lexicalize module
----------------------------------------
-
-.. automodule:: convlab.util.crosswoz.lexicalize
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-convlab.util.crosswoz.state module
----------------------------------
-
-.. automodule:: convlab.util.crosswoz.state
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-
-Module contents
----------------
-
-.. automodule:: convlab.util.crosswoz
-   :members:
-   :undoc-members:
-   :show-inheritance:
diff --git a/docs/source/convlab2.util.dataloader.rst b/docs/source/convlab2.util.dataloader.rst
deleted file mode 100644
index ce6307e968fd32714472de06bffffa2f0a87a48e..0000000000000000000000000000000000000000
--- a/docs/source/convlab2.util.dataloader.rst
+++ /dev/null
@@ -1,30 +0,0 @@
-convlab.util.dataloader package
-================================
-
-Submodules
-----------
-
-convlab.util.dataloader.dataset\_dataloader module
----------------------------------------------------
-
-.. automodule:: convlab.util.dataloader.dataset_dataloader
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-convlab.util.dataloader.module\_dataloader module
--------------------------------------------------
-
-.. automodule:: convlab.util.dataloader.module_dataloader
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-
-Module contents
----------------
-
-.. automodule:: convlab.util.dataloader
-   :members:
-   :undoc-members:
-   :show-inheritance:
diff --git a/docs/source/convlab2.util.multiwoz.rst b/docs/source/convlab2.util.multiwoz.rst
deleted file mode 100644
index b0e44d79751d645d690c231fe07083aa49e4caee..0000000000000000000000000000000000000000
--- a/docs/source/convlab2.util.multiwoz.rst
+++ /dev/null
@@ -1,46 +0,0 @@
-convlab.util.multiwoz package
-==============================
-
-Submodules
-----------
-
-convlab.util.multiwoz.dbquery module
-------------------------------------
-
-.. automodule:: convlab.util.multiwoz.dbquery
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-convlab.util.multiwoz.lexicalize module
----------------------------------------
-
-.. automodule:: convlab.util.multiwoz.lexicalize
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-convlab.util.multiwoz.multiwoz\_slot\_trans module
---------------------------------------------------
-
-.. automodule:: convlab.util.multiwoz.multiwoz_slot_trans
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-convlab.util.multiwoz.state module
----------------------------------
-
-.. automodule:: convlab.util.multiwoz.state
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-
-Module contents
----------------
-
-.. automodule:: convlab.util.multiwoz
-   :members:
-   :undoc-members:
-   :show-inheritance:
diff --git a/docs/source/convlab2.util.rst b/docs/source/convlab2.util.rst
deleted file mode 100644
index e73afeae04576af4bc7c4f8209eaf424e7f268bd..0000000000000000000000000000000000000000
--- a/docs/source/convlab2.util.rst
+++ /dev/null
@@ -1,56 +0,0 @@
-convlab.util package
-=====================
-
-Subpackages
------------
-
-.. toctree::
-
-   convlab.util.camrest
-   convlab.util.crosswoz
-   convlab.util.dataloader
-   convlab.util.multiwoz
-
-Submodules
-----------
-
-convlab.util.allennlp\_file\_utils module
------------------------------------------
-
-.. automodule:: convlab.util.allennlp_file_utils
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-convlab.util.file\_util module
-------------------------------
-
-.. automodule:: convlab.util.file_util
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-convlab.util.module module
---------------------------
-
-.. automodule:: convlab.util.module
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-convlab.util.train\_util module
--------------------------------
-
-.. automodule:: convlab.util.train_util
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-
-Module contents
----------------
-
-.. automodule:: convlab.util
-   :members:
-   :undoc-members:
-   :show-inheritance:
diff --git a/docs/source/index.rst b/docs/source/index.rst
deleted file mode 100644
index ca951cd7f5cbb2b1b2fe9b82aa81def9bb1cb153..0000000000000000000000000000000000000000
--- a/docs/source/index.rst
+++ /dev/null
@@ -1,20 +0,0 @@
-.. convlab documentation master file, created by
-   sphinx-quickstart on Thu May 7 12:16:27 2020.
-   You can adapt this file completely to your liking, but it should at least
-   contain the root `toctree` directive.
-
-Welcome to convlab's documentation!
-====================================
-
-.. toctree::
-   :maxdepth: 2
-   :caption: Contents:
-
-
-
-Indices and tables
-==================
-
-* :ref:`genindex`
-* :ref:`modindex`
-* :ref:`search`
diff --git a/docs/source/modules.rst b/docs/source/modules.rst
deleted file mode 100644
index 39cf0072e6c2d2ee5c663aae18da59f8bbe89162..0000000000000000000000000000000000000000
--- a/docs/source/modules.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-convlab
-========
-
-.. toctree::
-   :maxdepth: 4
-
-   convlab
diff --git a/requirements.txt b/requirements.txt
index 1f2ab21fe01047f604424fcb2674fa3c1e1a0bcc..26b14729071e602dcb2d7891eeb5f6b32cd3397e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,114 +1,125 @@
-absl-py==1.1.0
-accelerate==0.10.0
-aiohttp==3.8.1
-aiosignal==1.2.0
+absl-py==1.3.0
+accelerate==0.14.0
+aiohttp==3.8.3
+aiosignal==1.3.1
 async-timeout==4.0.2
-attrs==21.4.0
-blis==0.7.8
-boto3==1.24.22
-botocore==1.27.22
+attrs==22.1.0
+blis==0.7.9
+boto3==1.26.18
+botocore==1.29.18
 cachetools==5.2.0
-catalogue==2.0.7
-certifi==2022.5.18.1
-charset-normalizer==2.1.0
+catalogue==2.0.8
+certifi==2022.9.24
+charset-normalizer==2.1.1
 click==8.1.3
-colorama==0.4.5
+colorama==0.4.6
+confection==0.0.3
+contourpy==1.0.6
 cycler==0.11.0
-cymem==2.0.6
-datasets==2.3.2
+cymem==2.0.7
+datasets==2.7.1
 deepspeech==0.9.3
-dill==0.3.5.1
+dill==0.3.6
 embeddings==0.0.8
-filelock==3.7.1
-fonttools==4.33.3
-frozenlist==1.3.0
-fsspec==2022.5.0
+filelock==3.8.0
+fonttools==4.38.0
+frozenlist==1.3.3
+fsspec==2022.11.0
 fuzzywuzzy==0.18.0
-google-auth==2.9.0
+google-auth==2.14.1
 google-auth-oauthlib==0.4.6
-grpcio==1.47.0
-gTTS==2.2.4
-huggingface-hub==0.8.1
-idna==3.3
-importlib-metadata==4.12.0
+grpcio==1.50.0
+gTTS==2.3.0
+huggingface-hub==0.11.1
+idna==3.4
+importlib-metadata==5.1.0
 jieba==0.42.1
 Jinja2==3.1.2
 jmespath==1.0.1
-joblib==1.1.0
+joblib==1.2.0
 json-lines==0.5.0
 jsonpatch==1.32
 jsonpointer==2.3
-kiwisolver==1.4.3
+kiwisolver==1.4.4
 langcodes==3.3.0
-Markdown==3.3.7
+Levenshtein==0.20.8
+lxml==4.9.1
+Markdown==3.4.1
 MarkupSafe==2.1.1
-matplotlib==3.5.2
+matplotlib==3.6.2
 multidict==6.0.2
-multiprocess==0.70.13
-murmurhash==1.0.7
+multiprocess==0.70.14
+murmurhash==1.0.9
+networkx==2.8.8
 nltk==3.7
-numpy==1.23.0
-oauthlib==3.2.0
+numpy==1.23.5
+nvidia-cublas-cu11==11.10.3.66
+nvidia-cuda-nvrtc-cu11==11.7.99
+nvidia-cuda-runtime-cu11==11.7.99
+nvidia-cudnn-cu11==8.5.0.96
+oauthlib==3.2.2
 packaging==21.3
-pandas==1.4.3
-pathy==0.6.2
-Pillow==9.2.0
-portalocker==2.4.0
-preshed==3.0.6
-protobuf==3.19.4
-psutil==5.9.1
-pyarrow==8.0.0
+pandas==1.5.2
+pathy==0.10.0
+Pillow==9.3.0
+portalocker==2.6.0
+preshed==3.0.8
+protobuf==3.20.2
+psutil==5.9.4
+pyarrow==10.0.1
 pyasn1==0.4.8
 pyasn1-modules==0.2.8
-pydantic==1.8.2
+pydantic==1.10.2
 pydub==0.25.1
 pyparsing==3.0.9
 python-dateutil==2.8.2
-python-Levenshtein==0.12.2
-pytz==2022.1
+python-Levenshtein==0.20.8
+pytz==2022.6
 PyYAML==6.0
-pyzmq==23.2.0
 quadprog==0.1.11
-regex==2022.6.2
+rapidfuzz==2.13.2
+regex==2022.10.31
 requests==2.28.1
 requests-oauthlib==1.3.1
 responses==0.18.0
-rouge-score==0.0.4
-rsa==4.8
+rouge-score==0.1.2
+rsa==4.9
 s3transfer==0.6.0
-sacrebleu==2.1.0
-scikit-learn==1.1.1
-scipy==1.8.1
+sacrebleu==2.3.1
+scikit-learn==1.1.3
+scipy==1.9.3
+seaborn==0.12.1
 sentence-transformers==2.2.2
+sentencepiece==0.1.97
 seqeval==1.2.2
-simplejson==3.17.6
+simplejson==3.18.0
 six==1.16.0
 smart-open==5.2.1
-spacy==3.3.1
-spacy-legacy==3.0.9
-spacy-loggers==1.0.2
-srsly==2.4.3
-tabulate==0.8.10
-tensorboard==2.9.1
+spacy==3.4.3
+spacy-legacy==3.0.10
+spacy-loggers==1.0.3
+srsly==2.4.5
+tabulate==0.9.0
+tensorboard==2.11.0
 tensorboard-data-server==0.6.1
 tensorboard-plugin-wit==1.8.1
-tensorboardX==2.5.1
-thinc==8.0.17
+tensorboardX==2.5
+thinc==8.1.5
 threadpoolctl==3.1.0
-tokenizers==0.12.1
-torch==1.12.0
-torchfile==0.1.0
+tokenizers==0.13.2
+torch==1.11.0
+torchvision==0.12.0
 tornado==6.2
-tqdm==4.64.0
-transformers==4.20.1
-typer==0.4.2
-typing_extensions==4.3.0
-Unidecode==1.3.4
-urllib3==1.26.9
-visdom==0.1.8.9
-wasabi==0.9.1
-websocket-client==1.3.3
-Werkzeug==2.1.2
-xxhash==3.0.0
-yarl==1.7.2
-zipp==3.8.0
+tqdm==4.64.1
+transformers==4.24.0
+typer==0.7.0
+typing_extensions==4.4.0
+Unidecode==1.3.6
+urllib3==1.26.13
+visdom==0.2.3
+wasabi==0.10.1
+websocket-client==1.4.2
+Werkzeug==2.2.2
+xxhash==3.1.0
+yarl==1.8.1
+zipp==3.11.0
diff --git a/setup.py b/setup.py
index d195fdcc10bc83cd7a1ab1fcf85e68ae1156cdec..c2cae13c857561899c9b71f68e236f30d705f5ce 100755
--- a/setup.py
+++ b/setup.py
@@ -8,14 +8,12 @@ setup(
     version='3.0.0',
     packages=find_packages(),
     license='Apache',
-    description='An Open-source Dialog System Platform',
+    description='An Open-source Dialog System Toolkit',
    long_description=open('README.md', encoding='UTF-8').read(),
     long_description_content_type="text/markdown",
     classifiers=[
         'Development Status :: 3 - Alpha',
         'License :: OSI Approved :: Apache Software License',
-        'Programming Language :: Python :: 3.6',
-        'Programming Language :: Python :: 3.7',
         'Programming Language :: Python :: 3.8',
         'Programming Language :: Python :: 3.9',
         'Intended Audience :: Science/Research',
@@ -24,12 +22,17 @@ setup(
     ],
     setup_requires=['setuptools-git'],
     install_requires=[
+        'joblib>=1.2.0',
+        'pillow>=9.3.0',
+        'protobuf>=3.20.2',
+        'oauthlib>=3.2.1',
         'accelerate',
         'rouge-score',
         'sacrebleu',
         'tensorboardX',
         'boto3',
         'matplotlib',
+        'seaborn',
         'tabulate',
         'python-Levenshtein',
         'requests',
@@ -37,10 +40,10 @@ setup(
         'nltk',
         'scipy',
         'tensorboard',
-        'torch>=1.6',
-        'transformers>=4.0',
-        'sentence-transformers',
-        'datasets>=1.8',
+        'torch>=1.10.1,<=1.13',
+        'transformers>=4.17.0,<=4.24.0',
+        'sentence-transformers>=2.2.2',
+        'datasets>=2.0',
         'seqeval',
         'spacy',
         'simplejson',
@@ -74,6 +77,6 @@ setup(
     url='https://github.com/ConvLab/ConvLab-3',
     author='convlab',
     author_email='convlab@googlegroups.com',
-    python_requires='>=3.6',
+    python_requires='>=3.8',
     zip_safe=False
 )
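Not part of the patch above: a minimal sketch, assuming a Python 3.8+ environment with the requirements listed in this diff installed, of how the tightened `torch` and `transformers` ranges declared in `install_requires` could be checked locally. The `BOUNDS` mapping and the script itself are illustrative only and are not shipped with ConvLab-3.

```python
# Illustrative only: compares installed versions against the ranges pinned in setup.py
# (torch>=1.10.1,<=1.13 and transformers>=4.17.0,<=4.24.0). Assumes the `packaging`
# package from requirements.txt is available.
from importlib.metadata import version

from packaging.version import Version

BOUNDS = {
    # package: (minimum, maximum), taken from install_requires in the hunk above
    "torch": ("1.10.1", "1.13"),
    "transformers": ("4.17.0", "4.24.0"),
}

for pkg, (low, high) in BOUNDS.items():
    installed = Version(version(pkg))
    ok = Version(low) <= installed <= Version(high)
    print(f"{pkg} {installed}: {'within' if ok else 'outside'} the supported range")
```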