diff --git a/data/unified_datasets/README.md b/data/unified_datasets/README.md index 8e502f43eff1115139ade8144a5e3ce1d60aa94f..82b43645a0f9f5bb55cbb58c9b49baa3ca1ebb24 100644 --- a/data/unified_datasets/README.md +++ b/data/unified_datasets/README.md @@ -19,7 +19,7 @@ if __name__ == '__main__': preprocess() ``` -- `data.zip`: the zipped directory contains: +- `data.zip`: the zipped directory `data` contains: - `ontology.json`: dataset ontology, contains descriptions, state definition, etc. - `dialogues.json`: a list of all dialogues in the dataset. - other necessary files such as databases. @@ -42,7 +42,7 @@ class Database: We first introduce the unified format of `ontology` and `dialogues`. To transform a new dataset into the unified format: 1. Create `data/unified_datasets/$dataset` folder, where `$dataset` is the name of the dataset. 2. Write `preprocess.py` to transform the original dataset into the unified format, producing `data.zip` and `dummy_data.json`. -3. Run `python test.py $dataset` in the `data/unified_datasets` directory to check the validation of processed dataset and get data statistics. +3. Run `python check.py $dataset` in the `data/unified_datasets` directory to check the validation of processed dataset and get data statistics. 4. Write `README.md` to describe the data following [How to create dataset README](#how-to-create-dataset-readme). 5. Add `$dataset.py` and `dataset_info.json` following this [instruction](https://huggingface.co/docs/datasets/dataset_script.html) (Here no need to generate dummy data). Upload the dataset directory to Hugging Face's `Datasets` following this [instruction](https://huggingface.co/docs/datasets/share.html#add-a-community-dataset) (set `--organization` to `ConvLab`). @@ -73,37 +73,37 @@ We first introduce the unified format of `ontology` and `dialogues`. To transfor `dialogues.json`: a *list* of dialogues (*dict*) containing: - `dataset`: (*str*) dataset name, must be the same as the data directory. 
-- `data_split`: (*str*) in `["train", "validation", "test"]`. +- `data_split`: (*str*) in `["train", "validation", "test", ...]`. - `dialogue_id`: (*str*) `"$dataset-$split-$id"`, `id` increases from 0. - `domains`: (*list*) involved domains in this dialogue. -- `goal`: (*dict*, optional) - - `description`: (*str*, optional) a string describes the user goal. - - `constraints`: (*dict*, optional) same format as dialogue state of involved domains but with only filled slots as constraints. - - `requirements`: (*dict*, optional) same format as dialogue state of involved domains but with only empty required slots. +- `goal`: (*dict*) + - `description`: (*str*, could be empty) a string describes the user goal. + - `constraints`: (*dict*, could be empty) same format as dialogue state of involved domains but with only filled slots as constraints. + - `requirements`: (*dict*, could be empty) same format as dialogue state of involved domains but with only empty required slots. - `turns`: (*list* of *dict*) - `speaker`: (*str*) "user" or "system". - `utterance`: (*str*) - `utt_idx`: (*int*) `turns['utt_idx']` gives current turn. - - `dialogue_acts`: (*dict*, optional) - - `categorical`: (*list* of *dict*) for categorical slots. + - `dialogue_acts`: (*dict*) + - `categorical`: (*list* of *dict*, could be empty) for categorical slots. - `{"intent": (str), "domain": (str), "slot": (str), "value": (str)}`. Value sets are defined in the ontology. - - `non-categorical` (*list* of *dict*) for non-categorical slots. + - `non-categorical` (*list* of *dict*, could be empty) for non-categorical slots. - `{"intent": (str), "domain": (str), "slot": (str), "value": (str), "start": (int), "end": (int)}`. `start` and `end` are character indexes for the value span in the utterance and can be absent. - - `binary` (*list* of *dict*) for binary dialogue acts in ontology. + - `binary` (*list* of *dict*, could be empty) for binary dialogue acts in ontology. 
- `{"intent": (str), "domain": (str), "slot": (str), "value": (str)}`. Possible dialogue acts are listed in the `ontology['binary_dialogue_acts']`. - - `state`: (*dict*, user side, optional) dialogue state of involved domains. full state is shown in `ontology['state']`. + - `state`: (*dict*, user side, could be empty) dialogue state of involved domains. full state is shown in `ontology['state']`. - `$domain_name`: (*dict*) contains all slots in this domain. - `$slot_name`: (*str*) value for this slot. - - `db_results`: (*dict*, optional) + - `db_results`: (*dict*, system side, could be empty) - `$domain_name`: (*list* of *dict*) topk entities (each entity contains slot-value pairs) Other attributes are optional. -Run `python test.py $dataset` in the `data/unified_datasets` directory to check the validation of processed dataset and get data statistics. +Run `python check.py $dataset` in the `data/unified_datasets` directory to check the validation of processed dataset and get data statistics. ### How to create dataset README Each dataset has a README.md to describe the original and transformed data. Follow the Hugging Face's [dataset card creation](https://huggingface.co/docs/datasets/dataset_card.html) to export `README.md`. Make sure that the following additional information is included in the **Dataset Summary** section: -- Main changes from original data to processed data. +- How to get the transformed data from original data and what are the main changes. - Annotations: whether have user goal, dialogue acts, state, db results, etc. -And the data statistics given by `test.py` should be included in the **Data Splits** section. +And the data statistics given by `check.py` should be included in the **Data Splits** section. 
diff --git a/data/unified_datasets/evaluate.py b/data/unified_datasets/check.py similarity index 51% rename from data/unified_datasets/evaluate.py rename to data/unified_datasets/check.py index 1c68f384db76c34a6e05a4e3993af227e2e3be3c..1f33a22c88f55bba73ee88f455353208549d0880 100644 --- a/data/unified_datasets/evaluate.py +++ b/data/unified_datasets/check.py @@ -1,11 +1,14 @@ import json import os from copy import deepcopy +from zipfile import ZipFile +import importlib +from tabulate import tabulate -special_values = ['dontcare', ''] +special_values = ['', 'dontcare', None] -def check_ontology(name): +def check_ontology(ontology): """ ontology: { "domains": { @@ -13,10 +16,9 @@ def check_ontology(name): "description": domain description, "slots": { slot name: { - "description": slot description - // possible_values is empty iff is_categorical is False + "description": slot description, "is_categorical": is_categorical, - "possible_values": [possible_values...] + "possible_values": [possible_values...], not empty if is_categorical } } } @@ -26,11 +28,11 @@ def check_ontology(name): "description": intent description } }, - "binary_dialogue_act": { + "binary_dialogue_acts": { [ { "intent": intent name, - "domain": domain name + "domain": domain name, "slot": slot name, "value": some value } @@ -44,10 +46,6 @@ def check_ontology(name): } """ global special_values - - ontology_file = os.path.join(f'{name}', 'ontology.json') - assert os.path.exists(ontology_file), f'ontology file should named {ontology_file}' - ontology = json.load(open(ontology_file)) # record issues in ontology descriptions = { @@ -59,8 +57,6 @@ def check_ontology(name): for domain_name, domain in ontology['domains'].items(): if not domain['description']: descriptions["domains"] = False - # if not domain_name in ontology['state']: - # print(f"domain '{domain_name}' not found in state") for slot_name, slot in domain["slots"].items(): if not slot["description"]: descriptions["slots"] = False @@ -75,22 
+71,17 @@ def check_ontology(name): descriptions["intents"] = False binary_dialogue_acts = set() - for bda in ontology['binary_dialogue_act']: + for bda in ontology['binary_dialogue_acts']: assert bda['intent'] is None or bda["intent"] in ontology['intents'], f'ONTOLOGY\tintent undefined intent in binary dialog act: {bda}' binary_dialogue_acts.add(tuple(bda.values())) ontology['bda_set'] = binary_dialogue_acts assert 'state' in ontology, 'ONTOLOGY\tno state' - redundant_value = False for domain_name, domain in ontology['state'].items(): assert domain_name in ontology['domains'] for slot_name, value in domain.items(): assert slot_name in ontology['domains'][domain_name]['slots'] - if value: - redundant_value = True - - if redundant_value: - print('ONTOLOGY: redundant value description in state') + assert value == "", "should set value in state to \"\"" # print('description existence:', descriptions, '\n') for description, value in descriptions.items(): @@ -99,27 +90,13 @@ def check_ontology(name): return ontology -def check_data(name, ontology): +def check_dialogues(name, dialogues, ontology): global special_values - from zipfile import ZipFile - data_file = os.path.join(f'{name}', 'data.zip') - if not os.path.exists(data_file): - print('cannot find data.zip') - return - - print('loading data') - with ZipFile(data_file) as zipfile: - with zipfile.open('data.json', 'r') as f: - data = json.load(f) - all_id = set() - splits = ['train', 'val', 'test'] + splits = ['train', 'validation', 'test'] da_values = 0 da_matches = 0 - state_values = 0 - state_matches = 0 - distances = [] stat_keys = ['dialogues', 'utterances', 'tokens', 'domains'] stat = { split: { @@ -129,24 +106,26 @@ def check_data(name, ontology): # present for both non-categorical or categorical - for dialogue in data: + for dialogue in dialogues: dialogue_id = dialogue['dialogue_id'] - assert isinstance(dialogue_id, str), '`dialogue_id` is expected to be str type' - dialogue_id = str(dialogue_id) + assert 
isinstance(dialogue_id, str), f'{dialogue_id}\t`dialogue_id` is expected to be str type' assert dialogue['dataset'] == name, f'{dialogue_id}\tinconsistent dataset name: {dialogue["dataset"]}' split = dialogue['data_split'] - assert split in splits, f'unknown split: `{split}`' + assert isinstance(split, str), f'{dialogue_id}\t`split` is expected to be str type but got {type(split)}' + if split not in splits: + splits.append(split) + stat[split] = {key: 0 for key in stat_keys} + cur_stat = stat[split] cur_stat['dialogues'] += 1 try: - prefix, num = dialogue_id.split('_') - assert prefix == name + prefix, id_split, num = dialogue_id.split('-') + assert prefix == name and id_split == split int(num) # try converting to int except: - print(f'{dialogue_id}\twrong dialogue id format: {dialogue_id}') - raise Exception + raise Exception(f'{dialogue_id}\twrong dialogue id format: {dialogue_id}') assert dialogue_id not in all_id, f'multiple dialogue id: {dialogue_id}' all_id.add(dialogue_id) @@ -158,17 +137,45 @@ def check_data(name, ontology): for domain_name in cur_domains: assert domain_name in ontology['domains'], f'{dialogue_id}\tundefined current domain: {domain_name}' + # check domain-slot-value + # prefix: error prefix + def check_dsv(domain_name, slot_name, value, categorical=None, prefix=f'{dialogue_id}'): + assert domain_name in cur_domains, f'{prefix}\t{domain_name} not presented in current domains' + domain = ontology['domains'][domain_name] + assert slot_name in domain['slots'], f'{prefix}\t{slot_name} not presented in domain {domain_name} in ontology' + slot = domain['slots'][slot_name] + if categorical is None: + categorical = slot['is_categorical'] + else: + assert categorical == slot['is_categorical'], \ + f'{prefix}\t{domain_name}-{slot_name} is_categorical should be {slot["is_categorical"]} as in ontology' + if categorical: + value = value.lower() + assert value in special_values or value in slot['possible_values'], \ + f'{prefix}\t`{value}` not presented 
in possible values of {domain_name}-{slot_name}: {slot["possible_values"]}' + + def check_da(da, categorical): + assert da['intent'] in ontology['intents'], f'{dialogue_id}:{turn_id}:da\tundefined intent {da["intent"]}' + check_dsv(da['domain'], da['slot'], da['value'], categorical, f'{dialogue_id}:{turn_id}:da') + + goal = dialogue['goal'] + assert isinstance(goal['description'], str), f'{dialogue_id}\tgoal description {goal["description"]} should be string' + assert isinstance(goal['constraints'], dict), f'{dialogue_id}\tgoal constraints {goal["constraints"]} should be dict' + assert isinstance(goal['requirements'], dict), f'{dialogue_id}\tgoal requirements {goal["requirements"]} should be dict' + for domain_name, domain in goal['constraints'].items(): + for slot_name, value in domain.items(): + check_dsv(domain_name, slot_name, value, prefix=f'{dialogue_id}:goal:constraints') + assert value != "", f'{dialogue_id}\tshould set non-empty value in goal constraints {goal["constraints"]}' + for domain_name, domain in goal['requirements'].items(): + for slot_name, value in domain.items(): + check_dsv(domain_name, slot_name, value, prefix=f'{dialogue_id}:goal:requirements') + assert value == "", f'{dialogue_id}\tshould set empty value in goal requirements {goal["requirements"]}' + turns = dialogue['turns'] cur_stat['utterances'] += len(turns) assert turns, f'{dialogue_id}\tempty turn' - assert turns[0]['speaker'] == 'user', f'{dialogue_id}\tnot start with user role' - if ontology['state']: - # update cur_state with state_update every turn, and compare it with state annotation - cur_state = { - domain_name: deepcopy(ontology['state'][domain_name]) for domain_name in cur_domains - } - # check dialog act + # assert turns[0]['speaker'] == 'user', f'{dialogue_id}\tnot start with user role' for turn_id, turn in enumerate(turns): assert turn['speaker'] in ['user', 'system'], f'{dialogue_id}:{turn_id}\tunknown speaker value: {turn["speaker"]}' assert turn_id == turn['utt_idx'], 
f'{dialogue_id}:{turn_id}\twrong utt_idx' @@ -177,27 +184,11 @@ def check_data(name, ontology): utterance = turn['utterance'] cur_stat['tokens'] += len(utterance.strip().split(' ')) - dialogue_acts = turn['dialogue_act'] - - # check domain-slot-value - # prefix: error prefix - def check_dsv(domain_name, slot_name, value, categorical, prefix): - assert domain_name in cur_domains or domain_name == 'booking', f'{prefix}\t{domain_name} not presented in current domains' - domain = ontology['domains'][domain_name] - assert slot_name in domain['slots'], f'{prefix}\t{slot_name} not presented in domain {domain_name}' - slot = domain['slots'][slot_name] - if categorical: - assert slot['is_categorical'], f'{prefix}\t{domain_name}-{slot_name} is not categorical' - value = value.lower() - assert value in special_values or value in slot['possible_values'], f'{prefix}\t`{value}` not presented in possible values of' \ - f' {domain_name}-{slot_name}: {slot["possible_values"]}' - else: - assert not slot['is_categorical'], f'{prefix}\t{domain_name}-{slot_name} is not non-categorical' - - def check_da(da, categorical): - assert da['intent'] in ontology['intents'], f'{dialogue_id}:{turn_id}\tundefined intent {da["intent"]}' - check_dsv(da['domain'], da['slot'], da['value'], categorical, f'{dialogue_id}:{turn_id}:da') + dialogue_acts = turn['dialogue_acts'] + assert isinstance(dialogue_acts['categorical'], list), f'{dialogue_id}:{turn_id}\tcategorical dialogue_acts should be a list' + assert isinstance(dialogue_acts['non-categorical'], list), f'{dialogue_id}:{turn_id}\tnon-categorical dialogue_acts should be a list' + assert isinstance(dialogue_acts['binary'], list), f'{dialogue_id}:{turn_id}\tbinary dialogue_acts should be a list' for da in dialogue_acts['categorical']: check_da(da, True) for da in dialogue_acts['non-categorical']: @@ -205,7 +196,7 @@ def check_data(name, ontology): # values only match after .strip() in some case, it's the issue of pre-processing if da['value'] not in 
special_values: da_values += 1 - assert 'start' in da and 'end' in da or 'start' not in da and 'end' not in da, \ + assert ('start' in da) == ('end' in da), \ f'{dialogue_id}:{turn_id}\tstart and end field in da should both present or neither not present' if 'start' in da: value = utterance[da['start']:da['end']] @@ -214,51 +205,37 @@ def check_data(name, ontology): for da in dialogue_acts['binary']: assert tuple(da.values()) in ontology['bda_set'], f'{dialogue_id}:{turn_id}\tbinary dialog act {da} not present in ontology' - # do not check domain-slot-value in binary dialogue acts + # do not check_dsv for binary dialogue acts if turn['speaker'] == 'user': - assert 'state' in turn and 'state_update' in turn, f"{dialogue_id}:{turn_id}\tstate and state_update must present in user's role" - state_update = turn['state_update'] - - def apply_update(update, categorical): - domain_name = update['domain'] - slot_name = update['slot'] - value = update['value'] - check_dsv(domain_name, slot_name, value, categorical, f'{dialogue_id}:{turn_id}:state_update') - cur_state[domain_name][slot_name] = value - if ontology['state']: - for update in state_update['categorical']: - apply_update(update, True) - for update in state_update['non-categorical']: - apply_update(update, False) - value = update['value'] - if value not in special_values: - state_values += 1 - if 'utt_idx' in update: - if turns[update['utt_idx']]['utterance'][update['start']:update['end']].lower() == update['value']: - state_matches += 1 - else: - print('value in utt:\t', turns[update['utt_idx']]['utterance'][update['start']:update['end']].strip()) - print('value in state:\t', update['value']) - pass - - assert cur_state == turn['state'], f'{dialogue_id}:{turn_id}:state_update incorrect state or state update calculation' + assert 'db_results' not in turn + assert 'state' in turn, f"{dialogue_id}:{turn_id}\tstate must present in user's role, but could be empty" + state = turn['state'] + assert isinstance(state, 
dict), f'{dialogue_id}:{turn_id}\tstate should be a dict' + for domain_name, domain in state.items(): + for slot_name, value in domain.items(): + check_dsv(domain_name, slot_name, value, prefix=f'{dialogue_id}:{turn_id}:state') else: - assert 'state' not in turn or 'state_update' in turn, f"{dialogue_id}:{turn_id}\tstate or state_update cannot present in system's role" + assert 'state' not in turn, f"{dialogue_id}:{turn_id}\tstate cannot present in system's role" + assert 'db_results' in turn + db_results = turn['db_results'] + assert isinstance(db_results, dict), f'{dialogue_id}:{turn_id}\tdb_results should be a dict' + for domain_name, results in db_results.items(): + assert domain_name in cur_domains, f'{dialogue_id}:{turn_id}:db_results\t{domain_name} not presented in current domains' + assert isinstance(results, list) - assert turns[-1]['speaker'] == 'user', f'{dialogue_id} dialog must end with user role' + # assert turns[-1]['speaker'] == 'user', f'{dialogue_id} dialog must end with user role' if da_values: - print('da values match rate: {:.3f}'.format(da_matches * 100 / da_values)) - if state_values: - print('state values match rate: {:.3f}'.format(state_matches * 100 / state_values)) + print('da values span match rate: {:.3f}'.format(da_matches * 100 / da_values)) all_stat = {key: 0 for key in stat_keys} for key in stat_keys: all_stat[key] = sum(stat[split][key] for split in splits) stat['all'] = all_stat + table = [] for split in splits + ['all']: cur_stat = stat[split] if cur_stat['dialogues']: @@ -267,19 +244,24 @@ def check_data(name, ontology): cur_stat['avg_utt'] = round(cur_stat.pop('utterances') / cur_stat['dialogues'], 2) cur_stat['avg_tokens'] = round(cur_stat.pop('tokens') / cur_stat['utterances'], 2) cur_stat['avg_domains'] = round(cur_stat.pop('domains') / cur_stat['dialogues'], 2) else: del stat[split] + table.append({ + 'split':split, + '\# dialogues': cur_stat['dialogues'], '\# utterances': cur_stat['utterances'], + 'avg_utt': cur_stat['avg_utt'], 'avg_tokens': cur_stat['avg_tokens'], 'avg_domains': cur_stat['avg_domains'] + }) + print(f'domains: {len(ontology["domains"])}') - print(json.dumps(stat, 
indent=4)) - if state_matches: - for dis, cnt in enumerate(distances): - print(cnt) + print('\n\nCopy-and-paste the following statistics to dataset README.md->Dataset Summary section') + print(tabulate(table, headers='keys', tablefmt='github')) + print() if __name__ == '__main__': from argparse import ArgumentParser - parser = ArgumentParser(description="evaluate pre-processed datasets") - parser.add_argument('datasets', metavar='dataset_name', nargs='*', help='dataset names to be evaluated') - parser.add_argument('--all', action='store_true', help='evaluate all datasets') + parser = ArgumentParser(description="test pre-processed datasets") + parser.add_argument('datasets', metavar='dataset_name', nargs='*', help='dataset names to be tested') + parser.add_argument('--all', action='store_true', help='test all datasets') parser.add_argument('--no-int', action='store_true', help='not interrupted by exception') parser.add_argument('--preprocess', '-p', action='store_true', help='run preprocess automatically') args = parser.parse_args() @@ -293,7 +275,7 @@ if __name__ == '__main__': parser.print_help() exit(1) - print('datasets to be evaluated:', datasets) + print('datasets to be tested:', datasets) fail = [] @@ -301,29 +283,38 @@ if __name__ == '__main__': try: print('') if not os.path.isdir(name): - print(f'dataset {name} not found') - continue + raise FileNotFoundError(f'dataset {name} not found') print(f'checking {name}') preprocess_file = os.path.join(f'{name}', 'preprocess.py') if not os.path.exists(preprocess_file): - print('no preprocess.py') - if args.preprocess: - print(f'skip evaluation of {name}') - continue + raise FileNotFoundError(f'no {preprocess_file}') + if args.preprocess: print('pre-processing') os.chdir(name) - import importlib preprocess = importlib.import_module(f'{name}.preprocess') preprocess.preprocess() os.chdir('..') - ontology = check_ontology(name) - check_data(name, ontology) + data_file = os.path.join(f'{name}', 'data.zip') + if not 
os.path.exists(data_file): + raise FileNotFoundError(f'cannot find {data_file}') + + with ZipFile(data_file) as zipfile: + print('check ontology') + with zipfile.open('data/ontology.json', 'r') as f: + ontology = json.load(f) + check_ontology(ontology) + + print('check dialogues') + with zipfile.open('data/dialogues.json', 'r') as f: + dialogues = json.load(f) + check_dialogues(name, dialogues, ontology) except Exception as e: if args.no_int: + print(e) fail.append(name) else: raise e