From 21a2193261a3994d63e53cf08132cbb866d10f0d Mon Sep 17 00:00:00 2001
From: zqwerty <zhuq96@hotmail.com>
Date: Thu, 2 Dec 2021 09:45:50 +0000
Subject: [PATCH] Add domain list and metric descriptions to stat.txt

---
 data/unified_datasets/check.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/data/unified_datasets/check.py b/data/unified_datasets/check.py
index 2676c753..40af072d 100644
--- a/data/unified_datasets/check.py
+++ b/data/unified_datasets/check.py
@@ -345,6 +345,11 @@ if __name__ == '__main__':
                 print(f'Please copy-and-paste the statistics in {name}/stat.txt to dataset README.md->Data Splits section\n')
                 with open(f'{name}/stat.txt', 'w') as f:
                     print(stat, file=f)
+                    print('', file=f)
+                    all_domains = list(ontology["domains"].keys())
+                    print(f'{len(all_domains)} domains: {all_domains}', file=f)
+                    print('- **cat slot match**: how many values of categorical slots are in the possible values of ontology in percentage.', file=f)
+                    print('- **non-cat slot span**: how many values of non-categorical slots have span annotation in percentage.', file=f)
 
         except Exception as e:
             if args.no_int:
-- 
GitLab