From 5d818f1d298d23661a7437e0618f16c468413557 Mon Sep 17 00:00:00 2001 From: zqwerty <zhuq96@hotmail.com> Date: Thu, 14 Jul 2022 10:26:20 +0800 Subject: [PATCH] rm useless func --- .../unified_datasets/opendialkg/preprocess.py | 25 ------------------- 1 file changed, 25 deletions(-) diff --git a/data/unified_datasets/opendialkg/preprocess.py b/data/unified_datasets/opendialkg/preprocess.py index a010d408..01fe29a3 100644 --- a/data/unified_datasets/opendialkg/preprocess.py +++ b/data/unified_datasets/opendialkg/preprocess.py @@ -15,31 +15,6 @@ import csv import random -def value_in_utt(value, utt): - """return character level (start, end) if value in utt""" - value = value.strip(punctuation).lower() - utt = utt - p = '(^|[\s,\.:\?!-])(?P<v>{})([\s,\.:\?!-\']|$)'.format(re.escape(value)) - p = re.compile(p, re.I) - m = re.search(p, utt) - if m: - # very few value appears more than once, take the first span - return True, m.span('v') - else: - try: - # solve date representation, e.g. '3 pm' vs '3pm' - date_parser.parse(value) - if (value.endswith('pm') or value.endswith('am')) and ''.join(value.split(' ')) in ''.join(utt.split(' ')): - return True, None - - except: - if value in utt: - # value appears, but may be in the plural, -ing, -ly, etc. - return True, None - - return False, None - - def preprocess(): random.seed(42) -- GitLab