From 2236ae821a85e8a7aa24a6a39f298a972a9096a6 Mon Sep 17 00:00:00 2001 From: aaa123git <wandz19@mails.tsinghua.edu.cn> Date: Thu, 20 May 2021 18:44:21 +0800 Subject: [PATCH] fix bug. replace 'id' with 'ID' (#198) --- convlab2/nlu/jointBERT/multiwoz/nlu.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/convlab2/nlu/jointBERT/multiwoz/nlu.py b/convlab2/nlu/jointBERT/multiwoz/nlu.py index 8dd0014..8b9f0dc 100755 --- a/convlab2/nlu/jointBERT/multiwoz/nlu.py +++ b/convlab2/nlu/jointBERT/multiwoz/nlu.py @@ -1,4 +1,5 @@ import os +import re import zipfile import json import torch @@ -66,6 +67,8 @@ class BERTNLU(NLU): print("BERTNLU loaded") def predict(self, utterance, context=list()): + # Note: spaCy does not tokenize 'id' or 'Id' correctly, so normalize both to 'ID' first. + utterance = re.sub(r'\b(id|Id)\b', 'ID', utterance) # tokenization first, very important! ori_word_seq = [token.text for token in self.nlp(unidecode(utterance)) if token.text.strip()] # print(ori_word_seq) -- GitLab