From 2236ae821a85e8a7aa24a6a39f298a972a9096a6 Mon Sep 17 00:00:00 2001
From: aaa123git <wandz19@mails.tsinghua.edu.cn>
Date: Thu, 20 May 2021 18:44:21 +0800
Subject: [PATCH] fix bug. replace 'id' with 'ID' (#198)

---
 convlab2/nlu/jointBERT/multiwoz/nlu.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/convlab2/nlu/jointBERT/multiwoz/nlu.py b/convlab2/nlu/jointBERT/multiwoz/nlu.py
index 8dd0014..8b9f0dc 100755
--- a/convlab2/nlu/jointBERT/multiwoz/nlu.py
+++ b/convlab2/nlu/jointBERT/multiwoz/nlu.py
@@ -1,4 +1,5 @@
 import os
+import re
 import zipfile
 import json
 import torch
@@ -66,6 +67,8 @@ class BERTNLU(NLU):
         print("BERTNLU loaded")
 
     def predict(self, utterance, context=list()):
+        # Note: spaCy does not tokenize 'id' or 'Id' correctly, so normalize both to 'ID' first.
+        utterance = re.sub(r'\b(id|Id)\b', 'ID', utterance)
         # tokenization first, very important!
         ori_word_seq = [token.text for token in self.nlp(unidecode(utterance)) if token.text.strip()]
         # print(ori_word_seq)
-- 
GitLab