From 0451cb4c6a4f0d208752ec7f52788a2f62d6cf0d Mon Sep 17 00:00:00 2001 From: aaa123git <wandz19@mails.tsinghua.edu.cn> Date: Thu, 29 Apr 2021 19:29:09 +0800 Subject: [PATCH] fix bug: len(word_seq) must be <= 510, since self.dataloader.pad_batch adds two special tokens (#190) --- convlab2/nlu/jointBERT/multiwoz/nlu.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/convlab2/nlu/jointBERT/multiwoz/nlu.py b/convlab2/nlu/jointBERT/multiwoz/nlu.py index e900f1e..8dd0014 100755 --- a/convlab2/nlu/jointBERT/multiwoz/nlu.py +++ b/convlab2/nlu/jointBERT/multiwoz/nlu.py @@ -81,8 +81,8 @@ class BERTNLU(NLU): da = {} word_seq, tag_seq, new2ori = self.dataloader.bert_tokenize(ori_word_seq, ori_tag_seq) - word_seq = word_seq[:512] - tag_seq = tag_seq[:512] + word_seq = word_seq[:510] + tag_seq = tag_seq[:510] batch_data = [[ori_word_seq, ori_tag_seq, intents, da, context_seq, new2ori, word_seq, self.dataloader.seq_tag2id(tag_seq), self.dataloader.seq_intent2id(intents)]] -- GitLab