Skip to content
Snippets Groups Projects
Commit 747eafd9 authored by zqwerty, committed by zhuqi
Browse files

fix nlu max len

parent 37f2df82
No related branches found
No related tags found
No related merge requests found
...@@ -66,15 +66,15 @@ class BERTNLU(NLU): ...@@ -66,15 +66,15 @@ class BERTNLU(NLU):
if len(context) > 0 and type(context[0]) is list and len(context[0]) > 1: if len(context) > 0 and type(context[0]) is list and len(context[0]) > 1:
context = [item[1] for item in context] context = [item[1] for item in context]
context_seq = self.dataloader.tokenizer.encode('[CLS] ' + ' [SEP] '.join(context[-3:])) context_seq = self.dataloader.tokenizer.encode('[CLS] ' + ' [SEP] '.join(context[-3:]))
context_seq = context_seq[:self.dataloader.tokenizer.max_model_input_sizes] context_seq = context_seq[:512]
else: else:
context_seq = self.dataloader.tokenizer.encode('[CLS]') context_seq = self.dataloader.tokenizer.encode('[CLS]')
intents = [] intents = []
da = {} da = {}
word_seq, tag_seq, new2ori = self.dataloader.bert_tokenize(ori_word_seq, ori_tag_seq) word_seq, tag_seq, new2ori = self.dataloader.bert_tokenize(ori_word_seq, ori_tag_seq)
word_seq = word_seq[:self.dataloader.tokenizer.max_model_input_sizes] word_seq = word_seq[:512]
tag_seq = tag_seq[:self.dataloader.tokenizer.max_model_input_sizes] tag_seq = tag_seq[:512]
batch_data = [[ori_word_seq, ori_tag_seq, intents, da, context_seq, batch_data = [[ori_word_seq, ori_tag_seq, intents, da, context_seq,
new2ori, word_seq, self.dataloader.seq_tag2id(tag_seq), self.dataloader.seq_intent2id(intents)]] new2ori, word_seq, self.dataloader.seq_tag2id(tag_seq), self.dataloader.seq_intent2id(intents)]]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment