Commit 8b7e88a3 authored by mehrad, committed by zhuqi

use transformers library to automate model caching

parent de195883
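For context, a minimal sketch (not part of the diff) of what the switch to the transformers library buys: from_pretrained() downloads the named checkpoint on first use and stores it under cache_dir, so a manually downloaded pre-trained-models/bert-base-uncased checkout is no longer needed. The cache path below mirrors the config fields added in this commit and is an assumption, not a value taken from the repo.

# Hedged sketch, assuming transformers is installed; names mirror the new config fields.
from transformers import BertModel, BertTokenizer

cache_dir = "pre-trained-models/"      # assumed cache location, as in bert_model_cache_dir
model_name = "bert-base-uncased"       # as in bert_model_name

# The first call downloads and caches the files; later calls reuse the cache.
tokenizer = BertTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
model = BertModel.from_pretrained(model_name, cache_dir=cache_dir)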
+# extras
+Pipfile*
+results*
 *.pyc
 __pycache__
@@ -13,6 +17,7 @@ __pycache__
 data/**/train.json
 data/**/val.json
 data/**/test.json
+data/**/human_val.json
 data/camrest/CamRest676_v2.json
 data/multiwoz/annotated_user_da_with_span_full.json
 data/schema/dstc8-schema-guided-dialogue-master
@@ -34,6 +39,7 @@ convlab2/nlg/sclstm/**/generated_sens_sys.json
 convlab2/nlg/template/**/generated_sens_sys.json
 convlab2/nlu/jointBERT/crosswoz/**/data
 convlab2/nlu/jointBERT/multiwoz/**/data
 # test script
 *_test.py
...
-import os.path
 import math
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from torch.nn import CrossEntropyLoss
-from torch.nn import CosineEmbeddingLoss
-from pytorch_pretrained_bert.modeling import BertModel
-from pytorch_pretrained_bert.modeling import BertPreTrainedModel
+from transformers import BertModel
+from transformers import BertPreTrainedModel
 class BertForUtteranceEncoding(BertPreTrainedModel):
@@ -19,7 +17,7 @@ class BertForUtteranceEncoding(BertPreTrainedModel):
         self.bert = BertModel(config)
     def forward(self, input_ids, token_type_ids, attention_mask, output_all_encoded_layers=False):
-        return self.bert(input_ids, token_type_ids, attention_mask, output_all_encoded_layers)
+        return self.bert(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids, encoder_hidden_states=output_all_encoded_layers)
 class MultiHeadAttention(nn.Module):
@@ -93,7 +91,8 @@ class BeliefTracker(nn.Module):
         self.device = device
         ### Utterance Encoder
-        self.utterance_encoder = BertForUtteranceEncoding.from_pretrained(args.bert_model)
+        self.utterance_encoder = BertForUtteranceEncoding.from_pretrained(args.bert_model_name, cache_dir=args.bert_model_cache_dir)
+        self.utterance_encoder.train()
         self.bert_output_dim = self.utterance_encoder.config.hidden_size
         self.hidden_dropout_prob = self.utterance_encoder.config.hidden_dropout_prob
         if args.fix_utterance_encoder:
@@ -101,7 +100,8 @@ class BeliefTracker(nn.Module):
                 p.requires_grad = False
         ### slot, slot-value Encoder (not trainable)
-        self.sv_encoder = BertForUtteranceEncoding.from_pretrained(args.bert_model)
+        self.sv_encoder = BertForUtteranceEncoding.from_pretrained(args.bert_model_name, cache_dir=args.bert_model_cache_dir)
+        self.sv_encoder.train()
         for p in self.sv_encoder.bert.parameters():
             p.requires_grad = False
...
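The forward() change above adapts BertForUtteranceEncoding to the transformers call signature. As a hedged sketch (not taken from the commit), this is how the transformers BertModel is typically called; note that in the transformers API, per-layer outputs are requested with the output_hidden_states flag, the checkpoint name and example sentence below are arbitrary, and exact return types vary by library version.

# Hedged sketch of the transformers BertModel call; names and inputs are illustrative.
import torch
from transformers import BertModel, BertTokenizer

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
bert = BertModel.from_pretrained("bert-base-uncased")

enc = tokenizer("i need a cheap hotel in the north", return_tensors="pt")
outputs = bert(input_ids=enc["input_ids"],
               attention_mask=enc["attention_mask"],
               token_type_ids=enc["token_type_ids"],
               output_hidden_states=True)
sequence_output = outputs[0]   # (batch, seq_len, hidden_size)
pooled_output = outputs[1]     # (batch, hidden_size)
hidden_states = outputs[2]     # tuple with one tensor per encoder layer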
@@ -7,15 +7,15 @@ import zipfile
 from matplotlib import pyplot as plt
-# from tensorboardX.writer import SummaryWriter
+from tensorboardX.writer import SummaryWriter
 from tqdm._tqdm import trange, tqdm
 from convlab2.util.file_util import cached_path
 from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
-from pytorch_pretrained_bert.tokenization import BertTokenizer
-from pytorch_pretrained_bert.optimization import BertAdam
+from transformers import BertTokenizer
+from transformers import get_linear_schedule_with_warmup, AdamW
 from convlab2.dst.dst import DST
 from convlab2.dst.sumbt.crosswoz_en.convert_to_glue_format import convert_to_glue_format, trans_value
@@ -114,10 +114,7 @@ class SUMBTTracker(DST):
         num_labels = [len(labels) for labels in label_list]  # number of slot-values in each slot-type
         # tokenizer
-        # vocab_dir = os.path.join(data_dir, 'model', '%s-vocab.txt' % args.bert_model)
-        # if not os.path.exists(vocab_dir):
-        #     raise ValueError("Can't find %s " % vocab_dir)
-        self.tokenizer = BertTokenizer.from_pretrained(args.bert_model)
+        self.tokenizer = BertTokenizer.from_pretrained(args.bert_model_name, cache_dir=args.bert_model_cache_dir)
         random.seed(args.seed)
         np.random.seed(args.seed)
         torch.manual_seed(args.seed)
@@ -196,7 +193,7 @@ class SUMBTTracker(DST):
             print('loading weights from trained model')
             self.load_weights(model_path=os.path.join(SUMBT_PATH, args.output_dir, 'pytorch_model.bin'))
         else:
-            raise ValueError('no availabel weights found.')
+            raise ValueError('no available weights found.')
         self.param_restored = True
     def construct_query(self, context):
@@ -395,10 +392,8 @@ class SUMBTTracker(DST):
             optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.fp16_loss_scale)
         else:
-            optimizer = BertAdam(optimizer_grouped_parameters,
-                                 lr=args.learning_rate,
-                                 warmup=args.warmup_proportion,
-                                 t_total=t_total)
+            optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, correct_bias=False)
+            scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=args.warmup_proportion*t_total, num_training_steps=t_total)
         logger.info(optimizer)
         # Training code
@@ -470,7 +465,11 @@ class SUMBTTracker(DST):
                     summary_writer.add_scalar("Train/LearningRate", lr_this_step, global_step)
                 for param_group in optimizer.param_groups:
                     param_group['lr'] = lr_this_step
+                if scheduler is not None:
+                    torch.nn.utils.clip_grad_norm_(optimizer_grouped_parameters, 1.0)
                 optimizer.step()
+                if scheduler is not None:
+                    scheduler.step()
                 optimizer.zero_grad()
                 global_step += 1
...
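The optimizer change above replaces BertAdam, which bundled warmup and scheduling, with transformers' AdamW plus an explicit linear-warmup schedule, which is why the training loop now clips gradients and calls scheduler.step() itself. A hedged, self-contained sketch of that pattern follows; the model, learning rate, and step counts are placeholders, not values from the repo, and it assumes a transformers version that still ships AdamW.

# Hedged sketch of the AdamW + linear warmup pattern; all numbers are illustrative.
import torch
from transformers import AdamW, get_linear_schedule_with_warmup

model = torch.nn.Linear(768, 2)            # stand-in for the belief tracker
t_total = 100                              # total number of optimizer updates (assumed)
warmup_proportion = 0.1

optimizer = AdamW(model.parameters(), lr=5e-5, correct_bias=False)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=int(warmup_proportion * t_total),
    num_training_steps=t_total,
)

for step in range(t_total):
    loss = model(torch.randn(4, 768)).pow(2).mean()           # dummy loss
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)   # clip before the update
    optimizer.step()
    scheduler.step()                                          # advance warmup/decay
    optimizer.zero_grad()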
@@ -23,9 +23,9 @@ class DotMap():
         self.do_eval = True
         self.num_train_epochs = 300
         self.bert_model = os.path.join(convlab2.get_root_path(), "pre-trained-models/bert-base-uncased")
+        self.bert_model_cache_dir = os.path.join(convlab2.get_root_path(), "pre-trained-models/")
+        self.bert_model_name = "bert-base-uncased"
         self.do_lower_case = True
         self.task_name = 'bert-gru-sumbt'
         self.nbt = 'rnn'
...
@@ -5,15 +5,15 @@ from itertools import chain
 import numpy as np
 import zipfile
-# from tensorboardX.writer import SummaryWriter
+from tensorboardX.writer import SummaryWriter
 from tqdm._tqdm import trange, tqdm
 from convlab2.util.file_util import cached_path
 from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
-from pytorch_pretrained_bert.tokenization import BertTokenizer
-from pytorch_pretrained_bert.optimization import BertAdam
+from transformers import BertTokenizer
+from transformers import get_linear_schedule_with_warmup, AdamW
 from convlab2.dst.dst import DST
 from convlab2.dst.sumbt.multiwoz.convert_to_glue_format import convert_to_glue_format
@@ -94,10 +94,7 @@ class SUMBTTracker(DST):
         num_labels = [len(labels) for labels in label_list]  # number of slot-values in each slot-type
         # tokenizer
-        # vocab_dir = os.path.join(data_dir, 'model', '%s-vocab.txt' % args.bert_model)
-        # if not os.path.exists(vocab_dir):
-        #     raise ValueError("Can't find %s " % vocab_dir)
-        self.tokenizer = BertTokenizer.from_pretrained(args.bert_model)
+        self.tokenizer = BertTokenizer.from_pretrained(args.bert_model_name, cache_dir=args.bert_model_cache_dir)
         random.seed(args.seed)
         np.random.seed(args.seed)
         torch.manual_seed(args.seed)
@@ -402,6 +399,7 @@ class SUMBTTracker(DST):
         t_total = num_train_steps
+        scheduler = None
         if args.fp16:
             try:
                 from apex.optimizers import FP16_Optimizer
@@ -420,10 +418,8 @@ class SUMBTTracker(DST):
             optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.fp16_loss_scale)
         else:
-            optimizer = BertAdam(optimizer_grouped_parameters,
-                                 lr=args.learning_rate,
-                                 warmup=args.warmup_proportion,
-                                 t_total=t_total)
+            optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, correct_bias=False)
+            scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=args.warmup_proportion*t_total, num_training_steps=t_total)
         logger.info(optimizer)
         # Training code
@@ -492,7 +488,11 @@ class SUMBTTracker(DST):
                     summary_writer.add_scalar("Train/LearningRate", lr_this_step, global_step)
                 for param_group in optimizer.param_groups:
                     param_group['lr'] = lr_this_step
+                if scheduler is not None:
+                    torch.nn.utils.clip_grad_norm_(optimizer_grouped_parameters, 1.0)
                 optimizer.step()
+                if scheduler is not None:
+                    scheduler.step()
                 optimizer.zero_grad()
                 global_step += 1
...
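This file also re-enables the previously commented-out tensorboardX SummaryWriter import, and the training loop above logs the learning rate with add_scalar. A hedged sketch of that logging pattern; the log directory and values are assumptions, not taken from the repo.

# Hedged sketch of the tensorboardX logging used above; logdir and values are assumed.
from tensorboardX import SummaryWriter

summary_writer = SummaryWriter("runs/sumbt-example")   # assumed log directory
for global_step in range(3):
    lr_this_step = 5e-5 * (global_step + 1) / 3        # placeholder warmup-style value
    summary_writer.add_scalar("Train/LearningRate", lr_this_step, global_step)
summary_writer.close()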
@@ -29,8 +29,9 @@ class DotMap():
         self.do_eval = True
         self.num_train_epochs = 300
         self.bert_model = os.path.join(convlab2.get_root_path(), "pre-trained-models/bert-base-uncased")
+        self.bert_model_cache_dir = os.path.join(convlab2.get_root_path(), "pre-trained-models/")
+        self.bert_model_name = "bert-base-uncased"
         self.do_lower_case = True
         self.task_name = 'bert-gru-sumbt'
         self.nbt = 'rnn'
...
-import os
 import copy
-from pprint import pprint
 import random
 from itertools import chain
 import numpy as np
 import zipfile
-# from tensorboardX.writer import SummaryWriter
-from tqdm._tqdm import trange, tqdm
-from convlab2.util.file_util import cached_path
+from tensorboardX.writer import SummaryWriter
+from tqdm import trange, tqdm
 from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
-from pytorch_pretrained_bert.tokenization import BertTokenizer
-from pytorch_pretrained_bert.optimization import BertAdam
+from transformers import BertTokenizer
+from transformers import get_linear_schedule_with_warmup, AdamW
 from convlab2.dst.dst import DST
-from convlab2.dst.sumbt.multiwoz_zh.convert_to_glue_format import convert_to_glue_format, trans_value
+from convlab2.dst.sumbt.multiwoz_zh.convert_to_glue_format import convert_to_glue_format
 from convlab2.util.multiwoz_zh.state import default_state
 from convlab2.dst.sumbt.BeliefTrackerSlotQueryMultiSlot import BeliefTracker
@@ -98,10 +94,7 @@ class SUMBTTracker(DST):
         num_labels = [len(labels) for labels in label_list]  # number of slot-values in each slot-type
         # tokenizer
-        # vocab_dir = os.path.join(data_dir, 'model', '%s-vocab.txt' % args.bert_model)
-        # if not os.path.exists(vocab_dir):
-        #     raise ValueError("Can't find %s " % vocab_dir)
-        self.tokenizer = BertTokenizer.from_pretrained(args.bert_model)
+        self.tokenizer = BertTokenizer.from_pretrained(args.bert_model_name, cache_dir=args.bert_model_cache_dir)
         random.seed(args.seed)
         np.random.seed(args.seed)
         torch.manual_seed(args.seed)
@@ -256,6 +249,7 @@ class SUMBTTracker(DST):
         t_total = num_train_steps
+        scheduler = None
         if args.fp16:
             try:
                 from apex.optimizers import FP16_Optimizer
@@ -274,10 +268,8 @@ class SUMBTTracker(DST):
             optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.fp16_loss_scale)
         else:
-            optimizer = BertAdam(optimizer_grouped_parameters,
-                                 lr=args.learning_rate,
-                                 warmup=args.warmup_proportion,
-                                 t_total=t_total)
+            optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, correct_bias=False)
+            scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=args.warmup_proportion*t_total, num_training_steps=t_total)
         logger.info(optimizer)
         # Training code
@@ -346,7 +338,11 @@ class SUMBTTracker(DST):
                    summary_writer.add_scalar("Train/LearningRate", lr_this_step, global_step)
                 for param_group in optimizer.param_groups:
                     param_group['lr'] = lr_this_step
+                if scheduler is not None:
+                    torch.nn.utils.clip_grad_norm_(optimizer_grouped_parameters, 1.0)
                 optimizer.step()
+                if scheduler is not None:
+                    scheduler.step()
                 optimizer.zero_grad()
                 global_step += 1
...
@@ -27,8 +27,9 @@ class DotMap():
         self.do_eval = True
         self.num_train_epochs = 300
-        self.bert_model = os.path.join(convlab2.get_root_path(), "pre-trained-models/bert-chinese-wwm-ext")
+        self.bert_model = os.path.join(convlab2.get_root_path(), "pre-trained-models/chinese-bert-wwm-ext")
+        self.bert_model_cache_dir = os.path.join(convlab2.get_root_path(), "pre-trained-models/")
+        self.bert_model_name = "hfl/chinese-bert-wwm-ext"
         self.do_lower_case = True
         self.task_name = 'bert-gru-sumbt'
         self.nbt = 'rnn'
...
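Across the sumbt_config.py files the pattern is the same: bert_model keeps pointing at a local directory, while the new bert_model_name and bert_model_cache_dir fields let transformers resolve the checkpoint by name (here the Hub id hfl/chinese-bert-wwm-ext) and cache it automatically. A hedged sketch of how such a config is consumed; the DotMap stand-in below is a simplified assumption, not the repo's class.

# Hedged sketch: a simplified stand-in for the DotMap config and how its new
# fields feed transformers; the paths and fields are assumptions based on the diff.
import os
from transformers import BertModel, BertTokenizer

class DotMap:
    def __init__(self):
        self.bert_model_cache_dir = os.path.join("pre-trained-models/")
        self.bert_model_name = "hfl/chinese-bert-wwm-ext"   # Hugging Face Hub id
        self.do_lower_case = True

args = DotMap()
tokenizer = BertTokenizer.from_pretrained(args.bert_model_name, cache_dir=args.bert_model_cache_dir)
sv_encoder = BertModel.from_pretrained(args.bert_model_name, cache_dir=args.bert_model_cache_dir)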