DialoGPT是什么
导读:本文共6186字符,通常情况下阅读需要21分钟。同时您也可以点击右侧朗读,来听本文内容。按键盘←(左) →(右) 方向键可以翻页。
摘要: 引言 Large-scale pretraining for dialogue DialoGPT是基于GPT-2的对话生成预训练模型,在reddit数据集上训练 假定已经设置好环境, 在eval_util.py中增加 inference函数 def inference_model_results(model, tokenizer, inferenc... ...
目录
(为您整理了一些要点,点击可以直达。)
引言
Large-scale pretraining for dialogue
DialoGPT是基于GPT-2的对话生成预训练模型,在reddit数据集上训练
假定已经设置好环境,在 eval_util.py 中增加 inference 函数 inference_model_results:
# use the same signature with eval_model_generation
logger.info('compute eval model loss, using eval mode, '
'please change it back to train after calling this function')
model.eval()
tot_sample = []
with torch.no_grad():
for step, batch in enumerate(inference_dataloader):
batch = tuple(t.to(args.device) for t in batch)
input_ids, position_ids, token_ids, label_ids, src_len, _ = batch
if args.no_token_id:
token_ids = None
n_sample = input_ids.shape[0]
logits = model.inference(input_ids, position_ids, token_ids)
def decode(batch_data, tokenizer, input_flag):
    """Convert a batch of token ids (or per-position scores) into strings.

    Args:
        batch_data: object supporting ``.cpu().data.numpy()``. When
            ``input_flag`` is true every row is iterated as token ids;
            otherwise every row is a ``[sentence_len, vocabulary_size]``
            matrix and the arg-max along axis 1 selects the token ids.
        tokenizer: object exposing ``decoder``, indexed by token id and
            returning the token string.
        input_flag: ``True`` when ``batch_data`` already holds token ids,
            ``False`` when it holds scores that still need an arg-max.

    Returns:
        A list with exactly one string per row of ``batch_data``: the row's
        tokens up to (not including) the first ``"<|endoftext|>"``, joined
        with single spaces.
    """
    eos_token = "<|endoftext|>"
    results = []
    for row in batch_data.cpu().data.numpy():
        token_ids = row if input_flag else np.argmax(row, axis=1)
        words = []
        for token_id in token_ids:
            word = tokenizer.decoder[token_id]
            if word == eos_token:
                # Everything after the end-of-text marker is ignored.
                break
            words.append(word)
        # The previous version joined a list that was never filled and
        # appended once per token, producing only empty strings.
        # NOTE(review): tokens are emitted exactly as stored in
        # tokenizer.decoder; any sub-word markers are not stripped — confirm
        # whether extra clean-up is wanted.
        results.append(" ".join(words))
    return results
posts = decode(input_ids, tokenizer, True)
inferences = decode(logits, tokenizer, False)
tot_sample.append(n_sample)
logger.info("model inference results")
for index in range(len(posts)):
print("post: ", posts[index])
print("inference: ", inferences[index])
# print(inferences)
break
# todo
return None
lm_logits = self.lm_head(hidden_states)
return lm_logits
自定义inference_LSP.py 文件
文件内容
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
'''
* @Desc: train GPT2 from scratch/ fine tuning.
Modified based on Huggingface GPT-2 implementation
'''
import json
import os
import sys
import argparse
import logging
import time
import tqdm
import datetime
import torch
import numpy as np
from os.path import join
from torch.distributed import get_rank, get_world_size
from lsp_model import GPT2LMHeadModel, GPT2Tokenizer, GPT2Config, Adam
# Set CUDA_VISIBLE_DEVICES to "0" for this process; this happens at import
# time, before any CUDA call made further down in the script.
os.environ['CUDA_VISIBLE_DEVICES'] = "0"
# Configure root logging: INFO level, records prefixed with a timestamp,
# the level name and the logger name.
logging.basicConfig(
format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
datefmt='%m/%d/%Y %H:%M:%S', level=logging.INFO)
# Module-level logger used by the rest of the script.
logger = logging.getLogger(__name__)
# Numeric constants. None of them is referenced in the visible part of this
# script.
INF = 100000000
CACHE_EMPTY_STEP = 10000
EVAL_STEP = 10000
#########################################################################
# Prepare Parser
##########################################################################
# Command-line interface of the inference script.
# NOTE(review): `boolean_string` is used as an argparse type converter but is
# neither defined nor imported in the visible source — confirm where it
# comes from.
parser = argparse.ArgumentParser()
parser.add_argument('--model_name_or_path', type=str, required=True,
                    help='pretrained model name or path to local checkpoint')
parser.add_argument("--seed", type=int, default=42)
parser.add_argument("--max_seq_length", type=int, default=128)

parser.add_argument("--init_checkpoint", type=str, required=True)
parser.add_argument("--inference_input_file", type=str, required=True)
parser.add_argument("--inference_batch_size", type=int, default=8)
parser.add_argument("--num_optim_steps", type=int, default=1000000,
                    help="new API specifies num update steps")

parser.add_argument("--fp16", type=boolean_string, default=True)
parser.add_argument("--loss_scale", type=float, default=0)
parser.add_argument("--no_token_id", type=boolean_string, default=True)
# args.normalize_data is read when the inference data loader is built, but
# no option used to declare it, so a run without a JSON config failed with
# AttributeError. Declaring it with a default keeps existing invocations
# working.
parser.add_argument("--normalize_data", type=boolean_string, default=True)

parser.add_argument("--log_dir", type=str, required=True)

# distributed
parser.add_argument('--local_rank', type=int, default=-1,
                    help='for torch.distributed')
parser.add_argument('--config', help='JSON config file')
# do normal parsing
args = parser.parse_args()

if args.config is not None:
    # Values from the JSON config replace the argparse results. The file is
    # opened in a context manager so the handle is closed right after
    # parsing.
    with open(args.config) as config_file:
        opts = json.load(config_file)
    for k, v in opts.items():
        if isinstance(v, str):
            # PHILLY ENV special cases: substitute the placeholder with the
            # value of the environment variable of the same name.
            if 'PHILLY_JOB_DIRECTORY' in v:
                v = v.replace('PHILLY_JOB_DIRECTORY',
                              os.environ['PHILLY_JOB_DIRECTORY'])
            elif 'PHILLY_LOG_DIRECTORY' in v:
                v = v.replace('PHILLY_LOG_DIRECTORY',
                              os.environ['PHILLY_LOG_DIRECTORY'])
        setattr(args, k, v)

    # Command line should override config JSON: re-parse argv and re-apply
    # only the options that were literally given as `--name`.
    argv = sys.argv[1:]
    overrides, _ = parser.parse_known_args(argv)
    for k, v in vars(overrides).items():
        if f'--{k}' in argv:
            setattr(args, k, v)
    # local_rank always comes from the command-line parse, never from the
    # config file.
    setattr(args, 'local_rank', overrides.local_rank)
# Select the compute device and record it on `args`.
if args.local_rank == -1:
    # Non-distributed run: use CUDA when available, otherwise the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    n_gpu = torch.cuda.device_count()
    args.device, args.n_gpu = device, n_gpu
else:
    # Distributed run: bind this process to the GPU given by local_rank.
    # (Unrelated advertisement text that had been fused onto the `else:`
    # line was removed; it made the statement a syntax error.)
    torch.cuda.set_device(args.local_rank)
    device = torch.device("cuda", args.local_rank)
    # Initializes the distributed backend which will take care of
    # synchronizing nodes/GPUs
    torch.distributed.init_process_group(backend='nccl')
    # n_gpu holds the world size, while args.n_gpu is set to 1 here.
    n_gpu = torch.distributed.get_world_size()
    args.device, args.n_gpu = device, 1
# Report the selected device and precision settings.
is_distributed = bool(args.local_rank != -1)
device_message = ("device: {} n_gpu: {}, distributed training: {}, "
                  "16-bits training: {}")
logger.info(device_message.format(device, n_gpu, is_distributed, args.fp16))

# Timestamp of this run; it is not used in the visible part of the script.
timestamp = datetime.datetime.now().strftime('%Y-%m-%d%H%M%S')
log_dir = args.log_dir

# Dump every parsed argument as a left-aligned name/value line.
logger.info('Input Argument Information')
args_dict = vars(args)
for arg_name, arg_value in args_dict.items():
    logger.info('%-28s %s' % (arg_name, arg_value))
#########################################################################
# Prepare Data Set
##########################################################################
# Announce the data-preparation stage on stdout.
print("Prepare Data")
# Tokenizer and model configuration are both loaded from
# --model_name_or_path; the configuration is read from the config.json
# file inside that path.
enc = GPT2Tokenizer.from_pretrained(args.model_name_or_path)
config = GPT2Config.from_json_file(
join(args.model_name_or_path, 'config.json'))
# Two loaders are built over the same --inference_input_file.
# NOTE(review): DynamicBatchingLoader and get_eval_list_same_length are not
# imported anywhere in the visible source — confirm where they come from,
# and confirm that args.normalize_data is always provided.
inference_dataloader_loss = DynamicBatchingLoader(
args.inference_input_file, enc, args.normalize_data,
args.inference_batch_size, args.max_seq_length)
inference_dataloader_gen = get_eval_list_same_length(
args.inference_input_file, enc, args.inference_batch_size, True)
# Disabled evaluation loaders, kept for reference:
# eval_dataloader_loss = DynamicBatchingLoader(
# args.eval_input_file, enc, args.normalize_data,
# args.eval_batch_size, args.max_seq_length)
#
# eval_dataloader_gen = get_eval_list_same_length(
# args.eval_input_file, enc, args.eval_batch_size, True)
#########################################################################
# Prepare Model
##########################################################################
# Announce the stage on stdout and in the log.
print("Prepare Model")
logger.info("Prepare Model")

# Build the network from the configuration and hand it, together with the
# checkpoint given by --init_checkpoint, to load_model.
# NOTE(review): load_model is not imported in the visible source — confirm.
model = load_model(
    GPT2LMHeadModel(config),
    args.init_checkpoint,
    args,
    verbose=True,
)

if args.local_rank != -1:
    # when from scratch make sure initial models are the same
    # NOTE(review): the list is collected but not used in the visible source.
    params = [weight.data for weight in model.parameters()]

# no decay for bias and LayerNorm (ln)
no_decay = ['bias', 'ln']
#########################################################################
# Inference !
##########################################################################
# Announce the stage on stdout and in the log.
print("Model inference")
logger.info("Model inference")

epoch = 0

if args.local_rank != -1:
    n_gpu = 1

# todo modify loss out.
# todo output format
# NOTE(review): `results` and `inference_logger` are never assigned in the
# visible source; the call that produces them appears to be missing —
# confirm against the original script.
logger.info("inference_final_results:")
logger.info("current results are None" if results is None else results)
inference_logger.close()
Inference
validset.tsv:
./models/medium/medium_ft.pkl
以上就是“DialoGPT是什么”的详细内容,希望对您有所帮助。信息来源于网络。