What is DialoGPT


Introduction

  Large-scale pretraining for dialogue

DialoGPT is a dialogue-generation model pretrained on top of GPT-2, trained on Reddit conversation data.
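Before working with the repo's own scripts, note that the released checkpoints can also be driven directly through the Hugging Face transformers library. The snippet below is a minimal sketch (not from the original post) of single-turn generation with the publicly hosted microsoft/DialoGPT-medium model:

    from transformers import AutoModelForCausalLM, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
    model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium")

    # DialoGPT separates dialogue turns with the end-of-text token
    input_ids = tokenizer.encode("Does money buy happiness?" + tokenizer.eos_token,
                                 return_tensors="pt")
    reply_ids = model.generate(input_ids, max_length=100,
                               pad_token_id=tokenizer.eos_token_id)
    # decode only the newly generated tokens (the reply), not the prompt
    print(tokenizer.decode(reply_ids[0, input_ids.shape[-1]:],
                           skip_special_tokens=True))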

Assuming the environment is already set up, add an inference function to eval_util.py:

    # use the same signature as eval_model_generation
    # (the original post omits the def line; this signature mirrors eval_model_generation)
    def inference(model, tokenizer, inference_dataloader, epoch_id, args):
        logger.info('compute eval model loss, using eval mode, '
                    'please change it back to train after calling this function')
        model.eval()
        tot_sample = []
        with torch.no_grad():
            for step, batch in enumerate(inference_dataloader):
                batch = tuple(t.to(args.device) for t in batch)
                input_ids, position_ids, token_ids, label_ids, src_len, _ = batch
                if args.no_token_id:
                    token_ids = None
                n_sample = input_ids.shape[0]
                # model.inference returns the raw LM-head logits (see the note below)
                logits = model.inference(input_ids, position_ids, token_ids)

                def decode(batch_data, tokenizer, input_flag):
                    # input_flag=True:  batch_data holds token ids (the posts)
                    # input_flag=False: batch_data holds logits; take the argmax per position
                    results = []
                    batch_data = batch_data.cpu().data.numpy()
                    for one_logits in batch_data:  # [sentence_len, vocabulary_size]
                        if not input_flag:
                            word_ids = np.argmax(one_logits, axis=1)
                        else:
                            word_ids = one_logits
                        words = []
                        for id in word_ids:
                            if tokenizer.decoder[id] != "<|endoftext|>":
                                words.append(tokenizer.decoder[id])
                            else:
                                break
                        output_words = []
                        for word in words:
                            # the loop body was lost in the original post; here we strip
                            # the 'Ġ' marker that GPT-2's BPE uses for a leading space
                            output_words.append(word.replace('Ġ', ''))
                        results.append(" ".join(output_words))
                    return results

                posts = decode(input_ids, tokenizer, True)
                inferences = decode(logits, tokenizer, False)
                tot_sample.append(n_sample)
                logger.info("model inference results")
                for index in range(len(posts)):
                    print("post: ", posts[index])
                    print("inference: ", inferences[index])
                    # print(inferences)
                break
        # todo
        return None

The model.inference call above requires a matching method on GPT2LMHeadModel (the class imported from lsp_model in the script below). Its last two lines simply project the transformer hidden states through the LM head and return the logits instead of a loss:

    lm_logits = self.lm_head(hidden_states)
    return lm_logits
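For reference, here is a hedged sketch of what the full method could look like, assuming the old Huggingface-style GPT-2 implementation bundled with the DialoGPT repo (attribute names transformer and lm_head; adjust to your local model class):

    # Sketch only, to be added inside GPT2LMHeadModel in lsp_model/modeling_gpt2.py:
    # it mirrors forward() but skips the label/loss path and returns the logits.
    def inference(self, input_ids, position_ids=None, token_type_ids=None, past=None):
        hidden_states, presents = self.transformer(input_ids, position_ids,
                                                   token_type_ids, past)
        lm_logits = self.lm_head(hidden_states)
        return lm_logits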

Next, create a custom inference_LSP.py file with the following contents:

    # Copyright (c) Microsoft Corporation.
    # Licensed under the MIT license.
    '''
    * @Desc: train GPT2 from scratch/ fine tuning.
      Modified based on Huggingface GPT-2 implementation
    '''

    import json
    import os
    import sys
    import argparse
    import logging
    import time
    import tqdm
    import datetime

    import torch
    import numpy as np

    from os.path import join
    from torch.distributed import get_rank, get_world_size

    from lsp_model import GPT2LMHeadModel, GPT2Tokenizer, GPT2Config, Adam
    # Helper utilities used below are not imported in the original post; the
    # module paths here follow the DialoGPT repo's LSP_train.py and may need
    # adjusting to your local layout.
    from gpt2_training.train_utils import load_model, boolean_string, get_eval_list_same_length
    from data_loader import DynamicBatchingLoader
    from gpt2_training.eval_utils import inference  # the function added to eval_util.py above

    os.environ['CUDA_VISIBLE_DEVICES'] = "0"

    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S', level=logging.INFO)
    logger = logging.getLogger(__name__)

    INF = 100000000
    CACHE_EMPTY_STEP = 10000
    EVAL_STEP = 10000

    #########################################################################
    # Prepare Parser
    ##########################################################################

    parser = argparse.ArgumentParser()
    parser.add_argument('--model_name_or_path', type=str, required=True,
                        help='pretrained model name or path to local checkpoint')
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--max_seq_length", type=int, default=128)
    parser.add_argument("--init_checkpoint", type=str, required=True)
    parser.add_argument("--inference_input_file", type=str, required=True)
    parser.add_argument("--inference_batch_size", type=int, default=8)
    parser.add_argument("--num_optim_steps", type=int, default=1000000,
                        help="new API specifies num update steps")
    parser.add_argument("--fp16", type=boolean_string, default=True)
    parser.add_argument("--loss_scale", type=float, default=0)
    parser.add_argument("--no_token_id", type=boolean_string, default=True)
    parser.add_argument("--log_dir", type=str, required=True)
    # args.normalize_data is referenced when building the dataloader below; the
    # original post omits this argument, so the default is copied from LSP_train.py
    parser.add_argument("--normalize_data", type=boolean_string, default=True)
    # distributed
    parser.add_argument('--local_rank', type=int, default=-1,
                        help='for torch.distributed')
    parser.add_argument('--config', help='JSON config file')

    # do normal parsing
    args = parser.parse_args()

    if args.config is not None:
        # override argparse defaults by config JSON
        opts = json.load(open(args.config))
        for k, v in opts.items():
            if isinstance(v, str):
                # PHILLY ENV special cases
                if 'PHILLY_JOB_DIRECTORY' in v:
                    v = v.replace('PHILLY_JOB_DIRECTORY',
                                  os.environ['PHILLY_JOB_DIRECTORY'])
                elif 'PHILLY_LOG_DIRECTORY' in v:
                    v = v.replace('PHILLY_LOG_DIRECTORY',
                                  os.environ['PHILLY_LOG_DIRECTORY'])
            setattr(args, k, v)

        # command line should override config JSON
        argv = sys.argv[1:]
        overrides, _ = parser.parse_known_args(argv)
        for k, v in vars(overrides).items():
            if f'--{k}' in argv:
                setattr(args, k, v)
        setattr(args, 'local_rank', overrides.local_rank)

    if args.local_rank == -1:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        n_gpu = torch.cuda.device_count()
        args.device, args.n_gpu = device, n_gpu
    else:
        # distributed training
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        # Initializes the distributed backend which will take care of
        # synchronizing nodes/GPUs
        torch.distributed.init_process_group(backend='nccl')
        n_gpu = torch.distributed.get_world_size()
        args.device, args.n_gpu = device, 1

    logger.info("device: {} n_gpu: {}, distributed training: {}, "
                "16-bits training: {}".format(
                    device, n_gpu, bool(args.local_rank != -1), args.fp16))

    timestamp = datetime.datetime.now().strftime('%Y-%m-%d%H%M%S')
    log_dir = args.log_dir
    # the script closes inference_logger at the end, but the original post never
    # opens it; a plain log file under log_dir is assumed here
    inference_logger = open(join(log_dir, 'inference_log_{}.txt'.format(timestamp)),
                            'a+', buffering=1)

    logger.info('Input Argument Information')
    args_dict = vars(args)
    for a in args_dict:
        logger.info('%-28s  %s' % (a, args_dict[a]))

    #########################################################################
    # Prepare Data Set
    ##########################################################################

    print("Prepare Data")
    enc = GPT2Tokenizer.from_pretrained(args.model_name_or_path)

    config = GPT2Config.from_json_file(
        join(args.model_name_or_path, 'config.json'))

    inference_dataloader_loss = DynamicBatchingLoader(
        args.inference_input_file, enc, args.normalize_data,
        args.inference_batch_size, args.max_seq_length)

    inference_dataloader_gen = get_eval_list_same_length(
        args.inference_input_file, enc, args.inference_batch_size, True)

    # eval_dataloader_loss = DynamicBatchingLoader(
    #     args.eval_input_file, enc, args.normalize_data,
    #     args.eval_batch_size, args.max_seq_length)
    #
    # eval_dataloader_gen = get_eval_list_same_length(
    #     args.eval_input_file, enc, args.eval_batch_size, True)

    #########################################################################
    # Prepare Model
    ##########################################################################

    print("Prepare Model")
    logger.info("Prepare Model")
    model = load_model(GPT2LMHeadModel(config), args.init_checkpoint,
                       args, verbose=True)
    if args.local_rank != -1:
        # when from scratch make sure initial models are the same
        params = [p.data for p in model.parameters()]

    no_decay = ['bias', 'ln']   # no decay for bias and LayerNorm (ln)

    #########################################################################
    # Inference !
    ##########################################################################

    print("Model inference")
    logger.info("Model inference")
    epoch = 0

    if args.local_rank != -1:
        n_gpu = 1

    # todo modify loss out.
    # todo output format
    # The original post does not show the call itself; presumably the inference
    # function added to eval_util.py is invoked on the generation dataloader here:
    results = inference(model, enc, inference_dataloader_gen, epoch, args)

    logger.info("inference_final_results:")
    if results is None:
        logger.info("current results are None")
    else:
        logger.info(results)

    inference_logger.close()

Running inference

Inference input file (--inference_input_file): validset.tsv
Fine-tuned checkpoint (--init_checkpoint): ./models/medium/medium_ft.pkl
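With those files in place, an invocation might look like the following sketch. The model_name_or_path directory (holding the medium model's config.json and vocabulary) and the log_dir value are illustrative assumptions, not paths from the original post:

    python inference_LSP.py \
        --model_name_or_path ./models/medium \
        --init_checkpoint ./models/medium/medium_ft.pkl \
        --inference_input_file validset.tsv \
        --inference_batch_size 8 \
        --log_dir ./inference_logs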
