diff --git a/convai_evaluation.py b/convai_evaluation.py index fbc2105..789a10d 100644 --- a/convai_evaluation.py +++ b/convai_evaluation.py @@ -17,7 +17,7 @@ from projects.convai2.eval_f1 import eval_f1, setup_args as setup_args_f1 from projects.convai2.eval_ppl import eval_ppl, setup_args as setup_args_ppl from projects.convai2.build_dict import build_dict -from pytorch_transformers import (OpenAIGPTDoubleHeadsModel, OpenAIGPTLMHeadModel, OpenAIGPTTokenizer, +from transformers import (OpenAIGPTDoubleHeadsModel, OpenAIGPTLMHeadModel, OpenAIGPTTokenizer, GPT2DoubleHeadsModel, GPT2LMHeadModel, GPT2Tokenizer) from train import build_input_from_segments, pad_dataset, SPECIAL_TOKENS, add_special_tokens_ diff --git a/interact.py b/interact.py index 5d05291..58e1ae2 100644 --- a/interact.py +++ b/interact.py @@ -12,7 +12,7 @@ import torch import torch.nn.functional as F -from pytorch_transformers import OpenAIGPTLMHeadModel, OpenAIGPTTokenizer, GPT2LMHeadModel, GPT2Tokenizer +from transformers import OpenAIGPTLMHeadModel, OpenAIGPTTokenizer, GPT2LMHeadModel, GPT2Tokenizer from train import SPECIAL_TOKENS, build_input_from_segments, add_special_tokens_ from utils import get_dataset, download_pretrained_model diff --git a/requirements.txt b/requirements.txt index 1005759..f8f6ff4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ torch pytorch-ignite -pytorch-transformers>=1.2 +transformers==2.5.1 tensorboardX==1.8 tensorflow # for tensorboardX diff --git a/test_special_tokens.py b/test_special_tokens.py index fb39ce7..78013d3 100644 --- a/test_special_tokens.py +++ b/test_special_tokens.py @@ -2,7 +2,7 @@ import shutil import unittest -from pytorch_transformers import OpenAIGPTTokenizer, GPT2Tokenizer +from transformers import OpenAIGPTTokenizer, GPT2Tokenizer from train import ATTR_TO_SPECIAL_TOKEN, SPECIAL_TOKENS class TestSpecialTokenTreatment(unittest.TestCase): diff --git a/train.py b/train.py index bf70da2..2be654c 100644 --- a/train.py +++ 
b/train.py @@ -16,14 +16,14 @@ from ignite.metrics import Accuracy, Loss, MetricsLambda, RunningAverage from ignite.contrib.handlers import ProgressBar, PiecewiseLinear from ignite.contrib.handlers.tensorboard_logger import TensorboardLogger, OutputHandler, OptimizerParamsHandler -from pytorch_transformers import (AdamW, OpenAIGPTDoubleHeadsModel, OpenAIGPTTokenizer, +from transformers import (AdamW, OpenAIGPTDoubleHeadsModel, OpenAIGPTTokenizer, GPT2DoubleHeadsModel, GPT2Tokenizer, WEIGHTS_NAME, CONFIG_NAME) from utils import get_dataset, make_logdir SPECIAL_TOKENS = ["<bos>", "<eos>", "<speaker1>", "<speaker2>", "<pad>"] ATTR_TO_SPECIAL_TOKEN = {'bos_token': '<bos>', 'eos_token': '<eos>', 'pad_token': '<pad>', - 'additional_special_tokens': ('<speaker1>', '<speaker2>')} + 'additional_special_tokens': ['<speaker1>', '<speaker2>']} MODEL_INPUTS = ["input_ids", "mc_token_ids", "lm_labels", "mc_labels", "token_type_ids"] PADDED_INPUTS = ["input_ids", "lm_labels", "token_type_ids"] @@ -42,7 +42,7 @@ def pad_dataset(dataset, padding=0): """ Pad the dataset. This could be optimized by defining a Dataset class and padding at the batch level, but this is simpler. 
""" max_l = max(len(x) for x in dataset["input_ids"]) for name in PADDED_INPUTS: - dataset[name] = [x + [padding if name != "lm_labels" else -1] * (max_l - len(x)) for x in dataset[name]] + dataset[name] = [x + [padding if name != "lm_labels" else -100] * (max_l - len(x)) for x in dataset[name]] return dataset @@ -62,9 +62,9 @@ def build_input_from_segments(persona, history, reply, tokenizer, lm_labels=Fals instance["input_ids"] = list(chain(*sequence)) instance["token_type_ids"] = [speaker2 if i % 2 else speaker1 for i, s in enumerate(sequence) for _ in s] instance["mc_token_ids"] = len(instance["input_ids"]) - 1 - instance["lm_labels"] = [-1] * len(instance["input_ids"]) + instance["lm_labels"] = [-100] * len(instance["input_ids"]) if lm_labels: - instance["lm_labels"] = ([-1] * sum(len(s) for s in sequence[:-1])) + [-1] + sequence[-1][1:] + instance["lm_labels"] = ([-100] * sum(len(s) for s in sequence[:-1])) + [-100] + sequence[-1][1:] return instance @@ -227,7 +227,7 @@ def inference(engine, batch): # Prepare metrics - note how we compute distributed metrics RunningAverage(output_transform=lambda x: x).attach(trainer, "loss") - metrics = {"nll": Loss(torch.nn.CrossEntropyLoss(ignore_index=-1), output_transform=lambda x: (x[0][0], x[1][0])), + metrics = {"nll": Loss(torch.nn.CrossEntropyLoss(ignore_index=-100), output_transform=lambda x: (x[0][0], x[1][0])), "accuracy": Accuracy(output_transform=lambda x: (x[0][1], x[1][1]))} metrics.update({"average_nll": MetricsLambda(average_distributed_scalar, metrics["nll"], args), "average_accuracy": MetricsLambda(average_distributed_scalar, metrics["accuracy"], args)}) @@ -260,7 +260,7 @@ def inference(engine, batch): # On the main process: close tensorboard logger and rename the last checkpoint (for easy re-loading with OpenAIGPTModel.from_pretrained method) if args.local_rank in [-1, 0] and args.n_epochs > 0: - os.rename(checkpoint_handler._saved[-1][1][-1], os.path.join(log_dir, WEIGHTS_NAME)) # TODO: PR in ignite to 
have better access to saved file paths (cleaner) + os.rename(os.path.join(log_dir, checkpoint_handler._saved[-1][1]), os.path.join(log_dir, WEIGHTS_NAME)) # TODO: PR in ignite to have better access to saved file paths (cleaner) tb_logger.close() if __name__ == "__main__": diff --git a/utils.py b/utils.py index 4fa8db0..bebd750 100644 --- a/utils.py +++ b/utils.py @@ -11,7 +11,7 @@ import torch -from pytorch_transformers import cached_path +from transformers import cached_path PERSONACHAT_URL = "https://s3.amazonaws.com/datasets.huggingface.co/personachat/personachat_self_original.json" HF_FINETUNED_MODEL = "https://s3.amazonaws.com/models.huggingface.co/transfer-learning-chatbot/gpt_personachat_cache.tar.gz"