forked from amazon-science/polygon-transformer
Showing 1,149 changed files with 294,870 additions and 6 deletions.
@@ -0,0 +1,56 @@
import logging
import math

import torch
import torch.nn.functional as F


logger = logging.getLogger(__name__)


def swish(x):
    return x * torch.sigmoid(x)


def _gelu_python(x):
    """ Original Implementation of the gelu activation function in Google Bert repo when initially created.
        For information: OpenAI GPT's gelu is slightly different (and gives slightly different results):
        0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
        This is now written in C in torch.nn.functional
        Also see https://arxiv.org/abs/1606.08415
    """
    return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0)))


def gelu_new(x):
    """ Implementation of the gelu activation function currently in Google Bert repo (identical to OpenAI GPT).
        Also see https://arxiv.org/abs/1606.08415
    """
    return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))))


if torch.__version__ < "1.4.0":
    gelu = _gelu_python
else:
    gelu = F.gelu


def gelu_fast(x):
    return 0.5 * x * (1.0 + torch.tanh(x * 0.7978845608 * (1.0 + 0.044715 * x * x)))


ACT2FN = {
    "relu": F.relu,
    "swish": swish,
    "gelu": gelu,
    "tanh": torch.tanh,
    "gelu_new": gelu_new,
    "gelu_fast": gelu_fast,
}


def get_activation(activation_string):
    if activation_string in ACT2FN:
        return ACT2FN[activation_string]
    else:
        raise KeyError("function {} not found in ACT2FN mapping {}".format(activation_string, list(ACT2FN.keys())))
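For orientation, a brief usage sketch of the activation registry above (not part of the committed file): it assumes get_activation is in scope, e.g. imported from wherever this module lives in the repository, and the tensor values are arbitrary.

import torch

# Illustrative only: look up an activation by name and apply it to a tensor.
# Assumes get_activation from the module above is importable here.
act = get_activation("gelu_new")
x = torch.randn(2, 4)
y = act(x)
print(y.shape)  # torch.Size([2, 4])

# Names missing from ACT2FN raise a KeyError that lists the supported keys.
try:
    get_activation("mish")
except KeyError as err:
    print(err)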
@@ -0,0 +1,143 @@
# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" BERT model configuration """


import logging

from .configuration_utils import PretrainedConfig


logger = logging.getLogger(__name__)

BERT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
    "bert-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-config.json",
    "bert-large-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-config.json",
    "bert-base-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-config.json",
    "bert-large-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-config.json",
    "bert-base-multilingual-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased-config.json",
    "bert-base-multilingual-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-config.json",
    "bert-base-chinese": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese-config.json",
    "bert-base-german-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-cased-config.json",
    "bert-large-uncased-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-config.json",
    "bert-large-cased-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-config.json",
    "bert-large-uncased-whole-word-masking-finetuned-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-finetuned-squad-config.json",
    "bert-large-cased-whole-word-masking-finetuned-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-finetuned-squad-config.json",
    "bert-base-cased-finetuned-mrpc": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-finetuned-mrpc-config.json",
    "bert-base-german-dbmdz-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-dbmdz-cased-config.json",
    "bert-base-german-dbmdz-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-dbmdz-uncased-config.json",
    "cl-tohoku/bert-base-japanese": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese/config.json",
    "cl-tohoku/bert-base-japanese-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-whole-word-masking/config.json",
    "cl-tohoku/bert-base-japanese-char": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char/config.json",
    "cl-tohoku/bert-base-japanese-char-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char-whole-word-masking/config.json",
    "TurkuNLP/bert-base-finnish-cased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-cased-v1/config.json",
    "TurkuNLP/bert-base-finnish-uncased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-uncased-v1/config.json",
    "wietsedv/bert-base-dutch-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/wietsedv/bert-base-dutch-cased/config.json",
    # See all BERT models at https://huggingface.co/models?filter=bert
}


class BertConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a :class:`~transformers.BertModel`.
    It is used to instantiate a BERT model according to the specified arguments, defining the model
    architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of
    the BERT `bert-base-uncased <https://huggingface.co/bert-base-uncased>`__ architecture.

    Configuration objects inherit from :class:`~transformers.PretrainedConfig` and can be used
    to control the model outputs. Read the documentation from :class:`~transformers.PretrainedConfig`
    for more information.

    Args:
        vocab_size (:obj:`int`, optional, defaults to 30522):
            Vocabulary size of the BERT model. Defines the different tokens that
            can be represented by the `inputs_ids` passed to the forward method of :class:`~transformers.BertModel`.
        hidden_size (:obj:`int`, optional, defaults to 768):
            Dimensionality of the encoder layers and the pooler layer.
        num_hidden_layers (:obj:`int`, optional, defaults to 12):
            Number of hidden layers in the Transformer encoder.
        num_attention_heads (:obj:`int`, optional, defaults to 12):
            Number of attention heads for each attention layer in the Transformer encoder.
        intermediate_size (:obj:`int`, optional, defaults to 3072):
            Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
        hidden_act (:obj:`str` or :obj:`function`, optional, defaults to "gelu"):
            The non-linear activation function (function or string) in the encoder and pooler.
            If string, "gelu", "relu", "swish" and "gelu_new" are supported.
        hidden_dropout_prob (:obj:`float`, optional, defaults to 0.1):
            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
        attention_probs_dropout_prob (:obj:`float`, optional, defaults to 0.1):
            The dropout ratio for the attention probabilities.
        max_position_embeddings (:obj:`int`, optional, defaults to 512):
            The maximum sequence length that this model might ever be used with.
            Typically set this to something large just in case (e.g., 512 or 1024 or 2048).
        type_vocab_size (:obj:`int`, optional, defaults to 2):
            The vocabulary size of the `token_type_ids` passed into :class:`~transformers.BertModel`.
        initializer_range (:obj:`float`, optional, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        layer_norm_eps (:obj:`float`, optional, defaults to 1e-12):
            The epsilon used by the layer normalization layers.
        gradient_checkpointing (:obj:`bool`, optional, defaults to False):
            If True, use gradient checkpointing to save memory at the expense of slower backward pass.

    Example::

        >>> from transformers import BertModel, BertConfig

        >>> # Initializing a BERT bert-base-uncased style configuration
        >>> configuration = BertConfig()

        >>> # Initializing a model from the bert-base-uncased style configuration
        >>> model = BertModel(configuration)

        >>> # Accessing the model configuration
        >>> configuration = model.config
    """
    model_type = "bert"

    def __init__(
        self,
        vocab_size=30522,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=512,
        type_vocab_size=2,
        initializer_range=0.02,
        layer_norm_eps=1e-12,
        pad_token_id=0,
        gradient_checkpointing=False,
        **kwargs
    ):
        super().__init__(pad_token_id=pad_token_id, **kwargs)

        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.hidden_act = hidden_act
        self.intermediate_size = intermediate_size
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.max_position_embeddings = max_position_embeddings
        self.type_vocab_size = type_vocab_size
        self.initializer_range = initializer_range
        self.layer_norm_eps = layer_norm_eps
        self.gradient_checkpointing = gradient_checkpointing
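A short sketch of how this configuration class is typically used (not part of the committed file; the import path follows the docstring's own example, and the overridden values are arbitrary, chosen only to show how keyword arguments map onto the attributes set in __init__):

from transformers import BertConfig

# Illustrative only: build a smaller BERT-style configuration by overriding a few defaults.
small_config = BertConfig(
    hidden_size=256,
    num_hidden_layers=4,
    num_attention_heads=4,
    intermediate_size=1024,
    hidden_act="gelu_new",
)
print(small_config.hidden_size)   # 256
print(small_config.model_type)    # "bert"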