CPT Tuner #2168

Merged (28 commits, Nov 19, 2024)
Changes shown are from 6 of the 28 commits.

Commits
92b9e1a
added CPT model to peft
tsachiblau Oct 22, 2024
e54d380
Merge branch 'huggingface:main' into main
tsachiblau Oct 22, 2024
023f071
Merge branch 'huggingface:main' into main
tsachiblau Oct 24, 2024
54cddaf
Merge branch 'huggingface:main' into main
tsachiblau Oct 25, 2024
2dfe70f
Added arXiv link to the paper, integrated CPT into testing framework,…
tsachiblau Oct 25, 2024
ba4b115
Merge branch 'huggingface:main' into main
tsachiblau Oct 25, 2024
f8c8317
Merge branch 'huggingface:main' into main
tsachiblau Oct 30, 2024
bd2fc70
config: Added config check in __post_init__. Removed redundant initia…
tsachiblau Oct 30, 2024
b01b214
Merge branch 'main' of https://github.com/tsachiblau/peft_CPT
tsachiblau Oct 30, 2024
6ed1723
Merge branch 'huggingface:main' into main
tsachiblau Nov 3, 2024
77bb0b9
tests: Updated test_cpt and testing_common as per the PR requirements.
tsachiblau Nov 3, 2024
dbcdedf
Created cpt.md in package_regerence. Updated the prompting.md file. a…
tsachiblau Nov 3, 2024
f7138d4
Merge branch 'huggingface:main' into main
tsachiblau Nov 5, 2024
0a5fb20
verifying that the model is causal LM
tsachiblau Nov 5, 2024
7206db5
Changed CPTModel to CPTEmbedding
tsachiblau Nov 5, 2024
24b0af9
merge with main branch
tsachiblau Nov 5, 2024
81ffa09
make style
tsachiblau Nov 7, 2024
130ec76
make style
tsachiblau Nov 7, 2024
70067d8
make style
tsachiblau Nov 7, 2024
9397314
make doc
tsachiblau Nov 8, 2024
249713c
Merge branch 'huggingface:main' into main
tsachiblau Nov 10, 2024
0a43473
Removed redundant checks
tsachiblau Nov 10, 2024
144f042
Fixed errors
tsachiblau Nov 13, 2024
97449da
merge with peft
tsachiblau Nov 13, 2024
dacb400
Minor code updates.
tsachiblau Nov 13, 2024
cc348a4
Minor code updates.
tsachiblau Nov 17, 2024
79959d1
Merge branch 'huggingface:main' into main
tsachiblau Nov 18, 2024
7eea892
Minor code updates.
tsachiblau Nov 18, 2024
2 changes: 2 additions & 0 deletions src/peft/__init__.py
@@ -91,6 +91,8 @@
HRAConfig,
HRAModel,
VBLoRAConfig,
CPTEmbedding,
CPTConfig,
)
from .utils import (
TRANSFORMERS_MODELS_TO_PREFIX_TUNING_POSTPROCESS_MAPPING,
4 changes: 4 additions & 0 deletions src/peft/mapping.py
@@ -38,6 +38,8 @@
AdaptionPromptConfig,
BOFTConfig,
BOFTModel,
CPTConfig,
CPTEmbedding,
FourierFTConfig,
FourierFTModel,
HRAConfig,
@@ -104,6 +106,7 @@
"XLORA": XLoraConfig,
"HRA": HRAConfig,
"VBLORA": VBLoRAConfig,
"CPT": CPTConfig,
}

PEFT_TYPE_TO_TUNER_MAPPING: dict[str, type[BaseTuner]] = {
@@ -121,6 +124,7 @@
"XLORA": XLoraModel,
"HRA": HRAModel,
"VBLORA": VBLoRAModel,
"CPT": CPTEmbedding,
}


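The two registry entries added above are what let PEFT resolve the string "CPT" to its config and tuner classes. A minimal lookup sketch (not part of this diff; it assumes the first dictionary is PEFT_TYPE_TO_CONFIG_MAPPING, whose name lies outside the shown hunk):

from peft.mapping import PEFT_TYPE_TO_CONFIG_MAPPING, PEFT_TYPE_TO_TUNER_MAPPING

# Both lookups succeed only because of the "CPT" entries added in this file.
config_cls = PEFT_TYPE_TO_CONFIG_MAPPING["CPT"]  # -> CPTConfig
tuner_cls = PEFT_TYPE_TO_TUNER_MAPPING["CPT"]    # -> CPTEmbedding
print(config_cls.__name__, tuner_cls.__name__)   # CPTConfig CPTEmbedding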
54 changes: 54 additions & 0 deletions src/peft/peft_model.py
@@ -46,6 +46,7 @@
AdaLoraModel,
AdaptionPromptModel,
BOFTModel,
CPTEmbedding,
FourierFTModel,
HRAModel,
IA3Model,
@@ -103,6 +104,7 @@
PeftType.XLORA: XLoraModel,
PeftType.HRA: HRAModel,
PeftType.VBLORA: VBLoRAModel,
PeftType.CPT: CPTEmbedding,
}


@@ -648,6 +650,8 @@ def _setup_prompt_encoder(self, adapter_name: str):
prompt_encoder = PromptEncoder(config)
elif config.peft_type == PeftType.PREFIX_TUNING:
prompt_encoder = PrefixEncoder(config)
elif config.peft_type == PeftType.CPT:
prompt_encoder = CPTEmbedding(config, self.word_embeddings)
else:
raise ValueError("Not supported")

@@ -1718,6 +1722,8 @@ def forward(
# overwrite past_kv in kwargs
kwargs["past_key_values"] = self.get_prompt(batch_size)
return self.base_model(input_ids=input_ids, inputs_embeds=inputs_embeds, **kwargs)
elif peft_config.peft_type == PeftType.CPT:
return self._cpt_forward(input_ids, inputs_embeds, peft_config, task_ids, batch_size, **kwargs)
else:
if inputs_embeds is None:
inputs_embeds = self.word_embeddings(input_ids)
@@ -1730,6 +1736,54 @@
inputs_embeds = torch.cat((prompts, inputs_embeds), dim=1)
return self.base_model(inputs_embeds=inputs_embeds, **kwargs)


def _cpt_forward(self, input_ids=None, inputs_embeds=None, peft_config=None, task_ids=None, batch_size=None, **kwargs):
# Extract labels from kwargs
labels = kwargs.pop("labels")
# Extract input_type_mask from kwargs and move it to the same device as labels
input_type_mask = kwargs.pop("input_type_mask").to(labels.device)

if peft_config.cpt_prompt_tuning_init == "TEXT":
cpt_token_ids = peft_config.cpt_token_ids
cpt_tokens_type_mask = peft_config.cpt_tokens_type_mask
else:
cpt_token_ids = [0] * peft_config.num_virtual_tokens
cpt_tokens_type_mask = [0] * peft_config.num_virtual_tokens

# Generate embeddings if not provided
if inputs_embeds is None:
inputs_embeds = self.word_embeddings(input_ids)
# Get prompt and concatenate with input embeddings
prompts = self.get_prompt(batch_size=batch_size, task_ids=task_ids)
prompts = prompts.to(inputs_embeds.dtype)
inputs_embeds = torch.cat((prompts, inputs_embeds), dim=1)
# If labels are provided, generate prefix labels and type mask
if labels is not None:
# Generate prefix labels and concatenate with the input labels
prefix_labels = torch.Tensor(cpt_token_ids).long().view(1, -1)
prefix_labels = prefix_labels.repeat(batch_size, 1).to(labels.device)
cpt_labels = torch.cat((prefix_labels, labels), dim=1)
# Generate prefix type mask and shift input type mask values to avoid conflicts
prefix_type_mask = torch.Tensor(cpt_tokens_type_mask).long().view(1, -1)
prefix_type_mask = prefix_type_mask.repeat(batch_size, 1).to(labels.device)
adjusted_input_type_mask = input_type_mask
adjusted_input_type_mask[adjusted_input_type_mask > 0] += prefix_type_mask.max()
# Concatenate prefix and shifted input type masks
cpt_type_mask = torch.cat((prefix_type_mask, adjusted_input_type_mask), dim=1)
# Identify valid label positions and mask invalid ones with -100
labels_idx = (cpt_type_mask > 0) & (cpt_type_mask % 4 == 0)
cpt_labels[~labels_idx] = -100
# Update kwargs with the modified labels
kwargs["labels"] = cpt_labels
# Pass the modified inputs to the base model
base_model_output = self.base_model(inputs_embeds=inputs_embeds, **kwargs)
# Calculate the loss using the custom CPT loss function
base_model_output = CPTEmbedding.calculate_loss(
base_model_output, cpt_labels, cpt_type_mask, self.peft_config["default"]
)

return base_model_output

def generate(self, *args, **kwargs):
peft_config = self.active_peft_config
self.base_model.prepare_inputs_for_generation = self.prepare_inputs_for_generation
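To make the label-masking rule in _cpt_forward above concrete, here is a toy sketch (not part of the diff): labels_idx keeps only positions whose type id is a positive multiple of 4, and every other position is set to -100, the ignore index of torch.nn.CrossEntropyLoss. The specific type-id values below are illustrative; only the modulo-4 rule is taken from the code.

import torch

cpt_labels = torch.tensor([[11, 12, 13, 14, 15, 16]])
cpt_type_mask = torch.tensor([[0, 1, 2, 3, 4, 4]])  # illustrative type ids per position

labels_idx = (cpt_type_mask > 0) & (cpt_type_mask % 4 == 0)
cpt_labels[~labels_idx] = -100  # mask everything except type-4 positions
print(cpt_labels)  # tensor([[-100, -100, -100, -100, 15, 16]])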
1 change: 1 addition & 0 deletions src/peft/tuners/__init__.py
@@ -37,3 +37,4 @@
from .xlora import XLoraConfig, XLoraModel
from .hra import HRAConfig, HRAModel
from .vblora import VBLoRAConfig, VBLoRAModel
from .cpt import CPTConfig, CPTEmbedding
20 changes: 20 additions & 0 deletions src/peft/tuners/cpt/__init__.py
@@ -0,0 +1,20 @@
# Copyright 2024-present the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from .config import CPTConfig
from .model import CPTEmbedding


__all__ = ["CPTConfig", "CPTEmbedding"]
102 changes: 102 additions & 0 deletions src/peft/tuners/cpt/config.py
@@ -0,0 +1,102 @@
# Copyright 2024-present the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import enum
from dataclasses import dataclass, field
from typing import Optional

import torch

from peft.config import PeftConfig
from peft.utils import PeftType



class CPTPromptInit(str, enum.Enum):
"""Enum for specifying the initialization method for CPT."""

TEXT = "TEXT" # Initialize using text-based embeddings.
RANDOM = "RANDOM" # Initialize randomly.


@dataclass
class CPTConfig(PeftConfig):
Review comment (Member): Shouldn't this inherit from PromptLearningConfig instead?

Moreover, let's document the arguments here in the docstring. You can reuse the same text as in the help defined below. Also, it would be great if you could extend the help for the different arguments. Put yourself into the shoes of a user who may know about prompt tuning or similar methods but is not intimately familiar with the paper. As is, they would have a hard time knowing what the correct arguments would be and how changing them would affect the outcome.

"""
CPT Configuration class extending PeftConfig for Context-aware Prompt Tuning (CPT).

This class introduces additional parameters required for CPT, such as:
- Token type masks
- Prompt tuning initialization
- Loss weighting
- Projection settings

For more details, see the paper: https://arxiv.org/abs/2410.17222
"""

# Token-related configurations
cpt_token_ids: Optional[list[int]] = field(
default=None, metadata={"help": "Tensor of token IDs used for CPT prompts."}
)
cpt_mask: Optional[list[int]] = field(default=None, metadata={"help": "Tensor mask applied to CPT tokens."})
cpt_tokens_type_mask: Optional[list[int]] = field(
default=None, metadata={"help": "Mask indicating the type of each CPT token."}
)

# Prompt tuning initialization method
cpt_prompt_tuning_init: Optional[str] = field(
Review comment (Member): I think this can be type annotated as CPTPromptInit.

default="TEXT", metadata={"help": "Initialization method: 'TEXT' for embedding-based, 'RANDOM' for random."}
)

# Loss-related configurations
opt_weighted_loss_type: Optional[str] = field(
Review comment (Member): You could change the type to Literal["none", "decay"] to be more precise. Also, remove the Optional, as it implies that None is a valid option, which it is not.

Review comment (Member): Still relevant.

Review comment (Member): My suggestion is to change the type annotation to Literal["none", "decay"].

default="none", metadata={"help": "Type of weighted loss: 'none' or 'decay'."}
)
opt_loss_decay_factor: Optional[float] = field(
default=1.0, metadata={"help": "Factor for exponential decay in loss weighting."}
)

# Projection-related configurations
opt_projection_epsilon: Optional[float] = field(
default=0.1, metadata={"help": "Epsilon value for input projection."}
)
opt_projection_format_epsilon: Optional[float] = field(
default=0.1, metadata={"help": "Epsilon value for format projection."}
)

# Tokenizer configuration
tokenizer_name_or_path: Optional[str] = field(
default=None,
metadata={
"help": "The tokenizer to use for prompt tuning initialization. Only used if prompt_tuning_init is `TEXT`"
},
)

# Virtual token configurations
num_virtual_tokens: int = field(default=0, metadata={"help": "Number of virtual tokens used in the prompt."})
Review comment (Member): Is 0 a sensible default for num_virtual_tokens?

Review comment (Member): I think having 0 as the default here makes little sense. WDYT about using a good default here, say, 10?


# CPT-specific static attributes
Review comment (Member): These are not supposed to be modified by the user, right? In that case, let's move them inside of __post_init__.

Review comment (Member): WDYT about this suggestion?

Review comment (Member): Does it ever make sense to let users pass these arguments? If not, I would remove them here and place them inside the __post_init__ method.

is_prompt_learning = True # Indicates that CPT is a prompt-learning method.
num_layers = None # Number of layers (optional, not always required).
token_dim = None # Dimension of token embeddings.
num_attention_heads = None # Number of attention heads (if applicable).
task_type = "CAUSAL_LM" # Specifies that CPT is used for causal language modeling.
num_transformer_submodules = 1 # Number of transformer submodules used.

def __post_init__(self):
"""
Post-initialization hook to set additional attributes after the config is initialized.
"""
self.peft_type = PeftType.CPT # Specifies that the PEFT type is CPT.
self.target_modules = None # Placeholder for target modules in CPT.
Review comment (Member): Why is this needed?

self.task_type = "CAUSAL_LM" # Ensures task type is causal language modeling.
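A sketch (not part of the diff) of what CPTConfig could look like after the reviewer suggestions above are applied: inheriting from PromptLearningConfig, Literal annotations instead of Optional[str], a non-zero num_virtual_tokens default, and the user-invisible attributes set in __post_init__. The class name is hypothetical and the finally merged version may differ.

from __future__ import annotations

from dataclasses import dataclass, field
from typing import Literal, Optional

from peft.config import PromptLearningConfig
from peft.utils import PeftType


@dataclass
class CPTConfigSketch(PromptLearningConfig):
    cpt_token_ids: Optional[list[int]] = field(
        default=None, metadata={"help": "Token IDs used to build the CPT prompt (TEXT initialization)."}
    )
    cpt_tokens_type_mask: Optional[list[int]] = field(
        default=None, metadata={"help": "Type id of each CPT prompt token."}
    )
    cpt_prompt_tuning_init: Literal["TEXT", "RANDOM"] = field(
        default="TEXT", metadata={"help": "'TEXT' initializes from cpt_token_ids embeddings, 'RANDOM' from noise."}
    )
    opt_weighted_loss_type: Literal["none", "decay"] = field(
        default="none", metadata={"help": "Loss weighting: uniform ('none') or exponentially decayed ('decay')."}
    )
    opt_loss_decay_factor: float = field(
        default=1.0, metadata={"help": "Decay factor used when opt_weighted_loss_type is 'decay'."}
    )
    num_virtual_tokens: int = field(
        default=10, metadata={"help": "Number of virtual prompt tokens."}
    )

    def __post_init__(self):
        # Attributes users are not expected to set, moved out of the field list
        # as suggested in the review.
        self.peft_type = PeftType.CPT
        self.task_type = "CAUSAL_LM"
        self.num_transformer_submodules = 1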