
Fix: rename the remaining tk names to mindpet
husichao666 committed Sep 26, 2023
1 parent 0b9b229 commit e64eb3f
Showing 27 changed files with 188 additions and 188 deletions.
16 changes: 8 additions & 8 deletions README.md
@@ -45,12 +45,12 @@ pip uninstall mindpet

| Fine-tuning algorithm | Algorithm paper | Usage guide |
|----------------| ----------------------------------------------------------- |-----------------------------------------------------------------|
| LoRA | LoRA: Low-Rank Adaptation of Large Language Models | [TK_DeltaAlgorithm_README](doc/TK_DeltaAlgorithm_README.md) Chapter 1 |
| PrefixTuning | Prefix-Tuning: Optimizing Continuous Prompts for Generation | [TK_DeltaAlgorithm_README](doc/TK_DeltaAlgorithm_README.md) Chapter 2 |
| Adapter | Parameter-Efficient Transfer Learning for NLP | [TK_DeltaAlgorithm_README](doc/TK_DeltaAlgorithm_README.md) Chapter 3 |
| LowRankAdapter | Compacter: Efficient low-rank hypercomplex adapter layers | [TK_DeltaAlgorithm_README](doc/TK_DeltaAlgorithm_README.md) Chapter 4 |
| BitFit | BitFit: Simple Parameter-efficient Fine-tuning for Transformer-based Masked Language-models | [TK_DeltaAlgorithm_README](doc/TK_DeltaAlgorithm_README.md) Chapter 5 |
| R_Drop | R-Drop: Regularized Dropout for Neural Networks | [TK_DeltaAlgorithm_README](doc/TK_DeltaAlgorithm_README.md) Chapter 6 |
| LoRA | LoRA: Low-Rank Adaptation of Large Language Models | [MindPet_DeltaAlgorithm_README](doc/MindPet_DeltaAlgorithm_README.md) Chapter 1 |
| PrefixTuning | Prefix-Tuning: Optimizing Continuous Prompts for Generation | [MindPet_DeltaAlgorithm_README](doc/MindPet_DeltaAlgorithm_README.md) Chapter 2 |
| Adapter | Parameter-Efficient Transfer Learning for NLP | [MindPet_DeltaAlgorithm_README](doc/MindPet_DeltaAlgorithm_README.md) Chapter 3 |
| LowRankAdapter | Compacter: Efficient low-rank hypercomplex adapter layers | [MindPet_DeltaAlgorithm_README](doc/MindPet_DeltaAlgorithm_README.md) Chapter 4 |
| BitFit | BitFit: Simple Parameter-efficient Fine-tuning for Transformer-based Masked Language-models | [MindPet_DeltaAlgorithm_README](doc/MindPet_DeltaAlgorithm_README.md) Chapter 5 |
| R_Drop | R-Drop: Regularized Dropout for Neural Networks | [MindPet_DeltaAlgorithm_README](doc/MindPet_DeltaAlgorithm_README.md) Chapter 6 |



@@ -60,12 +60,12 @@ pip uninstall mindpet

MindPet lets users freeze selected modules in a network, either by fine-tuning (delta) algorithm or by module name, and provides two ways to do so: an API call and a configuration file.

See Chapter 1 of [TK_GraphOperation_README](doc/TK_GraphOperation_README.md) for usage.
See Chapter 1 of [MindPet_GraphOperation_README](doc/MindPet_GraphOperation_README.md) for usage.



### 4.2 API for Saving Trainable Parameters

MindPet lets users save only the parameters that are updated during training to a ckpt file, reducing the physical storage required.

See Chapter 2 of [TK_GraphOperation_README](doc/TK_GraphOperation_README.md) for usage.
See Chapter 2 of [MindPet_GraphOperation_README](doc/MindPet_GraphOperation_README.md) for usage.

Large diffs are not rendered by default.

@@ -38,7 +38,7 @@ freeze_modules(model, include, exclude)
**Example:**

```python
from tk.graph.freeze_utils import freeze_modules
from mindpet.graph.freeze_utils import freeze_modules

# Initialize the network
model = Network()
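
# The following line is a hedged sketch, not part of the original diff: it assumes
# include/exclude accept lists of module-name patterns, as described in the referenced doc.
freeze_modules(model, include=['*'], exclude=['*mindpet_delta*'])  # hypothetical patterns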
```

@@ -86,7 +86,7 @@ freeze_delta(model, mode, include, exclude)
**Example:**

```python
from tk.graph.freeze_utils import freeze_delta
from mindpet.graph.freeze_utils import freeze_delta

# Initialize the network
model = Network()
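
# The following line is a hedged sketch, not part of the original diff: it assumes mode names
# the delta algorithm to freeze and include/exclude are optional module-name patterns.
freeze_delta(model, mode='lora', include=['*'], exclude=['*layernorm*'])  # hypothetical arguments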
```

@@ -141,7 +141,7 @@ freeze_from_config(model, config_path)
**Example:**

```python
from tk.graph.freeze_utils import freeze_from_config
from mindpet.graph.freeze_utils import freeze_from_config

# Initialize the network
model = Network()
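
# The following line is a hedged sketch, not part of the original diff: config_path points to a
# user-written freeze configuration file whose format is described in the referenced doc.
freeze_from_config(model, config_path='./freeze_config.yaml')  # hypothetical path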
```

@@ -187,7 +187,7 @@ TrainableParamsCheckPoint(directory, prefix, config)
- **During model fine-tuning**, import the `TrainableParamsCheckPoint` class from the large-model fine-tuning toolkit. Its usage is the same as MindSpore's `ModelCheckpoint`: instantiate this `callback` and add it to the training `callback list`, for example:

```python
from tk.graph import TrainableParamsCheckPoint
from mindpet.graph import TrainableParamsCheckPoint
from mindspore import CheckpointConfig

ckpt_config = CheckpointConfig()
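
# The following lines are a hedged sketch, not part of the original diff: per the description
# above, the callback is instantiated like ModelCheckpoint and appended to the callback list.
params_ckpt = TrainableParamsCheckPoint(directory='./output', prefix='delta', config=ckpt_config)  # hypothetical directory/prefix
model.train(epoch_num, train_dataset, callbacks=[params_ckpt])  # model, epoch_num, train_dataset defined elsewhere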
```
4 changes: 2 additions & 2 deletions mindpet/__init__.py
@@ -2,6 +2,6 @@
# -*- coding: utf-8 -*-
# Copyright © Huawei Technologies Co., Ltd. 2022-2023. All rights reserved.

import mindpet.tk_sdk as tk_sdk
import mindpet.mindpet_sdk as mindpet_sdk

__all__ = ["tk_sdk"]
__all__ = ["mindpet_sdk"]
34 changes: 17 additions & 17 deletions mindpet/delta/adapter.py
@@ -53,17 +53,17 @@ def __init__(
self.non_linearity_name = non_linearity

adapter_dict = OrderedDict()
adapter_dict["tk_delta_adapter_down_sampler"] = _Linear(hidden_size,
adapter_dict["mindpet_delta_adapter_down_sampler"] = _Linear(hidden_size,
bottleneck_size,
compute_dtype=compute_dtype,
param_init_type=param_init_type)
adapter_dict["tk_delta_adapter_non_linear"] = get_activation(non_linearity)
adapter_dict["tk_delta_adapter_up_sampler"] = _Linear(bottleneck_size,
adapter_dict["mindpet_delta_adapter_non_linear"] = get_activation(non_linearity)
adapter_dict["mindpet_delta_adapter_up_sampler"] = _Linear(bottleneck_size,
hidden_size,
compute_dtype=compute_dtype,
param_init_type=param_init_type)

self.tk_delta_adapter_block = nn.SequentialCell(adapter_dict)
self.mindpet_delta_adapter_block = nn.SequentialCell(adapter_dict)
self.residual_add = P.Add()
self.cast = P.Cast()
self.shape = P.Shape()
@@ -79,7 +79,7 @@ def construct(self, input_tensor):
input_tensor = self.reshape(input_tensor, (-1, input_tensor_shape[-1]))

# calculate adapter_out
adapter_out = self.tk_delta_adapter_block(input_tensor)
adapter_out = self.mindpet_delta_adapter_block(input_tensor)

# residual connection, add input and adapter_out
output = self.residual_add(input_tensor, adapter_out)
@@ -99,26 +99,26 @@ def shard(self,
strategy_residual_add=None):
"""Shard Method"""
try:
self.tk_delta_adapter_block.tk_delta_adapter_down_sampler.shard(
self.mindpet_delta_adapter_block.mindpet_delta_adapter_down_sampler.shard(
strategy_matmul=strategy_matmul_down_sampler, strategy_bias=strategy_bias_down_sampler)

if self.non_linearity_name.lower() == "leakyrelu":
self.tk_delta_adapter_block.tk_delta_adapter_non_linear.select_op.shard(
self.mindpet_delta_adapter_block.mindpet_delta_adapter_non_linear.select_op.shard(
(strategy_non_linearity[0], strategy_non_linearity[0]))
elif self.non_linearity_name.lower() == "logsigmoid":
self.tk_delta_adapter_block.tk_delta_adapter_non_linear.mul.shard((strategy_non_linearity[0], ()))
self.tk_delta_adapter_block.tk_delta_adapter_non_linear.exp.shard(strategy_non_linearity)
self.tk_delta_adapter_block.tk_delta_adapter_non_linear.add.shard((strategy_non_linearity[0], ()))
self.tk_delta_adapter_block.tk_delta_adapter_non_linear.rec.shard(strategy_non_linearity)
self.tk_delta_adapter_block.tk_delta_adapter_non_linear.log.shard(strategy_non_linearity)
self.mindpet_delta_adapter_block.mindpet_delta_adapter_non_linear.mul.shard((strategy_non_linearity[0], ()))
self.mindpet_delta_adapter_block.mindpet_delta_adapter_non_linear.exp.shard(strategy_non_linearity)
self.mindpet_delta_adapter_block.mindpet_delta_adapter_non_linear.add.shard((strategy_non_linearity[0], ()))
self.mindpet_delta_adapter_block.mindpet_delta_adapter_non_linear.rec.shard(strategy_non_linearity)
self.mindpet_delta_adapter_block.mindpet_delta_adapter_non_linear.log.shard(strategy_non_linearity)
elif self.non_linearity_name.lower() == "logsoftmax":
raise ValueError("The 'LogSoftmax' function is not supported in semi auto parallel "
"or auto parallel mode.")
else:
getattr(self.tk_delta_adapter_block.tk_delta_adapter_non_linear,
getattr(self.mindpet_delta_adapter_block.mindpet_delta_adapter_non_linear,
self.non_linearity_name).shard(strategy_non_linearity)

self.tk_delta_adapter_block.tk_delta_adapter_up_sampler.shard(strategy_matmul=strategy_matmul_up_sampler,
self.mindpet_delta_adapter_block.mindpet_delta_adapter_up_sampler.shard(strategy_matmul=strategy_matmul_up_sampler,
strategy_bias=strategy_bias_up_sampler)

self.residual_add.shard(strategy_residual_add)
@@ -194,7 +194,7 @@ def __init__(self,
has_bias=has_bias,
activation=activation)

self.tk_delta_adapter = AdapterLayer(hidden_size=out_channels,
self.mindpet_delta_adapter = AdapterLayer(hidden_size=out_channels,
bottleneck_size=bottleneck_size,
non_linearity=non_linearity,
param_init_type=param_init_type,
@@ -226,7 +226,7 @@ def construct(self, input_tensor):
input_tensor = self.activation(input_tensor)

# calculate adapter_out
input_tensor = self.tk_delta_adapter(input_tensor)
input_tensor = self.mindpet_delta_adapter(input_tensor)

# recover the previous outshape and dtype
out_shape = x_shape[:-1] + (-1,)
@@ -267,7 +267,7 @@ def shard(self,
getattr(self.activation, self.act_name).shard(strategy_activation_org)

# set adapter strategy
self.tk_delta_adapter.shard(strategy_matmul_down_sampler=strategy_matmul_down_sampler,
self.mindpet_delta_adapter.shard(strategy_matmul_down_sampler=strategy_matmul_down_sampler,
strategy_bias_down_sampler=strategy_bias_down_sampler,
strategy_non_linearity=strategy_non_linearity,
strategy_matmul_up_sampler=strategy_matmul_up_sampler,
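Aside: the renamed `mindpet_delta_adapter_*` cells above implement a standard bottleneck adapter, that is, a down-projection, a non-linearity, an up-projection, and a residual add back onto the input. The snippet below is a minimal NumPy sketch of that computation only; it is not MindPet code, and biases, dtype casts and sharding are omitted.

```python
import numpy as np

def adapter_layer(x, w_down, w_up, non_linearity=np.tanh):
    """Bottleneck adapter: down-project, apply the non-linearity, up-project, add the residual."""
    hidden = non_linearity(x @ w_down)   # (..., hidden_size) -> (..., bottleneck_size)
    return x + hidden @ w_up             # back to (..., hidden_size), residual connection

# Toy shapes: hidden_size=8, bottleneck_size=2 (illustrative values only).
x = np.random.randn(4, 8)
out = adapter_layer(x, np.random.randn(8, 2), np.random.randn(2, 8))
assert out.shape == x.shape
```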
12 changes: 6 additions & 6 deletions mindpet/delta/lora.py
@@ -59,11 +59,11 @@ def __init__(
self.lora_rank = lora_rank
self.lora_alpha = lora_alpha
self.lora_dropout = get_dropout(lora_dropout)
self.tk_delta_lora_a = Parameter(
self.mindpet_delta_lora_a = Parameter(
initializer(lora_a_init, [lora_rank, in_channels], param_init_type),
name='tk_delta_lora_A')
self.tk_delta_lora_b = Parameter(initializer(lora_b_init, [out_channels, lora_rank], param_init_type),
name='tk_delta_lora_B')
name='mindpet_delta_lora_A')
self.mindpet_delta_lora_b = Parameter(initializer(lora_b_init, [out_channels, lora_rank], param_init_type),
name='mindpet_delta_lora_B')
self.scaling = self.lora_alpha / self.lora_rank

# Calculation utils
@@ -80,8 +80,8 @@ def construct(self, input_tensor):
ori_dtype = F.dtype(input_tensor)
input_tensor = self.cast(input_tensor, self.dtype)
weight = self.cast(self.weight, self.dtype)
lora_a = self.cast(self.tk_delta_lora_a, self.dtype)
lora_b = self.cast(self.tk_delta_lora_b, self.dtype)
lora_a = self.cast(self.mindpet_delta_lora_a, self.dtype)
lora_b = self.cast(self.mindpet_delta_lora_b, self.dtype)
scaling = self.cast(self.scaling, self.dtype)

# Shape operations
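Aside: the hunks above only rename the low-rank parameters (`mindpet_delta_lora_a` of shape [lora_rank, in_channels], `mindpet_delta_lora_b` of shape [out_channels, lora_rank]); the LoRA math itself is unchanged. As a shape reference, here is a minimal NumPy sketch of the LoRA forward pass; dropout, bias and dtype casts are omitted, and the names are illustrative, not MindPet API.

```python
import numpy as np

def lora_dense(x, weight, lora_a, lora_b, lora_alpha, lora_rank):
    """LoRA forward: frozen dense projection plus the scaled low-rank update."""
    scaling = lora_alpha / lora_rank             # matches self.scaling in the diff
    base = x @ weight.T                          # pretrained weight, shape [out_channels, in_channels]
    delta = (x @ lora_a.T) @ lora_b.T * scaling  # rank-r update through lora_a and lora_b
    return base + delta

# Toy shapes: in_channels=16, out_channels=8, lora_rank=4 (illustrative values only).
x = np.random.randn(2, 16)
out = lora_dense(x, np.random.randn(8, 16), np.random.randn(4, 16), np.zeros((8, 4)),
                 lora_alpha=8, lora_rank=4)
assert out.shape == (2, 8)
```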
48 changes: 24 additions & 24 deletions mindpet/delta/low_rank_adapter.py
@@ -64,12 +64,12 @@ def __init__(self,
self.out_channels = out_channels
self.rank = rank
self.weight_init = weight_init
self.tk_delta_low_rank_adapter_weight_left = \
self.mindpet_delta_low_rank_adapter_weight_left = \
Parameter(initializer(self.weight_init, [in_channels, rank], param_init_type),
name="tk_delta_low_rank_adapter_weight_left")
self.tk_delta_low_rank_adapter_weight_right = \
name="mindpet_delta_low_rank_adapter_weight_left")
self.mindpet_delta_low_rank_adapter_weight_right = \
Parameter(initializer(self.weight_init, [rank, out_channels], param_init_type),
name="tk_delta_low_rank_adapter_weight_right")
name="mindpet_delta_low_rank_adapter_weight_right")
self.has_bias = has_bias

self.bias = None
@@ -99,8 +99,8 @@ def construct(self, input_tensor):
input_tensor = self.reshape(input_tensor, (-1, x_shape[-1]))

# compute weight
weight = self.matmul_weight(self.cast(self.tk_delta_low_rank_adapter_weight_left, self.compt_dtype),
self.cast(self.tk_delta_low_rank_adapter_weight_right, self.compt_dtype))
weight = self.matmul_weight(self.cast(self.mindpet_delta_low_rank_adapter_weight_left, self.compt_dtype),
self.cast(self.mindpet_delta_low_rank_adapter_weight_right, self.compt_dtype))

input_tensor = self.cast(input_tensor, self.compt_dtype)
input_tensor = self.matmul_input(input_tensor, weight)
@@ -179,15 +179,15 @@ def __init__(
self.bottleneck_size = hidden_size // reduction_factor
self.non_linearity = non_linearity

self.tk_delta_low_rank_adapter_down_sampler = LowRankLinear(in_channels=hidden_size,
self.mindpet_delta_low_rank_adapter_down_sampler = LowRankLinear(in_channels=hidden_size,
out_channels=self.bottleneck_size,
rank=low_rank_size,
weight_init=low_rank_w_init,
param_init_type=param_init_type,
compute_dtype=compute_dtype)
self.tk_delta_low_rank_adapter_non_linear = get_activation(
self.mindpet_delta_low_rank_adapter_non_linear = get_activation(
non_linearity)
self.tk_delta_low_rank_adapter_up_sampler = LowRankLinear(in_channels=self.bottleneck_size,
self.mindpet_delta_low_rank_adapter_up_sampler = LowRankLinear(in_channels=self.bottleneck_size,
out_channels=hidden_size,
rank=low_rank_size,
weight_init=low_rank_w_init,
@@ -205,11 +205,11 @@ def construct(self, input_tensor):
input_tensor = P.Reshape()(input_tensor, (-1, x_shape[-1]))

# calculate adapter_out
adapter_down_sampler_output = self.tk_delta_low_rank_adapter_down_sampler(
adapter_down_sampler_output = self.mindpet_delta_low_rank_adapter_down_sampler(
input_tensor)
adapter_non_linear_output = self.tk_delta_low_rank_adapter_non_linear(
adapter_non_linear_output = self.mindpet_delta_low_rank_adapter_non_linear(
adapter_down_sampler_output)
adapter_output = self.tk_delta_low_rank_adapter_up_sampler(
adapter_output = self.mindpet_delta_low_rank_adapter_up_sampler(
adapter_non_linear_output)

# residual connection, add input and adapter_output
@@ -253,30 +253,30 @@ def shard(self, strategy_matmul_down_sampler_weight=None,
strategy_residual_add (tuple): The strategy for the residual_add.
"""
try:
self.tk_delta_low_rank_adapter_down_sampler.shard(
self.mindpet_delta_low_rank_adapter_down_sampler.shard(
strategy_matmul_down_sampler_weight, strategy_matmul_down_sampler_input, strategy_bias_down_sampler)
self.tk_delta_low_rank_adapter_up_sampler.shard(
self.mindpet_delta_low_rank_adapter_up_sampler.shard(
strategy_matmul_up_sampler_weight, strategy_matmul_up_sampler_input, strategy_bias_up_sampler)
# some operations have many primitives, need to manually set the shard
if self.non_linearity.lower() == "leakyrelu":
self.tk_delta_low_rank_adapter_non_linear.select_op.shard(
self.mindpet_delta_low_rank_adapter_non_linear.select_op.shard(
(strategy_non_linearity[0], strategy_non_linearity[0]))
elif self.non_linearity.lower() == "logsigmoid":
self.tk_delta_low_rank_adapter_non_linear.mul.shard(
self.mindpet_delta_low_rank_adapter_non_linear.mul.shard(
(strategy_non_linearity[0], ()))
self.tk_delta_low_rank_adapter_non_linear.exp.shard(
self.mindpet_delta_low_rank_adapter_non_linear.exp.shard(
strategy_non_linearity)
self.tk_delta_low_rank_adapter_non_linear.add.shard(
self.mindpet_delta_low_rank_adapter_non_linear.add.shard(
(strategy_non_linearity[0], ()))
self.tk_delta_low_rank_adapter_non_linear.rec.shard(
self.mindpet_delta_low_rank_adapter_non_linear.rec.shard(
strategy_non_linearity)
self.tk_delta_low_rank_adapter_non_linear.log.shard(
self.mindpet_delta_low_rank_adapter_non_linear.log.shard(
strategy_non_linearity)
elif self.non_linearity.lower() == "logsoftmax":
raise ValueError("The 'LogSoftmax' function is not supported in semi auto parallel "
"or auto parallel mode.")
else:
getattr(self.tk_delta_low_rank_adapter_non_linear,
getattr(self.mindpet_delta_low_rank_adapter_non_linear,
self.non_linearity).shard(strategy_non_linearity)
self.residual_add.shard(strategy_residual_add)

@@ -372,7 +372,7 @@ def __init__(self,
bias_init=bias_init,
has_bias=has_bias,
activation=activation)
self.tk_delta_low_rank_adapter = LowRankAdapterLayer(hidden_size=out_channels,
self.mindpet_delta_low_rank_adapter = LowRankAdapterLayer(hidden_size=out_channels,
reduction_factor=reduction_factor,
low_rank_size=low_rank_size,
low_rank_w_init=low_rank_w_init,
@@ -404,7 +404,7 @@ def construct(self, input_tensor):
input_tensor = self.activation(input_tensor)

# calculate low_rank_adapter_out
input_tensor = self.tk_delta_low_rank_adapter(input_tensor)
input_tensor = self.mindpet_delta_low_rank_adapter(input_tensor)

# recover the previous outshape and dtype
out_shape = x_shape[:-1] + (-1,)
@@ -472,7 +472,7 @@ def shard(self, strategy_matmul_org=None,
strategy_activation_org)

# set low_rank_adapter strategy
self.tk_delta_low_rank_adapter.shard(strategy_matmul_down_sampler_weight,
self.mindpet_delta_low_rank_adapter.shard(strategy_matmul_down_sampler_weight,
strategy_matmul_down_sampler_input,
strategy_bias_down_sampler,
strategy_non_linearity,
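Aside: the low-rank adapter hunks above follow the same pattern. Each `LowRankLinear` factors its weight into an [in_channels, rank] left matrix and a [rank, out_channels] right matrix and multiplies them back together before the input matmul, and the adapter layer chains down-sampler, non-linearity, up-sampler and a residual add. Below is a minimal NumPy sketch of that computation; biases, dtype casts and sharding are omitted, and the names are illustrative, not MindPet API.

```python
import numpy as np

def low_rank_linear(x, w_left, w_right):
    """LowRankLinear: rebuild the full weight as w_left @ w_right, then project the input."""
    return x @ (w_left @ w_right)                # [in, rank] @ [rank, out] -> [in, out]

def low_rank_adapter_layer(x, down, up, non_linearity=np.tanh):
    """Low-rank adapter: down-sample, non-linearity, up-sample, residual add."""
    hidden = non_linearity(low_rank_linear(x, *down))   # hidden_size -> bottleneck_size
    return x + low_rank_linear(hidden, *up)              # back to hidden_size

# Toy shapes: hidden_size=8, reduction_factor=4 -> bottleneck_size=2, low_rank_size=1.
x = np.random.randn(3, 8)
down = (np.random.randn(8, 1), np.random.randn(1, 2))   # [hidden_size, rank], [rank, bottleneck_size]
up = (np.random.randn(2, 1), np.random.randn(1, 8))     # [bottleneck_size, rank], [rank, hidden_size]
assert low_rank_adapter_layer(x, down, up).shape == x.shape
```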
