Rename tk to mindpet #15

Merged
1 commit merged on Sep 21, 2023
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
@@ -31,7 +31,7 @@ jobs:
pip install -r requirements/ci_requirements.txt
- name: Analysing the code with pylint
run: |
pylint tk --rcfile=.github/pylint.conf
pylint mindpet --rcfile=.github/pylint.conf
- name: Test with unit test (UT) pytest
run: |
pytest test/unit_test
2 changes: 1 addition & 1 deletion tk/__init__.py → mindpet/__init__.py
@@ -2,6 +2,6 @@
# -*- coding: utf-8 -*-
# Copyright © Huawei Technologies Co., Ltd. 2022-2023. All rights reserved.

import tk.tk_sdk as tk_sdk
import mindpet.tk_sdk as tk_sdk

__all__ = ["tk_sdk"]
12 changes: 12 additions & 0 deletions mindpet/delta/__init__.py
@@ -0,0 +1,12 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright © Huawei Technologies Co., Ltd. 2022-2023. All rights reserved.
"""Mindpet delta init"""
from mindpet.delta.lora import LoRADense
from mindpet.delta.prefix_layer import PrefixLayer
from mindpet.delta.low_rank_adapter import LowRankAdapterDense, LowRankAdapterLayer
from mindpet.delta.adapter import AdapterDense, AdapterLayer
from mindpet.delta.r_drop import RDropLoss, rdrop_repeat

__all__ = ['LoRADense', 'PrefixLayer', 'LowRankAdapterDense', 'LowRankAdapterLayer',
'AdapterDense', 'AdapterLayer', 'RDropLoss', 'rdrop_repeat']
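
With this package init in place, the delta cells can be imported directly from mindpet.delta instead of the old tk.delta modules. A minimal usage sketch; the channel sizes and LoRA hyper-parameters below are illustrative assumptions, only the import path and class name come from this diff:

from mindpet.delta import LoRADense  # previously: from tk.delta.lora import LoRADense

# Wrap a hypothetical 768 -> 768 projection with a rank-8 LoRA update.
lora_dense = LoRADense(in_channels=768, out_channels=768,
                       lora_rank=8, lora_alpha=16, lora_dropout=0.0)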
44 changes: 23 additions & 21 deletions tk/delta/adapter.py → mindpet/delta/adapter.py
@@ -1,24 +1,23 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright (c) Huawei Technologies Co., Ltd. 2022-2023, All rights reserved.

"""Adapter Cell"""
from collections import OrderedDict

import mindspore as ms
import mindspore.nn as nn
from mindspore import nn
import mindspore.common.dtype as mstype
from mindspore.nn.layer.activation import get_activation, _activation
from mindspore.ops import operations as P
from mindspore.ops import functional as F
from tk.delta.delta_constants import VALID_TENSOR_DATATYPE

try:
from mindspore.nn.transformer.layers import _Linear, _args_type_validator_check, _valid_value_checks
from mindspore._checkparam import Validator
except:
from mindformers.modules.layers import Linear, _args_type_validator_check, _valid_value_checks
except ImportError:
import mindspore._checkparam as Validator
from mindformers.modules.layers import Linear, _args_type_validator_check, _valid_value_checks
_Linear = Linear
from mindpet.delta.delta_constants import VALID_TENSOR_DATATYPE


class AdapterLayer(nn.Cell):
"""
@@ -48,7 +47,7 @@ def __init__(
param_init_type: mstype = mstype.float32,
compute_dtype: mstype = mstype.float16):

super(AdapterLayer, self).__init__()
super().__init__()

self.bottleneck_size = bottleneck_size
self.non_linearity_name = non_linearity
@@ -71,6 +70,7 @@ def __init__(
self.reshape = P.Reshape()

def construct(self, input_tensor):
"""Foward"""
# get input_tensor info
input_tensor_shape = self.shape(input_tensor)
ori_dtype = F.dtype(input_tensor)
@@ -97,6 +97,7 @@ def shard(self,
strategy_matmul_up_sampler=None,
strategy_bias_up_sampler=None,
strategy_residual_add=None):
"""Shard Method"""
try:
self.tk_delta_adapter_block.tk_delta_adapter_down_sampler.shard(
strategy_matmul=strategy_matmul_down_sampler, strategy_bias=strategy_bias_down_sampler)
@@ -114,7 +115,7 @@ def shard(self,
raise ValueError("The 'LogSoftmax' function is not supported in semi auto parallel "
"or auto parallel mode.")
else:
getattr(self.tk_delta_adapter_block.tk_delta_adapter_non_linear,
getattr(self.tk_delta_adapter_block.tk_delta_adapter_non_linear,
self.non_linearity_name).shard(strategy_non_linearity)

self.tk_delta_adapter_block.tk_delta_adapter_up_sampler.shard(strategy_matmul=strategy_matmul_up_sampler,
@@ -123,9 +124,10 @@ def shard(self,
self.residual_add.shard(strategy_residual_add)

except Exception as ex:
# pylint: disable=W0719
raise Exception(f"Exception occurred when set the shard for AdapterLayer, error message: \
{str(ex)}") from ex


class AdapterDense(nn.Dense):
"""
@@ -184,15 +186,13 @@ def __init__(self,
bottleneck_size: int = 64,
non_linearity: str = "gelu",
param_init_type: mstype = mstype.float32,
compute_dtype: mstype = mstype.float16,
**kwargs):

super(AdapterDense, self).__init__(in_channels=in_channels,
out_channels=out_channels,
weight_init=weight_init,
bias_init=bias_init,
has_bias=has_bias,
activation=activation)
compute_dtype: mstype = mstype.float16):
super().__init__(in_channels=in_channels,
out_channels=out_channels,
weight_init=weight_init,
bias_init=bias_init,
has_bias=has_bias,
activation=activation)

self.tk_delta_adapter = AdapterLayer(hidden_size=out_channels,
bottleneck_size=bottleneck_size,
@@ -206,7 +206,7 @@ def __init__(self,
self.act_name = activation

def construct(self, input_tensor):

"""Foward"""
# get input_x info
x_shape = self.shape_op(input_tensor)
ori_dtype = F.dtype(input_tensor)
@@ -234,7 +234,7 @@ def construct(self, input_tensor):
output = self.cast(input_tensor, ori_dtype)
return output

def shard(self,
def shard(self,
strategy_matmul_org=None,
strategy_bias_org=None,
strategy_activation_org=None,
@@ -244,6 +244,7 @@ def shard(self,
strategy_matmul_up_sampler=None,
strategy_bias_up_sampler=None,
strategy_residual_add=None):
"""Shard Method"""
try:
# set origin dense strategy
self.matmul.shard(strategy_matmul_org)
@@ -274,5 +275,6 @@ def construct(self, input_tensor):
strategy_residual_add=strategy_residual_add)

except Exception as ex:
# pylint: disable=W0719
raise Exception(f"Exception occurred when set the shard for AdapterDense, error message: \
{str(ex)}") from ex
tk/delta/delta_constants.py → mindpet/delta/delta_constants.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright © Huawei Technologies Co., Ltd. 2022-2023. All rights reserved.

"""Constants for delta"""
from mindspore import dtype as mstype

# Algorithm precision and data type control
21 changes: 11 additions & 10 deletions tk/delta/lora.py → mindpet/delta/lora.py
@@ -1,26 +1,25 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright (c) Huawei Technologies Co., Ltd. 2022-2023, All rights reserved.

"""LoRA Cell"""
import math
import mindspore as ms
import mindspore.nn as nn
from mindspore import nn
from mindspore import ops
from mindspore import Parameter
from mindspore.common.tensor import Tensor
from mindspore import dtype as mstype
from mindspore.ops import operations as P
from mindspore.ops import functional as F
from mindspore.common.initializer import initializer, HeUniform
from tk.delta.delta_constants import VALID_TENSOR_DATATYPE
from tk.utils.version_control import get_dropout

try:
from mindspore._checkparam import Validator, Rel
INC_LEFT = Rel.INC_LEFT
except:
except ImportError:
import mindspore._checkparam as Validator
INC_LEFT = Validator.INC_LEFT
from mindspore.common.initializer import initializer, HeUniform
from mindpet.delta.delta_constants import VALID_TENSOR_DATATYPE
from mindpet.utils.version_control import get_dropout


class LoRADense(nn.Dense):
"""Define a dense layer with LoRA structure.
@@ -49,7 +48,7 @@ def __init__(
compute_dtype=mstype.float16,
**kwargs
):
super(LoRADense, self).__init__(in_channels, out_channels, **kwargs)
super().__init__(in_channels, out_channels, **kwargs)

# Check params
self._check_num(lora_rank, lora_alpha, lora_dropout)
@@ -76,6 +75,7 @@ def __init__(
self.lora_b_matmul = P.MatMul(transpose_b=True)

def construct(self, input_tensor):
"""Foward"""
# Data type operation
ori_dtype = F.dtype(input_tensor)
input_tensor = self.cast(input_tensor, self.dtype)
@@ -164,6 +164,7 @@ def shard(self, strategy_org_dense_matmul=None,
else:
getattr(self.activation, self.act_name).shard(strategy_activation)
except Exception as ex:
# pylint: disable=W0719
raise Exception(f"Exception occurred when set the shard for LoRADense, error message: {str(ex)}") from ex

def _check_num(self, lora_rank, lora_alpha, lora_dropout):
@@ -179,6 +180,7 @@ def _check_num(self, lora_rank, lora_alpha, lora_dropout):
rel=INC_LEFT, arg_name='lora_dropout', prim_name=self.cls_name)

def _check_init(self, lora_a_init, lora_b_init, lora_rank):
"""check init arguments"""
if isinstance(lora_a_init, Tensor):
if lora_a_init.ndim != 2 or lora_a_init.shape[0] != lora_rank or \
lora_a_init.shape[1] != self.in_channels:
@@ -201,4 +203,3 @@ def _check_type_of_data(self, param_init_type, compute_dtype):
if compute_dtype not in VALID_TENSOR_DATATYPE:
raise TypeError(f"For {self.cls_name}, the 'compute_dtype' must be mindspore.dtype.float16 or "
f"mindspore.dtype.float32, but got {compute_dtype}.")

38 changes: 19 additions & 19 deletions tk/delta/low_rank_adapter.py → mindpet/delta/low_rank_adapter.py
@@ -1,27 +1,22 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright (c) Huawei Technologies Co., Ltd. 2022-2023, All rights reserved.

import numbers

import mindspore as ms
import mindspore.nn as nn
"""Low Rank Adapter Cell"""
from mindspore import nn
import mindspore.common.dtype as mstype
from mindspore.common.initializer import initializer, Initializer
from mindspore.common.parameter import Parameter
from mindspore import Tensor

from mindspore.nn.layer.activation import get_activation, _activation
from mindspore.ops import operations as P
from mindspore.ops import functional as F
from tk.delta.delta_constants import VALID_TENSOR_DATATYPE

try:
from mindspore.nn.transformer.layers import _args_type_validator_check, _valid_type_checks, _valid_value_checks
from mindspore.nn.transformer.layers import _args_type_validator_check, _valid_value_checks
from mindspore._checkparam import Validator
except:
from mindformers.modules.layers import _args_type_validator_check, _valid_type_checks, _valid_value_checks
except ImportError:
import mindspore._checkparam as Validator
from mindformers.modules.layers import _args_type_validator_check, _valid_value_checks
from mindpet.delta.delta_constants import VALID_TENSOR_DATATYPE


class LowRankLinear(nn.Cell):
@@ -63,7 +58,7 @@ def __init__(self,
has_bias: bool = True,
param_init_type: mstype = mstype.float32,
compute_dtype: mstype = mstype.float16):
super(LowRankLinear, self).__init__()
super().__init__()

self.in_channels = in_channels
self.out_channels = out_channels
@@ -95,6 +90,7 @@ def __init__(self,
self.shape_op = P.Shape()

def construct(self, input_tensor):
"""Forward"""
# get input_x info
x_shape = self.shape_op(input_tensor)
ori_dtype = F.dtype(input_tensor)
@@ -178,7 +174,7 @@ def __init__(
compute_dtype: mstype = mstype.float16):
self._check_low_rank_w_init(low_rank_w_init)
self._check_reduction_factor(hidden_size, reduction_factor)
super(LowRankAdapterLayer, self).__init__()
super().__init__()

self.bottleneck_size = hidden_size // reduction_factor
self.non_linearity = non_linearity
@@ -200,6 +196,7 @@ def __init__(
self.residual_add = P.Add()

def construct(self, input_tensor):
"""Forward"""
# get input_x info
x_shape = P.Shape()(input_tensor)
ori_dtype = F.dtype(input_tensor)
@@ -284,6 +281,7 @@ def shard(self, strategy_matmul_down_sampler_weight=None,
self.residual_add.shard(strategy_residual_add)

except Exception as ex:
# pylint: disable=W0719
raise Exception(
f"Exception occurred when set the shard for LowRankAdapterLayer, error message: {str(ex)}") from ex

@@ -368,12 +366,12 @@ def __init__(self,
non_linearity: str = "gelu",
param_init_type: mstype = mstype.float32,
compute_dtype: mstype = mstype.float16):
super(LowRankAdapterDense, self).__init__(in_channels=in_channels,
out_channels=out_channels,
weight_init=weight_init,
bias_init=bias_init,
has_bias=has_bias,
activation=activation)
super().__init__(in_channels=in_channels,
out_channels=out_channels,
weight_init=weight_init,
bias_init=bias_init,
has_bias=has_bias,
activation=activation)
self.tk_delta_low_rank_adapter = LowRankAdapterLayer(hidden_size=out_channels,
reduction_factor=reduction_factor,
low_rank_size=low_rank_size,
@@ -386,6 +384,7 @@ def __init__(self,
self.act_name = activation

def construct(self, input_tensor):
"""Forward"""
# get input_x info
x_shape = self.shape_op(input_tensor)
ori_dtype = F.dtype(input_tensor)
@@ -482,6 +481,7 @@ def shard(self, strategy_matmul_org=None,
strategy_bias_up_sampler,
strategy_residual_add)
except Exception as ex:
# pylint: disable=W0719
raise Exception(
f"Exception occurred when set the shard for LowRankAdapterDense, error message: {str(ex)}") from ex

10 changes: 4 additions & 6 deletions tk/delta/prefix_layer.py → mindpet/delta/prefix_layer.py
@@ -1,18 +1,16 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright © Huawei Technologies Co., Ltd. 2010-2022. All rights reserved.

"""Prefix Layer Cell"""
import mindspore as ms
import mindspore.nn as nn

from tk.utils.version_control import get_dropout

from mindspore import nn
try:
from mindspore._checkparam import Validator, Rel
INC_LEFT = Rel.INC_LEFT
except:
except ImportError:
import mindspore._checkparam as Validator
INC_LEFT = Validator.INC_LEFT
from mindpet.utils.version_control import get_dropout


def check_multiple(param_dividend, value_dividend, param_divisor, value_divisor):