Quantization Update #150

Open
wants to merge 35 commits into base: dev

Commits (35)
b6ee526
added utils
sydarb Aug 31, 2023
55f74c6
removed reconstruct
sydarb Aug 31, 2023
8871329
refactored quant modules
sydarb Aug 31, 2023
fcff933
modified quant methods
sydarb Aug 31, 2023
678b45e
modified quant base
sydarb Aug 31, 2023
a13775f
modified lapq
sydarb Aug 31, 2023
4ce8872
modified brecq
sydarb Aug 31, 2023
d214aa4
modified bitsplit
sydarb Aug 31, 2023
a0597b8
added qconfig helpers
sydarb Sep 3, 2023
c7e7e2d
added temp torch.ao.quantizer modules
sydarb Sep 3, 2023
2d7e239
updated quant model with bn act fusion
sydarb Sep 3, 2023
3a4462e
added new quantizer modules
sydarb Sep 3, 2023
e1b6108
updated quantizer methods
sydarb Sep 3, 2023
be251a8
fixed lapq method name bug
sydarb Sep 3, 2023
8a6b95f
fixed lapq refactoring errors
sydarb Sep 3, 2023
a51eaf9
customized model forward pass
sydarb Sep 4, 2023
761515f
modified qconfig generation
sydarb Sep 4, 2023
c4dd4b7
added base true quant
sydarb Sep 4, 2023
d0cf17b
added true quant to lapq
sydarb Sep 4, 2023
d5ef9dc
debug lapq true quant
sydarb Sep 5, 2023
a124452
made test progress bar as optional
sydarb Sep 5, 2023
1fde13e
modified quant dtype with reduced byte length
sydarb Sep 5, 2023
e31290e
made base mobilenetv2 model class cleaner
sydarb Sep 6, 2023
3c9b170
fixed quant module refactor bugs
sydarb Sep 6, 2023
7765fd5
added quantized basicblock
sydarb Sep 6, 2023
ec445c9
Revert "made base mobilenetv2 model class cleaner"
sydarb Sep 6, 2023
7afa24a
fixed lapq bugs
sydarb Sep 6, 2023
f8506bd
modified lapq expt nb
sydarb Sep 29, 2023
a37928a
added quant procedure info
sydarb Sep 29, 2023
797567a
removed nnq files
sydarb Sep 29, 2023
82bce33
changed class structure
sydarb Sep 30, 2023
a96122d
added diagrams to info
sydarb Oct 2, 2023
fb5b798
added observer methods
sydarb Oct 2, 2023
e760182
refactoring for observer approach
sydarb Oct 2, 2023
77ff4e8
modified diagram
sydarb Oct 2, 2023
1,064 changes: 565 additions & 499 deletions experiments/quantization/LAPQ/lapq_demo.ipynb

Large diffs are not rendered by default.

20 changes: 11 additions & 9 deletions trailmet/algorithms/algorithms.py
Expand Up @@ -180,20 +180,20 @@ def accuracy(self, output, target, topk=(1, )):
res.append(correct_k.mul_(100.0 / batch_size))
return res

def test(self, model, dataloader, loss_fn=None, device=None):
def test(self, model, dataloader, loss_fn=None, device=None, progress=True):
"""This method is used to test the performance of the trained model."""
if device is None:
device = next(model.parameters()).device
else:
model.to(device)
model.eval()
counter = 0
tk1 = tqdm_notebook(dataloader, total=len(dataloader))
running_acc1 = 0
running_acc5 = 0
running_loss = 0
pbar = tqdm_notebook(dataloader, total=len(dataloader)) if progress else dataloader
with torch.no_grad():
for images, targets in tk1:
for images, targets in pbar:
counter += 1
images = images.to(device)
targets = targets.to(device)
@@ -204,13 +204,15 @@ def test(self, model, dataloader, loss_fn=None, device=None):
                 if loss_fn is not None:
                     loss = loss_fn(outputs, targets)
                     running_loss += loss.item()
-                    tk1.set_postfix(
-                        loss=running_loss / counter,
-                        acc1=running_acc1 / counter,
-                        acc5=running_acc5 / counter,
-                    )
+                    if progress:
+                        pbar.set_postfix(
+                            loss=running_loss / counter,
+                            acc1=running_acc1 / counter,
+                            acc5=running_acc5 / counter,
+                        )
                 else:
-                    tk1.set_postfix(acc1=running_acc1 / counter,
+                    if progress:
+                        pbar.set_postfix(acc1=running_acc1 / counter,
                                         acc5=running_acc5 / counter)
         if loss_fn is not None:
             return running_acc1 / counter, running_loss / counter
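A minimal usage sketch of the new progress flag; the algo, model, val_loader, and criterion names below are placeholders for an existing algorithm instance, trained model, DataLoader, and loss function, not part of this diff:

    import torch.nn as nn

    # Placeholders: `algo` is an algorithm instance exposing this test method,
    # `model` a trained network, `val_loader` a validation DataLoader.
    criterion = nn.CrossEntropyLoss()

    # Silent evaluation, e.g. in scripted runs: no tqdm notebook bar is created.
    top1, avg_loss = algo.test(model, val_loader, loss_fn=criterion, progress=False)

    # Default behaviour keeps the notebook progress bar with running loss/accuracy postfix.
    top1, avg_loss = algo.test(model, val_loader, loss_fn=criterion)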
55 changes: 5 additions & 50 deletions trailmet/algorithms/quantize/__init__.py
@@ -19,53 +19,8 @@
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
-from .bitsplit import BitSplit
-from .brecq import BRECQ
-from .lapq import LAPQ
-from .methods import (
-    UniformAffineQuantizer,
-    AdaRoundQuantizer,
-    BitSplitQuantizer,
-    ActQuantizer,
-    QuantizationBase,
-    UniformQuantization,
-    ClippedUniformQuantization,
-    FixedClipValueQuantization,
-    MaxAbsStaticQuantization,
-    LearnedStepSizeQuantization,
-    LpNormQuantization,
-)
-from .qmodel import (
-    QuantBasicBlock,
-    QuantBottleneck,
-    QuantInvertedResidual,
-    QuantModule,
-    BaseQuantBlock,
-    QBasicBlock,
-    QBottleneck,
-    QInvertedResidual,
-    ActivationModuleWrapper,
-    ParameterModuleWrapper,
-)
-from .quantize import (
-    BaseQuantization,
-    StraightThrough,
-    RoundSTE,
-    Conv2dFunctor,
-    LinearFunctor,
-    FoldBN,
-)
-from .reconstruct import (
-    StopForwardException,
-    DataSaverHook,
-    GetLayerInpOut,
-    save_inp_oup_data,
-    GradSaverHook,
-    GetLayerGrad,
-    save_grad_data,
-    LinearTempDecay,
-    LayerLossFunction,
-    layer_reconstruction,
-    BlockLossFunction,
-    block_reconstruction,
-)
+
+from . import quantize
+from . import lapq
+from . import bitsplit
+from . import brecq
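Since the package __init__ now re-exports whole submodules instead of individual classes, downstream code imports through the submodule. A short sketch of the change; the class names are assumed to still live in those submodules after the refactor:

    # Old, package-level re-exports:
    #     from trailmet.algorithms.quantize import LAPQ, BitSplit, BRECQ
    # New, submodule-level access (assumed class locations):
    from trailmet.algorithms.quantize import lapq, bitsplit, brecq

    method_cls = lapq.LAPQ  # resolve the algorithm class through its submodule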
105 changes: 105 additions & 0 deletions trailmet/algorithms/quantize/_methods.py
@@ -0,0 +1,105 @@
import torch
import torch.nn as nn
from typing import Dict, Callable
from trailmet.algorithms.quantize.observers import BaseObserver, MinMaxObserver, LpNormObserver
from trailmet.algorithms.quantize.utils import reshape_qparams_by_channel



OBSERVER_MAPPING: Dict[str, Callable] = {
'min_max': MinMaxObserver,
'lp_norm': LpNormObserver
}


class RoundSTE(torch.autograd.Function):
"""grad enabled round function"""
@staticmethod
def forward(ctx, input):
return torch.round(input)

@staticmethod
def backward(ctx, grad_output):
return grad_output


class FloorSTE(torch.autograd.Function):
"""grad enabled floor function"""
@staticmethod
def forward(ctx, input):
return torch.floor(input)

@staticmethod
def backward(ctx, grad_output):
return grad_output


class BaseQuantizer(nn.Module):
    def __init__(self, kwargs: dict):
        super().__init__()  # nn.Module init must run before buffers/parameters are registered
        self.observer: BaseObserver = OBSERVER_MAPPING[kwargs.get(
            'observer', 'min_max')](**kwargs)
self.quant_min = self.observer.quant_min
self.quant_max = self.observer.quant_max
self.per_channel = kwargs.get('per_channel', False)
self.ch_axis = kwargs.get('ch_axis', 0)
self.enable_observation = True
self.enable_quantization = True

def __register_buffer__(self, name, value):
if hasattr(self, name):
delattr(self, name)
self.register_buffer(name, value)

def __register_parameter__(self, name, value):
if hasattr(self, name):
delattr(self, name)
self.register_parameter(name, nn.Parameter(value))

def quantize(self, x: torch.Tensor, scale: torch.Tensor, zero_point: torch.Tensor,
round_mode: str = 'nearest'):
if self.per_channel:
scale, zero_point = reshape_qparams_by_channel(
x, scale, zero_point, self.ch_axis)
if round_mode == 'nearest':
x_int = RoundSTE.apply(x / scale)
elif round_mode == 'stochastic':
x_floor = FloorSTE.apply(x / scale)
x_int = x_floor + torch.bernoulli((x / scale) - x_floor)
else:
raise NotImplementedError
x_quant = torch.clamp(x_int + zero_point, self.quant_min, self.quant_max)
return x_quant

def dequantize(self, x_quant: torch.Tensor, scale: torch.Tensor, zero_point: torch.Tensor):
x_dequant = (x_quant - zero_point) * scale
return x_dequant

def reset_bitwidth(self, n_bits: int):
self.observer.reset_bitwidth(n_bits)
self.quant_min = self.observer.quant_min
self.quant_max = self.observer.quant_max


class UniformQuantizer(BaseQuantizer):
def __init__(self, kwargs: dict):
super().__init__(kwargs)
self.__register_buffer__('scale', torch.tensor([1.0], dtype=torch.float))
self.__register_buffer__('zero_point', torch.tensor([0], dtype=torch.int))

def forward(self, x: torch.Tensor):
if self.enable_observation:
x = self.observer(x)

if self.enable_quantization:
self.scale, self.zero_point = self.observer.calculate_qparams()
self.scale, self.zero_point = self.scale.to(x.device), self.zero_point.to(x.device)
x_quant = self.quantize(x, self.scale, self.zero_point)
x_dequant = self.dequantize(x_quant, self.scale, self.zero_point)
return x_dequant

return x


class AdaRoundQuantizer(BaseQuantizer):
def __init__(self, kwargs: dict):
super().__init__(kwargs)
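For reviewers, a minimal sketch of the fake-quantization round trip that BaseQuantizer.quantize and dequantize implement, using made-up scale and zero-point values, plus a check that RoundSTE passes gradients straight through the rounding step:

    import torch

    from trailmet.algorithms.quantize._methods import RoundSTE

    # Made-up qparams for an unsigned 8-bit range, purely for illustration.
    x = torch.tensor([-1.0, -0.3, 0.0, 0.4, 1.2])
    scale, zero_point = torch.tensor(0.01), torch.tensor(128)
    quant_min, quant_max = 0, 255

    x_int = torch.round(x / scale)                                   # nearest rounding
    x_quant = torch.clamp(x_int + zero_point, quant_min, quant_max)  # clamp in the integer domain
    x_dequant = (x_quant - zero_point) * scale                       # back onto the float grid

    # Straight-through estimator: rounding is treated as identity in backward,
    # whereas plain torch.round would yield an all-zero gradient.
    w = torch.tensor([0.37, -0.92], requires_grad=True)
    RoundSTE.apply(w / scale).sum().backward()
    print(w.grad)  # tensor([100., 100.]) == 1 / scale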