[tests] add pytorch.mobilenet

Signed-off-by: Avimitin <[email protected]>
chipsalliance · Aug 27, 2024 · 85619c9 · 85619c9
1 parent e499a93
commit 85619c9
Show file tree

Hide file tree

Showing 6 changed files with 300 additions and 1 deletion.
diff --git a/nix/pkgs/buddy-mlir.nix b/nix/pkgs/buddy-mlir.nix
@@ -57,6 +57,7 @@ let
       pyenv = python3.withPackages (ps: [
         self
         ps.torch
+        ps.torchvision
       ]);
     };
   };

diff --git a/tests/default.nix b/tests/default.nix
@@ -80,7 +80,7 @@ let
     findAndBuild = dir: build:
       lib.recurseIntoAttrs (lib.pipe (builtins.readDir dir) [
         # filter out all non-directory entries and underscore-prefixed directories
-        (lib.filterAttrs (name: type: type == "directory" && ! (lib.hasPrefix "_" name) && name != "include"))
+        (lib.filterAttrs (name: type: type == "directory" && ! (lib.hasPrefix "_" name) && ! (lib.elem name [ "lib" "include" ])))
         # prepend path with base directory
         (lib.mapAttrs (subDirName: _: (lib.path.append dir subDirName)))
         # build. If {sourcePath}/default.nix exists, call it. Otherwise call the generic builder

diff --git a/tests/pytorch/lib/MemrefCopy.cc b/tests/pytorch/lib/MemrefCopy.cc
@@ -0,0 +1,120 @@
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Unranked memref: a runtime rank plus an untyped pointer to a descriptor.
+template <typename T> struct UnrankedMemRefType {
+  int32_t rank;
+  void *descriptor; // cast to a StridedMemRefType by DynamicMemRefType
+};
+
+/// StridedMemRef descriptor type with static rank N (general case).
+template <typename T, int N> struct StridedMemRefType {
+  T *basePtr;
+  T *data;
+  int64_t offset; // in elements; memrefCopy scales it by the element size
+  int64_t sizes[N];
+  int64_t strides[N]; // in elements; memrefCopy scales them by the element size
+};
+
+/// StridedMemRef descriptor type specialized for rank 1; adds element indexing.
+template <typename T> struct StridedMemRefType<T, 1> {
+  T *basePtr;
+  T *data;
+  int64_t offset;
+  int64_t sizes[1];
+  int64_t strides[1];
+  // Returns the idx-th element, applying the offset and the single stride.
+  T &operator[](int64_t idx) { return *(data + offset + idx * strides[0]); }
+};
+
+/// StridedMemRef descriptor type specialized for rank 0 (no sizes or strides).
+template <typename T> struct StridedMemRefType<T, 0> {
+  T *basePtr;
+  T *data;
+  int64_t offset;
+};
+
+// A rank-erased view of a StridedMemRef; sizes/strides point into the descriptor.
+template <typename T> class DynamicMemRefType {
+public:
+  int64_t rank;
+  T *basePtr;
+  T *data;
+  int64_t offset;
+  const int64_t *sizes;
+  const int64_t *strides;
+  // Rank-0 descriptor: there are no sizes or strides to point at.
+  explicit DynamicMemRefType(const StridedMemRefType<T, 0> &memRef)
+      : rank(0), basePtr(memRef.basePtr), data(memRef.data),
+        offset(memRef.offset), sizes(nullptr), strides(nullptr) {}
+  template <int N>
+  explicit DynamicMemRefType(const StridedMemRefType<T, N> &memRef)
+      : rank(N), basePtr(memRef.basePtr), data(memRef.data),
+        offset(memRef.offset), sizes(memRef.sizes), strides(memRef.strides) {}
+  explicit DynamicMemRefType(const ::UnrankedMemRefType<T> &memRef)
+      : rank(memRef.rank) { // the descriptor is read through a rank-1 view below
+    auto *desc = static_cast<StridedMemRefType<T, 1> *>(memRef.descriptor);
+    basePtr = desc->basePtr;
+    data = desc->data;
+    offset = desc->offset;
+    sizes = rank == 0 ? nullptr : desc->sizes;
+    strides = sizes + rank; // treats the entries after `rank` sizes as the strides
+  }
+};
+
+extern "C" void memrefCopy(int32_t elemSize, UnrankedMemRefType<char> *srcArg,
+                           UnrankedMemRefType<char> *dstArg) {
+  DynamicMemRefType<char> src(*srcArg); // rank-erased views of both operands
+  DynamicMemRefType<char> dst(*dstArg);
+
+  int32_t rank = src.rank; // only the source rank is consulted; dst.rank is unread
+
+  // An extent of zero in any source dimension means there is nothing to copy.
+  for (int rankp = 0; rankp < rank; ++rankp)
+    if (src.sizes[rankp] == 0)
+      return;
+
+  char *srcPtr = src.data + src.offset * elemSize; // element offset -> bytes
+  char *dstPtr = dst.data + dst.offset * elemSize;
+
+  if (rank == 0) { // rank 0: copy exactly one element
+    memcpy(dstPtr, srcPtr, elemSize);
+    return;
+  }
+  // Per-axis scratch arrays, allocated with alloca.
+  int32_t *indices = static_cast<int32_t *>(alloca(sizeof(int32_t) * rank));
+  int32_t *srcStrides = static_cast<int32_t *>(alloca(sizeof(int32_t) * rank));
+  int32_t *dstStrides = static_cast<int32_t *>(alloca(sizeof(int32_t) * rank));
+
+  // Zero every index and rescale strides from element units to bytes.
+  for (int rankp = 0; rankp < rank; ++rankp) {
+    indices[rankp] = 0;
+    srcStrides[rankp] = src.strides[rankp] * elemSize; // narrows int64 to int32
+    dstStrides[rankp] = dst.strides[rankp] * elemSize; // narrows int64 to int32
+  }
+
+  int32_t readIndex = 0, writeIndex = 0; // byte offsets from srcPtr / dstPtr
+  for (;;) {
+    // Copy one element of elemSize bytes.
+    memcpy(dstPtr + writeIndex, srcPtr + readIndex, elemSize);
+    // Step the multi-dimensional index, innermost axis first.
+    for (int32_t axis = rank - 1; axis >= 0; --axis) {
+      // Advance at current axis.
+      auto newIndex = ++indices[axis];
+      readIndex += srcStrides[axis];
+      writeIndex += dstStrides[axis];
+      // If this is a valid index, we have our next index, so continue copying.
+      if (src.sizes[axis] != newIndex)
+        break;
+      // We reached the end of this axis. If this is axis 0, we are done.
+      if (axis == 0)
+        return;
+      // Else, reset to 0 and undo the advancement of the linear index that
+      // this axis had. Then continue with the axis one outer.
+      indices[axis] = 0;
+      readIndex -= src.sizes[axis] * srcStrides[axis];
+      writeIndex -= dst.sizes[axis] * dstStrides[axis]; // NOTE(review): assumes dst.sizes == src.sizes; confirm
+    }
+  }
+}
diff --git a/tests/pytorch/mobilenet/build.nix b/tests/pytorch/mobilenet/build.nix
@@ -0,0 +1,73 @@
+{ fetchurl
+, buildBuddyE2ETest
+}: # Defines the "mobilenet" test case by calling buildBuddyE2ETest.
+let
+  checkpointFile = "mobilenet_v3_small-047dcff4.pth";
+  modelCache = fetchurl { # checkpoint fetched from download.pytorch.org, pinned by hash
+    url = "https://download.pytorch.org/models/${checkpointFile}";
+    hash = "sha256-BH3P9K3e+G6lvC7/E8lhTcEfR6sRYNCnGiXn25lPTh8=";
+  };
+in
+buildBuddyE2ETest { # optPhase: run mobilenet.py, lower both MLIR files, compile MemrefCopy.cc
+  caseName = "mobilenet";
+
+  optPhase = ''
+    mkdir -p pytorchCache/hub/checkpoints/
+    cp -v ${modelCache} pytorchCache/hub/checkpoints/${checkpointFile}
+    export TORCH_HOME=pytorchCache
+    python ./mobilenet.py
+
+    echo "Lowering forward.mlir"
+    buddy-opt forward.mlir -pass-pipeline \
+        "builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg, tosa-to-tensor, tosa-to-arith), \
+              empty-tensor-to-alloc-tensor, convert-elementwise-to-linalg, arith-bufferize, \
+              func.func(linalg-bufferize, tensor-bufferize), func-bufferize)" \
+      | buddy-opt -pass-pipeline \
+        "builtin.module(func.func(buffer-deallocation-simplification, convert-linalg-to-loops), \
+              eliminate-empty-tensors, func.func(llvm-request-c-wrappers), \
+              convert-math-to-llvm, convert-math-to-libm, convert-scf-to-cf, \
+              convert-arith-to-llvm, expand-strided-metadata, finalize-memref-to-llvm, \
+              convert-func-to-llvm, reconcile-unrealized-casts)" \
+      > forward-lowered.mlir
+
+    echo "Lowering subgraphs[0]"
+    buddy-opt subgraphs0.mlir -pass-pipeline \
+        "builtin.module(func.func(tosa-to-linalg-named, tosa-to-arith, tosa-to-linalg, tosa-to-tensor))" \
+      | buddy-opt \
+          --convert-elementwise-to-linalg \
+          --func-bufferize-dynamic-offset \
+          --arith-bufferize \
+          --func-bufferize \
+          --tensor-bufferize \
+          --linalg-bufferize \
+          --finalizing-bufferize \
+          --batchmatmul-optimize \
+          --convert-linalg-to-affine-loops \
+          --lower-affine \
+          --convert-vector-to-scf \
+          --convert-scf-to-cf \
+          --llvm-request-c-wrappers \
+          --lower-vector-exp \
+          --lower-rvv=rv32 \
+          --convert-vector-to-llvm \
+          --convert-math-to-llvm \
+          --convert-math-to-libm \
+          --convert-arith-to-llvm \
+          --convert-func-to-llvm \
+          --expand-strided-metadata \
+          --finalize-memref-to-llvm \
+          --reconcile-unrealized-casts \
+      > subgraphs0-lowered.mlir
+
+    echo "Compiling memrefCopy library"
+    $CXX -nostdlib -c ${../lib/MemrefCopy.cc} -o memrefCopy.o
+    llcArtifacts+=(
+      memrefCopy.o
+    )
+
+    optArtifacts+=(
+      "forward-lowered.mlir"
+      "subgraphs0-lowered.mlir"
+    )
+  '';
+}
diff --git a/tests/pytorch/mobilenet/mobilenet.cc b/tests/pytorch/mobilenet/mobilenet.cc
@@ -0,0 +1,45 @@
+#include "img.hpp"
+
+#define INPUT_N 1
+#define INPUT_C 3
+#define INPUT_H 224
+#define INPUT_W 224
+#define INPUT_TOTAL (INPUT_N * INPUT_C * INPUT_H * INPUT_W)
+#define OUTPUT_N 1000
+// NOTE(review): alternative PARAM_N0 value 2554968 is disabled; confirm 25549 is intended.
+#define PARAM_N0 25549
+#define PARAM_N1 34
+// Statically allocated buffers, placed in the ".vdata" section.
+__attribute((section(".vdata"))) float input_0[INPUT_TOTAL];
+__attribute((section(".vdata"))) float output_0[OUTPUT_N];
+__attribute((section(".vdata"))) float param_0[PARAM_N0];
+__attribute((section(".vdata"))) int64_t param_1[PARAM_N1];
+
+// Shape arrays handed to the container constructors in test().
+static const int32_t sizesInput[4] = {INPUT_N, INPUT_C, INPUT_H, INPUT_W};
+static const int32_t sizesOutput[2] = {1, OUTPUT_N};
+static const int32_t sizesParam0[1] = {PARAM_N0};
+static const int32_t sizesParam1[1] = {PARAM_N1};
+// Externally defined forward function, called from test() below.
+extern "C" {
+void _mlir_ciface_forward(MemRef<float, 2> *output, MemRef<float, 1> *arg0,
+                          MemRef<int64_t, 1> *arg1, Image<float, 4> *input);
+}
+
+extern "C" int test() {
+  // Test entry point: wraps the statically allocated buffers in Image/MemRef
+  // containers and calls the externally defined _mlir_ciface_forward once.
+  // Always returns 0.
+
+  // Create input and output containers for the image and model output.
+  Image<float, 4> input(input_0, sizesInput);
+  MemRef<float, 2> output(output_0, sizesOutput);
+
+  // Wrap the parameter buffers (2.0 / 1 presumably fill values; confirm in img.hpp).
+  MemRef<float, 1> paramsF32(param_0, 2.0, sizesParam0);
+  MemRef<int64_t, 1> paramsI64(param_1, 1, sizesParam1);
+
+  _mlir_ciface_forward(&output, &paramsF32, &paramsI64, &input);
+
+  return 0;
+}
diff --git a/tests/pytorch/mobilenet/mobilenet.py b/tests/pytorch/mobilenet/mobilenet.py
@@ -0,0 +1,60 @@
+# ===- buddy_mobilenetv3_import.py ---------------------------------------------
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ===---------------------------------------------------------------------------
+#
+# This is the MobileNet V3 model AOT importer.
+#
+# Source: https://github.com/buddy-compiler/buddy-benchmark
+#
+# ===---------------------------------------------------------------------------
+
+import os
+
+from pathlib import Path
+import numpy as np
+import torch
+import torchvision.models as models
+from torch._inductor.decomposition import decompositions as inductor_decomp
+
+from buddy.compiler.frontend import DynamoCompiler
+from buddy.compiler.graph import GraphDriver
+from buddy.compiler.graph.transform import simply_fuse
+from buddy.compiler.ops import tosa
+
+model = models.mobilenet_v3_small(
+    weights=models.MobileNet_V3_Small_Weights.IMAGENET1K_V1, pretrained=True
+)
+model = model.eval()
+
+# Initialize Dynamo Compiler with specific configurations as an importer.
+dynamo_compiler = DynamoCompiler(
+    primary_registry=tosa.ops_registry,
+    aot_autograd_decomposition=inductor_decomp,
+)
+data = torch.randn([1, 3, 224, 224])
+# Import the model into MLIR module and parameters.
+with torch.no_grad():
+    graphs = dynamo_compiler.importer(model, data)
+assert len(graphs) == 1
+graph = graphs[0]
+params = dynamo_compiler.imported_params[graph]
+pattern_list = [simply_fuse]
+graphs[0].fuse_ops(pattern_list)
+driver = GraphDriver(graphs[0])
+driver.subgraphs[0].lower_to_top_level_ir()
+with open("subgraphs0.mlir", "w") as module_file:
+    print(driver.subgraphs[0]._imported_module, file=module_file)
+with open("forward.mlir", "w") as module_file:
+    print(driver.construct_main_graph(True), file=module_file)
-Original file line number
+Diff line change
@@ Expand Up / @@ -57,6 +57,7 @@ let @@
           pyenv = python3.withPackages (ps: [
             self
             ps.torch
+            ps.torchvision
           ]);
         };
       };
@@ Expand Down @@