Skip to content

Commit

Permalink
[tests] add pytorch.mobilenet
Browse files Browse the repository at this point in the history
Signed-off-by: Avimitin <[email protected]>
  • Loading branch information
Avimitin committed Aug 27, 2024
1 parent e499a93 commit 85619c9
Show file tree
Hide file tree
Showing 6 changed files with 300 additions and 1 deletion.
1 change: 1 addition & 0 deletions nix/pkgs/buddy-mlir.nix
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ let
pyenv = python3.withPackages (ps: [
self
ps.torch
ps.torchvision
]);
};
};
Expand Down
2 changes: 1 addition & 1 deletion tests/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ let
findAndBuild = dir: build:
lib.recurseIntoAttrs (lib.pipe (builtins.readDir dir) [
# filter out all non-directory entries and underscore-prefixed directories
(lib.filterAttrs (name: type: type == "directory" && ! (lib.hasPrefix "_" name) && name != "include"))
(lib.filterAttrs (name: type: type == "directory" && ! (lib.hasPrefix "_" name) && ! (lib.elem name [ "lib" "include" ])))
# prepend path with base directory
(lib.mapAttrs (subDirName: _: (lib.path.append dir subDirName)))
# build. If {sourcePath}/default.nix exists, call it. Otherwise call the generic builder
Expand Down
120 changes: 120 additions & 0 deletions tests/pytorch/lib/MemrefCopy.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

// Unranked MemRef: a rank plus a type-erased pointer to a ranked descriptor.
// DynamicMemRefType reinterprets `descriptor` as a StridedMemRefType whose
// sizes and strides arrays are laid out back to back, `rank` entries each.
// NOTE(review): `rank` is 32-bit here -- confirm this matches the descriptor
// layout produced by the lowering for the intended target.
template <typename T> struct UnrankedMemRefType {
// Number of dimensions of the memref that `descriptor` points to.
int32_t rank;
// Type-erased pointer to the ranked (strided) descriptor.
void *descriptor;
};

/// StridedMemRef descriptor type with static rank.
///
/// Plain data layout: two pointers, an offset, then N sizes followed by N
/// strides. memrefCopy multiplies `offset` and `strides` by the element size
/// in bytes, i.e. both are expressed in elements, not bytes.
template <typename T, int N> struct StridedMemRefType {
// Not read anywhere in this file; presumably the base of the underlying
// allocation -- TODO confirm.
T *basePtr;
// Pointer that `offset` and the strides are applied to.
T *data;
// Index (in elements) of the first element relative to `data`.
int64_t offset;
// Extent of each dimension.
int64_t sizes[N];
// Step (in elements) between consecutive indices of each dimension.
int64_t strides[N];
};

/// Rank-1 specialization of the StridedMemRef descriptor.
///
/// Same field layout as the generic descriptor with N == 1, plus an indexing
/// operator for direct element access.
template <typename T> struct StridedMemRefType<T, 1> {
  T *basePtr;
  T *data;
  int64_t offset;
  int64_t sizes[1];
  int64_t strides[1];

  /// Returns a reference to the element at logical position `idx`, i.e. the
  /// element located `offset + idx * strides[0]` elements past `data`.
  /// No bounds checking is performed.
  T &operator[](int64_t idx) {
    T *firstElement = data + offset;
    return firstElement[idx * strides[0]];
  }
};

/// StridedMemRef descriptor type specialized for rank 0.
///
/// A rank-0 memref has no sizes or strides arrays; only the two pointers and
/// the offset are present. DynamicMemRefType maps this to null sizes/strides.
template <typename T> struct StridedMemRefType<T, 0> {
// Not read anywhere in this file; presumably the base of the underlying
// allocation -- TODO confirm.
T *basePtr;
// Pointer that `offset` is applied to.
T *data;
// Index (in elements) of the single element relative to `data`.
int64_t offset;
};

// A reference to one of the StridedMemRef types.
template <typename T> class DynamicMemRefType {
public:
int64_t rank;
T *basePtr;
T *data;
int64_t offset;
const int64_t *sizes;
const int64_t *strides;

explicit DynamicMemRefType(const StridedMemRefType<T, 0> &memRef)
: rank(0), basePtr(memRef.basePtr), data(memRef.data),
offset(memRef.offset), sizes(nullptr), strides(nullptr) {}
template <int N>
explicit DynamicMemRefType(const StridedMemRefType<T, N> &memRef)
: rank(N), basePtr(memRef.basePtr), data(memRef.data),
offset(memRef.offset), sizes(memRef.sizes), strides(memRef.strides) {}
explicit DynamicMemRefType(const ::UnrankedMemRefType<T> &memRef)
: rank(memRef.rank) {
auto *desc = static_cast<StridedMemRefType<T, 1> *>(memRef.descriptor);
basePtr = desc->basePtr;
data = desc->data;
offset = desc->offset;
sizes = rank == 0 ? nullptr : desc->sizes;
strides = sizes + rank;
}
};

/// Copies every element of the source memref into the destination memref,
/// honoring the offset and strides of both.
///
/// \param elemSize size of one element in bytes.
/// \param srcArg   unranked descriptor of the memref to read from.
/// \param dstArg   unranked descriptor of the memref to write to.
///
/// The iteration space is taken from the source (rank and sizes); the
/// destination is expected to describe the same shape. Nothing is copied when
/// any source dimension has size 0. A rank-0 memref copies exactly one
/// element.
///
/// All index and byte-offset arithmetic is done in 64 bits: the descriptors
/// store rank, sizes and strides as int64_t, so narrower accumulators would
/// silently truncate them and could overflow for large buffers.
extern "C" void memrefCopy(int32_t elemSize, UnrankedMemRefType<char> *srcArg,
                           UnrankedMemRefType<char> *dstArg) {
  DynamicMemRefType<char> src(*srcArg);
  DynamicMemRefType<char> dst(*dstArg);

  const int64_t rank = src.rank;

  // Handle empty shapes -> nothing to copy.
  for (int64_t dim = 0; dim < rank; ++dim)
    if (src.sizes[dim] == 0)
      return;

  // Byte addresses of the first element on each side.
  char *srcPtr = src.data + src.offset * elemSize;
  char *dstPtr = dst.data + dst.offset * elemSize;

  if (rank == 0) {
    memcpy(dstPtr, srcPtr, elemSize);
    return;
  }

  // Scratch arrays live on the stack of this call; rank is expected to be
  // small.
  int64_t *indices = static_cast<int64_t *>(alloca(sizeof(int64_t) * rank));
  int64_t *srcStrides = static_cast<int64_t *>(alloca(sizeof(int64_t) * rank));
  int64_t *dstStrides = static_cast<int64_t *>(alloca(sizeof(int64_t) * rank));

  // Initialize the multi-dimensional index and convert strides from elements
  // to bytes.
  for (int64_t dim = 0; dim < rank; ++dim) {
    indices[dim] = 0;
    srcStrides[dim] = src.strides[dim] * elemSize;
    dstStrides[dim] = dst.strides[dim] * elemSize;
  }

  int64_t readIndex = 0, writeIndex = 0;
  for (;;) {
    // Copy one element.
    memcpy(dstPtr + writeIndex, srcPtr + readIndex, elemSize);
    // Advance the index and both byte positions, innermost axis first.
    for (int64_t axis = rank - 1; axis >= 0; --axis) {
      // Advance at current axis.
      int64_t newIndex = ++indices[axis];
      readIndex += srcStrides[axis];
      writeIndex += dstStrides[axis];
      // If this is a valid index, we have our next index, so continue copying.
      if (src.sizes[axis] != newIndex)
        break;
      // We reached the end of this axis. If this is axis 0, we are done.
      if (axis == 0)
        return;
      // Else, reset to 0 and undo the advancement of the linear index that
      // this axis had. Then continue with the axis one outer.
      indices[axis] = 0;
      readIndex -= src.sizes[axis] * srcStrides[axis];
      writeIndex -= dst.sizes[axis] * dstStrides[axis];
    }
  }
}
73 changes: 73 additions & 0 deletions tests/pytorch/mobilenet/build.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Build description of the "mobilenet" test case.
#
# The expression is a function of `fetchurl` and `buildBuddyE2ETest`; it
# returns the result of calling `buildBuddyE2ETest` with a case name and a
# shell script (`optPhase`).
{ fetchurl
, buildBuddyE2ETest
}:
let
# File name of the checkpoint; used both in the download URL and as the name
# of the copy placed in the local cache directory.
checkpointFile = "mobilenet_v3_small-047dcff4.pth";
# The checkpoint is fetched with a pinned hash and copied into place by
# `optPhase`, which points TORCH_HOME at the local cache directory before
# running the Python script.
modelCache = fetchurl {
url = "https://download.pytorch.org/models/${checkpointFile}";
hash = "sha256-BH3P9K3e+G6lvC7/E8lhTcEfR6sRYNCnGiXn25lPTh8=";
};
in
buildBuddyE2ETest {
caseName = "mobilenet";

# Shell script with these steps, in order:
#   1. Create pytorchCache/hub/checkpoints/, copy the fetched checkpoint into
#      it and export TORCH_HOME=pytorchCache.
#   2. Run ./mobilenet.py, which writes forward.mlir and subgraphs0.mlir.
#   3. Lower forward.mlir through two chained buddy-opt pass pipelines into
#      forward-lowered.mlir.
#   4. Lower subgraphs0.mlir through a buddy-opt pipeline followed by a list
#      of individual buddy-opt passes (including --lower-rvv=rv32) into
#      subgraphs0-lowered.mlir.
#   5. Compile ../lib/MemrefCopy.cc with $CXX into memrefCopy.o.
#   6. Append memrefCopy.o to the `llcArtifacts` shell array and the two
#      lowered .mlir files to `optArtifacts`.
# NOTE(review): how `llcArtifacts` / `optArtifacts` are consumed is defined by
# buildBuddyE2ETest, which is not visible here -- confirm there.
optPhase = ''
mkdir -p pytorchCache/hub/checkpoints/
cp -v ${modelCache} pytorchCache/hub/checkpoints/${checkpointFile}
export TORCH_HOME=pytorchCache
python ./mobilenet.py
echo "Lowering forward.mlir"
buddy-opt forward.mlir -pass-pipeline \
"builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg, tosa-to-tensor, tosa-to-arith), \
empty-tensor-to-alloc-tensor, convert-elementwise-to-linalg, arith-bufferize, \
func.func(linalg-bufferize, tensor-bufferize), func-bufferize)" \
| buddy-opt -pass-pipeline \
"builtin.module(func.func(buffer-deallocation-simplification, convert-linalg-to-loops), \
eliminate-empty-tensors, func.func(llvm-request-c-wrappers), \
convert-math-to-llvm, convert-math-to-libm, convert-scf-to-cf, \
convert-arith-to-llvm, expand-strided-metadata, finalize-memref-to-llvm, \
convert-func-to-llvm, reconcile-unrealized-casts)" \
> forward-lowered.mlir
echo "Lowering subgraphs[0]"
buddy-opt subgraphs0.mlir -pass-pipeline \
"builtin.module(func.func(tosa-to-linalg-named, tosa-to-arith, tosa-to-linalg, tosa-to-tensor))" \
| buddy-opt \
--convert-elementwise-to-linalg \
--func-bufferize-dynamic-offset \
--arith-bufferize \
--func-bufferize \
--tensor-bufferize \
--linalg-bufferize \
--finalizing-bufferize \
--batchmatmul-optimize \
--convert-linalg-to-affine-loops \
--lower-affine \
--convert-vector-to-scf \
--convert-scf-to-cf \
--llvm-request-c-wrappers \
--lower-vector-exp \
--lower-rvv=rv32 \
--convert-vector-to-llvm \
--convert-math-to-llvm \
--convert-math-to-libm \
--convert-arith-to-llvm \
--convert-func-to-llvm \
--expand-strided-metadata \
--finalize-memref-to-llvm \
--reconcile-unrealized-casts \
> subgraphs0-lowered.mlir
echo "Compiling memrefCopy library"
$CXX -nostdlib -c ${../lib/MemrefCopy.cc} -o memrefCopy.o
llcArtifacts+=(
memrefCopy.o
)
optArtifacts+=(
"forward-lowered.mlir"
"subgraphs0-lowered.mlir"
)
'';
}
45 changes: 45 additions & 0 deletions tests/pytorch/mobilenet/mobilenet.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#include "img.hpp"

// Input tensor dimensions; INPUT_TOTAL is the number of float elements in
// the 1 x 3 x 224 x 224 input buffer.
#define INPUT_N 1
#define INPUT_C 3
#define INPUT_H 224
#define INPUT_W 224
#define INPUT_TOTAL (INPUT_N * INPUT_C * INPUT_H * INPUT_W)
// Number of elements in the model output buffer.
#define OUTPUT_N 1000
// Element counts of the two parameter buffers (float and int64_t).
// NOTE(review): the larger PARAM_N0 below is commented out and a much smaller
// value is active -- presumably a deliberate reduction of the buffer size;
// confirm it is consistent with what the compiled model reads.
// #define PARAM_N0 2554968
#define PARAM_N0 25549
#define PARAM_N1 34

// Statically allocated backing storage for the input, output and parameters.
// Each buffer is placed in the ".vdata" section via the section attribute.
// NOTE(review): the meaning of ".vdata" is defined by the linker script,
// which is not visible here.
__attribute((section(".vdata"))) float input_0[INPUT_TOTAL];
__attribute((section(".vdata"))) float output_0[OUTPUT_N];
__attribute((section(".vdata"))) float param_0[PARAM_N0];
__attribute((section(".vdata"))) int64_t param_1[PARAM_N1];

// Define the sizes of the input and output tensors.
// These shape arrays are passed to the Image/MemRef constructors in test().
static const int32_t sizesInput[4] = {INPUT_N, INPUT_C, INPUT_H, INPUT_W};
static const int32_t sizesOutput[2] = {1, OUTPUT_N};
static const int32_t sizesParam0[1] = {PARAM_N0};
static const int32_t sizesParam1[1] = {PARAM_N1};

// Declaration of the model entry point, defined outside this file
// (presumably the C wrapper generated for `forward` -- TODO confirm).
// Takes pointers to the output container, the float and int64_t parameter
// containers, and the input image, in that order.
extern "C" {
void _mlir_ciface_forward(MemRef<float, 2> *output, MemRef<float, 1> *arg0,
MemRef<int64_t, 1> *arg1, Image<float, 4> *input);
}

/// Entry point of the MobileNet test case.
///
/// Wraps the statically allocated buffers in Image/MemRef containers and runs
/// the compiled model once through _mlir_ciface_forward. The output is left in
/// `output_0`; it is not inspected here.
///
/// \returns always 0.
extern "C" int test() {
  // Create input and output containers over the static buffers. The input
  // buffer is passed as-is; this function does not fill it.
  Image<float, 4> input(input_0, sizesInput);
  MemRef<float, 2> output(output_0, sizesOutput);

  // Create the parameter containers over the static buffers.
  // NOTE(review): the middle constructor argument (2.0 / 1) is presumably an
  // initial value written to every element -- confirm against img.hpp.
  MemRef<float, 1> paramsF32(param_0, 2.0, sizesParam0);
  MemRef<int64_t, 1> paramsI64(param_1, 1, sizesParam1);

  _mlir_ciface_forward(&output, &paramsF32, &paramsI64, &input);

  return 0;
}
60 changes: 60 additions & 0 deletions tests/pytorch/mobilenet/mobilenet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# ===- buddy_mobilenetv3_import.py ---------------------------------------------
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ===---------------------------------------------------------------------------
#
# This is the MobileNet V3 model AOT importer.
#
# Source: https://github.com/buddy-compiler/buddy-benchmark
#
# ===---------------------------------------------------------------------------

import os

from pathlib import Path
import numpy as np
import torch
import torchvision.models as models
from torch._inductor.decomposition import decompositions as inductor_decomp

from buddy.compiler.frontend import DynamoCompiler
from buddy.compiler.graph import GraphDriver
from buddy.compiler.graph.transform import simply_fuse
from buddy.compiler.ops import tosa

# Build MobileNet V3 Small with the IMAGENET1K_V1 weights. The `weights` enum
# alone selects the checkpoint to load; the deprecated `pretrained` flag is
# redundant next to it and is therefore not passed.
model = models.mobilenet_v3_small(
    weights=models.MobileNet_V3_Small_Weights.IMAGENET1K_V1
)
model = model.eval()

# Initialize Dynamo Compiler with specific configurations as an importer.
dynamo_compiler = DynamoCompiler(
    primary_registry=tosa.ops_registry,
    aot_autograd_decomposition=inductor_decomp,
)
# Example input used to import the model: one 3 x 224 x 224 tensor.
data = torch.randn([1, 3, 224, 224])
# Import the model into MLIR module and parameters.
with torch.no_grad():
    graphs = dynamo_compiler.importer(model, data)
# The rest of the script assumes the model was imported as a single graph.
assert len(graphs) == 1
graph = graphs[0]
# Looked up but not used further in this script.
params = dynamo_compiler.imported_params[graph]
pattern_list = [simply_fuse]
graph.fuse_ops(pattern_list)
driver = GraphDriver(graph)
driver.subgraphs[0].lower_to_top_level_ir()
# Write the lowered first subgraph and the main graph as MLIR text files in
# the current working directory.
with open("subgraphs0.mlir", "w") as module_file:
    print(driver.subgraphs[0]._imported_module, file=module_file)
with open("forward.mlir", "w") as module_file:
    print(driver.construct_main_graph(True), file=module_file)

0 comments on commit 85619c9

Please sign in to comment.