-
Notifications
You must be signed in to change notification settings - Fork 23
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: Avimitin <[email protected]>
- Loading branch information
Showing
6 changed files
with
300 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -57,6 +57,7 @@ let | |
pyenv = python3.withPackages (ps: [ | ||
self | ||
ps.torch | ||
ps.torchvision | ||
]); | ||
}; | ||
}; | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
#include <stdint.h> | ||
#include <stdlib.h> | ||
#include <string.h> | ||
|
||
/// Unranked memref handle: a rank together with an untyped pointer to a
/// descriptor. The element type `T` only tags the handle; no member uses it.
template <class T>
struct UnrankedMemRefType {
  int32_t rank;
  void *descriptor;
};
|
||
/// Strided memref descriptor whose rank `N` is known at compile time.
/// Holds two element pointers, an element offset, and `N` sizes and strides.
template <class T, int N>
struct StridedMemRefType {
  T *basePtr;
  T *data;
  int64_t offset;
  int64_t sizes[N];
  int64_t strides[N];
};

/// Rank-1 specialisation: same members, plus element access by index.
template <class T>
struct StridedMemRefType<T, 1> {
  T *basePtr;
  T *data;
  int64_t offset;
  int64_t sizes[1];
  int64_t strides[1];

  /// Returns a reference to the element at logical position `idx`, i.e. the
  /// element `offset + idx * strides[0]` places past `data`. No bounds check.
  T &operator[](int64_t idx) {
    const int64_t linear = offset + idx * strides[0];
    return data[linear];
  }
};

/// Rank-0 specialisation: a single element, so there are no sizes or strides.
template <class T>
struct StridedMemRefType<T, 0> {
  T *basePtr;
  T *data;
  int64_t offset;
};
|
||
// A reference to one of the StridedMemRef types. | ||
template <typename T> class DynamicMemRefType { | ||
public: | ||
int64_t rank; | ||
T *basePtr; | ||
T *data; | ||
int64_t offset; | ||
const int64_t *sizes; | ||
const int64_t *strides; | ||
|
||
explicit DynamicMemRefType(const StridedMemRefType<T, 0> &memRef) | ||
: rank(0), basePtr(memRef.basePtr), data(memRef.data), | ||
offset(memRef.offset), sizes(nullptr), strides(nullptr) {} | ||
template <int N> | ||
explicit DynamicMemRefType(const StridedMemRefType<T, N> &memRef) | ||
: rank(N), basePtr(memRef.basePtr), data(memRef.data), | ||
offset(memRef.offset), sizes(memRef.sizes), strides(memRef.strides) {} | ||
explicit DynamicMemRefType(const ::UnrankedMemRefType<T> &memRef) | ||
: rank(memRef.rank) { | ||
auto *desc = static_cast<StridedMemRefType<T, 1> *>(memRef.descriptor); | ||
basePtr = desc->basePtr; | ||
data = desc->data; | ||
offset = desc->offset; | ||
sizes = rank == 0 ? nullptr : desc->sizes; | ||
strides = sizes + rank; | ||
} | ||
}; | ||
|
||
extern "C" void memrefCopy(int32_t elemSize, UnrankedMemRefType<char> *srcArg, | ||
UnrankedMemRefType<char> *dstArg) { | ||
DynamicMemRefType<char> src(*srcArg); | ||
DynamicMemRefType<char> dst(*dstArg); | ||
|
||
int32_t rank = src.rank; | ||
|
||
// Handle empty shapes -> nothing to copy. | ||
for (int rankp = 0; rankp < rank; ++rankp) | ||
if (src.sizes[rankp] == 0) | ||
return; | ||
|
||
char *srcPtr = src.data + src.offset * elemSize; | ||
char *dstPtr = dst.data + dst.offset * elemSize; | ||
|
||
if (rank == 0) { | ||
memcpy(dstPtr, srcPtr, elemSize); | ||
return; | ||
} | ||
|
||
int32_t *indices = static_cast<int32_t *>(alloca(sizeof(int32_t) * rank)); | ||
int32_t *srcStrides = static_cast<int32_t *>(alloca(sizeof(int32_t) * rank)); | ||
int32_t *dstStrides = static_cast<int32_t *>(alloca(sizeof(int32_t) * rank)); | ||
|
||
// Initialize index and scale strides. | ||
for (int rankp = 0; rankp < rank; ++rankp) { | ||
indices[rankp] = 0; | ||
srcStrides[rankp] = src.strides[rankp] * elemSize; | ||
dstStrides[rankp] = dst.strides[rankp] * elemSize; | ||
} | ||
|
||
int32_t readIndex = 0, writeIndex = 0; | ||
for (;;) { | ||
// Copy over the element, byte by byte. | ||
memcpy(dstPtr + writeIndex, srcPtr + readIndex, elemSize); | ||
// Advance index and read position. | ||
for (int32_t axis = rank - 1; axis >= 0; --axis) { | ||
// Advance at current axis. | ||
auto newIndex = ++indices[axis]; | ||
readIndex += srcStrides[axis]; | ||
writeIndex += dstStrides[axis]; | ||
// If this is a valid index, we have our next index, so continue copying. | ||
if (src.sizes[axis] != newIndex) | ||
break; | ||
// We reached the end of this axis. If this is axis 0, we are done. | ||
if (axis == 0) | ||
return; | ||
// Else, reset to 0 and undo the advancement of the linear index that | ||
// this axis had. Then continue with the axis one outer. | ||
indices[axis] = 0; | ||
readIndex -= src.sizes[axis] * srcStrides[axis]; | ||
writeIndex -= dst.sizes[axis] * dstStrides[axis]; | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
# End-to-end "mobilenet" case: fetches the checkpoint, runs ./mobilenet.py to
# emit forward.mlir and subgraphs0.mlir, lowers both with buddy-opt, and
# compiles the memrefCopy helper into an object file.
{ fetchurl
, buildBuddyE2ETest
}:
let
  # Checkpoint file name; used in the download URL and as the file name inside
  # the directory that TORCH_HOME points at.
  weightsFileName = "mobilenet_v3_small-047dcff4.pth";
  pretrainedWeights = fetchurl {
    url = "https://download.pytorch.org/models/${weightsFileName}";
    hash = "sha256-BH3P9K3e+G6lvC7/E8lhTcEfR6sRYNCnGiXn25lPTh8=";
  };
in
buildBuddyE2ETest {
  caseName = "mobilenet";

  # The checkpoint is copied into a local cache directory and TORCH_HOME is
  # pointed at it before the Python script runs.
  optPhase = ''
    mkdir -p pytorchCache/hub/checkpoints/
    cp -v ${pretrainedWeights} pytorchCache/hub/checkpoints/${weightsFileName}
    export TORCH_HOME=pytorchCache
    python ./mobilenet.py
    echo "Lowering forward.mlir"
    buddy-opt forward.mlir -pass-pipeline \
    "builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg, tosa-to-tensor, tosa-to-arith), \
    empty-tensor-to-alloc-tensor, convert-elementwise-to-linalg, arith-bufferize, \
    func.func(linalg-bufferize, tensor-bufferize), func-bufferize)" \
    | buddy-opt -pass-pipeline \
    "builtin.module(func.func(buffer-deallocation-simplification, convert-linalg-to-loops), \
    eliminate-empty-tensors, func.func(llvm-request-c-wrappers), \
    convert-math-to-llvm, convert-math-to-libm, convert-scf-to-cf, \
    convert-arith-to-llvm, expand-strided-metadata, finalize-memref-to-llvm, \
    convert-func-to-llvm, reconcile-unrealized-casts)" \
    > forward-lowered.mlir
    echo "Lowering subgraphs[0]"
    buddy-opt subgraphs0.mlir -pass-pipeline \
    "builtin.module(func.func(tosa-to-linalg-named, tosa-to-arith, tosa-to-linalg, tosa-to-tensor))" \
    | buddy-opt \
    --convert-elementwise-to-linalg \
    --func-bufferize-dynamic-offset \
    --arith-bufferize \
    --func-bufferize \
    --tensor-bufferize \
    --linalg-bufferize \
    --finalizing-bufferize \
    --batchmatmul-optimize \
    --convert-linalg-to-affine-loops \
    --lower-affine \
    --convert-vector-to-scf \
    --convert-scf-to-cf \
    --llvm-request-c-wrappers \
    --lower-vector-exp \
    --lower-rvv=rv32 \
    --convert-vector-to-llvm \
    --convert-math-to-llvm \
    --convert-math-to-libm \
    --convert-arith-to-llvm \
    --convert-func-to-llvm \
    --expand-strided-metadata \
    --finalize-memref-to-llvm \
    --reconcile-unrealized-casts \
    > subgraphs0-lowered.mlir
    echo "Compiling memrefCopy library"
    $CXX -nostdlib -c ${../lib/MemrefCopy.cc} -o memrefCopy.o
    llcArtifacts+=(
    memrefCopy.o
    )
    optArtifacts+=(
    "forward-lowered.mlir"
    "subgraphs0-lowered.mlir"
    )
  '';
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
#include "img.hpp" | ||
|
||
#define INPUT_N 1 | ||
#define INPUT_C 3 | ||
#define INPUT_H 224 | ||
#define INPUT_W 224 | ||
#define INPUT_TOTAL (INPUT_N * INPUT_C * INPUT_H * INPUT_W) | ||
#define OUTPUT_N 1000 | ||
// #define PARAM_N0 2554968 | ||
#define PARAM_N0 25549 | ||
#define PARAM_N1 34 | ||
|
||
__attribute((section(".vdata"))) float input_0[INPUT_TOTAL]; | ||
__attribute((section(".vdata"))) float output_0[OUTPUT_N]; | ||
__attribute((section(".vdata"))) float param_0[PARAM_N0]; | ||
__attribute((section(".vdata"))) int64_t param_1[PARAM_N1]; | ||
|
||
// Define the sizes of the input and output tensors. | ||
static const int32_t sizesInput[4] = {INPUT_N, INPUT_C, INPUT_H, INPUT_W}; | ||
static const int32_t sizesOutput[2] = {1, OUTPUT_N}; | ||
static const int32_t sizesParam0[1] = {PARAM_N0}; | ||
static const int32_t sizesParam1[1] = {PARAM_N1}; | ||
|
||
extern "C" { | ||
void _mlir_ciface_forward(MemRef<float, 2> *output, MemRef<float, 1> *arg0, | ||
MemRef<int64_t, 1> *arg1, Image<float, 4> *input); | ||
} | ||
|
||
extern "C" int test() { | ||
|
||
// Generate input memref container with random numbers. | ||
const int inputSize = INPUT_N * INPUT_C * INPUT_H * INPUT_W; | ||
|
||
// Create input and output containers for the image and model output. | ||
Image<float, 4> input(input_0, sizesInput); | ||
MemRef<float, 2> output(output_0, sizesOutput); | ||
|
||
// Set random model parameters. | ||
MemRef<float, 1> paramsF32(param_0, 2.0, sizesParam0); | ||
MemRef<int64_t, 1> paramsI64(param_1, 1, sizesParam1); | ||
|
||
_mlir_ciface_forward(&output, ¶msF32, ¶msI64, &input); | ||
|
||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
# ===- buddy_mobilenetv3_import.py --------------------------------------------- | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# | ||
# ===--------------------------------------------------------------------------- | ||
# | ||
# This is the MobileNet V3 model AOT importer. | ||
# | ||
# Source: https://github.com/buddy-compiler/buddy-benchmark | ||
# | ||
# ===--------------------------------------------------------------------------- | ||
|
||
import os | ||
|
||
from pathlib import Path | ||
import numpy as np | ||
import torch | ||
import torchvision.models as models | ||
from torch._inductor.decomposition import decompositions as inductor_decomp | ||
|
||
from buddy.compiler.frontend import DynamoCompiler | ||
from buddy.compiler.graph import GraphDriver | ||
from buddy.compiler.graph.transform import simply_fuse | ||
from buddy.compiler.ops import tosa | ||
|
||
# Build MobileNet V3 Small with its ImageNet-1k weights and put it in eval
# mode. Only `weights` is passed: the legacy `pretrained` flag is deprecated
# in torchvision and redundant once an explicit weights enum is given.
model = models.mobilenet_v3_small(
    weights=models.MobileNet_V3_Small_Weights.IMAGENET1K_V1
)
model = model.eval()

# Initialize Dynamo Compiler with specific configurations as an importer.
dynamo_compiler = DynamoCompiler(
    primary_registry=tosa.ops_registry,
    aot_autograd_decomposition=inductor_decomp,
)

# Example input used to trace the model.
data = torch.randn([1, 3, 224, 224])

# Import the model into graphs; gradients are not needed for this.
with torch.no_grad():
    graphs = dynamo_compiler.importer(model, data)

# The rest of the script handles exactly one imported graph.
assert len(graphs) == 1
graph = graphs[0]
graph.fuse_ops([simply_fuse])

driver = GraphDriver(graph)
subgraph = driver.subgraphs[0]
subgraph.lower_to_top_level_ir()

# Write the first subgraph and the main graph; the build's lowering step
# reads these two files by name.
with open("subgraphs0.mlir", "w") as module_file:
    print(subgraph._imported_module, file=module_file)
with open("forward.mlir", "w") as module_file:
    print(driver.construct_main_graph(True), file=module_file)