implement basic cache and support resnet in tvm backend #76

Open

wants to merge 20 commits into base: main
6 changes: 6 additions & 0 deletions .gitignore
@@ -23,3 +23,9 @@ src/paddlefx/_version.py
.cache/
*.so
tmp/

# viztracer
result.json

# mlir
*.mlir
22 changes: 13 additions & 9 deletions examples/TODO/resnet_dynamo.py
@@ -9,19 +9,23 @@

import paddlefx

from paddlefx.compiler.tvm import TVMCompiler

def my_compiler(gl: paddlefx.GraphLayer, example_inputs: list[paddle.Tensor] = None):
    print("my_compiler() called with FX graph:")
    print(gl.get_source())
    gl.graph.print_tabular(print_mode="rich")
    return gl.forward
paddle.seed(1234)

# logging.getLogger().setLevel(logging.DEBUG)

net = resnet18()
optimized_net = paddlefx.optimize(backend=my_compiler)(net)
compiler = TVMCompiler(full_graph=True, print_tabular_mode="rich")
net = resnet18(pretrained=True, num_classes=2)
optimized_net = paddlefx.optimize(net, backend=compiler)

x = paddle.rand([1, 3, 224, 224])
x = paddle.rand([1, 3, 224, 224], dtype="float32")
out = net(x)
res = optimized_net(x)
np.testing.assert_allclose(res.numpy(), out.numpy(), rtol=1e-5, atol=1e-6)

np.testing.assert_equal(res.numpy(), out.numpy())
for _ in range(10):
    out = net(x)

for _ in range(10):
    res = optimized_net(x)
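The two loops at the end of this example presumably exist to exercise the new code cache: only the first optimized call should pay the TVM compilation cost, and later calls should hit a cached GuardedCode. A hedged timing sketch (not part of the diff; bench is an illustrative helper, and net, optimized_net, and x are the objects defined above) that could be appended to the example:

import time

def bench(fn, inp, iters=10):
    # Average wall-clock time per call; assumes fn(inp) runs synchronously.
    start = time.perf_counter()
    for _ in range(iters):
        fn(inp)
    return (time.perf_counter() - start) / iters

print(f"eager:    {bench(net, x) * 1000:.2f} ms/iter")
print(f"compiled: {bench(optimized_net, x) * 1000:.2f} ms/iter")  # cache should be warm by now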
108 changes: 0 additions & 108 deletions examples/TODO/simple_dynamo.py

This file was deleted.

4 changes: 3 additions & 1 deletion examples/resnet_trace.py
@@ -7,10 +7,12 @@

from paddlefx import symbolic_trace

paddle.seed(1234)

net = resnet18()
traced_layer = symbolic_trace(net)

example_input = paddle.rand([2, 3, 224, 224])
example_input = paddle.rand([2, 3, 24, 24])
orig_output = net(example_input)
traced_output = traced_layer(example_input)

22 changes: 11 additions & 11 deletions examples/simple_compiler.py
@@ -1,7 +1,5 @@
from __future__ import annotations

import logging

import numpy as np
import paddle
import paddle.nn
@@ -11,14 +9,14 @@

from paddlefx.compiler import TVMCompiler

paddle.seed(0)
paddle.seed(1234)

logging.getLogger().setLevel(logging.DEBUG)
# logging.getLogger().setLevel(logging.DEBUG)


def inner_func(x, y):
    p = paddle.add(x, y)
    q = paddle._C_ops.subtract(x, y)
    q = paddle._C_ops.subtract(x, y)  # type: ignore
    z = p * q
    return z / y

@@ -28,11 +26,13 @@ def func(a, b):
return d


optimized_net = paddlefx.optimize(func, backend=TVMCompiler(print_tabular=True))
optimized_func = paddlefx.optimize(func, backend=TVMCompiler(print_tabular_mode="rich"))

x = paddle.rand([1, 224])
y = paddle.rand([1, 224])
out = func(x, y)
res = optimized_net(x, y)
x = paddle.rand([4, 6, 1])
y = paddle.rand([4, 6, 224])
for _ in range(10):
    res = optimized_func(x, y)
    res = optimized_func(y, x)
out = func(y, x)

np.testing.assert_equal(res.numpy(), out.numpy())
np.testing.assert_equal(res.numpy(), out.numpy())
50 changes: 50 additions & 0 deletions examples/simple_dynamo.py
@@ -0,0 +1,50 @@
from __future__ import annotations

import logging

import numpy as np
import paddle
import paddle.nn

import paddlefx

from paddlefx.compiler import DummyCompiler, TVMCompiler

logging.getLogger().setLevel(logging.DEBUG)
dummy_compiler = DummyCompiler(full_graph=True, print_tabular_mode="rich")
compiler = TVMCompiler(full_graph=True, print_tabular_mode="rich")


def check_func(func, *args, backend=None):
    if backend is None:
        compiled_func = paddlefx.optimize(func)
    else:
        compiled_func = paddlefx.optimize(func, backend=backend)
    out = func(*args)
    res = compiled_func(*args)
    if isinstance(out, tuple):
        for i in range(len(res)):
            np.testing.assert_allclose(res[i], out[i])
    else:
        np.testing.assert_allclose(res, out, rtol=1e-5, atol=1e-6)


class SimpleNet(paddle.nn.Layer):
    def __init__(self):
        super().__init__()
        self.fc1 = paddle.nn.Linear(16, 4)
        self.fc2 = paddle.nn.Linear(16, 1)

    def forward(self, a, b):
        c = self.fc1(a)
        d = self.fc2(b)
        e = paddle.add(c, d)
        return e


net = SimpleNet()


in_a = paddle.rand([8, 16])
in_b = paddle.rand([8, 16])
check_func(net, in_a, in_b, backend=compiler)
6 changes: 3 additions & 3 deletions examples/targets/target_3_add_paddle.py
@@ -15,16 +15,16 @@
logging.basicConfig(level=logging.DEBUG, format="%(message)s")
# logging.basicConfig(level=logging.INFO, format="%(message)s")

paddle.seed(0)
paddle.seed(1234)


def func(x, y):
    z = paddle.add(x, y)
    o = paddle._C_ops.add(z, z)
    o = paddle._C_ops.add(z, z)  # type: ignore
    return o


@paddlefx.optimize(backend=TVMCompiler(print_tabular=True))
@paddlefx.optimize(backend=TVMCompiler(print_tabular_mode="rich"))
def net(a, b):
    c = func(a, b)
    return c
3 changes: 2 additions & 1 deletion requirements_dev.txt
@@ -13,7 +13,8 @@ pre-commit>=3.0.0

tabulate==0.9.0

apache-tvm>=0.11.1
apache-tvm==0.14.dev214
xgboost

# debug python with paddle
opencv-python-headless
60 changes: 60 additions & 0 deletions src/paddlefx/cache_manager.py
@@ -0,0 +1,60 @@
from __future__ import annotations

import dataclasses
import types

from typing import TYPE_CHECKING, Callable

if TYPE_CHECKING:
    GuardFunction = Callable[[types.FrameType], bool]
    GuardedCodes = list["GuardedCode"]


@dataclasses.dataclass
class GuardedCode:
    code: types.CodeType
    guard_fn: GuardFunction


class CodeCacheManager:
    cache_dict: dict[types.CodeType, GuardedCodes] = {}

    @classmethod
    def add_cache(cls, code: types.CodeType, guarded_code: GuardedCode):
        cls.cache_dict.setdefault(code, [])
        cls.cache_dict[code].append(guarded_code)

    @classmethod
    def get_cache(cls, frame: types.FrameType) -> GuardedCode | None:
        code: types.CodeType = frame.f_code
        if code not in cls.cache_dict:
            print(f"First call of {code}\n")
            return None
        return cls.lookup(frame, cls.cache_dict[code])

    @classmethod
    def clear_cache(cls):
        cls.cache_dict.clear()

    @classmethod
    def lookup(
        cls, frame: types.FrameType, guarded_codes: GuardedCodes
    ) -> GuardedCode | None:
        for guarded_code in guarded_codes:
            try:
                guard_fn = guarded_code.guard_fn
                if guard_fn(frame):
                    print(
                        f"[Cache]: Cache hit, GuardFunction is {guard_fn}\n",
                    )
                    return guarded_code
                else:
                    print(
                        f"[Cache]: Cache miss, GuardFunction is {guard_fn}\n",
                    )
            except Exception as e:
                print(f"[Cache]: GuardFunction error: {e}\n")
                continue

        print("[Cache]: all guards missed\n")
        return None
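Taken together, the new CodeCacheManager keys GuardedCode entries by the original frame's code object and replays the first entry whose guard_fn accepts the current frame. A minimal usage sketch, assuming the module is importable as paddlefx.cache_manager; the guard, frame handling, and transformed bytecode below are illustrative, not code from this PR:

from __future__ import annotations

import types

from paddlefx.cache_manager import CodeCacheManager, GuardedCode

def lookup_or_none(frame: types.FrameType) -> types.CodeType | None:
    # Return previously transformed bytecode for this frame, or None to trigger a fresh trace.
    cached = CodeCacheManager.get_cache(frame)
    return cached.code if cached is not None else None

def remember(frame: types.FrameType, new_code: types.CodeType) -> None:
    # Illustrative guard: reuse the transformed code only while the frame holds the
    # same number of locals; a real guard would check tensor shapes and dtypes instead.
    nlocals = len(frame.f_locals)

    def guard_fn(f: types.FrameType) -> bool:
        return len(f.f_locals) == nlocals

    CodeCacheManager.add_cache(frame.f_code, GuardedCode(new_code, guard_fn))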
4 changes: 3 additions & 1 deletion src/paddlefx/codegen.py
@@ -123,6 +123,7 @@ def make_call_generated_code(self, fn_name: str):
self.extend_output(create_call_function(len(placeholders), False))

def call(self, vars: VariableStack[VariableBase]):
self.tensor_index = 0  # TODO: rm this
for var in vars:
self.call_one(var)

@@ -149,7 +150,8 @@ def call_one(self, value: VariableBase):

output.append(self.create_load(self.graph_output_var))
# TODO: rm hardcode
output.append(self.create_load_const(0))
output.append(self.create_load_const(self.tensor_index))
self.tensor_index += 1
output.append(create_instruction("BINARY_SUBSCR"))
elif value.var == None:
output.append(self.create_load_const(None))
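For context on the codegen change above: call() now resets tensor_index, and call_one() emits the current index instead of a hard-coded 0 before BINARY_SUBSCR, so each output variable is unpacked from its own slot of the graph's return value. Roughly, the generated bytecode is now equivalent to the following Python (a sketch; __graph_out and the local names are illustrative placeholders, not names produced by paddlefx):

# __graph_out stands for the value loaded from self.graph_output_var,
# i.e. the tuple of tensors returned by the compiled graph.
__graph_out = ("tensor_c", "tensor_d")  # placeholder for two graph outputs

# Before: LOAD_CONST 0 + BINARY_SUBSCR for every output.
c_old = __graph_out[0]
d_old = __graph_out[0]  # the second output wrongly aliased the first slot

# After: tensor_index is reset in call() and incremented per output.
c_new = __graph_out[0]
d_new = __graph_out[1]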