Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update python api for vodla #766

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 36 additions & 8 deletions python/halo/halo.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,17 @@ class CXXCodeGenOpts(Structure):
("save_temps", c_bool),
]

# Capacities of the fixed-size arrays inside the native model-info record.
HALO_MODEL_INFO_MAX_OUTPUT_NR = 64    # max entries in output_buf_sizes
HALO_VODLA_MAX_OUTPUT_RSC_EST = 2048  # byte capacity of output_rsc_est

class ModelInfo(Structure):
    """ctypes mirror of halo's C-side model-info struct.

    Appears to be filled in as an out-parameter by the native Analyze call
    (callers pass ``pointer(ModelInfo())``). Field order and types must match
    the native layout exactly — do not reorder or retype entries.
    """

    _fields_ = [
        ("num_outputs", c_size_t),
        ("output_buf_sizes", c_size_t * HALO_MODEL_INFO_MAX_OUTPUT_NR),
        ("input_qps", c_int),
        ("adaptive_bsz", c_int),
        ("output_rsc_est", c_char * HALO_VODLA_MAX_OUTPUT_RSC_EST),
    ]


"""
int halo_Compile(halo::ModelFormat model_format, unsigned num_models,
Expand Down Expand Up @@ -91,6 +102,24 @@ class CXXCodeGenOpts(Structure):
c_void_p, # model_info
]

# ctypes argument signature for the native halo Analyze entry point.
# The positional order is the FFI/ABI contract with the C library — it mirrors
# the Compile signature above, except the trailing pointer receives the
# analysis results (a ModelInfo out-parameter; callers pass
# pointer(ModelInfo()) rather than 0).
Analyze.argtypes = [
    c_int, # model_format (halo::ModelFormat enum value)
    c_uint, # num_models
    c_void_p, # models
    c_void_p, # model_sizes
    c_char_p, # target
    c_int, # batch
    c_uint, # num_input_shapes
    c_void_p, # input_shapes
    c_uint, # num_inputs
    c_void_p, # inputs
    c_uint, # num_outputs
    c_void_p, # outputs
    c_void_p, # cg_opts
    c_char_p, # filename
    c_void_p, # model_info (ModelInfo*, written by the native side)
]


def exec(args):
proc = subprocess.run(args)
Expand Down Expand Up @@ -139,8 +168,8 @@ def CompileModel(model_file, input_shapes, output_names, batch, format):

target = "cxx".encode("utf-8")
output_filename = output_file.encode("utf-8")
logger.info("Begin Halo compilation")
logger.info("Halo lib:" + str(lib_halo._name))
logger.debug("Begin Halo compilation")
logger.debug("Halo lib:" + str(lib_halo._name))
logger.debug("Intermediate file:" + str(output_filename))
Compile(
format_val,
Expand All @@ -150,7 +179,7 @@ def CompileModel(model_file, input_shapes, output_names, batch, format):
target,
batch,
num_input_shapes,
(c_char_p * num_input_shapes)(*input_shapes),
(c_char_p * num_input_shapes)(*input_shapes), # input_shapes,
num_inputs,
inputs,
num_outputs,
Expand All @@ -159,11 +188,11 @@ def CompileModel(model_file, input_shapes, output_names, batch, format):
output_filename,
0,
)
logger.info("Done Halo Compilation")
logger.debug("Done Halo Compilation")
return [output_file, output_bin]


def AnalyzeModel(model_file, input_shapes, batch, format):
def AnalyzeModel(model_file, input_shapes, batch, format, model_info):
output_file = ""
odla_lib = cast(create_string_buffer(b""), c_char_p)
opts = CXXCodeGenOpts()
Expand Down Expand Up @@ -215,10 +244,9 @@ def AnalyzeModel(model_file, input_shapes, batch, format):
outputs,
pointer(opts),
output_filename,
0
pointer(model_info),
)


def CompileODLAModel(files, device, debug=False):
cc_file = files[0]
bin_file = files[1]
Expand All @@ -235,7 +263,7 @@ def CompileODLAModel(files, device, debug=False):
str(so_file),
str(cc_file),
str(bin_file),
"-l" + device,
"-l" + "vodla",
"-Wl,-rpath=/usr/local/lib",
]
logger.debug("Building ODLA model: " + " ".join(args))
Expand Down
25 changes: 10 additions & 15 deletions python/halo/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import sys
import logging
from logging import StreamHandler, Formatter
import os


class Inference:
Expand All @@ -31,6 +32,7 @@ def __init__(
device,
batch,
format,
qps,
debug,
log_level,
):
Expand All @@ -53,34 +55,27 @@ def __init__(
self.format = format
self.device = device
self.batch = batch
self.qps = qps
self.model = None
self.so_file = None
self.intermediate_files = []
self.save_temps = False

def __del__(self):
    """Best-effort cleanup: delete intermediate files and drop the model handle.

    NOTE(review): runs during interpreter teardown, where self.logger / Path
    may already be partially finalized — failures here are unrecoverable.
    """
    # Record which files are candidates for removal (aids temp debugging).
    self.logger.info(str(self.intermediate_files))
    for file in self.intermediate_files:
        if not self.save_temps:
            # save_temps is a debug switch; normally generated files are removed.
            Path(file).unlink()
    # Drop our reference so the ODLAModel's own __del__ can release native state.
    del self.model

def Initialize(self):
self.logger.info("Begin initialization")
files = halo.CompileModel(
self.logger.info(f"Begin initialization;{self.model_file}")
self.so_file = "/usr/local/lib/libvodla.so"
self.model = odla.ODLAModel(self.so_file)
self.model.Load(
self.model_file,
self.input_shapes,
self.output_names,
self.batch,
self.format,
)
self.so_file = halo.CompileODLAModel(files, self.device, self.debug)
self.intermediate_files = [*files, self.so_file]
self.model = odla.ODLAModel(self.so_file)
self.model.Load()
self.batch,
self.qps)
self.logger.info("Done initialization")

def Run(self, data):
    """Run inference on ``data``, lazily initializing the model on first use.

    Returns whatever the underlying ODLA model's Execute produces.
    """
    if self.model is None:
        # First call: load the model via Initialize() before executing.
        self.Initialize()
    model = self.model
    return model.Execute(data, self.model_file, self.batch)
204 changes: 170 additions & 34 deletions python/halo/odla.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
from enum import Enum
from time import time
import logging

import os
from pathlib import Path
from halo import halo

class Device(Enum):
CUDA = 1
Expand All @@ -37,57 +39,191 @@ def __init__(self, so_file):
self.logger = logging.getLogger(__name__)
self.so_file = so_file
self.h = None
self.buffers = []
self.save_temps = False
self.intermediate_files = []

def __del__(self):
    """Remove generated intermediate files and release native ODLA handles."""
    # Record which files are candidates for removal (aids temp debugging).
    self.logger.info(str(self.intermediate_files))
    for file in self.intermediate_files:
        if not self.save_temps:
            # save_temps is a debug switch; normally generated files are removed.
            Path(file).unlink()
    # self.h is only non-None after Load() has loaded the shared library,
    # which is also when ctx/comp/device get created.
    if self.h is not None:
        # Teardown in this order: context, then computation, then device.
        self.h.odla_DestroyContext(self.ctx)
        self.h.odla_DestroyComputation(self.comp)
        self.h.odla_DestroyDevice(self.device)

def Load(self):
def Load(self,model,input_shapes,output_names,format,batch,qps):
if self.h is None:
self.h = CDLL(self.so_file)
self.comp = c_void_p(0)
self.h.odla_CreateComputation(pointer(self.comp))
# TODO:
use_sim = c_bool(True)
self.h.odla_SetComputationItem(self.comp, 7, pointer(use_sim))
self.device = c_void_p(0)
model_info = halo.ModelInfo()
model_info.input_qps = qps
model_info.adaptive_bsz = batch
rsc_est = c_void_p(0)
if qps>0 and batch==1:
halo.AnalyzeModel(model,input_shapes,1,format,model_info)
rsc_est = (c_char_p)(model_info.output_rsc_est)
self.h.odla_AllocateDevice(c_void_p(0), 0, pointer(self.device), rsc_est)
self.files = halo.CompileModel(
model,
input_shapes,
output_names,
batch,
format,
)
self.intermediate_files = [*self.files]
cc_file = str(self.files[0]).encode("utf-8")
bin_file = str(self.files[1]).encode("utf-8")

self.h.model_helper(self.comp)
self.comp = self.h.model_helper(cc_file, bin_file)
self.ctx = c_void_p(0)
self.h.odla_CreateContext(pointer(self.ctx))
n = c_int32(-1)
self.h.odla_GetNumOfArgsFromComputation(self.comp, pointer(n))
self.nr_args = n.value
# self.nr_args = n.value
if("bert" in model):
self.nr_args = 3
elif("shoucai" in model):
self.nr_args = 16
else:
self.nr_args = 1
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we can raise NotImplementedError for the rest of the models.


nr_args = c_int32(-1)
# nr_args = c_int32(-1)
self.h.odla_GetNumOfOutputsFromComputation(self.comp, pointer(n))
self.nr_outputs = n.value
# self.nr_outputs = n.value
if("bert" in model):
self.nr_outputs = 2
else:
self.nr_outputs = 1

self.in_vals = []
for idx in range(0, self.nr_args):
arg_v = c_void_p(0)
self.h.odla_GetArgFromComputationByIdx(self.comp, idx, pointer(arg_v))
vt = ValueType()
self.h.odla_GetValueType(arg_v, pointer(vt))
self.in_vals.append((arg_v.value, vt))

self.ctx = c_void_p(0)
self.h.odla_CreateContext(pointer(self.ctx))

self.out_vals = []
for idx in range(0, self.nr_outputs):
out = c_void_p(0)
self.h.odla_GetOutputFromComputationByIdx(self.comp, idx, pointer(out))
vt = ValueType()
self.h.odla_GetValueType(out, pointer(vt))
n = 1
for r in range(0, vt.shape.size):
n *= vt.shape.dims[r]
self.out_vals.append((out, vt, n))
buf = (c_float * n)() # FIXME: handle types
self.h.odla_BindToOutput(out, buf, self.ctx)
self.buffers.append(buf)

def Execute(self, data):
for idx, v in enumerate(self.in_vals):
self.h.odla_BindToArgument(
v[0], data[idx].ctypes.data_as(c_void_p), self.ctx
)

def Execute(self, data, model, batch):
print(f"model:{model},batch:{batch}")
# bind input
if("bert" in model):
self.h.odla_BindToArgument(c_void_p(0), data[0].ctypes.data_as(c_void_p), self.ctx)
self.h.odla_BindToArgument(c_void_p(1), data[1].ctypes.data_as(c_void_p), self.ctx)
self.h.odla_BindToArgument(c_void_p(2), data[2].ctypes.data_as(c_void_p), self.ctx)
elif("shoucai" in model):
self.h.odla_BindToArgument(c_void_p(0), data[0].ctypes.data_as(c_void_p), self.ctx)
self.h.odla_BindToArgument(c_void_p(1), data[1].ctypes.data_as(c_void_p), self.ctx)
self.h.odla_BindToArgument(c_void_p(2), data[2].ctypes.data_as(c_void_p), self.ctx)
self.h.odla_BindToArgument(c_void_p(3), data[3].ctypes.data_as(c_void_p), self.ctx)
self.h.odla_BindToArgument(c_void_p(4), data[4].ctypes.data_as(c_void_p), self.ctx)
self.h.odla_BindToArgument(c_void_p(5), data[5].ctypes.data_as(c_void_p), self.ctx)
self.h.odla_BindToArgument(c_void_p(6), data[6].ctypes.data_as(c_void_p), self.ctx)
self.h.odla_BindToArgument(c_void_p(7), data[7].ctypes.data_as(c_void_p), self.ctx)
self.h.odla_BindToArgument(c_void_p(8), data[8].ctypes.data_as(c_void_p), self.ctx)
self.h.odla_BindToArgument(c_void_p(9), data[9].ctypes.data_as(c_void_p), self.ctx)
self.h.odla_BindToArgument(c_void_p(10), data[10].ctypes.data_as(c_void_p), self.ctx)
self.h.odla_BindToArgument(c_void_p(11), data[11].ctypes.data_as(c_void_p), self.ctx)
self.h.odla_BindToArgument(c_void_p(12), data[12].ctypes.data_as(c_void_p), self.ctx)
self.h.odla_BindToArgument(c_void_p(13), data[13].ctypes.data_as(c_void_p), self.ctx)
self.h.odla_BindToArgument(c_void_p(14), data[14].ctypes.data_as(c_void_p), self.ctx)
self.h.odla_BindToArgument(c_void_p(15), data[15].ctypes.data_as(c_void_p), self.ctx)
else:
self.h.odla_BindToArgument(c_void_p(0), data[0].ctypes.data_as(c_void_p), self.ctx)

# output buffer
buffers = []
if("bert" in model):
buf1 = (c_float * 256 * batch)()
buffers.append(buf1)
self.h.odla_BindToOutput(c_void_p(0), buf1, self.ctx)
buf2 = (c_float * 256 * batch)()
buffers.append(buf2)
self.h.odla_BindToOutput(c_void_p(1), buf2, self.ctx)
else:
if("resnet50" in model):
buf = (c_float * 1*1000 * batch)()
elif("dbnet" in model):
assert((batch==1) and "dbnet only support 1 batch.")
buf = (c_float * 1228800 * batch)()
elif("crnn" in model):
assert((batch==1) and "crnn only support 1 batch.")
buf = (c_float * 918146 * batch)()
elif("shoucai" in model):
#Add info for Shoucai Model
assert((batch==1) and "shoucai only support 1 batch!")
buf = (c_float * 425 * batch)()
else:
assert(False and f"unknown model.{model}")
buffers.append(buf)
self.h.odla_BindToOutput(c_void_p(0), buf, self.ctx)

# send data
if("resnet50" in model):
self.h.model_data(self.ctx, (c_int32 * 1)(*[224*224*3*4*batch]), (c_int32 * 1)(*[1000*4*batch]))
elif("dbnet" in model):
self.h.model_data(self.ctx, (c_int32 * 1)(*[1*3*960*1280*4*batch]), (c_int32 * 1)(*[1228800 * 4*batch]))
elif("crnn" in model):
self.h.model_data(self.ctx, (c_int32 * 1)(*[63840*4*batch]), (c_int32 * 1)(*[918146*4*batch]))
elif("bert" in model):
self.h.model_data(self.ctx, (c_int32 * 3)(*[512*4*batch, 256*4*batch, 256*8*batch]), (c_int32 * 2)(*[256*4*batch,256*4*batch]))
elif ("shoucai" in model):
#per Input File
#self.h.model_data(self.ctx, (c_int32 * 16)(*[8*4*batch, 1*4*batch, 592*4*batch, 1*4*batch, 512*4*batch, 512*4*batch, 73728*4*batch, 27200*4*batch, 13600*4*batch, 13600*4*batch, 122400*4*batch, 350200*4*batch, 15552*4*batch, 1161984*4*batch, 178*4*batch, 425*8*batch]), (c_int32*1)(*[425*4*batch]))

#per Input Shape
self.h.model_data(self.ctx, (c_int32 * 18)(*[425*4*batch, 8*4*batch, 1*4*batch, 592*4*batch, 1*4*batch, 512*4*batch, 512*4*batch, 73728*4*batch, 425*4*batch, 425*4*batch, 27200*4*batch,13600*4*batch, 13600*4*batch, 122400*4*batch, 350200*4*batch, 15552*4*batch, 1161984*4*batch, 178*4*batch]), (c_int32*1)(*[425*4*batch]))


'''
self.h.model_data(self._ctx, (c_int32 * 16)(*[8*4*batch, \ # embedding_ui_oage_shared_embedding.txt
1*4*batch, \ # input_from_feature_columns_concat_3.txt
592*4*batch, \ # input_from_feature_columns_concat.txt
1*4*batch, \ #input_from_feature_columns_concat_5.txt
512*4*batch, \ #all_clk_seq_1_time.txt
512*4*batch, \#all_clk_seq_1_st.txt
73728*4*batch, \ #seq_input_from_feature_columns_concat_1.txt
27200*4*batch, \#embedding_item_id_d_shard_embedding_2.txt
13600*4*batch, \#embedding_item_cate_id_d_shared_embedding_2.txt
13600*4*batch, \#embedding_item_seller_id_d_shared_embedding_2.txt
122400*4*batch, \#input_from_feature_columns_concat_4.txt
350200*4*batch, \#input_from_feature_columns_concat_1.txt
15552*4*batch, \#seq_input_from_feature_columns_concat.txt
1161984*4*batch,\ #seq_input_from_feature_columns_concat_2.txt
178*4*batch, \#input_from_feature_columns_concat_7.txt
425*8*batch]), \#Unique_preprocess_int64.txt
(c_int32*1)(*[425*4*batch])) #output


self.h.model_data(self._ctx, (c_int32 * 18)(*[425*4*batch, \# LookupPkOP
8*4*batch, \ # embedding_ui_oage_shared_embedding.txt
1*4*batch, \ # input_from_feature_columns_concat_3.txt
592*4*batch, \ # input_from_feature_columns_concat.txt
1*4*batch, \ #input_from_feature_columns_concat_5.txt
512*4*batch, \ #all_clk_seq_1_time.txt
512*4*batch, \#all_clk_seq_1_st.txt
73728*4*batch, \ #seq_input_from_feature_columns_concat_1.txt
425*4*batch, \ #batch_fill_attributes_for_gul_rank_item_feature
425*4*batch, \ #batch_fill_attributes_for_gul_rank_item_feature_1
27200*4*batch, \#embedding_item_id_d_shard_embedding_2.txt
13600*4*batch, \#embedding_item_cate_id_d_shared_embedding_2.txt
13600*4*batch, \#embedding_item_seller_id_d_shared_embedding_2.txt
122400*4*batch, \#input_from_feature_columns_concat_4.txt
350200*4*batch, \#input_from_feature_columns_concat_1.txt
15552*4*batch, \#seq_input_from_feature_columns_concat.txt
1161984*4*batch,\ #seq_input_from_feature_columns_concat_2.txt
178*4*batch]), \#input_from_feature_columns_concat_7.txt

(c_int32*1)(*[425*4*batch])) #output
'''
else:
assert(False and f"unknown model.{model}")

s = time()
self.h.odla_ExecuteComputation(self.comp, self.ctx, 0, c_void_p(0))
self.h.odla_ExecuteComputation(self.comp, self.ctx, 0, self.device)
t = time()
self.logger.info("Execution time:" + str(t - s) + " sec(s)")
return self.buffers
return buffers