From 372b7325f07ea935393776bbeb7e681cc97cbd21 Mon Sep 17 00:00:00 2001 From: "maruiyan.mry" Date: Tue, 12 Oct 2021 01:04:58 -0700 Subject: [PATCH 1/6] feat:support vodla demo --- python/halo/halo.py | 10 ++-- python/halo/inference.py | 15 +++-- python/halo/odla.py | 116 ++++++++++++++++++++++++++++----------- 3 files changed, 99 insertions(+), 42 deletions(-) diff --git a/python/halo/halo.py b/python/halo/halo.py index 8853938af..7d55ba37d 100644 --- a/python/halo/halo.py +++ b/python/halo/halo.py @@ -139,8 +139,8 @@ def CompileModel(model_file, input_shapes, output_names, batch, format): target = "cxx".encode("utf-8") output_filename = output_file.encode("utf-8") - logger.info("Begin Halo compilation") - logger.info("Halo lib:" + str(lib_halo._name)) + logger.debug("Begin Halo compilation") + logger.debug("Halo lib:" + str(lib_halo._name)) logger.debug("Intermediate file:" + str(output_filename)) Compile( format_val, @@ -150,7 +150,7 @@ def CompileModel(model_file, input_shapes, output_names, batch, format): target, batch, num_input_shapes, - (c_char_p * num_input_shapes)(*input_shapes), + (c_char_p * num_input_shapes)(*input_shapes), # input_shapes, num_inputs, inputs, num_outputs, @@ -159,7 +159,7 @@ def CompileModel(model_file, input_shapes, output_names, batch, format): output_filename, 0, ) - logger.info("Done Halo Compilation") + logger.debug("Done Halo Compilation") return [output_file, output_bin] @@ -235,7 +235,7 @@ def CompileODLAModel(files, device, debug=False): str(so_file), str(cc_file), str(bin_file), - "-l" + device, + "-l" + "vodla", "-Wl,-rpath=/usr/local/lib", ] logger.debug("Building ODLA model: " + " ".join(args)) diff --git a/python/halo/inference.py b/python/halo/inference.py index 1ac879de4..508a95197 100644 --- a/python/halo/inference.py +++ b/python/halo/inference.py @@ -20,6 +20,7 @@ import sys import logging from logging import StreamHandler, Formatter +import os class Inference: @@ -57,6 +58,7 @@ def __init__( self.so_file = None self.intermediate_files = [] self.save_temps = False + print(f"self.batch:{self.batch}") def __del__(self): self.logger.info(str(self.intermediate_files)) @@ -66,7 +68,7 @@ def __del__(self): del self.model def Initialize(self): - self.logger.info("Begin initialization") + self.logger.info(f"Begin initialization;{self.model_file}") files = halo.CompileModel( self.model_file, self.input_shapes, @@ -74,13 +76,14 @@ def Initialize(self): self.batch, self.format, ) - self.so_file = halo.CompileODLAModel(files, self.device, self.debug) - self.intermediate_files = [*files, self.so_file] - self.model = odla.ODLAModel(self.so_file) - self.model.Load() + # self.so_file = halo.CompileODLAModel(files, self.device, self.debug) + self.so_file = "/usr/local/lib/libvodla.so" + self.intermediate_files = [*files] + self.model = odla.ODLAModel(self.so_file,files) + self.model.Load(self.model_file) self.logger.info("Done initialization") def Run(self, data): if self.model is None: self.Initialize() - return self.model.Execute(data) + return self.model.Execute(data, self.model_file, self.batch) diff --git a/python/halo/odla.py b/python/halo/odla.py index 068848c17..7843fd292 100644 --- a/python/halo/odla.py +++ b/python/halo/odla.py @@ -16,7 +16,8 @@ from enum import Enum from time import time import logging - +import os +from pdb import set_trace class Device(Enum): CUDA = 1 @@ -33,61 +34,114 @@ class ValueType(Structure): class ODLAModel: - def __init__(self, so_file): + def __init__(self, so_file, files): self.logger = logging.getLogger(__name__) self.so_file = so_file self.h = None self.buffers = [] + self.files = files - def Load(self): + def Load(self,model): if self.h is None: self.h = CDLL(self.so_file) self.comp = c_void_p(0) + self.device = c_void_p(0) + self.h.odla_AllocateDevice(c_void_p(0), 0, pointer(self.device)) self.h.odla_CreateComputation(pointer(self.comp)) # TODO: - use_sim = c_bool(True) - self.h.odla_SetComputationItem(self.comp, 7, pointer(use_sim)) + # use_sim = c_bool(True) + # self.h.odla_SetComputationItem(self.comp, 7, pointer(use_sim)) + cc_file = str(self.files[0]).encode("utf-8") + bin_file = str(self.files[1]).encode("utf-8") - self.h.model_helper(self.comp) + self.comp = self.h.model_helper(cc_file, bin_file) + self.ctx = c_void_p(0) + self.h.odla_CreateContext(pointer(self.ctx)) n = c_int32(-1) self.h.odla_GetNumOfArgsFromComputation(self.comp, pointer(n)) - self.nr_args = n.value + # self.nr_args = n.value + if("bert" in model): + print("bert_model input num.") + self.nr_args = 3 + else: + self.nr_args = 1 - nr_args = c_int32(-1) + # nr_args = c_int32(-1) self.h.odla_GetNumOfOutputsFromComputation(self.comp, pointer(n)) - self.nr_outputs = n.value + # self.nr_outputs = n.value + if("bert" in model): + print("bert_model output num.") + self.nr_outputs = 2 + else: + self.nr_outputs = 1 + self.in_vals = [] for idx in range(0, self.nr_args): arg_v = c_void_p(0) self.h.odla_GetArgFromComputationByIdx(self.comp, idx, pointer(arg_v)) - vt = ValueType() - self.h.odla_GetValueType(arg_v, pointer(vt)) - self.in_vals.append((arg_v.value, vt)) + # vt = ValueType() + # self.h.odla_GetValueType(arg_v, pointer(vt)) + # self.in_vals.append((arg_v.value, vt)) - self.ctx = c_void_p(0) - self.h.odla_CreateContext(pointer(self.ctx)) + # self.ctx = c_void_p(0) + # self.h.odla_CreateContext(pointer(self.ctx)) self.out_vals = [] for idx in range(0, self.nr_outputs): out = c_void_p(0) self.h.odla_GetOutputFromComputationByIdx(self.comp, idx, pointer(out)) - vt = ValueType() - self.h.odla_GetValueType(out, pointer(vt)) - n = 1 - for r in range(0, vt.shape.size): - n *= vt.shape.dims[r] - self.out_vals.append((out, vt, n)) - buf = (c_float * n)() # FIXME: handle types - self.h.odla_BindToOutput(out, buf, self.ctx) - self.buffers.append(buf) - - def Execute(self, data): - for idx, v in enumerate(self.in_vals): - self.h.odla_BindToArgument( - v[0], data[idx].ctypes.data_as(c_void_p), self.ctx - ) + # vt = ValueType() + # self.h.odla_GetValueType(out, pointer(vt)) + # n = 1 + # for r in range(0, vt.shape.size): + # n *= vt.shape.dims[r] + # self.out_vals.append((out, vt, n)) + # buf = (c_float * n)() # FIXME: handle types + # self.h.odla_BindToOutput(out, buf, self.ctx) + # self.buffers.append(buf) + + def Execute(self, data, model, batch): + print(f"model:{model},batch:{batch}") + # for idx, v in enumerate(self.in_vals): + # self.h.odla_BindToArgument( + # v[0], data[idx].ctypes.data_as(c_void_p), self.ctx + # ) + self.h.odla_BindToArgument(c_void_p(0), data[0].ctypes.data_as(c_void_p), self.ctx) + # output buffer + buffers = [] + if("bert" in model): + buf1 = (c_float * 256 * batch)() + buffers.append(buf1) + self.h.odla_BindToOutput(c_void_p(0), buf1, self.ctx) + buf2 = (c_float * 256 * batch)() + buffers.append(buf2) + self.h.odla_BindToOutput(c_void_p(0), buf2, self.ctx) + else: + if("resnet50" in model): + buf = (c_float * 1*1000 * batch)() + elif("dbnet" in model): + buf = (c_float * 1228800 * batch)() + elif("crnn" in model): + buf = (c_float * 918146 * batch)() + else: + assert(False and f"unknown model.{model}") + buffers.append(buf) + self.h.odla_BindToOutput(c_void_p(0), buf, self.ctx) + + if("resnet50" in model): + self.h.model_data(self.ctx, (c_int32 * 1)(*[224*224*3*4]), (c_int32 * 1)(*[1000*4])) + elif("dbnet" in model): + self.h.model_data(self.ctx, (c_int32 * 1)(*[1*3*960*1280*4]), (c_int32 * 1)(*[1228800 * 4])) + elif("crnn" in model): + self.h.model_data(self.ctx, (c_int32 * 1)(*[63840*4]), (c_int32 * 1)(*[918146*4])) + elif("bert" in model): + self.h.model_data(self.ctx, (c_int32 * 1)(*[512*4, 256*4, 256*4]), (c_int32 * 1)(*[256*4,256*4])) + else: + assert(False and f"unknown model.{model}") + s = time() - self.h.odla_ExecuteComputation(self.comp, self.ctx, 0, c_void_p(0)) + # self.h.odla_ExecuteComputation(self.comp, self.ctx, 0, c_void_p(0)) + self.h.odla_ExecuteComputation(self.comp, self.ctx, 0, self.device) t = time() self.logger.info("Execution time:" + str(t - s) + " sec(s)") - return self.buffers + return buffers From c5886f0990b9127c7fd72bf87921f0f0d25e9fcd Mon Sep 17 00:00:00 2001 From: tianboh <54729592+tianboh@users.noreply.github.com> Date: Tue, 2 Nov 2021 17:19:11 +0800 Subject: [PATCH 2/6] add odla.py support for shoucai (#664) --- python/halo/odla.py | 119 +++++++++++++++++++++++++++++++++----------- 1 file changed, 90 insertions(+), 29 deletions(-) diff --git a/python/halo/odla.py b/python/halo/odla.py index 7843fd292..d6acdc447 100644 --- a/python/halo/odla.py +++ b/python/halo/odla.py @@ -18,6 +18,7 @@ import logging import os from pdb import set_trace +import numpy as np class Device(Enum): CUDA = 1 @@ -61,8 +62,9 @@ def Load(self,model): self.h.odla_GetNumOfArgsFromComputation(self.comp, pointer(n)) # self.nr_args = n.value if("bert" in model): - print("bert_model input num.") self.nr_args = 3 + elif("shoucai" in model): + self.nr_args = 16 else: self.nr_args = 1 @@ -70,7 +72,6 @@ def Load(self,model): self.h.odla_GetNumOfOutputsFromComputation(self.comp, pointer(n)) # self.nr_outputs = n.value if("bert" in model): - print("bert_model output num.") self.nr_outputs = 2 else: self.nr_outputs = 1 @@ -79,68 +80,128 @@ def Load(self,model): for idx in range(0, self.nr_args): arg_v = c_void_p(0) self.h.odla_GetArgFromComputationByIdx(self.comp, idx, pointer(arg_v)) - # vt = ValueType() - # self.h.odla_GetValueType(arg_v, pointer(vt)) - # self.in_vals.append((arg_v.value, vt)) - - # self.ctx = c_void_p(0) - # self.h.odla_CreateContext(pointer(self.ctx)) self.out_vals = [] for idx in range(0, self.nr_outputs): out = c_void_p(0) self.h.odla_GetOutputFromComputationByIdx(self.comp, idx, pointer(out)) - # vt = ValueType() - # self.h.odla_GetValueType(out, pointer(vt)) - # n = 1 - # for r in range(0, vt.shape.size): - # n *= vt.shape.dims[r] - # self.out_vals.append((out, vt, n)) - # buf = (c_float * n)() # FIXME: handle types - # self.h.odla_BindToOutput(out, buf, self.ctx) - # self.buffers.append(buf) def Execute(self, data, model, batch): print(f"model:{model},batch:{batch}") - # for idx, v in enumerate(self.in_vals): - # self.h.odla_BindToArgument( - # v[0], data[idx].ctypes.data_as(c_void_p), self.ctx - # ) - self.h.odla_BindToArgument(c_void_p(0), data[0].ctypes.data_as(c_void_p), self.ctx) + # bind input + if("bert" in model): + self.h.odla_BindToArgument(c_void_p(0), data[0].ctypes.data_as(c_void_p), self.ctx) + self.h.odla_BindToArgument(c_void_p(1), data[1].ctypes.data_as(c_void_p), self.ctx) + self.h.odla_BindToArgument(c_void_p(2), data[2].ctypes.data_as(c_void_p), self.ctx) + elif("shoucai" in model): + self.h.odla_BindToArgument(c_void_p(0), data[0].ctypes.data_as(c_void_p), self.ctx) + self.h.odla_BindToArgument(c_void_p(1), data[1].ctypes.data_as(c_void_p), self.ctx) + self.h.odla_BindToArgument(c_void_p(2), data[2].ctypes.data_as(c_void_p), self.ctx) + self.h.odla_BindToArgument(c_void_p(3), data[3].ctypes.data_as(c_void_p), self.ctx) + self.h.odla_BindToArgument(c_void_p(4), data[4].ctypes.data_as(c_void_p), self.ctx) + self.h.odla_BindToArgument(c_void_p(5), data[5].ctypes.data_as(c_void_p), self.ctx) + self.h.odla_BindToArgument(c_void_p(6), data[6].ctypes.data_as(c_void_p), self.ctx) + self.h.odla_BindToArgument(c_void_p(7), data[7].ctypes.data_as(c_void_p), self.ctx) + self.h.odla_BindToArgument(c_void_p(8), data[8].ctypes.data_as(c_void_p), self.ctx) + self.h.odla_BindToArgument(c_void_p(9), data[9].ctypes.data_as(c_void_p), self.ctx) + self.h.odla_BindToArgument(c_void_p(10), data[10].ctypes.data_as(c_void_p), self.ctx) + self.h.odla_BindToArgument(c_void_p(11), data[11].ctypes.data_as(c_void_p), self.ctx) + self.h.odla_BindToArgument(c_void_p(12), data[12].ctypes.data_as(c_void_p), self.ctx) + self.h.odla_BindToArgument(c_void_p(13), data[13].ctypes.data_as(c_void_p), self.ctx) + self.h.odla_BindToArgument(c_void_p(14), data[14].ctypes.data_as(c_void_p), self.ctx) + self.h.odla_BindToArgument(c_void_p(15), data[15].ctypes.data_as(c_void_p), self.ctx) + else: + self.h.odla_BindToArgument(c_void_p(0), data[0].ctypes.data_as(c_void_p), self.ctx) + # output buffer buffers = [] if("bert" in model): - buf1 = (c_float * 256 * batch)() + buf1 = (c_float * 256 * batch)() buffers.append(buf1) self.h.odla_BindToOutput(c_void_p(0), buf1, self.ctx) buf2 = (c_float * 256 * batch)() buffers.append(buf2) - self.h.odla_BindToOutput(c_void_p(0), buf2, self.ctx) + self.h.odla_BindToOutput(c_void_p(1), buf2, self.ctx) else: if("resnet50" in model): buf = (c_float * 1*1000 * batch)() elif("dbnet" in model): + assert((batch==1) and "dbnet only support 1 batch.") buf = (c_float * 1228800 * batch)() elif("crnn" in model): + assert((batch==1) and "crnn only support 1 batch.") buf = (c_float * 918146 * batch)() + elif("shoucai" in model): + #Add info for Shoucai Model + assert((batch==1) and "shoucai only support 1 batch!") + buf = (c_float * 425 * batch)() else: assert(False and f"unknown model.{model}") buffers.append(buf) self.h.odla_BindToOutput(c_void_p(0), buf, self.ctx) + # send data if("resnet50" in model): - self.h.model_data(self.ctx, (c_int32 * 1)(*[224*224*3*4]), (c_int32 * 1)(*[1000*4])) + self.h.model_data(self.ctx, (c_int32 * 1)(*[224*224*3*4*batch]), (c_int32 * 1)(*[1000*4*batch])) elif("dbnet" in model): - self.h.model_data(self.ctx, (c_int32 * 1)(*[1*3*960*1280*4]), (c_int32 * 1)(*[1228800 * 4])) + self.h.model_data(self.ctx, (c_int32 * 1)(*[1*3*960*1280*4*batch]), (c_int32 * 1)(*[1228800 * 4*batch])) elif("crnn" in model): - self.h.model_data(self.ctx, (c_int32 * 1)(*[63840*4]), (c_int32 * 1)(*[918146*4])) + self.h.model_data(self.ctx, (c_int32 * 1)(*[63840*4*batch]), (c_int32 * 1)(*[918146*4*batch])) elif("bert" in model): - self.h.model_data(self.ctx, (c_int32 * 1)(*[512*4, 256*4, 256*4]), (c_int32 * 1)(*[256*4,256*4])) + self.h.model_data(self.ctx, (c_int32 * 3)(*[512*4*batch, 256*4*batch, 256*8*batch]), (c_int32 * 2)(*[256*4*batch,256*4*batch])) + elif ("shoucai" in model): + #per Input File + #self.h.model_data(self.ctx, (c_int32 * 16)(*[8*4*batch, 1*4*batch, 592*4*batch, 1*4*batch, 512*4*batch, 512*4*batch, 73728*4*batch, 27200*4*batch, 13600*4*batch, 13600*4*batch, 122400*4*batch, 350200*4*batch, 15552*4*batch, 1161984*4*batch, 178*4*batch, 425*8*batch]), (c_int32*1)(*[425*4*batch])) + + #per Input Shape + self.h.model_data(self.ctx, (c_int32 * 18)(*[425*4*batch, 8*4*batch, 1*4*batch, 592*4*batch, 1*4*batch, 512*4*batch, 512*4*batch, 73728*4*batch, 425*4*batch, 425*4*batch, 27200*4*batch,13600*4*batch, 13600*4*batch, 122400*4*batch, 350200*4*batch, 15552*4*batch, 1161984*4*batch, 178*4*batch]), (c_int32*1)(*[425*4*batch])) + + + ''' + self.h.model_data(self._ctx, (c_int32 * 16)(*[8*4*batch, \ # embedding_ui_oage_shared_embedding.txt + 1*4*batch, \ # input_from_feature_columns_concat_3.txt + 592*4*batch, \ # input_from_feature_columns_concat.txt + 1*4*batch, \ #input_from_feature_columns_concat_5.txt + 512*4*batch, \ #all_clk_seq_1_time.txt + 512*4*batch, \#all_clk_seq_1_st.txt + 73728*4*batch, \ #seq_input_from_feature_columns_concat_1.txt + 27200*4*batch, \#embedding_item_id_d_shard_embedding_2.txt + 13600*4*batch, \#embedding_item_cate_id_d_shared_embedding_2.txt + 13600*4*batch, \#embedding_item_seller_id_d_shared_embedding_2.txt + 122400*4*batch, \#input_from_feature_columns_concat_4.txt + 350200*4*batch, \#input_from_feature_columns_concat_1.txt + 15552*4*batch, \#seq_input_from_feature_columns_concat.txt + 1161984*4*batch,\ #seq_input_from_feature_columns_concat_2.txt + 178*4*batch, \#input_from_feature_columns_concat_7.txt + 425*8*batch]), \#Unique_preprocess_int64.txt + (c_int32*1)(*[425*4*batch])) #output + + + self.h.model_data(self._ctx, (c_int32 * 18)(*[425*4*batch, \# LookupPkOP + 8*4*batch, \ # embedding_ui_oage_shared_embedding.txt + 1*4*batch, \ # input_from_feature_columns_concat_3.txt + 592*4*batch, \ # input_from_feature_columns_concat.txt + 1*4*batch, \ #input_from_feature_columns_concat_5.txt + 512*4*batch, \ #all_clk_seq_1_time.txt + 512*4*batch, \#all_clk_seq_1_st.txt + 73728*4*batch, \ #seq_input_from_feature_columns_concat_1.txt + 425*4*batch, \ #batch_fill_attributes_for_gul_rank_item_feature + 425*4*batch, \ #batch_fill_attributes_for_gul_rank_item_feature_1 + 27200*4*batch, \#embedding_item_id_d_shard_embedding_2.txt + 13600*4*batch, \#embedding_item_cate_id_d_shared_embedding_2.txt + 13600*4*batch, \#embedding_item_seller_id_d_shared_embedding_2.txt + 122400*4*batch, \#input_from_feature_columns_concat_4.txt + 350200*4*batch, \#input_from_feature_columns_concat_1.txt + 15552*4*batch, \#seq_input_from_feature_columns_concat.txt + 1161984*4*batch,\ #seq_input_from_feature_columns_concat_2.txt + 178*4*batch]), \#input_from_feature_columns_concat_7.txt + + (c_int32*1)(*[425*4*batch])) #output + ''' else: assert(False and f"unknown model.{model}") s = time() - # self.h.odla_ExecuteComputation(self.comp, self.ctx, 0, c_void_p(0)) self.h.odla_ExecuteComputation(self.comp, self.ctx, 0, self.device) t = time() self.logger.info("Execution time:" + str(t - s) + " sec(s)") From 349e3b45058646bf0c620191179286232eb5b145 Mon Sep 17 00:00:00 2001 From: Weiming Zhao Date: Sat, 27 Nov 2021 16:09:23 -0800 Subject: [PATCH 3/6] remove unneeded import --- python/halo/odla.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/halo/odla.py b/python/halo/odla.py index d6acdc447..c4f5c7b8c 100644 --- a/python/halo/odla.py +++ b/python/halo/odla.py @@ -17,8 +17,6 @@ from time import time import logging import os -from pdb import set_trace -import numpy as np class Device(Enum): CUDA = 1 From d47b46de7b5d6bad1a23b5bbbd343a6193a4a5c0 Mon Sep 17 00:00:00 2001 From: zh-wei Date: Thu, 2 Dec 2021 09:50:37 +0000 Subject: [PATCH 4/6] Complete Python api for AnalyzeModel. --- python/halo/halo.py | 34 ++++++++++++++++++++++++++++--- python/halo/inference.py | 22 +++++++------------- python/halo/odla.py | 43 ++++++++++++++++++++++++++++++---------- 3 files changed, 71 insertions(+), 28 deletions(-) diff --git a/python/halo/halo.py b/python/halo/halo.py index 7d55ba37d..a48d58621 100644 --- a/python/halo/halo.py +++ b/python/halo/halo.py @@ -62,6 +62,17 @@ class CXXCodeGenOpts(Structure): ("save_temps", c_bool), ] +HALO_MODEL_INFO_MAX_OUTPUT_NR = 64 +HALO_VODLA_MAX_OUTPUT_RSC_EST = 2048 +class ModelInfo(Structure): + _fields_ = [ + ("num_outputs", c_size_t), + ("output_buf_sizes", c_size_t*HALO_MODEL_INFO_MAX_OUTPUT_NR), + ("input_qps", c_int), + ("adaptive_bsz", c_int), + ("output_rsc_est", c_char*HALO_VODLA_MAX_OUTPUT_RSC_EST), + ] + """ int halo_Compile(halo::ModelFormat model_format, unsigned num_models, @@ -91,6 +102,24 @@ class CXXCodeGenOpts(Structure): c_void_p, # model_info ] +Analyze.argtypes = [ + c_int, # model_format + c_uint, # num_models + c_void_p, # models + c_void_p, # model_sizes + c_char_p, # target + c_int, # batch + c_uint, # num_input_shapes + c_void_p, # input_shapes + c_uint, # num_inputs + c_void_p, # inputs + c_uint, # num_outputs + c_void_p, # outputs + c_void_p, # cg_opts + c_char_p, # filename + c_void_p, # model_info +] + def exec(args): proc = subprocess.run(args) @@ -163,7 +192,7 @@ def CompileModel(model_file, input_shapes, output_names, batch, format): return [output_file, output_bin] -def AnalyzeModel(model_file, input_shapes, batch, format): +def AnalyzeModel(model_file, input_shapes, batch, format, model_info): output_file = "" odla_lib = cast(create_string_buffer(b""), c_char_p) opts = CXXCodeGenOpts() @@ -215,10 +244,9 @@ def AnalyzeModel(model_file, input_shapes, batch, format): outputs, pointer(opts), output_filename, - 0 + pointer(model_info), ) - def CompileODLAModel(files, device, debug=False): cc_file = files[0] bin_file = files[1] diff --git a/python/halo/inference.py b/python/halo/inference.py index 508a95197..022457c43 100644 --- a/python/halo/inference.py +++ b/python/halo/inference.py @@ -32,6 +32,7 @@ def __init__( device, batch, format, + qps, debug, log_level, ): @@ -54,33 +55,24 @@ def __init__( self.format = format self.device = device self.batch = batch + self.qps = qps self.model = None self.so_file = None - self.intermediate_files = [] - self.save_temps = False - print(f"self.batch:{self.batch}") def __del__(self): - self.logger.info(str(self.intermediate_files)) - for file in self.intermediate_files: - if not self.save_temps: - Path(file).unlink() del self.model def Initialize(self): self.logger.info(f"Begin initialization;{self.model_file}") - files = halo.CompileModel( + self.so_file = "/usr/local/lib/libvodla.so" + self.model = odla.ODLAModel(self.so_file) + self.model.Load( self.model_file, self.input_shapes, self.output_names, - self.batch, self.format, - ) - # self.so_file = halo.CompileODLAModel(files, self.device, self.debug) - self.so_file = "/usr/local/lib/libvodla.so" - self.intermediate_files = [*files] - self.model = odla.ODLAModel(self.so_file,files) - self.model.Load(self.model_file) + self.batch, + self.qps) self.logger.info("Done initialization") def Run(self, data): diff --git a/python/halo/odla.py b/python/halo/odla.py index c4f5c7b8c..5ffb51633 100644 --- a/python/halo/odla.py +++ b/python/halo/odla.py @@ -17,6 +17,8 @@ from time import time import logging import os +from pathlib import Path +from halo import halo class Device(Enum): CUDA = 1 @@ -33,23 +35,44 @@ class ValueType(Structure): class ODLAModel: - def __init__(self, so_file, files): + def __init__(self, so_file): self.logger = logging.getLogger(__name__) self.so_file = so_file self.h = None - self.buffers = [] - self.files = files - - def Load(self,model): + self.save_temps = False + self.intermediate_files = [] + + def __del__(self): + self.logger.info(str(self.intermediate_files)) + for file in self.intermediate_files: + if not self.save_temps: + Path(file).unlink() + if self.h is not None: + self.h.odla_DestroyContext(self.ctx) + self.h.odla_DestroyComputation(self.comp) + self.h.odla_DestroyDevice(self.device) + + def Load(self,model,input_shapes,output_names,format,batch,qps): if self.h is None: self.h = CDLL(self.so_file) self.comp = c_void_p(0) self.device = c_void_p(0) - self.h.odla_AllocateDevice(c_void_p(0), 0, pointer(self.device)) - self.h.odla_CreateComputation(pointer(self.comp)) - # TODO: - # use_sim = c_bool(True) - # self.h.odla_SetComputationItem(self.comp, 7, pointer(use_sim)) + model_info = halo.ModelInfo() + model_info.input_qps = qps + model_info.adaptive_bsz = batch + rsc_est = c_void_p(0) + if qps>0 and batch==1: + halo.AnalyzeModel(model,[],1,format,model_info) + rsc_est = (c_char_p)(model_info.output_rsc_est) + self.h.odla_AllocateDevice(c_void_p(0), 0, pointer(self.device), rsc_est) + self.files = halo.CompileModel( + model, + input_shapes, + output_names, + model_info.adaptive_bsz, + format, + ) + self.intermediate_files = [*self.files] cc_file = str(self.files[0]).encode("utf-8") bin_file = str(self.files[1]).encode("utf-8") From ed3a1c12de5c1fdf90d6ccd98283060b1a1e53aa Mon Sep 17 00:00:00 2001 From: zh-wei Date: Mon, 6 Dec 2021 09:42:56 +0000 Subject: [PATCH 5/6] [Analyzer][Bugfix] halo.CompileModel() should use user's batchsize instead of adaptive_bsz. --- python/halo/odla.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/halo/odla.py b/python/halo/odla.py index 5ffb51633..e0e4b2aba 100644 --- a/python/halo/odla.py +++ b/python/halo/odla.py @@ -69,7 +69,7 @@ def Load(self,model,input_shapes,output_names,format,batch,qps): model, input_shapes, output_names, - model_info.adaptive_bsz, + batch, format, ) self.intermediate_files = [*self.files] From 909c7f02e1d02575d7cf31c631a717ac8b5f9ea2 Mon Sep 17 00:00:00 2001 From: zh-wei Date: Fri, 24 Dec 2021 02:35:35 +0000 Subject: [PATCH 6/6] [AnalyzeModel] Analyzer should make use of 'input_shapes' for computation estimating. --- python/halo/odla.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/halo/odla.py b/python/halo/odla.py index e0e4b2aba..394c85d02 100644 --- a/python/halo/odla.py +++ b/python/halo/odla.py @@ -62,7 +62,7 @@ def Load(self,model,input_shapes,output_names,format,batch,qps): model_info.adaptive_bsz = batch rsc_est = c_void_p(0) if qps>0 and batch==1: - halo.AnalyzeModel(model,[],1,format,model_info) + halo.AnalyzeModel(model,input_shapes,1,format,model_info) rsc_est = (c_char_p)(model_info.output_rsc_est) self.h.odla_AllocateDevice(c_void_p(0), 0, pointer(self.device), rsc_est) self.files = halo.CompileModel(