Skip to content

Commit

Permalink
Add logs and avoid calling init multiple times after an exception has occurred (#916)
Browse files Browse the repository at this point in the history
  • Loading branch information
jackzipu authored May 24, 2022
1 parent c6b6bc5 commit f9a9d47
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 4 deletions.
1 change: 1 addition & 0 deletions ODLA/platforms/odla_popart/odla_compute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ odla_status odla_DestroyComputation(odla_computation comp) {
}
popart::logging::warn("reset config state, comp: {}", comp);
PopartConfig::instance()->reset_init_state();
popart::logging::warn("odla_DestroyComputation successfully, comp: {}", comp);

return ODLA_SUCCESS;
}
Expand Down
11 changes: 9 additions & 2 deletions ODLA/platforms/odla_popart/odla_popart.cc
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,9 @@ odla_status _odla_computation::init(bool is_compile) {
std::lock_guard<std::mutex> guard(init_mutex_);
if (!session) {
POPLAR_TRY
// Only proceed with initialization if QManager reports no prior failure;
// otherwise return its recorded status.
if (ODLA_SUCCESS != QManager::instance()->get_status())
return QManager::instance()->get_status();
odla_status status = set_opts();
if (status != ODLA_SUCCESS) {
popart::logging::err("set computation option failed");
Expand All @@ -213,7 +216,7 @@ odla_status _odla_computation::init(bool is_compile) {
popart::AnchorReturnType("All"));
// Acquire IPU
if (opts.use_ipu_model) {
popart::logging::info("Using IPU Model to run.");
popart::logging::warn("Using IPU Model to run.");
std::map<std::string, std::string> deviceOpts{
{"numIPUs", std::to_string(opts.ipu_num)}, {"tilesPerIPU", "1216"}};
device =
Expand All @@ -230,6 +233,7 @@ odla_status _odla_computation::init(bool is_compile) {
throw std::runtime_error(
"Failed to get a device when initializing odla_computation");
}
popart::logging::warn("Device acquired to run model");

// Create and config SessionOptions
set_session_opts();
Expand All @@ -255,6 +259,9 @@ odla_status _odla_computation::init(bool is_compile) {
// Create InferenceSession
new_session = std::move(popart::InferenceSession::createFromOnnxModel(
proto, data_flow, device, popart::InputShapeInfo(), session_opts_));
popart::logging::warn(
"New session: {} has been created for computation: {}",
new_session.get(), this);

if (!is_compile) {
if (PopartConfig::instance()->load_or_save_cache()) {
Expand Down Expand Up @@ -404,7 +411,7 @@ bool _odla_computation::hold() {
} else {
std::stringstream ss_holder;
ss_holder << thread_id_of_holder;
popart::logging::warn(
popart::logging::info(
"The odla_computation {} has been held by thread: {}"
", when thread {} try to hold it.",
this, thread_id_of_holder, this_thread_id);
Expand Down
9 changes: 8 additions & 1 deletion ODLA/platforms/odla_popart/odla_popart.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,14 @@ struct _odla_computation {
// Returns the current value of the is_compile_only_ member.
inline bool is_compile_only() {
  return is_compile_only_;
}
inline void release_session() {
if (session != nullptr) {
session->getDevice().getDeviceInfo()->detach();
if (session->getDevice().getDeviceInfo() != nullptr) {
popart::logging::warn("Tring to detach device for computation: {}",
this);
session->getDevice().getDeviceInfo()->detach();
} else
popart::logging::warn(
"Device info is nullptr when try to detach for computation: {}",
this);
popart::logging::warn(
"The computation:{} session:{} detached from device", this,
session.get());
Expand Down
2 changes: 1 addition & 1 deletion ODLA/platforms/odla_popart/popart_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ class PopartConfig {
std::lock_guard<std::mutex> guard(config_mutex_);
if (inited_) {
inited_ = false;
if (cache_fs->is_open()) {
if (cache_fs && cache_fs->is_open()) {
cache_fs->close();
cache_fs->clear();
}
Expand Down

0 comments on commit f9a9d47

Please sign in to comment.