diff --git a/docs/Memory_Optimizer.md b/docs/Memory_Optimizer.md
index 3ef3a575f20a1..e9ceae00a684d 100644
--- a/docs/Memory_Optimizer.md
+++ b/docs/Memory_Optimizer.md
@@ -20,10 +20,10 @@ Not all models and recipes need this optimizer technique. Imagine if your traini
## Quick trial
1. Make sure ONNX Runtime training wheel is installed and correctly configured.
-2. Integrate models using `ORTModule`, be noted log_level should be equal or lower than INFO.
- > ort_model = ORTModule(pt_model, DebugOptions(log_level=LogLevel.INFO))
-3. Run the training as usual and redirect all outputs into log file; then stop it after training few steps.
-4. Check the logging file, search "Summary", you could possibly find something like this:
+2. Integrate the model using `ORTModule`; note that `log_level` must be equal to or lower than `DEVINFO`.
+   > ort_model = ORTModule(pt_model, DebugOptions(log_level=LogLevel.DEVINFO))
+3. Run the training as usual and redirect all outputs into a log file; then stop it after training a few steps.
+4. Check the log file and search for "Summary"; you should find something like this:
```
MemoryOptimizer Summary:
User config:
diff --git a/docs/ORTModule_Training_Guidelines.md b/docs/ORTModule_Training_Guidelines.md
index 5350988b20964..12733c3551704 100644
--- a/docs/ORTModule_Training_Guidelines.md
+++ b/docs/ORTModule_Training_Guidelines.md
@@ -49,6 +49,90 @@ More options for **developers**.
```
Check [DebugOptions implementation](../orttraining/orttraining/python/training/ortmodule/options.py) for more details.
+#### Log Level Explanations
+
+<table>
+<tr>
+<th>Log Level</th>
+<th>Description</th>
+</tr>
+<tr>
+<td>
+
+`FATAL` | `ERROR` | `WARNING` (For Users)
+
+`WARNING` is the default and recommended level for users.
+</td>
+<td>
+
+- ONNX Runtime backend log level - `FATAL` | `ERROR` | `WARNING`.
+- ORTModule log level - `FATAL` | `ERROR` | `WARNING`.
+- Rank-0 log filtering is `ON` (i.e. logging on rank 0 only).
+- PyTorch exporter export logs filtering is `ON`.
+- PyTorch exporter verbose logs (including tracing graph) filtering is `ON`.
+
+</td>
+</tr>
+<tr>
+<td>
+
+`INFO` (For Users | ORT Developers)
+
+`INFO` is used for collecting experimental feature stats, or a few additional diagnostic messages.
+</td>
+<td>
+
+- ONNX Runtime backend log level - `WARNING`.
+- ORTModule log level - `INFO`.
+- Rank-0 log filtering is `ON` (i.e. logging on rank 0 only).
+- PyTorch exporter export logs filtering is `ON`.
+- PyTorch exporter verbose logs (including tracing graph) filtering is `OFF`.
+
+</td>
+</tr>
+<tr>
+<td>
+
+`DEVINFO` (For ORT Developers)
+
+`DEVINFO` is the recommended level for debugging purposes.
+</td>
+<td>
+
+- ONNX Runtime backend log level - `INFO`.
+- ORTModule log level - `INFO`.
+- Rank-0 log filtering is `OFF` (i.e. logging on all ranks).
+- PyTorch exporter export logs filtering is `OFF`.
+- PyTorch exporter verbose logs (including tracing graph) filtering is `OFF`.
+
+</td>
+</tr>
+<tr>
+<td>
+
+`VERBOSE` (For ORT Developers)
+
+`VERBOSE` is the last resort for debugging hard problems.
+</td>
+<td>
+
+- ONNX Runtime backend log level - `VERBOSE`.
+- ORTModule log level - `VERBOSE`.
+- Rank-0 log filtering is `OFF` (i.e. logging on all ranks).
+- PyTorch exporter export logs filtering is `OFF`.
+- PyTorch exporter verbose logs (including tracing graph) filtering is `OFF`.
+
+</td>
+</tr>
+</table>
+
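+A minimal sketch of selecting a level when wrapping a model (`pt_model` below is a placeholder for your own `torch.nn.Module`):
+
+```python
+from onnxruntime.training.ortmodule import ORTModule, DebugOptions, LogLevel
+
+# DEVINFO surfaces ONNX Runtime backend INFO logs and logs on all ranks,
+# the recommended setting when debugging ORT internals.
+ort_model = ORTModule(pt_model, DebugOptions(log_level=LogLevel.DEVINFO))
+```
+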
### 2.1 Environment Variables
`ORTModule` provides environment variables targeting different use cases.
diff --git a/orttraining/orttraining/python/training/ortmodule/_graph_execution_manager.py b/orttraining/orttraining/python/training/ortmodule/_graph_execution_manager.py
index 04820218b7c49..5eb1d9f382380 100755
--- a/orttraining/orttraining/python/training/ortmodule/_graph_execution_manager.py
+++ b/orttraining/orttraining/python/training/ortmodule/_graph_execution_manager.py
@@ -473,7 +473,6 @@ def _get_graph_transformer_config(self) -> C.TrainingGraphTransformerConfigurati
return graph_transformer_config
@_logger.TrackTime(_logger.ORTModuleInitPhase.GRAPH_BUILDER_INIT)
- @_logger.SuppressLogs(_logger.ORTModuleInitPhase.GRAPH_BUILDER_INIT)
def _initialize_graph_builder(self):
"""Creates a new OrtModuleGraphBuilder, initializes it and saves it to self._graph_builder"""
diff --git a/orttraining/orttraining/python/training/ortmodule/_inference_manager.py b/orttraining/orttraining/python/training/ortmodule/_inference_manager.py
index 8d8be81c549d1..6690af9b71bf1 100644
--- a/orttraining/orttraining/python/training/ortmodule/_inference_manager.py
+++ b/orttraining/orttraining/python/training/ortmodule/_inference_manager.py
@@ -16,7 +16,7 @@
from ._fallback import ORTModuleFallbackException, _FallbackManager, _FallbackPolicy
from ._graph_execution_manager import GraphExecutionManager, _RunStateInfo
from ._io import unflatten_user_output
-from ._logger import ORTModuleInitPhase, SuppressLogs, TrackTime
+from ._logger import ORTModuleInitPhase, TrackTime
from ._utils import save_tuning_results, set_tuning_results
from .options import DebugOptions, _SkipCheck
@@ -207,7 +207,6 @@ def forward(self, *inputs, **kwargs):
return self._fallback_manager.fallback(self._debug_options.logging.log_level, *inputs, **kwargs)
@TrackTime(ORTModuleInitPhase.BUILD_GRAPH)
- @SuppressLogs(ORTModuleInitPhase.BUILD_GRAPH)
def _build_graph(self, graph_transformer_config):
"""Build an inference graph using the module_graph_builder"""
@@ -221,7 +220,6 @@ def _build_graph(self, graph_transformer_config):
)
@TrackTime(ORTModuleInitPhase.CREATE_SESSION)
- @SuppressLogs(ORTModuleInitPhase.CREATE_SESSION)
def _create_execution_agent(self):
"""Creates an InferenceAgent that can run forward graph on an inference model"""
diff --git a/orttraining/orttraining/python/training/ortmodule/_logger.py b/orttraining/orttraining/python/training/ortmodule/_logger.py
index e075ced8eaac2..0728ebdf19af8 100644
--- a/orttraining/orttraining/python/training/ortmodule/_logger.py
+++ b/orttraining/orttraining/python/training/ortmodule/_logger.py
@@ -21,15 +21,18 @@
class LogLevel(IntEnum):
VERBOSE = 0
- INFO = 1
- WARNING = 2
- ERROR = 3
- FATAL = 4
+ DEVINFO = 1 # For ORT developers.
+ INFO = 2 # For ORT users.
+ WARNING = 3
+ ERROR = 4
+ FATAL = 5
ORTMODULE_LOG_LEVEL_MAP: Dict[LogLevel, List[int]] = {
LogLevel.VERBOSE: [Severity.VERBOSE, logging.DEBUG],
- LogLevel.INFO: [Severity.INFO, logging.INFO],
+ LogLevel.DEVINFO: [Severity.INFO, logging.INFO],
+    # ONNX Runtime emits a large volume of INFO logs, so user-level INFO maps to backend WARNING for a better user experience.
+ LogLevel.INFO: [Severity.WARNING, logging.INFO],
LogLevel.WARNING: [Severity.WARNING, logging.WARNING],
LogLevel.ERROR: [Severity.ERROR, logging.ERROR],
LogLevel.FATAL: [Severity.FATAL, logging.FATAL],
@@ -48,13 +51,13 @@ def configure_ortmodule_logger(log_level: LogLevel) -> logging.Logger:
"""Configure the logger for ortmodule according to following rules.
1. If multiple processes are used, the rank will be appended
to the logger name.
- 2. If the log level is greater than info, the logger will be
+ 2. If the log level is equal to or greater than INFO, the logger will be
disabled for non-zero ranks.
"""
rank_info = f".rank-{get_rank()}" if get_world_size() > 1 else ""
logger = logging.getLogger(f"orttraining{rank_info}")
- # Disable the logger for non-zero ranks when level > info
- logger.disabled = log_level > LogLevel.INFO and get_rank() != 0
+ # Disable the logger for non-zero ranks when level >= INFO
+ logger.disabled = log_level >= LogLevel.INFO and get_rank() != 0
logger.setLevel(ortmodule_loglevel_to_python_loglevel(log_level))
return logger
diff --git a/orttraining/orttraining/python/training/ortmodule/_training_manager.py b/orttraining/orttraining/python/training/ortmodule/_training_manager.py
index e0f11e5aa407e..bafb64235546b 100644
--- a/orttraining/orttraining/python/training/ortmodule/_training_manager.py
+++ b/orttraining/orttraining/python/training/ortmodule/_training_manager.py
@@ -18,7 +18,7 @@
from ._gradient_accumulation_manager import GradientAccumulationManager
from ._graph_execution_manager import GraphExecutionManager, _RunStateInfo
from ._io import _FlattenedModule, _InputInfo, unflatten_user_output
-from ._logger import ORTModuleInitPhase, SuppressLogs, TrackTime
+from ._logger import ORTModuleInitPhase, TrackTime
from ._runtime_inspector import Phase
from ._utils import save_tuning_results, set_tuning_results
from .graph_optimizer_registry import GraphOptimizerRegistry
@@ -358,7 +358,6 @@ def forward(self, *inputs, **kwargs):
return self._fallback_manager.fallback(self._debug_options.logging.log_level, *inputs, **kwargs)
@TrackTime(ORTModuleInitPhase.BUILD_GRAPH)
- @SuppressLogs(ORTModuleInitPhase.BUILD_GRAPH)
def _build_graph(self, graph_transformer_config):
"""Build an optimized gradient graph using the module_graph_builder"""
@@ -401,7 +400,6 @@ def _build_graph(self, graph_transformer_config):
self._gradient_map.append(-1)
@TrackTime(ORTModuleInitPhase.CREATE_SESSION)
- @SuppressLogs(ORTModuleInitPhase.CREATE_SESSION)
def _create_execution_agent(self):
"""Creates a TrainingAgent that can run the forward and backward graph on the training model"""
diff --git a/orttraining/orttraining/python/training/ortmodule/options.py b/orttraining/orttraining/python/training/ortmodule/options.py
index ff0cde37195cb..cddd9cd440b28 100644
--- a/orttraining/orttraining/python/training/ortmodule/options.py
+++ b/orttraining/orttraining/python/training/ortmodule/options.py
@@ -167,11 +167,6 @@ def torch_exporter_filter(self):
@property
def onnxruntime_log_filter(self):
"""Accessor for the filter onnxruntime logs configuration."""
- if self.log_level >= LogLevel.INFO:
- return [
- "CleanUnusedInitializersAndNodeArgs] Removing initializer",
- "Serializing optimized model with Graph Optimization level greater than ORT_ENABLE_EXTENDED",
- ]
return None