Commit 04cd65b

fix the trainer debug path and save the interactive html graph path in the debug report
liyin2015 committed Dec 27, 2024
1 parent f831344 commit 04cd65b
Showing 3 changed files with 32 additions and 35 deletions.
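
For orientation, a minimal sketch (not part of the diff) of what the debug report returned by draw_graph carries after this commit; the key names come from the parameter.py hunk below, while the call site and file paths are assumed:

    # Hypothetical call site; `loss` stands for any Parameter with a traced graph.
    report = loss.draw_graph(filepath="debug/", full_trace=False)

    report["graph_path"]              # static Graphviz rendering (final_path)
    report["root_path"]               # "<filepath>_root.json" dump of the root node
    report["interactive_html_graph"]  # new: path returned by draw_interactive_html_graph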
25 changes: 11 additions & 14 deletions adalflow/adalflow/optim/parameter.py
@@ -1094,7 +1094,6 @@ def wrap_and_escape(text, width=40):
# n.gradients, [n.gradients_context[g] for g in n.gradients]
# )
# if "output" in n.name:
print(f"Node: {n.name}, \n gradients: {n.gradients}")
for g in n.gradients:
gradient_context = g.context
log.info(f"Gradient context display: {gradient_context}")
@@ -1141,8 +1140,14 @@ def wrap_and_escape(text, width=40):
save_json(self.to_dict(), f"{filepath}_root.json")

# draw interactive graph
- self.draw_interactive_html_graph(filepath=filepath, nodes=nodes, edges=edges)
- output = {"graph_path": final_path, "root_path": f"{filepath}_root.json"}
+ graph_file: Dict[str, str] = self.draw_interactive_html_graph(
+     filepath=filepath, nodes=nodes, edges=edges
+ )
+ output = {
+     "graph_path": final_path,
+     "root_path": f"{filepath}_root.json",
+     "interactive_html_graph": graph_file["graph_path"],
+ }
print(f"Graph saved as {filepath}.{format}")
return output

@@ -1163,7 +1168,6 @@ def draw_output_subgraph(
filepath (str): Path to save the graph.
"""

- # TODO: improve the pathes
assert rankdir in ["LR", "TB"]
from adalflow.utils.global_config import get_adalflow_default_root_path

@@ -1176,13 +1180,8 @@
) from e

root_path = get_adalflow_default_root_path()
- # # prepare the log directory
- # log_dir = os.path.join(root_path, "logs")

- # # Set up TensorBoard logging
- # writer = SummaryWriter(log_dir)

filename = f"trace_component_output_graph_{self.name}_id_{self.id}"
filename = f"trace_component_output_graph_{self.name}_id_{self.id}.{format}"
filepath = (
os.path.join(filepath, filename)
if filepath
@@ -1193,9 +1192,7 @@
nodes, edges = self._collect_output_subgraph()

# Step 2: Render using Graphviz
filename = f"output_subgraph_{self.name}_{self.id}"
filepath = filepath or f"./{filename}"
print(f"Saving OUTPUT subgraph to {filepath}.{format}")
print(f"Saving OUTPUT subgraph to {filepath}")

dot = Digraph(format=format, graph_attr={"rankdir": rankdir})
node_ids = set()
Expand Down Expand Up @@ -1230,7 +1227,7 @@ def draw_output_subgraph(

# Step 3: Save and render
dot.render(filepath, cleanup=True)
print(f"Graph saved as {filepath}.{format}")
print(f"Graph saved as {filepath}")
return {"output_subgraph": filepath}

def draw_component_subgraph(
40 changes: 20 additions & 20 deletions adalflow/adalflow/optim/trainer/trainer.py
@@ -963,29 +963,29 @@ def _fit_text_grads_one_step_for_debug(self, train_loader: Any) -> Dict[str, str
debug_files.update(debug_component_file)

# zero grad
- # self._zero_grad_text_optimizers()
- # # revert
- # self._revert_text_optimizers()
+ self._zero_grad_text_optimizers()
+ # revert
+ self._revert_text_optimizers()

- # total_loss.reset_all_gradients()
+ total_loss.reset_all_gradients()

- # # draw graph on a single loss
- # total_loss = sum_ops([copy(failed_loss)])
- # total_loss.backward()
- # self._propose_text_optimizers()
+ # draw graph on a single loss
+ total_loss = sum_ops([copy(failed_loss)])
+ total_loss.backward()
+ self._propose_text_optimizers()

- # failed_debug_files = total_loss.draw_graph(
- #     filepath=debug_path, full_trace=False
- # )
- # failed_output_file = total_loss.draw_output_subgraph(filepath=debug_path)
- # failed_component_file = total_loss.draw_component_subgraph(filepath=debug_path)
- # failed_debug_files.update(failed_output_file)
- # failed_debug_files.update(failed_component_file)
-
- # for k, v in failed_debug_files.items():
- #     if k in debug_files:
- #         k = f"failed_{k}"
- #     debug_files[k] = v
+ failed_debug_files = total_loss.draw_graph(
+     filepath=debug_path, full_trace=False
+ )
+ failed_output_file = total_loss.draw_output_subgraph(filepath=debug_path)
+ failed_component_file = total_loss.draw_component_subgraph(filepath=debug_path)
+ failed_debug_files.update(failed_output_file)
+ failed_debug_files.update(failed_component_file)
+
+ for k, v in failed_debug_files.items():
+     if k in debug_files:
+         k = f"failed_{k}"
+     debug_files[k] = v

return debug_files
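
The loop above prefixes keys that already exist in debug_files with "failed_", so the graphs drawn for the failed sample do not overwrite the ones drawn for the full batch. A standalone illustration of that merge (the dictionary contents are made up):

    debug_files = {"graph_path": "debug/pass.png"}
    failed_debug_files = {"graph_path": "debug/fail.png", "output_subgraph": "debug/fail_out"}

    for k, v in failed_debug_files.items():
        if k in debug_files:
            k = f"failed_{k}"
        debug_files[k] = v

    # debug_files now holds:
    # {"graph_path": "debug/pass.png",
    #  "failed_graph_path": "debug/fail.png",
    #  "output_subgraph": "debug/fail_out"}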

2 changes: 1 addition & 1 deletion benchmarks/hotpot_qa/adal_exp/train_agent_rag.py
@@ -161,7 +161,7 @@ def train(
# )

train(
-     debug=False,
+     debug=True,
max_steps=12,
)
# 0.68 on val without training, 0.74on the second step. 0.84 test
