Commit 04cd65b

fix the trainer debug path and save the interactive html graph path in the debug report
liyin2015 committed Dec 27, 2024
1 parent f831344 commit 04cd65b
Showing 3 changed files with 32 additions and 35 deletions.
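
For orientation, a minimal sketch (not part of the diff) of what the debug report returned by draw_graph carries after this commit; the key names come from the parameter.py hunk below, while the call site and file paths are assumed:

    # Hypothetical call site; `loss` stands for any Parameter with a traced graph.
    report = loss.draw_graph(filepath="debug/", full_trace=False)

    report["graph_path"]              # static Graphviz rendering (final_path)
    report["root_path"]               # "<filepath>_root.json" dump of the root node
    report["interactive_html_graph"]  # new: path returned by draw_interactive_html_graph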
25 changes: 11 additions & 14 deletions adalflow/adalflow/optim/parameter.py
@@ -1094,7 +1094,6 @@ def wrap_and_escape(text, width=40):
# n.gradients, [n.gradients_context[g] for g in n.gradients]
# )
# if "output" in n.name:
print(f"Node: {n.name}, \n gradients: {n.gradients}")
for g in n.gradients:
gradient_context = g.context
log.info(f"Gradient context display: {gradient_context}")
@@ -1141,8 +1140,14 @@ def wrap_and_escape(text, width=40):
save_json(self.to_dict(), f"{filepath}_root.json")

# draw interactive graph
- self.draw_interactive_html_graph(filepath=filepath, nodes=nodes, edges=edges)
- output = {"graph_path": final_path, "root_path": f"{filepath}_root.json"}
+ graph_file: Dict[str, str] = self.draw_interactive_html_graph(
+     filepath=filepath, nodes=nodes, edges=edges
+ )
+ output = {
+     "graph_path": final_path,
+     "root_path": f"{filepath}_root.json",
+     "interactive_html_graph": graph_file["graph_path"],
+ }
print(f"Graph saved as {filepath}.{format}")
return output

@@ -1163,7 +1168,6 @@ def draw_output_subgraph(
filepath (str): Path to save the graph.
"""

- # TODO: improve the pathes
assert rankdir in ["LR", "TB"]
from adalflow.utils.global_config import get_adalflow_default_root_path

@@ -1176,13 +1180,8 @@
) from e

root_path = get_adalflow_default_root_path()
- # # prepare the log directory
- # log_dir = os.path.join(root_path, "logs")

- # # Set up TensorBoard logging
- # writer = SummaryWriter(log_dir)

filename = f"trace_component_output_graph_{self.name}_id_{self.id}"
filename = f"trace_component_output_graph_{self.name}_id_{self.id}.{format}"
filepath = (
os.path.join(filepath, filename)
if filepath
@@ -1193,9 +1192,7 @@
nodes, edges = self._collect_output_subgraph()

# Step 2: Render using Graphviz
filename = f"output_subgraph_{self.name}_{self.id}"
filepath = filepath or f"./{filename}"
print(f"Saving OUTPUT subgraph to {filepath}.{format}")
print(f"Saving OUTPUT subgraph to {filepath}")

dot = Digraph(format=format, graph_attr={"rankdir": rankdir})
node_ids = set()
Expand Down Expand Up @@ -1230,7 +1227,7 @@ def draw_output_subgraph(

# Step 3: Save and render
dot.render(filepath, cleanup=True)
print(f"Graph saved as {filepath}.{format}")
print(f"Graph saved as {filepath}")
return {"output_subgraph": filepath}

def draw_component_subgraph(
40 changes: 20 additions & 20 deletions adalflow/adalflow/optim/trainer/trainer.py
@@ -963,29 +963,29 @@ def _fit_text_grads_one_step_for_debug(self, train_loader: Any) -> Dict[str, str
debug_files.update(debug_component_file)

# zero grad
- # self._zero_grad_text_optimizers()
- # # revert
- # self._revert_text_optimizers()
+ self._zero_grad_text_optimizers()
+ # revert
+ self._revert_text_optimizers()

- # total_loss.reset_all_gradients()
+ total_loss.reset_all_gradients()

- # # draw graph on a single loss
- # total_loss = sum_ops([copy(failed_loss)])
- # total_loss.backward()
- # self._propose_text_optimizers()
+ # draw graph on a single loss
+ total_loss = sum_ops([copy(failed_loss)])
+ total_loss.backward()
+ self._propose_text_optimizers()

- # failed_debug_files = total_loss.draw_graph(
- #     filepath=debug_path, full_trace=False
- # )
- # failed_output_file = total_loss.draw_output_subgraph(filepath=debug_path)
- # failed_component_file = total_loss.draw_component_subgraph(filepath=debug_path)
- # failed_debug_files.update(failed_output_file)
- # failed_debug_files.update(failed_component_file)
-
- # for k, v in failed_debug_files.items():
- #     if k in debug_files:
- #         k = f"failed_{k}"
- #     debug_files[k] = v
+ failed_debug_files = total_loss.draw_graph(
+     filepath=debug_path, full_trace=False
+ )
+ failed_output_file = total_loss.draw_output_subgraph(filepath=debug_path)
+ failed_component_file = total_loss.draw_component_subgraph(filepath=debug_path)
+ failed_debug_files.update(failed_output_file)
+ failed_debug_files.update(failed_component_file)
+
+ for k, v in failed_debug_files.items():
+     if k in debug_files:
+         k = f"failed_{k}"
+     debug_files[k] = v

return debug_files
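
The loop above prefixes keys that already exist in debug_files with "failed_", so the graphs drawn for the failed sample do not overwrite the ones drawn for the full batch. A standalone illustration of that merge (the dictionary contents are made up):

    debug_files = {"graph_path": "debug/pass.png"}
    failed_debug_files = {"graph_path": "debug/fail.png", "output_subgraph": "debug/fail_out"}

    for k, v in failed_debug_files.items():
        if k in debug_files:
            k = f"failed_{k}"
        debug_files[k] = v

    # debug_files now holds:
    # {"graph_path": "debug/pass.png",
    #  "failed_graph_path": "debug/fail.png",
    #  "output_subgraph": "debug/fail_out"}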

2 changes: 1 addition & 1 deletion benchmarks/hotpot_qa/adal_exp/train_agent_rag.py
@@ -161,7 +161,7 @@ def train(
# )

train(
-     debug=False,
+     debug=True,
max_steps=12,
)
# 0.68 on val without training, 0.74on the second step. 0.84 test
