Skip to content

Commit

Permalink
tb_plugin: follow symlinks when looking for runs (#327)
Browse files Browse the repository at this point in the history
This commit makes `walk`-ing directories follow symlinks when searching
for run data (on local filesystems, where it's supported!).

This makes the plugin's search behavior consistent with that of
tensorboard itself; using symlink trees to organize runs is one of the
recommendations made in the tensorboard docs to have fine-grained
control over the naming of runs and the location of the data [1]:

> TensorBoard walks log directories recursively; for finer-grained
> control, prefer using a symlink tree.

A unit test is added to validate the new behavior.

[1] https://github.com/tensorflow/tensorboard/blob/master/README.md#logdir--logdir_spec-legacy-mode
  • Loading branch information
mautier authored Jul 1, 2021
1 parent e828a2c commit 0ce4bcb
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 4 deletions.
29 changes: 26 additions & 3 deletions tb_plugin/test/test_tensorboard_end2end.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import json
import os
import shutil
import socket
import tempfile
import time
import unittest
import urllib
Expand All @@ -9,6 +11,10 @@
from urllib.error import HTTPError


def get_samples_dir():
    """Return the path of the ``samples`` directory used as test input.

    The path is built as ``<directory of this file>/../samples`` and is
    returned without normalization (the ``..`` component is kept).
    """
    this_file = os.path.abspath(__file__)
    test_dir = os.path.dirname(this_file)
    return os.path.join(test_dir, '../samples')


class TestEnd2End(unittest.TestCase):

#def test_tensorboard_gs(self):
Expand All @@ -17,24 +23,41 @@ class TestEnd2End(unittest.TestCase):
# self._test_tensorboard_with_arguments(test_folder, expected_runs, {'TORCH_PROFILER_START_METHOD':'spawn'})

def test_tensorboard_end2end(self):
    """Run the end-to-end check on the sample runs with the spawn start method."""
    # Use the shared helper rather than re-deriving the samples path inline.
    test_folder = get_samples_dir()
    expected_runs = b'["resnet50_num_workers_0", "resnet50_num_workers_4"]'

    print("starting spawn mode testing...")
    self._test_tensorboard_with_arguments(test_folder, expected_runs, {'TORCH_PROFILER_START_METHOD':'spawn'})

def test_tensorboard_fork(self):
    """Run the end-to-end check on the sample runs with the default environment."""
    # Use the shared helper rather than re-deriving the samples path inline.
    test_folder = get_samples_dir()
    expected_runs = b'["resnet50_num_workers_0", "resnet50_num_workers_4"]'

    print("starting fork mode testing")
    self._test_tensorboard_with_arguments(test_folder, expected_runs)

def test_tensorboard_with_path_prefix(self):
    """Run the end-to-end check on the sample runs with a path prefix set."""
    # Use the shared helper rather than re-deriving the samples path inline.
    test_folder = get_samples_dir()
    expected_runs = b'["resnet50_num_workers_0", "resnet50_num_workers_4"]'
    self._test_tensorboard_with_arguments(test_folder, expected_runs, path_prefix='/tensorboard/viewer/')

def test_tensorboard_with_symlinks(self):
    """Check that a run directory reachable only through a symlink is listed.

    A temporary logdir is populated with one copied run and one symlinked
    run; both names are expected in the reported run list.
    """
    logdir = tempfile.mkdtemp(prefix="tensorboard_logdir")
    # Register cleanup right away so the temporary directory is removed even
    # when the copy, the symlink creation, or the end-to-end check fails.
    self.addCleanup(shutil.rmtree, logdir)

    samples_dir = get_samples_dir()

    # Create the following layout, with 1 symlink to a run dir, and 1 regular run dir:
    # logdir/
    #     run_concrete/
    #     run_symlink/ --> path/to/samples/resnet50_num_workers_4/
    shutil.copytree(os.path.join(samples_dir, "resnet50_num_workers_0"), os.path.join(logdir, "run_concrete"))
    os.symlink(os.path.join(samples_dir, "resnet50_num_workers_4"), os.path.join(logdir, "run_symlink"))

    expected_runs = b'["run_concrete", "run_symlink"]'
    self._test_tensorboard_with_arguments(logdir, expected_runs)

def _test_tensorboard_with_arguments(self, test_folder, expected_runs, env=None, path_prefix=None):
host='localhost'
port=7007
Expand Down
6 changes: 5 additions & 1 deletion tb_plugin/torch_tb_profiler/io/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,11 @@ def stat(self, filename):
return StatData(file_length)

def walk(self, top, topdown=True, onerror=None):
    """Yield ``(dirpath, dirnames, filenames)`` tuples for the tree under ``top``.

    Thin wrapper around ``os.walk`` that always follows symbolic links;
    ``topdown`` and ``onerror`` are passed through unchanged.
    """
    # Note on followlinks=True: per the tensorboard documentation [1], users are encouraged to
    # use symlink trees to have fine-grained control over the filesystem layout of runs. To
    # support such trees, we must follow links.
    # [1] https://github.com/tensorflow/tensorboard/blob/master/README.md#logdir--logdir_spec-legacy-mode
    # NOTE(review): os.walk does not remember visited directories, so a link that points at
    # one of its own ancestors would recurse indefinitely - confirm logdirs cannot contain cycles.
    yield from os.walk(top, topdown, onerror, followlinks=True)

class S3FileSystem(RemotePath, BaseFileSystem):
"""Provides filesystem access to S3."""
Expand Down

0 comments on commit 0ce4bcb

Please sign in to comment.