diff --git a/tb_plugin/test/test_tensorboard_end2end.py b/tb_plugin/test/test_tensorboard_end2end.py
index 1f7597f43..6051b9592 100644
--- a/tb_plugin/test/test_tensorboard_end2end.py
+++ b/tb_plugin/test/test_tensorboard_end2end.py
@@ -1,6 +1,8 @@
 import json
 import os
+import shutil
 import socket
+import tempfile
 import time
 import unittest
 import urllib
@@ -9,6 +11,11 @@
 from urllib.error import HTTPError
 
 
+def get_samples_dir():
+    """Return the path of the ``samples`` directory, resolved relative to this test file."""
+    return os.path.join(os.path.dirname(os.path.abspath(__file__)), '../samples')
+
+
 class TestEnd2End(unittest.TestCase):
 
     #def test_tensorboard_gs(self):
@@ -17,24 +24,41 @@ class TestEnd2End(unittest.TestCase):
     #    self._test_tensorboard_with_arguments(test_folder, expected_runs, {'TORCH_PROFILER_START_METHOD':'spawn'})
 
     def test_tensorboard_end2end(self):
-        test_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)),'../samples')
+        test_folder = get_samples_dir()
         expected_runs = b'["resnet50_num_workers_0", "resnet50_num_workers_4"]'
 
         print("starting spawn mode testing...")
         self._test_tensorboard_with_arguments(test_folder, expected_runs, {'TORCH_PROFILER_START_METHOD':'spawn'})
 
     def test_tensorboard_fork(self):
-        test_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)),'../samples')
+        test_folder = get_samples_dir()
         expected_runs = b'["resnet50_num_workers_0", "resnet50_num_workers_4"]'
 
         print("starting fork mode testing")
         self._test_tensorboard_with_arguments(test_folder, expected_runs)
 
     def test_tensorboard_with_path_prefix(self):
-        test_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)),'../samples')
+        test_folder = get_samples_dir()
         expected_runs = b'["resnet50_num_workers_0", "resnet50_num_workers_4"]'
         self._test_tensorboard_with_arguments(test_folder, expected_runs, path_prefix='/tensorboard/viewer/')
 
+    def test_tensorboard_with_symlinks(self):
+        logdir = tempfile.mkdtemp(prefix="tensorboard_logdir")
+        # Register the cleanup up front so the temp dir is removed even when the test fails.
+        self.addCleanup(shutil.rmtree, logdir)
+
+        samples_dir = get_samples_dir()
+
+        # Create the following layout, with 1 symlink to a run dir, and 1 regular run dir:
+        #   logdir/
+        #     run_concrete/
+        #     run_symlink/ --> path/to/samples/resnet50_num_workers_4/
+        shutil.copytree(os.path.join(samples_dir, "resnet50_num_workers_0"), os.path.join(logdir, "run_concrete"))
+        os.symlink(os.path.join(samples_dir, "resnet50_num_workers_4"), os.path.join(logdir, "run_symlink"))
+
+        expected_runs = b'["run_concrete", "run_symlink"]'
+        self._test_tensorboard_with_arguments(logdir, expected_runs)
+
     def _test_tensorboard_with_arguments(self, test_folder, expected_runs, env=None, path_prefix=None):
         host='localhost'
         port=7007
diff --git a/tb_plugin/torch_tb_profiler/io/file.py b/tb_plugin/torch_tb_profiler/io/file.py
index 83d16e702..baf8aa0c6 100644
--- a/tb_plugin/torch_tb_profiler/io/file.py
+++ b/tb_plugin/torch_tb_profiler/io/file.py
@@ -159,7 +159,13 @@ def stat(self, filename):
         return StatData(file_length)
 
     def walk(self, top, topdown=True, onerror=None):
-        yield from os.walk(top, topdown, onerror)
+        # Note on followlinks=True: per the tensorboard documentation [1], users are encouraged to
+        # use symlink trees to have fine-grained control over the filesystem layout of runs. To
+        # support such trees, we must follow links.
+        # [1] https://github.com/tensorflow/tensorboard/blob/master/README.md#logdir--logdir_spec-legacy-mode
+        # Caveat: os.walk does not keep track of the directories it has already visited, so a
+        # symlink that points at one of its own parent directories can lead to infinite recursion.
+        yield from os.walk(top, topdown, onerror, followlinks=True)
 
 class S3FileSystem(RemotePath, BaseFileSystem):
     """Provides filesystem access to S3."""