From 0ce4bcbd45f7730760eb9bbe84ed4912ef810986 Mon Sep 17 00:00:00 2001 From: Mautier Date: Wed, 30 Jun 2021 17:57:06 -0700 Subject: [PATCH] tb_plugin: follow symlinks when looking for runs (#327) This commit makes `walk`-ing directories follow symlinks when searching for run data (on local filesystems, where it's supported!). This makes the plugin's search behavior consistent with that of tensorboard itself; using symlink trees to organize runs is one of the recommendations made in the tensorboard docs to have fine-grained control over the naming of runs and the location of the data [1]: > TensorBoard walks log directories recursively; for finer-grained > control, prefer using a symlink tree. A unit test is added to validate the new behavior. [1] https://github.com/tensorflow/tensorboard/blob/master/README.md#logdir--logdir_spec-legacy-mode --- tb_plugin/test/test_tensorboard_end2end.py | 29 +++++++++++++++++++--- tb_plugin/torch_tb_profiler/io/file.py | 6 ++++- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/tb_plugin/test/test_tensorboard_end2end.py b/tb_plugin/test/test_tensorboard_end2end.py index 1f7597f43..6051b9592 100644 --- a/tb_plugin/test/test_tensorboard_end2end.py +++ b/tb_plugin/test/test_tensorboard_end2end.py @@ -1,6 +1,8 @@ import json import os +import shutil import socket +import tempfile import time import unittest import urllib @@ -9,6 +11,10 @@ from urllib.error import HTTPError +def get_samples_dir(): + return os.path.join(os.path.dirname(os.path.abspath(__file__)), '../samples') + + class TestEnd2End(unittest.TestCase): #def test_tensorboard_gs(self): @@ -17,24 +23,41 @@ class TestEnd2End(unittest.TestCase): # self._test_tensorboard_with_arguments(test_folder, expected_runs, {'TORCH_PROFILER_START_METHOD':'spawn'}) def test_tensorboard_end2end(self): - test_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)),'../samples') + test_folder = get_samples_dir() expected_runs = b'["resnet50_num_workers_0", "resnet50_num_workers_4"]' print("starting spawn mode testing...") self._test_tensorboard_with_arguments(test_folder, expected_runs, {'TORCH_PROFILER_START_METHOD':'spawn'}) def test_tensorboard_fork(self): - test_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)),'../samples') + test_folder = get_samples_dir() expected_runs = b'["resnet50_num_workers_0", "resnet50_num_workers_4"]' print("starting fork mode testing") self._test_tensorboard_with_arguments(test_folder, expected_runs) def test_tensorboard_with_path_prefix(self): - test_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)),'../samples') + test_folder = get_samples_dir() expected_runs = b'["resnet50_num_workers_0", "resnet50_num_workers_4"]' self._test_tensorboard_with_arguments(test_folder, expected_runs, path_prefix='/tensorboard/viewer/') + def test_tensorboard_with_symlinks(self): + logdir = tempfile.mkdtemp(prefix="tensorboard_logdir") + + samples_dir = get_samples_dir() + + # Create the following layout, with 1 symlink to a run dir, and 1 regular run dir: + # logdir/ + # run_concrete/ + # run_symlink/ --> path/to/samples/resnet50_num_workers_4/ + shutil.copytree(os.path.join(samples_dir, "resnet50_num_workers_0"), os.path.join(logdir, "run_concrete")) + os.symlink(os.path.join(samples_dir, "resnet50_num_workers_4"), os.path.join(logdir, "run_symlink")) + + expected_runs = b'["run_concrete", "run_symlink"]' + self._test_tensorboard_with_arguments(logdir, expected_runs) + + shutil.rmtree(logdir) + def _test_tensorboard_with_arguments(self, test_folder, expected_runs, env=None, path_prefix=None): host='localhost' port=7007 diff --git a/tb_plugin/torch_tb_profiler/io/file.py b/tb_plugin/torch_tb_profiler/io/file.py index 83d16e702..baf8aa0c6 100644 --- a/tb_plugin/torch_tb_profiler/io/file.py +++ b/tb_plugin/torch_tb_profiler/io/file.py @@ -159,7 +159,11 @@ def stat(self, filename): return StatData(file_length) def walk(self, top, topdown=True, onerror=None): - yield from os.walk(top, topdown, onerror) + # Note on followlinks=True: per the tensorboard documentation [1], users are encouraged to + # use symlink trees to have fine-grained control over the filesystem layout of runs. To + # support such trees, we must follow links. + # [1] https://github.com/tensorflow/tensorboard/blob/master/README.md#logdir--logdir_spec-legacy-mode + yield from os.walk(top, topdown, onerror, followlinks=True) class S3FileSystem(RemotePath, BaseFileSystem): """Provides filesystem access to S3."""