From 063c757570f0c11678e9ade47811c3daeba26233 Mon Sep 17 00:00:00 2001 From: Thomas Adamcik Date: Wed, 15 Oct 2014 23:21:11 +0200 Subject: [PATCH] utils/path: Add support for handling sym/hardlink loops --- mopidy/utils/path.py | 15 +++++++++++---- tests/utils/test_path.py | 39 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 47 insertions(+), 7 deletions(-) diff --git a/mopidy/utils/path.py b/mopidy/utils/path.py index 34a7f75e..01b97a50 100644 --- a/mopidy/utils/path.py +++ b/mopidy/utils/path.py @@ -122,7 +122,7 @@ def _find_worker(relative, follow, done, work, results, errors): """ while not done.is_set(): try: - entry = work.get(block=False) + entry, parents = work.get(block=False) except queue.Empty: continue @@ -137,13 +137,19 @@ def _find_worker(relative, follow, done, work, results, errors): else: st = os.lstat(entry) + if (st.st_dev, st.st_ino) in parents: + errors[path] = Exception('Sym/hardlink loop found.') + continue + + parents = parents + [(st.st_dev, st.st_ino)] if stat.S_ISDIR(st.st_mode): for e in os.listdir(entry): - work.put(os.path.join(entry, e)) + work.put((os.path.join(entry, e), parents)) elif stat.S_ISREG(st.st_mode): results[path] = st else: errors[path] = Exception('Not a file or directory') + except os.error as e: errors[path] = e finally: @@ -153,7 +159,8 @@ def _find_worker(relative, follow, done, work, results, errors): def _find(root, thread_count=10, relative=False, follow=False): """Threaded find implementation that provides stat results for files. - Note that we do _not_ handle loops from bad sym/hardlinks in any way. + Tries to protect against sym/hardlink loops by keeping an eye on parent + (st_dev, st_ino) pairs. :param str root: root directory to search from, may not be a file :param int thread_count: number of workers to use, mainly useful to @@ -166,7 +173,7 @@ def _find(root, thread_count=10, relative=False, follow=False): errors = {} done = threading.Event() work = queue.Queue() - work.put(os.path.abspath(root)) + work.put((os.path.abspath(root), [])) if not relative: root = None diff --git a/tests/utils/test_path.py b/tests/utils/test_path.py index 0e91e008..8dafd951 100644 --- a/tests/utils/test_path.py +++ b/tests/utils/test_path.py @@ -221,12 +221,12 @@ class FindMTimesTest(unittest.TestCase): shutil.rmtree(self.tmpdir, ignore_errors=True) def mkdir(self, *args): - name = os.path.join(self.tmpdir, *args) + name = os.path.join(self.tmpdir, *[bytes(a) for a in args]) os.mkdir(name) return name def touch(self, *args): - name = os.path.join(self.tmpdir, *args) + name = os.path.join(self.tmpdir, *[bytes(a) for a in args]) open(name, 'w').close() return name @@ -320,6 +320,7 @@ class FindMTimesTest(unittest.TestCase): pass def test_symlink_pointing_at_itself_fails(self): + """Symlink pointing at itself should give as an OS error""" link = os.path.join(self.tmpdir, 'link') os.symlink(link, link) @@ -328,12 +329,44 @@ class FindMTimesTest(unittest.TestCase): self.assertEqual({link: tests.IsA(OSError)}, errors) def test_symlink_pointing_at_parent_fails(self): + """We should detect a loop via the parent and give up on the branch""" os.symlink(self.tmpdir, os.path.join(self.tmpdir, 'link')) result, errors = path.find_mtimes(self.tmpdir, follow=True) self.assertEqual({}, result) self.assertEqual(1, len(errors)) - self.assertEqual(tests.IsA(OSError), errors.values()[0]) + self.assertEqual(tests.IsA(Exception), errors.values()[0]) + + def test_indirect_symlink_loop(self): + """More indirect loops should also be detected""" + # Setup tmpdir/directory/loop where loop points to tmpdir + directory = os.path.join(self.tmpdir, b'directory') + loop = os.path.join(directory, b'loop') + + os.mkdir(directory) + os.symlink(self.tmpdir, loop) + + result, errors = path.find_mtimes(self.tmpdir, follow=True) + self.assertEqual({}, result) + self.assertEqual({loop: tests.IsA(Exception)}, errors) + + def test_symlink_branches_are_not_excluded(self): + """Using symlinks to make a file show up multiple times should work""" + self.mkdir('directory') + target = self.touch('directory', 'target') + link1 = os.path.join(self.tmpdir, b'link1') + link2 = os.path.join(self.tmpdir, b'link2') + + os.symlink(target, link1) + os.symlink(target, link2) + + expected = {target: tests.any_int, + link1: tests.any_int, + link2: tests.any_int} + + result, errors = path.find_mtimes(self.tmpdir, follow=True) + self.assertEqual(expected, result) + self.assertEqual({}, errors) # TODO: kill this in favour of just os.path.getmtime + mocks