utils/path: Add support for handling sym/hardlink loops

This commit is contained in:
Thomas Adamcik 2014-10-15 23:21:11 +02:00
parent 54a89038d3
commit 063c757570
2 changed files with 47 additions and 7 deletions

View File

@ -122,7 +122,7 @@ def _find_worker(relative, follow, done, work, results, errors):
"""
while not done.is_set():
try:
entry = work.get(block=False)
entry, parents = work.get(block=False)
except queue.Empty:
continue
@ -137,13 +137,19 @@ def _find_worker(relative, follow, done, work, results, errors):
else:
st = os.lstat(entry)
if (st.st_dev, st.st_ino) in parents:
errors[path] = Exception('Sym/hardlink loop found.')
continue
parents = parents + [(st.st_dev, st.st_ino)]
if stat.S_ISDIR(st.st_mode):
for e in os.listdir(entry):
work.put(os.path.join(entry, e))
work.put((os.path.join(entry, e), parents))
elif stat.S_ISREG(st.st_mode):
results[path] = st
else:
errors[path] = Exception('Not a file or directory')
except os.error as e:
errors[path] = e
finally:
@ -153,7 +159,8 @@ def _find_worker(relative, follow, done, work, results, errors):
def _find(root, thread_count=10, relative=False, follow=False):
"""Threaded find implementation that provides stat results for files.
Note that we do _not_ handle loops from bad sym/hardlinks in any way.
Tries to protect against sym/hardlink loops by keeping an eye on parent
(st_dev, st_ino) pairs.
:param str root: root directory to search from, may not be a file
:param int thread_count: number of workers to use, mainly useful to
@ -166,7 +173,7 @@ def _find(root, thread_count=10, relative=False, follow=False):
errors = {}
done = threading.Event()
work = queue.Queue()
work.put(os.path.abspath(root))
work.put((os.path.abspath(root), []))
if not relative:
root = None

View File

@ -221,12 +221,12 @@ class FindMTimesTest(unittest.TestCase):
shutil.rmtree(self.tmpdir, ignore_errors=True)
def mkdir(self, *args):
name = os.path.join(self.tmpdir, *args)
name = os.path.join(self.tmpdir, *[bytes(a) for a in args])
os.mkdir(name)
return name
def touch(self, *args):
name = os.path.join(self.tmpdir, *args)
name = os.path.join(self.tmpdir, *[bytes(a) for a in args])
open(name, 'w').close()
return name
@ -320,6 +320,7 @@ class FindMTimesTest(unittest.TestCase):
pass
def test_symlink_pointing_at_itself_fails(self):
"""Symlink pointing at itself should give as an OS error"""
link = os.path.join(self.tmpdir, 'link')
os.symlink(link, link)
@ -328,12 +329,44 @@ class FindMTimesTest(unittest.TestCase):
self.assertEqual({link: tests.IsA(OSError)}, errors)
def test_symlink_pointing_at_parent_fails(self):
"""We should detect a loop via the parent and give up on the branch"""
os.symlink(self.tmpdir, os.path.join(self.tmpdir, 'link'))
result, errors = path.find_mtimes(self.tmpdir, follow=True)
self.assertEqual({}, result)
self.assertEqual(1, len(errors))
self.assertEqual(tests.IsA(OSError), errors.values()[0])
self.assertEqual(tests.IsA(Exception), errors.values()[0])
def test_indirect_symlink_loop(self):
"""More indirect loops should also be detected"""
# Setup tmpdir/directory/loop where loop points to tmpdir
directory = os.path.join(self.tmpdir, b'directory')
loop = os.path.join(directory, b'loop')
os.mkdir(directory)
os.symlink(self.tmpdir, loop)
result, errors = path.find_mtimes(self.tmpdir, follow=True)
self.assertEqual({}, result)
self.assertEqual({loop: tests.IsA(Exception)}, errors)
def test_symlink_branches_are_not_excluded(self):
"""Using symlinks to make a file show up multiple times should work"""
self.mkdir('directory')
target = self.touch('directory', 'target')
link1 = os.path.join(self.tmpdir, b'link1')
link2 = os.path.join(self.tmpdir, b'link2')
os.symlink(target, link1)
os.symlink(target, link2)
expected = {target: tests.any_int,
link1: tests.any_int,
link2: tests.any_int}
result, errors = path.find_mtimes(self.tmpdir, follow=True)
self.assertEqual(expected, result)
self.assertEqual({}, errors)
# TODO: kill this in favour of just os.path.getmtime + mocks