diff --git a/docs/ext/local.rst b/docs/ext/local.rst index 31d00d66..18f66adc 100644 --- a/docs/ext/local.rst +++ b/docs/ext/local.rst @@ -86,6 +86,10 @@ See :ref:`config` for general help on configuring Mopidy. Number of milliseconds before giving up scanning a file and moving on to the next file. +.. confval:: local/scan_follow_symlinks + + If we should follow symlinks found in :confval:`local/media_dir` + .. confval:: local/scan_flush_threshold Number of tracks to wait before telling library it should try and store diff --git a/mopidy/local/__init__.py b/mopidy/local/__init__.py index fc1035fe..a0bb7bc4 100644 --- a/mopidy/local/__init__.py +++ b/mopidy/local/__init__.py @@ -29,6 +29,7 @@ class Extension(ext.Extension): schema['scan_timeout'] = config.Integer( minimum=1000, maximum=1000*60*60) schema['scan_flush_threshold'] = config.Integer(minimum=0) + schema['scan_follow_symlinks'] = config.Boolean() schema['excluded_file_extensions'] = config.List(optional=True) return schema diff --git a/mopidy/local/commands.py b/mopidy/local/commands.py index f2a7ec24..1b8981df 100644 --- a/mopidy/local/commands.py +++ b/mopidy/local/commands.py @@ -74,9 +74,17 @@ class ScanCommand(commands.Command): uris_to_update = set() uris_to_remove = set() - file_mtimes = path.find_mtimes(media_dir) + file_mtimes, file_errors = path.find_mtimes( + media_dir, follow=config['local']['scan_follow_symlinks']) + logger.info('Found %d files in media_dir.', len(file_mtimes)) + if file_errors: + logger.warning('Encountered %d errors while scanning media_dir.', + len(file_errors)) + for name in file_errors: + logger.debug('Scan error %r for %r', file_errors[name], name) + num_tracks = library.load() logger.info('Checking %d tracks from library.', num_tracks) @@ -97,11 +105,13 @@ class ScanCommand(commands.Command): relpath = os.path.relpath(abspath, media_dir) uri = translator.path_to_local_track_uri(relpath) - if relpath.lower().endswith(excluded_file_extensions): + # TODO: move these to a "predicate" check in the finder? + if b'/.' in relpath: + logger.debug('Skipped %s: Hidden directory/file.', uri) + elif relpath.lower().endswith(excluded_file_extensions): logger.debug('Skipped %s: File extension excluded.', uri) - continue - - uris_to_update.add(uri) + else: + uris_to_update.add(uri) logger.info( 'Found %d tracks which need to be updated.', len(uris_to_update)) diff --git a/mopidy/local/ext.conf b/mopidy/local/ext.conf index 9a0f19f1..535f4806 100644 --- a/mopidy/local/ext.conf +++ b/mopidy/local/ext.conf @@ -6,6 +6,7 @@ data_dir = $XDG_DATA_DIR/mopidy/local playlists_dir = $XDG_DATA_DIR/mopidy/local/playlists scan_timeout = 1000 scan_flush_threshold = 1000 +scan_follow_symlinks = false excluded_file_extensions = .directory .html diff --git a/mopidy/utils/path.py b/mopidy/utils/path.py index bad3b6c4..be735ae5 100644 --- a/mopidy/utils/path.py +++ b/mopidy/utils/path.py @@ -110,11 +110,11 @@ def expand_path(path): return path -def _find_worker(relative, hidden, done, work, results, errors): +def _find_worker(relative, follow, done, work, results, errors): """Worker thread for collecting stat() results. :param str relative: directory to make results relative to - :param bool hidden: whether to include files and dirs starting with '.' + :param bool follow: if symlinks should be followed :param threading.Event done: event indicating that all work has been done :param queue.Queue work: queue of paths to process :param dict results: shared dictionary for storing all the stat() results @@ -122,7 +122,7 @@ def _find_worker(relative, hidden, done, work, results, errors): """ while not done.is_set(): try: - entry = work.get(block=False) + entry, parents = work.get(block=False) except queue.Empty: continue @@ -132,45 +132,57 @@ def _find_worker(relative, hidden, done, work, results, errors): path = entry try: - st = os.lstat(entry) + if follow: + st = os.stat(entry) + else: + st = os.lstat(entry) + + if (st.st_dev, st.st_ino) in parents: + errors[path] = Exception('Sym/hardlink loop found.') + continue + + parents = parents + [(st.st_dev, st.st_ino)] if stat.S_ISDIR(st.st_mode): for e in os.listdir(entry): - if hidden or not e.startswith(b'.'): - work.put(os.path.join(entry, e)) + work.put((os.path.join(entry, e), parents)) elif stat.S_ISREG(st.st_mode): results[path] = st + elif stat.S_ISLNK(st.st_mode): + errors[path] = Exception('Not following symlinks.') else: - errors[path] = 'Not a file or directory' - except os.error as e: - errors[path] = str(e) + errors[path] = Exception('Not a file or directory.') + + except OSError as e: + errors[path] = e finally: work.task_done() -def _find(root, thread_count=10, hidden=True, relative=False): +def _find(root, thread_count=10, relative=False, follow=False): """Threaded find implementation that provides stat results for files. - Note that we do _not_ handle loops from bad sym/hardlinks in any way. + Tries to protect against sym/hardlink loops by keeping an eye on parent + (st_dev, st_ino) pairs. :param str root: root directory to search from, may not be a file :param int thread_count: number of workers to use, mainly useful to mitigate network lag when scanning on NFS etc. - :param bool hidden: whether to include files and dirs starting with '.' :param bool relative: if results should be relative to root or absolute + :param bool follow: if symlinks should be followed """ threads = [] results = {} errors = {} done = threading.Event() work = queue.Queue() - work.put(os.path.abspath(root)) + work.put((os.path.abspath(root), [])) if not relative: root = None + args = (root, follow, done, work, results, errors) for i in range(thread_count): - t = threading.Thread(target=_find_worker, - args=(root, hidden, done, work, results, errors)) + t = threading.Thread(target=_find_worker, args=args) t.daemon = True t.start() threads.append(t) @@ -182,9 +194,10 @@ def _find(root, thread_count=10, hidden=True, relative=False): return results, errors -def find_mtimes(root): - results, errors = _find(root, hidden=False, relative=False) - return dict((f, int(st.st_mtime)) for f, st in results.iteritems()) +def find_mtimes(root, follow=False): + results, errors = _find(root, relative=False, follow=follow) + mtimes = dict((f, int(st.st_mtime)) for f, st in results.iteritems()) + return mtimes, errors def check_file_path_is_inside_base_dir(file_path, base_path): diff --git a/tests/audio/test_scan.py b/tests/audio/test_scan.py index 1e352991..2e91ce32 100644 --- a/tests/audio/test_scan.py +++ b/tests/audio/test_scan.py @@ -295,7 +295,8 @@ class ScannerTest(unittest.TestCase): def find(self, path): media_dir = path_to_data_dir(path) - for path in path_lib.find_mtimes(media_dir): + result, errors = path_lib.find_mtimes(media_dir) + for path in result: yield os.path.join(media_dir, path) def scan(self, paths): diff --git a/tests/data/find/.blank.mp3 b/tests/data/find/.blank.mp3 deleted file mode 100644 index ef159a70..00000000 Binary files a/tests/data/find/.blank.mp3 and /dev/null differ diff --git a/tests/data/find/.hidden/.gitignore b/tests/data/find/.hidden/.gitignore deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/data/find/baz/file b/tests/data/find/baz/file deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/data/find/foo/bar/file b/tests/data/find/foo/bar/file deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/data/find/foo/file b/tests/data/find/foo/file deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/utils/test_path.py b/tests/utils/test_path.py index b33c6681..8dafd951 100644 --- a/tests/utils/test_path.py +++ b/tests/utils/test_path.py @@ -11,7 +11,7 @@ import glib from mopidy.utils import path -from tests import any_int, path_to_data_dir +import tests class GetOrCreateDirTest(unittest.TestCase): @@ -214,31 +214,159 @@ class ExpandPathTest(unittest.TestCase): class FindMTimesTest(unittest.TestCase): maxDiff = None - def find(self, value): - return path.find_mtimes(path_to_data_dir(value)) + def setUp(self): + self.tmpdir = tempfile.mkdtemp(b'.mopidy-tests') - def test_basic_dir(self): - self.assert_(self.find('')) + def tearDown(self): + shutil.rmtree(self.tmpdir, ignore_errors=True) - def test_nonexistant_dir(self): - self.assertEqual(self.find('does-not-exist'), {}) + def mkdir(self, *args): + name = os.path.join(self.tmpdir, *[bytes(a) for a in args]) + os.mkdir(name) + return name - def test_file(self): - self.assertEqual({path_to_data_dir('blank.mp3'): any_int}, - self.find('blank.mp3')) - - def test_files(self): - mtimes = self.find('find') - expected_files = [ - b'find/foo/bar/file', b'find/foo/file', b'find/baz/file'] - expected = {path_to_data_dir(p): any_int for p in expected_files} - self.assertEqual(expected, mtimes) + def touch(self, *args): + name = os.path.join(self.tmpdir, *[bytes(a) for a in args]) + open(name, 'w').close() + return name def test_names_are_bytestrings(self): - is_bytes = lambda f: isinstance(f, bytes) - for name in self.find(''): - self.assert_( - is_bytes(name), '%s is not bytes object' % repr(name)) + """We shouldn't be mixing in unicode for paths.""" + result, errors = path.find_mtimes(tests.path_to_data_dir('')) + for name in result.keys() + errors.keys(): + self.assertEqual(name, tests.IsA(bytes)) + + def test_nonexistent_dir(self): + """Non existent search roots are an error""" + missing = os.path.join(self.tmpdir, 'does-not-exist') + result, errors = path.find_mtimes(missing) + self.assertEqual(result, {}) + self.assertEqual(errors, {missing: tests.IsA(OSError)}) + + def test_empty_dir(self): + """Empty directories should not show up in results""" + self.mkdir('empty') + + result, errors = path.find_mtimes(self.tmpdir) + self.assertEqual(result, {}) + self.assertEqual(errors, {}) + + def test_file_as_the_root(self): + """Specifying a file as the root should just return the file""" + single = self.touch('single') + + result, errors = path.find_mtimes(single) + self.assertEqual(result, {single: tests.any_int}) + self.assertEqual(errors, {}) + + def test_nested_directories(self): + """Searching nested directories should find all files""" + + # Setup foo/bar and baz directories + self.mkdir('foo') + self.mkdir('foo', 'bar') + self.mkdir('baz') + + # Touch foo/file foo/bar/file and baz/file + foo_file = self.touch('foo', 'file') + foo_bar_file = self.touch('foo', 'bar', 'file') + baz_file = self.touch('baz', 'file') + + result, errors = path.find_mtimes(self.tmpdir) + self.assertEqual(result, {foo_file: tests.any_int, + foo_bar_file: tests.any_int, + baz_file: tests.any_int}) + self.assertEqual(errors, {}) + + def test_missing_permission_to_file(self): + """Missing permissions to a file is not a search error""" + target = self.touch('no-permission') + os.chmod(target, 0) + + result, errors = path.find_mtimes(self.tmpdir) + self.assertEqual({target: tests.any_int}, result) + self.assertEqual({}, errors) + + def test_missing_permission_to_directory(self): + """Missing permissions to a directory is an error""" + directory = self.mkdir('no-permission') + os.chmod(directory, 0) + + result, errors = path.find_mtimes(self.tmpdir) + self.assertEqual({}, result) + self.assertEqual({directory: tests.IsA(OSError)}, errors) + + def test_symlinks_are_ignored(self): + """By default symlinks should be treated as an error""" + target = self.touch('target') + link = os.path.join(self.tmpdir, 'link') + os.symlink(target, link) + + result, errors = path.find_mtimes(self.tmpdir) + self.assertEqual(result, {target: tests.any_int}) + self.assertEqual(errors, {link: tests.IsA(Exception)}) + + def test_symlink_to_file_as_root_is_followed(self): + """Passing a symlink as the root should be followed when follow=True""" + target = self.touch('target') + link = os.path.join(self.tmpdir, 'link') + os.symlink(target, link) + + result, errors = path.find_mtimes(link, follow=True) + self.assertEqual({link: tests.any_int}, result) + self.assertEqual({}, errors) + + def test_symlink_to_directory_is_followed(self): + pass + + def test_symlink_pointing_at_itself_fails(self): + """Symlink pointing at itself should give as an OS error""" + link = os.path.join(self.tmpdir, 'link') + os.symlink(link, link) + + result, errors = path.find_mtimes(link, follow=True) + self.assertEqual({}, result) + self.assertEqual({link: tests.IsA(OSError)}, errors) + + def test_symlink_pointing_at_parent_fails(self): + """We should detect a loop via the parent and give up on the branch""" + os.symlink(self.tmpdir, os.path.join(self.tmpdir, 'link')) + + result, errors = path.find_mtimes(self.tmpdir, follow=True) + self.assertEqual({}, result) + self.assertEqual(1, len(errors)) + self.assertEqual(tests.IsA(Exception), errors.values()[0]) + + def test_indirect_symlink_loop(self): + """More indirect loops should also be detected""" + # Setup tmpdir/directory/loop where loop points to tmpdir + directory = os.path.join(self.tmpdir, b'directory') + loop = os.path.join(directory, b'loop') + + os.mkdir(directory) + os.symlink(self.tmpdir, loop) + + result, errors = path.find_mtimes(self.tmpdir, follow=True) + self.assertEqual({}, result) + self.assertEqual({loop: tests.IsA(Exception)}, errors) + + def test_symlink_branches_are_not_excluded(self): + """Using symlinks to make a file show up multiple times should work""" + self.mkdir('directory') + target = self.touch('directory', 'target') + link1 = os.path.join(self.tmpdir, b'link1') + link2 = os.path.join(self.tmpdir, b'link2') + + os.symlink(target, link1) + os.symlink(target, link2) + + expected = {target: tests.any_int, + link1: tests.any_int, + link2: tests.any_int} + + result, errors = path.find_mtimes(self.tmpdir, follow=True) + self.assertEqual(expected, result) + self.assertEqual({}, errors) # TODO: kill this in favour of just os.path.getmtime + mocks