Merge pull request #874 from adamcik/feature/improve-find-and-scan-code
Feature/improve find and scan code
This commit is contained in:
commit
c84ed733ee
@ -86,6 +86,10 @@ See :ref:`config` for general help on configuring Mopidy.
|
||||
Number of milliseconds before giving up scanning a file and moving on to
|
||||
the next file.
|
||||
|
||||
.. confval:: local/scan_follow_symlinks
|
||||
|
||||
If we should follow symlinks found in :confval:`local/media_dir`
|
||||
|
||||
.. confval:: local/scan_flush_threshold
|
||||
|
||||
Number of tracks to wait before telling library it should try and store
|
||||
|
||||
@ -29,6 +29,7 @@ class Extension(ext.Extension):
|
||||
schema['scan_timeout'] = config.Integer(
|
||||
minimum=1000, maximum=1000*60*60)
|
||||
schema['scan_flush_threshold'] = config.Integer(minimum=0)
|
||||
schema['scan_follow_symlinks'] = config.Boolean()
|
||||
schema['excluded_file_extensions'] = config.List(optional=True)
|
||||
return schema
|
||||
|
||||
|
||||
@ -74,9 +74,17 @@ class ScanCommand(commands.Command):
|
||||
uris_to_update = set()
|
||||
uris_to_remove = set()
|
||||
|
||||
file_mtimes = path.find_mtimes(media_dir)
|
||||
file_mtimes, file_errors = path.find_mtimes(
|
||||
media_dir, follow=config['local']['scan_follow_symlinks'])
|
||||
|
||||
logger.info('Found %d files in media_dir.', len(file_mtimes))
|
||||
|
||||
if file_errors:
|
||||
logger.warning('Encountered %d errors while scanning media_dir.',
|
||||
len(file_errors))
|
||||
for name in file_errors:
|
||||
logger.debug('Scan error %r for %r', file_errors[name], name)
|
||||
|
||||
num_tracks = library.load()
|
||||
logger.info('Checking %d tracks from library.', num_tracks)
|
||||
|
||||
@ -97,11 +105,13 @@ class ScanCommand(commands.Command):
|
||||
relpath = os.path.relpath(abspath, media_dir)
|
||||
uri = translator.path_to_local_track_uri(relpath)
|
||||
|
||||
if relpath.lower().endswith(excluded_file_extensions):
|
||||
# TODO: move these to a "predicate" check in the finder?
|
||||
if b'/.' in relpath:
|
||||
logger.debug('Skipped %s: Hidden directory/file.', uri)
|
||||
elif relpath.lower().endswith(excluded_file_extensions):
|
||||
logger.debug('Skipped %s: File extension excluded.', uri)
|
||||
continue
|
||||
|
||||
uris_to_update.add(uri)
|
||||
else:
|
||||
uris_to_update.add(uri)
|
||||
|
||||
logger.info(
|
||||
'Found %d tracks which need to be updated.', len(uris_to_update))
|
||||
|
||||
@ -6,6 +6,7 @@ data_dir = $XDG_DATA_DIR/mopidy/local
|
||||
playlists_dir = $XDG_DATA_DIR/mopidy/local/playlists
|
||||
scan_timeout = 1000
|
||||
scan_flush_threshold = 1000
|
||||
scan_follow_symlinks = false
|
||||
excluded_file_extensions =
|
||||
.directory
|
||||
.html
|
||||
|
||||
@ -110,11 +110,11 @@ def expand_path(path):
|
||||
return path
|
||||
|
||||
|
||||
def _find_worker(relative, hidden, done, work, results, errors):
|
||||
def _find_worker(relative, follow, done, work, results, errors):
|
||||
"""Worker thread for collecting stat() results.
|
||||
|
||||
:param str relative: directory to make results relative to
|
||||
:param bool hidden: whether to include files and dirs starting with '.'
|
||||
:param bool follow: if symlinks should be followed
|
||||
:param threading.Event done: event indicating that all work has been done
|
||||
:param queue.Queue work: queue of paths to process
|
||||
:param dict results: shared dictionary for storing all the stat() results
|
||||
@ -122,7 +122,7 @@ def _find_worker(relative, hidden, done, work, results, errors):
|
||||
"""
|
||||
while not done.is_set():
|
||||
try:
|
||||
entry = work.get(block=False)
|
||||
entry, parents = work.get(block=False)
|
||||
except queue.Empty:
|
||||
continue
|
||||
|
||||
@ -132,45 +132,57 @@ def _find_worker(relative, hidden, done, work, results, errors):
|
||||
path = entry
|
||||
|
||||
try:
|
||||
st = os.lstat(entry)
|
||||
if follow:
|
||||
st = os.stat(entry)
|
||||
else:
|
||||
st = os.lstat(entry)
|
||||
|
||||
if (st.st_dev, st.st_ino) in parents:
|
||||
errors[path] = Exception('Sym/hardlink loop found.')
|
||||
continue
|
||||
|
||||
parents = parents + [(st.st_dev, st.st_ino)]
|
||||
if stat.S_ISDIR(st.st_mode):
|
||||
for e in os.listdir(entry):
|
||||
if hidden or not e.startswith(b'.'):
|
||||
work.put(os.path.join(entry, e))
|
||||
work.put((os.path.join(entry, e), parents))
|
||||
elif stat.S_ISREG(st.st_mode):
|
||||
results[path] = st
|
||||
elif stat.S_ISLNK(st.st_mode):
|
||||
errors[path] = Exception('Not following symlinks.')
|
||||
else:
|
||||
errors[path] = 'Not a file or directory'
|
||||
except os.error as e:
|
||||
errors[path] = str(e)
|
||||
errors[path] = Exception('Not a file or directory.')
|
||||
|
||||
except OSError as e:
|
||||
errors[path] = e
|
||||
finally:
|
||||
work.task_done()
|
||||
|
||||
|
||||
def _find(root, thread_count=10, hidden=True, relative=False):
|
||||
def _find(root, thread_count=10, relative=False, follow=False):
|
||||
"""Threaded find implementation that provides stat results for files.
|
||||
|
||||
Note that we do _not_ handle loops from bad sym/hardlinks in any way.
|
||||
Tries to protect against sym/hardlink loops by keeping an eye on parent
|
||||
(st_dev, st_ino) pairs.
|
||||
|
||||
:param str root: root directory to search from, may not be a file
|
||||
:param int thread_count: number of workers to use, mainly useful to
|
||||
mitigate network lag when scanning on NFS etc.
|
||||
:param bool hidden: whether to include files and dirs starting with '.'
|
||||
:param bool relative: if results should be relative to root or absolute
|
||||
:param bool follow: if symlinks should be followed
|
||||
"""
|
||||
threads = []
|
||||
results = {}
|
||||
errors = {}
|
||||
done = threading.Event()
|
||||
work = queue.Queue()
|
||||
work.put(os.path.abspath(root))
|
||||
work.put((os.path.abspath(root), []))
|
||||
|
||||
if not relative:
|
||||
root = None
|
||||
|
||||
args = (root, follow, done, work, results, errors)
|
||||
for i in range(thread_count):
|
||||
t = threading.Thread(target=_find_worker,
|
||||
args=(root, hidden, done, work, results, errors))
|
||||
t = threading.Thread(target=_find_worker, args=args)
|
||||
t.daemon = True
|
||||
t.start()
|
||||
threads.append(t)
|
||||
@ -182,9 +194,10 @@ def _find(root, thread_count=10, hidden=True, relative=False):
|
||||
return results, errors
|
||||
|
||||
|
||||
def find_mtimes(root):
|
||||
results, errors = _find(root, hidden=False, relative=False)
|
||||
return dict((f, int(st.st_mtime)) for f, st in results.iteritems())
|
||||
def find_mtimes(root, follow=False):
|
||||
results, errors = _find(root, relative=False, follow=follow)
|
||||
mtimes = dict((f, int(st.st_mtime)) for f, st in results.iteritems())
|
||||
return mtimes, errors
|
||||
|
||||
|
||||
def check_file_path_is_inside_base_dir(file_path, base_path):
|
||||
|
||||
@ -295,7 +295,8 @@ class ScannerTest(unittest.TestCase):
|
||||
|
||||
def find(self, path):
|
||||
media_dir = path_to_data_dir(path)
|
||||
for path in path_lib.find_mtimes(media_dir):
|
||||
result, errors = path_lib.find_mtimes(media_dir)
|
||||
for path in result:
|
||||
yield os.path.join(media_dir, path)
|
||||
|
||||
def scan(self, paths):
|
||||
|
||||
Binary file not shown.
0
tests/data/find/.hidden/.gitignore
vendored
0
tests/data/find/.hidden/.gitignore
vendored
@ -11,7 +11,7 @@ import glib
|
||||
|
||||
from mopidy.utils import path
|
||||
|
||||
from tests import any_int, path_to_data_dir
|
||||
import tests
|
||||
|
||||
|
||||
class GetOrCreateDirTest(unittest.TestCase):
|
||||
@ -214,31 +214,159 @@ class ExpandPathTest(unittest.TestCase):
|
||||
class FindMTimesTest(unittest.TestCase):
|
||||
maxDiff = None
|
||||
|
||||
def find(self, value):
|
||||
return path.find_mtimes(path_to_data_dir(value))
|
||||
def setUp(self):
|
||||
self.tmpdir = tempfile.mkdtemp(b'.mopidy-tests')
|
||||
|
||||
def test_basic_dir(self):
|
||||
self.assert_(self.find(''))
|
||||
def tearDown(self):
|
||||
shutil.rmtree(self.tmpdir, ignore_errors=True)
|
||||
|
||||
def test_nonexistant_dir(self):
|
||||
self.assertEqual(self.find('does-not-exist'), {})
|
||||
def mkdir(self, *args):
|
||||
name = os.path.join(self.tmpdir, *[bytes(a) for a in args])
|
||||
os.mkdir(name)
|
||||
return name
|
||||
|
||||
def test_file(self):
|
||||
self.assertEqual({path_to_data_dir('blank.mp3'): any_int},
|
||||
self.find('blank.mp3'))
|
||||
|
||||
def test_files(self):
|
||||
mtimes = self.find('find')
|
||||
expected_files = [
|
||||
b'find/foo/bar/file', b'find/foo/file', b'find/baz/file']
|
||||
expected = {path_to_data_dir(p): any_int for p in expected_files}
|
||||
self.assertEqual(expected, mtimes)
|
||||
def touch(self, *args):
|
||||
name = os.path.join(self.tmpdir, *[bytes(a) for a in args])
|
||||
open(name, 'w').close()
|
||||
return name
|
||||
|
||||
def test_names_are_bytestrings(self):
|
||||
is_bytes = lambda f: isinstance(f, bytes)
|
||||
for name in self.find(''):
|
||||
self.assert_(
|
||||
is_bytes(name), '%s is not bytes object' % repr(name))
|
||||
"""We shouldn't be mixing in unicode for paths."""
|
||||
result, errors = path.find_mtimes(tests.path_to_data_dir(''))
|
||||
for name in result.keys() + errors.keys():
|
||||
self.assertEqual(name, tests.IsA(bytes))
|
||||
|
||||
def test_nonexistent_dir(self):
|
||||
"""Non existent search roots are an error"""
|
||||
missing = os.path.join(self.tmpdir, 'does-not-exist')
|
||||
result, errors = path.find_mtimes(missing)
|
||||
self.assertEqual(result, {})
|
||||
self.assertEqual(errors, {missing: tests.IsA(OSError)})
|
||||
|
||||
def test_empty_dir(self):
|
||||
"""Empty directories should not show up in results"""
|
||||
self.mkdir('empty')
|
||||
|
||||
result, errors = path.find_mtimes(self.tmpdir)
|
||||
self.assertEqual(result, {})
|
||||
self.assertEqual(errors, {})
|
||||
|
||||
def test_file_as_the_root(self):
|
||||
"""Specifying a file as the root should just return the file"""
|
||||
single = self.touch('single')
|
||||
|
||||
result, errors = path.find_mtimes(single)
|
||||
self.assertEqual(result, {single: tests.any_int})
|
||||
self.assertEqual(errors, {})
|
||||
|
||||
def test_nested_directories(self):
|
||||
"""Searching nested directories should find all files"""
|
||||
|
||||
# Setup foo/bar and baz directories
|
||||
self.mkdir('foo')
|
||||
self.mkdir('foo', 'bar')
|
||||
self.mkdir('baz')
|
||||
|
||||
# Touch foo/file foo/bar/file and baz/file
|
||||
foo_file = self.touch('foo', 'file')
|
||||
foo_bar_file = self.touch('foo', 'bar', 'file')
|
||||
baz_file = self.touch('baz', 'file')
|
||||
|
||||
result, errors = path.find_mtimes(self.tmpdir)
|
||||
self.assertEqual(result, {foo_file: tests.any_int,
|
||||
foo_bar_file: tests.any_int,
|
||||
baz_file: tests.any_int})
|
||||
self.assertEqual(errors, {})
|
||||
|
||||
def test_missing_permission_to_file(self):
|
||||
"""Missing permissions to a file is not a search error"""
|
||||
target = self.touch('no-permission')
|
||||
os.chmod(target, 0)
|
||||
|
||||
result, errors = path.find_mtimes(self.tmpdir)
|
||||
self.assertEqual({target: tests.any_int}, result)
|
||||
self.assertEqual({}, errors)
|
||||
|
||||
def test_missing_permission_to_directory(self):
|
||||
"""Missing permissions to a directory is an error"""
|
||||
directory = self.mkdir('no-permission')
|
||||
os.chmod(directory, 0)
|
||||
|
||||
result, errors = path.find_mtimes(self.tmpdir)
|
||||
self.assertEqual({}, result)
|
||||
self.assertEqual({directory: tests.IsA(OSError)}, errors)
|
||||
|
||||
def test_symlinks_are_ignored(self):
|
||||
"""By default symlinks should be treated as an error"""
|
||||
target = self.touch('target')
|
||||
link = os.path.join(self.tmpdir, 'link')
|
||||
os.symlink(target, link)
|
||||
|
||||
result, errors = path.find_mtimes(self.tmpdir)
|
||||
self.assertEqual(result, {target: tests.any_int})
|
||||
self.assertEqual(errors, {link: tests.IsA(Exception)})
|
||||
|
||||
def test_symlink_to_file_as_root_is_followed(self):
|
||||
"""Passing a symlink as the root should be followed when follow=True"""
|
||||
target = self.touch('target')
|
||||
link = os.path.join(self.tmpdir, 'link')
|
||||
os.symlink(target, link)
|
||||
|
||||
result, errors = path.find_mtimes(link, follow=True)
|
||||
self.assertEqual({link: tests.any_int}, result)
|
||||
self.assertEqual({}, errors)
|
||||
|
||||
def test_symlink_to_directory_is_followed(self):
|
||||
pass
|
||||
|
||||
def test_symlink_pointing_at_itself_fails(self):
|
||||
"""Symlink pointing at itself should give as an OS error"""
|
||||
link = os.path.join(self.tmpdir, 'link')
|
||||
os.symlink(link, link)
|
||||
|
||||
result, errors = path.find_mtimes(link, follow=True)
|
||||
self.assertEqual({}, result)
|
||||
self.assertEqual({link: tests.IsA(OSError)}, errors)
|
||||
|
||||
def test_symlink_pointing_at_parent_fails(self):
|
||||
"""We should detect a loop via the parent and give up on the branch"""
|
||||
os.symlink(self.tmpdir, os.path.join(self.tmpdir, 'link'))
|
||||
|
||||
result, errors = path.find_mtimes(self.tmpdir, follow=True)
|
||||
self.assertEqual({}, result)
|
||||
self.assertEqual(1, len(errors))
|
||||
self.assertEqual(tests.IsA(Exception), errors.values()[0])
|
||||
|
||||
def test_indirect_symlink_loop(self):
|
||||
"""More indirect loops should also be detected"""
|
||||
# Setup tmpdir/directory/loop where loop points to tmpdir
|
||||
directory = os.path.join(self.tmpdir, b'directory')
|
||||
loop = os.path.join(directory, b'loop')
|
||||
|
||||
os.mkdir(directory)
|
||||
os.symlink(self.tmpdir, loop)
|
||||
|
||||
result, errors = path.find_mtimes(self.tmpdir, follow=True)
|
||||
self.assertEqual({}, result)
|
||||
self.assertEqual({loop: tests.IsA(Exception)}, errors)
|
||||
|
||||
def test_symlink_branches_are_not_excluded(self):
|
||||
"""Using symlinks to make a file show up multiple times should work"""
|
||||
self.mkdir('directory')
|
||||
target = self.touch('directory', 'target')
|
||||
link1 = os.path.join(self.tmpdir, b'link1')
|
||||
link2 = os.path.join(self.tmpdir, b'link2')
|
||||
|
||||
os.symlink(target, link1)
|
||||
os.symlink(target, link2)
|
||||
|
||||
expected = {target: tests.any_int,
|
||||
link1: tests.any_int,
|
||||
link2: tests.any_int}
|
||||
|
||||
result, errors = path.find_mtimes(self.tmpdir, follow=True)
|
||||
self.assertEqual(expected, result)
|
||||
self.assertEqual({}, errors)
|
||||
|
||||
|
||||
# TODO: kill this in favour of just os.path.getmtime + mocks
|
||||
|
||||
Loading…
Reference in New Issue
Block a user