Merge pull request #874 from adamcik/feature/improve-find-and-scan-code
Feature/improve find and scan code
This commit is contained in:
commit
c84ed733ee
@ -86,6 +86,10 @@ See :ref:`config` for general help on configuring Mopidy.
|
|||||||
Number of milliseconds before giving up scanning a file and moving on to
|
Number of milliseconds before giving up scanning a file and moving on to
|
||||||
the next file.
|
the next file.
|
||||||
|
|
||||||
|
.. confval:: local/scan_follow_symlinks
|
||||||
|
|
||||||
|
If we should follow symlinks found in :confval:`local/media_dir`
|
||||||
|
|
||||||
.. confval:: local/scan_flush_threshold
|
.. confval:: local/scan_flush_threshold
|
||||||
|
|
||||||
Number of tracks to wait before telling library it should try and store
|
Number of tracks to wait before telling library it should try and store
|
||||||
|
|||||||
@ -29,6 +29,7 @@ class Extension(ext.Extension):
|
|||||||
schema['scan_timeout'] = config.Integer(
|
schema['scan_timeout'] = config.Integer(
|
||||||
minimum=1000, maximum=1000*60*60)
|
minimum=1000, maximum=1000*60*60)
|
||||||
schema['scan_flush_threshold'] = config.Integer(minimum=0)
|
schema['scan_flush_threshold'] = config.Integer(minimum=0)
|
||||||
|
schema['scan_follow_symlinks'] = config.Boolean()
|
||||||
schema['excluded_file_extensions'] = config.List(optional=True)
|
schema['excluded_file_extensions'] = config.List(optional=True)
|
||||||
return schema
|
return schema
|
||||||
|
|
||||||
|
|||||||
@ -74,9 +74,17 @@ class ScanCommand(commands.Command):
|
|||||||
uris_to_update = set()
|
uris_to_update = set()
|
||||||
uris_to_remove = set()
|
uris_to_remove = set()
|
||||||
|
|
||||||
file_mtimes = path.find_mtimes(media_dir)
|
file_mtimes, file_errors = path.find_mtimes(
|
||||||
|
media_dir, follow=config['local']['scan_follow_symlinks'])
|
||||||
|
|
||||||
logger.info('Found %d files in media_dir.', len(file_mtimes))
|
logger.info('Found %d files in media_dir.', len(file_mtimes))
|
||||||
|
|
||||||
|
if file_errors:
|
||||||
|
logger.warning('Encountered %d errors while scanning media_dir.',
|
||||||
|
len(file_errors))
|
||||||
|
for name in file_errors:
|
||||||
|
logger.debug('Scan error %r for %r', file_errors[name], name)
|
||||||
|
|
||||||
num_tracks = library.load()
|
num_tracks = library.load()
|
||||||
logger.info('Checking %d tracks from library.', num_tracks)
|
logger.info('Checking %d tracks from library.', num_tracks)
|
||||||
|
|
||||||
@ -97,11 +105,13 @@ class ScanCommand(commands.Command):
|
|||||||
relpath = os.path.relpath(abspath, media_dir)
|
relpath = os.path.relpath(abspath, media_dir)
|
||||||
uri = translator.path_to_local_track_uri(relpath)
|
uri = translator.path_to_local_track_uri(relpath)
|
||||||
|
|
||||||
if relpath.lower().endswith(excluded_file_extensions):
|
# TODO: move these to a "predicate" check in the finder?
|
||||||
|
if b'/.' in relpath:
|
||||||
|
logger.debug('Skipped %s: Hidden directory/file.', uri)
|
||||||
|
elif relpath.lower().endswith(excluded_file_extensions):
|
||||||
logger.debug('Skipped %s: File extension excluded.', uri)
|
logger.debug('Skipped %s: File extension excluded.', uri)
|
||||||
continue
|
else:
|
||||||
|
uris_to_update.add(uri)
|
||||||
uris_to_update.add(uri)
|
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
'Found %d tracks which need to be updated.', len(uris_to_update))
|
'Found %d tracks which need to be updated.', len(uris_to_update))
|
||||||
|
|||||||
@ -6,6 +6,7 @@ data_dir = $XDG_DATA_DIR/mopidy/local
|
|||||||
playlists_dir = $XDG_DATA_DIR/mopidy/local/playlists
|
playlists_dir = $XDG_DATA_DIR/mopidy/local/playlists
|
||||||
scan_timeout = 1000
|
scan_timeout = 1000
|
||||||
scan_flush_threshold = 1000
|
scan_flush_threshold = 1000
|
||||||
|
scan_follow_symlinks = false
|
||||||
excluded_file_extensions =
|
excluded_file_extensions =
|
||||||
.directory
|
.directory
|
||||||
.html
|
.html
|
||||||
|
|||||||
@ -110,11 +110,11 @@ def expand_path(path):
|
|||||||
return path
|
return path
|
||||||
|
|
||||||
|
|
||||||
def _find_worker(relative, hidden, done, work, results, errors):
|
def _find_worker(relative, follow, done, work, results, errors):
|
||||||
"""Worker thread for collecting stat() results.
|
"""Worker thread for collecting stat() results.
|
||||||
|
|
||||||
:param str relative: directory to make results relative to
|
:param str relative: directory to make results relative to
|
||||||
:param bool hidden: whether to include files and dirs starting with '.'
|
:param bool follow: if symlinks should be followed
|
||||||
:param threading.Event done: event indicating that all work has been done
|
:param threading.Event done: event indicating that all work has been done
|
||||||
:param queue.Queue work: queue of paths to process
|
:param queue.Queue work: queue of paths to process
|
||||||
:param dict results: shared dictionary for storing all the stat() results
|
:param dict results: shared dictionary for storing all the stat() results
|
||||||
@ -122,7 +122,7 @@ def _find_worker(relative, hidden, done, work, results, errors):
|
|||||||
"""
|
"""
|
||||||
while not done.is_set():
|
while not done.is_set():
|
||||||
try:
|
try:
|
||||||
entry = work.get(block=False)
|
entry, parents = work.get(block=False)
|
||||||
except queue.Empty:
|
except queue.Empty:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@ -132,45 +132,57 @@ def _find_worker(relative, hidden, done, work, results, errors):
|
|||||||
path = entry
|
path = entry
|
||||||
|
|
||||||
try:
|
try:
|
||||||
st = os.lstat(entry)
|
if follow:
|
||||||
|
st = os.stat(entry)
|
||||||
|
else:
|
||||||
|
st = os.lstat(entry)
|
||||||
|
|
||||||
|
if (st.st_dev, st.st_ino) in parents:
|
||||||
|
errors[path] = Exception('Sym/hardlink loop found.')
|
||||||
|
continue
|
||||||
|
|
||||||
|
parents = parents + [(st.st_dev, st.st_ino)]
|
||||||
if stat.S_ISDIR(st.st_mode):
|
if stat.S_ISDIR(st.st_mode):
|
||||||
for e in os.listdir(entry):
|
for e in os.listdir(entry):
|
||||||
if hidden or not e.startswith(b'.'):
|
work.put((os.path.join(entry, e), parents))
|
||||||
work.put(os.path.join(entry, e))
|
|
||||||
elif stat.S_ISREG(st.st_mode):
|
elif stat.S_ISREG(st.st_mode):
|
||||||
results[path] = st
|
results[path] = st
|
||||||
|
elif stat.S_ISLNK(st.st_mode):
|
||||||
|
errors[path] = Exception('Not following symlinks.')
|
||||||
else:
|
else:
|
||||||
errors[path] = 'Not a file or directory'
|
errors[path] = Exception('Not a file or directory.')
|
||||||
except os.error as e:
|
|
||||||
errors[path] = str(e)
|
except OSError as e:
|
||||||
|
errors[path] = e
|
||||||
finally:
|
finally:
|
||||||
work.task_done()
|
work.task_done()
|
||||||
|
|
||||||
|
|
||||||
def _find(root, thread_count=10, hidden=True, relative=False):
|
def _find(root, thread_count=10, relative=False, follow=False):
|
||||||
"""Threaded find implementation that provides stat results for files.
|
"""Threaded find implementation that provides stat results for files.
|
||||||
|
|
||||||
Note that we do _not_ handle loops from bad sym/hardlinks in any way.
|
Tries to protect against sym/hardlink loops by keeping an eye on parent
|
||||||
|
(st_dev, st_ino) pairs.
|
||||||
|
|
||||||
:param str root: root directory to search from, may not be a file
|
:param str root: root directory to search from, may not be a file
|
||||||
:param int thread_count: number of workers to use, mainly useful to
|
:param int thread_count: number of workers to use, mainly useful to
|
||||||
mitigate network lag when scanning on NFS etc.
|
mitigate network lag when scanning on NFS etc.
|
||||||
:param bool hidden: whether to include files and dirs starting with '.'
|
|
||||||
:param bool relative: if results should be relative to root or absolute
|
:param bool relative: if results should be relative to root or absolute
|
||||||
|
:param bool follow: if symlinks should be followed
|
||||||
"""
|
"""
|
||||||
threads = []
|
threads = []
|
||||||
results = {}
|
results = {}
|
||||||
errors = {}
|
errors = {}
|
||||||
done = threading.Event()
|
done = threading.Event()
|
||||||
work = queue.Queue()
|
work = queue.Queue()
|
||||||
work.put(os.path.abspath(root))
|
work.put((os.path.abspath(root), []))
|
||||||
|
|
||||||
if not relative:
|
if not relative:
|
||||||
root = None
|
root = None
|
||||||
|
|
||||||
|
args = (root, follow, done, work, results, errors)
|
||||||
for i in range(thread_count):
|
for i in range(thread_count):
|
||||||
t = threading.Thread(target=_find_worker,
|
t = threading.Thread(target=_find_worker, args=args)
|
||||||
args=(root, hidden, done, work, results, errors))
|
|
||||||
t.daemon = True
|
t.daemon = True
|
||||||
t.start()
|
t.start()
|
||||||
threads.append(t)
|
threads.append(t)
|
||||||
@ -182,9 +194,10 @@ def _find(root, thread_count=10, hidden=True, relative=False):
|
|||||||
return results, errors
|
return results, errors
|
||||||
|
|
||||||
|
|
||||||
def find_mtimes(root):
|
def find_mtimes(root, follow=False):
|
||||||
results, errors = _find(root, hidden=False, relative=False)
|
results, errors = _find(root, relative=False, follow=follow)
|
||||||
return dict((f, int(st.st_mtime)) for f, st in results.iteritems())
|
mtimes = dict((f, int(st.st_mtime)) for f, st in results.iteritems())
|
||||||
|
return mtimes, errors
|
||||||
|
|
||||||
|
|
||||||
def check_file_path_is_inside_base_dir(file_path, base_path):
|
def check_file_path_is_inside_base_dir(file_path, base_path):
|
||||||
|
|||||||
@ -295,7 +295,8 @@ class ScannerTest(unittest.TestCase):
|
|||||||
|
|
||||||
def find(self, path):
|
def find(self, path):
|
||||||
media_dir = path_to_data_dir(path)
|
media_dir = path_to_data_dir(path)
|
||||||
for path in path_lib.find_mtimes(media_dir):
|
result, errors = path_lib.find_mtimes(media_dir)
|
||||||
|
for path in result:
|
||||||
yield os.path.join(media_dir, path)
|
yield os.path.join(media_dir, path)
|
||||||
|
|
||||||
def scan(self, paths):
|
def scan(self, paths):
|
||||||
|
|||||||
Binary file not shown.
0
tests/data/find/.hidden/.gitignore
vendored
0
tests/data/find/.hidden/.gitignore
vendored
@ -11,7 +11,7 @@ import glib
|
|||||||
|
|
||||||
from mopidy.utils import path
|
from mopidy.utils import path
|
||||||
|
|
||||||
from tests import any_int, path_to_data_dir
|
import tests
|
||||||
|
|
||||||
|
|
||||||
class GetOrCreateDirTest(unittest.TestCase):
|
class GetOrCreateDirTest(unittest.TestCase):
|
||||||
@ -214,31 +214,159 @@ class ExpandPathTest(unittest.TestCase):
|
|||||||
class FindMTimesTest(unittest.TestCase):
|
class FindMTimesTest(unittest.TestCase):
|
||||||
maxDiff = None
|
maxDiff = None
|
||||||
|
|
||||||
def find(self, value):
|
def setUp(self):
|
||||||
return path.find_mtimes(path_to_data_dir(value))
|
self.tmpdir = tempfile.mkdtemp(b'.mopidy-tests')
|
||||||
|
|
||||||
def test_basic_dir(self):
|
def tearDown(self):
|
||||||
self.assert_(self.find(''))
|
shutil.rmtree(self.tmpdir, ignore_errors=True)
|
||||||
|
|
||||||
def test_nonexistant_dir(self):
|
def mkdir(self, *args):
|
||||||
self.assertEqual(self.find('does-not-exist'), {})
|
name = os.path.join(self.tmpdir, *[bytes(a) for a in args])
|
||||||
|
os.mkdir(name)
|
||||||
|
return name
|
||||||
|
|
||||||
def test_file(self):
|
def touch(self, *args):
|
||||||
self.assertEqual({path_to_data_dir('blank.mp3'): any_int},
|
name = os.path.join(self.tmpdir, *[bytes(a) for a in args])
|
||||||
self.find('blank.mp3'))
|
open(name, 'w').close()
|
||||||
|
return name
|
||||||
def test_files(self):
|
|
||||||
mtimes = self.find('find')
|
|
||||||
expected_files = [
|
|
||||||
b'find/foo/bar/file', b'find/foo/file', b'find/baz/file']
|
|
||||||
expected = {path_to_data_dir(p): any_int for p in expected_files}
|
|
||||||
self.assertEqual(expected, mtimes)
|
|
||||||
|
|
||||||
def test_names_are_bytestrings(self):
|
def test_names_are_bytestrings(self):
|
||||||
is_bytes = lambda f: isinstance(f, bytes)
|
"""We shouldn't be mixing in unicode for paths."""
|
||||||
for name in self.find(''):
|
result, errors = path.find_mtimes(tests.path_to_data_dir(''))
|
||||||
self.assert_(
|
for name in result.keys() + errors.keys():
|
||||||
is_bytes(name), '%s is not bytes object' % repr(name))
|
self.assertEqual(name, tests.IsA(bytes))
|
||||||
|
|
||||||
|
def test_nonexistent_dir(self):
|
||||||
|
"""Non existent search roots are an error"""
|
||||||
|
missing = os.path.join(self.tmpdir, 'does-not-exist')
|
||||||
|
result, errors = path.find_mtimes(missing)
|
||||||
|
self.assertEqual(result, {})
|
||||||
|
self.assertEqual(errors, {missing: tests.IsA(OSError)})
|
||||||
|
|
||||||
|
def test_empty_dir(self):
|
||||||
|
"""Empty directories should not show up in results"""
|
||||||
|
self.mkdir('empty')
|
||||||
|
|
||||||
|
result, errors = path.find_mtimes(self.tmpdir)
|
||||||
|
self.assertEqual(result, {})
|
||||||
|
self.assertEqual(errors, {})
|
||||||
|
|
||||||
|
def test_file_as_the_root(self):
|
||||||
|
"""Specifying a file as the root should just return the file"""
|
||||||
|
single = self.touch('single')
|
||||||
|
|
||||||
|
result, errors = path.find_mtimes(single)
|
||||||
|
self.assertEqual(result, {single: tests.any_int})
|
||||||
|
self.assertEqual(errors, {})
|
||||||
|
|
||||||
|
def test_nested_directories(self):
|
||||||
|
"""Searching nested directories should find all files"""
|
||||||
|
|
||||||
|
# Setup foo/bar and baz directories
|
||||||
|
self.mkdir('foo')
|
||||||
|
self.mkdir('foo', 'bar')
|
||||||
|
self.mkdir('baz')
|
||||||
|
|
||||||
|
# Touch foo/file foo/bar/file and baz/file
|
||||||
|
foo_file = self.touch('foo', 'file')
|
||||||
|
foo_bar_file = self.touch('foo', 'bar', 'file')
|
||||||
|
baz_file = self.touch('baz', 'file')
|
||||||
|
|
||||||
|
result, errors = path.find_mtimes(self.tmpdir)
|
||||||
|
self.assertEqual(result, {foo_file: tests.any_int,
|
||||||
|
foo_bar_file: tests.any_int,
|
||||||
|
baz_file: tests.any_int})
|
||||||
|
self.assertEqual(errors, {})
|
||||||
|
|
||||||
|
def test_missing_permission_to_file(self):
|
||||||
|
"""Missing permissions to a file is not a search error"""
|
||||||
|
target = self.touch('no-permission')
|
||||||
|
os.chmod(target, 0)
|
||||||
|
|
||||||
|
result, errors = path.find_mtimes(self.tmpdir)
|
||||||
|
self.assertEqual({target: tests.any_int}, result)
|
||||||
|
self.assertEqual({}, errors)
|
||||||
|
|
||||||
|
def test_missing_permission_to_directory(self):
|
||||||
|
"""Missing permissions to a directory is an error"""
|
||||||
|
directory = self.mkdir('no-permission')
|
||||||
|
os.chmod(directory, 0)
|
||||||
|
|
||||||
|
result, errors = path.find_mtimes(self.tmpdir)
|
||||||
|
self.assertEqual({}, result)
|
||||||
|
self.assertEqual({directory: tests.IsA(OSError)}, errors)
|
||||||
|
|
||||||
|
def test_symlinks_are_ignored(self):
|
||||||
|
"""By default symlinks should be treated as an error"""
|
||||||
|
target = self.touch('target')
|
||||||
|
link = os.path.join(self.tmpdir, 'link')
|
||||||
|
os.symlink(target, link)
|
||||||
|
|
||||||
|
result, errors = path.find_mtimes(self.tmpdir)
|
||||||
|
self.assertEqual(result, {target: tests.any_int})
|
||||||
|
self.assertEqual(errors, {link: tests.IsA(Exception)})
|
||||||
|
|
||||||
|
def test_symlink_to_file_as_root_is_followed(self):
|
||||||
|
"""Passing a symlink as the root should be followed when follow=True"""
|
||||||
|
target = self.touch('target')
|
||||||
|
link = os.path.join(self.tmpdir, 'link')
|
||||||
|
os.symlink(target, link)
|
||||||
|
|
||||||
|
result, errors = path.find_mtimes(link, follow=True)
|
||||||
|
self.assertEqual({link: tests.any_int}, result)
|
||||||
|
self.assertEqual({}, errors)
|
||||||
|
|
||||||
|
def test_symlink_to_directory_is_followed(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def test_symlink_pointing_at_itself_fails(self):
|
||||||
|
"""Symlink pointing at itself should give as an OS error"""
|
||||||
|
link = os.path.join(self.tmpdir, 'link')
|
||||||
|
os.symlink(link, link)
|
||||||
|
|
||||||
|
result, errors = path.find_mtimes(link, follow=True)
|
||||||
|
self.assertEqual({}, result)
|
||||||
|
self.assertEqual({link: tests.IsA(OSError)}, errors)
|
||||||
|
|
||||||
|
def test_symlink_pointing_at_parent_fails(self):
|
||||||
|
"""We should detect a loop via the parent and give up on the branch"""
|
||||||
|
os.symlink(self.tmpdir, os.path.join(self.tmpdir, 'link'))
|
||||||
|
|
||||||
|
result, errors = path.find_mtimes(self.tmpdir, follow=True)
|
||||||
|
self.assertEqual({}, result)
|
||||||
|
self.assertEqual(1, len(errors))
|
||||||
|
self.assertEqual(tests.IsA(Exception), errors.values()[0])
|
||||||
|
|
||||||
|
def test_indirect_symlink_loop(self):
|
||||||
|
"""More indirect loops should also be detected"""
|
||||||
|
# Setup tmpdir/directory/loop where loop points to tmpdir
|
||||||
|
directory = os.path.join(self.tmpdir, b'directory')
|
||||||
|
loop = os.path.join(directory, b'loop')
|
||||||
|
|
||||||
|
os.mkdir(directory)
|
||||||
|
os.symlink(self.tmpdir, loop)
|
||||||
|
|
||||||
|
result, errors = path.find_mtimes(self.tmpdir, follow=True)
|
||||||
|
self.assertEqual({}, result)
|
||||||
|
self.assertEqual({loop: tests.IsA(Exception)}, errors)
|
||||||
|
|
||||||
|
def test_symlink_branches_are_not_excluded(self):
|
||||||
|
"""Using symlinks to make a file show up multiple times should work"""
|
||||||
|
self.mkdir('directory')
|
||||||
|
target = self.touch('directory', 'target')
|
||||||
|
link1 = os.path.join(self.tmpdir, b'link1')
|
||||||
|
link2 = os.path.join(self.tmpdir, b'link2')
|
||||||
|
|
||||||
|
os.symlink(target, link1)
|
||||||
|
os.symlink(target, link2)
|
||||||
|
|
||||||
|
expected = {target: tests.any_int,
|
||||||
|
link1: tests.any_int,
|
||||||
|
link2: tests.any_int}
|
||||||
|
|
||||||
|
result, errors = path.find_mtimes(self.tmpdir, follow=True)
|
||||||
|
self.assertEqual(expected, result)
|
||||||
|
self.assertEqual({}, errors)
|
||||||
|
|
||||||
|
|
||||||
# TODO: kill this in favour of just os.path.getmtime + mocks
|
# TODO: kill this in favour of just os.path.getmtime + mocks
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user