Merge pull request #1124 from adamcik/fix/audio-scanner-robustness

Improve audio scanner robustness
2015-04-12 22:10:00 +02:00 · 2015-04-12 22:10:00 +02:00 · a3c69f1597
commit a3c69f1597
parent 5d94a265cd 68c2758009
4 changed files with 75 additions and 35 deletions
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@ -17,6 +17,10 @@ v1.0.1 (UNRELEASED)
  behavior was confusing for many users and doesn't work well with the plans
  for multiple outputs.

+- Audio: Update scanner to decode all media it finds. This should fix cases
+  where the scanner hangs on non-audio files like video. The scanner will now
+  also let us know if we found any decodable audio. (Fixes: :issue:`726`)
+

 v1.0.0 (2015-03-25)
 ===================
--- a/mopidy/audio/scan.py
+++ b/mopidy/audio/scan.py
@ -1,4 +1,5 @@
-from __future__ import absolute_import, division, unicode_literals
+from __future__ import (
+    absolute_import, division, print_function, unicode_literals)

 import collections

@ -14,7 +15,7 @@ from mopidy.utils import encoding
 _missing_plugin_desc = gst.pbutils.missing_plugin_message_get_description

 _Result = collections.namedtuple(
-    'Result', ('uri', 'tags', 'duration', 'seekable', 'mime'))
+    'Result', ('uri', 'tags', 'duration', 'seekable', 'mime', 'playable'))

 _RAW_AUDIO = gst.Caps(b'audio/x-raw-int; audio/x-raw-float')

@ -51,14 +52,14 @@ class Scanner(object):

        try:
            _start_pipeline(pipeline)
-            tags, mime = _process(pipeline, self._timeout_ms)
+            tags, mime, have_audio = _process(pipeline, self._timeout_ms)
            duration = _query_duration(pipeline)
            seekable = _query_seekable(pipeline)
        finally:
            pipeline.set_state(gst.STATE_NULL)
            del pipeline

-        return _Result(uri, tags, duration, seekable, mime)
+        return _Result(uri, tags, duration, seekable, mime, have_audio)


 # Turns out it's _much_ faster to just create a new pipeline for every as
@ -70,30 +71,38 @@ def _setup_pipeline(uri, proxy_config=None):

    typefind = gst.element_factory_make('typefind')
    decodebin = gst.element_factory_make('decodebin2')
-    sink = gst.element_factory_make('fakesink')

    pipeline = gst.element_factory_make('pipeline')
-    pipeline.add_many(src, typefind, decodebin, sink)
+    pipeline.add_many(src, typefind, decodebin)
    gst.element_link_many(src, typefind, decodebin)

    if proxy_config:
        utils.setup_proxy(src, proxy_config)

-    decodebin.set_property('caps', _RAW_AUDIO)
-    decodebin.connect('pad-added', _pad_added, sink)
    typefind.connect('have-type', _have_type, decodebin)
+    decodebin.connect('pad-added', _pad_added, pipeline)

    return pipeline


 def _have_type(element, probability, caps, decodebin):
    decodebin.set_property('sink-caps', caps)
-    msg = gst.message_new_application(element, caps.get_structure(0))
-    element.get_bus().post(msg)
+    struct = gst.Structure('have-type')
+    struct['caps'] = caps.get_structure(0)
+    element.get_bus().post(gst.message_new_application(element, struct))


-def _pad_added(element, pad, sink):
-    return pad.link(sink.get_pad('sink'))
+def _pad_added(element, pad, pipeline):
+    sink = gst.element_factory_make('fakesink')
+    sink.set_property('sync', False)
+
+    pipeline.add(sink)
+    sink.sync_state_with_parent()
+    pad.link(sink.get_pad('sink'))
+
+    if pad.get_caps().is_subset(_RAW_AUDIO):
+        struct = gst.Structure('have-audio')
+        element.get_bus().post(gst.message_new_application(element, struct))


 def _start_pipeline(pipeline):
@ -123,7 +132,7 @@ def _process(pipeline, timeout_ms):
    clock = pipeline.get_clock()
    bus = pipeline.get_bus()
    timeout = timeout_ms * gst.MSECOND
-    tags, mime, missing_description = {}, None, None
+    tags, mime, have_audio, missing_description = {}, None, False, None

    types = (gst.MESSAGE_ELEMENT | gst.MESSAGE_APPLICATION | gst.MESSAGE_ERROR
             | gst.MESSAGE_EOS | gst.MESSAGE_ASYNC_DONE | gst.MESSAGE_TAG)
@ -139,19 +148,22 @@ def _process(pipeline, timeout_ms):
                missing_description = encoding.locale_decode(
                    _missing_plugin_desc(message))
        elif message.type == gst.MESSAGE_APPLICATION:
-            mime = message.structure.get_name()
-            if mime.startswith('text/') or mime == 'application/xml':
-                return tags, mime
+            if message.structure.get_name() == 'have-type':
+                mime = message.structure['caps'].get_name()
+                if mime.startswith('text/') or mime == 'application/xml':
+                    return tags, mime, have_audio
+            elif message.structure.get_name() == 'have-audio':
+                have_audio = True
        elif message.type == gst.MESSAGE_ERROR:
            error = encoding.locale_decode(message.parse_error()[0])
            if missing_description:
                error = '%s (%s)' % (missing_description, error)
            raise exceptions.ScannerError(error)
        elif message.type == gst.MESSAGE_EOS:
-            return tags, mime
+            return tags, mime, have_audio
        elif message.type == gst.MESSAGE_ASYNC_DONE:
            if message.src == pipeline:
-                return tags, mime
+                return tags, mime, have_audio
        elif message.type == gst.MESSAGE_TAG:
            taglist = message.parse_tag()
            # Note that this will only keep the last tag.
@ -160,3 +172,28 @@ def _process(pipeline, timeout_ms):
        timeout -= clock.get_time() - start

    raise exceptions.ScannerError('Timeout after %dms' % timeout_ms)
+
+
+if __name__ == '__main__':
+    import os
+    import sys
+
+    import gobject
+
+    from mopidy.utils import path
+
+    gobject.threads_init()
+
+    scanner = Scanner(5000)
+    for uri in sys.argv[1:]:
+        if not gst.uri_is_valid(uri):
+            uri = path.path_to_uri(os.path.abspath(uri))
+        try:
+            result = scanner.scan(uri)
+            for key in ('uri', 'mime', 'duration', 'playable', 'seekable'):
+                print('%-20s   %s' % (key, getattr(result, key)))
+            print('tags')
+            for tag, value in result.tags.items():
+                print('%-20s   %s' % (tag, value))
+        except exceptions.ScannerError as error:
+            print('%s: %s' % (uri, error))
--- a/mopidy/local/commands.py
+++ b/mopidy/local/commands.py
@ -135,7 +135,9 @@ class ScanCommand(commands.Command):
                file_uri = path.path_to_uri(os.path.join(media_dir, relpath))
                result = scanner.scan(file_uri)
                tags, duration = result.tags, result.duration
-                if duration < MIN_DURATION_MS:
+                if not result.playable:
+                    logger.warning('Failed %s: No audio found in file.', uri)
+                elif duration < MIN_DURATION_MS:
                    logger.warning('Failed %s: Track shorter than %dms',
                                   uri, MIN_DURATION_MS)
                else:
--- a/tests/audio/test_scan.py
+++ b/tests/audio/test_scan.py
@ -16,8 +16,7 @@ from tests import path_to_data_dir
 class ScannerTest(unittest.TestCase):
    def setUp(self):  # noqa: N802
        self.errors = {}
-        self.tags = {}
-        self.durations = {}
+        self.result = {}

    def find(self, path):
        media_dir = path_to_data_dir(path)
@ -31,19 +30,17 @@ class ScannerTest(unittest.TestCase):
            uri = path_lib.path_to_uri(path)
            key = uri[len('file://'):]
            try:
-                result = scanner.scan(uri)
-                self.tags[key] = result.tags
-                self.durations[key] = result.duration
+                self.result[key] = scanner.scan(uri)
            except exceptions.ScannerError as error:
                self.errors[key] = error

    def check(self, name, key, value):
        name = path_to_data_dir(name)
-        self.assertEqual(self.tags[name][key], value)
+        self.assertEqual(self.result[name].tags[key], value)

    def test_tags_is_set(self):
        self.scan(self.find('scanner/simple'))
-        self.assert_(self.tags)
+        self.assert_(self.result.values()[0].tags)

    def test_errors_is_not_set(self):
        self.scan(self.find('scanner/simple'))
@ -52,10 +49,10 @@ class ScannerTest(unittest.TestCase):
    def test_duration_is_set(self):
        self.scan(self.find('scanner/simple'))

-        self.assertEqual(
-            self.durations[path_to_data_dir('scanner/simple/song1.mp3')], 4680)
-        self.assertEqual(
-            self.durations[path_to_data_dir('scanner/simple/song1.ogg')], 4680)
+        ogg = path_to_data_dir('scanner/simple/song1.ogg')
+        mp3 = path_to_data_dir('scanner/simple/song1.mp3')
+        self.assertEqual(self.result[mp3].duration, 4680)
+        self.assertEqual(self.result[ogg].duration, 4680)

    def test_artist_is_set(self):
        self.scan(self.find('scanner/simple'))
@ -78,17 +75,17 @@ class ScannerTest(unittest.TestCase):

    def test_other_media_is_ignored(self):
        self.scan(self.find('scanner/image'))
-        self.assert_(self.errors)
+        self.assertFalse(self.result.values()[0].playable)

    def test_log_file_that_gst_thinks_is_mpeg_1_is_ignored(self):
        self.scan([path_to_data_dir('scanner/example.log')])
-        self.assertLess(
-            self.durations[path_to_data_dir('scanner/example.log')], 100)
+        log = path_to_data_dir('scanner/example.log')
+        self.assertLess(self.result[log].duration, 100)

    def test_empty_wav_file(self):
        self.scan([path_to_data_dir('scanner/empty.wav')])
-        self.assertEqual(
-            self.durations[path_to_data_dir('scanner/empty.wav')], 0)
+        wav = path_to_data_dir('scanner/empty.wav')
+        self.assertEqual(self.result[wav].duration, 0)

    @unittest.SkipTest
    def test_song_without_time_is_handeled(self):