Merge pull request #1124 from adamcik/fix/audio-scanner-robustness

Improve audio scanner robustness
This commit is contained in:
Stein Magnus Jodal 2015-04-12 22:10:00 +02:00
commit a3c69f1597
4 changed files with 75 additions and 35 deletions

View File

@ -17,6 +17,10 @@ v1.0.1 (UNRELEASED)
behavior was confusing for many users and doesn't work well with the plans
for multiple outputs.
- Audio: Update scanner to decode all media it finds. This should fix cases
where the scanner hangs on non-audio files like video. The scanner will now
also let us know if we found any decodable audio. (Fixes: :issue:`726`)
v1.0.0 (2015-03-25)
===================

View File

@ -1,4 +1,5 @@
from __future__ import absolute_import, division, unicode_literals
from __future__ import (
absolute_import, division, print_function, unicode_literals)
import collections
@ -14,7 +15,7 @@ from mopidy.utils import encoding
_missing_plugin_desc = gst.pbutils.missing_plugin_message_get_description
_Result = collections.namedtuple(
'Result', ('uri', 'tags', 'duration', 'seekable', 'mime'))
'Result', ('uri', 'tags', 'duration', 'seekable', 'mime', 'playable'))
_RAW_AUDIO = gst.Caps(b'audio/x-raw-int; audio/x-raw-float')
@ -51,14 +52,14 @@ class Scanner(object):
try:
_start_pipeline(pipeline)
tags, mime = _process(pipeline, self._timeout_ms)
tags, mime, have_audio = _process(pipeline, self._timeout_ms)
duration = _query_duration(pipeline)
seekable = _query_seekable(pipeline)
finally:
pipeline.set_state(gst.STATE_NULL)
del pipeline
return _Result(uri, tags, duration, seekable, mime)
return _Result(uri, tags, duration, seekable, mime, have_audio)
# Turns out it's _much_ faster to just create a new pipeline for every as
@ -70,30 +71,38 @@ def _setup_pipeline(uri, proxy_config=None):
typefind = gst.element_factory_make('typefind')
decodebin = gst.element_factory_make('decodebin2')
sink = gst.element_factory_make('fakesink')
pipeline = gst.element_factory_make('pipeline')
pipeline.add_many(src, typefind, decodebin, sink)
pipeline.add_many(src, typefind, decodebin)
gst.element_link_many(src, typefind, decodebin)
if proxy_config:
utils.setup_proxy(src, proxy_config)
decodebin.set_property('caps', _RAW_AUDIO)
decodebin.connect('pad-added', _pad_added, sink)
typefind.connect('have-type', _have_type, decodebin)
decodebin.connect('pad-added', _pad_added, pipeline)
return pipeline
def _have_type(element, probability, caps, decodebin):
decodebin.set_property('sink-caps', caps)
msg = gst.message_new_application(element, caps.get_structure(0))
element.get_bus().post(msg)
struct = gst.Structure('have-type')
struct['caps'] = caps.get_structure(0)
element.get_bus().post(gst.message_new_application(element, struct))
def _pad_added(element, pad, sink):
return pad.link(sink.get_pad('sink'))
def _pad_added(element, pad, pipeline):
    """Terminate a newly added pad in its own fakesink and flag raw audio.

    Used as the ``pad-added`` callback, with the pipeline passed along as
    user data. Every pad gets a fresh ``fakesink`` that is added to the
    pipeline and linked to the pad. If the pad's caps are a subset of
    ``_RAW_AUDIO``, a ``have-audio`` application message is posted on the
    element's bus.

    :param element: the element that emitted ``pad-added``
    :param pad: the pad that was just added
    :param pipeline: the pipeline the new sink is added to
    """
    fakesink = gst.element_factory_make('fakesink')
    # NOTE(review): presumably disables clock synchronisation so the sink
    # consumes data as fast as it arrives -- confirm against fakesink docs.
    fakesink.set_property('sync', False)

    # The sink joins an existing pipeline, so bring it to the parent's state
    # before linking the pad to it.
    pipeline.add(fakesink)
    fakesink.sync_state_with_parent()
    pad.link(fakesink.get_pad('sink'))

    # Only pads carrying raw audio are reported; anything else is just
    # drained into the fakesink.
    if not pad.get_caps().is_subset(_RAW_AUDIO):
        return

    structure = gst.Structure('have-audio')
    bus = element.get_bus()
    bus.post(gst.message_new_application(element, structure))
def _start_pipeline(pipeline):
@ -123,7 +132,7 @@ def _process(pipeline, timeout_ms):
clock = pipeline.get_clock()
bus = pipeline.get_bus()
timeout = timeout_ms * gst.MSECOND
tags, mime, missing_description = {}, None, None
tags, mime, have_audio, missing_description = {}, None, False, None
types = (gst.MESSAGE_ELEMENT | gst.MESSAGE_APPLICATION | gst.MESSAGE_ERROR
| gst.MESSAGE_EOS | gst.MESSAGE_ASYNC_DONE | gst.MESSAGE_TAG)
@ -139,19 +148,22 @@ def _process(pipeline, timeout_ms):
missing_description = encoding.locale_decode(
_missing_plugin_desc(message))
elif message.type == gst.MESSAGE_APPLICATION:
mime = message.structure.get_name()
if mime.startswith('text/') or mime == 'application/xml':
return tags, mime
if message.structure.get_name() == 'have-type':
mime = message.structure['caps'].get_name()
if mime.startswith('text/') or mime == 'application/xml':
return tags, mime, have_audio
elif message.structure.get_name() == 'have-audio':
have_audio = True
elif message.type == gst.MESSAGE_ERROR:
error = encoding.locale_decode(message.parse_error()[0])
if missing_description:
error = '%s (%s)' % (missing_description, error)
raise exceptions.ScannerError(error)
elif message.type == gst.MESSAGE_EOS:
return tags, mime
return tags, mime, have_audio
elif message.type == gst.MESSAGE_ASYNC_DONE:
if message.src == pipeline:
return tags, mime
return tags, mime, have_audio
elif message.type == gst.MESSAGE_TAG:
taglist = message.parse_tag()
# Note that this will only keep the last tag.
@ -160,3 +172,28 @@ def _process(pipeline, timeout_ms):
timeout -= clock.get_time() - start
raise exceptions.ScannerError('Timeout after %dms' % timeout_ms)
if __name__ == '__main__':
    # Command line helper: scan every argument and print what was found.
    import os
    import sys

    import gobject

    from mopidy.utils import path

    gobject.threads_init()

    # NOTE(review): 5000 is presumably the scan timeout in milliseconds;
    # Scanner's constructor is not visible here -- confirm.
    scanner = Scanner(5000)
    for uri in sys.argv[1:]:
        # Arguments that are not valid URIs are treated as local file paths.
        uri = uri if gst.uri_is_valid(uri) else path.path_to_uri(
            os.path.abspath(uri))

        try:
            result = scanner.scan(uri)
        except exceptions.ScannerError as error:
            print('%s: %s' % (uri, error))
            continue

        for key in ('uri', 'mime', 'duration', 'playable', 'seekable'):
            print('%-20s %s' % (key, getattr(result, key)))
        print('tags')
        for tag, value in result.tags.items():
            print('%-20s %s' % (tag, value))

View File

@ -135,7 +135,9 @@ class ScanCommand(commands.Command):
file_uri = path.path_to_uri(os.path.join(media_dir, relpath))
result = scanner.scan(file_uri)
tags, duration = result.tags, result.duration
if duration < MIN_DURATION_MS:
if not result.playable:
logger.warning('Failed %s: No audio found in file.', uri)
elif duration < MIN_DURATION_MS:
logger.warning('Failed %s: Track shorter than %dms',
uri, MIN_DURATION_MS)
else:

View File

@ -16,8 +16,7 @@ from tests import path_to_data_dir
class ScannerTest(unittest.TestCase):
def setUp(self): # noqa: N802
self.errors = {}
self.tags = {}
self.durations = {}
self.result = {}
def find(self, path):
media_dir = path_to_data_dir(path)
@ -31,19 +30,17 @@ class ScannerTest(unittest.TestCase):
uri = path_lib.path_to_uri(path)
key = uri[len('file://'):]
try:
result = scanner.scan(uri)
self.tags[key] = result.tags
self.durations[key] = result.duration
self.result[key] = scanner.scan(uri)
except exceptions.ScannerError as error:
self.errors[key] = error
def check(self, name, key, value):
name = path_to_data_dir(name)
self.assertEqual(self.tags[name][key], value)
self.assertEqual(self.result[name].tags[key], value)
def test_tags_is_set(self):
self.scan(self.find('scanner/simple'))
self.assert_(self.tags)
self.assert_(self.result.values()[0].tags)
def test_errors_is_not_set(self):
self.scan(self.find('scanner/simple'))
@ -52,10 +49,10 @@ class ScannerTest(unittest.TestCase):
def test_duration_is_set(self):
self.scan(self.find('scanner/simple'))
self.assertEqual(
self.durations[path_to_data_dir('scanner/simple/song1.mp3')], 4680)
self.assertEqual(
self.durations[path_to_data_dir('scanner/simple/song1.ogg')], 4680)
ogg = path_to_data_dir('scanner/simple/song1.ogg')
mp3 = path_to_data_dir('scanner/simple/song1.mp3')
self.assertEqual(self.result[mp3].duration, 4680)
self.assertEqual(self.result[ogg].duration, 4680)
def test_artist_is_set(self):
self.scan(self.find('scanner/simple'))
@ -78,17 +75,17 @@ class ScannerTest(unittest.TestCase):
def test_other_media_is_ignored(self):
self.scan(self.find('scanner/image'))
self.assert_(self.errors)
self.assertFalse(self.result.values()[0].playable)
def test_log_file_that_gst_thinks_is_mpeg_1_is_ignored(self):
self.scan([path_to_data_dir('scanner/example.log')])
self.assertLess(
self.durations[path_to_data_dir('scanner/example.log')], 100)
log = path_to_data_dir('scanner/example.log')
self.assertLess(self.result[log].duration, 100)
def test_empty_wav_file(self):
self.scan([path_to_data_dir('scanner/empty.wav')])
self.assertEqual(
self.durations[path_to_data_dir('scanner/empty.wav')], 0)
wav = path_to_data_dir('scanner/empty.wav')
self.assertEqual(self.result[wav].duration, 0)
@unittest.SkipTest
def test_song_without_time_is_handeled(self):