Merge pull request #566 from adamcik/feature/scanner
Switch back to custom scanner
This commit is contained in:
commit
71eff1bf45
149
mopidy/audio/scan.py
Normal file
149
mopidy/audio/scan.py
Normal file
@ -0,0 +1,149 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import pygst
|
||||
pygst.require('0.10')
|
||||
import gst
|
||||
|
||||
import datetime
|
||||
import os
|
||||
import time
|
||||
|
||||
from mopidy import exceptions
|
||||
from mopidy.models import Track, Artist, Album
|
||||
from mopidy.utils import path
|
||||
|
||||
|
||||
class Scanner(object):
|
||||
def __init__(self, timeout=1000, min_duration=100):
|
||||
self.timeout_ms = timeout
|
||||
self.min_duration_ms = min_duration
|
||||
|
||||
sink = gst.element_factory_make('fakesink')
|
||||
|
||||
audio_caps = gst.Caps(b'audio/x-raw-int; audio/x-raw-float')
|
||||
pad_added = lambda src, pad: pad.link(sink.get_pad('sink'))
|
||||
|
||||
self.uribin = gst.element_factory_make('uridecodebin')
|
||||
self.uribin.set_property('caps', audio_caps)
|
||||
self.uribin.connect('pad-added', pad_added)
|
||||
|
||||
self.pipe = gst.element_factory_make('pipeline')
|
||||
self.pipe.add(self.uribin)
|
||||
self.pipe.add(sink)
|
||||
|
||||
self.bus = self.pipe.get_bus()
|
||||
self.bus.set_flushing(True)
|
||||
|
||||
def scan(self, uri):
|
||||
try:
|
||||
self._setup(uri)
|
||||
data = self._collect()
|
||||
# Make sure uri and duration does not come from tags.
|
||||
data[b'uri'] = uri
|
||||
data[b'mtime'] = self._query_mtime(uri)
|
||||
data[gst.TAG_DURATION] = self._query_duration()
|
||||
finally:
|
||||
self._reset()
|
||||
|
||||
if data[gst.TAG_DURATION] < self.min_duration_ms * gst.MSECOND:
|
||||
raise exceptions.ScannerError('Rejecting file with less than %dms '
|
||||
'audio data.' % self.min_duration_ms)
|
||||
return data
|
||||
|
||||
def _setup(self, uri):
|
||||
"""Primes the pipeline for collection."""
|
||||
self.pipe.set_state(gst.STATE_READY)
|
||||
self.uribin.set_property(b'uri', uri)
|
||||
self.bus.set_flushing(False)
|
||||
self.pipe.set_state(gst.STATE_PAUSED)
|
||||
|
||||
def _collect(self):
|
||||
"""Polls for messages to collect data."""
|
||||
start = time.time()
|
||||
timeout_s = self.timeout_ms / float(1000)
|
||||
poll_timeout_ns = 1000
|
||||
data = {}
|
||||
|
||||
while time.time() - start < timeout_s:
|
||||
message = self.bus.poll(gst.MESSAGE_ANY, poll_timeout_ns)
|
||||
|
||||
if message is None:
|
||||
pass # polling the bus timed out.
|
||||
elif message.type == gst.MESSAGE_ERROR:
|
||||
raise exceptions.ScannerError(message.parse_error()[0])
|
||||
elif message.type == gst.MESSAGE_EOS:
|
||||
return data
|
||||
elif message.type == gst.MESSAGE_ASYNC_DONE:
|
||||
if message.src == self.pipe:
|
||||
return data
|
||||
elif message.type == gst.MESSAGE_TAG:
|
||||
taglist = message.parse_tag()
|
||||
for key in taglist.keys():
|
||||
data[key] = taglist[key]
|
||||
|
||||
raise exceptions.ScannerError('Timeout after %dms' % self.timeout_ms)
|
||||
|
||||
def _reset(self):
|
||||
"""Ensures we cleanup child elements and flush the bus."""
|
||||
self.bus.set_flushing(True)
|
||||
self.pipe.set_state(gst.STATE_NULL)
|
||||
|
||||
def _query_duration(self):
|
||||
try:
|
||||
return self.pipe.query_duration(gst.FORMAT_TIME, None)[0]
|
||||
except gst.QueryError:
|
||||
return None
|
||||
|
||||
def _query_mtime(self, uri):
|
||||
if not uri.startswith('file:'):
|
||||
return None
|
||||
return os.path.getmtime(path.uri_to_path(uri))
|
||||
|
||||
|
||||
def audio_data_to_track(data):
|
||||
"""Convert taglist data + our extras to a track."""
|
||||
albumartist_kwargs = {}
|
||||
album_kwargs = {}
|
||||
artist_kwargs = {}
|
||||
track_kwargs = {}
|
||||
|
||||
def _retrieve(source_key, target_key, target):
|
||||
if source_key in data:
|
||||
target[target_key] = data[source_key]
|
||||
|
||||
_retrieve(gst.TAG_ALBUM, 'name', album_kwargs)
|
||||
_retrieve(gst.TAG_TRACK_COUNT, 'num_tracks', album_kwargs)
|
||||
_retrieve(gst.TAG_ALBUM_VOLUME_COUNT, 'num_discs', album_kwargs)
|
||||
_retrieve(gst.TAG_ARTIST, 'name', artist_kwargs)
|
||||
|
||||
if gst.TAG_DATE in data and data[gst.TAG_DATE]:
|
||||
date = data[gst.TAG_DATE]
|
||||
try:
|
||||
date = datetime.date(date.year, date.month, date.day)
|
||||
except ValueError:
|
||||
pass # Ignore invalid dates
|
||||
else:
|
||||
track_kwargs['date'] = date.isoformat()
|
||||
|
||||
_retrieve(gst.TAG_TITLE, 'name', track_kwargs)
|
||||
_retrieve(gst.TAG_TRACK_NUMBER, 'track_no', track_kwargs)
|
||||
_retrieve(gst.TAG_ALBUM_VOLUME_NUMBER, 'disc_no', track_kwargs)
|
||||
|
||||
# Following keys don't seem to have TAG_* constant.
|
||||
_retrieve('album-artist', 'name', albumartist_kwargs)
|
||||
_retrieve('musicbrainz-trackid', 'musicbrainz_id', track_kwargs)
|
||||
_retrieve('musicbrainz-artistid', 'musicbrainz_id', artist_kwargs)
|
||||
_retrieve('musicbrainz-albumid', 'musicbrainz_id', album_kwargs)
|
||||
_retrieve(
|
||||
'musicbrainz-albumartistid', 'musicbrainz_id', albumartist_kwargs)
|
||||
|
||||
if albumartist_kwargs:
|
||||
album_kwargs['artists'] = [Artist(**albumartist_kwargs)]
|
||||
|
||||
track_kwargs['uri'] = data['uri']
|
||||
track_kwargs['last_modified'] = int(data['mtime'])
|
||||
track_kwargs['length'] = data[gst.TAG_DURATION] // gst.MSECOND
|
||||
track_kwargs['album'] = Album(**album_kwargs)
|
||||
track_kwargs['artists'] = [Artist(**artist_kwargs)]
|
||||
|
||||
return Track(**track_kwargs)
|
||||
@ -301,6 +301,7 @@ def _add_to_tag_cache(result, dirs, files, media_dir):
|
||||
relative_path = os.path.relpath(path, base_path)
|
||||
relative_uri = urllib.quote(relative_path)
|
||||
|
||||
# TODO: use track.last_modified
|
||||
track_result['file'] = relative_uri
|
||||
track_result['mtime'] = get_mtime(path)
|
||||
track_result['key'] = os.path.basename(text_path)
|
||||
|
||||
@ -1,28 +1,21 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import argparse
|
||||
import datetime
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
|
||||
import gobject
|
||||
gobject.threads_init()
|
||||
|
||||
|
||||
# Extract any command line arguments. This needs to be done before GStreamer is
|
||||
# imported, so that GStreamer doesn't hijack e.g. ``--help``.
|
||||
mopidy_args = sys.argv[1:]
|
||||
sys.argv[1:] = []
|
||||
|
||||
|
||||
import pygst
|
||||
pygst.require('0.10')
|
||||
import gst
|
||||
import gst.pbutils
|
||||
|
||||
from mopidy import config as config_lib, exceptions, ext
|
||||
from mopidy.models import Track, Artist, Album
|
||||
from mopidy.audio import scan
|
||||
from mopidy.utils import log, path, versioning
|
||||
|
||||
|
||||
@ -80,11 +73,13 @@ def main():
|
||||
logging.info('Checking tracks from library.')
|
||||
for track in local_updater.load():
|
||||
try:
|
||||
# TODO: convert local to file uri / path
|
||||
stat = os.stat(path.uri_to_path(track.uri))
|
||||
if int(stat.st_mtime) > track.last_modified:
|
||||
uris_update.add(track.uri)
|
||||
uris_library.add(track.uri)
|
||||
except OSError:
|
||||
logging.debug('Missing file %s', track.uri)
|
||||
uris_remove.add(track.uri)
|
||||
|
||||
logging.info('Removing %d moved or deleted tracks.', len(uris_remove))
|
||||
@ -103,21 +98,39 @@ def main():
|
||||
logging.info('Found %d new or modified tracks.', len(uris_update))
|
||||
logging.info('Scanning new and modified tracks.')
|
||||
|
||||
scanner = Scanner(config['local']['scan_timeout'])
|
||||
for uri in uris_update:
|
||||
scanner = scan.Scanner(config['local']['scan_timeout'])
|
||||
progress = Progress(len(uris_update))
|
||||
|
||||
for uri in sorted(uris_update):
|
||||
try:
|
||||
data = scanner.scan(uri)
|
||||
data[b'mtime'] = os.path.getmtime(path.uri_to_path(uri))
|
||||
track = translator(data)
|
||||
track = scan.audio_data_to_track(data)
|
||||
local_updater.add(track)
|
||||
logging.debug('Added %s', track.uri)
|
||||
except exceptions.ScannerError as error:
|
||||
logging.warning('Failed %s: %s', uri, error)
|
||||
|
||||
logging.info('Done scanning; commiting changes.')
|
||||
progress.increment()
|
||||
|
||||
logging.info('Commiting changes.')
|
||||
local_updater.commit()
|
||||
|
||||
|
||||
class Progress(object):
|
||||
def __init__(self, total):
|
||||
self.count = 0
|
||||
self.total = total
|
||||
self.start = time.time()
|
||||
|
||||
def increment(self):
|
||||
self.count += 1
|
||||
if self.count % 1000 == 0 or self.count == self.total:
|
||||
duration = time.time() - self.start
|
||||
remainder = duration / self.count * (self.total - self.count)
|
||||
logging.info('Scanned %d of %d files in %ds, ~%ds left.',
|
||||
self.count, self.total, duration, remainder)
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
@ -134,95 +147,5 @@ def parse_args():
|
||||
return parser.parse_args(args=mopidy_args)
|
||||
|
||||
|
||||
# TODO: move into scanner.
|
||||
def translator(data):
|
||||
albumartist_kwargs = {}
|
||||
album_kwargs = {}
|
||||
artist_kwargs = {}
|
||||
track_kwargs = {}
|
||||
|
||||
def _retrieve(source_key, target_key, target):
|
||||
if source_key in data:
|
||||
target[target_key] = data[source_key]
|
||||
|
||||
_retrieve(gst.TAG_ALBUM, 'name', album_kwargs)
|
||||
_retrieve(gst.TAG_TRACK_COUNT, 'num_tracks', album_kwargs)
|
||||
_retrieve(gst.TAG_ALBUM_VOLUME_COUNT, 'num_discs', album_kwargs)
|
||||
_retrieve(gst.TAG_ARTIST, 'name', artist_kwargs)
|
||||
|
||||
if gst.TAG_DATE in data and data[gst.TAG_DATE]:
|
||||
date = data[gst.TAG_DATE]
|
||||
try:
|
||||
date = datetime.date(date.year, date.month, date.day)
|
||||
except ValueError:
|
||||
pass # Ignore invalid dates
|
||||
else:
|
||||
track_kwargs['date'] = date.isoformat()
|
||||
|
||||
_retrieve(gst.TAG_TITLE, 'name', track_kwargs)
|
||||
_retrieve(gst.TAG_TRACK_NUMBER, 'track_no', track_kwargs)
|
||||
_retrieve(gst.TAG_ALBUM_VOLUME_NUMBER, 'disc_no', track_kwargs)
|
||||
|
||||
# Following keys don't seem to have TAG_* constant.
|
||||
_retrieve('album-artist', 'name', albumartist_kwargs)
|
||||
_retrieve('musicbrainz-trackid', 'musicbrainz_id', track_kwargs)
|
||||
_retrieve('musicbrainz-artistid', 'musicbrainz_id', artist_kwargs)
|
||||
_retrieve('musicbrainz-albumid', 'musicbrainz_id', album_kwargs)
|
||||
_retrieve(
|
||||
'musicbrainz-albumartistid', 'musicbrainz_id', albumartist_kwargs)
|
||||
|
||||
if albumartist_kwargs:
|
||||
album_kwargs['artists'] = [Artist(**albumartist_kwargs)]
|
||||
|
||||
track_kwargs['uri'] = data['uri']
|
||||
track_kwargs['last_modified'] = int(data['mtime'])
|
||||
track_kwargs['length'] = data[gst.TAG_DURATION]
|
||||
track_kwargs['album'] = Album(**album_kwargs)
|
||||
track_kwargs['artists'] = [Artist(**artist_kwargs)]
|
||||
|
||||
return Track(**track_kwargs)
|
||||
|
||||
|
||||
class Scanner(object):
|
||||
def __init__(self, timeout=1000):
|
||||
self.discoverer = gst.pbutils.Discoverer(timeout * 1000000)
|
||||
|
||||
def scan(self, uri):
|
||||
try:
|
||||
info = self.discoverer.discover_uri(uri)
|
||||
except gobject.GError as e:
|
||||
# Loosing traceback is non-issue since this is from C code.
|
||||
raise exceptions.ScannerError(e)
|
||||
|
||||
data = {}
|
||||
audio_streams = info.get_audio_streams()
|
||||
|
||||
if not audio_streams:
|
||||
raise exceptions.ScannerError('Did not find any audio streams.')
|
||||
|
||||
for stream in audio_streams:
|
||||
taglist = stream.get_tags()
|
||||
if not taglist:
|
||||
continue
|
||||
for key in taglist.keys():
|
||||
# XXX: For some crazy reason some wma files spit out lists
|
||||
# here, not sure if this is due to better data in headers or
|
||||
# wma being stupid. So ugly hack for now :/
|
||||
if type(taglist[key]) is list:
|
||||
data[key] = taglist[key][0]
|
||||
else:
|
||||
data[key] = taglist[key]
|
||||
|
||||
# Never trust metadata for these fields:
|
||||
data[b'uri'] = uri
|
||||
data[b'duration'] = info.get_duration() // gst.MSECOND
|
||||
|
||||
if data[b'duration'] < 100:
|
||||
raise exceptions.ScannerError(
|
||||
'Rejecting file with less than 100ms audio data.')
|
||||
|
||||
return data
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
||||
@ -3,8 +3,8 @@ from __future__ import unicode_literals
|
||||
import unittest
|
||||
|
||||
from mopidy import exceptions
|
||||
from mopidy.audio import scan
|
||||
from mopidy.models import Track, Artist, Album
|
||||
from mopidy.scanner import Scanner, translator
|
||||
from mopidy.utils import path as path_lib
|
||||
|
||||
from tests import path_to_data_dir
|
||||
@ -31,7 +31,7 @@ class TranslatorTest(unittest.TestCase):
|
||||
'album-disc-count': 3,
|
||||
'date': FakeGstDate(2006, 1, 1,),
|
||||
'container-format': 'ID3 tag',
|
||||
'duration': 4531,
|
||||
'duration': 4531000000,
|
||||
'musicbrainz-trackid': 'mbtrackid',
|
||||
'musicbrainz-albumid': 'mbalbumid',
|
||||
'musicbrainz-artistid': 'mbartistid',
|
||||
@ -76,7 +76,7 @@ class TranslatorTest(unittest.TestCase):
|
||||
|
||||
def check(self):
|
||||
expected = self.build_track()
|
||||
actual = translator(self.data)
|
||||
actual = scan.audio_data_to_track(self.data)
|
||||
self.assertEqual(expected, actual)
|
||||
|
||||
def test_basic_data(self):
|
||||
@ -151,7 +151,7 @@ class ScannerTest(unittest.TestCase):
|
||||
def scan(self, path):
|
||||
paths = path_lib.find_files(path_to_data_dir(path))
|
||||
uris = (path_lib.path_to_uri(p) for p in paths)
|
||||
scanner = Scanner()
|
||||
scanner = scan.Scanner()
|
||||
for uri in uris:
|
||||
key = uri[len('file://'):]
|
||||
try:
|
||||
@ -182,8 +182,8 @@ class ScannerTest(unittest.TestCase):
|
||||
|
||||
def test_duration_is_set(self):
|
||||
self.scan('scanner/simple')
|
||||
self.check('scanner/simple/song1.mp3', 'duration', 4680)
|
||||
self.check('scanner/simple/song1.ogg', 'duration', 4680)
|
||||
self.check('scanner/simple/song1.mp3', 'duration', 4680000000)
|
||||
self.check('scanner/simple/song1.ogg', 'duration', 4680000000)
|
||||
|
||||
def test_artist_is_set(self):
|
||||
self.scan('scanner/simple')
|
||||
Loading…
Reference in New Issue
Block a user