NOTE(review): this patch was rebuilt from a copy whose line breaks and
indentation had been collapsed. Hunk line counts are consistent, but blank
context lines and the indentation of context/removed lines are inferred, so
verify them against the tree before applying. The "index" hashes are the
original ones and no longer match the edited content.

diff --git a/mopidy/audio/scan.py b/mopidy/audio/scan.py
new file mode 100644
index 00000000..82803379
--- /dev/null
+++ b/mopidy/audio/scan.py
@@ -0,0 +1,182 @@
+from __future__ import unicode_literals
+
+import pygst
+pygst.require('0.10')
+import gst
+
+import datetime
+import os
+import time
+
+from mopidy import exceptions
+from mopidy.models import Track, Artist, Album
+from mopidy.utils import path
+
+
+class Scanner(object):
+    """Collects tags, mtime and duration for a URI using GStreamer.
+
+    :param timeout: how long :meth:`scan` may take, in milliseconds
+    :param min_duration: shortest duration :meth:`scan` accepts, in
+        milliseconds
+    """
+
+    def __init__(self, timeout=1000, min_duration=100):
+        self.timeout_ms = timeout
+        self.min_duration_ms = min_duration
+
+        sink = gst.element_factory_make('fakesink')
+
+        audio_caps = gst.Caps(b'audio/x-raw-int; audio/x-raw-float')
+        pad_added = lambda src, pad: pad.link(sink.get_pad('sink'))
+
+        self.uribin = gst.element_factory_make('uridecodebin')
+        self.uribin.set_property('caps', audio_caps)
+        self.uribin.connect('pad-added', pad_added)
+
+        self.pipe = gst.element_factory_make('pipeline')
+        self.pipe.add(self.uribin)
+        self.pipe.add(sink)
+
+        self.bus = self.pipe.get_bus()
+        self.bus.set_flushing(True)
+
+    def scan(self, uri):
+        """Scans ``uri`` and returns a dict with the collected data.
+
+        The dict holds the tags seen on the bus plus ``uri``, ``mtime`` and
+        ``gst.TAG_DURATION``. The duration comes from a ``gst.FORMAT_TIME``
+        query; divide it by ``gst.MSECOND`` to get milliseconds.
+
+        Raises :exc:`mopidy.exceptions.ScannerError` on pipeline errors,
+        timeouts, unknown durations and too short durations.
+        """
+        try:
+            self._setup(uri)
+            data = self._collect()
+            # Make sure uri and duration do not come from tags.
+            data[b'uri'] = uri
+            data[b'mtime'] = self._query_mtime(uri)
+            data[gst.TAG_DURATION] = self._query_duration()
+        finally:
+            self._reset()
+
+        # _query_duration() returns None when the query fails. Report that
+        # explicitly instead of relying on how None compares to a number.
+        if data[gst.TAG_DURATION] is None:
+            raise exceptions.ScannerError('Unable to determine duration.')
+
+        if data[gst.TAG_DURATION] < self.min_duration_ms * gst.MSECOND:
+            raise exceptions.ScannerError('Rejecting file with less than %dms '
+                                          'audio data.' % self.min_duration_ms)
+        return data
+
+    def _setup(self, uri):
+        """Primes the pipeline for collection."""
+        self.pipe.set_state(gst.STATE_READY)
+        self.uribin.set_property(b'uri', uri)
+        self.bus.set_flushing(False)
+        self.pipe.set_state(gst.STATE_PAUSED)
+
+    def _collect(self):
+        """Polls for messages to collect data."""
+        start = time.time()
+        timeout_s = self.timeout_ms / float(1000)
+        # NOTE(review): if this really is nanoseconds the loop below is close
+        # to a busy-wait; confirm that such a short poll is intended.
+        poll_timeout_ns = 1000
+        data = {}
+
+        while time.time() - start < timeout_s:
+            message = self.bus.poll(gst.MESSAGE_ANY, poll_timeout_ns)
+
+            if message is None:
+                pass  # polling the bus timed out.
+            elif message.type == gst.MESSAGE_ERROR:
+                raise exceptions.ScannerError(message.parse_error()[0])
+            elif message.type == gst.MESSAGE_EOS:
+                return data
+            elif message.type == gst.MESSAGE_ASYNC_DONE:
+                if message.src == self.pipe:
+                    return data
+            elif message.type == gst.MESSAGE_TAG:
+                taglist = message.parse_tag()
+                for key in taglist.keys():
+                    data[key] = taglist[key]
+
+        raise exceptions.ScannerError('Timeout after %dms' % self.timeout_ms)
+
+    def _reset(self):
+        """Ensures we clean up child elements and flush the bus."""
+        self.bus.set_flushing(True)
+        self.pipe.set_state(gst.STATE_NULL)
+
+    def _query_duration(self):
+        """Returns the pipeline duration, or None if the query fails."""
+        try:
+            return self.pipe.query_duration(gst.FORMAT_TIME, None)[0]
+        except gst.QueryError:
+            return None
+
+    def _query_mtime(self, uri):
+        """Returns the file's mtime, or None for non ``file:`` URIs."""
+        if not uri.startswith('file:'):
+            return None
+        return os.path.getmtime(path.uri_to_path(uri))
+
+
+def audio_data_to_track(data):
+    """Convert taglist data + our extras to a track.
+
+    ``data`` is a dict like the one :meth:`Scanner.scan` returns. It must
+    contain ``uri`` and ``gst.TAG_DURATION``; ``mtime`` and the tags are
+    only used when present.
+    """
+    albumartist_kwargs = {}
+    album_kwargs = {}
+    artist_kwargs = {}
+    track_kwargs = {}
+
+    def _retrieve(source_key, target_key, target):
+        if source_key in data:
+            target[target_key] = data[source_key]
+
+    _retrieve(gst.TAG_ALBUM, 'name', album_kwargs)
+    _retrieve(gst.TAG_TRACK_COUNT, 'num_tracks', album_kwargs)
+    _retrieve(gst.TAG_ALBUM_VOLUME_COUNT, 'num_discs', album_kwargs)
+    _retrieve(gst.TAG_ARTIST, 'name', artist_kwargs)
+
+    if gst.TAG_DATE in data and data[gst.TAG_DATE]:
+        date = data[gst.TAG_DATE]
+        try:
+            date = datetime.date(date.year, date.month, date.day)
+        except ValueError:
+            pass  # Ignore invalid dates
+        else:
+            track_kwargs['date'] = date.isoformat()
+
+    _retrieve(gst.TAG_TITLE, 'name', track_kwargs)
+    _retrieve(gst.TAG_TRACK_NUMBER, 'track_no', track_kwargs)
+    _retrieve(gst.TAG_ALBUM_VOLUME_NUMBER, 'disc_no', track_kwargs)
+
+    # Following keys don't seem to have TAG_* constant.
+    _retrieve('album-artist', 'name', albumartist_kwargs)
+    _retrieve('musicbrainz-trackid', 'musicbrainz_id', track_kwargs)
+    _retrieve('musicbrainz-artistid', 'musicbrainz_id', artist_kwargs)
+    _retrieve('musicbrainz-albumid', 'musicbrainz_id', album_kwargs)
+    _retrieve(
+        'musicbrainz-albumartistid', 'musicbrainz_id', albumartist_kwargs)
+
+    if albumartist_kwargs:
+        album_kwargs['artists'] = [Artist(**albumartist_kwargs)]
+
+    track_kwargs['uri'] = data['uri']
+    # Scanner.scan() sets mtime to None for non-file URIs and int(None)
+    # raises TypeError, so only set last_modified when there is a value.
+    if data.get('mtime') is not None:
+        track_kwargs['last_modified'] = int(data['mtime'])
+    track_kwargs['length'] = data[gst.TAG_DURATION] // gst.MSECOND
+    track_kwargs['album'] = Album(**album_kwargs)
+    track_kwargs['artists'] = [Artist(**artist_kwargs)]
+
+    return Track(**track_kwargs)
diff --git a/mopidy/frontends/mpd/translator.py b/mopidy/frontends/mpd/translator.py
index 880d1411..236b814f 100644
--- a/mopidy/frontends/mpd/translator.py
+++ b/mopidy/frontends/mpd/translator.py
@@ -301,6 +301,7 @@ def _add_to_tag_cache(result, dirs, files, media_dir):
         relative_path = os.path.relpath(path, base_path)
         relative_uri = urllib.quote(relative_path)
 
+        # TODO: use track.last_modified
         track_result['file'] = relative_uri
         track_result['mtime'] = get_mtime(path)
         track_result['key'] = os.path.basename(text_path)
diff --git a/mopidy/scanner.py b/mopidy/scanner.py
index dd21fdb4..30fb553b 100644
--- a/mopidy/scanner.py
+++ b/mopidy/scanner.py
@@ -1,28 +1,21 @@
 from __future__ import unicode_literals
 
 import argparse
-import datetime
 import logging
 import os
 import sys
+import time
 
 import gobject
 gobject.threads_init()
 
-
 # Extract any command line arguments. This needs to be done before GStreamer is
 # imported, so that GStreamer doesn't hijack e.g. ``--help``.
 mopidy_args = sys.argv[1:]
 sys.argv[1:] = []
 
-
-import pygst
-pygst.require('0.10')
-import gst
-import gst.pbutils
-
 from mopidy import config as config_lib, exceptions, ext
-from mopidy.models import Track, Artist, Album
+from mopidy.audio import scan
 from mopidy.utils import log, path, versioning
 
 
@@ -80,11 +73,13 @@ def main():
     logging.info('Checking tracks from library.')
     for track in local_updater.load():
         try:
+            # TODO: convert local to file uri / path
             stat = os.stat(path.uri_to_path(track.uri))
             if int(stat.st_mtime) > track.last_modified:
                 uris_update.add(track.uri)
             uris_library.add(track.uri)
         except OSError:
+            logging.debug('Missing file %s', track.uri)
             uris_remove.add(track.uri)
 
     logging.info('Removing %d moved or deleted tracks.', len(uris_remove))
@@ -103,21 +98,41 @@ def main():
     logging.info('Found %d new or modified tracks.', len(uris_update))
     logging.info('Scanning new and modified tracks.')
 
-    scanner = Scanner(config['local']['scan_timeout'])
-    for uri in uris_update:
+    scanner = scan.Scanner(config['local']['scan_timeout'])
+    progress = Progress(len(uris_update))
+
+    for uri in sorted(uris_update):
         try:
             data = scanner.scan(uri)
-            data[b'mtime'] = os.path.getmtime(path.uri_to_path(uri))
-            track = translator(data)
+            track = scan.audio_data_to_track(data)
             local_updater.add(track)
             logging.debug('Added %s', track.uri)
         except exceptions.ScannerError as error:
             logging.warning('Failed %s: %s', uri, error)
 
-    logging.info('Done scanning; commiting changes.')
+        progress.increment()
+
+    logging.info('Committing changes.')
     local_updater.commit()
 
 
+class Progress(object):
+    """Logs scan progress every 1000 files and when the last file is done."""
+
+    def __init__(self, total):
+        self.count = 0
+        self.total = total
+        self.start = time.time()
+
+    def increment(self):
+        self.count += 1
+        if self.count % 1000 == 0 or self.count == self.total:
+            duration = time.time() - self.start
+            remainder = duration / self.count * (self.total - self.count)
+            logging.info('Scanned %d of %d files in %ds, ~%ds left.',
+                         self.count, self.total, duration, remainder)
+
+
 def parse_args():
     parser = argparse.ArgumentParser()
     parser.add_argument(
@@ -134,95 +149,5 @@ def parse_args():
     return parser.parse_args(args=mopidy_args)
 
 
-# TODO: move into scanner.
-def translator(data):
-    albumartist_kwargs = {}
-    album_kwargs = {}
-    artist_kwargs = {}
-    track_kwargs = {}
-
-    def _retrieve(source_key, target_key, target):
-        if source_key in data:
-            target[target_key] = data[source_key]
-
-    _retrieve(gst.TAG_ALBUM, 'name', album_kwargs)
-    _retrieve(gst.TAG_TRACK_COUNT, 'num_tracks', album_kwargs)
-    _retrieve(gst.TAG_ALBUM_VOLUME_COUNT, 'num_discs', album_kwargs)
-    _retrieve(gst.TAG_ARTIST, 'name', artist_kwargs)
-
-    if gst.TAG_DATE in data and data[gst.TAG_DATE]:
-        date = data[gst.TAG_DATE]
-        try:
-            date = datetime.date(date.year, date.month, date.day)
-        except ValueError:
-            pass  # Ignore invalid dates
-        else:
-            track_kwargs['date'] = date.isoformat()
-
-    _retrieve(gst.TAG_TITLE, 'name', track_kwargs)
-    _retrieve(gst.TAG_TRACK_NUMBER, 'track_no', track_kwargs)
-    _retrieve(gst.TAG_ALBUM_VOLUME_NUMBER, 'disc_no', track_kwargs)
-
-    # Following keys don't seem to have TAG_* constant.
-    _retrieve('album-artist', 'name', albumartist_kwargs)
-    _retrieve('musicbrainz-trackid', 'musicbrainz_id', track_kwargs)
-    _retrieve('musicbrainz-artistid', 'musicbrainz_id', artist_kwargs)
-    _retrieve('musicbrainz-albumid', 'musicbrainz_id', album_kwargs)
-    _retrieve(
-        'musicbrainz-albumartistid', 'musicbrainz_id', albumartist_kwargs)
-
-    if albumartist_kwargs:
-        album_kwargs['artists'] = [Artist(**albumartist_kwargs)]
-
-    track_kwargs['uri'] = data['uri']
-    track_kwargs['last_modified'] = int(data['mtime'])
-    track_kwargs['length'] = data[gst.TAG_DURATION]
-    track_kwargs['album'] = Album(**album_kwargs)
-    track_kwargs['artists'] = [Artist(**artist_kwargs)]
-
-    return Track(**track_kwargs)
-
-
-class Scanner(object):
-    def __init__(self, timeout=1000):
-        self.discoverer = gst.pbutils.Discoverer(timeout * 1000000)
-
-    def scan(self, uri):
-        try:
-            info = self.discoverer.discover_uri(uri)
-        except gobject.GError as e:
-            # Loosing traceback is non-issue since this is from C code.
-            raise exceptions.ScannerError(e)
-
-        data = {}
-        audio_streams = info.get_audio_streams()
-
-        if not audio_streams:
-            raise exceptions.ScannerError('Did not find any audio streams.')
-
-        for stream in audio_streams:
-            taglist = stream.get_tags()
-            if not taglist:
-                continue
-            for key in taglist.keys():
-                # XXX: For some crazy reason some wma files spit out lists
-                # here, not sure if this is due to better data in headers or
-                # wma being stupid. So ugly hack for now :/
-                if type(taglist[key]) is list:
-                    data[key] = taglist[key][0]
-                else:
-                    data[key] = taglist[key]
-
-        # Never trust metadata for these fields:
-        data[b'uri'] = uri
-        data[b'duration'] = info.get_duration() // gst.MSECOND
-
-        if data[b'duration'] < 100:
-            raise exceptions.ScannerError(
-                'Rejecting file with less than 100ms audio data.')
-
-        return data
-
-
 if __name__ == '__main__':
     main()
diff --git a/tests/scanner_test.py b/tests/audio/scan_test.py
similarity index 95%
rename from tests/scanner_test.py
rename to tests/audio/scan_test.py
index 1102c525..b53b0b57 100644
--- a/tests/scanner_test.py
+++ b/tests/audio/scan_test.py
@@ -3,8 +3,8 @@ from __future__ import unicode_literals
 import unittest
 
 from mopidy import exceptions
+from mopidy.audio import scan
 from mopidy.models import Track, Artist, Album
-from mopidy.scanner import Scanner, translator
 from mopidy.utils import path as path_lib
 
 from tests import path_to_data_dir
@@ -31,7 +31,7 @@ class TranslatorTest(unittest.TestCase):
             'album-disc-count': 3,
             'date': FakeGstDate(2006, 1, 1,),
             'container-format': 'ID3 tag',
-            'duration': 4531,
+            'duration': 4531000000,
             'musicbrainz-trackid': 'mbtrackid',
             'musicbrainz-albumid': 'mbalbumid',
             'musicbrainz-artistid': 'mbartistid',
@@ -76,7 +76,7 @@ class TranslatorTest(unittest.TestCase):
 
     def check(self):
         expected = self.build_track()
-        actual = translator(self.data)
+        actual = scan.audio_data_to_track(self.data)
         self.assertEqual(expected, actual)
 
     def test_basic_data(self):
@@ -151,7 +151,7 @@ class ScannerTest(unittest.TestCase):
     def scan(self, path):
         paths = path_lib.find_files(path_to_data_dir(path))
         uris = (path_lib.path_to_uri(p) for p in paths)
-        scanner = Scanner()
+        scanner = scan.Scanner()
         for uri in uris:
             key = uri[len('file://'):]
             try:
@@ -182,8 +182,8 @@ class ScannerTest(unittest.TestCase):
 
     def test_duration_is_set(self):
         self.scan('scanner/simple')
-        self.check('scanner/simple/song1.mp3', 'duration', 4680)
-        self.check('scanner/simple/song1.ogg', 'duration', 4680)
+        self.check('scanner/simple/song1.mp3', 'duration', 4680000000)
+        self.check('scanner/simple/song1.ogg', 'duration', 4680000000)
 
     def test_artist_is_set(self):
         self.scan('scanner/simple')