Merge branch 'develop' of https://github.com/mopidy/mopidy into feature/extra_tags

Lasse Bigum 2013-11-09 02:37:44 +01:00
commit 8b7621c3e3
10 changed files with 248 additions and 150 deletions

View File

@ -4,25 +4,47 @@ Changelog
This changelog is used to track all major changes to Mopidy.
v0.17.0 (UNRELEASED)
====================
**Core**
- The search field ``track`` has been renamed to ``track_name`` to avoid
confusion with ``track_no``. (Fixes: :issue:`535`)
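For example, a core search that previously filtered on ``track`` would now be
written as follows (a minimal sketch; the keyword-argument call style and the
title are assumptions, with ``core`` being a Pykka proxy to the core actor)::

    core.library.search(track_name=['Some Song']).get()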
**Local backend**
- Fix search filtering by track number.
- When scanning, we no longer default the album artist to be the same as the
track artist. The album artist is now only populated if the scanned file has an
explicit album artist set.
- Library scanning has been switched back to custom code due to various issues
with GStreamer's built-in scanner in 0.10. This also fixes the scanner slowdown.
(Fixes: :issue:`565`)
- Fix scanner so that mtime is respected when deciding which files can be skipped.
v0.16.1 (2013-11-02)
====================
This is a very small release to get Mopidy's Debian package ready for inclusion
in Debian.
**Commands**
- Fix removal of last dir level in paths to dependencies in
``mopidy --show-deps`` output.
- Add manpages for all commands.
**Local backend**
- Fix search filtering by track number that was added in 0.16.0.
**MPD frontend**
- Add support for ``list "albumartist" ...`` which was missed when ``find`` and
``search`` learned to handle ``albumartist`` in 0.16.0. (Fixes: :issue:`553`)
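For example, an MPD client can now issue (a minimal protocol sketch; the album
name is made up)::

    list "albumartist"
    list "albumartist" "album" "Some Album"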
v0.16.0 (2013-10-27)

View File

@ -8,11 +8,6 @@ import os
import sys
# -- Read The Docs configuration ----------------------------------------------
RTD_NEW_THEME = True
# -- Workarounds to have autodoc generate API docs ----------------------------
sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))

View File

@ -33,7 +33,6 @@ Items on this list will probably not be supported in the near future.
- Stickers are not supported
- Crossfade is not supported
- Replay gain is not supported
- ``count`` does not provide any statistics
- ``stats`` does not provide any statistics
- ``list`` does not support listing tracks by genre
- ``decoders`` does not provide information about available decoders

mopidy/audio/scan.py (new file, 161 lines)
View File

@ -0,0 +1,161 @@
from __future__ import unicode_literals
import pygst
pygst.require('0.10')
import gst
import datetime
import os
import time
from mopidy import exceptions
from mopidy.models import Track, Artist, Album
from mopidy.utils import path
class Scanner(object):
    def __init__(self, timeout=1000, min_duration=100):
        self.timeout_ms = timeout
        self.min_duration_ms = min_duration

        sink = gst.element_factory_make('fakesink')

        audio_caps = gst.Caps(b'audio/x-raw-int; audio/x-raw-float')
        pad_added = lambda src, pad: pad.link(sink.get_pad('sink'))

        self.uribin = gst.element_factory_make('uridecodebin')
        self.uribin.set_property('caps', audio_caps)
        self.uribin.connect('pad-added', pad_added)

        self.pipe = gst.element_factory_make('pipeline')
        self.pipe.add(self.uribin)
        self.pipe.add(sink)

        self.bus = self.pipe.get_bus()
        self.bus.set_flushing(True)

    def scan(self, uri):
        try:
            self._setup(uri)
            data = self._collect()
            # Make sure uri and duration do not come from tags.
            data[b'uri'] = uri
            data[b'mtime'] = self._query_mtime(uri)
            data[gst.TAG_DURATION] = self._query_duration()
        finally:
            self._reset()

        if data[gst.TAG_DURATION] < self.min_duration_ms * gst.MSECOND:
            raise exceptions.ScannerError('Rejecting file with less than %dms '
                                          'audio data.' % self.min_duration_ms)
        return data

    def _setup(self, uri):
        """Primes the pipeline for collection."""
        self.pipe.set_state(gst.STATE_READY)
        self.uribin.set_property(b'uri', uri)
        self.bus.set_flushing(False)
        self.pipe.set_state(gst.STATE_PAUSED)

    def _collect(self):
        """Polls for messages to collect data."""
        start = time.time()
        timeout_s = self.timeout_ms / float(1000)
        poll_timeout_ns = 1000
        data = {}

        while time.time() - start < timeout_s:
            message = self.bus.poll(gst.MESSAGE_ANY, poll_timeout_ns)

            if message is None:
                pass  # Polling the bus timed out.
            elif message.type == gst.MESSAGE_ERROR:
                raise exceptions.ScannerError(message.parse_error()[0])
            elif message.type == gst.MESSAGE_EOS:
                return data
            elif message.type == gst.MESSAGE_ASYNC_DONE:
                if message.src == self.pipe:
                    return data
            elif message.type == gst.MESSAGE_TAG:
                taglist = message.parse_tag()
                for key in taglist.keys():
                    data[key] = taglist[key]

        raise exceptions.ScannerError('Timeout after %dms' % self.timeout_ms)

    def _reset(self):
        """Ensures we clean up child elements and flush the bus."""
        self.bus.set_flushing(True)
        self.pipe.set_state(gst.STATE_NULL)

    def _query_duration(self):
        try:
            return self.pipe.query_duration(gst.FORMAT_TIME, None)[0]
        except gst.QueryError:
            return None

    def _query_mtime(self, uri):
        if not uri.startswith('file:'):
            return None
        return os.path.getmtime(path.uri_to_path(uri))


def audio_data_to_track(data):
    """Convert taglist data + our extras to a track."""
    albumartist_kwargs = {}
    album_kwargs = {}
    artist_kwargs = {}
    composer_kwargs = {}
    performer_kwargs = {}
    track_kwargs = {}

    def _retrieve(source_key, target_key, target):
        if source_key in data:
            target[target_key] = data[source_key]

    _retrieve(gst.TAG_ALBUM, 'name', album_kwargs)
    _retrieve(gst.TAG_TRACK_COUNT, 'num_tracks', album_kwargs)
    _retrieve(gst.TAG_ALBUM_VOLUME_COUNT, 'num_discs', album_kwargs)
    _retrieve(gst.TAG_ARTIST, 'name', artist_kwargs)
    _retrieve(gst.TAG_COMPOSER, 'name', composer_kwargs)
    _retrieve(gst.TAG_PERFORMER, 'name', performer_kwargs)
    _retrieve(gst.TAG_ALBUM_ARTIST, 'name', albumartist_kwargs)
    _retrieve(gst.TAG_TITLE, 'name', track_kwargs)
    _retrieve(gst.TAG_TRACK_NUMBER, 'track_no', track_kwargs)
    _retrieve(gst.TAG_ALBUM_VOLUME_NUMBER, 'disc_no', track_kwargs)
    _retrieve(gst.TAG_GENRE, 'genre', track_kwargs)
    _retrieve(gst.TAG_BITRATE, 'bitrate', track_kwargs)

    # The following keys do not seem to have a TAG_* constant.
    _retrieve('comment', 'comment', track_kwargs)
    _retrieve('musicbrainz-trackid', 'musicbrainz_id', track_kwargs)
    _retrieve('musicbrainz-artistid', 'musicbrainz_id', artist_kwargs)
    _retrieve('musicbrainz-albumid', 'musicbrainz_id', album_kwargs)
    _retrieve(
        'musicbrainz-albumartistid', 'musicbrainz_id', albumartist_kwargs)

    if gst.TAG_DATE in data and data[gst.TAG_DATE]:
        date = data[gst.TAG_DATE]
        try:
            date = datetime.date(date.year, date.month, date.day)
        except ValueError:
            pass  # Ignore invalid dates.
        else:
            track_kwargs['date'] = date.isoformat()

    if albumartist_kwargs:
        album_kwargs['artists'] = [Artist(**albumartist_kwargs)]

    if composer_kwargs:
        track_kwargs['composers'] = [Artist(**composer_kwargs)]

    if performer_kwargs:
        track_kwargs['performers'] = [Artist(**performer_kwargs)]

    track_kwargs['uri'] = data['uri']
    track_kwargs['last_modified'] = int(data['mtime'])
    track_kwargs['length'] = data[gst.TAG_DURATION] // gst.MSECOND
    track_kwargs['album'] = Album(**album_kwargs)
    track_kwargs['artists'] = [Artist(**artist_kwargs)]

    return Track(**track_kwargs)
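For reference, a minimal usage sketch of the new module, based only on the code
above (the file URI is made up; scan failures raise ``exceptions.ScannerError``):

    from mopidy.audio import scan

    scanner = scan.Scanner(timeout=1000, min_duration=100)
    data = scanner.scan('file:///music/example.mp3')
    track = scan.audio_data_to_track(data)
    # track.length is in milliseconds; track.last_modified comes from the
    # file's mtime collected by the scanner.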

View File

@ -8,7 +8,7 @@ from mopidy.backends import base
from mopidy.frontends.mpd import translator as mpd_translator
from mopidy.models import Album, SearchResult
from .translator import parse_mpd_tag_cache
from .translator import local_to_file_uri, parse_mpd_tag_cache
logger = logging.getLogger('mopidy.backends.local')
@ -231,7 +231,10 @@ class LocalLibraryUpdateProvider(base.BaseLibraryProvider):
    def load(self):
        tracks = parse_mpd_tag_cache(self._tag_cache_file, self._media_dir)
        for track in tracks:
            self._tracks[track.uri] = track
            # TODO: this should use uris as is, i.e. hack that should go away
            # with tag caches.
            uri = local_to_file_uri(track.uri, self._media_dir)
            self._tracks[uri] = track.copy(uri=uri)
        return tracks

    def add(self, track):
def add(self, track):

View File

@ -1,10 +1,10 @@
from __future__ import unicode_literals
import logging
import os
from mopidy.backends import base
from mopidy.utils import path
from . import translator
logger = logging.getLogger('mopidy.backends.local')
@ -12,8 +12,6 @@ logger = logging.getLogger('mopidy.backends.local')
class LocalPlaybackProvider(base.BasePlaybackProvider):
    def change_track(self, track):
        media_dir = self.backend.config['local']['media_dir']
        # TODO: check that type is correct.
        file_path = path.uri_to_path(track.uri).split(b':', 1)[1]
        file_path = os.path.join(media_dir, file_path)
        track = track.copy(uri=path.path_to_uri(file_path))
        uri = translator.local_to_file_uri(track.uri, media_dir)
        track = track.copy(uri=uri)
        return super(LocalPlaybackProvider, self).change_track(track)

View File

@ -6,11 +6,18 @@ import urlparse
from mopidy.models import Track, Artist, Album
from mopidy.utils.encoding import locale_decode
from mopidy.utils.path import path_to_uri
from mopidy.utils.path import path_to_uri, uri_to_path
logger = logging.getLogger('mopidy.backends.local')
def local_to_file_uri(uri, media_dir):
    # TODO: check that type is correct.
    file_path = uri_to_path(uri).split(b':', 1)[1]
    file_path = os.path.join(media_dir, file_path)
    return path_to_uri(file_path)
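For illustration, a hedged sketch of what this helper does (the local URI layout
and the media directory are assumptions inferred from the code above):

    # e.g. 'local:track:Artist/Song.mp3' with media_dir '/home/alice/Music'
    # becomes 'file:///home/alice/Music/Artist/Song.mp3'.
    local_to_file_uri('local:track:Artist/Song.mp3', '/home/alice/Music')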
def parse_m3u(file_path, media_dir):
    r"""
    Convert M3U file list of uris

View File

@ -330,6 +330,7 @@ def _add_to_tag_cache(result, dirs, files, media_dir):
relative_path = os.path.relpath(path, base_path)
relative_uri = urllib.quote(relative_path)
# TODO: use track.last_modified
track_result['file'] = relative_uri
track_result['mtime'] = get_mtime(path)
track_result['key'] = os.path.basename(text_path)

View File

@ -1,28 +1,22 @@
from __future__ import unicode_literals
import argparse
import datetime
import logging
import os
import sys
import time
import gobject
gobject.threads_init()
# Extract any command line arguments. This needs to be done before GStreamer is
# imported, so that GStreamer doesn't hijack e.g. ``--help``.
mopidy_args = sys.argv[1:]
sys.argv[1:] = []
import pygst
pygst.require('0.10')
import gst
import gst.pbutils
from mopidy import config as config_lib, exceptions, ext
from mopidy.models import Track, Artist, Album
from mopidy.audio import scan
from mopidy.backends.local import translator
from mopidy.utils import log, path, versioning
@ -73,6 +67,8 @@ def main():
    media_dir = config['local']['media_dir']
    excluded_extensions = config['local']['excluded_file_extensions']

    # TODO: cleanup to consistently use local urls, not a random mix of local
    # and file uris depending on how the data was loaded.
    uris_library = set()
    uris_update = set()
    uris_remove = set()
@ -80,18 +76,20 @@ def main():
    logging.info('Checking tracks from library.')
    for track in local_updater.load():
        try:
            stat = os.stat(path.uri_to_path(track.uri))
            uri = translator.local_to_file_uri(track.uri, media_dir)
            stat = os.stat(path.uri_to_path(uri))
            if int(stat.st_mtime) > track.last_modified:
                uris_update.add(track.uri)
            uris_library.add(track.uri)
                uris_update.add(uri)
            uris_library.add(uri)
        except OSError:
            logging.debug('Missing file %s', track.uri)
            uris_remove.add(track.uri)

    logging.info('Removing %d moved or deleted tracks.', len(uris_remove))
    logging.info('Removing %d missing tracks.', len(uris_remove))
    for uri in uris_remove:
        local_updater.remove(uri)

    logging.info('Checking %s for new or modified tracks.', media_dir)
    logging.info('Checking %s for unknown tracks.', media_dir)
    for uri in path.find_uris(config['local']['media_dir']):
        if os.path.splitext(path.uri_to_path(uri))[1] in excluded_extensions:
            logging.debug('Skipped %s: File extension excluded.', uri)
@ -100,24 +98,42 @@ def main():
        if uri not in uris_library:
            uris_update.add(uri)

    logging.info('Found %d new or modified tracks.', len(uris_update))
    logging.info('Scanning new and modified tracks.')
    logging.info('Found %d unknown tracks.', len(uris_update))
    logging.info('Scanning...')

    scanner = Scanner(config['local']['scan_timeout'])
    for uri in uris_update:
    scanner = scan.Scanner(config['local']['scan_timeout'])
    progress = Progress(len(uris_update))

    for uri in sorted(uris_update):
        try:
            data = scanner.scan(uri)
            data[b'mtime'] = os.path.getmtime(path.uri_to_path(uri))
            track = translator(data)
            track = scan.audio_data_to_track(data)
            local_updater.add(track)
            logging.debug('Added %s', track.uri)
        except exceptions.ScannerError as error:
            logging.warning('Failed %s: %s', uri, error)

    logging.info('Done scanning; committing changes.')
        progress.increment()

    logging.info('Committing changes.')
    local_updater.commit()


class Progress(object):
    def __init__(self, total):
        self.count = 0
        self.total = total
        self.start = time.time()

    def increment(self):
        self.count += 1
        if self.count % 1000 == 0 or self.count == self.total:
            duration = time.time() - self.start
            remainder = duration / self.count * (self.total - self.count)
            logging.info('Scanned %d of %d files in %ds, ~%ds left.',
                         self.count, self.total, duration, remainder)


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
@ -134,107 +150,5 @@ def parse_args():
    return parser.parse_args(args=mopidy_args)


# TODO: move into scanner.
def translator(data):
    albumartist_kwargs = {}
    album_kwargs = {}
    artist_kwargs = {}
    composer_kwargs = {}
    performer_kwargs = {}
    track_kwargs = {}

    def _retrieve(source_key, target_key, target):
        if source_key in data:
            target[target_key] = data[source_key]

    _retrieve(gst.TAG_ALBUM, 'name', album_kwargs)
    _retrieve(gst.TAG_TRACK_COUNT, 'num_tracks', album_kwargs)
    _retrieve(gst.TAG_ALBUM_VOLUME_COUNT, 'num_discs', album_kwargs)
    _retrieve(gst.TAG_ARTIST, 'name', artist_kwargs)
    _retrieve(gst.TAG_COMPOSER, 'name', composer_kwargs)
    _retrieve(gst.TAG_PERFORMER, 'name', performer_kwargs)
    _retrieve(gst.TAG_ALBUM_ARTIST, 'name', albumartist_kwargs)
    _retrieve(gst.TAG_TITLE, 'name', track_kwargs)
    _retrieve(gst.TAG_TRACK_NUMBER, 'track_no', track_kwargs)
    _retrieve(gst.TAG_ALBUM_VOLUME_NUMBER, 'disc_no', track_kwargs)
    _retrieve(gst.TAG_GENRE, 'genre', track_kwargs)
    _retrieve(gst.TAG_BITRATE, 'bitrate', track_kwargs)

    # The following keys do not seem to have a TAG_* constant.
    _retrieve('comment', 'comment', track_kwargs)
    _retrieve('musicbrainz-trackid', 'musicbrainz_id', track_kwargs)
    _retrieve('musicbrainz-artistid', 'musicbrainz_id', artist_kwargs)
    _retrieve('musicbrainz-albumid', 'musicbrainz_id', album_kwargs)
    _retrieve(
        'musicbrainz-albumartistid', 'musicbrainz_id', albumartist_kwargs)

    if gst.TAG_DATE in data and data[gst.TAG_DATE]:
        date = data[gst.TAG_DATE]
        try:
            date = datetime.date(date.year, date.month, date.day)
        except ValueError:
            pass  # Ignore invalid dates.
        else:
            track_kwargs['date'] = date.isoformat()

    if albumartist_kwargs:
        album_kwargs['artists'] = [Artist(**albumartist_kwargs)]

    track_kwargs['uri'] = data['uri']
    track_kwargs['last_modified'] = int(data['mtime'])
    track_kwargs['length'] = data[gst.TAG_DURATION]
    track_kwargs['album'] = Album(**album_kwargs)
    track_kwargs['artists'] = [Artist(**artist_kwargs)]

    if composer_kwargs:
        track_kwargs['composers'] = [Artist(**composer_kwargs)]

    if performer_kwargs:
        track_kwargs['performers'] = [Artist(**performer_kwargs)]

    return Track(**track_kwargs)


class Scanner(object):
    def __init__(self, timeout=1000):
        self.discoverer = gst.pbutils.Discoverer(timeout * 1000000)

    def scan(self, uri):
        try:
            info = self.discoverer.discover_uri(uri)
        except gobject.GError as e:
            # Losing the traceback is a non-issue since this is from C code.
            raise exceptions.ScannerError(e)

        data = {}
        audio_streams = info.get_audio_streams()
        if not audio_streams:
            raise exceptions.ScannerError('Did not find any audio streams.')

        for stream in audio_streams:
            taglist = stream.get_tags()
            if not taglist:
                continue
            for key in taglist.keys():
                # XXX: For some crazy reason some wma files spit out lists
                # here, not sure if this is due to better data in headers or
                # wma being stupid. So ugly hack for now :/
                if type(taglist[key]) is list:
                    data[key] = taglist[key][0]
                else:
                    data[key] = taglist[key]

        # Never trust metadata for these fields:
        data[b'uri'] = uri
        data[b'duration'] = info.get_duration() // gst.MSECOND

        if data[b'duration'] < 100:
            raise exceptions.ScannerError(
                'Rejecting file with less than 100ms audio data.')

        return data


if __name__ == '__main__':
    main()
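For reference, the ``__main__`` guard above keeps the module directly runnable
even as the scanning internals move into ``mopidy.audio.scan``; a minimal
invocation sketch (any installed console-script alias is an assumption and not
shown here):

    python -m mopidy.scanner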

View File

@ -3,8 +3,8 @@ from __future__ import unicode_literals
import unittest
from mopidy import exceptions
from mopidy.audio import scan
from mopidy.models import Track, Artist, Album
from mopidy.scanner import Scanner, translator
from mopidy.utils import path as path_lib
from tests import path_to_data_dir
@ -34,7 +34,7 @@ class TranslatorTest(unittest.TestCase):
'date': FakeGstDate(2006, 1, 1,),
'container-format': 'ID3 tag',
'genre': 'genre',
'duration': 4531,
'duration': 4531000000,
'comment': 'comment',
'musicbrainz-trackid': 'mbtrackid',
'musicbrainz-albumid': 'mbalbumid',
@ -57,12 +57,10 @@ class TranslatorTest(unittest.TestCase):
        self.composer = {
            'name': 'composer',
            #'musicbrainz_id': 'mbcomposerid',
        }
        self.performer = {
            'name': 'performer',
            #'musicbrainz_id': 'mbperformerid',
        }
        self.albumartist = {
@ -96,7 +94,7 @@ class TranslatorTest(unittest.TestCase):
    def check(self):
        expected = self.build_track()
        actual = translator(self.data)
        actual = scan.audio_data_to_track(self.data)
        self.assertEqual(expected, actual)

    def test_basic_data(self):
@ -191,7 +189,7 @@ class ScannerTest(unittest.TestCase):
    def scan(self, path):
        paths = path_lib.find_files(path_to_data_dir(path))
        uris = (path_lib.path_to_uri(p) for p in paths)
        scanner = Scanner()
        scanner = scan.Scanner()
        for uri in uris:
            key = uri[len('file://'):]
            try:
@ -222,8 +220,8 @@ class ScannerTest(unittest.TestCase):
    def test_duration_is_set(self):
        self.scan('scanner/simple')
        self.check('scanner/simple/song1.mp3', 'duration', 4680)
        self.check('scanner/simple/song1.ogg', 'duration', 4680)
        self.check('scanner/simple/song1.mp3', 'duration', 4680000000)
        self.check('scanner/simple/song1.ogg', 'duration', 4680000000)

    def test_artist_is_set(self):
        self.scan('scanner/simple')