Use TinyTag for local files to speed up pulling tags from media files

Use TinyTag instead of GStreamer to significantly speed up collecting tags from media files. It is especially helpful on machines with a slow CPU.
2019-07-02 13:31:46 -07:00 · 2019-07-02 13:31:46 -07:00 · 76395522d0
commit 76395522d0
parent 49a08f1701
3 changed files with 1234 additions and 56 deletions
--- a/mopidy/audio/scan.py
+++ b/mopidy/audio/scan.py
@ -10,6 +10,9 @@ from mopidy.audio import tags as tags_lib, utils
 from mopidy.internal import encoding, log
 from mopidy.internal.gi import Gst, GstPbutils
 from mopidy.audio.tinytag import TinyTag
 from urllib import unquote
 # GST_ELEMENT_FACTORY_LIST:
 _DECODER = 1 << 0
 _AUDIO = 1 << 50
@ -47,21 +50,70 @@ class Scanner(object):
        self._proxy_config = proxy_config or {}
    def scan(self, uri, timeout=None):
-        """
+      """
-        Scan the given uri collecting relevant metadata.
+      Scan the given uri collecting relevant metadata.
      :param uri: URI of the resource to scan.
      :type uri: string
      :param timeout: timeout for scanning a URI in ms. Defaults to the
          ``timeout`` value used when creating the scanner.
      :type timeout: int
      :return: A named tuple containing
          ``(uri, tags, duration, seekable, mime)``.
          ``tags`` is a dictionary of lists for all the tags we found.
          ``duration`` is the length of the URI in milliseconds, or
          :class:`None` if the URI has no duration. ``seekable`` is boolean.
          indicating if a seek would succeed.
      """
      if uri[:4] == 'file':
            duration, seekable, mime = None, None, None
            tags = {}
            try:
                fname = unquote(uri[7:]).encode('raw_unicode_escape').decode('utf-8')
                supported = False
                extensions = ['.mp3', '.oga', '.ogg', '.opus', '.wav', '.flac', '.wma', '.m4b', '.m4a', '.mp4']
                for fileextension in extensions:
                   if fname.lower().endswith(fileextension):
                       supported = True
                       break
                if supported:
                    tag = TinyTag.get(fname, image=False)
                    if tag.album:        tags['album'] =       tag.album.rstrip('\0')               # album as string
                    if tag.albumartist:  tags['albumartist'] = tag.albumartist.rstrip('\0')   # album artist as string
                    if tag.artist:       tags['artist'] =      tag.artist.rstrip('\0')             # artist name as string
                    #if tag.audio_offset  # number of bytes before audio data begins
                    if tag.bitrate:      tags['bitrate'] =     int(tag.bitrate)       # bitrate in kBits/s
                    if tag.disc:         tags['disc'] =        int(tag.disc.rstrip('\0'))         # disk number in album
                    if tag.disc_total:   tags['disc_total'] =  int(tag.disc_total.rstrip('\0')) # the total number of discs
                    duration=int(float(tag.duration) * 1000)      # duration of the song in seconds
                    #if tag.filesize      # file size in bytes
                    if tag.genre:        tags['genre'] =       tag.genre.rstrip('\0')          # genre as string
                    #if tag.samplerate    # samples per second
                    if tag.title:        tags['title'] =       tag.title.rstrip('\0')          # title of the song
                    if tag.track:        tags['track'] =       int(tag.track.rstrip('\0'))         # track number as string
                    if tag.track_total:  tags['track_total'] = int(tag.track_total.rstrip('\0')) # total number of tracks as string
                    if tag.composer:     tags['composer'] =    tag.composer.rstrip('\0')
                    #try:
                    #    image_data = tag.get_image()
                    #except IOError:
                    #    pass
                    #if image_data:
                    #    tags['image'] =  image_data
                    #if tag.year          # year or data as string
                    have_audio =  duration > 0
                    seekable = True
                else: # not supported
                    duration = 0
                    have_audio = 0
                    seekable = False
            finally:
                pass
            return _Result(uri, tags, duration, seekable, mime, have_audio)
      else:
        :param uri: URI of the resource to scan.
        :type uri: string
        :param timeout: timeout for scanning a URI in ms. Defaults to the
            ``timeout`` value used when creating the scanner.
        :type timeout: int
        :return: A named tuple containing
            ``(uri, tags, duration, seekable, mime)``.
            ``tags`` is a dictionary of lists for all the tags we found.
            ``duration`` is the length of the URI in milliseconds, or
            :class:`None` if the URI has no duration. ``seekable`` is boolean.
            indicating if a seek would succeed.
        """
        timeout = int(timeout or self._timeout_ms)
        tags, duration, seekable, mime = None, None, None, None
        pipeline, signals = _setup_pipeline(uri, self._proxy_config)
--- a/mopidy/audio/tags.py
+++ b/mopidy/audio/tags.py
@ -81,61 +81,96 @@ def _extract_sample_data(sample):
 # TODO: split based on "stream" and "track" based conversion? i.e. handle data
 # from radios in it's own helper instead?
 def _gst_tags_to_kwargs(tags):
     """Build Track/Album keyword arguments from list-valued tags.

     :param tags: dictionary of tag keys with a list of values
     :type tags: :class:`dict`
     :return: ``(track_kwargs, album_kwargs)``, unfiltered
     :rtype: tuple of two :class:`dict`
     """
     album_kwargs = {}
     track_kwargs = {}

     track_kwargs['composers'] = _artists(tags, Gst.TAG_COMPOSER)
     track_kwargs['performers'] = _artists(tags, Gst.TAG_PERFORMER)
     track_kwargs['artists'] = _artists(tags, Gst.TAG_ARTIST,
                                        'musicbrainz-artistid',
                                        'musicbrainz-sortname')
     album_kwargs['artists'] = _artists(
         tags, Gst.TAG_ALBUM_ARTIST, 'musicbrainz-albumartistid')

     track_kwargs['genre'] = '; '.join(tags.get(Gst.TAG_GENRE, []))
     track_kwargs['name'] = '; '.join(tags.get(Gst.TAG_TITLE, []))
     if not track_kwargs['name']:
         track_kwargs['name'] = '; '.join(tags.get(Gst.TAG_ORGANIZATION, []))

     # Comment falls back to location, then copyright, when empty.
     track_kwargs['comment'] = '; '.join(tags.get('comment', []))
     if not track_kwargs['comment']:
         track_kwargs['comment'] = '; '.join(tags.get(Gst.TAG_LOCATION, []))
     if not track_kwargs['comment']:
         track_kwargs['comment'] = '; '.join(tags.get(Gst.TAG_COPYRIGHT, []))

     track_kwargs['track_no'] = tags.get(Gst.TAG_TRACK_NUMBER, [None])[0]
     track_kwargs['disc_no'] = tags.get(Gst.TAG_ALBUM_VOLUME_NUMBER, [None])[0]
     track_kwargs['bitrate'] = tags.get(Gst.TAG_BITRATE, [None])[0]
     track_kwargs['musicbrainz_id'] = tags.get('musicbrainz-trackid', [None])[0]

     album_kwargs['name'] = tags.get(Gst.TAG_ALBUM, [None])[0]
     album_kwargs['num_tracks'] = tags.get(Gst.TAG_TRACK_COUNT, [None])[0]
     album_kwargs['num_discs'] = tags.get(Gst.TAG_ALBUM_VOLUME_COUNT, [None])[0]
     album_kwargs['musicbrainz_id'] = tags.get('musicbrainz-albumid', [None])[0]

     album_kwargs['date'] = tags.get(Gst.TAG_DATE, [None])[0]
     if not album_kwargs['date']:
         datetime = tags.get(Gst.TAG_DATE_TIME, [None])[0]
         if datetime is not None:
             album_kwargs['date'] = datetime.split('T')[0]
     track_kwargs['date'] = album_kwargs['date']

     return track_kwargs, album_kwargs


 def _flat_tags_to_kwargs(tags):
     """Build Track/Album keyword arguments from flat, single-valued tags.

     Reads the keys ``title``, ``genre``, ``track``, ``bitrate``, ``disc``,
     ``artist``, ``composer``, ``album``, ``disc_total`` and ``track_total``,
     each holding one value rather than a list.

     :param tags: dictionary of tag keys with a single value each
     :type tags: :class:`dict`
     :return: ``(track_kwargs, album_kwargs)``, unfiltered
     :rtype: tuple of two :class:`dict`
     """
     album_kwargs = {}
     track_kwargs = {}

     # Plain values are copied across under the model's field name.
     for tag_key, field in (('title', 'name'),
                            ('genre', 'genre'),
                            ('track', 'track_no'),
                            ('bitrate', 'bitrate'),
                            ('disc', 'disc_no')):
         if tag_key in tags:
             track_kwargs[field] = tags[tag_key]

     for tag_key, field in (('album', 'name'),
                            ('disc_total', 'num_discs'),
                            ('track_total', 'num_tracks')):
         if tag_key in tags:
             album_kwargs[field] = tags[tag_key]

     # Artists are always built with keyword arguments so the name is
     # actually stored on the model.
     if 'artist' in tags:
         track_kwargs['artists'] = [Artist(name=tags['artist'])]
         # NOTE(review): the album artist is taken from 'artist', not
         # 'albumartist', as in the code this replaces -- confirm intent.
         album_kwargs['artists'] = [Artist(name=tags['artist'])]
     if 'composer' in tags:
         track_kwargs['composers'] = [Artist(name=tags['composer'])]

     return track_kwargs, album_kwargs


 def convert_tags_to_track(tags):
     """Convert our normalized tags to a track.

     The tags are first converted as a dictionary of lists. If that
     conversion raises, they are converted again from scratch as a flat
     dictionary holding one value per key.

     :param tags: dictionary of tag keys with a list of values, or a flat
         dictionary with a single value per key
     :type tags: :class:`dict`
     :rtype: :class:`mopidy.models.Track`
     """
     try:
         track_kwargs, album_kwargs = _gst_tags_to_kwargs(tags)
     except Exception:
         # NOTE(review): choosing the flat path by exception is fragile; a
         # flat dict that happens not to raise above is converted as if it
         # held lists. Consider tagging the dict's shape explicitly.
         # Start from fresh dicts so nothing half-converted leaks through.
         track_kwargs, album_kwargs = _flat_tags_to_kwargs(tags)

     # Clear out any empty values we found
     track_kwargs = {k: v for k, v in track_kwargs.items() if v}
     album_kwargs = {k: v for k, v in album_kwargs.items() if v}

     # Only bother with album if we have a name to show.
     if album_kwargs.get('name'):
         track_kwargs['album'] = Album(**album_kwargs)

     # Returned outside any ``finally`` so real errors are not swallowed.
     return Track(**track_kwargs)
 def _artists(tags, artist_name, artist_id=None, artist_sortname=None):
    # Name missing, don't set artist
--- a/mopidy/audio/tinytag.py
+++ b/mopidy/audio/tinytag.py