Use TinyTag for local files to speed up pulling tags from media files

Use TinyTag instead of GStreamer to significantly speed up collecting tags
from media files. It is especially helpful on slow CPUs.
This commit is contained in:
Andrey Perminov 2019-07-02 13:31:46 -07:00
parent 49a08f1701
commit 76395522d0
3 changed files with 1234 additions and 56 deletions

View File

@ -10,6 +10,9 @@ from mopidy.audio import tags as tags_lib, utils
from mopidy.internal import encoding, log from mopidy.internal import encoding, log
from mopidy.internal.gi import Gst, GstPbutils from mopidy.internal.gi import Gst, GstPbutils
from mopidy.audio.tinytag import TinyTag
from urllib import unquote
# GST_ELEMENT_FACTORY_LIST: # GST_ELEMENT_FACTORY_LIST:
_DECODER = 1 << 0 _DECODER = 1 << 0
_AUDIO = 1 << 50 _AUDIO = 1 << 50
@ -47,21 +50,70 @@ class Scanner(object):
self._proxy_config = proxy_config or {} self._proxy_config = proxy_config or {}
def scan(self, uri, timeout=None): def scan(self, uri, timeout=None):
""" """
Scan the given uri collecting relevant metadata. Scan the given uri collecting relevant metadata.
:param uri: URI of the resource to scan.
:type uri: string
:param timeout: timeout for scanning a URI in ms. Defaults to the
``timeout`` value used when creating the scanner.
:type timeout: int
:return: A named tuple containing
``(uri, tags, duration, seekable, mime)``.
``tags`` is a dictionary of lists for all the tags we found.
``duration`` is the length of the URI in milliseconds, or
:class:`None` if the URI has no duration. ``seekable`` is boolean.
indicating if a seek would succeed.
"""
if uri[:4] == 'file':
duration, seekable, mime = None, None, None
tags = {}
try:
fname = unquote(uri[7:]).encode('raw_unicode_escape').decode('utf-8')
supported = False
extensions = ['.mp3', '.oga', '.ogg', '.opus', '.wav', '.flac', '.wma', '.m4b', '.m4a', '.mp4']
for fileextension in extensions:
if fname.lower().endswith(fileextension):
supported = True
break
if supported:
tag = TinyTag.get(fname, image=False)
if tag.album: tags['album'] = tag.album.rstrip('\0') # album as string
if tag.albumartist: tags['albumartist'] = tag.albumartist.rstrip('\0') # album artist as string
if tag.artist: tags['artist'] = tag.artist.rstrip('\0') # artist name as string
#if tag.audio_offset # number of bytes before audio data begins
if tag.bitrate: tags['bitrate'] = int(tag.bitrate) # bitrate in kBits/s
if tag.disc: tags['disc'] = int(tag.disc.rstrip('\0')) # disk number in album
if tag.disc_total: tags['disc_total'] = int(tag.disc_total.rstrip('\0')) # the total number of discs
duration=int(float(tag.duration) * 1000) # duration of the song in seconds
#if tag.filesize # file size in bytes
if tag.genre: tags['genre'] = tag.genre.rstrip('\0') # genre as string
#if tag.samplerate # samples per second
if tag.title: tags['title'] = tag.title.rstrip('\0') # title of the song
if tag.track: tags['track'] = int(tag.track.rstrip('\0')) # track number as string
if tag.track_total: tags['track_total'] = int(tag.track_total.rstrip('\0')) # total number of tracks as string
if tag.composer: tags['composer'] = tag.composer.rstrip('\0')
#try:
# image_data = tag.get_image()
#except IOError:
# pass
#if image_data:
# tags['image'] = image_data
#if tag.year # year or data as string
have_audio = duration > 0
seekable = True
else: # not supported
duration = 0
have_audio = 0
seekable = False
finally:
pass
return _Result(uri, tags, duration, seekable, mime, have_audio)
else:
:param uri: URI of the resource to scan.
:type uri: string
:param timeout: timeout for scanning a URI in ms. Defaults to the
``timeout`` value used when creating the scanner.
:type timeout: int
:return: A named tuple containing
``(uri, tags, duration, seekable, mime)``.
``tags`` is a dictionary of lists for all the tags we found.
``duration`` is the length of the URI in milliseconds, or
:class:`None` if the URI has no duration. ``seekable`` is boolean.
indicating if a seek would succeed.
"""
timeout = int(timeout or self._timeout_ms) timeout = int(timeout or self._timeout_ms)
tags, duration, seekable, mime = None, None, None, None tags, duration, seekable, mime = None, None, None, None
pipeline, signals = _setup_pipeline(uri, self._proxy_config) pipeline, signals = _setup_pipeline(uri, self._proxy_config)

View File

@ -81,61 +81,96 @@ def _extract_sample_data(sample):
# TODO: split based on "stream" and "track" based conversion? i.e. handle data # TODO: split based on "stream" and "track" based conversion? i.e. handle data
# from radios in it's own helper instead? # from radios in it's own helper instead?
def convert_tags_to_track(tags):
    """Convert our normalized tags to a track.

    Two tag layouts are accepted:

    * the GStreamer layout, where every value is a list of values;
    * the flat layout produced for local files scanned with TinyTag,
      where every value is a single string or integer (``title``,
      ``artist``, ``track``, ...).

    The layout is detected from the values themselves. Earlier revisions
    ran the GStreamer conversion first and fell back to the flat layout
    from a bare ``except``; that only worked when an integer ``bitrate``
    happened to raise, and otherwise iterated plain strings character by
    character.

    :param tags: dictionary of tag keys with a list of values, or with
        scalar values for TinyTag-scanned files
    :type tags: :class:`dict`
    :rtype: :class:`mopidy.models.Track`
    """
    # A missing tag dictionary yields an empty track, as it did before.
    tags = tags or {}

    if _tags_are_flat(tags):
        track_kwargs, album_kwargs = _kwargs_from_flat_tags(tags)
    else:
        track_kwargs, album_kwargs = _kwargs_from_gst_tags(tags)

    # Clear out any empty values we found
    track_kwargs = {k: v for k, v in track_kwargs.items() if v}
    album_kwargs = {k: v for k, v in album_kwargs.items() if v}

    # Only bother with album if we have a name to show.
    if album_kwargs.get('name'):
        track_kwargs['album'] = Album(**album_kwargs)

    # Plain return: a ``return`` inside ``finally`` would silently discard
    # any exception raised above, including KeyboardInterrupt.
    return Track(**track_kwargs)


def _tags_are_flat(tags):
    """Return True if any tag value is a scalar instead of a list."""
    return any(not isinstance(value, (list, tuple))
               for value in tags.values())


def _kwargs_from_gst_tags(tags):
    """Build ``(track_kwargs, album_kwargs)`` from list-valued tags."""
    album_kwargs = {}
    track_kwargs = {}

    track_kwargs['composers'] = _artists(tags, Gst.TAG_COMPOSER)
    track_kwargs['performers'] = _artists(tags, Gst.TAG_PERFORMER)
    track_kwargs['artists'] = _artists(tags, Gst.TAG_ARTIST,
                                       'musicbrainz-artistid',
                                       'musicbrainz-sortname')
    album_kwargs['artists'] = _artists(
        tags, Gst.TAG_ALBUM_ARTIST, 'musicbrainz-albumartistid')

    track_kwargs['genre'] = '; '.join(tags.get(Gst.TAG_GENRE, []))
    track_kwargs['name'] = '; '.join(tags.get(Gst.TAG_TITLE, []))
    if not track_kwargs['name']:
        track_kwargs['name'] = '; '.join(
            tags.get(Gst.TAG_ORGANIZATION, []))

    # Comment falls back to location, then copyright.
    track_kwargs['comment'] = '; '.join(tags.get('comment', []))
    if not track_kwargs['comment']:
        track_kwargs['comment'] = '; '.join(tags.get(Gst.TAG_LOCATION, []))
    if not track_kwargs['comment']:
        track_kwargs['comment'] = '; '.join(
            tags.get(Gst.TAG_COPYRIGHT, []))

    track_kwargs['track_no'] = tags.get(Gst.TAG_TRACK_NUMBER, [None])[0]
    track_kwargs['disc_no'] = tags.get(
        Gst.TAG_ALBUM_VOLUME_NUMBER, [None])[0]
    track_kwargs['bitrate'] = tags.get(Gst.TAG_BITRATE, [None])[0]
    track_kwargs['musicbrainz_id'] = tags.get(
        'musicbrainz-trackid', [None])[0]

    album_kwargs['name'] = tags.get(Gst.TAG_ALBUM, [None])[0]
    album_kwargs['num_tracks'] = tags.get(Gst.TAG_TRACK_COUNT, [None])[0]
    album_kwargs['num_discs'] = tags.get(
        Gst.TAG_ALBUM_VOLUME_COUNT, [None])[0]
    album_kwargs['musicbrainz_id'] = tags.get(
        'musicbrainz-albumid', [None])[0]

    album_kwargs['date'] = tags.get(Gst.TAG_DATE, [None])[0]
    if not album_kwargs['date']:
        datetime = tags.get(Gst.TAG_DATE_TIME, [None])[0]
        if datetime is not None:
            # Keep only the date part of an ISO style "dateTtime" value.
            album_kwargs['date'] = datetime.split('T')[0]
    track_kwargs['date'] = album_kwargs['date']

    return track_kwargs, album_kwargs


def _kwargs_from_flat_tags(tags):
    """Build ``(track_kwargs, album_kwargs)`` from scalar-valued tags."""
    album_kwargs = {}
    track_kwargs = {}

    track_kwargs['name'] = tags.get('title')
    track_kwargs['genre'] = tags.get('genre')
    track_kwargs['track_no'] = tags.get('track')
    track_kwargs['disc_no'] = tags.get('disc')
    track_kwargs['bitrate'] = tags.get('bitrate')

    # Artists are built with the ``name=`` keyword, the form this function
    # already used for track artists; the positional-dict form used for
    # composers and album artists did not pass the name as a field.
    if tags.get('artist'):
        track_kwargs['artists'] = [Artist(name=tags['artist'])]
    if tags.get('composer'):
        track_kwargs['composers'] = [Artist(name=tags['composer'])]

    album_kwargs['name'] = tags.get('album')
    album_kwargs['num_discs'] = tags.get('disc_total')
    album_kwargs['num_tracks'] = tags.get('track_total')

    # Prefer the dedicated album artist tag; fall back to the track artist,
    # which is what was used unconditionally before.
    album_artist = tags.get('albumartist') or tags.get('artist')
    if album_artist:
        album_kwargs['artists'] = [Artist(name=album_artist)]

    return track_kwargs, album_kwargs
def _artists(tags, artist_name, artist_id=None, artist_sortname=None): def _artists(tags, artist_name, artist_id=None, artist_sortname=None):
# Name missing, don't set artist # Name missing, don't set artist

1091
mopidy/audio/tinytag.py Normal file

File diff suppressed because it is too large Load Diff