From b5d9dc10a70a660184757760fb55223ef2d164ae Mon Sep 17 00:00:00 2001 From: Stein Magnus Jodal Date: Mon, 3 Dec 2012 15:03:46 +0100 Subject: [PATCH 01/11] utils: Handle paths with non-UTF-8 encodings - path_to_uri() encodes unicode input as UTF-8 and leaves bytestring input unchanged before it is converted to file:// URIs. - uri_to_path() will now always return bytestrings, since we don't know if there is any non-UTF-8 encoded chars in the file path, and converting it to unicode would make such paths no longer match the dir or file it was referring to. - split_path() will now assume it gets a bytestring in. --- mopidy/utils/path.py | 31 ++++++++++++++++++++++++++----- tests/utils/path_test.py | 36 ++++++++++++++++++++++++++++++------ 2 files changed, 56 insertions(+), 11 deletions(-) diff --git a/mopidy/utils/path.py b/mopidy/utils/path.py index 73063183..eea13fb1 100644 --- a/mopidy/utils/path.py +++ b/mopidy/utils/path.py @@ -51,19 +51,40 @@ def get_or_create_file(filename): def path_to_uri(*paths): + """ + Convert OS specific path to file:// URI. + + Accepts either unicode strings or bytestrings. The encoding of any + bytestring will be maintained so that :func:`uri_to_path` can return the + same bytestring. + + Returns a file:// URI as an unicode string. + """ path = os.path.join(*paths) - path = path.encode('utf-8') + if isinstance(path, unicode): + path = path.encode('utf-8') if sys.platform == 'win32': return 'file:' + urllib.pathname2url(path) return 'file://' + urllib.pathname2url(path) def uri_to_path(uri): + """ + Convert the file:// to a OS specific path. + + Returns a bytestring, since the file path can contain chars with other + encoding than UTF-8. + + If we had returned these paths as unicode strings, you wouldn't be able to + look up the matching dir or file on your file system because the exact path + would be lost by ignoring its encoding. + """ + if isinstance(uri, unicode): + uri = uri.encode('utf-8') if sys.platform == 'win32': - path = urllib.url2pathname(re.sub('^file:', '', uri)) + return urllib.url2pathname(re.sub(b'^file:', b'', uri)) else: - path = urllib.url2pathname(re.sub('^file://', '', uri)) - return path.encode('latin1').decode('utf-8') # Undo double encoding + return urllib.url2pathname(re.sub(b'^file://', b'', uri)) def split_path(path): @@ -72,7 +93,7 @@ def split_path(path): path, part = os.path.split(path) if part: parts.insert(0, part) - if not path or path == '/': + if not path or path == b'/': break return parts diff --git a/tests/utils/path_test.py b/tests/utils/path_test.py index 512a3ba1..cfe58e0a 100644 --- a/tests/utils/path_test.py +++ b/tests/utils/path_test.py @@ -90,31 +90,55 @@ class PathToFileURITest(unittest.TestCase): result = path.path_to_uri('/tmp/æøå') self.assertEqual(result, 'file:///tmp/%C3%A6%C3%B8%C3%A5') + def test_utf8_in_path(self): + if sys.platform == 'win32': + result = path.path_to_uri('C:/æøå'.encode('utf-8')) + self.assertEqual(result, 'file:///C://%C3%A6%C3%B8%C3%A5') + else: + result = path.path_to_uri('/tmp/æøå'.encode('utf-8')) + self.assertEqual(result, 'file:///tmp/%C3%A6%C3%B8%C3%A5') + + def test_latin1_in_path(self): + if sys.platform == 'win32': + result = path.path_to_uri('C:/æøå'.encode('latin-1')) + self.assertEqual(result, 'file:///C://%E6%F8%E5') + else: + result = path.path_to_uri('/tmp/æøå'.encode('latin-1')) + self.assertEqual(result, 'file:///tmp/%E6%F8%E5') + class UriToPathTest(unittest.TestCase): def test_simple_uri(self): if sys.platform == 'win32': result = path.uri_to_path('file:///C://WINDOWS/clock.avi') - self.assertEqual(result, 'C:/WINDOWS/clock.avi') + self.assertEqual(result, 'C:/WINDOWS/clock.avi'.encode('utf-8')) else: result = path.uri_to_path('file:///etc/fstab') - self.assertEqual(result, '/etc/fstab') + self.assertEqual(result, '/etc/fstab'.encode('utf-8')) def test_space_in_uri(self): if sys.platform == 'win32': result = path.uri_to_path('file:///C://test%20this') - self.assertEqual(result, 'C:/test this') + self.assertEqual(result, 'C:/test this'.encode('utf-8')) else: result = path.uri_to_path('file:///tmp/test%20this') - self.assertEqual(result, '/tmp/test this') + self.assertEqual(result, '/tmp/test this'.encode('utf-8')) def test_unicode_in_uri(self): if sys.platform == 'win32': result = path.uri_to_path('file:///C://%C3%A6%C3%B8%C3%A5') - self.assertEqual(result, 'C:/æøå') + self.assertEqual(result, 'C:/æøå'.encode('utf-8')) else: result = path.uri_to_path('file:///tmp/%C3%A6%C3%B8%C3%A5') - self.assertEqual(result, '/tmp/æøå') + self.assertEqual(result, '/tmp/æøå'.encode('utf-8')) + + def test_latin1_in_uri(self): + if sys.platform == 'win32': + result = path.uri_to_path('file:///C://%E6%F8%E5') + self.assertEqual(result, 'C:/æøå'.encode('latin-1')) + else: + result = path.uri_to_path('file:///tmp/%E6%F8%E5') + self.assertEqual(result, '/tmp/æøå'.encode('latin-1')) class SplitPathTest(unittest.TestCase): From 905ceeb72a8ee5bb0aa8911f7a6fa06a40458417 Mon Sep 17 00:00:00 2001 From: Stein Magnus Jodal Date: Mon, 3 Dec 2012 15:05:18 +0100 Subject: [PATCH 02/11] utils: find_files() returns bytestrings --- mopidy/utils/path.py | 30 ++++++++++++++++-------------- tests/utils/path_test.py | 6 +++--- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/mopidy/utils/path.py b/mopidy/utils/path.py index eea13fb1..8ef5e187 100644 --- a/mopidy/utils/path.py +++ b/mopidy/utils/path.py @@ -106,30 +106,32 @@ def expand_path(path): def find_files(path): + """ + Finds all files within a path. + + Directories and files with names starting with ``.`` is ignored. + + :returns: yields the full path to files as bytestrings + """ + if isinstance(path, unicode): + path = path.encode('utf-8') + if os.path.isfile(path): - if not isinstance(path, unicode): - path = path.decode('utf-8') - if not os.path.basename(path).startswith('.'): + if not os.path.basename(path).startswith(b'.'): yield path else: for dirpath, dirnames, filenames in os.walk(path): - # Filter out hidden folders by modifying dirnames in place. for dirname in dirnames: - if dirname.startswith('.'): + if dirname.startswith(b'.'): + # Skip hidden folders by modifying dirnames inplace dirnames.remove(dirname) for filename in filenames: - # Skip hidden files. - if filename.startswith('.'): + if filename.startswith(b'.'): + # Skip hidden files continue - filename = os.path.join(dirpath, filename) - if not isinstance(filename, unicode): - try: - filename = filename.decode('utf-8') - except UnicodeDecodeError: - filename = filename.decode('latin1') - yield filename + yield os.path.join(dirpath, filename) def check_file_path_is_inside_base_dir(file_path, base_path): diff --git a/tests/utils/path_test.py b/tests/utils/path_test.py index cfe58e0a..629819f8 100644 --- a/tests/utils/path_test.py +++ b/tests/utils/path_test.py @@ -201,11 +201,11 @@ class FindFilesTest(unittest.TestCase): self.assertEqual(len(files), 1) self.assert_(files[0], path_to_data_dir('blank.mp3')) - def test_names_are_unicode(self): - is_unicode = lambda f: isinstance(f, unicode) + def test_names_are_bytestrings(self): + is_bytes = lambda f: isinstance(f, bytes) for name in self.find(''): self.assert_( - is_unicode(name), '%s is not unicode object' % repr(name)) + is_bytes(name), '%s is not unicode object' % repr(name)) def test_ignores_hidden_folders(self): self.assertEqual(self.find('.hidden'), []) From a006918453b9b7e2cf89635967687ff8730a037b Mon Sep 17 00:00:00 2001 From: Stein Magnus Jodal Date: Fri, 7 Dec 2012 12:11:48 +0100 Subject: [PATCH 03/11] mpd: Use bytestrings in dir tree building --- mopidy/frontends/mpd/translator.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/mopidy/frontends/mpd/translator.py b/mopidy/frontends/mpd/translator.py index 3b77f929..b16264a1 100644 --- a/mopidy/frontends/mpd/translator.py +++ b/mopidy/frontends/mpd/translator.py @@ -177,16 +177,17 @@ def _add_to_tag_cache(result, folders, files): def tracks_to_directory_tree(tracks): directories = ({}, []) + for track in tracks: - path = '' + path = b'' current = directories - local_folder = settings.LOCAL_MUSIC_PATH - track_path = uri_to_path(track.uri) - track_path = re.sub('^' + re.escape(local_folder), '', track_path) - track_dir = os.path.dirname(track_path) + absolute_track_dir_path = os.path.dirname(uri_to_path(track.uri)) + relative_track_dir_path = re.sub( + '^' + re.escape(settings.LOCAL_MUSIC_PATH), b'', + absolute_track_dir_path) - for part in split_path(track_dir): + for part in split_path(relative_track_dir_path): path = os.path.join(path, part) if path not in current[0]: current[0][path] = ({}, []) From 6311e13cecc1c1cd8868d0e53890b2b42f7149d8 Mon Sep 17 00:00:00 2001 From: Stein Magnus Jodal Date: Tue, 11 Dec 2012 12:03:26 +0100 Subject: [PATCH 04/11] mpd: urlencode any non-UTF-8 path so it can be displayed as UTF-8 --- mopidy/frontends/mpd/translator.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/mopidy/frontends/mpd/translator.py b/mopidy/frontends/mpd/translator.py index b16264a1..1d7b52aa 100644 --- a/mopidy/frontends/mpd/translator.py +++ b/mopidy/frontends/mpd/translator.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import os import re +import urllib from mopidy import settings from mopidy.frontends.mpd import protocol @@ -153,13 +154,16 @@ def tracks_to_tag_cache_format(tracks): def _add_to_tag_cache(result, folders, files): - music_folder = settings.LOCAL_MUSIC_PATH + base_path = settings.LOCAL_MUSIC_PATH.encode('utf-8') for path, entry in folders.items(): - name = os.path.split(path)[1] - mtime = get_mtime(os.path.join(music_folder, path)) - result.append(('directory', path)) - result.append(('mtime', mtime)) + try: + text_path = path.decode('utf-8') + except UnicodeDecodeError: + text_path = urllib.pathname2url(path).decode('utf-8') + name = os.path.split(text_path)[1] + result.append(('directory', text_path)) + result.append(('mtime', get_mtime(os.path.join(base_path, path)))) result.append(('begin', name)) _add_to_tag_cache(result, *entry) result.append(('end', name)) @@ -167,9 +171,13 @@ def _add_to_tag_cache(result, folders, files): result.append(('songList begin',)) for track in files: track_result = dict(track_to_mpd_format(track)) - track_result['mtime'] = get_mtime(uri_to_path(track_result['file'])) - track_result['file'] = track_result['file'] - track_result['key'] = os.path.basename(track_result['file']) + path = uri_to_path(track_result['file']) + try: + text_path = path.decode('utf-8') + except UnicodeDecodeError: + text_path = urllib.pathname2url(path).decode('utf-8') + track_result['mtime'] = get_mtime(path) + track_result['key'] = os.path.basename(text_path) track_result = order_mpd_track_info(track_result.items()) result.extend(track_result) result.append(('songList end',)) From e9eac16284153b44eed556dba9753f5789ddf99a Mon Sep 17 00:00:00 2001 From: Stein Magnus Jodal Date: Wed, 12 Dec 2012 15:07:00 +0100 Subject: [PATCH 05/11] mpd: Use relative urlencoded paths in tag cache This partly reverts "beac2e8 mpd: Use file:// URIs in tag_cache" by removing the "file://" URI scheme and the music dir base path from the "file:" fields in the tag cache. The advantage is that the tag cache becomes independent of the music dir location and the tag cache loader can be made compatible with both old and new tag caches. --- mopidy/frontends/mpd/translator.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mopidy/frontends/mpd/translator.py b/mopidy/frontends/mpd/translator.py index 1d7b52aa..b2113dda 100644 --- a/mopidy/frontends/mpd/translator.py +++ b/mopidy/frontends/mpd/translator.py @@ -169,17 +169,25 @@ def _add_to_tag_cache(result, folders, files): result.append(('end', name)) result.append(('songList begin',)) + for track in files: track_result = dict(track_to_mpd_format(track)) + path = uri_to_path(track_result['file']) try: text_path = path.decode('utf-8') except UnicodeDecodeError: text_path = urllib.pathname2url(path).decode('utf-8') + relative_path = os.path.relpath(path, base_path) + relative_uri = urllib.pathname2url(relative_path) + + track_result['file'] = relative_uri track_result['mtime'] = get_mtime(path) track_result['key'] = os.path.basename(text_path) track_result = order_mpd_track_info(track_result.items()) + result.extend(track_result) + result.append(('songList end',)) From c8a068b02ca6665e4bc335fb808071894e4c3ef4 Mon Sep 17 00:00:00 2001 From: Stein Magnus Jodal Date: Wed, 12 Dec 2012 15:08:51 +0100 Subject: [PATCH 06/11] local: Support tag caches with urlencoded paths This adds support for loading tag caches where the "file:" field has urlencoded paths. For old tag caches without the urlencoding, this is a noop. Thus, old tag caches continues to work. --- mopidy/backends/local/translator.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mopidy/backends/local/translator.py b/mopidy/backends/local/translator.py index 59e2957a..5f2a9bc5 100644 --- a/mopidy/backends/local/translator.py +++ b/mopidy/backends/local/translator.py @@ -1,6 +1,7 @@ from __future__ import unicode_literals import logging +import urllib from mopidy.models import Track, Artist, Album from mopidy.utils.encoding import locale_decode @@ -139,6 +140,7 @@ def _convert_mpd_data(data, tracks, music_dir): path = data['file'][1:] else: path = data['file'] + path = urllib.uri2pathname(path) if artist_kwargs: artist = Artist(**artist_kwargs) From b397162bd0020332c45d9ab1fa0a3555066cdb20 Mon Sep 17 00:00:00 2001 From: Stein Magnus Jodal Date: Wed, 12 Dec 2012 14:45:36 +0100 Subject: [PATCH 07/11] docs: Update changelog --- docs/changes.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/changes.rst b/docs/changes.rst index aa69536c..acade010 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -29,6 +29,10 @@ v0.10.0 (in development) :option:`-v`/:option:`--verbose` options to control the amount of logging output when scanning. +- The scanner can now handle files with other encodings than UTF-8. Rebuild + your tag cache with ``mopidy-scan`` to include tracks that may have been + ignored previously. + **HTTP frontend** - Added new optional HTTP frontend which exposes Mopidy's core API through From 1707d6ae6e708b47586bfb5416f649e052c9c823 Mon Sep 17 00:00:00 2001 From: Stein Magnus Jodal Date: Wed, 12 Dec 2012 15:43:16 +0100 Subject: [PATCH 08/11] local: Fix typo --- mopidy/backends/local/translator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mopidy/backends/local/translator.py b/mopidy/backends/local/translator.py index 5f2a9bc5..00500ef7 100644 --- a/mopidy/backends/local/translator.py +++ b/mopidy/backends/local/translator.py @@ -140,7 +140,7 @@ def _convert_mpd_data(data, tracks, music_dir): path = data['file'][1:] else: path = data['file'] - path = urllib.uri2pathname(path) + path = urllib.url2pathname(path) if artist_kwargs: artist = Artist(**artist_kwargs) From b76e27a62bda316c5a673845929efec04b26cfe8 Mon Sep 17 00:00:00 2001 From: Stein Magnus Jodal Date: Wed, 12 Dec 2012 15:54:04 +0100 Subject: [PATCH 09/11] mpd: Revert full URI in tag cache test as well --- tests/frontends/mpd/serializer_test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/frontends/mpd/serializer_test.py b/tests/frontends/mpd/serializer_test.py index 211db600..aa3b77bb 100644 --- a/tests/frontends/mpd/serializer_test.py +++ b/tests/frontends/mpd/serializer_test.py @@ -4,7 +4,7 @@ import datetime import os from mopidy import settings -from mopidy.utils.path import mtime +from mopidy.utils.path import mtime, uri_to_path from mopidy.frontends.mpd import translator, protocol from mopidy.models import Album, Artist, TlTrack, Playlist, Track @@ -131,7 +131,9 @@ class TracksToTagCacheFormatTest(unittest.TestCase): mtime.undo_fake() def translate(self, track): + base_path = settings.LOCAL_MUSIC_PATH.encode('utf-8') result = dict(translator.track_to_mpd_format(track)) + result['file'] = uri_to_path(result['file'])[len(base_path) + 1:] result['key'] = os.path.basename(result['file']) result['mtime'] = mtime('') return translator.order_mpd_track_info(result.items()) From a221036e5a4f831690fe56fe71d9f506d3489c4e Mon Sep 17 00:00:00 2001 From: Stein Magnus Jodal Date: Wed, 12 Dec 2012 23:05:12 +0100 Subject: [PATCH 10/11] tests: Fix error message --- tests/utils/path_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utils/path_test.py b/tests/utils/path_test.py index 629819f8..461f0809 100644 --- a/tests/utils/path_test.py +++ b/tests/utils/path_test.py @@ -205,7 +205,7 @@ class FindFilesTest(unittest.TestCase): is_bytes = lambda f: isinstance(f, bytes) for name in self.find(''): self.assert_( - is_bytes(name), '%s is not unicode object' % repr(name)) + is_bytes(name), '%s is not bytes object' % repr(name)) def test_ignores_hidden_folders(self): self.assertEqual(self.find('.hidden'), []) From 0f603c3eded58cef063f1f033f3c7a29a0c8e7b3 Mon Sep 17 00:00:00 2001 From: Stein Magnus Jodal Date: Wed, 12 Dec 2012 23:13:52 +0100 Subject: [PATCH 11/11] Use urllib.{quote,unquote} instead of {pathname2url,url2pathname} --- mopidy/backends/local/translator.py | 2 +- mopidy/frontends/mpd/translator.py | 6 +++--- mopidy/utils/path.py | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/mopidy/backends/local/translator.py b/mopidy/backends/local/translator.py index 00500ef7..ff58a16e 100644 --- a/mopidy/backends/local/translator.py +++ b/mopidy/backends/local/translator.py @@ -140,7 +140,7 @@ def _convert_mpd_data(data, tracks, music_dir): path = data['file'][1:] else: path = data['file'] - path = urllib.url2pathname(path) + path = urllib.unquote(path) if artist_kwargs: artist = Artist(**artist_kwargs) diff --git a/mopidy/frontends/mpd/translator.py b/mopidy/frontends/mpd/translator.py index b2113dda..0c95f044 100644 --- a/mopidy/frontends/mpd/translator.py +++ b/mopidy/frontends/mpd/translator.py @@ -160,7 +160,7 @@ def _add_to_tag_cache(result, folders, files): try: text_path = path.decode('utf-8') except UnicodeDecodeError: - text_path = urllib.pathname2url(path).decode('utf-8') + text_path = urllib.quote(path).decode('utf-8') name = os.path.split(text_path)[1] result.append(('directory', text_path)) result.append(('mtime', get_mtime(os.path.join(base_path, path)))) @@ -177,9 +177,9 @@ def _add_to_tag_cache(result, folders, files): try: text_path = path.decode('utf-8') except UnicodeDecodeError: - text_path = urllib.pathname2url(path).decode('utf-8') + text_path = urllib.quote(path).decode('utf-8') relative_path = os.path.relpath(path, base_path) - relative_uri = urllib.pathname2url(relative_path) + relative_uri = urllib.quote(relative_path) track_result['file'] = relative_uri track_result['mtime'] = get_mtime(path) diff --git a/mopidy/utils/path.py b/mopidy/utils/path.py index 8ef5e187..c4fa0ce2 100644 --- a/mopidy/utils/path.py +++ b/mopidy/utils/path.py @@ -64,8 +64,8 @@ def path_to_uri(*paths): if isinstance(path, unicode): path = path.encode('utf-8') if sys.platform == 'win32': - return 'file:' + urllib.pathname2url(path) - return 'file://' + urllib.pathname2url(path) + return 'file:' + urllib.quote(path) + return 'file://' + urllib.quote(path) def uri_to_path(uri): @@ -82,9 +82,9 @@ def uri_to_path(uri): if isinstance(uri, unicode): uri = uri.encode('utf-8') if sys.platform == 'win32': - return urllib.url2pathname(re.sub(b'^file:', b'', uri)) + return urllib.unquote(re.sub(b'^file:', b'', uri)) else: - return urllib.url2pathname(re.sub(b'^file://', b'', uri)) + return urllib.unquote(re.sub(b'^file://', b'', uri)) def split_path(path):