From b34a8c1f738db684a75eb90a5ca0f5dd73a77e07 Mon Sep 17 00:00:00 2001
From: Thomas Adamcik
Date: Sun, 19 Jan 2014 13:47:09 +0100
Subject: [PATCH] mpd: Add MPD tokenizer

- Adds tests for the correctness of the tokenizer (which also would have
  shown that shlex would not have worked).
- Should conform with the original MPD server's behavior, though we won't
  be able to match its error messages without a lot of extra work, as a
  non-regexp version is likely a no-go in Python due to speed.
---
 mopidy/mpd/protocol/__init__.py      |  40 +++++++++
 tests/mpd/protocol/test_tokenizer.py | 129 +++++++++++++++++++++++++++
 2 files changed, 169 insertions(+)
 create mode 100644 tests/mpd/protocol/test_tokenizer.py

diff --git a/mopidy/mpd/protocol/__init__.py b/mopidy/mpd/protocol/__init__.py
index 8a0993d8..5504086a 100644
--- a/mopidy/mpd/protocol/__init__.py
+++ b/mopidy/mpd/protocol/__init__.py
@@ -93,3 +93,43 @@ def load_protocol_modules():
         audio_output, channels, command_list, connection, current_playlist,
         empty, music_db, playback, reflection, status, stickers,
         stored_playlists)
+
+
+WORD_RE = re.compile(r"""
+    ^                  # Leading whitespace is not allowed
+    ([a-z][a-z0-9_]*)  # A command name
+    (?:\s+|$)          # Trailing whitespace or EOS
+    (.*)               # Possibly a remainder to be parsed
+    """, re.VERBOSE)
+
+# Quote matching is an unrolled version of "(?:[^"\\]|\\.)*"
+PARAM_RE = re.compile(r"""
+    ^                               # Leading whitespace is not allowed
+    (?:
+        ([^%(unprintable)s"\\]+)    # ord(char) > 0x20, not ", not backslash
+        |                           # or
+        "([^"\\]*(?:\\.[^"\\]*)*)"  # anything surrounded by quotes
+    )
+    (?:\s+|$)                       # Trailing whitespace or EOS
+    (.*)                            # Possibly a remainder to be parsed
+    """ % {'unprintable': ''.join(map(chr, range(0x21)))}, re.VERBOSE)
+
+UNESCAPE_RE = re.compile(r'\\(.)')  # Backslash escapes any following char.
+
+
+# TODO: update exception usage and messages
+def tokenize(line):
+    match = WORD_RE.match(line)
+    if not match:
+        raise Exception('Invalid command')
+    command, remainder = match.groups()
+    result = [command]
+
+    while remainder:
+        match = PARAM_RE.match(remainder)
+        if not match:
+            raise Exception('Invalid parameter')
+        unquoted, quoted, remainder = match.groups()
+        result.append(unquoted or UNESCAPE_RE.sub(r'\g<1>', quoted))
+
+    return result
diff --git a/tests/mpd/protocol/test_tokenizer.py b/tests/mpd/protocol/test_tokenizer.py
new file mode 100644
index 00000000..27c9ca2d
--- /dev/null
+++ b/tests/mpd/protocol/test_tokenizer.py
@@ -0,0 +1,129 @@
+# encoding: utf-8
+
+from __future__ import unicode_literals
+
+import unittest
+
+from mopidy.mpd import protocol
+
+
+class TestTokenizer(unittest.TestCase):
+    def assertTokenizeEquals(self, expected, line):
+        self.assertEqual(expected, protocol.tokenize(line))
+
+    def assertTokenizeRaises(self, exception, line):
+        with self.assertRaises(exception):
+            protocol.tokenize(line)
+
+    def test_empty_string(self):
+        self.assertTokenizeRaises(Exception, '')
+
+    def test_whitespace(self):
+        self.assertTokenizeRaises(Exception, ' ')
+        self.assertTokenizeRaises(Exception, '\t\t\t')
+
+    def test_command(self):
+        self.assertTokenizeEquals(['test'], 'test')
+        self.assertTokenizeEquals(['test123'], 'test123')
+        self.assertTokenizeEquals(['foo_bar'], 'foo_bar')
+
+    def test_command_trailing_whitespace(self):
+        self.assertTokenizeEquals(['test'], 'test ')
+        self.assertTokenizeEquals(['test'], 'test\t\t\t')
+
+    def test_command_leading_whitespace(self):
+        self.assertTokenizeRaises(Exception, ' test')
+        self.assertTokenizeRaises(Exception, '\ttest')
+
+    def test_invalid_command(self):
+        self.assertTokenizeRaises(Exception, 'foo/bar')
+        self.assertTokenizeRaises(Exception, 'æøå')
+        self.assertTokenizeRaises(Exception, 'test?')
+        self.assertTokenizeRaises(Exception, 'te"st')
+
+    def test_unquoted_param(self):
+        self.assertTokenizeEquals(['test', 'param'], 'test param')
+        self.assertTokenizeEquals(['test', 'param'], 'test\tparam')
+
+    def test_unquoted_param_leading_whitespace(self):
+        self.assertTokenizeEquals(['test', 'param'], 'test  param')
+        self.assertTokenizeEquals(['test', 'param'], 'test\t\tparam')
+
+    def test_unquoted_param_trailing_whitespace(self):
+        self.assertTokenizeEquals(['test', 'param'], 'test param ')
+        self.assertTokenizeEquals(['test', 'param'], 'test param\t\t')
+
+    def test_unquoted_param_invalid_chars(self):
+        self.assertTokenizeRaises(Exception, 'test par"m')
+        self.assertTokenizeRaises(Exception, 'test foo\\bar')
+        self.assertTokenizeRaises(Exception, 'test foo\bbar')
+        self.assertTokenizeRaises(Exception, 'test "foo"bar')
+        self.assertTokenizeRaises(Exception, 'test foo"bar"baz')
+
+    def test_unquoted_param_numbers(self):
+        self.assertTokenizeEquals(['test', '123'], 'test 123')
+        self.assertTokenizeEquals(['test', '+123'], 'test +123')
+        self.assertTokenizeEquals(['test', '-123'], 'test -123')
+        self.assertTokenizeEquals(['test', '3.14'], 'test 3.14')
+
+    def test_unquoted_param_extended_chars(self):
+        self.assertTokenizeEquals(['test', 'æøå'], 'test æøå')
+        self.assertTokenizeEquals(['test', '?#\'$'], 'test ?#\'$')
+        self.assertTokenizeEquals(['test', '/foo/bar/'], 'test /foo/bar/')
+
+    def test_unquoted_params(self):
+        self.assertTokenizeEquals(['test', 'foo', 'bar'], 'test foo bar')
+
+    def test_quoted_param(self):
+        self.assertTokenizeEquals(['test', 'param'], 'test "param"')
+        self.assertTokenizeEquals(['test', 'param'], 'test\t"param"')
+
+    def test_quoted_param_leading_whitespace(self):
+        self.assertTokenizeEquals(['test', 'param'], 'test  "param"')
+        self.assertTokenizeEquals(['test', 'param'], 'test\t\t"param"')
+
+    def test_quoted_param_trailing_whitespace(self):
+        self.assertTokenizeEquals(['test', 'param'], 'test "param" ')
+        self.assertTokenizeEquals(['test', 'param'], 'test "param"\t\t')
+
+    def test_quoted_param_invalid_chars(self):
+        self.assertTokenizeRaises(Exception, 'test "par"m"')
+
+    def test_quoted_param_numbers(self):
+        self.assertTokenizeEquals(['test', '123'], 'test "123"')
+        self.assertTokenizeEquals(['test', '+123'], 'test "+123"')
+        self.assertTokenizeEquals(['test', '-123'], 'test "-123"')
+        self.assertTokenizeEquals(['test', '3.14'], 'test "3.14"')
+
+    def test_quoted_param_spaces(self):
+        self.assertTokenizeEquals(['test', 'foo bar'], 'test "foo bar"')
+        self.assertTokenizeEquals(['test', 'foo  bar'], 'test "foo  bar"')
+        self.assertTokenizeEquals(['test', ' param\t'], 'test " param\t"')
+
+    def test_quoted_param_extended_chars(self):
+        self.assertTokenizeEquals(['test', 'æøå'], 'test "æøå"')
+        self.assertTokenizeEquals(['test', '?#$'], 'test "?#$"')
+        self.assertTokenizeEquals(['test', '/foo/bar/'], 'test "/foo/bar/"')
+
+    def test_quoted_param_escaping(self):
+        self.assertTokenizeEquals(['test', '\\'], r'test "\\"')
+        self.assertTokenizeEquals(['test', '"'], r'test "\""')
+        self.assertTokenizeEquals(['test', ' '], r'test "\ "')
+        self.assertTokenizeEquals(['test', '\\n'], r'test "\\\n"')
+
+    def test_quoted_params(self):
+        self.assertTokenizeEquals(['test', 'foo', 'bar'], 'test "foo" "bar"')
+
+    def test_mixed_params(self):
+        self.assertTokenizeEquals(['test', 'foo', 'bar'], 'test foo "bar"')
+        self.assertTokenizeEquals(['test', 'foo', 'bar'], 'test "foo" bar')
+        self.assertTokenizeEquals(['test', '1', '2'], 'test 1 "2"')
+        self.assertTokenizeEquals(['test', '1', '2'], 'test "1" 2')
+
+        self.assertTokenizeEquals(['test', 'foo bar', 'baz', '123'],
+                                  'test "foo bar" baz 123')
+        self.assertTokenizeEquals(['test', 'foo"bar', 'baz', '123'],
+                                  r'test "foo\"bar" baz 123')
+
+    def test_unbalanced_quotes(self):
+        self.assertTokenizeRaises(Exception, 'test "foo bar" baz"')
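
For illustration, a minimal sketch of the tokenizer's expected behavior once
the patch is applied. This session is hypothetical and not part of the patch;
tokenize lives in mopidy.mpd.protocol as added above:

    from mopidy.mpd import protocol

    protocol.tokenize('status')
    # ['status']

    protocol.tokenize('find artist "Foo Bar"')
    # ['find', 'artist', 'Foo Bar']

    # Backslash escapes inside quotes are unescaped by UNESCAPE_RE:
    protocol.tokenize(r'add "dir/with \"quotes\""')
    # ['add', 'dir/with "quotes"']

    # Unbalanced quotes raise (a placeholder Exception, per the TODO above):
    protocol.tokenize('test "foo bar" baz"')
    # Exception: Invalid parameter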