mpd: Add MPD tokenizer

- Adds tests for the correctness of the tokenizer (which also demonstrate
  why shlex would not have worked; see the sketch below).
- Should conform to the original MPD server's behavior, though we won't be
  able to match its error messages without a lot of extra work, as a
  non-regexp tokenizer is likely a no-go in Python due to speed.
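
A minimal sketch of where shlex diverges from the MPD line grammar; the
inputs mirror the tests added below, and the shlex results are those of the
stock standard library module:

    import shlex

    shlex.split(' test')             # ['test'] -- but MPD must reject
                                     # leading whitespace
    shlex.split("test ?#'$")         # ValueError: No closing quotation --
                                     # but MPD accepts ?#'$ as an unquoted param
    shlex.split('test foo"bar"baz')  # ['test', 'foobarbaz'] -- but MPD
                                     # must reject a quote inside a bare param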
Author: Thomas Adamcik
Date:   2014-01-19 13:47:09 +01:00
Parent: 4026e16996
Commit: b34a8c1f73

2 changed files with 169 additions and 0 deletions

@@ -93,3 +93,43 @@ def load_protocol_modules():
        audio_output, channels, command_list, connection, current_playlist,
        empty, music_db, playback, reflection, status, stickers,
        stored_playlists)


WORD_RE = re.compile(r"""
    ^                    # Leading whitespace is not allowed
    ([a-z][a-z0-9_]*)    # A command name
    (?:\s+|$)            # trailing whitespace or EOS
    (.*)                 # Possibly a remainder to be parsed
    """, re.VERBOSE)

# Quote matching is an unrolled version of "(?:[^"\\]|\\.)*"; unrolling
# avoids the alternation-inside-repetition and the backtracking it invites.
PARAM_RE = re.compile(r"""
    ^                                # Leading whitespace is not allowed
    (?:
        ([^%(unprintable)s"\\]+)     # ord(char) <= 0x20, not ", not backslash
        |                            # or
        "([^"\\]*(?:\\.[^"\\]*)*)"   # anything surrounded by quotes
    )
    (?:\s+|$)                        # trailing whitespace or EOS
    (.*)                             # Possibly a remainder to be parsed
    """ % {'unprintable': ''.join(map(chr, range(0x21)))}, re.VERBOSE)

UNESCAPE_RE = re.compile(r'\\(.)') # Backslash escapes any following char.


# TODO: update exception usage and messages
def tokenize(line):
    match = WORD_RE.match(line)
    if not match:
        raise Exception('Invalid command')
    command, remainder = match.groups()
    result = [command]
    while remainder:
        match = PARAM_RE.match(remainder)
        if not match:
            raise Exception('Invalid parameter')
        unquoted, quoted, remainder = match.groups()
        result.append(unquoted or UNESCAPE_RE.sub(r'\g<1>', quoted))
    return result
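
For illustration, a few sample calls and the token lists they produce (the
command names are ordinary MPD commands, used here only as examples):

    tokenize('status')                    # ['status']
    tokenize('find "artist" "foo bar"')   # ['find', 'artist', 'foo bar']
    tokenize(r'add "foo\"bar"')           # ['add', 'foo"bar']
    tokenize(' status')                   # raises Exception('Invalid command')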

@@ -0,0 +1,129 @@
# encoding: utf-8

from __future__ import unicode_literals

import unittest

from mopidy.mpd import protocol


class TestTokenizer(unittest.TestCase):
    def assertTokenizeEquals(self, expected, line):
        self.assertEqual(expected, protocol.tokenize(line))

    def assertTokenizeRaises(self, exception, line):
        with self.assertRaises(exception):
            protocol.tokenize(line)

    def test_empty_string(self):
        self.assertTokenizeRaises(Exception, '')

    def test_whitespace(self):
        self.assertTokenizeRaises(Exception, ' ')
        self.assertTokenizeRaises(Exception, '\t\t\t')

    def test_command(self):
        self.assertTokenizeEquals(['test'], 'test')
        self.assertTokenizeEquals(['test123'], 'test123')
        self.assertTokenizeEquals(['foo_bar'], 'foo_bar')

    def test_command_trailing_whitespace(self):
        self.assertTokenizeEquals(['test'], 'test ')
        self.assertTokenizeEquals(['test'], 'test\t\t\t')

    def test_command_leading_whitespace(self):
        self.assertTokenizeRaises(Exception, ' test')
        self.assertTokenizeRaises(Exception, '\ttest')

    def test_invalid_command(self):
        self.assertTokenizeRaises(Exception, 'foo/bar')
        self.assertTokenizeRaises(Exception, 'æøå')
        self.assertTokenizeRaises(Exception, 'test?')
        self.assertTokenizeRaises(Exception, 'te"st')

    def test_unquoted_param(self):
        self.assertTokenizeEquals(['test', 'param'], 'test param')
        self.assertTokenizeEquals(['test', 'param'], 'test\tparam')

    def test_unquoted_param_leading_whitespace(self):
        self.assertTokenizeEquals(['test', 'param'], 'test  param')
        self.assertTokenizeEquals(['test', 'param'], 'test\t\tparam')

    def test_unquoted_param_trailing_whitespace(self):
        self.assertTokenizeEquals(['test', 'param'], 'test param ')
        self.assertTokenizeEquals(['test', 'param'], 'test param\t\t')

    def test_unquoted_param_invalid_chars(self):
        self.assertTokenizeRaises(Exception, 'test par"m')
        self.assertTokenizeRaises(Exception, 'test foo\\bar')
        self.assertTokenizeRaises(Exception, 'test foo\bbar')
        self.assertTokenizeRaises(Exception, 'test "foo"bar')
        self.assertTokenizeRaises(Exception, 'test foo"bar"baz')

    def test_unquoted_param_numbers(self):
        self.assertTokenizeEquals(['test', '123'], 'test 123')
        self.assertTokenizeEquals(['test', '+123'], 'test +123')
        self.assertTokenizeEquals(['test', '-123'], 'test -123')
        self.assertTokenizeEquals(['test', '3.14'], 'test 3.14')

    def test_unquoted_param_extended_chars(self):
        self.assertTokenizeEquals(['test', 'æøå'], 'test æøå')
        self.assertTokenizeEquals(['test', '?#\'$'], 'test ?#\'$')
        self.assertTokenizeEquals(['test', '/foo/bar/'], 'test /foo/bar/')

    def test_unquoted_params(self):
        self.assertTokenizeEquals(['test', 'foo', 'bar'], 'test foo bar')

    def test_quoted_param(self):
        self.assertTokenizeEquals(['test', 'param'], 'test "param"')
        self.assertTokenizeEquals(['test', 'param'], 'test\t"param"')

    def test_quoted_param_leading_whitespace(self):
        self.assertTokenizeEquals(['test', 'param'], 'test  "param"')
        self.assertTokenizeEquals(['test', 'param'], 'test\t\t"param"')

    def test_quoted_param_trailing_whitespace(self):
        self.assertTokenizeEquals(['test', 'param'], 'test "param" ')
        self.assertTokenizeEquals(['test', 'param'], 'test "param"\t\t')

    def test_quoted_param_invalid_chars(self):
        self.assertTokenizeRaises(Exception, 'test "par"m"')

    def test_quoted_param_numbers(self):
        self.assertTokenizeEquals(['test', '123'], 'test "123"')
        self.assertTokenizeEquals(['test', '+123'], 'test "+123"')
        self.assertTokenizeEquals(['test', '-123'], 'test "-123"')
        self.assertTokenizeEquals(['test', '3.14'], 'test "3.14"')

    def test_quoted_param_spaces(self):
        self.assertTokenizeEquals(['test', 'foo bar'], 'test "foo bar"')
        self.assertTokenizeEquals(['test', 'foo  bar'], 'test "foo  bar"')
        self.assertTokenizeEquals(['test', ' param\t'], 'test " param\t"')

    def test_quoted_param_extended_chars(self):
        self.assertTokenizeEquals(['test', 'æøå'], 'test "æøå"')
        self.assertTokenizeEquals(['test', '?#$'], 'test "?#$"')
        self.assertTokenizeEquals(['test', '/foo/bar/'], 'test "/foo/bar/"')

    def test_quoted_param_escaping(self):
        self.assertTokenizeEquals(['test', '\\'], r'test "\\"')
        self.assertTokenizeEquals(['test', '"'], r'test "\""')
        self.assertTokenizeEquals(['test', ' '], r'test "\ "')
        self.assertTokenizeEquals(['test', '\\n'], r'test "\\\n"')

    def test_quoted_params(self):
        self.assertTokenizeEquals(['test', 'foo', 'bar'], 'test "foo" "bar"')

    def test_mixed_params(self):
        self.assertTokenizeEquals(['test', 'foo', 'bar'], 'test foo "bar"')
        self.assertTokenizeEquals(['test', 'foo', 'bar'], 'test "foo" bar')
        self.assertTokenizeEquals(['test', '1', '2'], 'test 1 "2"')
        self.assertTokenizeEquals(['test', '1', '2'], 'test "1" 2')
        self.assertTokenizeEquals(['test', 'foo bar', 'baz', '123'],
                                  'test "foo bar" baz 123')
        self.assertTokenizeEquals(['test', 'foo"bar', 'baz', '123'],
                                  r'test "foo\"bar" baz 123')

    def test_unbalanced_quotes(self):
        self.assertTokenizeRaises(Exception, 'test "foo bar" baz"')
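
As a sanity check of the trickiest escaping case above, a plain interpreter
session using nothing beyond the tokenizer itself:

    >>> from mopidy.mpd import protocol
    >>> # Wire string: test "\\\n" -- the quoted body unescapes pairwise,
    >>> # \\ -> \ and \n -> n, so the result is backslash + n, not a newline.
    >>> protocol.tokenize(r'test "\\\n"') == ['test', '\\n']
    True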