mpd: Fix tokenizer error messages to match original protocol

Thomas Adamcik 2014-01-21 22:10:00 +01:00
parent f7aff706a8
commit f7ec1fba01
2 changed files with 38 additions and 26 deletions

View File

@@ -3,12 +3,13 @@ from __future__ import unicode_literals
 import re


-class TokenizeError(Exception):
+class Error(Exception):
     pass


 WORD_RE = re.compile(r"""
-    ^                  # Leading whitespace is not allowed
+    ^
+    (\s*)              # Leading whitespace not allowed, capture it to report.
     ([a-z][a-z0-9_]*)  # A command name
     (?:\s+|$)          # trailing whitespace or EOS
     (.*)               # Possibly a remainder to be parsed
@@ -18,7 +19,7 @@ WORD_RE = re.compile(r"""
 PARAM_RE = re.compile(r"""
     ^                               # Leading whitespace is not allowed
     (?:
-        ([^%(unprintable)s"\\]+)    # ord(char) < 0x20, not ", not backslash
+        ([^%(unprintable)s"']+)     # ord(char) < 0x20, not ", not '
     |                               # or
         "([^"\\]*(?:\\.[^"\\]*)*)"  # anything surrounded by quotes
     )
@@ -30,16 +31,20 @@ UNESCAPE_RE = re.compile(r'\\(.)')  # Backslash escapes any following char.


 def split(line):
+    if not line.strip():
+        raise Error('No command given')
     match = WORD_RE.match(line)
     if not match:
-        raise TokenizeError('Invalid word')
-    command, remainder = match.groups()
+        raise Error('Invalid word character')
+    whitespace, command, remainder = match.groups()
+    if whitespace:
+        raise Error('Letter expected')
     result = [command]

     while remainder:
         match = PARAM_RE.match(remainder)
         if not match:
-            raise TokenizeError('Invalid parameter')
+            raise Error('Invalid unquoted character')
         unquoted, quoted, remainder = match.groups()
         result.append(unquoted or UNESCAPE_RE.sub(r'\g<1>', quoted))
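For concreteness, here is a standalone sketch (not part of the commit) of the patched WORD_RE. It shows the design choice behind the new `(\s*)` group: leading whitespace still produces a match, so split() can report 'Letter expected' for it, while input the pattern cannot match at all yields 'Invalid word character'.

    import re

    # Sketch of the patched WORD_RE; the assertions are illustrative only.
    WORD_RE = re.compile(r"""
        ^
        (\s*)              # Leading whitespace not allowed, capture it to report.
        ([a-z][a-z0-9_]*)  # A command name
        (?:\s+|$)          # trailing whitespace or EOS
        (.*)               # Possibly a remainder to be parsed
        """, re.VERBOSE)

    assert WORD_RE.match('test param').groups() == ('', 'test', 'param')
    assert WORD_RE.match('  test').groups() == ('  ', 'test', '')  # -> 'Letter expected'
    assert WORD_RE.match('Test') is None                           # -> 'Invalid word character'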

View File

@@ -11,16 +11,18 @@ class TestTokenizer(unittest.TestCase):
     def assertTokenizeEquals(self, expected, line):
         self.assertEqual(expected, tokenize.split(line))

-    def assertTokenizeRaises(self, exception, line):
-        with self.assertRaises(exception):
+    def assertTokenizeRaisesError(self, line, message=None):
+        with self.assertRaises(tokenize.Error) as cm:
             tokenize.split(line)
+        if message:
+            self.assertEqual(cm.exception.message, message)

     def test_empty_string(self):
-        self.assertTokenizeRaises(tokenize.TokenizeError, '')
+        self.assertTokenizeRaisesError('', 'No command given')

     def test_whitespace(self):
-        self.assertTokenizeRaises(tokenize.TokenizeError, ' ')
-        self.assertTokenizeRaises(tokenize.TokenizeError, '\t\t\t')
+        self.assertTokenizeRaisesError(' ', 'No command given')
+        self.assertTokenizeRaisesError('\t\t\t', 'No command given')

     def test_command(self):
         self.assertTokenizeEquals(['test'], 'test')
@@ -32,14 +34,14 @@ class TestTokenizer(unittest.TestCase):
         self.assertTokenizeEquals(['test'], 'test\t\t\t')

     def test_command_leading_whitespace(self):
-        self.assertTokenizeRaises(tokenize.TokenizeError, ' test')
-        self.assertTokenizeRaises(tokenize.TokenizeError, '\ttest')
+        self.assertTokenizeRaisesError(' test', 'Letter expected')
+        self.assertTokenizeRaisesError('\ttest', 'Letter expected')

     def test_invalid_command(self):
-        self.assertTokenizeRaises(tokenize.TokenizeError, 'foo/bar')
-        self.assertTokenizeRaises(tokenize.TokenizeError, 'æøå')
-        self.assertTokenizeRaises(tokenize.TokenizeError, 'test?')
-        self.assertTokenizeRaises(tokenize.TokenizeError, 'te"st')
+        self.assertTokenizeRaisesError('foo/bar', 'Invalid word character')
+        self.assertTokenizeRaisesError('æøå', 'Invalid word character')
+        self.assertTokenizeRaisesError('test?', 'Invalid word character')
+        self.assertTokenizeRaisesError('te"st', 'Invalid word character')

     def test_unquoted_param(self):
         self.assertTokenizeEquals(['test', 'param'], 'test param')
@@ -54,11 +56,12 @@ class TestTokenizer(unittest.TestCase):
         self.assertTokenizeEquals(['test', 'param'], 'test param\t\t')

     def test_unquoted_param_invalid_chars(self):
-        self.assertTokenizeRaises(tokenize.TokenizeError, 'test par"m')
-        self.assertTokenizeRaises(tokenize.TokenizeError, 'test foo\\bar')
-        self.assertTokenizeRaises(tokenize.TokenizeError, 'test foo\bbar')
-        self.assertTokenizeRaises(tokenize.TokenizeError, 'test "foo"bar')
-        self.assertTokenizeRaises(tokenize.TokenizeError, 'test fo"b"ar')
+        msg = 'Invalid unquoted character'
+        self.assertTokenizeRaisesError('test par"m', msg)
+        self.assertTokenizeRaisesError('test foo\bbar', msg)
+        self.assertTokenizeRaisesError('test "foo"bar', msg)
+        self.assertTokenizeRaisesError('test foo"bar"baz', msg)
+        self.assertTokenizeRaisesError('test foo\'bar', msg)

     def test_unquoted_param_numbers(self):
         self.assertTokenizeEquals(['test', '123'], 'test 123')
@@ -68,8 +71,9 @@ class TestTokenizer(unittest.TestCase):
     def test_unquoted_param_extended_chars(self):
         self.assertTokenizeEquals(['test', 'æøå'], 'test æøå')
-        self.assertTokenizeEquals(['test', '?#\'$'], 'test ?#\'$')
+        self.assertTokenizeEquals(['test', '?#$'], 'test ?#$')
         self.assertTokenizeEquals(['test', '/foo/bar/'], 'test /foo/bar/')
+        self.assertTokenizeEquals(['test', 'foo\\bar'], 'test foo\\bar')

     def test_unquoted_params(self):
         self.assertTokenizeEquals(['test', 'foo', 'bar'], 'test foo bar')
@@ -87,7 +91,10 @@ class TestTokenizer(unittest.TestCase):
         self.assertTokenizeEquals(['test', 'param'], 'test "param"\t\t')

     def test_quoted_param_invalid_chars(self):
-        self.assertTokenizeRaises(tokenize.TokenizeError, 'test "par"m"')
+        # TODO: Figure out how to check for " without space behind it.
+        #msg = """Space expected after closing '"'"""
+        msg = 'Invalid unquoted character'
+        self.assertTokenizeRaisesError('test "par"m"', msg)

     def test_quoted_param_numbers(self):
         self.assertTokenizeEquals(['test', '123'], 'test "123"')
@@ -126,5 +133,5 @@ class TestTokenizer(unittest.TestCase):
                                   r'test "foo\"bar" baz 123')

     def test_unbalanced_quotes(self):
-        self.assertTokenizeRaises(tokenize.TokenizeError,
-                                  'test "foo bar" baz"')
+        msg = 'Invalid unquoted character'
+        self.assertTokenizeRaisesError('test "foo bar" baz"', msg)
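And a matching sketch (again illustrative, not part of the commit) of the quoted-parameter handling the last two tests exercise. QUOTED_RE is a name introduced here for just the quoted branch of PARAM_RE: it captures everything between the double quotes with backslash escapes intact, and UNESCAPE_RE then strips the backslashes.

    import re

    # Only the quoted branch of PARAM_RE, pulled out for illustration.
    QUOTED_RE = re.compile(r'^"([^"\\]*(?:\\.[^"\\]*)*)"(?:\s+|$)(.*)')
    UNESCAPE_RE = re.compile(r'\\(.)')  # Backslash escapes any following char.

    quoted, remainder = QUOTED_RE.match(r'"foo\"bar" baz 123').groups()
    assert quoted == r'foo\"bar'
    assert UNESCAPE_RE.sub(r'\g<1>', quoted) == 'foo"bar'
    assert remainder == 'baz 123'

    # The unbalanced trailing 'baz"' is neither a valid unquoted parameter
    # nor a complete quoted one, hence 'Invalid unquoted character'.
    assert QUOTED_RE.match('baz"') is None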