mpd: Split out tokenizer and add proper errors.

This commit is contained in:
Thomas Adamcik 2014-01-21 21:10:55 +01:00
parent 335cf4e612
commit d445740318
3 changed files with 66 additions and 59 deletions

View File

@ -96,46 +96,6 @@ def load_protocol_modules():
stored_playlists)
# Matches a request line's leading command word plus the unparsed remainder.
WORD_RE = re.compile(r"""
    ^                  # Leading whitespace is not allowed
    ([a-z][a-z0-9_]*)  # A command name
    (?:\s+|$)          # trailing whitespace or EOS
    (.*)               # Possibly a remainder to be parsed
""", re.VERBOSE)

# Quotes matching is an unrolled version of "(?:[^"\\]|\\.)*"
PARAM_RE = re.compile(r"""
    ^   # Leading whitespace is not allowed
    (?:
        ([^%(unprintable)s"\\]+)    # ord(char) <= 0x20, not ", not backslash
        |                           # or
        "([^"\\]*(?:\\.[^"\\]*)*)"  # anything surrounded by quotes
    )
    (?:\s+|$)  # trailing whitespace or EOS
    (.*)       # Possibly a remainder to be parsed
""" % {'unprintable': ''.join(map(chr, range(0x21)))}, re.VERBOSE)

UNESCAPE_RE = re.compile(r'\\(.)')  # Backslash escapes any following char.
def tokenize(line):
    """Split a raw MPD request line into ``[command, arg, ...]``.

    The command word is matched first; the remainder is then consumed one
    parameter at a time, each either a bare word or a double-quoted string
    with backslash escapes (which are unescaped in the result).

    :param line: raw request line, without trailing newline
    :returns: list of the command name followed by its unescaped arguments
    :raises ValueError: if the command word or any parameter is malformed
    """
    match = WORD_RE.match(line)
    if not match:
        # Line is empty, starts with whitespace, or has no lowercase command.
        raise ValueError('Invalid command')
    command, remainder = match.groups()
    result = [command]
    while remainder:
        match = PARAM_RE.match(remainder)
        if not match:
            # Stray quote, unprintable character, or unterminated string.
            raise ValueError('Invalid parameter')
        unquoted, quoted, remainder = match.groups()
        # Exactly one of unquoted/quoted matched; unescape the quoted form.
        result.append(unquoted or UNESCAPE_RE.sub(r'\g<1>', quoted))
    return result
def integer(value):
if value is None:
raise ValueError('None is not a valid integer')

46
mopidy/mpd/tokenize.py Normal file
View File

@ -0,0 +1,46 @@
from __future__ import unicode_literals
import re
class TokenizeError(Exception):
    """Raised when an MPD request line cannot be split into tokens."""
# Matches a request line's leading command word plus the unparsed remainder.
WORD_RE = re.compile(r"""
    ^                  # Leading whitespace is not allowed
    ([a-z][a-z0-9_]*)  # A command name
    (?:\s+|$)          # trailing whitespace or EOS
    (.*)               # Possibly a remainder to be parsed
""", re.VERBOSE)

# Quotes matching is an unrolled version of "(?:[^"\\]|\\.)*"
PARAM_RE = re.compile(r"""
    ^   # Leading whitespace is not allowed
    (?:
        ([^%(unprintable)s"\\]+)    # ord(char) <= 0x20, not ", not backslash
        |                           # or
        "([^"\\]*(?:\\.[^"\\]*)*)"  # anything surrounded by quotes
    )
    (?:\s+|$)  # trailing whitespace or EOS
    (.*)       # Possibly a remainder to be parsed
""" % {'unprintable': ''.join(map(chr, range(0x21)))}, re.VERBOSE)

UNESCAPE_RE = re.compile(r'\\(.)')  # Backslash escapes any following char.
def split(line):
    """Split an MPD request line into ``[command, arg, ...]``.

    The command word comes first; the rest of the line is consumed one
    parameter at a time, each either a bare word or a double-quoted string
    whose backslash escapes are resolved.

    :param line: raw request line, without trailing newline
    :returns: list of the command name followed by its unescaped arguments
    :raises TokenizeError: if the command word or any parameter is malformed
    """
    head = WORD_RE.match(line)
    if head is None:
        raise TokenizeError('Invalid word')
    command, rest = head.groups()
    tokens = [command]
    while rest:
        param = PARAM_RE.match(rest)
        if param is None:
            raise TokenizeError('Invalid parameter')
        bare, quoted, rest = param.groups()
        if bare:
            tokens.append(bare)
        else:
            tokens.append(UNESCAPE_RE.sub(r'\g<1>', quoted))
    return tokens

View File

@ -4,23 +4,23 @@ from __future__ import unicode_literals
import unittest
from mopidy.mpd import protocol
from mopidy.mpd import tokenize
class TestTokenizer(unittest.TestCase):
def assertTokenizeEquals(self, expected, line):
self.assertEqual(expected, protocol.tokenize(line))
self.assertEqual(expected, tokenize.split(line))
def assertTokenizeRaises(self, exception, line):
with self.assertRaises(exception):
protocol.tokenize(line)
tokenize.split(line)
def test_empty_string(self):
self.assertTokenizeRaises(Exception, '')
self.assertTokenizeRaises(tokenize.TokenizeError, '')
def test_whitespace(self):
self.assertTokenizeRaises(Exception, ' ')
self.assertTokenizeRaises(Exception, '\t\t\t')
self.assertTokenizeRaises(tokenize.TokenizeError, ' ')
self.assertTokenizeRaises(tokenize.TokenizeError, '\t\t\t')
def test_command(self):
self.assertTokenizeEquals(['test'], 'test')
@ -32,14 +32,14 @@ class TestTokenizer(unittest.TestCase):
self.assertTokenizeEquals(['test'], 'test\t\t\t')
def test_command_leading_whitespace(self):
self.assertTokenizeRaises(Exception, ' test')
self.assertTokenizeRaises(Exception, '\ttest')
self.assertTokenizeRaises(tokenize.TokenizeError, ' test')
self.assertTokenizeRaises(tokenize.TokenizeError, '\ttest')
def test_invalid_command(self):
self.assertTokenizeRaises(Exception, 'foo/bar')
self.assertTokenizeRaises(Exception, 'æøå')
self.assertTokenizeRaises(Exception, 'test?')
self.assertTokenizeRaises(Exception, 'te"st')
self.assertTokenizeRaises(tokenize.TokenizeError, 'foo/bar')
self.assertTokenizeRaises(tokenize.TokenizeError, 'æøå')
self.assertTokenizeRaises(tokenize.TokenizeError, 'test?')
self.assertTokenizeRaises(tokenize.TokenizeError, 'te"st')
def test_unquoted_param(self):
self.assertTokenizeEquals(['test', 'param'], 'test param')
@ -54,11 +54,11 @@ class TestTokenizer(unittest.TestCase):
self.assertTokenizeEquals(['test', 'param'], 'test param\t\t')
def test_unquoted_param_invalid_chars(self):
self.assertTokenizeRaises(Exception, 'test par"m')
self.assertTokenizeRaises(Exception, 'test foo\\bar')
self.assertTokenizeRaises(Exception, 'test foo\bbar')
self.assertTokenizeRaises(Exception, 'test "foo"bar')
self.assertTokenizeRaises(Exception, 'test foo"bar"baz')
self.assertTokenizeRaises(tokenize.TokenizeError, 'test par"m')
self.assertTokenizeRaises(tokenize.TokenizeError, 'test foo\\bar')
self.assertTokenizeRaises(tokenize.TokenizeError, 'test foo\bbar')
self.assertTokenizeRaises(tokenize.TokenizeError, 'test "foo"bar')
self.assertTokenizeRaises(tokenize.TokenizeError, 'test fo"b"ar')
def test_unquoted_param_numbers(self):
self.assertTokenizeEquals(['test', '123'], 'test 123')
@ -87,7 +87,7 @@ class TestTokenizer(unittest.TestCase):
self.assertTokenizeEquals(['test', 'param'], 'test "param"\t\t')
def test_quoted_param_invalid_chars(self):
self.assertTokenizeRaises(Exception, 'test "par"m"')
self.assertTokenizeRaises(tokenize.TokenizeError, 'test "par"m"')
def test_quoted_param_numbers(self):
self.assertTokenizeEquals(['test', '123'], 'test "123"')
@ -126,4 +126,5 @@ class TestTokenizer(unittest.TestCase):
r'test "foo\"bar" baz 123')
def test_unbalanced_quotes(self):
self.assertTokenizeRaises(Exception, 'test "foo bar" baz"')
self.assertTokenizeRaises(tokenize.TokenizeError,
'test "foo bar" baz"')