From d4457403186ccc9dc2b0eb1a16532ce043f2c2a3 Mon Sep 17 00:00:00 2001
From: Thomas Adamcik
Date: Tue, 21 Jan 2014 21:10:55 +0100
Subject: [PATCH] mpd: Split out tokenizer and add proper errors.

---
 mopidy/mpd/protocol/__init__.py            | 40 -------------------
 mopidy/mpd/tokenize.py                     | 46 ++++++++++++++++++++++
 tests/mpd/{protocol => }/test_tokenizer.py | 39 +++++++++---------
 3 files changed, 66 insertions(+), 59 deletions(-)
 create mode 100644 mopidy/mpd/tokenize.py
 rename tests/mpd/{protocol => }/test_tokenizer.py (77%)

diff --git a/mopidy/mpd/protocol/__init__.py b/mopidy/mpd/protocol/__init__.py
index e7ffb32c..8aa5f3d1 100644
--- a/mopidy/mpd/protocol/__init__.py
+++ b/mopidy/mpd/protocol/__init__.py
@@ -96,46 +96,6 @@ def load_protocol_modules():
         stored_playlists)
 
 
-WORD_RE = re.compile(r"""
-    ^                 # Leading whitespace is not allowed
-    ([a-z][a-z0-9_]*) # A command name
-    (?:\s+|$)         # trailing whitespace or EOS
-    (.*)              # Possibly a remainder to be parsed
-    """, re.VERBOSE)
-
-# Quotes matching is an unrolled version of "(?:[^"\\]|\\.)*"
-PARAM_RE = re.compile(r"""
-    ^                               # Leading whitespace is not allowed
-    (?:
-        ([^%(unprintable)s"\\]+)    # ord(char) < 0x21, not ", not backslash
-        |                           # or
-        "([^"\\]*(?:\\.[^"\\]*)*)"  # anything surrounded by quotes
-    )
-    (?:\s+|$)                       # trailing whitespace or EOS
-    (.*)                            # Possibly a remainder to be parsed
-    """ % {'unprintable': ''.join(map(chr, range(0x21)))}, re.VERBOSE)
-
-UNESCAPE_RE = re.compile(r'\\(.)')  # Backslash escapes any following char.
-
-
-# TODO: update exception usage and messages
-def tokenize(line):
-    match = WORD_RE.match(line)
-    if not match:
-        raise Exception('Invalid command')
-    command, remainder = match.groups()
-    result = [command]
-
-    while remainder:
-        match = PARAM_RE.match(remainder)
-        if not match:
-            raise Exception('Invalid parameter')
-        unquoted, quoted, remainder = match.groups()
-        result.append(unquoted or UNESCAPE_RE.sub(r'\g<1>', quoted))
-
-    return result
-
-
 def integer(value):
     if value is None:
         raise ValueError('None is not a valid integer')
diff --git a/mopidy/mpd/tokenize.py b/mopidy/mpd/tokenize.py
new file mode 100644
index 00000000..2b7ab237
--- /dev/null
+++ b/mopidy/mpd/tokenize.py
@@ -0,0 +1,46 @@
+from __future__ import unicode_literals
+
+import re
+
+
+class TokenizeError(Exception):
+    pass
+
+
+WORD_RE = re.compile(r"""
+    ^                 # Leading whitespace is not allowed
+    ([a-z][a-z0-9_]*) # A command name
+    (?:\s+|$)         # trailing whitespace or EOS
+    (.*)              # Possibly a remainder to be parsed
+    """, re.VERBOSE)
+
+# Quotes matching is an unrolled version of "(?:[^"\\]|\\.)*"
+PARAM_RE = re.compile(r"""
+    ^                               # Leading whitespace is not allowed
+    (?:
+        ([^%(unprintable)s"\\]+)    # ord(char) < 0x21, not ", not backslash
+        |                           # or
+        "([^"\\]*(?:\\.[^"\\]*)*)"  # anything surrounded by quotes
+    )
+    (?:\s+|$)                       # trailing whitespace or EOS
+    (.*)                            # Possibly a remainder to be parsed
+    """ % {'unprintable': ''.join(map(chr, range(0x21)))}, re.VERBOSE)
+
+UNESCAPE_RE = re.compile(r'\\(.)')  # Backslash escapes any following char.
+
+
+def split(line):
+    match = WORD_RE.match(line)
+    if not match:
+        raise TokenizeError('Invalid word')
+    command, remainder = match.groups()
+    result = [command]
+
+    while remainder:
+        match = PARAM_RE.match(remainder)
+        if not match:
+            raise TokenizeError('Invalid parameter')
+        unquoted, quoted, remainder = match.groups()
+        result.append(unquoted or UNESCAPE_RE.sub(r'\g<1>', quoted))
+
+    return result
diff --git a/tests/mpd/protocol/test_tokenizer.py b/tests/mpd/test_tokenizer.py
similarity index 77%
rename from tests/mpd/protocol/test_tokenizer.py
rename to tests/mpd/test_tokenizer.py
index 27c9ca2d..3ed3eb02 100644
--- a/tests/mpd/protocol/test_tokenizer.py
+++ b/tests/mpd/test_tokenizer.py
@@ -4,23 +4,23 @@ from __future__ import unicode_literals
 
 import unittest
 
-from mopidy.mpd import protocol
+from mopidy.mpd import tokenize
 
 
 class TestTokenizer(unittest.TestCase):
     def assertTokenizeEquals(self, expected, line):
-        self.assertEqual(expected, protocol.tokenize(line))
+        self.assertEqual(expected, tokenize.split(line))
 
     def assertTokenizeRaises(self, exception, line):
         with self.assertRaises(exception):
-            protocol.tokenize(line)
+            tokenize.split(line)
 
     def test_empty_string(self):
-        self.assertTokenizeRaises(Exception, '')
+        self.assertTokenizeRaises(tokenize.TokenizeError, '')
 
     def test_whitespace(self):
-        self.assertTokenizeRaises(Exception, '   ')
-        self.assertTokenizeRaises(Exception, '\t\t\t')
+        self.assertTokenizeRaises(tokenize.TokenizeError, '   ')
+        self.assertTokenizeRaises(tokenize.TokenizeError, '\t\t\t')
 
     def test_command(self):
         self.assertTokenizeEquals(['test'], 'test')
@@ -32,14 +32,14 @@ class TestTokenizer(unittest.TestCase):
         self.assertTokenizeEquals(['test'], 'test\t\t\t')
 
     def test_command_leading_whitespace(self):
-        self.assertTokenizeRaises(Exception, ' test')
-        self.assertTokenizeRaises(Exception, '\ttest')
+        self.assertTokenizeRaises(tokenize.TokenizeError, ' test')
+        self.assertTokenizeRaises(tokenize.TokenizeError, '\ttest')
 
     def test_invalid_command(self):
-        self.assertTokenizeRaises(Exception, 'foo/bar')
-        self.assertTokenizeRaises(Exception, 'æøå')
-        self.assertTokenizeRaises(Exception, 'test?')
-        self.assertTokenizeRaises(Exception, 'te"st')
+        self.assertTokenizeRaises(tokenize.TokenizeError, 'foo/bar')
+        self.assertTokenizeRaises(tokenize.TokenizeError, 'æøå')
+        self.assertTokenizeRaises(tokenize.TokenizeError, 'test?')
+        self.assertTokenizeRaises(tokenize.TokenizeError, 'te"st')
 
     def test_unquoted_param(self):
         self.assertTokenizeEquals(['test', 'param'], 'test param')
@@ -54,11 +54,11 @@ class TestTokenizer(unittest.TestCase):
         self.assertTokenizeEquals(['test', 'param'], 'test param\t\t')
 
     def test_unquoted_param_invalid_chars(self):
-        self.assertTokenizeRaises(Exception, 'test par"m')
-        self.assertTokenizeRaises(Exception, 'test foo\\bar')
-        self.assertTokenizeRaises(Exception, 'test foo\bbar')
-        self.assertTokenizeRaises(Exception, 'test "foo"bar')
-        self.assertTokenizeRaises(Exception, 'test foo"bar"baz')
+        self.assertTokenizeRaises(tokenize.TokenizeError, 'test par"m')
+        self.assertTokenizeRaises(tokenize.TokenizeError, 'test foo\\bar')
+        self.assertTokenizeRaises(tokenize.TokenizeError, 'test foo\bbar')
+        self.assertTokenizeRaises(tokenize.TokenizeError, 'test "foo"bar')
+        self.assertTokenizeRaises(tokenize.TokenizeError, 'test foo"bar"baz')
 
     def test_unquoted_param_numbers(self):
         self.assertTokenizeEquals(['test', '123'], 'test 123')
@@ -87,7 +87,7 @@ class TestTokenizer(unittest.TestCase):
         self.assertTokenizeEquals(['test', 'param'], 'test "param"\t\t')
 
     def test_quoted_param_invalid_chars(self):
-        self.assertTokenizeRaises(Exception, 'test "par"m"')
+        self.assertTokenizeRaises(tokenize.TokenizeError, 'test "par"m"')
 
     def test_quoted_param_numbers(self):
         self.assertTokenizeEquals(['test', '123'], 'test "123"')
@@ -126,4 +126,5 @@ class TestTokenizer(unittest.TestCase):
             r'test "foo\"bar" baz 123')
 
     def test_unbalanced_quotes(self):
-        self.assertTokenizeRaises(Exception, 'test "foo bar" baz"')
+        self.assertTokenizeRaises(tokenize.TokenizeError,
+                                  'test "foo bar" baz"')
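
A quick usage sketch for reviewers, not part of the patch: only
tokenize.split and tokenize.TokenizeError come from this change; the
request strings below are made-up examples, and how the protocol layer
reports the error to the client is left out.

    from mopidy.mpd import tokenize

    # A well-formed request yields the command name followed by its
    # decoded parameters:
    tokenize.split('moveid "2" "5"')      # -> ['moveid', '2', '5']
    tokenize.split(r'add "foo \"bar\""')  # -> ['add', 'foo "bar"']

    # Malformed input now raises a dedicated error instead of a bare
    # Exception, so callers can catch tokenizer failures specifically:
    try:
        tokenize.split('test "unbalanced')
    except tokenize.TokenizeError as error:
        print('Bad request: %s' % error)  # Bad request: Invalid parameter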