mpd: Split out tokenizer and add proper errors.
This commit is contained in:
parent
335cf4e612
commit
d445740318
@ -96,46 +96,6 @@ def load_protocol_modules():
|
||||
stored_playlists)
|
||||
|
||||
|
||||
WORD_RE = re.compile(r"""
    ^                 # Leading whitespace is not allowed
    ([a-z][a-z0-9_]*) # A command name
    (?:\s+|$)         # trailing whitespace or EOS
    (.*)              # Possibly a remainder to be parsed
    """, re.VERBOSE)

# Quotes matching is an unrolled version of "(?:[^"\\]|\\.)*"
PARAM_RE = re.compile(r"""
    ^                               # Leading whitespace is not allowed
    (?:
        ([^%(unprintable)s"\\]+)    # ord(char) < 0x20, not ", not backslash
        |                           # or
        "([^"\\]*(?:\\.[^"\\]*)*)"  # anything surrounded by quotes
    )
    (?:\s+|$)                       # trailing whitespace or EOS
    (.*)                            # Possibly a remainder to be parsed
    """ % {'unprintable': ''.join(map(chr, range(0x21)))}, re.VERBOSE)

UNESCAPE_RE = re.compile(r'\\(.)')  # Backslash escapes any following char.


# TODO: update exception usage and messages
def tokenize(line):
    """Split an MPD request line into a ``[command, arg, ...]`` list.

    The command must match ``WORD_RE`` (lowercase word, no leading
    whitespace).  Each following parameter is either an unquoted run of
    printable characters or a double-quoted string in which any character
    may be backslash-escaped; escapes are resolved before the token is
    returned.

    Raises a plain ``Exception`` on malformed input (see TODO above).
    """
    word_match = WORD_RE.match(line)
    if not word_match:
        raise Exception('Invalid command')
    command, remainder = word_match.groups()
    tokens = [command]

    # Consume one parameter per iteration until the line is exhausted.
    while remainder:
        param_match = PARAM_RE.match(remainder)
        if not param_match:
            raise Exception('Invalid parameter')
        unquoted, quoted, remainder = param_match.groups()
        if unquoted:
            tokens.append(unquoted)
        else:
            # Quoted form: strip the backslash escapes.
            tokens.append(UNESCAPE_RE.sub(r'\g<1>', quoted))

    return tokens
|
||||
|
||||
|
||||
def integer(value):
|
||||
if value is None:
|
||||
raise ValueError('None is not a valid integer')
|
||||
|
||||
46
mopidy/mpd/tokenize.py
Normal file
46
mopidy/mpd/tokenize.py
Normal file
@ -0,0 +1,46 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
|
||||
class TokenizeError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
WORD_RE = re.compile(r"""
|
||||
^ # Leading whitespace is not allowed
|
||||
([a-z][a-z0-9_]*) # A command name
|
||||
(?:\s+|$) # trailing whitespace or EOS
|
||||
(.*) # Possibly a remainder to be parsed
|
||||
""", re.VERBOSE)
|
||||
|
||||
# Quotes matching is an unrolled version of "(?:[^"\\]|\\.)*"
|
||||
PARAM_RE = re.compile(r"""
|
||||
^ # Leading whitespace is not allowed
|
||||
(?:
|
||||
([^%(unprintable)s"\\]+) # ord(char) < 0x20, not ", not backslash
|
||||
| # or
|
||||
"([^"\\]*(?:\\.[^"\\]*)*)" # anything surrounded by quotes
|
||||
)
|
||||
(?:\s+|$) # trailing whitespace or EOS
|
||||
(.*) # Possibly a remainder to be parsed
|
||||
""" % {'unprintable': ''.join(map(chr, range(0x21)))}, re.VERBOSE)
|
||||
|
||||
UNESCAPE_RE = re.compile(r'\\(.)') # Backslash escapes any following char.
|
||||
|
||||
|
||||
def split(line):
|
||||
match = WORD_RE.match(line)
|
||||
if not match:
|
||||
raise TokenizeError('Invalid word')
|
||||
command, remainder = match.groups()
|
||||
result = [command]
|
||||
|
||||
while remainder:
|
||||
match = PARAM_RE.match(remainder)
|
||||
if not match:
|
||||
raise TokenizeError('Invalid parameter')
|
||||
unquoted, quoted, remainder = match.groups()
|
||||
result.append(unquoted or UNESCAPE_RE.sub(r'\g<1>', quoted))
|
||||
|
||||
return result
|
||||
@ -4,23 +4,23 @@ from __future__ import unicode_literals
|
||||
|
||||
import unittest
|
||||
|
||||
from mopidy.mpd import protocol
|
||||
from mopidy.mpd import tokenize
|
||||
|
||||
|
||||
class TestTokenizer(unittest.TestCase):
|
||||
def assertTokenizeEquals(self, expected, line):
|
||||
self.assertEqual(expected, protocol.tokenize(line))
|
||||
self.assertEqual(expected, tokenize.split(line))
|
||||
|
||||
def assertTokenizeRaises(self, exception, line):
|
||||
with self.assertRaises(exception):
|
||||
protocol.tokenize(line)
|
||||
tokenize.split(line)
|
||||
|
||||
def test_empty_string(self):
|
||||
self.assertTokenizeRaises(Exception, '')
|
||||
self.assertTokenizeRaises(tokenize.TokenizeError, '')
|
||||
|
||||
def test_whitespace(self):
|
||||
self.assertTokenizeRaises(Exception, ' ')
|
||||
self.assertTokenizeRaises(Exception, '\t\t\t')
|
||||
self.assertTokenizeRaises(tokenize.TokenizeError, ' ')
|
||||
self.assertTokenizeRaises(tokenize.TokenizeError, '\t\t\t')
|
||||
|
||||
def test_command(self):
|
||||
self.assertTokenizeEquals(['test'], 'test')
|
||||
@ -32,14 +32,14 @@ class TestTokenizer(unittest.TestCase):
|
||||
self.assertTokenizeEquals(['test'], 'test\t\t\t')
|
||||
|
||||
def test_command_leading_whitespace(self):
|
||||
self.assertTokenizeRaises(Exception, ' test')
|
||||
self.assertTokenizeRaises(Exception, '\ttest')
|
||||
self.assertTokenizeRaises(tokenize.TokenizeError, ' test')
|
||||
self.assertTokenizeRaises(tokenize.TokenizeError, '\ttest')
|
||||
|
||||
def test_invalid_command(self):
|
||||
self.assertTokenizeRaises(Exception, 'foo/bar')
|
||||
self.assertTokenizeRaises(Exception, 'æøå')
|
||||
self.assertTokenizeRaises(Exception, 'test?')
|
||||
self.assertTokenizeRaises(Exception, 'te"st')
|
||||
self.assertTokenizeRaises(tokenize.TokenizeError, 'foo/bar')
|
||||
self.assertTokenizeRaises(tokenize.TokenizeError, 'æøå')
|
||||
self.assertTokenizeRaises(tokenize.TokenizeError, 'test?')
|
||||
self.assertTokenizeRaises(tokenize.TokenizeError, 'te"st')
|
||||
|
||||
def test_unquoted_param(self):
|
||||
self.assertTokenizeEquals(['test', 'param'], 'test param')
|
||||
@ -54,11 +54,11 @@ class TestTokenizer(unittest.TestCase):
|
||||
self.assertTokenizeEquals(['test', 'param'], 'test param\t\t')
|
||||
|
||||
def test_unquoted_param_invalid_chars(self):
|
||||
self.assertTokenizeRaises(Exception, 'test par"m')
|
||||
self.assertTokenizeRaises(Exception, 'test foo\\bar')
|
||||
self.assertTokenizeRaises(Exception, 'test foo\bbar')
|
||||
self.assertTokenizeRaises(Exception, 'test "foo"bar')
|
||||
self.assertTokenizeRaises(Exception, 'test foo"bar"baz')
|
||||
self.assertTokenizeRaises(tokenize.TokenizeError, 'test par"m')
|
||||
self.assertTokenizeRaises(tokenize.TokenizeError, 'test foo\\bar')
|
||||
self.assertTokenizeRaises(tokenize.TokenizeError, 'test foo\bbar')
|
||||
self.assertTokenizeRaises(tokenize.TokenizeError, 'test "foo"bar')
|
||||
self.assertTokenizeRaises(tokenize.TokenizeError, 'test fo"b"ar')
|
||||
|
||||
def test_unquoted_param_numbers(self):
|
||||
self.assertTokenizeEquals(['test', '123'], 'test 123')
|
||||
@ -87,7 +87,7 @@ class TestTokenizer(unittest.TestCase):
|
||||
self.assertTokenizeEquals(['test', 'param'], 'test "param"\t\t')
|
||||
|
||||
def test_quoted_param_invalid_chars(self):
|
||||
self.assertTokenizeRaises(Exception, 'test "par"m"')
|
||||
self.assertTokenizeRaises(tokenize.TokenizeError, 'test "par"m"')
|
||||
|
||||
def test_quoted_param_numbers(self):
|
||||
self.assertTokenizeEquals(['test', '123'], 'test "123"')
|
||||
@ -126,4 +126,5 @@ class TestTokenizer(unittest.TestCase):
|
||||
r'test "foo\"bar" baz 123')
|
||||
|
||||
def test_unbalanced_quotes(self):
|
||||
self.assertTokenizeRaises(Exception, 'test "foo bar" baz"')
|
||||
self.assertTokenizeRaises(tokenize.TokenizeError,
|
||||
'test "foo bar" baz"')
|
||||
Loading…
Reference in New Issue
Block a user