-
Notifications
You must be signed in to change notification settings - Fork 20
/
tokenize.py
93 lines (81 loc) · 3.14 KB
/
tokenize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import re
from mopidy_mpd import exceptions
# Matches the first token of a request line. Groups, in order:
#   1. any leading whitespace (captured so the caller can detect and reject it),
#   2. the command name,
#   3. whatever follows the whitespace after the command name (may be empty).
WORD_RE = re.compile(
r"""
^
(\s*) # Leading whitespace not allowed, capture it to report.
([a-z][a-z0-9_]*) # A command name
(?:\s+|$) # trailing whitespace or EOS
(.*) # Possibly a remainder to be parsed
""",
re.VERBOSE,
)
# Matches a single parameter at the start of the remaining input. Groups, in
# order:
#   1. an unquoted token,
#   2. the inner text of a double-quoted token, with backslash escapes still
#      in place,
#   3. the rest of the input after the separating whitespace (may be empty).
# On a match exactly one of groups 1 and 2 is set; the other is None.
#
# The excluded character set is built from chr(0x00) through chr(0x20)
# inclusive, so the space character itself is excluded together with the
# control characters.
#
# The pattern text is passed through str.format(), so a literal brace added to
# it later would have to be doubled.
#
# Quotes matching is an unrolled version of "(?:[^"\\]|\\.)*"
PARAM_RE = re.compile(
r"""
^ # Leading whitespace is not allowed
(?:
([^{unprintable}"']+) # ord(char) < 0x20, not ", not '
| # or
"([^"\\]*(?:\\.[^"\\]*)*)" # anything surrounded by quotes
)
(?:\s+|$) # trailing whitespace or EOS
(.*) # Possibly a remainder to be parsed
""".format(unprintable="".join(map(chr, range(0x21)))),
re.VERBOSE,
)
# Classifies a malformed quoted parameter; used by _determine_error_message.
# The pattern starts with the same unrolled quoted-value body as PARAM_RE and
# then requires one of:
#   group 1: a closing quote immediately followed by a non-whitespace char,
#   group 2: a character that is not a quote (through backtracking this is how
#            a value with no closing quote ends up matching).
# Both alternatives consume at least one character after the opening quote, so
# an input consisting of a lone opening quote does not match at all.
BAD_QUOTED_PARAM_RE = re.compile(
r"""
^
"[^"\\]*(?:\\.[^"\\]*)* # start of a quoted value
(?: # followed by:
("[^\s]) # non-escaped quote, followed by non-whitespace
| # or
([^"]) # anything that is not a quote
)
""",
re.VERBOSE,
)
# Finds every backslash escape and captures the escaped character in group 1,
# so substituting group 1 for the match drops the backslash. No DOTALL flag is
# set, so a backslash directly before a newline is not treated as an escape.
UNESCAPE_RE = re.compile(r"\\(.)") # Backslash escapes any following char.
def split(line):
    """Break an MPD request line into its command name and arguments.

    The tokenizing rules mirror MPD's own:

    - The line must not begin with whitespace.
    - Tokens are separated by any run of spaces or tabs.
    - The first token is the command and must match ``[a-z][a-z0-9_]*``.
    - Every following token is either unquoted or quoted.

      - An unquoted token may hold any printable character other than a
        double quote, a single quote, a space or a tab.
      - A quoted token is wrapped in a pair of double quotes, and the closing
        quote must be followed by a space, a tab or the end of the line.
      - Inside a quoted token anything goes, including a double quote as
        long as it is escaped; a backslash escapes the character after it.

    For examples see the tests for this function.

    Returns:
        A list whose first item is the command name, followed by the
        arguments with quoting and backslash escapes removed.

    Raises:
        exceptions.MpdNoCommandError: The line is empty or only whitespace.
        exceptions.MpdUnknownError: The line starts with whitespace, or its
            first token is not a valid command name.
        exceptions.MpdArgError: An argument is malformed.
    """
    if not line.strip():
        raise exceptions.MpdNoCommandError("No command given")

    head = WORD_RE.match(line)
    if head is None:
        raise exceptions.MpdUnknownError("Invalid word character")

    leading, command, rest = head.groups()
    if leading:
        raise exceptions.MpdUnknownError("Letter expected")

    tokens = [command]
    while rest:
        param = PARAM_RE.match(rest)
        if param is None:
            reason = _determine_error_message(rest)
            raise exceptions.MpdArgError(reason, command=command)

        bare, quoted, rest = param.groups()
        if bare:
            tokens.append(bare)
        else:
            # Quoted token: drop the backslash from every escape sequence.
            tokens.append(UNESCAPE_RE.sub(r"\g<1>", quoted))

    return tokens
def _determine_error_message(remainder):
    """Choose the MPD-style error message for a parameter that failed to parse.

    Args:
        remainder: The unparsed tail of the request line, beginning at the
            parameter that could not be tokenized.

    Returns:
        ``"Invalid unquoted character"`` when the parameter does not open
        with a double quote, ``"Space expected after closing '\"'"`` when a
        closing quote is directly followed by a non-whitespace character, and
        ``"Missing closing '\"'"`` when the quoted value is never closed.
    """
    # These checks exist purely to reproduce MPD's error messages.
    # A parameter that does not open with a quote was an unquoted token that
    # ran into a disallowed character.
    if not remainder.startswith('"'):
        return "Invalid unquoted character"

    match = BAD_QUOTED_PARAM_RE.match(remainder)
    if match and match.group(1):
        return "Space expected after closing '\"'"

    # Either the pattern matched through its "no closing quote" alternative,
    # or the remainder is a lone opening quote. The pattern cannot match the
    # latter because it needs at least one more character, yet it is still an
    # unterminated quoted value and must be reported as such.
    return "Missing closing '\"'"