parse.py
unknown
python
a year ago
7.5 kB
18
Indexable
#!/usr/bin/python3
from collections import namedtuple
# Characters that terminate an option string. The newline is included so that
# an option ending its line (e.g. "--help\n") does not swallow the line break
# and the beginning of the following line.
OPTION_BREAK_CHARS = [' ', '\t', '\n', ',', '|', '=', '[']

# Characters that delimit options (as in "-h, --help" or "-h|--help")
OPTION_DELIMITER_CHARS = [',', '|']

# A line of input that was not recognised as an option line
Unparsed = namedtuple('Unparsed', ['text'])

# A single option string with its metavar. `metavar` is None if the option
# has none; `optional` is True for the "--option[=METAVAR]" form.
OptionWithMetavar = namedtuple('OptionWithMetavar', ['option', 'metavar', 'optional'])

# A list of OptionWithMetavar sharing one description (None if there is none)
OptionsWithDescription = namedtuple('OptionsWithDescription', ['options', 'description'])
class CharStream:
    '''
    A read cursor over a string with look-ahead and cheap snapshots.

    Attributes:
        s:   the complete text being read
        len: length of `s`
        pos: index of the next character to be read
    '''

    def __init__(self, s, pos = 0):
        self.s = s
        self.len = len(s)
        self.pos = pos

    def peek(self, relative_pos = 0):
        '''
        Return the character at `pos + relative_pos` without consuming it,
        or None if that position lies outside the text.
        '''
        index = self.pos + relative_pos
        # Explicit bounds check: a negative index must yield None instead of
        # silently wrapping around to the end of the string.
        if 0 <= index < self.len:
            return self.s[index]
        return None

    def peek_str(self, length):
        '''
        Return up to `length` characters starting at the current position
        without consuming them. The result is shorter near the end of input.
        '''
        return self.s[self.pos:self.pos + length]

    def get(self):
        '''
        Consume and return the next character.
        Raises IndexError if the stream is at its end.
        '''
        c = self.s[self.pos]
        self.pos += 1
        return c

    def is_space(self):
        '''
        Return True if the next character is a space or a tab.
        '''
        return self.peek() in (' ', '\t')

    def is_end(self):
        '''
        Return True if there are no more characters to read.
        '''
        return self.pos >= self.len

    def copy(self):
        '''
        Return an independent stream over the same text at the same position.
        '''
        return CharStream(self.s, self.pos)

    def __repr__(self):
        # Show only the unread remainder of the current line
        end = self.s.find('\n', self.pos)
        if end == -1:
            end = self.len
        return "CharStream(%r)" % self.s[self.pos:end]
def eat_line(s):
    '''
    Consume the rest of the current line and return it.
    The terminating newline, if there is one, is consumed and included.
    '''
    consumed = []
    while not s.is_end():
        consumed.append(s.get())
        if consumed[-1] == '\n':
            break
    return ''.join(consumed)
def eat_space(s):
    '''
    Consume a run of spaces and tabs and return the consumed text.
    '''
    blanks = []
    while s.is_space():
        blanks.append(s.get())
    return ''.join(blanks)
def parse_option_string(s):
    '''
    Try to read an option string such as "--help" or "-h".

    Leading spaces and tabs are skipped. The option must start with '-' and
    extends up to (not including) the first character that is listed in
    OPTION_BREAK_CHARS. A lone '-' or '--' is not considered an option.

    Returns the option string and advances `s` past it. Returns None and
    leaves `s` untouched if no option was found.
    '''
    lookahead = s.copy()
    eat_space(lookahead)
    if lookahead.peek() != '-':
        return None

    chars = []
    while not (lookahead.is_end() or lookahead.peek() in OPTION_BREAK_CHARS):
        chars.append(lookahead.get())
    option = ''.join(chars)

    if option in ('-', '--'):
        return None

    # Commit the look-ahead only after a real option has been recognised
    s.pos = lookahead.pos
    return option
def parse_bracket(s):
    '''
    Read a bracketed expression and return it including both brackets.

    Recognised forms:
        <foo bar>
        [foo bar]
        (foo bar)
        {foo bar}

    Returns None without consuming anything if the next character is not an
    opening bracket. Reading stops at the first matching closing character
    (nesting is not tracked) or at the end of the input.
    '''
    opener = s.peek()
    closing = {'<': '>', '[': ']', '(': ')', '{': '}'}.get(opener)
    if closing is None:
        return None

    pieces = [s.get()]
    while not s.is_end():
        ch = s.get()
        pieces.append(ch)
        if ch == closing:
            break
    return ''.join(pieces)
def parse_string(s):
    '''
    Read a quoted string and return it including both quote characters.

    Recognised forms:
        'foo bar'
        "foo bar"

    Escape sequences are not processed, since they are unlikely to occur in
    the description of an option. Returns None without consuming anything if
    the next character is not a quote. Reading stops at the first matching
    quote or at the end of the input.
    '''
    quote = s.peek()
    if quote != '"' and quote != "'":
        return None

    pieces = [s.get()]
    while not s.is_end():
        ch = s.get()
        pieces.append(ch)
        if ch == quote:
            break
    return ''.join(pieces)
def parse_metavar(s):
    '''
    Read a metavar and return it (an empty string if there is none).

    A metavar runs until the next space, tab, newline or the end of the input.
    Bracketed expressions and quoted strings inside the metavar are read as a
    whole, so whitespace within them does not end the metavar.

    Examples of metavars:
        foo_bar
        'foo bar'
        "foo bar"
        <foo bar>
    '''
    parts = []
    while not s.is_end():
        upcoming = s.peek()
        if upcoming in (' ', '\t', '\n'):
            break
        if upcoming in ('<', '[', '(', '{'):
            parts.append(parse_bracket(s))
        elif upcoming in ('"', "'"):
            parts.append(parse_string(s))
        else:
            parts.append(s.get())
    return ''.join(parts)
def parse_trailing_description_line(s):
    '''
    Read a continuation line of a description and return it.

    A line counts as a continuation line if it
        - starts with a space or tab, and
        - its first character after that indentation is not '-'
          (which would start a new option).

    The returned text excludes the leading indentation and includes the
    newline, if present. Returns None and leaves `s` untouched if the line
    is not a continuation line.
    '''
    probe = s.copy()
    if not probe.is_space():
        return None

    eat_space(probe)
    if probe.peek() == '-':
        return None

    line = eat_line(probe)
    s.pos = probe.pos
    return line
def parse_description(s):
    '''
    Read the description of an option and return it.

    Leading spaces and tabs are skipped, then the rest of the line is read,
    followed by all directly following trailing description lines.
    '''
    eat_space(s)
    content = eat_line(s)

    line = parse_trailing_description_line(s)
    while line:
        content += line
        line = parse_trailing_description_line(s)
    return content
def parse_option_with_metavar(s):
    '''
    Read an option together with its metavar (if any).

    Returns an OptionWithMetavar, or None if no option string was found.

    Valid inputs are:
        --option=METAVAR
        --option[=METAVAR]  ('optional' is set to True; the metavar text is
                             read starting at the '[')
        --option METAVAR

    Not read as a metavar:
        --option  METAVAR   (two whitespace characters: a description follows)
        --option, ...       (an option delimiter follows)
    '''
    opt = parse_option_string(s)
    if not opt:
        return None

    if s.peek_str(2) == '[=':
        return OptionWithMetavar(opt, parse_metavar(s), True)

    if s.peek() == '=':
        s.get()
        return OptionWithMetavar(opt, parse_metavar(s), False)

    # Two whitespace characters after the option mean the description follows
    if s.peek_str(2).isspace():
        return OptionWithMetavar(opt, None, False)

    # An option delimiter cannot be a metavar. The check runs on a copy so
    # the delimiter itself is left for the caller.
    if parse_option_delimiter(s.copy()):
        return OptionWithMetavar(opt, None, False)

    # A single space or tab separates the option from its metavar
    if not s.is_end() and s.is_space():
        s.get()
        return OptionWithMetavar(opt, parse_metavar(s), False)

    return OptionWithMetavar(opt, None, False)
def parse_option_delimiter(s):
    '''
    Parse an option delimiter, optionally preceded by spaces and tabs.

    Returns True and advances `s` past the delimiter if one was found.
    Returns False and leaves `s` untouched otherwise, including when the end
    of the input has been reached.
    '''
    p = s.copy()
    eat_space(p)
    # peek() yields None at the end of the input, whereas get() would raise
    # IndexError there (e.g. when the input ends right after an option).
    if p.peek() not in OPTION_DELIMITER_CHARS:
        return False
    p.get()
    s.pos = p.pos
    return True
def parse_options_with_description(s):
    '''
    Read a delimiter-separated list of options and the description that
    follows them.

    Returns an OptionsWithDescription, or None if no option was found.
    The description is None unless the options are followed by a space,
    tab or newline.
    '''
    options = []
    while not s.is_end():
        option = parse_option_with_metavar(s)
        if not option:
            break
        options.append(option)
        # Without a delimiter there is no further option on this line
        if not parse_option_delimiter(s):
            break

    if not options:
        return None

    has_description = s.peek() in (' ', '\t', '\n')
    description = parse_description(s) if has_description else None
    return OptionsWithDescription(options, description)
def parse(s):
    '''
    Parse the whole stream and return a list of results.

    Each element is either an OptionsWithDescription or, for a line that
    does not start with an option, an Unparsed holding that line.
    '''
    results = []
    while not s.is_end():
        entry = parse_options_with_description(s)
        results.append(entry if entry else Unparsed(eat_line(s)))
    return results
if __name__ == '__main__':
    import sys

    # Read the help text from the file named on the command line, or from
    # standard input if no file is given. sys.stdin is used instead of
    # opening '/dev/stdin', which does not exist on every platform.
    if len(sys.argv) > 1:
        with open(sys.argv[1], 'r') as fh:
            content = fh.read()
    else:
        content = sys.stdin.read()

    for o in parse(CharStream(content)):
        print(o)
Editor is loading...
Leave a Comment