parse.py

mail@pastecode.io avatar
unknown
python
18 days ago
7.5 kB
9
Indexable
Never
#!/usr/bin/python3

from collections import namedtuple

# Characters that should not be considered option chars.
# The newline is included so that an option standing alone on its line
# (e.g. "--help\n") ends at the line break instead of running on into
# the following line.
OPTION_BREAK_CHARS = [' ', '\t', '\n', ',', '|', '=', '[']

# Characters that delimit options
OPTION_DELIMITER_CHARS = [',', '|']

# A line of input that does not start with an option; `text` is the raw line.
Unparsed = namedtuple('Unparsed', ['text'])
# A single option string, its metavar (or None), and whether the metavar
# was written in the optional `[=METAVAR]` form.
OptionWithMetavar = namedtuple('OptionWithMetavar', ['option', 'metavar', 'optional'])
# A list of OptionWithMetavar items and their description text (or None).
OptionsWithDescription = namedtuple('OptionsWithDescription', ['options', 'description'])

class CharStream:
    '''
    A read cursor over a string with lookahead and cheap copies.

    Attributes:
        s:   the complete text being read
        len: length of `s`, computed once in the constructor
        pos: index of the next character that `get()` will return
    '''
    def __init__(self, s, pos = 0):
        self.s = s
        self.len = len(s)
        self.pos = pos

    def peek(self, relative_pos = 0):
        '''
        Return the character at `pos + relative_pos` without consuming it.

        Returns None if that index lies outside the text (before the start
        or at/after the end).
        '''
        index = self.pos + relative_pos
        # Check the lower bound explicitly: a negative index would otherwise
        # wrap around and return a character from the end of the text.
        if 0 <= index < self.len:
            return self.s[index]
        return None

    def peek_str(self, length):
        '''
        Return up to `length` upcoming characters without consuming them.

        The result is shorter than `length` near the end of the text.
        '''
        return self.s[self.pos:self.pos + length]

    def get(self):
        '''
        Consume and return the next character.

        Raises IndexError if the stream is already at the end.
        '''
        c = self.s[self.pos]
        self.pos += 1
        return c

    def is_space(self):
        '''
        Return True if the next character is a space or a tab.
        '''
        return self.peek() in (' ', '\t')

    def is_end(self):
        '''
        Return True if all characters have been consumed.
        '''
        return self.pos >= self.len

    def copy(self):
        '''
        Return a new stream over the same text at the same position.

        Advancing the copy does not move this stream, which allows callers
        to look ahead and only commit the position on success.
        '''
        return CharStream(self.s, self.pos)

    def __repr__(self):
        # Show only the unread remainder of the current line
        end = self.s.find('\n', self.pos)
        if end == -1:
            end = self.len
        return "CharStream(%r)" % self.s[self.pos:end]

def eat_line(s):
    '''
    Consume the remainder of the current line and return it.

    The terminating newline, if there is one, is consumed and included in
    the result. If the input ends first, whatever was left is returned
    (an empty string if the stream was already at the end).
    '''
    chars = []
    while not s.is_end():
        chars.append(s.get())
        if chars[-1] == '\n':
            break
    return ''.join(chars)

def eat_space(s):
    '''
    Consume a run of spaces and tabs and return the consumed text.

    Returns an empty string if the stream is not positioned at a space or tab.
    '''
    consumed = []
    while s.is_space():
        consumed.append(s.get())
    return ''.join(consumed)

def parse_option_string(s):
    '''
    Read an option string (e.g. --help, -h) and return it.

    Leading spaces and tabs are skipped. The option must start with '-';
    characters are then consumed until one listed in OPTION_BREAK_CHARS
    or the end of the input is reached.

    A bare '-' or '--' is not considered an option.

    Returns the option string, or None if there is no option here. The
    stream position is only advanced when an option is returned.
    '''
    # Work on a copy so that a failed attempt leaves `s` untouched
    probe = s.copy()
    eat_space(probe)

    if probe.peek() != '-':
        return None

    chars = []
    while not (probe.is_end() or probe.peek() in OPTION_BREAK_CHARS):
        chars.append(probe.get())
    option = ''.join(chars)

    if option in ('-', '--'):
        return None

    # Success: commit the lookahead position
    s.pos = probe.pos
    return option

def parse_bracket(s):
    '''
    Read and return a bracketed expression, including both brackets.

    Recognized forms:
        <foo bar>
        [foo bar]
        (foo bar)
        {foo bar}

    Returns None (consuming nothing) if the next character is not an
    opening bracket. Reading stops at the first matching closing character;
    nesting is not tracked. If no closing character is found, everything up
    to the end of the input is returned.
    '''
    closers = {'<':'>', '[':']', '(':')', '{':'}'}

    opener = s.peek()
    closing = closers.get(opener)
    if closing is None:
        return None

    # Step over the opening bracket, then collect up to the closing one
    s.get()
    pieces = [opener]
    while not s.is_end():
        char = s.get()
        pieces.append(char)
        if char == closing:
            break

    return ''.join(pieces)

def parse_string(s):
    '''
    Read and return a quoted string, including both quote characters.

    Recognized forms:
        'foo bar'
        "foo bar"

    Since it is unlikely that we encounter escape sequences in a description
    string of an option, escape sequences are not processed.

    Returns None (consuming nothing) if the next character is not a quote.
    If the closing quote is missing, everything up to the end of the input
    is returned.
    '''
    quote = s.peek()
    if quote != '"' and quote != "'":
        return None

    # Step over the opening quote, then collect up to the matching one
    s.get()
    pieces = [quote]
    while not s.is_end():
        char = s.get()
        pieces.append(char)
        if char == quote:
            break

    return ''.join(pieces)

def parse_metavar(s):
    '''
    Read and return a metavar.

    Everything up to the next space, tab or newline (or the end of the
    input) is considered part of the metavar. The result is an empty string
    if the stream is already at one of those.

    Bracketed expressions (opened by `<`, `[`, `(` or `{`) and quoted
    strings (`'` or `"`) are read as a unit, so whitespace inside them is
    preserved and does not end the metavar.

    Examples of metavars:
        foo_bar
        'foo bar'
        "foo bar"
        <foo bar>
    '''
    pieces = []

    while not s.is_end():
        char = s.peek()
        if char in (' ', '\t', '\n'):
            break

        if char in ('<', '[', '(', '{'):
            pieces.append(parse_bracket(s))
        elif char in ('"', "'"):
            pieces.append(parse_string(s))
        else:
            pieces.append(s.get())

    return ''.join(pieces)

def parse_trailing_description_line(s):
    '''
    Read and return a trailing (continuation) description line.

    A line qualifies if:
      - it starts with a space or tab, and
      - its first character after that indentation is not a hyphen ('-'),
        which would indicate the start of a new option.

    Returns the line without its indentation (including the newline, if
    any), or None if the line does not qualify. The stream is only advanced
    when a line is returned.
    '''
    # Work on a copy so that a rejected line leaves `s` untouched
    probe = s.copy()

    indented = False
    while probe.is_space():
        probe.get()
        indented = True

    if not indented or probe.peek() == '-':
        return None

    line = eat_line(probe)
    s.pos = probe.pos
    return line

def parse_description(s):
    '''
    Read and return the description of an option.

    The description is the rest of the current line (after skipping leading
    spaces and tabs) plus every directly following trailing description
    line. Collection stops at the first line that is not a trailing
    description line, or that comes back empty.
    '''
    eat_space(s)
    pieces = [eat_line(s)]

    line = parse_trailing_description_line(s)
    while line:
        pieces.append(line)
        line = parse_trailing_description_line(s)

    return ''.join(pieces)

def parse_option_with_metavar(s):
    '''
    Read an option together with its metavar (if any).

    Valid inputs are:
      --option=METAVAR
      --option[=METAVAR] (in this case, 'optional' is set to True; the
                          metavar text is read starting at the '[', so it
                          keeps the brackets and the '=')
      --option METAVAR

    Invalid inputs are:
      --option  METAVAR (notice two spaces)

    Returns an OptionWithMetavar, or None if no option string could be read.
    '''
    name = parse_option_string(s)
    if not name:
        return None

    if s.peek_str(2) == '[=':
        return OptionWithMetavar(name, parse_metavar(s), True)

    if s.peek() == '=':
        s.get()
        return OptionWithMetavar(name, parse_metavar(s), False)

    without_metavar = OptionWithMetavar(name, None, False)

    # Two whitespace characters after --option mean the description follows
    if s.peek_str(2).isspace():
        return without_metavar

    # An option delimiter cannot be a metavar; probe on a copy so the
    # delimiter itself is left in the stream
    if parse_option_delimiter(s.copy()):
        return without_metavar

    # A single space or tab separates the option from its metavar
    if not s.is_end() and s.is_space():
        s.get()
        return OptionWithMetavar(name, parse_metavar(s), False)

    return without_metavar

def parse_option_delimiter(s):
    '''
    Parse an option delimiter (',' or '|', optionally preceded by spaces or tabs).

    Returns True and advances the stream past the delimiter if one was
    found. Otherwise returns False and leaves the stream untouched; this
    includes the case where the input ends before any delimiter.
    '''
    p = s.copy()
    eat_space(p)
    # Peek before consuming: peek() yields None at the end of the input,
    # whereas get() would raise IndexError there.
    if p.peek() in (',', '|'):
        p.get()
        s.pos = p.pos
        return True
    return False

def parse_options_with_description(s):
    '''
    Read a delimiter-separated list of options and their description.

    Options are read one after another for as long as each is followed by
    an option delimiter. If the character after the options is a space, tab
    or newline, the description is read as well; otherwise the description
    is None.

    Returns an OptionsWithDescription, or None if no option was found.
    '''
    found = []

    while not s.is_end():
        parsed = parse_option_with_metavar(s)
        if not parsed:
            break
        found.append(parsed)

        # No delimiter after this option: the option list is complete
        if not parse_option_delimiter(s):
            break

    if not found:
        return None

    if s.peek() in (' ', '\t', '\n'):
        description = parse_description(s)
    else:
        description = None

    return OptionsWithDescription(found, description)

def parse(s):
    '''
    Parse the whole stream and return a list of results.

    Each element is either an OptionsWithDescription (for input that starts
    with an option) or an Unparsed holding one raw line that could not be
    parsed as options.
    '''
    results = []

    while not s.is_end():
        entry = parse_options_with_description(s)
        if not entry:
            # Not an option line: keep it verbatim and move on to the next line
            entry = Unparsed(eat_line(s))
        results.append(entry)

    return results

if __name__ == '__main__':
    import sys

    # Parse the file named by the first command line argument, or standard
    # input if no argument is given, and print one parsed item per line.
    if len(sys.argv) > 1:
        with open(sys.argv[1], 'r') as fh:
            content = fh.read()
    else:
        # Read sys.stdin directly instead of opening '/dev/stdin', a path
        # that does not exist on every platform.
        content = sys.stdin.read()

    for o in parse(CharStream(content)):
        print(o)
Leave a Comment