def _iter_grammar_lines(lines):
    """
    Yield ``(linenum, text)`` for each logical line of a grammar.

    Every physical line is stripped of surrounding whitespace.  Blank
    lines and lines starting with ``#`` are skipped.  A line ending in a
    backslash is joined (with a single space) to the line that follows.

    :param lines: an iterable of strings, one per physical line.
    :return: an iterator over ``(linenum, text)`` pairs, where
        ``linenum`` is the 1-based number of the last physical line
        that contributed to ``text``.
    """
    pending = ''
    linenum = 0
    for linenum, raw in enumerate(lines, 1):
        line = pending + raw.strip()
        if line.startswith('#') or line == '':
            continue
        if line.endswith('\\'):
            # Drop the continuation marker and remember the text so far.
            pending = line[:-1].rstrip() + ' '
            continue
        pending = ''
        yield linenum, line
    # A continuation marker on the very last line has nothing to join
    # with; emit what was collected instead of silently discarding it.
    leftover = pending.strip()
    if leftover:
        yield linenum, leftover


def read_grammar(input, nonterm_parser, probabilistic=False, encoding=None):
    """
    Return a pair consisting of a starting category and a list of
    ``Productions``.

    Blank lines and lines starting with ``#`` are ignored, and a line
    ending in a backslash is continued on the next line.  A line of the
    form ``%start X`` sets the starting category; every other line is
    handed to ``_read_production``.  If no ``%start`` directive is
    given, the left-hand side of the first production is used.

    :param input: a grammar, either in the form of a string or else
        as a list of strings.
    :param nonterm_parser: a function for parsing nonterminals.
        It should take a ``(string, position)`` as argument and
        return a ``(nonterminal, position)`` as result.
    :param probabilistic: are the grammar rules probabilistic?
    :type probabilistic: bool
    :param encoding: the encoding of the grammar, if it is a binary string
    :type encoding: str
    :return: a ``(start, productions)`` pair.
    :raise ValueError: if a line cannot be parsed, if a directive is
        unknown or malformed, or if the grammar has no productions.
    """
    # Only binary input needs decoding; text that is already decoded
    # has no usable ``decode`` on Python 3 and is passed through as-is.
    if encoding is not None and isinstance(input, (bytes, bytearray)):
        input = input.decode(encoding)
    if isinstance(input, string_types):
        lines = input.split('\n')
    else:
        lines = input

    start = None
    productions = []
    for linenum, line in _iter_grammar_lines(lines):
        try:
            if line[0] == '%':
                fields = line[1:].split(None, 1)
                if not fields or fields[0] != 'start':
                    raise ValueError('Bad directive')
                if len(fields) < 2:
                    # ``%start`` with nothing after it.
                    raise ValueError('Bad argument to start directive')
                args = fields[1]
                start, pos = nonterm_parser(args, 0)
                # The whole argument must be consumed by the nonterminal.
                if pos != len(args):
                    raise ValueError('Bad argument to start directive')
            else:
                # expand out the disjunctions on the RHS
                productions += _read_production(line, nonterm_parser,
                                                probabilistic)
        except ValueError as e:
            raise ValueError('Unable to parse line %s: %s\n%s' %
                             (linenum, line, e))

    if not productions:
        raise ValueError('No productions found!')
    # Compare against None so that a start symbol which happens to be
    # falsy is not replaced by the default.
    if start is None:
        start = productions[0].lhs()
    return (start, productions)
评论列表
文章目录