Remove opmap from pgen.

This commit is contained in:
Dave Halter
2017-08-22 08:45:10 +02:00
parent d7f2051f8a
commit 929593701a
3 changed files with 32 additions and 6 deletions

View File

@@ -6,6 +6,7 @@ from parso.pgen2.pgen import generate_grammar
from parso.utils import split_lines, python_bytes_to_unicode, parse_version_string
from parso.python.diff import DiffParser
from parso.python.tokenize import tokenize_lines, tokenize
from parso.python import token
from parso.cache import parser_cache, load_module, save_module
from parso.parser import BaseParser
from parso.python.parser import Parser as PythonParser
@@ -22,10 +23,14 @@ class Grammar(object):
:param text: A BNF representation of your grammar.
"""
_error_normalizer_config = None
_token_namespace = None
_default_normalizer_config = pep8.PEP8NormalizerConfig()
def __init__(self, text, tokenizer, parser=BaseParser, diff_parser=None):
self._pgen_grammar = generate_grammar(text)
self._pgen_grammar = generate_grammar(
text,
token_namespace=self._get_token_namespace()
)
self._parser = parser
self._tokenizer = tokenizer
self._diff_parser = diff_parser
@@ -131,6 +136,12 @@ class Grammar(object):
cache_path=cache_path)
return root_node
def _get_token_namespace(self):
    """
    Return the token namespace configured on this grammar.

    The value is read from ``self._token_namespace``.

    :raises ValueError: If ``_token_namespace`` is still ``None``.
    """
    namespace = self._token_namespace
    if namespace is not None:
        return namespace
    raise ValueError("The token namespace should be set.")
def iter_errors(self, node):
if self._error_normalizer_config is None:
raise ValueError("No error normalizer specified for this grammar.")
@@ -167,6 +178,7 @@ class Grammar(object):
class PythonGrammar(Grammar):
_error_normalizer_config = ErrorFinderConfig()
_token_namespace = token
def __init__(self, version_info, bnf_text):
super(PythonGrammar, self).__init__(

View File

@@ -12,7 +12,7 @@ from parso.utils import parse_version_string
class ParserGenerator(object):
def __init__(self, bnf_text):
def __init__(self, bnf_text, token_namespace):
self._bnf_text = bnf_text
self.generator = tokenize.tokenize(
bnf_text,
@@ -22,6 +22,7 @@ class ParserGenerator(object):
self.dfas, self.startsymbol = self._parse()
self.first = {} # map from symbol name to set of tokens
self._addfirstsets()
self._token_namespace = token_namespace
def make_grammar(self):
c = grammar.Grammar(self._bnf_text)
@@ -73,7 +74,7 @@ class ParserGenerator(object):
return ilabel
else:
# A named token (NAME, NUMBER, STRING)
itoken = getattr(token, label, None)
itoken = getattr(self._token_namespace, label, None)
assert isinstance(itoken, int), label
if itoken in c.tokens:
return c.tokens[itoken]
@@ -90,12 +91,13 @@ class ParserGenerator(object):
if value in c.keywords:
return c.keywords[value]
else:
# TODO this might be an issue?! Using token.NAME here?
c.labels.append((token.NAME, value))
c.keywords[value] = ilabel
return ilabel
else:
# An operator (any non-numeric token)
itoken = token.opmap[value] # Fails if unknown token
itoken = self._token_namespace.generate_token_id(value)
if itoken in c.tokens:
return c.tokens[itoken]
else:
@@ -384,7 +386,7 @@ class DFAState(object):
__hash__ = None # For Py3 compatibility.
def generate_grammar(bnf_text):
def generate_grammar(bnf_text, token_namespace):
"""
``bnf_text`` is a grammar in extended BNF (using * for repetition, + for
at-least-once repetition, [] for optional parts, | for alternatives and ()
@@ -393,5 +395,5 @@ def generate_grammar(bnf_text):
It's not EBNF according to ISO/IEC 14977. It's a dialect Python uses in its
own parser.
"""
p = ParserGenerator(bnf_text)
p = ParserGenerator(bnf_text, token_namespace)
return p.make_grammar()

View File

@@ -93,3 +93,15 @@ opmap = {}
# Fill ``opmap`` from ``opmap_raw``: every line holds two whitespace-separated
# fields, an operator string and a name. Each operator string is mapped to the
# value that name is bound to in ``globals()``.
for line in opmap_raw.splitlines():
op, name = line.split()
opmap[op] = globals()[name]
def generate_token_id(string):
    """
    Take a token string used in the grammar (e.g. ``'+'``) and return the
    corresponding ID for it. The strings are part of the grammar file.

    ``opmap`` is consulted first; a string that is not a key there is looked
    up by name in ``globals()`` instead.

    :raises KeyError: If the string is found in neither place.
    """
    if string in opmap:
        return opmap[string]
    return globals()[string]