From 929593701a5f1558fc17c1179214ded83189e854 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Tue, 22 Aug 2017 08:45:10 +0200 Subject: [PATCH] Remove opmap from pgen. --- parso/grammar.py | 14 +++++++++++++- parso/pgen2/pgen.py | 12 +++++++----- parso/python/token.py | 12 ++++++++++++ 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/parso/grammar.py b/parso/grammar.py index 34989db..88a8aa3 100644 --- a/parso/grammar.py +++ b/parso/grammar.py @@ -6,6 +6,7 @@ from parso.pgen2.pgen import generate_grammar from parso.utils import split_lines, python_bytes_to_unicode, parse_version_string from parso.python.diff import DiffParser from parso.python.tokenize import tokenize_lines, tokenize +from parso.python import token from parso.cache import parser_cache, load_module, save_module from parso.parser import BaseParser from parso.python.parser import Parser as PythonParser @@ -22,10 +23,14 @@ class Grammar(object): :param text: A BNF representation of your grammar. """ _error_normalizer_config = None + _token_namespace = None _default_normalizer_config = pep8.PEP8NormalizerConfig() def __init__(self, text, tokenizer, parser=BaseParser, diff_parser=None): - self._pgen_grammar = generate_grammar(text) + self._pgen_grammar = generate_grammar( + text, + token_namespace=self._get_token_namespace() + ) self._parser = parser self._tokenizer = tokenizer self._diff_parser = diff_parser @@ -131,6 +136,12 @@ class Grammar(object): cache_path=cache_path) return root_node + def _get_token_namespace(self): + ns = self._token_namespace + if ns is None: + raise ValueError("The token namespace should be set.") + return ns + def iter_errors(self, node): if self._error_normalizer_config is None: raise ValueError("No error normalizer specified for this grammar.") @@ -167,6 +178,7 @@ class Grammar(object): class PythonGrammar(Grammar): _error_normalizer_config = ErrorFinderConfig() + _token_namespace = token def __init__(self, version_info, bnf_text): super(PythonGrammar, 
self).__init__( diff --git a/parso/pgen2/pgen.py b/parso/pgen2/pgen.py index 9b58893..10ef6ff 100644 --- a/parso/pgen2/pgen.py +++ b/parso/pgen2/pgen.py @@ -12,7 +12,7 @@ from parso.utils import parse_version_string class ParserGenerator(object): - def __init__(self, bnf_text): + def __init__(self, bnf_text, token_namespace): self._bnf_text = bnf_text self.generator = tokenize.tokenize( bnf_text, @@ -22,6 +22,7 @@ class ParserGenerator(object): self.dfas, self.startsymbol = self._parse() self.first = {} # map from symbol name to set of tokens self._addfirstsets() + self._token_namespace = token_namespace def make_grammar(self): c = grammar.Grammar(self._bnf_text) @@ -73,7 +74,7 @@ class ParserGenerator(object): return ilabel else: # A named token (NAME, NUMBER, STRING) - itoken = getattr(token, label, None) + itoken = getattr(self._token_namespace, label, None) assert isinstance(itoken, int), label if itoken in c.tokens: return c.tokens[itoken] @@ -90,12 +91,13 @@ class ParserGenerator(object): if value in c.keywords: return c.keywords[value] else: + # TODO this might be an issue?! Using token.NAME here? c.labels.append((token.NAME, value)) c.keywords[value] = ilabel return ilabel else: # An operator (any non-numeric token) - itoken = token.opmap[value] # Fails if unknown token + itoken = self._token_namespace.generate_token_id(value) if itoken in c.tokens: return c.tokens[itoken] else: @@ -384,7 +386,7 @@ class DFAState(object): __hash__ = None # For Py3 compatibility. -def generate_grammar(bnf_text): +def generate_grammar(bnf_text, token_namespace): """ ``bnf_text`` is a grammar in extended BNF (using * for repetition, + for at-least-once repetition, [] for optional parts, | for alternatives and () @@ -393,5 +395,5 @@ def generate_grammar(bnf_text): It's not EBNF according to ISO/IEC 14977. It's a dialect Python uses in its own parser. 
""" - p = ParserGenerator(bnf_text) + p = ParserGenerator(bnf_text, token_namespace) return p.make_grammar() diff --git a/parso/python/token.py b/parso/python/token.py index b63bd33..e23f78b 100644 --- a/parso/python/token.py +++ b/parso/python/token.py @@ -93,3 +93,15 @@ opmap = {} for line in opmap_raw.splitlines(): op, name = line.split() opmap[op] = globals()[name] + + +def generate_token_id(string): + """ + Uses a token in the grammar (e.g. `'+'` or `'and'`) and returns the + corresponding ID for it. The strings are part of the grammar file. + """ + try: + return opmap[string] + except KeyError: + pass + return globals()[string]